diff options
Diffstat (limited to 'lib/lexer.mll')
-rw-r--r-- | lib/lexer.mll | 362 |
1 files changed, 0 insertions, 362 deletions
diff --git a/lib/lexer.mll b/lib/lexer.mll deleted file mode 100644 index d3d5e5b..0000000 --- a/lib/lexer.mll +++ /dev/null @@ -1,362 +0,0 @@ -{ - open Tokens - module T = Qsp_syntax.T - - exception UnclosedQuote of { content: string ; line : int} - - (* The comment system is terrible. The same symbol can be used for : - - starting a comment - - inequality operation - In order to manage this, I try to identify the context in a very basic way, - using a flag True False for determining the token to send. - *) - module Bucket = Ephemeron.K1.Bucket - type bucket = (Lexing.lexbuf, int) Bucket.t - let is_expression : bucket = Bucket.make () - - let incr_level lexbuf = - match Bucket.find is_expression lexbuf with - | None -> Bucket.add is_expression lexbuf 1 - | Some v -> Bucket.add is_expression lexbuf (v+1) - - let decr_level lexbuf = - match Bucket.find is_expression lexbuf with - | None -> () - | Some v -> - if v > 1 then - Bucket.add is_expression lexbuf (v-1) - else - Bucket.remove is_expression lexbuf - - let keyword_table = Hashtbl.create 53 - let _ = - List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) - [ "ACT", ACT - ; "ADDLIB", KEYWORD "INCLIB" - ; "ADDOBJ", KEYWORD "ADDOBJ" - ; "ADD OBJ", KEYWORD "ADDOBJ" - ; "ADDQST", KEYWORD "INCLIB" - ; "AND", AND - ; "ARRCOMP", FUNCTION T.Arrcomp - ; "ARRPOS", FUNCTION T.Arrpos - ; "ARRSIZE", FUNCTION T.Arrsize -(* - ; "BACKIMAGE", KEYWORD "BACKIMAGE" - ; "$BACKIMAGE", KEYWORD "BACKIMAGE" - ; "BCOLOR", KEYWORD "BCOLOR" -*) - ; "CLA", KEYWORD "CLA" - ; "CLEAR", KEYWORD "CLEAR" - ; "*CLEAR", KEYWORD "*CLEAR" - ; "CLOSE", KEYWORD "CLOSE" - ; "CLOSE ALL", KEYWORD "CLOSEALL" - ; "CLR", KEYWORD "CLEAR" - ; "*CLR", KEYWORD "*CLEAR" - ; "CLS", KEYWORD "CLS" - ; "CMDCLEAR", KEYWORD "CMDCLEAR" - ; "CMDCLR", KEYWORD "CMDCLEAR" - ; "COPYARR", KEYWORD "COPYARR" - ; "COUNTOBJ", FUNCTION T.Countobj - ; "CURACTS", IDENT "CURACTS" - ; "$CURACTS", IDENT "$CURACTS" - ; "CURLOC", IDENT "CURLOC" - ; "$CURLOC", IDENT "$CURLOC" -(* - ; "DEBUG", KEYWORD "DEBUG" -*) - ; "DELACT", KEYWORD "DELACT" - ; "DEL ACT", KEYWORD "DELACT" - ; "DELLIB", KEYWORD "FREELIB" - ; "DELOBJ", KEYWORD "DELOBJ" - ; "DEL OBJ", KEYWORD "DELOBJ" - ; "DESC", FUNCTION T.Desc - ; "$DESC", FUNCTION T.Desc' -(* - ; "DISABLESCROLL", KEYWORD "DISABLESCROLL" - ; "DISABLESUBEX", KEYWORD "DISABLESUBEX" -*) - ; "DYNAMIC", KEYWORD "DYNAMIC" - ; "DYNEVAL", FUNCTION T.Dyneval - ; "$DYNEVAL", FUNCTION T.Dyneval' - ; "ELSE", ELSE - ; "ELSEIF", ELIF - ; "END", END - ; "EXEC", KEYWORD "EXEC" - ; "EXIT", KEYWORD "EXIT" -(* - ; "FCOLOR", KEYWORD "FCOLOR" - ; "$FNAME", KEYWORD "$FNAME" -*) - ; "FREELIB", KEYWORD "FREELIB" -(* - ; "FSIZE", KEYWORD "FSIZE" -*) - ; "FUNC", FUNCTION T.Func - ; "$FUNC", FUNCTION T.Func' - ; "GETOBJ", FUNCTION T.Getobj - ; "$GETOBJ", FUNCTION T.Getobj' - ; "GOSUB", KEYWORD "GOSUB" - ; "GOTO", KEYWORD "GOTO" -(* - ; "GC", KEYWORD "GC" -*) - ; "GS", KEYWORD "GOSUB" - ; "GT", KEYWORD "GOTO" - ; "IF", IF - ; "IIF", FUNCTION T.Iif - ; "$IIF", FUNCTION T.Iif' - ; "INCLIB", KEYWORD "INCLIB" - ; "INPUT", FUNCTION T.Input - ; "$INPUT", FUNCTION T.Input' - ; "INSTR", FUNCTION T.Instr - ; "ISNUM", FUNCTION T.Isnum - ; "ISPLAY", FUNCTION T.Isplay - ; "JUMP", KEYWORD "JUMP" - ; "KILLALL", KEYWORD "KILLALL" - ; "KILLOBJ", KEYWORD "KILLOBJ" - ; "KILLQST", KEYWORD "FREELIB" - ; "KILLVAR", KEYWORD "KILLVAR" - ; "LCASE", FUNCTION T.Lcase - ; "$LCASE", FUNCTION T.Lcase' -(* - ; "LCOLOR", KEYWORD "LCOLOR" -*) - ; "LEN", FUNCTION T.Len - ; "LET", LET - ; "LOC", FUNCTION T.Loc - ; "MAINTXT", IDENT "MAINTXT" - ; "$MAINTXT", IDENT "MAINTXT" - ; "MAX", FUNCTION T.Max - ; "$MAX", FUNCTION T.Max' - ; "MENU", KEYWORD "MENU" - ; "MID", FUNCTION T.Mid - ; "$MID", FUNCTION T.Mid' - ; "MIN", FUNCTION T.Min - ; "$MIN", FUNCTION T.Min' - ; "MOD", MOD - ; "MSECSCOUNT", FUNCTION T.Msecscount - ; "MSG", KEYWORD "MSG" - ; "NL", KEYWORD "NL" - ; "*NL", KEYWORD "*NL" - ; "NO", NO -(* - ; "NOSAVE", KEYWORD "NOSAVE" -*) - ; "OBJ", OBJ - ; "$ONACTSEL", IDENT "$ONACTSEL" - ; "$ONGLOAD", IDENT "$ONGLOAD" - ; "$ONGSAVE", IDENT "$ONGSAVE" - ; "$ONNEWLOC", IDENT "$ONNEWLOC" - ; "$ONOBJADD", IDENT "$ONOBJADD" - ; "$ONOBJDEL", IDENT "$ONOBJDEL" - ; "$ONOBJSEL", IDENT "$ONOBJSEL" - ; "OPENGAME", KEYWORD "OPENGAME" - ; "OPENQST", KEYWORD "OPENQST" - ; "OR", OR - ; "P", KEYWORD "P" - ; "*P", KEYWORD "*P" - ; "PL", KEYWORD "PL" - ; "*PL", KEYWORD "*PL" - ; "PLAY", KEYWORD "PLAY" - ; "QSPVER", FUNCTION T.Qspver - ; "$QSPVER", FUNCTION T.Qspver' - ; "RAND", FUNCTION T.Rand - ; "REFINT", KEYWORD "REFINT" - ; "REPLACE", FUNCTION T.Replace - ; "$REPLACE", FUNCTION T.Replace' - ; "RGB", FUNCTION T.Rgb - ; "RND", FUNCTION T.Rnd - ; "SAVEGAME", KEYWORD "SAVEGAME" - ; "SELACT", FUNCTION T.Selact - ; "$SELACT", IDENT "$SELACT" - ; "SELOBJ", IDENT "SELOBJ" - ; "$SELOBJ", IDENT "$SELOBJ" - ; "SET", SET - ; "SETTIMER", KEYWORD "SETTIMER" - ; "SHOWACTS", KEYWORD "SHOWACTS" - ; "SHOWINPUT", KEYWORD "SHOWINPUT" - ; "SHOWOBJS", KEYWORD "SHOWOBJS" - ; "SHOWSTAT", KEYWORD "SHOWSTAT" - ; "STATTXT", FUNCTION T.Stattxt - ; "$STATTXT", FUNCTION T.Stattxt' - ; "STR", FUNCTION T.Str - ; "$STR", FUNCTION T.Str' - ; "STRCOMP", FUNCTION T.Strcomp - ; "STRFIND", FUNCTION T.Strfind - ; "$STRFIND", FUNCTION T.Strfind' - ; "STRPOS", FUNCTION T.Strpos - ; "TRIM", FUNCTION T.Trim - ; "$TRIM", FUNCTION T.Trim' - ; "UCASE", FUNCTION T.Ucase - ; "$UCASE", FUNCTION T.Ucase' - ; "UNSEL", KEYWORD "UNSELECT" - ; "UNSELECT", KEYWORD "UNSELECT" - ; "USEHTML", IDENT "USEHTML" - ; "USERCOM", IDENT "USERCOM" - ; "$USERCOM", IDENT "$USERCOM" - ; "USER_TEXT", IDENT "USER_TEXT" - ; "$USER_TEXT", IDENT "USER_TEXT" - ; "USRTXT", IDENT "USER_TEXT" - ; "$USRTXT", IDENT "USER_TEXT" - ; "VAL", FUNCTION T.Val - ; "VIEW", KEYWORD "VIEW" - ; "WAIT", KEYWORD "WAIT" - ; "XGOTO", KEYWORD "XGOTO" - ; "XGT", KEYWORD "XGOTO" - ] - - let ident lexbuf id = - let id = String.uppercase_ascii id in - try - let value = Hashtbl.find keyword_table id in - let _ = match value with - | IF | ELIF -> incr_level lexbuf - | _ -> () in - value - - with Not_found -> IDENT id - - let wait_balance rule lexbuf = - try[@warning "-52"] - rule (Buffer.create 17) lexbuf - with - Failure "lexing: empty token" -> - let line = lexbuf.Lexing.lex_curr_p.pos_lnum - and content = Bytes.to_string lexbuf.Lexing.lex_buffer in - raise (UnclosedQuote {line; content}) - [@warning "+52"] -} - -let space = [ ' ' '\t' ] -let coma = ',' -let letters = [^ '!' ':' '&' '=' '<' '>' '+' '-' '*' '/' ',' '"' '\'' '(' ')' '[' ']' ' ' '\t' '\n' '\r' '{' '}'] -let digit = [ '0'-'9' ] -let eol = [ '\r' '\n' ] -let spaces = space+ -let ident = (letters # digit) letters* - -rule token = parse - -| '#' spaces* (('!' | '$' | '#' | '^')+)? (ident as loc) { - Lexing.set_filename lexbuf loc; - LOCATION_START loc} -| (digit+) as l { INTEGER l} -| '+' { PLUS } -| '-' { MINUS } -| "+=" { INCR } -| "-=" { DECR } -| "*=" { MULT_EQUAL } -| '/' { DIV } -| '*' { STAR } -| ':' { - (* We are leaving the block, the comment will be handled again *) - decr_level lexbuf; - COLUMN } -| '-' '-'+ [ ^ '\r' '\n']* { LOCATION_END } -| '[' { L_BRACKET } -| ']' { R_BRACKET } -| '(' { - incr_level lexbuf; - L_PAREN } -| ')' { - decr_level lexbuf; - R_PAREN } -| '<' { LT } -| '>' { GT } -| coma { COMA } -| eof { - Bucket.remove is_expression lexbuf; - EOF } -| '=' { - incr_level lexbuf; - EQUAL } -| ident as l { ident lexbuf l} -| eol { - Bucket.add is_expression lexbuf 0; - Lexing.new_line lexbuf; EOL } -| '&' { - Bucket.add is_expression lexbuf 0; - AMPERSAND } -| '!' { - match Bucket.find is_expression lexbuf with - | Some i when i <> 0 -> EXCLAMATION - | _ -> skip_comment lexbuf -} -| spaces { token lexbuf } -| '\'' { LITERAL (wait_balance read_quoted_string lexbuf) } -| '"' { LITERAL (wait_balance read_dquoted_string lexbuf) } -| '{' { LITERAL (wait_balance (read_long_string 0) lexbuf) } - -and skip_comment = parse -| [^ '\'' '"' '{' '\r' '\n' ] { skip_comment lexbuf } -| '{' { - let _ = wait_balance (read_long_string 0) lexbuf in - skip_comment lexbuf } -| '\'' { - let _ = wait_balance read_quoted_string lexbuf in - skip_comment lexbuf -} -| '"' { - let _ = wait_balance read_dquoted_string lexbuf in - skip_comment lexbuf -} -| eol { - (* Ugly hack used in order to put the eol in the front of the next - parsing. *) - lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - 1; - (*Lexing.new_line lexbuf;*) - COMMENT -} - -(* Read the content until we got another one quote *) -and read_quoted_string buf = parse - | "''" - { Buffer.add_char buf '\''; - read_quoted_string buf lexbuf - } - | [^ '\'' '\n' '\r']+ - { Buffer.add_string buf (Lexing.lexeme lexbuf); - read_quoted_string buf lexbuf - } - | '\'' - { (Buffer.contents buf) - } - | ['\n' '\r'] - { Buffer.add_string buf (Lexing.lexeme lexbuf); - Lexing.new_line lexbuf ; - read_quoted_string buf lexbuf - } - -and read_dquoted_string buf = parse - | [^ '"' ]+ - { Buffer.add_string buf (Lexing.lexeme lexbuf); - read_dquoted_string buf lexbuf - } - | "\"\"" - { Buffer.add_char buf '"'; - read_dquoted_string buf lexbuf - } - | '"' - { (Buffer.contents buf) - } - -and read_long_string level buf = parse - | [^ '{' '}' '\r' '\n' ]+ - { Buffer.add_string buf (Lexing.lexeme lexbuf); - read_long_string level buf lexbuf - } - | '{' - { Buffer.add_string buf (Lexing.lexeme lexbuf); - read_long_string (level + 1) buf lexbuf } - | '}' - { match level with - | 0 -> (Buffer.contents buf) - | _ -> Buffer.add_string buf (Lexing.lexeme lexbuf); - read_long_string (level - 1) buf lexbuf - } -| eol -{ - Lexing.new_line lexbuf; - Buffer.add_string buf (Lexing.lexeme lexbuf); - read_long_string level buf lexbuf -} |