{ open Tokens module T = Qsp_syntax.T exception UnclosedQuote of { content: string ; line : int} (* The comment system is terrible. The same symbol can be used for : - starting a comment - inequality operation In order to manage this, I try to identify the context in a very basic way, using a flag True False for determining the token to send. *) module Bucket = Ephemeron.K1.Bucket type bucket = (Lexing.lexbuf, int) Bucket.t let is_expression : bucket = Bucket.make () let incr_level lexbuf = match Bucket.find is_expression lexbuf with | None -> Bucket.add is_expression lexbuf 1 | Some v -> Bucket.add is_expression lexbuf (v+1) let decr_level lexbuf = match Bucket.find is_expression lexbuf with | None -> () | Some v -> if v > 1 then Bucket.add is_expression lexbuf (v-1) else Bucket.remove is_expression lexbuf let keyword_table = Hashtbl.create 53 let _ = List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) [ "ACT", ACT ; "ADDLIB", KEYWORD "INCLIB" ; "ADDOBJ", KEYWORD "ADDOBJ" ; "ADD OBJ", KEYWORD "ADDOBJ" ; "ADDQST", KEYWORD "INCLIB" ; "AND", AND ; "ARRCOMP", FUNCTION T.Arrcomp ; "ARRPOS", FUNCTION T.Arrpos ; "ARRSIZE", FUNCTION T.Arrsize (* ; "BACKIMAGE", KEYWORD "BACKIMAGE" ; "$BACKIMAGE", KEYWORD "BACKIMAGE" ; "BCOLOR", KEYWORD "BCOLOR" *) ; "CLA", KEYWORD "CLA" ; "CLEAR", KEYWORD "CLEAR" ; "*CLEAR", KEYWORD "*CLEAR" ; "CLOSE", KEYWORD "CLOSE" ; "CLOSE ALL", KEYWORD "CLOSEALL" ; "CLR", KEYWORD "CLEAR" ; "*CLR", KEYWORD "*CLEAR" ; "CLS", KEYWORD "CLS" ; "CMDCLEAR", KEYWORD "CMDCLEAR" ; "CMDCLR", KEYWORD "CMDCLEAR" ; "COPYARR", KEYWORD "COPYARR" ; "COUNTOBJ", FUNCTION T.Countobj ; "CURACTS", IDENT "CURACTS" ; "$CURACTS", IDENT "$CURACTS" ; "CURLOC", IDENT "CURLOC" ; "$CURLOC", IDENT "$CURLOC" (* ; "DEBUG", KEYWORD "DEBUG" *) ; "DELACT", KEYWORD "DELACT" ; "DEL ACT", KEYWORD "DELACT" ; "DELLIB", KEYWORD "FREELIB" ; "DELOBJ", KEYWORD "DELOBJ" ; "DEL OBJ", KEYWORD "DELOBJ" ; "DESC", FUNCTION T.Desc ; "$DESC", FUNCTION T.Desc' (* ; "DISABLESCROLL", KEYWORD "DISABLESCROLL" ; "DISABLESUBEX", KEYWORD "DISABLESUBEX" *) ; "DYNAMIC", KEYWORD "DYNAMIC" ; "DYNEVAL", FUNCTION T.Dyneval ; "$DYNEVAL", FUNCTION T.Dyneval' ; "ELSE", ELSE ; "ELSEIF", ELIF ; "END", END ; "EXEC", KEYWORD "EXEC" ; "EXIT", KEYWORD "EXIT" (* ; "FCOLOR", KEYWORD "FCOLOR" ; "$FNAME", KEYWORD "$FNAME" *) ; "FREELIB", KEYWORD "FREELIB" (* ; "FSIZE", KEYWORD "FSIZE" *) ; "FUNC", FUNCTION T.Func ; "$FUNC", FUNCTION T.Func' ; "GETOBJ", FUNCTION T.Getobj ; "$GETOBJ", FUNCTION T.Getobj' ; "GOSUB", KEYWORD "GOSUB" ; "GOTO", KEYWORD "GOTO" (* ; "GC", KEYWORD "GC" *) ; "GS", KEYWORD "GOSUB" ; "GT", KEYWORD "GOTO" ; "IF", IF ; "IIF", FUNCTION T.Iif ; "$IIF", FUNCTION T.Iif' ; "INCLIB", KEYWORD "INCLIB" ; "INPUT", FUNCTION T.Input ; "$INPUT", FUNCTION T.Input' ; "INSTR", FUNCTION T.Instr ; "ISNUM", FUNCTION T.Isnum ; "ISPLAY", FUNCTION T.Isplay ; "JUMP", KEYWORD "JUMP" ; "KILLALL", KEYWORD "KILLALL" ; "KILLOBJ", KEYWORD "KILLOBJ" ; "KILLQST", KEYWORD "FREELIB" ; "KILLVAR", KEYWORD "KILLVAR" ; "LCASE", FUNCTION T.Lcase ; "$LCASE", FUNCTION T.Lcase' (* ; "LCOLOR", KEYWORD "LCOLOR" *) ; "LEN", FUNCTION T.Len ; "LET", LET ; "LOC", FUNCTION T.Loc ; "MAINTXT", IDENT "MAINTXT" ; "$MAINTXT", IDENT "MAINTXT" ; "MAX", FUNCTION T.Max ; "$MAX", FUNCTION T.Max' ; "MENU", KEYWORD "MENU" ; "MID", FUNCTION T.Mid ; "$MID", FUNCTION T.Mid' ; "MIN", FUNCTION T.Min ; "$MIN", FUNCTION T.Min' ; "MOD", MOD ; "MSECSCOUNT", FUNCTION T.Msecscount ; "MSG", KEYWORD "MSG" ; "NL", KEYWORD "NL" ; "*NL", KEYWORD "*NL" ; "NO", NO (* ; "NOSAVE", KEYWORD "NOSAVE" *) ; "OBJ", OBJ ; "$ONACTSEL", IDENT "$ONACTSEL" ; "$ONGLOAD", IDENT "$ONGLOAD" ; "$ONGSAVE", IDENT "$ONGSAVE" ; "$ONNEWLOC", IDENT "$ONNEWLOC" ; "$ONOBJADD", IDENT "$ONOBJADD" ; "$ONOBJDEL", IDENT "$ONOBJDEL" ; "$ONOBJSEL", IDENT "$ONOBJSEL" ; "OPENGAME", KEYWORD "OPENGAME" ; "OPENQST", KEYWORD "OPENQST" ; "OR", OR ; "P", KEYWORD "P" ; "*P", KEYWORD "*P" ; "PL", KEYWORD "PL" ; "*PL", KEYWORD "*PL" ; "PLAY", KEYWORD "PLAY" ; "QSPVER", FUNCTION T.Qspver ; "$QSPVER", FUNCTION T.Qspver' ; "RAND", FUNCTION T.Rand ; "REFINT", KEYWORD "REFINT" ; "REPLACE", FUNCTION T.Replace ; "$REPLACE", FUNCTION T.Replace' ; "RGB", FUNCTION T.Rgb ; "RND", FUNCTION T.Rnd ; "SAVEGAME", KEYWORD "SAVEGAME" ; "SELACT", FUNCTION T.Selact ; "$SELACT", IDENT "$SELACT" ; "SELOBJ", IDENT "SELOBJ" ; "$SELOBJ", IDENT "$SELOBJ" ; "SET", SET ; "SETTIMER", KEYWORD "SETTIMER" ; "SHOWACTS", KEYWORD "SHOWACTS" ; "SHOWINPUT", KEYWORD "SHOWINPUT" ; "SHOWOBJS", KEYWORD "SHOWOBJS" ; "SHOWSTAT", KEYWORD "SHOWSTAT" ; "STATTXT", FUNCTION T.Stattxt ; "$STATTXT", FUNCTION T.Stattxt' ; "STR", FUNCTION T.Str ; "$STR", FUNCTION T.Str' ; "STRCOMP", FUNCTION T.Strcomp ; "STRFIND", FUNCTION T.Strfind ; "$STRFIND", FUNCTION T.Strfind' ; "STRPOS", FUNCTION T.Strpos ; "TRIM", FUNCTION T.Trim ; "$TRIM", FUNCTION T.Trim' ; "UCASE", FUNCTION T.Ucase ; "$UCASE", FUNCTION T.Ucase' ; "UNSEL", KEYWORD "UNSELECT" ; "UNSELECT", KEYWORD "UNSELECT" ; "USEHTML", IDENT "USEHTML" ; "USERCOM", IDENT "USERCOM" ; "$USERCOM", IDENT "$USERCOM" ; "USER_TEXT", IDENT "USER_TEXT" ; "$USER_TEXT", IDENT "USER_TEXT" ; "USRTXT", IDENT "USER_TEXT" ; "$USRTXT", IDENT "USER_TEXT" ; "VAL", FUNCTION T.Val ; "VIEW", KEYWORD "VIEW" ; "WAIT", KEYWORD "WAIT" ; "XGOTO", KEYWORD "XGOTO" ; "XGT", KEYWORD "XGOTO" ] let ident lexbuf id = let id = String.uppercase_ascii id in try let value = Hashtbl.find keyword_table id in let _ = match value with | IF | ELIF -> incr_level lexbuf | _ -> () in value with Not_found -> IDENT id let wait_balance rule lexbuf = try[@warning "-52"] rule (Buffer.create 17) lexbuf with Failure "lexing: empty token" -> let line = lexbuf.Lexing.lex_curr_p.pos_lnum and content = Bytes.to_string lexbuf.Lexing.lex_buffer in raise (UnclosedQuote {line; content}) [@warning "+52"] } let space = [ ' ' '\t' ] let coma = ',' let letters = [^ '!' ':' '&' '=' '<' '>' '+' '-' '*' '/' ',' '"' '\'' '(' ')' '[' ']' ' ' '\t' '\n' '\r' '{' '}'] let digit = [ '0'-'9' ] let eol = [ '\r' '\n' ] let spaces = space+ let ident = (letters # digit) letters* rule token = parse | '#' spaces* (('!' | '$' | '#' | '^')+)? (ident as loc) { Lexing.set_filename lexbuf loc; LOCATION_START loc} | (digit+) as l { INTEGER l} | '+' { PLUS } | '-' { MINUS } | "+=" { INCR } | "-=" { DECR } | '/' { DIV } | '*' { STAR } | ':' { (* We are leaving the block, the comment will be handled again *) decr_level lexbuf; COLUMN } | '-' '-'+ [ ^ '\r' '\n']* { LOCATION_END } | '[' { L_BRACKET } | ']' { R_BRACKET } | '(' { incr_level lexbuf; L_PAREN } | ')' { decr_level lexbuf; R_PAREN } | '<' { LT } | '>' { GT } | coma { COMA } | eof { Bucket.remove is_expression lexbuf; EOF } | '=' { incr_level lexbuf; EQUAL } | ident as l { ident lexbuf l} | eol { Bucket.add is_expression lexbuf 0; Lexing.new_line lexbuf; EOL } | '&' { Bucket.add is_expression lexbuf 0; AMPERSAND } | '!' { match Bucket.find is_expression lexbuf with | Some i when i <> 0 -> EXCLAMATION | _ -> skip_comment lexbuf } | spaces { token lexbuf } | '\'' { LITERAL (wait_balance read_quoted_string lexbuf) } | '"' { LITERAL (wait_balance read_dquoted_string lexbuf) } | '{' { LITERAL (wait_balance (read_long_string 0) lexbuf) } and skip_comment = parse | [^ '\'' '"' '{' '\r' '\n' ] { skip_comment lexbuf } | '{' { let _ = wait_balance (read_long_string 0) lexbuf in skip_comment lexbuf } | '\'' { let _ = wait_balance read_quoted_string lexbuf in skip_comment lexbuf } | '"' { let _ = wait_balance read_dquoted_string lexbuf in skip_comment lexbuf } | eol { (* Ugly hack used in order to put the eol in the front of the next parsing. *) lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - 1; (*Lexing.new_line lexbuf;*) COMMENT } (* Read the content until we got another one quote *) and read_quoted_string buf = parse | "''" { Buffer.add_char buf '\''; read_quoted_string buf lexbuf } | [^ '\'' '\n' '\r']+ { Buffer.add_string buf (Lexing.lexeme lexbuf); read_quoted_string buf lexbuf } | '\'' { (Buffer.contents buf) } | ['\n' '\r'] { Buffer.add_string buf (Lexing.lexeme lexbuf); Lexing.new_line lexbuf ; read_quoted_string buf lexbuf } and read_dquoted_string buf = parse | [^ '"' ]+ { Buffer.add_string buf (Lexing.lexeme lexbuf); read_dquoted_string buf lexbuf } | "\"\"" { Buffer.add_char buf '"'; read_dquoted_string buf lexbuf } | '"' { (Buffer.contents buf) } and read_long_string level buf = parse | [^ '{' '}' '\r' '\n' ]+ { Buffer.add_string buf (Lexing.lexeme lexbuf); read_long_string level buf lexbuf } | '{' { Buffer.add_string buf (Lexing.lexeme lexbuf); read_long_string (level + 1) buf lexbuf } | '}' { match level with | 0 -> (Buffer.contents buf) | _ -> Buffer.add_string buf (Lexing.lexeme lexbuf); read_long_string (level - 1) buf lexbuf } | eol { Lexing.new_line lexbuf; Buffer.add_string buf (Lexing.lexeme lexbuf); read_long_string level buf lexbuf }