aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bin/dune1
-rw-r--r--bin/qsp_parser.ml3
-rw-r--r--lib/UTF16.ml201
-rw-r--r--lib/analyzer.ml4
-rw-r--r--lib/analyzer.mli2
-rw-r--r--lib/dune10
-rw-r--r--lib/idents.ml185
-rw-r--r--lib/interpreter.ml32
-rw-r--r--lib/lexer.mll362
-rw-r--r--test/dune1
-rw-r--r--test/qsp_parser_test.ml10
11 files changed, 421 insertions, 390 deletions
diff --git a/bin/dune b/bin/dune
index 9702e66..f7aacc3 100644
--- a/bin/dune
+++ b/bin/dune
@@ -2,6 +2,7 @@
(public_name qsp_parser)
(name qsp_parser)
(libraries
+ sedlex
qsp_syntax
qparser)
diff --git a/bin/qsp_parser.ml b/bin/qsp_parser.ml
index af12abe..4ed2ba7 100644
--- a/bin/qsp_parser.ml
+++ b/bin/qsp_parser.ml
@@ -43,7 +43,8 @@ let () =
let file_name = List.hd file_names in
let ic = Stdlib.open_in file_name in
- let lexer = Lexing.from_channel ~with_positions:true ic in
+ (*let lexer = Lexing.from_channel ~with_positions:true ic in*)
+ let lexer = Sedlexing.Utf8.from_channel ic in
let result = Qparser.Analyzer.parse (module Qsp_syntax.Type_of) lexer in
match result with
| Ok f -> (
diff --git a/lib/UTF16.ml b/lib/UTF16.ml
new file mode 100644
index 0000000..bdc48c7
--- /dev/null
+++ b/lib/UTF16.ml
@@ -0,0 +1,201 @@
+(**
+ Lexer using sedlex
+ *)
+
+open Tokens
+
+exception UnclosedQuote of { content : string; line : int }
+exception LexError of Lexing.position * string
+
+let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } =
+ Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol)
+
+(* The comment system is terrible. The same symbol can be used for :
+ - starting a comment
+ - inequality operation
+ In order to manage this, I try to identify the context in a very basic way,
+ using a flag True False for determining the token to send.
+*)
+module Bucket = Ephemeron.K1.Bucket
+
+type bucket = (Sedlexing.lexbuf, int) Bucket.t
+
+let is_expression : bucket = Bucket.make ()
+
+let incr_level lexbuf =
+ match Bucket.find is_expression lexbuf with
+ | None -> Bucket.add is_expression lexbuf 1
+ | Some v -> Bucket.add is_expression lexbuf (v + 1)
+
+let decr_level lexbuf =
+ match Bucket.find is_expression lexbuf with
+ | None -> ()
+ | Some v ->
+ if v > 1 then Bucket.add is_expression lexbuf (v - 1)
+ else Bucket.remove is_expression lexbuf
+
+let build_ident lexbuf =
+ let id =
+ Sedlexing.lexeme lexbuf |> Idents.of_uchars |> String.uppercase_ascii
+ in
+ try
+ let value = Hashtbl.find Idents.keyword_table id in
+ let _ = match value with IF | ELIF -> incr_level lexbuf | _ -> () in
+ value
+ with Not_found -> IDENT id
+
+let incr_level lexbuf =
+ match Bucket.find is_expression lexbuf with
+ | None -> Bucket.add is_expression lexbuf 1
+ | Some v -> Bucket.add is_expression lexbuf (v + 1)
+
+let wait_balance rule lexbuf =
+ try[@warning "-52"] rule (Buffer.create 17) lexbuf
+ with Failure "lexing: empty token" ->
+ let position, _ = Sedlexing.lexing_positions lexbuf in
+ let line = position.Lexing.pos_lnum
+ and content = Sedlexing.lexeme lexbuf |> Idents.of_uchars in
+ (raise (UnclosedQuote { line; content }) [@warning "+52"])
+
+let space = [%sedlex.regexp? ' ' | '\t']
+let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"]
+let coma = [%sedlex.regexp? ',']
+let digit = [%sedlex.regexp? '0' .. '9']
+let letters = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '_']
+let spaces = [%sedlex.regexp? Plus space]
+let ident = [%sedlex.regexp? ('$' | letters), Star (digit | letters)]
+let location_ident = [%sedlex.regexp? letters | digit]
+let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
+let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
+
+let rec read_long_string level buf lexbuf =
+ match%sedlex lexbuf with
+ | '{' ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_long_string (level + 1) buf lexbuf
+ | '}' -> (
+ match level with
+ | 0 -> Buffer.contents buf
+ | _ ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_long_string (level - 1) buf lexbuf)
+ | eol ->
+ Sedlexing.new_line lexbuf;
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_long_string level buf lexbuf
+ | any ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_long_string level buf lexbuf
+ | _ -> raise Not_found
+
+let rec read_dquoted_string buf lexbuf =
+ match%sedlex lexbuf with
+ | "\"\"" ->
+ Buffer.add_char buf '"';
+ read_dquoted_string buf lexbuf
+ | '"' -> Buffer.contents buf
+ | any ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_dquoted_string buf lexbuf
+ | _ -> raise Not_found
+
+let rec read_quoted_string buf lexbuf =
+ match%sedlex lexbuf with
+ | "''" ->
+ Buffer.add_char buf '\'';
+ read_quoted_string buf lexbuf
+ | '\'' -> Buffer.contents buf
+ | eol ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ Sedlexing.new_line lexbuf;
+ read_quoted_string buf lexbuf
+ | any ->
+ Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
+ read_quoted_string buf lexbuf
+ | _ -> raise Not_found
+
+let rec skip_comment lexbuf =
+ match%sedlex lexbuf with
+ | '{' ->
+ let _ = wait_balance (read_long_string 0) lexbuf in
+ skip_comment lexbuf
+ | '\'' ->
+ let _ = wait_balance read_quoted_string lexbuf in
+ skip_comment lexbuf
+ | '"' ->
+ let _ = wait_balance read_dquoted_string lexbuf in
+ skip_comment lexbuf
+ | eol ->
+ (* Ugly hack used in order to put the eol in the front of the next
+ parsing. *)
+ Sedlexing.rollback lexbuf;
+ COMMENT
+ | any -> skip_comment lexbuf
+ | _ -> raise Not_found
+
+let rec token lexbuf =
+ match%sedlex lexbuf with
+ | 0Xfeff ->
+ (* Ignore the BOM *)
+ token lexbuf
+ | '#', Star space, location ->
+ let ident = Idents.of_uchars (Sedlexing.lexeme lexbuf) in
+
+ LOCATION_START ident
+ | Plus digit -> INTEGER (Sedlexing.Utf8.lexeme lexbuf)
+ | '+' -> PLUS
+ | '-' -> MINUS
+ | "+=" -> INCR
+ | "-=" -> DECR
+ | "*=" -> MULT_EQUAL
+ | '/' -> DIV
+ | '*' -> STAR
+ | ':' ->
+ (* We are leaving the block, the comment will be handled again *)
+ decr_level lexbuf;
+ COLUMN
+ | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> LOCATION_END
+ | '[' -> L_BRACKET
+ | ']' -> R_BRACKET
+ | '(' ->
+ incr_level lexbuf;
+ L_PAREN
+ | ')' ->
+ decr_level lexbuf;
+ R_PAREN
+ | '<' -> LT
+ | '>' -> GT
+ | coma -> COMA
+ | eof ->
+ Bucket.remove is_expression lexbuf;
+ EOF
+ | '=' ->
+ incr_level lexbuf;
+ EQUAL
+ | ident -> build_ident lexbuf
+ | eol ->
+ Bucket.add is_expression lexbuf 0;
+ Sedlexing.new_line lexbuf;
+ EOL
+ | '&' ->
+ Bucket.add is_expression lexbuf 0;
+ AMPERSAND
+ | '!' -> (
+ match Bucket.find is_expression lexbuf with
+ | Some i when i <> 0 -> EXCLAMATION
+ | _ -> skip_comment lexbuf)
+ | spaces -> token lexbuf
+ | '\'' -> LITERAL (wait_balance read_quoted_string lexbuf)
+ | '"' -> LITERAL (wait_balance read_dquoted_string lexbuf)
+ | '{' -> LITERAL (wait_balance (read_long_string 0) lexbuf)
+ | _ ->
+ let position = fst @@ Sedlexing.lexing_positions lexbuf in
+ let tok = Sedlexing.Utf16.lexeme lexbuf Little_endian false in
+
+ let msg =
+ Format.asprintf "Unexpected character %S at %a" tok pp_pos position
+ in
+
+ raise @@ LexError (position, msg)
+
+let lexer buf = Sedlexing.with_tokenizer token buf
diff --git a/lib/analyzer.ml b/lib/analyzer.ml
index f0f8ca5..fe6ae90 100644
--- a/lib/analyzer.ml
+++ b/lib/analyzer.ml
@@ -23,14 +23,14 @@ let format_error : Format.formatter -> error -> unit =
*)
let parse :
(module Qsp_syntax.S.Analyzer with type Location.repr = 'a) ->
- Lexing.lexbuf ->
+ Sedlexing.lexbuf ->
('a, error) Result.t =
fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a) ->
let module Parser = Parser.Make (S) in
let module IncrementalParser =
Interpreter.Interpreter (Parser.MenhirInterpreter) in
fun lexbuf ->
- IncrementalParser.of_lexbuf lexbuf Lexer.token Parser.Incremental.main
+ IncrementalParser.of_lexbuf lexbuf UTF16.lexer Parser.Incremental.main
|> Result.map_error (fun e ->
let message =
match e.Interpreter.code with
diff --git a/lib/analyzer.mli b/lib/analyzer.mli
index 3032375..02d7b47 100644
--- a/lib/analyzer.mli
+++ b/lib/analyzer.mli
@@ -8,5 +8,5 @@ val format_error : Format.formatter -> error -> unit
val parse :
(module Qsp_syntax.S.Analyzer with type Location.repr = 'a) ->
- Lexing.lexbuf ->
+ Sedlexing.lexbuf ->
('a, error) Result.t
diff --git a/lib/dune b/lib/dune
index c88994d..5717f90 100644
--- a/lib/dune
+++ b/lib/dune
@@ -2,10 +2,11 @@
(name qparser)
(libraries
qsp_syntax
- menhirLib)
+ menhirLib
+ )
(preprocess (pps
- ppx_deriving.show
- ppx_deriving.eq ))
+ sedlex.ppx
+ ))
)
@@ -25,6 +26,3 @@
(flags --table --external-tokens Tokens)
(merge_into parser)
)
-
-(ocamllex lexer)
-
diff --git a/lib/idents.ml b/lib/idents.ml
new file mode 100644
index 0000000..dab0ba0
--- /dev/null
+++ b/lib/idents.ml
@@ -0,0 +1,185 @@
+open Tokens
+module T = Qsp_syntax.T
+
+let keyword_table = Hashtbl.create 53
+
+let char_of_uchar : Uchar.t -> char =
+ fun u -> match Uchar.is_char u with true -> Uchar.to_char u | _ -> '?'
+
+let of_uchars : Uchar.t array -> string =
+ fun arr -> Array.to_seq arr |> Seq.map char_of_uchar |> String.of_seq
+
+let _ =
+ List.iter
+ (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok)
+ [
+ ("ACT", ACT);
+ ("ADDLIB", KEYWORD "INCLIB");
+ ("ADDOBJ", KEYWORD "ADDOBJ");
+ ("ADD OBJ", KEYWORD "ADDOBJ");
+ ("ADDQST", KEYWORD "INCLIB");
+ ("AND", AND);
+ ("ARRCOMP", FUNCTION T.Arrcomp);
+ ("ARRPOS", FUNCTION T.Arrpos);
+ ("ARRSIZE", FUNCTION T.Arrsize)
+ (*
+ ; "BACKIMAGE", KEYWORD "BACKIMAGE"
+ ; "$BACKIMAGE", KEYWORD "BACKIMAGE"
+ ; "BCOLOR", KEYWORD "BCOLOR" *);
+ ("CLA", KEYWORD "CLA");
+ ("CLEAR", KEYWORD "CLEAR");
+ ("*CLEAR", KEYWORD "*CLEAR");
+ ("CLOSE", KEYWORD "CLOSE");
+ ("CLOSE ALL", KEYWORD "CLOSEALL");
+ ("CLR", KEYWORD "CLEAR");
+ ("*CLR", KEYWORD "*CLEAR");
+ ("CLS", KEYWORD "CLS");
+ ("CMDCLEAR", KEYWORD "CMDCLEAR");
+ ("CMDCLR", KEYWORD "CMDCLEAR");
+ ("COPYARR", KEYWORD "COPYARR");
+ ("COUNTOBJ", FUNCTION T.Countobj);
+ ("CURACTS", IDENT "CURACTS");
+ ("$CURACTS", IDENT "$CURACTS");
+ ("CURLOC", IDENT "CURLOC");
+ ("$CURLOC", IDENT "$CURLOC")
+ (*
+ ; "DEBUG", KEYWORD "DEBUG"
+*);
+ ("DELACT", KEYWORD "DELACT");
+ ("DEL ACT", KEYWORD "DELACT");
+ ("DELLIB", KEYWORD "FREELIB");
+ ("DELOBJ", KEYWORD "DELOBJ");
+ ("DEL OBJ", KEYWORD "DELOBJ");
+ ("DESC", FUNCTION T.Desc);
+ ("$DESC", FUNCTION T.Desc')
+ (*
+ ; "DISABLESCROLL", KEYWORD "DISABLESCROLL"
+ ; "DISABLESUBEX", KEYWORD "DISABLESUBEX"
+*);
+ ("DYNAMIC", KEYWORD "DYNAMIC");
+ ("DYNEVAL", FUNCTION T.Dyneval);
+ ("$DYNEVAL", FUNCTION T.Dyneval');
+ ("ELSE", ELSE);
+ ("ELSEIF", ELIF);
+ ("END", END);
+ ("EXEC", KEYWORD "EXEC");
+ ("EXIT", KEYWORD "EXIT")
+ (*
+ ; "FCOLOR", KEYWORD "FCOLOR"
+ ; "$FNAME", KEYWORD "$FNAME"
+*);
+ ("FREELIB", KEYWORD "FREELIB")
+ (*
+ ; "FSIZE", KEYWORD "FSIZE"
+*);
+ ("FUNC", FUNCTION T.Func);
+ ("$FUNC", FUNCTION T.Func');
+ ("GETOBJ", FUNCTION T.Getobj);
+ ("$GETOBJ", FUNCTION T.Getobj');
+ ("GOSUB", KEYWORD "GOSUB");
+ ("GOTO", KEYWORD "GOTO") (*
+ ; "GC", KEYWORD "GC"
+*);
+ ("GS", KEYWORD "GOSUB");
+ ("GT", KEYWORD "GOTO");
+ ("IF", IF);
+ ("IIF", FUNCTION T.Iif);
+ ("$IIF", FUNCTION T.Iif');
+ ("INCLIB", KEYWORD "INCLIB");
+ ("INPUT", FUNCTION T.Input);
+ ("$INPUT", FUNCTION T.Input');
+ ("INSTR", FUNCTION T.Instr);
+ ("ISNUM", FUNCTION T.Isnum);
+ ("ISPLAY", FUNCTION T.Isplay);
+ ("JUMP", KEYWORD "JUMP");
+ ("KILLALL", KEYWORD "KILLALL");
+ ("KILLOBJ", KEYWORD "KILLOBJ");
+ ("KILLQST", KEYWORD "FREELIB");
+ ("KILLVAR", KEYWORD "KILLVAR");
+ ("LCASE", FUNCTION T.Lcase);
+ ("$LCASE", FUNCTION T.Lcase')
+ (*
+ ; "LCOLOR", KEYWORD "LCOLOR"
+*);
+ ("LEN", FUNCTION T.Len);
+ ("LET", LET);
+ ("LOC", FUNCTION T.Loc);
+ ("MAINTXT", IDENT "MAINTXT");
+ ("$MAINTXT", IDENT "MAINTXT");
+ ("MAX", FUNCTION T.Max);
+ ("$MAX", FUNCTION T.Max');
+ ("MENU", KEYWORD "MENU");
+ ("MID", FUNCTION T.Mid);
+ ("$MID", FUNCTION T.Mid');
+ ("MIN", FUNCTION T.Min);
+ ("$MIN", FUNCTION T.Min');
+ ("MOD", MOD);
+ ("MSECSCOUNT", FUNCTION T.Msecscount);
+ ("MSG", KEYWORD "MSG");
+ ("NL", KEYWORD "NL");
+ ("*NL", KEYWORD "*NL");
+ ("NO", NO) (*
+ ; "NOSAVE", KEYWORD "NOSAVE"
+*);
+ ("OBJ", OBJ);
+ ("$ONACTSEL", IDENT "$ONACTSEL");
+ ("$ONGLOAD", IDENT "$ONGLOAD");
+ ("$ONGSAVE", IDENT "$ONGSAVE");
+ ("$ONNEWLOC", IDENT "$ONNEWLOC");
+ ("$ONOBJADD", IDENT "$ONOBJADD");
+ ("$ONOBJDEL", IDENT "$ONOBJDEL");
+ ("$ONOBJSEL", IDENT "$ONOBJSEL");
+ ("OPENGAME", KEYWORD "OPENGAME");
+ ("OPENQST", KEYWORD "OPENQST");
+ ("OR", OR);
+ ("P", KEYWORD "P");
+ ("*P", KEYWORD "*P");
+ ("PL", KEYWORD "PL");
+ ("*PL", KEYWORD "*PL");
+ ("PLAY", KEYWORD "PLAY");
+ ("QSPVER", FUNCTION T.Qspver);
+ ("$QSPVER", FUNCTION T.Qspver');
+ ("RAND", FUNCTION T.Rand);
+ ("REFINT", KEYWORD "REFINT");
+ ("REPLACE", FUNCTION T.Replace);
+ ("$REPLACE", FUNCTION T.Replace');
+ ("RGB", FUNCTION T.Rgb);
+ ("RND", FUNCTION T.Rnd);
+ ("SAVEGAME", KEYWORD "SAVEGAME");
+ ("SELACT", FUNCTION T.Selact);
+ ("$SELACT", IDENT "$SELACT");
+ ("SELOBJ", IDENT "SELOBJ");
+ ("$SELOBJ", IDENT "$SELOBJ");
+ ("SET", SET);
+ ("SETTIMER", KEYWORD "SETTIMER");
+ ("SHOWACTS", KEYWORD "SHOWACTS");
+ ("SHOWINPUT", KEYWORD "SHOWINPUT");
+ ("SHOWOBJS", KEYWORD "SHOWOBJS");
+ ("SHOWSTAT", KEYWORD "SHOWSTAT");
+ ("STATTXT", FUNCTION T.Stattxt);
+ ("$STATTXT", FUNCTION T.Stattxt');
+ ("STR", FUNCTION T.Str);
+ ("$STR", FUNCTION T.Str');
+ ("STRCOMP", FUNCTION T.Strcomp);
+ ("STRFIND", FUNCTION T.Strfind);
+ ("$STRFIND", FUNCTION T.Strfind');
+ ("STRPOS", FUNCTION T.Strpos);
+ ("TRIM", FUNCTION T.Trim);
+ ("$TRIM", FUNCTION T.Trim');
+ ("UCASE", FUNCTION T.Ucase);
+ ("$UCASE", FUNCTION T.Ucase');
+ ("UNSEL", KEYWORD "UNSELECT");
+ ("UNSELECT", KEYWORD "UNSELECT");
+ ("USEHTML", IDENT "USEHTML");
+ ("USERCOM", IDENT "USERCOM");
+ ("$USERCOM", IDENT "$USERCOM");
+ ("USER_TEXT", IDENT "USER_TEXT");
+ ("$USER_TEXT", IDENT "USER_TEXT");
+ ("USRTXT", IDENT "USER_TEXT");
+ ("$USRTXT", IDENT "USER_TEXT");
+ ("VAL", FUNCTION T.Val);
+ ("VIEW", KEYWORD "VIEW");
+ ("WAIT", KEYWORD "WAIT");
+ ("XGOTO", KEYWORD "XGOTO");
+ ("XGT", KEYWORD "XGOTO");
+ ]
diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index b41d74e..21c1430 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -22,26 +22,27 @@ struct
module E = MenhirLib.ErrorReports
module L = MenhirLib.LexerUtil
- let range_message start_pos end_pos code = { code; start_pos; end_pos }
+ type step = MI.token * Lexing.position * Lexing.position
- let get_parse_error default_position env : error =
+ let range_message (start_pos, end_pos) code = { code; start_pos; end_pos }
+
+ let get_parse_error lexbuf env : error =
match MI.stack env with
| (lazy Nil) ->
- range_message default_position.Lexing.lex_start_p
- default_position.Lexing.lex_curr_p InvalidSyntax
+ let positions = Sedlexing.lexing_positions lexbuf in
+ range_message positions InvalidSyntax
| (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) ->
- range_message start_pos end_pos (MenhirCode (MI.number state))
+ range_message (start_pos, end_pos) (MenhirCode (MI.number state))
let rec _parse :
- (Lexing.lexbuf -> MI.token) ->
- Lexing.lexbuf ->
+ (Sedlexing.lexbuf -> unit -> step) ->
+ Sedlexing.lexbuf ->
'a MI.checkpoint ->
('a, error) Result.t =
- fun get_token (lexbuf : Lexing.lexbuf) (checkpoint : 'a MI.checkpoint) ->
+ fun get_token (lexbuf : Sedlexing.lexbuf) (checkpoint : 'a MI.checkpoint) ->
match checkpoint with
| MI.InputNeeded _env ->
- let token = get_token lexbuf in
- let startp = lexbuf.Lexing.lex_start_p and endp = lexbuf.lex_curr_p in
+ let token, startp, endp = get_token lexbuf () in
let checkpoint = MI.offer checkpoint (token, startp, endp) in
_parse get_token lexbuf checkpoint
| MI.Shifting _ | MI.AboutToReduce _ ->
@@ -52,19 +53,18 @@ struct
Error err
| MI.Accepted v -> Ok v
| MI.Rejected ->
- let err =
- range_message lexbuf.lex_start_p lexbuf.lex_curr_p InvalidSyntax
- in
+ let positions = Sedlexing.lexing_positions lexbuf in
+ let err = range_message positions InvalidSyntax in
Error err
type 'a builder = Lexing.position -> 'a MI.checkpoint
let of_lexbuf :
- Lexing.lexbuf ->
- (Lexing.lexbuf -> MI.token) ->
+ Sedlexing.lexbuf ->
+ (Sedlexing.lexbuf -> unit -> step) ->
'a builder ->
('a, error) result =
fun lexbuf lexer f ->
- let init = f lexbuf.lex_curr_p in
+ let init = f (fst (Sedlexing.lexing_positions lexbuf)) in
_parse lexer lexbuf init
end
diff --git a/lib/lexer.mll b/lib/lexer.mll
deleted file mode 100644
index d3d5e5b..0000000
--- a/lib/lexer.mll
+++ /dev/null
@@ -1,362 +0,0 @@
-{
- open Tokens
- module T = Qsp_syntax.T
-
- exception UnclosedQuote of { content: string ; line : int}
-
- (* The comment system is terrible. The same symbol can be used for :
- - starting a comment
- - inequality operation
- In order to manage this, I try to identify the context in a very basic way,
- using a flag True False for determining the token to send.
- *)
- module Bucket = Ephemeron.K1.Bucket
- type bucket = (Lexing.lexbuf, int) Bucket.t
- let is_expression : bucket = Bucket.make ()
-
- let incr_level lexbuf =
- match Bucket.find is_expression lexbuf with
- | None -> Bucket.add is_expression lexbuf 1
- | Some v -> Bucket.add is_expression lexbuf (v+1)
-
- let decr_level lexbuf =
- match Bucket.find is_expression lexbuf with
- | None -> ()
- | Some v ->
- if v > 1 then
- Bucket.add is_expression lexbuf (v-1)
- else
- Bucket.remove is_expression lexbuf
-
- let keyword_table = Hashtbl.create 53
- let _ =
- List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok)
- [ "ACT", ACT
- ; "ADDLIB", KEYWORD "INCLIB"
- ; "ADDOBJ", KEYWORD "ADDOBJ"
- ; "ADD OBJ", KEYWORD "ADDOBJ"
- ; "ADDQST", KEYWORD "INCLIB"
- ; "AND", AND
- ; "ARRCOMP", FUNCTION T.Arrcomp
- ; "ARRPOS", FUNCTION T.Arrpos
- ; "ARRSIZE", FUNCTION T.Arrsize
-(*
- ; "BACKIMAGE", KEYWORD "BACKIMAGE"
- ; "$BACKIMAGE", KEYWORD "BACKIMAGE"
- ; "BCOLOR", KEYWORD "BCOLOR"
-*)
- ; "CLA", KEYWORD "CLA"
- ; "CLEAR", KEYWORD "CLEAR"
- ; "*CLEAR", KEYWORD "*CLEAR"
- ; "CLOSE", KEYWORD "CLOSE"
- ; "CLOSE ALL", KEYWORD "CLOSEALL"
- ; "CLR", KEYWORD "CLEAR"
- ; "*CLR", KEYWORD "*CLEAR"
- ; "CLS", KEYWORD "CLS"
- ; "CMDCLEAR", KEYWORD "CMDCLEAR"
- ; "CMDCLR", KEYWORD "CMDCLEAR"
- ; "COPYARR", KEYWORD "COPYARR"
- ; "COUNTOBJ", FUNCTION T.Countobj
- ; "CURACTS", IDENT "CURACTS"
- ; "$CURACTS", IDENT "$CURACTS"
- ; "CURLOC", IDENT "CURLOC"
- ; "$CURLOC", IDENT "$CURLOC"
-(*
- ; "DEBUG", KEYWORD "DEBUG"
-*)
- ; "DELACT", KEYWORD "DELACT"
- ; "DEL ACT", KEYWORD "DELACT"
- ; "DELLIB", KEYWORD "FREELIB"
- ; "DELOBJ", KEYWORD "DELOBJ"
- ; "DEL OBJ", KEYWORD "DELOBJ"
- ; "DESC", FUNCTION T.Desc
- ; "$DESC", FUNCTION T.Desc'
-(*
- ; "DISABLESCROLL", KEYWORD "DISABLESCROLL"
- ; "DISABLESUBEX", KEYWORD "DISABLESUBEX"
-*)
- ; "DYNAMIC", KEYWORD "DYNAMIC"
- ; "DYNEVAL", FUNCTION T.Dyneval
- ; "$DYNEVAL", FUNCTION T.Dyneval'
- ; "ELSE", ELSE
- ; "ELSEIF", ELIF
- ; "END", END
- ; "EXEC", KEYWORD "EXEC"
- ; "EXIT", KEYWORD "EXIT"
-(*
- ; "FCOLOR", KEYWORD "FCOLOR"
- ; "$FNAME", KEYWORD "$FNAME"
-*)
- ; "FREELIB", KEYWORD "FREELIB"
-(*
- ; "FSIZE", KEYWORD "FSIZE"
-*)
- ; "FUNC", FUNCTION T.Func
- ; "$FUNC", FUNCTION T.Func'
- ; "GETOBJ", FUNCTION T.Getobj
- ; "$GETOBJ", FUNCTION T.Getobj'
- ; "GOSUB", KEYWORD "GOSUB"
- ; "GOTO", KEYWORD "GOTO"
-(*
- ; "GC", KEYWORD "GC"
-*)
- ; "GS", KEYWORD "GOSUB"
- ; "GT", KEYWORD "GOTO"
- ; "IF", IF
- ; "IIF", FUNCTION T.Iif
- ; "$IIF", FUNCTION T.Iif'
- ; "INCLIB", KEYWORD "INCLIB"
- ; "INPUT", FUNCTION T.Input
- ; "$INPUT", FUNCTION T.Input'
- ; "INSTR", FUNCTION T.Instr
- ; "ISNUM", FUNCTION T.Isnum
- ; "ISPLAY", FUNCTION T.Isplay
- ; "JUMP", KEYWORD "JUMP"
- ; "KILLALL", KEYWORD "KILLALL"
- ; "KILLOBJ", KEYWORD "KILLOBJ"
- ; "KILLQST", KEYWORD "FREELIB"
- ; "KILLVAR", KEYWORD "KILLVAR"
- ; "LCASE", FUNCTION T.Lcase
- ; "$LCASE", FUNCTION T.Lcase'
-(*
- ; "LCOLOR", KEYWORD "LCOLOR"
-*)
- ; "LEN", FUNCTION T.Len
- ; "LET", LET
- ; "LOC", FUNCTION T.Loc
- ; "MAINTXT", IDENT "MAINTXT"
- ; "$MAINTXT", IDENT "MAINTXT"
- ; "MAX", FUNCTION T.Max
- ; "$MAX", FUNCTION T.Max'
- ; "MENU", KEYWORD "MENU"
- ; "MID", FUNCTION T.Mid
- ; "$MID", FUNCTION T.Mid'
- ; "MIN", FUNCTION T.Min
- ; "$MIN", FUNCTION T.Min'
- ; "MOD", MOD
- ; "MSECSCOUNT", FUNCTION T.Msecscount
- ; "MSG", KEYWORD "MSG"
- ; "NL", KEYWORD "NL"
- ; "*NL", KEYWORD "*NL"
- ; "NO", NO
-(*
- ; "NOSAVE", KEYWORD "NOSAVE"
-*)
- ; "OBJ", OBJ
- ; "$ONACTSEL", IDENT "$ONACTSEL"
- ; "$ONGLOAD", IDENT "$ONGLOAD"
- ; "$ONGSAVE", IDENT "$ONGSAVE"
- ; "$ONNEWLOC", IDENT "$ONNEWLOC"
- ; "$ONOBJADD", IDENT "$ONOBJADD"
- ; "$ONOBJDEL", IDENT "$ONOBJDEL"
- ; "$ONOBJSEL", IDENT "$ONOBJSEL"
- ; "OPENGAME", KEYWORD "OPENGAME"
- ; "OPENQST", KEYWORD "OPENQST"
- ; "OR", OR
- ; "P", KEYWORD "P"
- ; "*P", KEYWORD "*P"
- ; "PL", KEYWORD "PL"
- ; "*PL", KEYWORD "*PL"
- ; "PLAY", KEYWORD "PLAY"
- ; "QSPVER", FUNCTION T.Qspver
- ; "$QSPVER", FUNCTION T.Qspver'
- ; "RAND", FUNCTION T.Rand
- ; "REFINT", KEYWORD "REFINT"
- ; "REPLACE", FUNCTION T.Replace
- ; "$REPLACE", FUNCTION T.Replace'
- ; "RGB", FUNCTION T.Rgb
- ; "RND", FUNCTION T.Rnd
- ; "SAVEGAME", KEYWORD "SAVEGAME"
- ; "SELACT", FUNCTION T.Selact
- ; "$SELACT", IDENT "$SELACT"
- ; "SELOBJ", IDENT "SELOBJ"
- ; "$SELOBJ", IDENT "$SELOBJ"
- ; "SET", SET
- ; "SETTIMER", KEYWORD "SETTIMER"
- ; "SHOWACTS", KEYWORD "SHOWACTS"
- ; "SHOWINPUT", KEYWORD "SHOWINPUT"
- ; "SHOWOBJS", KEYWORD "SHOWOBJS"
- ; "SHOWSTAT", KEYWORD "SHOWSTAT"
- ; "STATTXT", FUNCTION T.Stattxt
- ; "$STATTXT", FUNCTION T.Stattxt'
- ; "STR", FUNCTION T.Str
- ; "$STR", FUNCTION T.Str'
- ; "STRCOMP", FUNCTION T.Strcomp
- ; "STRFIND", FUNCTION T.Strfind
- ; "$STRFIND", FUNCTION T.Strfind'
- ; "STRPOS", FUNCTION T.Strpos
- ; "TRIM", FUNCTION T.Trim
- ; "$TRIM", FUNCTION T.Trim'
- ; "UCASE", FUNCTION T.Ucase
- ; "$UCASE", FUNCTION T.Ucase'
- ; "UNSEL", KEYWORD "UNSELECT"
- ; "UNSELECT", KEYWORD "UNSELECT"
- ; "USEHTML", IDENT "USEHTML"
- ; "USERCOM", IDENT "USERCOM"
- ; "$USERCOM", IDENT "$USERCOM"
- ; "USER_TEXT", IDENT "USER_TEXT"
- ; "$USER_TEXT", IDENT "USER_TEXT"
- ; "USRTXT", IDENT "USER_TEXT"
- ; "$USRTXT", IDENT "USER_TEXT"
- ; "VAL", FUNCTION T.Val
- ; "VIEW", KEYWORD "VIEW"
- ; "WAIT", KEYWORD "WAIT"
- ; "XGOTO", KEYWORD "XGOTO"
- ; "XGT", KEYWORD "XGOTO"
- ]
-
- let ident lexbuf id =
- let id = String.uppercase_ascii id in
- try
- let value = Hashtbl.find keyword_table id in
- let _ = match value with
- | IF | ELIF -> incr_level lexbuf
- | _ -> () in
- value
-
- with Not_found -> IDENT id
-
- let wait_balance rule lexbuf =
- try[@warning "-52"]
- rule (Buffer.create 17) lexbuf
- with
- Failure "lexing: empty token" ->
- let line = lexbuf.Lexing.lex_curr_p.pos_lnum
- and content = Bytes.to_string lexbuf.Lexing.lex_buffer in
- raise (UnclosedQuote {line; content})
- [@warning "+52"]
-}
-
-let space = [ ' ' '\t' ]
-let coma = ','
-let letters = [^ '!' ':' '&' '=' '<' '>' '+' '-' '*' '/' ',' '"' '\'' '(' ')' '[' ']' ' ' '\t' '\n' '\r' '{' '}']
-let digit = [ '0'-'9' ]
-let eol = [ '\r' '\n' ]
-let spaces = space+
-let ident = (letters # digit) letters*
-
-rule token = parse
-
-| '#' spaces* (('!' | '$' | '#' | '^')+)? (ident as loc) {
- Lexing.set_filename lexbuf loc;
- LOCATION_START loc}
-| (digit+) as l { INTEGER l}
-| '+' { PLUS }
-| '-' { MINUS }
-| "+=" { INCR }
-| "-=" { DECR }
-| "*=" { MULT_EQUAL }
-| '/' { DIV }
-| '*' { STAR }
-| ':' {
- (* We are leaving the block, the comment will be handled again *)
- decr_level lexbuf;
- COLUMN }
-| '-' '-'+ [ ^ '\r' '\n']* { LOCATION_END }
-| '[' { L_BRACKET }
-| ']' { R_BRACKET }
-| '(' {
- incr_level lexbuf;
- L_PAREN }
-| ')' {
- decr_level lexbuf;
- R_PAREN }
-| '<' { LT }
-| '>' { GT }
-| coma { COMA }
-| eof {
- Bucket.remove is_expression lexbuf;
- EOF }
-| '=' {
- incr_level lexbuf;
- EQUAL }
-| ident as l { ident lexbuf l}
-| eol {
- Bucket.add is_expression lexbuf 0;
- Lexing.new_line lexbuf; EOL }
-| '&' {
- Bucket.add is_expression lexbuf 0;
- AMPERSAND }
-| '!' {
- match Bucket.find is_expression lexbuf with
- | Some i when i <> 0 -> EXCLAMATION
- | _ -> skip_comment lexbuf
-}
-| spaces { token lexbuf }
-| '\'' { LITERAL (wait_balance read_quoted_string lexbuf) }
-| '"' { LITERAL (wait_balance read_dquoted_string lexbuf) }
-| '{' { LITERAL (wait_balance (read_long_string 0) lexbuf) }
-
-and skip_comment = parse
-| [^ '\'' '"' '{' '\r' '\n' ] { skip_comment lexbuf }
-| '{' {
- let _ = wait_balance (read_long_string 0) lexbuf in
- skip_comment lexbuf }
-| '\'' {
- let _ = wait_balance read_quoted_string lexbuf in
- skip_comment lexbuf
-}
-| '"' {
- let _ = wait_balance read_dquoted_string lexbuf in
- skip_comment lexbuf
-}
-| eol {
- (* Ugly hack used in order to put the eol in the front of the next
- parsing. *)
- lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - 1;
- (*Lexing.new_line lexbuf;*)
- COMMENT
-}
-
-(* Read the content until we got another one quote *)
-and read_quoted_string buf = parse
- | "''"
- { Buffer.add_char buf '\'';
- read_quoted_string buf lexbuf
- }
- | [^ '\'' '\n' '\r']+
- { Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_quoted_string buf lexbuf
- }
- | '\''
- { (Buffer.contents buf)
- }
- | ['\n' '\r']
- { Buffer.add_string buf (Lexing.lexeme lexbuf);
- Lexing.new_line lexbuf ;
- read_quoted_string buf lexbuf
- }
-
-and read_dquoted_string buf = parse
- | [^ '"' ]+
- { Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_dquoted_string buf lexbuf
- }
- | "\"\""
- { Buffer.add_char buf '"';
- read_dquoted_string buf lexbuf
- }
- | '"'
- { (Buffer.contents buf)
- }
-
-and read_long_string level buf = parse
- | [^ '{' '}' '\r' '\n' ]+
- { Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_long_string level buf lexbuf
- }
- | '{'
- { Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_long_string (level + 1) buf lexbuf }
- | '}'
- { match level with
- | 0 -> (Buffer.contents buf)
- | _ -> Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_long_string (level - 1) buf lexbuf
- }
-| eol
-{
- Lexing.new_line lexbuf;
- Buffer.add_string buf (Lexing.lexeme lexbuf);
- read_long_string level buf lexbuf
-}
diff --git a/test/dune b/test/dune
index 2f4e803..74546a0 100644
--- a/test/dune
+++ b/test/dune
@@ -2,6 +2,7 @@
(name qsp_parser_test)
(libraries
alcotest
+ sedlex
qparser
qsp_syntax
fmt
diff --git a/test/qsp_parser_test.ml b/test/qsp_parser_test.ml
index b833fe5..6bf249d 100644
--- a/test/qsp_parser_test.ml
+++ b/test/qsp_parser_test.ml
@@ -7,10 +7,16 @@ let _position = (Lexing.dummy_pos, Lexing.dummy_pos)
type 'a location = 'a * 'a Ast.statement list [@@deriving eq, show]
+(** Run the parser with the given expression and return the result *)
let parse : string -> T.pos location =
fun content ->
- let lexing = Lexing.from_string content in
- Parser.main Qparser.Lexer.token lexing
+ let lexing = Sedlexing.Latin1.from_string content in
+
+ match Qparser.Analyzer.parse (module Qsp_syntax.Tree) lexing with
+ | Ok e -> e
+ | Error e ->
+ let msg = Format.asprintf "%a" Qparser.Analyzer.format_error e in
+ raise (Failure msg)
let location : T.pos location Alcotest.testable =
let equal = equal_location (fun _ _ -> true) in