diff options
Diffstat (limited to 'lib/qparser')
-rw-r--r-- | lib/qparser/analyzer.ml | 42 | ||||
-rw-r--r-- | lib/qparser/analyzer.mli | 8 | ||||
-rw-r--r-- | lib/qparser/dune | 29 | ||||
-rwxr-xr-x | lib/qparser/explain.sh | 5 | ||||
-rw-r--r-- | lib/qparser/expression_parser.messages | 304 | ||||
-rwxr-xr-x | lib/qparser/generate_errors.sh | 8 | ||||
-rw-r--r-- | lib/qparser/grammar.txt | 100 | ||||
-rw-r--r-- | lib/qparser/idents.ml | 179 | ||||
-rw-r--r-- | lib/qparser/interpreter.ml | 67 | ||||
-rw-r--r-- | lib/qparser/lexbuf.ml | 61 | ||||
-rw-r--r-- | lib/qparser/lexbuf.mli | 25 | ||||
-rw-r--r-- | lib/qparser/lexer.ml | 212 | ||||
-rw-r--r-- | lib/qparser/lexer.mli | 4 | ||||
-rw-r--r-- | lib/qparser/parser.mly | 89 | ||||
-rw-r--r-- | lib/qparser/qsp_expression.mly | 86 | ||||
-rw-r--r-- | lib/qparser/qsp_instruction.mly | 104 | ||||
-rw-r--r-- | lib/qparser/tokens.mly | 73 |
17 files changed, 1396 insertions, 0 deletions
diff --git a/lib/qparser/analyzer.ml b/lib/qparser/analyzer.ml new file mode 100644 index 0000000..da1adbf --- /dev/null +++ b/lib/qparser/analyzer.ml @@ -0,0 +1,42 @@ +(** + Run the QSP parser and apply the analyzer over it. + + See [syntax/S] + *) +let parse : + (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> + Lexbuf.t -> + ('a, Qsp_syntax.Report.t) Result.t = + fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a) -> + let module Parser = Parser.Make (S) in + let module IncrementalParser = + Interpreter.Interpreter (Parser.MenhirInterpreter) in + fun l -> + let lexer = Lexbuf.tokenize Lexer.token l in + + let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in + + IncrementalParser.of_lexbuf lexer l init + |> Result.map_error (fun e -> + let message = + match e.IncrementalParser.code with + | Interpreter.InvalidSyntax -> "Invalid Syntax" + | Interpreter.UnrecoverableError -> "UnrecoverableError" + | Interpreter.MenhirCode c -> + let message_content = + try Parser_messages.message c + with Not_found -> + String.concat "" [ "(Error code "; string_of_int c; ")" ] + in + + String.concat "" [ String.trim @@ message_content ] + in + let report = + Qsp_syntax.Report.error (e.start_pos, e.end_pos) message + in + + (* Discard the remaining file to read. The parser is now in a blank + state, it does not make sense to keep feeding it with the new + tokens. *) + Lexer.discard l; + report) diff --git a/lib/qparser/analyzer.mli b/lib/qparser/analyzer.mli new file mode 100644 index 0000000..30b6625 --- /dev/null +++ b/lib/qparser/analyzer.mli @@ -0,0 +1,8 @@ +val parse : + (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> + Lexbuf.t -> + ('a, Qsp_syntax.Report.t) Result.t +(** Read the source and build a analyzis over it. + +This method make the link between the source file and how to read it +(encoding…) and the AST we want to build. *) diff --git a/lib/qparser/dune b/lib/qparser/dune new file mode 100644 index 0000000..f62c90e --- /dev/null +++ b/lib/qparser/dune @@ -0,0 +1,29 @@ +(library + (name qparser) + (libraries + str + menhirLib + qsp_syntax + ) + (preprocess (pps + sedlex.ppx + )) + ) + + +(rule + (targets parser_messages.ml) + (deps expression_parser.messages tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly) + (action (with-stdout-to %{targets} (run menhir --base parser.mly --compile-errors %{deps})))) + +(menhir + (modules tokens) + (flags --only-tokens) +) + + +(menhir + (modules tokens parser qsp_instruction qsp_expression) + (flags --table --external-tokens Tokens) + (merge_into parser) +) diff --git a/lib/qparser/explain.sh b/lib/qparser/explain.sh new file mode 100755 index 0000000..609d208 --- /dev/null +++ b/lib/qparser/explain.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +menhir --explain tokens.mly qsp_expression.mly --base qsp_expression.mly +menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly --base qsp_instruction.mly +menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly --base parser.mly diff --git a/lib/qparser/expression_parser.messages b/lib/qparser/expression_parser.messages new file mode 100644 index 0000000..a493067 --- /dev/null +++ b/lib/qparser/expression_parser.messages @@ -0,0 +1,304 @@ +main: LOCATION_START EOL IDENT SET +## +## Ends in an error in state: 13. +## +## variable -> IDENT . option(delimited(L_BRACKET,option(expression),R_BRACKET)) [ STAR R_PAREN R_BRACKET PLUS OR MULT_EQUAL MOD MINUS LT INCR GT EXCLAMATION EQUAL EOL ELSE DIV DECR COMA COLUMN AND AMPERSAND ] +## +## The known suffix of the stack is as follows: +## IDENT +## + +Unexpected expression here. + +main: LOCATION_START EOL L_PAREN INTEGER SET +## +## Ends in an error in state: 69. +## +## expression -> L_PAREN expression . R_PAREN [ STAR R_PAREN R_BRACKET PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV COMA COLUMN AND AMPERSAND ] +## expression -> expression . EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EXCLAMATION expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . PLUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . MINUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . STAR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . DIV expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . MOD expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . AND expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . GT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EQUAL GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EQUAL LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . OR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## +## The known suffix of the stack is as follows: +## L_PAREN expression +## + +Unexpected '('. Did you forgot a function before ? + +main: LOCATION_START EOL IF INTEGER SET +## +## Ends in an error in state: 87. +## +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## onliner(IF) -> IF expression . COLUMN final_inline_instruction [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## IF expression +## + +The `IF` expression does not end properly. A `:` is expected before any instruction. + +main: LOCATION_START EOL COLUMN STAR +## +## Ends in an error in state: 92. +## +## line_statement -> COLUMN . IDENT list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## COLUMN +## + +A location is expected after ':' not an expression + +main: LOCATION_START EOL ACT INTEGER SET + +Invalid `ACT` label. You probably missed a ':' + +main: LOCATION_START EOL ACT IDENT COLUMN EOL LOCATION_END +## +## Ends in an error in state: 100. +## +## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## ACT expression COLUMN nonempty_list(EOL) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL +## + +A block starting with `ACT` is not closed by `END` +If there are nested blocks, the error will points the highest block. + +main: LOCATION_START EOL IF IDENT COLUMN ELSE R_PAREN +## +## Ends in an error in state: 106. +## +## inline_action -> onliner(IF) ELSE . inline_action [ EOL ELSE AMPERSAND ] +## option(preceded(ELSE,instruction)) -> ELSE . instruction [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## onliner(IF) ELSE +## + +Too manies instructions on a single line. + +main: LOCATION_START EOL IF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END +## +## Ends in an error in state: 155. +## +## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## IF expression COLUMN nonempty_list(EOL) list(line_statement) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## + +Unclosed `IF` block. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE EOL LOCATION_END +## +## Ends in an error in state: 163. +## +## else_ -> ELSE nonempty_list(EOL) . list(line_statement) [ END ] +## +## The known suffix of the stack is as follows: +## ELSE nonempty_list(EOL) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL +## + +A block starting with `IF` is not closed by `END` +If there are nested blocks, the error will points the highest block. + +main: LOCATION_START EOL IDENT AMPERSAND END +## +## Ends in an error in state: 175. +## +## main -> list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) . LOCATION_END [ # ] +## +## The known suffix of the stack is as follows: +## list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## + +Unexpected `END`. +Maybe you added an `END` after an inline `ACT` or `IF` ? + +main: STAR +## +## Ends in an error in state: 0. +## +## main' -> . main [ # ] +## +## The known suffix of the stack is as follows: +## +## + +Missing location name + +main: LOCATION_START EOL INTEGER SET +## +## Ends in an error in state: 132. +## +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## single_instruction -> expression . [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## expression +## + +Unexpected expression here. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF INTEGER SET +## +## Ends in an error in state: 157. +## +## elif -> ELIF expression . COLUMN nonempty_list(EOL) list(line_statement) [ END ELSE ELIF ] +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## +## The known suffix of the stack is as follows: +## ELIF expression +## + +The `ELIF` expression does not end properly. A `:` is expected before any instruction. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END +## +## Ends in an error in state: 173. +## +## list(elif) -> elif . list(elif) [ END ELSE ] +## +## The known suffix of the stack is as follows: +## elif +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## In state 160, spurious reduction of production elif -> ELIF expression COLUMN nonempty_list(EOL) list(line_statement) +## + +Unclosed `ELIF` block. + +main: EOL STAR +## +## Ends in an error in state: 177. +## +## list(before_location) -> before_location . list(before_location) [ LOCATION_START ] +## +## The known suffix of the stack is as follows: +## before_location +## + +The context does not allow an instruction here. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE STAR +## +## Ends in an error in state: 162. +## +## else_ -> ELSE . nonempty_list(EOL) list(line_statement) [ END ] +## +## The known suffix of the stack is as follows: +## ELSE +## + +Unexpected operator after `ELSE` + +main: LOCATION_START EOL IDENT DECR INTEGER SET +main: LOCATION_START EOL SET IDENT DECR INTEGER SET +main: LOCATION_START EOL LET IDENT DECR INTEGER SET + +Missing separator between instructions diff --git a/lib/qparser/generate_errors.sh b/lib/qparser/generate_errors.sh new file mode 100755 index 0000000..3cff769 --- /dev/null +++ b/lib/qparser/generate_errors.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Generate the error file with the helper messages. This script is required +# when the syntax is modified because menhir will change the state number and +# we need to make it match with the comment in the messages. + +menhir --list-errors *.mly --base parser.mly > all_errors +menhir --merge-errors expression_parser.messages --merge-errors all_errors *.mly --base parser.mly > expression_parser.messages_new diff --git a/lib/qparser/grammar.txt b/lib/qparser/grammar.txt new file mode 100644 index 0000000..d7208ef --- /dev/null +++ b/lib/qparser/grammar.txt @@ -0,0 +1,100 @@ +location: + | LOCATION_START EOL* + line_statement* + LOCATION_END EOL* + EOF + +line_statement: + | COMMENT EOL+ // A comment + | COLUMN IDENT EOL* // A location + | instruction, line_sep + | inline_action line_sep + | ACT expression COLUMN EOL+ // ACT … END ACT + line_statement* + END ACT? // Yes you can have END or END ACT + line_sep + | IF expression COLUMN EOL+ // IF … END IF + line_statement* + elif* + else + END IF? // Yes you can have END or END IF + line_sep + +elif: + | ELIF + expression COLUMN EOL+ + line_statement* + +else: + | ELSE EOL+ + line_statement* + | + +line_sep: + | EOL+ + | AMPERSAND+ EOL* + + +instruction: + | expression + | let_assignation + | keyword argument(expression) + +keyword: + | STAR KEYWORD // A keyword starting with * + | KEYWORD + +let_assignation: + | assignation + variable + assignation_operator + expression + +assignation: + | + | LET + | SET + +assignation_operator: + | EQUAL + | INCR // += + | DECR // -= + +inline_action: + | ACT expression COLUMN // There is a recursive code here + | IF expression COLUMN // Because ACT: can contains an IF: etc + (ELSE, instruction)? // complicated to flatten here. + +expression: + | delimited(l_paren, expression, r_paren) + | unary_operator expression + | expression binary_operator expression + | literal + | integer + | variable + | function argument(expression) + +unary_operator: + | OBJ + | LOC + | NO + | MINUS + | PLUS + +binary_operator: + | EQUAL + | LT GT // Different + | EXCLAMATION // Neg, not a comment here + | PLUS + | MINUS + | STAR // Not the first char of keyword here + | DIV + | MOD + | GT + | LT + | GT EQUAL + | LT EQUAL + | EQUAL GT // Alternative syntax + | EQUAL LT // Alternative syntax + | AND + | OR diff --git a/lib/qparser/idents.ml b/lib/qparser/idents.ml new file mode 100644 index 0000000..baf23dc --- /dev/null +++ b/lib/qparser/idents.ml @@ -0,0 +1,179 @@ +open Tokens +module T = Qsp_syntax.T + +let keyword_table = Hashtbl.create 53 + +let _ = + List.iter + (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) + [ + ("ACT", ACT); + ("ADDLIB", KEYWORD "INCLIB"); + ("ADDOBJ", KEYWORD "ADDOBJ"); + ("ADD OBJ", KEYWORD "ADDOBJ"); + ("ADDQST", KEYWORD "INCLIB"); + ("AND", AND); + ("ARRCOMP", FUNCTION T.Arrcomp); + ("ARRPOS", FUNCTION T.Arrpos); + ("ARRSIZE", FUNCTION T.Arrsize) + (* + ; "BACKIMAGE", KEYWORD "BACKIMAGE" + ; "$BACKIMAGE", KEYWORD "BACKIMAGE" + ; "BCOLOR", KEYWORD "BCOLOR" *); + ("CLA", KEYWORD "CLA"); + ("CLEAR", KEYWORD "CLEAR"); + ("*CLEAR", KEYWORD "*CLEAR"); + ("CLOSE", KEYWORD "CLOSE"); + ("CLOSE ALL", KEYWORD "CLOSEALL"); + ("CLR", KEYWORD "CLEAR"); + ("*CLR", KEYWORD "*CLEAR"); + ("CLS", KEYWORD "CLS"); + ("CMDCLEAR", KEYWORD "CMDCLEAR"); + ("CMDCLR", KEYWORD "CMDCLEAR"); + ("COPYARR", KEYWORD "COPYARR"); + ("COUNTOBJ", FUNCTION T.Countobj); + ("CURACTS", IDENT "CURACTS"); + ("$CURACTS", IDENT "$CURACTS"); + ("CURLOC", IDENT "CURLOC"); + ("$CURLOC", IDENT "$CURLOC") + (* + ; "DEBUG", KEYWORD "DEBUG" +*); + ("DELACT", KEYWORD "DELACT"); + ("DEL ACT", KEYWORD "DELACT"); + ("DELLIB", KEYWORD "FREELIB"); + ("DELOBJ", KEYWORD "DELOBJ"); + ("DEL OBJ", KEYWORD "DELOBJ"); + ("DESC", FUNCTION T.Desc); + ("$DESC", FUNCTION T.Desc') + (* + ; "DISABLESCROLL", KEYWORD "DISABLESCROLL" + ; "DISABLESUBEX", KEYWORD "DISABLESUBEX" +*); + ("DYNAMIC", KEYWORD "DYNAMIC"); + ("DYNEVAL", FUNCTION T.Dyneval); + ("$DYNEVAL", FUNCTION T.Dyneval'); + ("ELSE", ELSE); + ("ELSEIF", ELIF); + ("END", END); + ("EXEC", KEYWORD "EXEC"); + ("EXIT", KEYWORD "EXIT") + (* + ; "FCOLOR", KEYWORD "FCOLOR" + ; "$FNAME", KEYWORD "$FNAME" +*); + ("FREELIB", KEYWORD "FREELIB") + (* + ; "FSIZE", KEYWORD "FSIZE" +*); + ("FUNC", FUNCTION T.Func); + ("$FUNC", FUNCTION T.Func'); + ("GETOBJ", FUNCTION T.Getobj); + ("$GETOBJ", FUNCTION T.Getobj'); + ("GOSUB", KEYWORD "GOSUB"); + ("GOTO", KEYWORD "GOTO") (* + ; "GC", KEYWORD "GC" +*); + ("GS", KEYWORD "GOSUB"); + ("GT", KEYWORD "GOTO"); + ("IF", IF); + ("IIF", FUNCTION T.Iif); + ("$IIF", FUNCTION T.Iif'); + ("INCLIB", KEYWORD "INCLIB"); + ("INPUT", FUNCTION T.Input); + ("$INPUT", FUNCTION T.Input'); + ("INSTR", FUNCTION T.Instr); + ("ISNUM", FUNCTION T.Isnum); + ("ISPLAY", FUNCTION T.Isplay); + ("JUMP", KEYWORD "JUMP"); + ("KILLALL", KEYWORD "KILLALL"); + ("KILLOBJ", KEYWORD "KILLOBJ"); + ("KILLQST", KEYWORD "FREELIB"); + ("KILLVAR", KEYWORD "KILLVAR"); + ("LCASE", FUNCTION T.Lcase); + ("$LCASE", FUNCTION T.Lcase') + (* + ; "LCOLOR", KEYWORD "LCOLOR" +*); + ("LEN", FUNCTION T.Len); + ("LET", LET); + ("LOC", FUNCTION T.Loc); + ("MAINTXT", IDENT "MAINTXT"); + ("$MAINTXT", IDENT "MAINTXT"); + ("MAX", FUNCTION T.Max); + ("$MAX", FUNCTION T.Max'); + ("MENU", KEYWORD "MENU"); + ("MID", FUNCTION T.Mid); + ("$MID", FUNCTION T.Mid'); + ("MIN", FUNCTION T.Min); + ("$MIN", FUNCTION T.Min'); + ("MOD", MOD); + ("MSECSCOUNT", FUNCTION T.Msecscount); + ("MSG", KEYWORD "MSG"); + ("NL", KEYWORD "NL"); + ("*NL", KEYWORD "*NL"); + ("NO", NO) (* + ; "NOSAVE", KEYWORD "NOSAVE" +*); + ("OBJ", OBJ); + ("$ONACTSEL", IDENT "$ONACTSEL"); + ("$ONGLOAD", IDENT "$ONGLOAD"); + ("$ONGSAVE", IDENT "$ONGSAVE"); + ("$ONNEWLOC", IDENT "$ONNEWLOC"); + ("$ONOBJADD", IDENT "$ONOBJADD"); + ("$ONOBJDEL", IDENT "$ONOBJDEL"); + ("$ONOBJSEL", IDENT "$ONOBJSEL"); + ("OPENGAME", KEYWORD "OPENGAME"); + ("OPENQST", KEYWORD "OPENQST"); + ("OR", OR); + ("P", KEYWORD "P"); + ("*P", KEYWORD "*P"); + ("PL", KEYWORD "PL"); + ("*PL", KEYWORD "*PL"); + ("PLAY", KEYWORD "PLAY"); + ("QSPVER", FUNCTION T.Qspver); + ("$QSPVER", FUNCTION T.Qspver'); + ("RAND", FUNCTION T.Rand); + ("REFINT", KEYWORD "REFINT"); + ("REPLACE", FUNCTION T.Replace); + ("$REPLACE", FUNCTION T.Replace'); + ("RGB", FUNCTION T.Rgb); + ("RND", FUNCTION T.Rnd); + ("SAVEGAME", KEYWORD "SAVEGAME"); + ("SELACT", FUNCTION T.Selact); + ("$SELACT", IDENT "$SELACT"); + ("SELOBJ", IDENT "SELOBJ"); + ("$SELOBJ", IDENT "$SELOBJ"); + ("SET", SET); + ("SETTIMER", KEYWORD "SETTIMER"); + ("SHOWACTS", KEYWORD "SHOWACTS"); + ("SHOWINPUT", KEYWORD "SHOWINPUT"); + ("SHOWOBJS", KEYWORD "SHOWOBJS"); + ("SHOWSTAT", KEYWORD "SHOWSTAT"); + ("STATTXT", FUNCTION T.Stattxt); + ("$STATTXT", FUNCTION T.Stattxt'); + ("STR", FUNCTION T.Str); + ("$STR", FUNCTION T.Str'); + ("STRCOMP", FUNCTION T.Strcomp); + ("STRFIND", FUNCTION T.Strfind); + ("$STRFIND", FUNCTION T.Strfind'); + ("STRPOS", FUNCTION T.Strpos); + ("TRIM", FUNCTION T.Trim); + ("$TRIM", FUNCTION T.Trim'); + ("UCASE", FUNCTION T.Ucase); + ("$UCASE", FUNCTION T.Ucase'); + ("UNSEL", KEYWORD "UNSELECT"); + ("UNSELECT", KEYWORD "UNSELECT"); + ("USEHTML", IDENT "USEHTML"); + ("USERCOM", IDENT "USERCOM"); + ("$USERCOM", IDENT "$USERCOM"); + ("USER_TEXT", IDENT "USER_TEXT"); + ("$USER_TEXT", IDENT "USER_TEXT"); + ("USRTXT", IDENT "USER_TEXT"); + ("$USRTXT", IDENT "USER_TEXT"); + ("VAL", FUNCTION T.Val); + ("VIEW", KEYWORD "VIEW"); + ("WAIT", KEYWORD "WAIT"); + ("XGOTO", KEYWORD "XGOTO"); + ("XGT", KEYWORD "XGOTO"); + ] diff --git a/lib/qparser/interpreter.ml b/lib/qparser/interpreter.ml new file mode 100644 index 0000000..b719600 --- /dev/null +++ b/lib/qparser/interpreter.ml @@ -0,0 +1,67 @@ +(** + This module provide a way to build the syntax parser with the menhir + incremental engine. This feature allow to see the state of the parser, and + get detailed error message but is not intended to be used directly. + + Refer to the menhir manual in order to see the values. + + The interresting function here is [of_lexbuf] which return the error code in + case of invalid syntax. + *) + +type error_code = UnrecoverableError | InvalidSyntax | MenhirCode of int + +module Interpreter (MI : MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE) = +struct + type error = { + code : error_code; + start_pos : Lexing.position; + end_pos : Lexing.position; + } + + module E = MenhirLib.ErrorReports + module L = MenhirLib.LexerUtil + + type step = MI.token * Lexing.position * Lexing.position + + let range_message (start_pos, end_pos) : error_code -> error = + fun code -> { code; start_pos; end_pos } + + let get_parse_error : Lexbuf.t -> 'a MI.env -> error = + fun buffer env -> + match MI.stack env with + | (lazy Nil) -> + (* The parser is in its initial state. We should not get an + error here *) + let positions = Lexbuf.positions buffer in + + range_message positions UnrecoverableError + | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) -> + range_message (start_pos, end_pos) (MenhirCode (MI.number state)) + + let rec _parse : + Lexbuf.t -> (unit -> step) -> 'a MI.checkpoint -> ('a, error) Result.t = + fun buffer get_token (checkpoint : 'a MI.checkpoint) -> + match checkpoint with + | MI.InputNeeded _env -> + let token, startp, endp = get_token () in + let checkpoint = MI.offer checkpoint (token, startp, endp) in + _parse buffer get_token checkpoint + | MI.Shifting _ | MI.AboutToReduce _ -> + let checkpoint = MI.resume checkpoint in + _parse buffer get_token checkpoint + | MI.HandlingError _env -> + let err = get_parse_error buffer _env in + Error err + | MI.Accepted v -> Ok v + | MI.Rejected -> + let positions = Lexbuf.positions buffer in + let err = range_message positions InvalidSyntax in + Error err + + type 'a builder = Lexing.position -> 'a MI.checkpoint + + let of_lexbuf : + (unit -> step) -> Lexbuf.t -> 'a MI.checkpoint -> ('a, error) result = + fun lexer buffer init -> _parse buffer lexer init +end diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml new file mode 100644 index 0000000..3f0b186 --- /dev/null +++ b/lib/qparser/lexbuf.ml @@ -0,0 +1,61 @@ +type t = { + buffer : Sedlexing.lexbuf; + mutable start_p : Lexing.position option; + mutable expression_level : int; + reset_line : bool; +} + +let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer + +let start : t -> unit = + fun t -> + let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in + let () = + if not t.reset_line then + Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 } + in + t.start_p <- None; + t.expression_level <- 0 + +let positions : t -> Lexing.position * Lexing.position = + fun t -> Sedlexing.lexing_positions t.buffer + +let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer + +let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t = + fun ?(reset_line = true) t -> + { buffer = t; start_p = None; expression_level = 0; reset_line } + +let set_start_position : t -> Lexing.position -> unit = + fun t position -> t.start_p <- Some position + +let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position + = + fun f t -> + let lexer () = + (* Clear the previous registered start position if any *) + t.start_p <- None; + let token = f t in + let default, curr_p = positions t in + + let start_p = Option.value ~default t.start_p in + + (token, start_p, curr_p) + in + lexer + +(* The comment system is terrible. The same symbol can be used for : + - starting a comment + - inequality operation + In order to manage this, I try to identify the context in a very basic way, + using a counter for determining the token to send. +*) + +let incr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level + 1 + +let decr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level - 1 + +let reset_level : t -> unit = fun t -> t.expression_level <- 0 +let level : t -> int = fun t -> t.expression_level diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli new file mode 100644 index 0000000..41f07d1 --- /dev/null +++ b/lib/qparser/lexbuf.mli @@ -0,0 +1,25 @@ +type t + +val from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t +(** Create a new buffer *) + +val start : t -> unit +(** Intialize a new run *) + +val buffer : t -> Sedlexing.lexbuf +(** Extract the sedlex buffer. Required in each rule. *) + +val positions : t -> Lexing.position * Lexing.position +(** Extract the starting and ending position for the matched token *) + +val content : t -> string +(** Extract the token matched by the rule *) + +val set_start_position : t -> Lexing.position -> unit +val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position +val incr_level : t -> unit +val decr_level : t -> unit +val reset_level : t -> unit + +val level : t -> int +(** Return the nested expression level *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml new file mode 100644 index 0000000..c643577 --- /dev/null +++ b/lib/qparser/lexer.ml @@ -0,0 +1,212 @@ +(** + Lexer using sedlex + *) + +open Tokens + +exception UnclosedQuote of { content : string; line : int } +exception LexError of Lexing.position * string +exception EOF + +let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = + Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) + +(* Extract the location name from the pattern *) +let location_name = Str.regexp {|.* \(.*\)|} + +let build_ident buffer = + let id = Lexbuf.content buffer |> String.uppercase_ascii in + try + let value = Hashtbl.find Idents.keyword_table id in + let _ = + match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () + in + value + with Not_found -> IDENT id + +let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = + fun rule lexbuf -> + let _, position = Lexbuf.positions lexbuf in + Lexbuf.set_start_position lexbuf position; + + try[@warning "-52"] + let token = rule (Buffer.create 256) lexbuf in + token + with Failure "lexing: empty token" -> + let position, _ = Lexbuf.positions lexbuf in + let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in + (raise (UnclosedQuote { line; content }) [@warning "+52"]) + +let space = [%sedlex.regexp? ' ' | '\t'] +let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"] +let coma = [%sedlex.regexp? ','] +let digit = [%sedlex.regexp? '0' .. '9'] +let letters = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '_'] +let spaces = [%sedlex.regexp? Plus space] +let ident = [%sedlex.regexp? ('$' | letters), Star (digit | letters)] +let location_ident = [%sedlex.regexp? letters | digit] +let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] +let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] + +let rec read_long_string level buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '{' -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_long_string (level + 1) buf buffer + | '}' -> ( + match level with + | 0 -> Buffer.contents buf + | _ -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_long_string (level - 1) buf buffer) + | eol -> + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string level buf buffer + | any -> + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string level buf buffer + | _ -> raise Not_found + +let rec read_dquoted_string buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "\"\"" -> + Buffer.add_char buf '"'; + read_dquoted_string buf buffer + | '"' -> Buffer.contents buf + | any -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_dquoted_string buf buffer + | _ -> raise Not_found + +let rec read_quoted_string buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "''" -> + Buffer.add_char buf '\''; + read_quoted_string buf buffer + | '\'' -> Buffer.contents buf + | eol -> + Buffer.add_string buf (Lexbuf.content buffer); + read_quoted_string buf buffer + | any -> + Buffer.add_string buf (Lexbuf.content buffer); + read_quoted_string buf buffer + | _ -> raise Not_found + +let rec skip_comment buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '{' -> + let _ = wait_balance (read_long_string 0) buffer in + skip_comment buffer + | '\'' -> + let _ = wait_balance read_quoted_string buffer in + skip_comment buffer + | '"' -> + let _ = wait_balance read_dquoted_string buffer in + skip_comment buffer + | eol -> + (* Ugly hack used in order to put the eol in the front of the next + parsing. *) + Sedlexing.rollback lexbuf; + COMMENT + | any -> skip_comment buffer + | _ -> raise Not_found + +(** Main lexer *) +let rec token : Lexbuf.t -> token = + fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | 0Xfeff -> + (* Ignore the BOM *) + token buffer + | '#', Star space, location -> + (* Extract the location name *) + let ident = Lexbuf.content buffer in + let () = + match Str.string_match location_name ident 0 with + | false -> () + | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident) + in + + (* Restart the line number (new location here) *) + Lexbuf.start buffer; + + LOCATION_START ident + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Lexbuf.reset_level buffer; + LOCATION_END + | Plus digit -> INTEGER (Lexbuf.content buffer) + | '+' -> PLUS + | '-' -> MINUS + | "+=" -> INCR + | "-=" -> DECR + | "*=" -> MULT_EQUAL + | '/' -> DIV + | '*' -> STAR + | ':' -> + (* We are leaving the block, the comment will be handled again *) + Lexbuf.decr_level buffer; + + COLUMN + | '[' -> L_BRACKET + | ']' -> R_BRACKET + | '(' -> + Lexbuf.incr_level buffer; + L_PAREN + | ')' -> + Lexbuf.decr_level buffer; + R_PAREN + | '<' -> LT + | '>' -> GT + | coma -> COMA + | '=' -> + Lexbuf.incr_level buffer; + EQUAL + | ident -> build_ident buffer + | eol -> + Lexbuf.reset_level buffer; + EOL + | '&' -> + Lexbuf.reset_level buffer; + AMPERSAND + | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer + | spaces -> token buffer + | '\'' -> LITERAL (wait_balance read_quoted_string buffer) + | '"' -> LITERAL (wait_balance read_dquoted_string buffer) + | '{' -> LITERAL (wait_balance (read_long_string 0) buffer) + | eof -> raise EOF + | _ -> + let position = fst @@ Sedlexing.lexing_positions lexbuf in + let tok = Lexbuf.content buffer in + + let msg = + Format.asprintf "Unexpected character %S at %a" tok pp_pos position + in + + raise @@ LexError (position, msg) + +let rec discard buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '\'' -> + ignore (wait_balance read_quoted_string buffer); + discard buffer + | '"' -> + ignore (wait_balance read_dquoted_string buffer); + discard buffer + | '{' -> + ignore (wait_balance (read_long_string 0) buffer); + discard buffer + | eof -> raise EOF + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Lexbuf.reset_level buffer; + () + | '!' -> + ignore @@ skip_comment buffer; + discard buffer + | any -> discard buffer + | _ -> raise EOF diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli new file mode 100644 index 0000000..585877c --- /dev/null +++ b/lib/qparser/lexer.mli @@ -0,0 +1,4 @@ +exception EOF + +val token : Lexbuf.t -> Tokens.token +val discard : Lexbuf.t -> unit diff --git a/lib/qparser/parser.mly b/lib/qparser/parser.mly new file mode 100644 index 0000000..84c1af8 --- /dev/null +++ b/lib/qparser/parser.mly @@ -0,0 +1,89 @@ + +%{ + module T = Qsp_syntax.T +%} + +%parameter<Analyzer: Qsp_syntax.S.Analyzer> +%start <Analyzer.Location.repr>main +%on_error_reduce expression instruction unary_operator assignation_operator + +%% + +main: + | before_location* + LOCATION_START + EOL+ + expressions = line_statement* + LOCATION_END + { + Analyzer.Location.location $loc expressions + } + +before_location: + | EOL {} + | COMMENT EOL { } + +(* All these statement should terminate with EOL *) +line_statement: + | COMMENT EOL+ { Analyzer.Instruction.comment $loc } + | COLUMN i=IDENT EOL* { Analyzer.Instruction.location $loc i } + | s = terminated(instruction, line_sep) + | s = terminated(inline_action, line_sep) + { s } + | a = action_bloc(IF, elif_else_body) + { let loc, expression, statements, loc_s, body = a in + let elifs, else_ = match body with + | None -> [], [] + | Some (elifs, else_) -> (elifs, else_) + in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } + | a = action_bloc(ACT, empty_body) + { let loc, label, statements, _, _ = a in + Analyzer.Instruction.act loc ~label statements + } + +(** Represent an instruction which can either be on a single line, + or created in a block until an END + *) +%inline action_bloc(TOKEN, BODY): + | TOKEN + e = expression + COLUMN EOL+ + s = line_statement* + b = BODY + END TOKEN? + line_sep + { $loc, e, s, $loc(s), b } + +empty_body: + | { None } + +elif: + | ELIF + e = expression + COLUMN EOL+ + s = line_statement* + { $loc, e, s } + +else_: + | ELSE EOL+ + expressions = line_statement* + { expressions } + | { [] } + + +elif_else_body: + | elifs = elif* + else_ = else_ + { Some (elifs, else_) } + + +%inline line_sep: + | EOL+ + | AMPERSAND+ EOL* + {} diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly new file mode 100644 index 0000000..06cfadd --- /dev/null +++ b/lib/qparser/qsp_expression.mly @@ -0,0 +1,86 @@ +(* %start <(Elements.pos) Elements.exppression>expression *) + +%% + +%public arguments(X): + (** This rule allow the difference between elements with a single argument + (when the separator is required) which allow to write a spectif case when + we only have one element. + *) + | hd = X + COMA + tl = separated_nonempty_list(COMA, X) + { hd :: tl } + (** The case where we have nothing is easy to manage here *) + | + { [] } + (** The remaining case (only one argument) is handled outside of this + block: if we have a single argument we don’t have to bother with the + paren, they belongs to the expression. + *) + +%inline argument(X): + | a = delimited(L_PAREN, arguments(X), R_PAREN) { a } + | a = X { [ a ] } + +(** Declare an expression *) +%public expression: + | ex = delimited(L_PAREN, expression, R_PAREN) + { ex } + | op = unary_operator + expr = expression + { Analyzer.Expression.uoperator $loc op expr } + %prec NO + | + expr1 = expression + op = binary_operator + expr2 = expression + { Analyzer.Expression.boperator $loc op expr1 expr2 } + | v = LITERAL { Analyzer.Expression.literal $loc v } + | i = INTEGER { Analyzer.Expression.integer $loc i } + | v = variable { Analyzer.Expression.ident v } + %prec p_variable + | k = FUNCTION + arg = argument(expression) + { + (Analyzer.Expression.function_ $loc k arg) + } + +unary_operator: + | OBJ + | NO { T.No } + | MINUS { T.Neg } + | PLUS { T.Add } + +%inline binary_operator: + | EQUAL { T.Eq } + | LT GT { T.Neq } + | EXCLAMATION { T.Neq } + | PLUS { T.Plus } + | MINUS { T.Minus } + | STAR { T.Product } + | DIV { T.Div } + | MOD { T.Mod } + | GT { T.Gt } + | LT { T.Lt } + | AND { T.And } + | GT EQUAL { T.Gte } + | LT EQUAL { T.Lte } + | EQUAL GT { T.Gte } + | EQUAL LT { T.Lte } + | OR { T.Or } + +(** Declare a variable, either in the assignation (let var = …) or as a + reference is an expression + *) +%public variable: + | name = IDENT + brackets = delimited(L_BRACKET, expression?, R_BRACKET)? + { + let index = match brackets with + | None -> + (* No declaration, consider index at 0 *) + None + | Some other -> other in + Analyzer.Expression.{ pos = $loc ; name ; index } + } diff --git a/lib/qparser/qsp_instruction.mly b/lib/qparser/qsp_instruction.mly new file mode 100644 index 0000000..564e154 --- /dev/null +++ b/lib/qparser/qsp_instruction.mly @@ -0,0 +1,104 @@ +%% + +optionnal_delimited(opening, X, closing): + | v = delimited(opening, X, closing) { v } + | v = X { v } + +argument(X): + | a = optionnal_delimited(L_PAREN, arguments(X), R_PAREN) { a } + | a = X { [ a ] } + +(** At the opposite of an expression, an instruction does not return anything. *) +%public instruction: + | s = single_instruction { s } + +(** Action like act or if in a single line *) +%public inline_action: + | a = onliner(ACT) + { let loc, label, statements, _, _ = a in + Analyzer.Instruction.act loc ~label statements + } + | a = onliner(IF) + else_opt = preceded(ELSE, instruction)? + { let loc, expression, statements, loc_s, _body = a in + let elifs = [] + and else_ = Option.to_list else_opt in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } + | a = onliner(IF) + else_= preceded(ELSE, inline_action) + { let loc, expression, statements, loc_s, _body = a in + let elifs = [] + and else_ = [ else_ ] in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } +single_instruction: + | expr = expression + { + Analyzer.Instruction.expression expr + } + | e = let_assignation { e } + | k = keyword + arg = argument(expression) + { + Analyzer.Instruction.call $loc k arg + } + +keyword: + | STAR k = KEYWORD { "*" ^ k } + | k = KEYWORD { k } + +let_assignation: + | assignation + variable = variable + op = assignation_operator + value = expression + { + Analyzer.Instruction.assign $loc variable op value + } + +%inline assignation: + | + | LET + | SET {} + +assignation_operator: + | EQUAL { T.Eq' } + | INCR { T.Inc } + | DECR { T.Decr } + | MULT_EQUAL { T.Mult } + +inline_instruction: + | hd = inline_instruction + tl = single_instruction + AMPERSAND+ + { tl :: hd } + | + { [] } + +final_inline_instruction: + | hd = inline_instruction + tl = instruction + | hd = inline_instruction + tl = inline_action + { tl :: hd } + | hd = inline_instruction + COMMENT + { (Analyzer.Instruction.comment $loc) :: hd } + | hd = inline_instruction + { hd } + +onliner(TOKEN): + | TOKEN + e = expression + COLUMN + s = rev (final_inline_instruction) + { $loc, e, s, $loc(s), None } diff --git a/lib/qparser/tokens.mly b/lib/qparser/tokens.mly new file mode 100644 index 0000000..9ac4b10 --- /dev/null +++ b/lib/qparser/tokens.mly @@ -0,0 +1,73 @@ +%token <string>LOCATION_START +%token LOCATION_END + +%token PLUS +%token MINUS +%token INCR DECR +%token MULT_EQUAL +%token STAR +%token DIV +%token MOD + +%token AMPERSAND +%token COMA +%token EQUAL +%token COLUMN +%token L_BRACKET R_BRACKET +%token L_PAREN R_PAREN +%token LT GT +%token EXCLAMATION +%token AND OR + +%token EOF +%token EOL + +%token <string>IDENT +%token <string>LITERAL +%token <string>INTEGER + +%token COMMENT + +%token ACT +%token IF +%token ELSE +%token ELIF +%token END +%token LET +%token SET +%token OBJ +%token LOC +%token NO +%token <string>KEYWORD +%token <Qsp_syntax.T.function_>FUNCTION + +(* +(b) if the token was declared left-associative, then the conflict is resolved +in favor of reduction; + +(c) if the token was declared right-associative, then the conflict is resolved +in favor of shifting. + *) + +(* Exclamation should have the lower priority because the comments shall never + take place of the statements + *) +%right NO +(* The priority for the variable should be lower than the equality priority + if I want to allow declare new variables *) +%left p_variable +%left OR +%left AND +%left EQUAL +%left GT LT +%left EXCLAMATION +%left PLUS MINUS +%left STAR DIV +%left MOD +%left FUNCTION +%left L_PAREN +%right R_PAREN +%left COMA +%left KEYWORD + +%% |