From 97ab5c9a21166f0bffee482210d69877fd6809fa Mon Sep 17 00:00:00 2001 From: Chimrod <> Date: Fri, 6 Oct 2023 08:35:56 +0200 Subject: Moved qparser and syntax in the library folder --- lib/analyzer.ml | 42 ---- lib/analyzer.mli | 8 - lib/dune | 29 --- lib/explain.sh | 5 - lib/expression_parser.messages | 304 -------------------------- lib/generate_errors.sh | 8 - lib/grammar.txt | 100 --------- lib/idents.ml | 179 --------------- lib/interpreter.ml | 67 ------ lib/lexbuf.ml | 61 ------ lib/lexbuf.mli | 25 --- lib/lexer.ml | 212 ------------------ lib/lexer.mli | 4 - lib/parser.mly | 89 -------- lib/qparser/analyzer.ml | 42 ++++ lib/qparser/analyzer.mli | 8 + lib/qparser/dune | 29 +++ lib/qparser/explain.sh | 5 + lib/qparser/expression_parser.messages | 304 ++++++++++++++++++++++++++ lib/qparser/generate_errors.sh | 8 + lib/qparser/grammar.txt | 100 +++++++++ lib/qparser/idents.ml | 179 +++++++++++++++ lib/qparser/interpreter.ml | 67 ++++++ lib/qparser/lexbuf.ml | 61 ++++++ lib/qparser/lexbuf.mli | 25 +++ lib/qparser/lexer.ml | 212 ++++++++++++++++++ lib/qparser/lexer.mli | 4 + lib/qparser/parser.mly | 89 ++++++++ lib/qparser/qsp_expression.mly | 86 ++++++++ lib/qparser/qsp_instruction.mly | 104 +++++++++ lib/qparser/tokens.mly | 73 +++++++ lib/qsp_expression.mly | 86 -------- lib/qsp_instruction.mly | 104 --------- lib/syntax/S.ml | 91 ++++++++ lib/syntax/dune | 6 + lib/syntax/report.ml | 40 ++++ lib/syntax/t.ml | 78 +++++++ lib/syntax/tree.ml | 95 ++++++++ lib/syntax/tree.mli | 51 +++++ lib/syntax/type_of.ml | 385 +++++++++++++++++++++++++++++++++ lib/tokens.mly | 73 ------- 41 files changed, 2142 insertions(+), 1396 deletions(-) delete mode 100644 lib/analyzer.ml delete mode 100644 lib/analyzer.mli delete mode 100644 lib/dune delete mode 100755 lib/explain.sh delete mode 100644 lib/expression_parser.messages delete mode 100755 lib/generate_errors.sh delete mode 100644 lib/grammar.txt delete mode 100644 lib/idents.ml delete mode 100644 lib/interpreter.ml delete mode 100644 lib/lexbuf.ml delete mode 100644 lib/lexbuf.mli delete mode 100644 lib/lexer.ml delete mode 100644 lib/lexer.mli delete mode 100644 lib/parser.mly create mode 100644 lib/qparser/analyzer.ml create mode 100644 lib/qparser/analyzer.mli create mode 100644 lib/qparser/dune create mode 100755 lib/qparser/explain.sh create mode 100644 lib/qparser/expression_parser.messages create mode 100755 lib/qparser/generate_errors.sh create mode 100644 lib/qparser/grammar.txt create mode 100644 lib/qparser/idents.ml create mode 100644 lib/qparser/interpreter.ml create mode 100644 lib/qparser/lexbuf.ml create mode 100644 lib/qparser/lexbuf.mli create mode 100644 lib/qparser/lexer.ml create mode 100644 lib/qparser/lexer.mli create mode 100644 lib/qparser/parser.mly create mode 100644 lib/qparser/qsp_expression.mly create mode 100644 lib/qparser/qsp_instruction.mly create mode 100644 lib/qparser/tokens.mly delete mode 100644 lib/qsp_expression.mly delete mode 100644 lib/qsp_instruction.mly create mode 100644 lib/syntax/S.ml create mode 100644 lib/syntax/dune create mode 100644 lib/syntax/report.ml create mode 100644 lib/syntax/t.ml create mode 100644 lib/syntax/tree.ml create mode 100644 lib/syntax/tree.mli create mode 100644 lib/syntax/type_of.ml delete mode 100644 lib/tokens.mly (limited to 'lib') diff --git a/lib/analyzer.ml b/lib/analyzer.ml deleted file mode 100644 index da1adbf..0000000 --- a/lib/analyzer.ml +++ /dev/null @@ -1,42 +0,0 @@ -(** - Run the QSP parser and apply the analyzer over it. - - See [syntax/S] - *) -let parse : - (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> - Lexbuf.t -> - ('a, Qsp_syntax.Report.t) Result.t = - fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a) -> - let module Parser = Parser.Make (S) in - let module IncrementalParser = - Interpreter.Interpreter (Parser.MenhirInterpreter) in - fun l -> - let lexer = Lexbuf.tokenize Lexer.token l in - - let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in - - IncrementalParser.of_lexbuf lexer l init - |> Result.map_error (fun e -> - let message = - match e.IncrementalParser.code with - | Interpreter.InvalidSyntax -> "Invalid Syntax" - | Interpreter.UnrecoverableError -> "UnrecoverableError" - | Interpreter.MenhirCode c -> - let message_content = - try Parser_messages.message c - with Not_found -> - String.concat "" [ "(Error code "; string_of_int c; ")" ] - in - - String.concat "" [ String.trim @@ message_content ] - in - let report = - Qsp_syntax.Report.error (e.start_pos, e.end_pos) message - in - - (* Discard the remaining file to read. The parser is now in a blank - state, it does not make sense to keep feeding it with the new - tokens. *) - Lexer.discard l; - report) diff --git a/lib/analyzer.mli b/lib/analyzer.mli deleted file mode 100644 index 30b6625..0000000 --- a/lib/analyzer.mli +++ /dev/null @@ -1,8 +0,0 @@ -val parse : - (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> - Lexbuf.t -> - ('a, Qsp_syntax.Report.t) Result.t -(** Read the source and build a analyzis over it. - -This method make the link between the source file and how to read it -(encoding…) and the AST we want to build. *) diff --git a/lib/dune b/lib/dune deleted file mode 100644 index f62c90e..0000000 --- a/lib/dune +++ /dev/null @@ -1,29 +0,0 @@ -(library - (name qparser) - (libraries - str - menhirLib - qsp_syntax - ) - (preprocess (pps - sedlex.ppx - )) - ) - - -(rule - (targets parser_messages.ml) - (deps expression_parser.messages tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly) - (action (with-stdout-to %{targets} (run menhir --base parser.mly --compile-errors %{deps})))) - -(menhir - (modules tokens) - (flags --only-tokens) -) - - -(menhir - (modules tokens parser qsp_instruction qsp_expression) - (flags --table --external-tokens Tokens) - (merge_into parser) -) diff --git a/lib/explain.sh b/lib/explain.sh deleted file mode 100755 index 609d208..0000000 --- a/lib/explain.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -menhir --explain tokens.mly qsp_expression.mly --base qsp_expression.mly -menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly --base qsp_instruction.mly -menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly --base parser.mly diff --git a/lib/expression_parser.messages b/lib/expression_parser.messages deleted file mode 100644 index a493067..0000000 --- a/lib/expression_parser.messages +++ /dev/null @@ -1,304 +0,0 @@ -main: LOCATION_START EOL IDENT SET -## -## Ends in an error in state: 13. -## -## variable -> IDENT . option(delimited(L_BRACKET,option(expression),R_BRACKET)) [ STAR R_PAREN R_BRACKET PLUS OR MULT_EQUAL MOD MINUS LT INCR GT EXCLAMATION EQUAL EOL ELSE DIV DECR COMA COLUMN AND AMPERSAND ] -## -## The known suffix of the stack is as follows: -## IDENT -## - -Unexpected expression here. - -main: LOCATION_START EOL L_PAREN INTEGER SET -## -## Ends in an error in state: 69. -## -## expression -> L_PAREN expression . R_PAREN [ STAR R_PAREN R_BRACKET PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV COMA COLUMN AND AMPERSAND ] -## expression -> expression . EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . LT GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . EXCLAMATION expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . PLUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . MINUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . STAR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . DIV expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . MOD expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . AND expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . GT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . LT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . EQUAL GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . EQUAL LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## expression -> expression . OR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] -## -## The known suffix of the stack is as follows: -## L_PAREN expression -## - -Unexpected '('. Did you forgot a function before ? - -main: LOCATION_START EOL IF INTEGER SET -## -## Ends in an error in state: 87. -## -## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## onliner(IF) -> IF expression . COLUMN final_inline_instruction [ EOL ELSE AMPERSAND ] -## -## The known suffix of the stack is as follows: -## IF expression -## - -The `IF` expression does not end properly. A `:` is expected before any instruction. - -main: LOCATION_START EOL COLUMN STAR -## -## Ends in an error in state: 92. -## -## line_statement -> COLUMN . IDENT list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## -## The known suffix of the stack is as follows: -## COLUMN -## - -A location is expected after ':' not an expression - -main: LOCATION_START EOL ACT INTEGER SET - -Invalid `ACT` label. You probably missed a ':' - -main: LOCATION_START EOL ACT IDENT COLUMN EOL LOCATION_END -## -## Ends in an error in state: 100. -## -## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## -## The known suffix of the stack is as follows: -## ACT expression COLUMN nonempty_list(EOL) -## -## WARNING: This example involves spurious reductions. -## This implies that, although the LR(1) items shown above provide an -## accurate view of the past (what has been recognized so far), they -## may provide an INCOMPLETE view of the future (what was expected next). -## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL -## - -A block starting with `ACT` is not closed by `END` -If there are nested blocks, the error will points the highest block. - -main: LOCATION_START EOL IF IDENT COLUMN ELSE R_PAREN -## -## Ends in an error in state: 106. -## -## inline_action -> onliner(IF) ELSE . inline_action [ EOL ELSE AMPERSAND ] -## option(preceded(ELSE,instruction)) -> ELSE . instruction [ EOL ELSE AMPERSAND ] -## -## The known suffix of the stack is as follows: -## onliner(IF) ELSE -## - -Too manies instructions on a single line. - -main: LOCATION_START EOL IF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END -## -## Ends in an error in state: 155. -## -## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] -## -## The known suffix of the stack is as follows: -## IF expression COLUMN nonempty_list(EOL) list(line_statement) -## -## WARNING: This example involves spurious reductions. -## This implies that, although the LR(1) items shown above provide an -## accurate view of the past (what has been recognized so far), they -## may provide an INCOMPLETE view of the future (what was expected next). -## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND -## In state 149, spurious reduction of production list(EOL) -> -## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) -## In state 145, spurious reduction of production list(line_statement) -> -## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) -## - -Unclosed `IF` block. - -main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE EOL LOCATION_END -## -## Ends in an error in state: 163. -## -## else_ -> ELSE nonempty_list(EOL) . list(line_statement) [ END ] -## -## The known suffix of the stack is as follows: -## ELSE nonempty_list(EOL) -## -## WARNING: This example involves spurious reductions. -## This implies that, although the LR(1) items shown above provide an -## accurate view of the past (what has been recognized so far), they -## may provide an INCOMPLETE view of the future (what was expected next). -## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL -## - -A block starting with `IF` is not closed by `END` -If there are nested blocks, the error will points the highest block. - -main: LOCATION_START EOL IDENT AMPERSAND END -## -## Ends in an error in state: 175. -## -## main -> list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) . LOCATION_END [ # ] -## -## The known suffix of the stack is as follows: -## list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) -## -## WARNING: This example involves spurious reductions. -## This implies that, although the LR(1) items shown above provide an -## accurate view of the past (what has been recognized so far), they -## may provide an INCOMPLETE view of the future (what was expected next). -## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND -## In state 149, spurious reduction of production list(EOL) -> -## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) -## In state 145, spurious reduction of production list(line_statement) -> -## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) -## - -Unexpected `END`. -Maybe you added an `END` after an inline `ACT` or `IF` ? - -main: STAR -## -## Ends in an error in state: 0. -## -## main' -> . main [ # ] -## -## The known suffix of the stack is as follows: -## -## - -Missing location name - -main: LOCATION_START EOL INTEGER SET -## -## Ends in an error in state: 132. -## -## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] -## single_instruction -> expression . [ EOL ELSE AMPERSAND ] -## -## The known suffix of the stack is as follows: -## expression -## - -Unexpected expression here. - -main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF INTEGER SET -## -## Ends in an error in state: 157. -## -## elif -> ELIF expression . COLUMN nonempty_list(EOL) list(line_statement) [ END ELSE ELIF ] -## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] -## -## The known suffix of the stack is as follows: -## ELIF expression -## - -The `ELIF` expression does not end properly. A `:` is expected before any instruction. - -main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END -## -## Ends in an error in state: 173. -## -## list(elif) -> elif . list(elif) [ END ELSE ] -## -## The known suffix of the stack is as follows: -## elif -## -## WARNING: This example involves spurious reductions. -## This implies that, although the LR(1) items shown above provide an -## accurate view of the past (what has been recognized so far), they -## may provide an INCOMPLETE view of the future (what was expected next). -## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND -## In state 149, spurious reduction of production list(EOL) -> -## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) -## In state 145, spurious reduction of production list(line_statement) -> -## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) -## In state 160, spurious reduction of production elif -> ELIF expression COLUMN nonempty_list(EOL) list(line_statement) -## - -Unclosed `ELIF` block. - -main: EOL STAR -## -## Ends in an error in state: 177. -## -## list(before_location) -> before_location . list(before_location) [ LOCATION_START ] -## -## The known suffix of the stack is as follows: -## before_location -## - -The context does not allow an instruction here. - -main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE STAR -## -## Ends in an error in state: 162. -## -## else_ -> ELSE . nonempty_list(EOL) list(line_statement) [ END ] -## -## The known suffix of the stack is as follows: -## ELSE -## - -Unexpected operator after `ELSE` - -main: LOCATION_START EOL IDENT DECR INTEGER SET -main: LOCATION_START EOL SET IDENT DECR INTEGER SET -main: LOCATION_START EOL LET IDENT DECR INTEGER SET - -Missing separator between instructions diff --git a/lib/generate_errors.sh b/lib/generate_errors.sh deleted file mode 100755 index 3cff769..0000000 --- a/lib/generate_errors.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -# Generate the error file with the helper messages. This script is required -# when the syntax is modified because menhir will change the state number and -# we need to make it match with the comment in the messages. - -menhir --list-errors *.mly --base parser.mly > all_errors -menhir --merge-errors expression_parser.messages --merge-errors all_errors *.mly --base parser.mly > expression_parser.messages_new diff --git a/lib/grammar.txt b/lib/grammar.txt deleted file mode 100644 index d7208ef..0000000 --- a/lib/grammar.txt +++ /dev/null @@ -1,100 +0,0 @@ -location: - | LOCATION_START EOL* - line_statement* - LOCATION_END EOL* - EOF - -line_statement: - | COMMENT EOL+ // A comment - | COLUMN IDENT EOL* // A location - | instruction, line_sep - | inline_action line_sep - | ACT expression COLUMN EOL+ // ACT … END ACT - line_statement* - END ACT? // Yes you can have END or END ACT - line_sep - | IF expression COLUMN EOL+ // IF … END IF - line_statement* - elif* - else - END IF? // Yes you can have END or END IF - line_sep - -elif: - | ELIF - expression COLUMN EOL+ - line_statement* - -else: - | ELSE EOL+ - line_statement* - | - -line_sep: - | EOL+ - | AMPERSAND+ EOL* - - -instruction: - | expression - | let_assignation - | keyword argument(expression) - -keyword: - | STAR KEYWORD // A keyword starting with * - | KEYWORD - -let_assignation: - | assignation - variable - assignation_operator - expression - -assignation: - | - | LET - | SET - -assignation_operator: - | EQUAL - | INCR // += - | DECR // -= - -inline_action: - | ACT expression COLUMN // There is a recursive code here - | IF expression COLUMN // Because ACT: can contains an IF: etc - (ELSE, instruction)? // complicated to flatten here. - -expression: - | delimited(l_paren, expression, r_paren) - | unary_operator expression - | expression binary_operator expression - | literal - | integer - | variable - | function argument(expression) - -unary_operator: - | OBJ - | LOC - | NO - | MINUS - | PLUS - -binary_operator: - | EQUAL - | LT GT // Different - | EXCLAMATION // Neg, not a comment here - | PLUS - | MINUS - | STAR // Not the first char of keyword here - | DIV - | MOD - | GT - | LT - | GT EQUAL - | LT EQUAL - | EQUAL GT // Alternative syntax - | EQUAL LT // Alternative syntax - | AND - | OR diff --git a/lib/idents.ml b/lib/idents.ml deleted file mode 100644 index baf23dc..0000000 --- a/lib/idents.ml +++ /dev/null @@ -1,179 +0,0 @@ -open Tokens -module T = Qsp_syntax.T - -let keyword_table = Hashtbl.create 53 - -let _ = - List.iter - (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) - [ - ("ACT", ACT); - ("ADDLIB", KEYWORD "INCLIB"); - ("ADDOBJ", KEYWORD "ADDOBJ"); - ("ADD OBJ", KEYWORD "ADDOBJ"); - ("ADDQST", KEYWORD "INCLIB"); - ("AND", AND); - ("ARRCOMP", FUNCTION T.Arrcomp); - ("ARRPOS", FUNCTION T.Arrpos); - ("ARRSIZE", FUNCTION T.Arrsize) - (* - ; "BACKIMAGE", KEYWORD "BACKIMAGE" - ; "$BACKIMAGE", KEYWORD "BACKIMAGE" - ; "BCOLOR", KEYWORD "BCOLOR" *); - ("CLA", KEYWORD "CLA"); - ("CLEAR", KEYWORD "CLEAR"); - ("*CLEAR", KEYWORD "*CLEAR"); - ("CLOSE", KEYWORD "CLOSE"); - ("CLOSE ALL", KEYWORD "CLOSEALL"); - ("CLR", KEYWORD "CLEAR"); - ("*CLR", KEYWORD "*CLEAR"); - ("CLS", KEYWORD "CLS"); - ("CMDCLEAR", KEYWORD "CMDCLEAR"); - ("CMDCLR", KEYWORD "CMDCLEAR"); - ("COPYARR", KEYWORD "COPYARR"); - ("COUNTOBJ", FUNCTION T.Countobj); - ("CURACTS", IDENT "CURACTS"); - ("$CURACTS", IDENT "$CURACTS"); - ("CURLOC", IDENT "CURLOC"); - ("$CURLOC", IDENT "$CURLOC") - (* - ; "DEBUG", KEYWORD "DEBUG" -*); - ("DELACT", KEYWORD "DELACT"); - ("DEL ACT", KEYWORD "DELACT"); - ("DELLIB", KEYWORD "FREELIB"); - ("DELOBJ", KEYWORD "DELOBJ"); - ("DEL OBJ", KEYWORD "DELOBJ"); - ("DESC", FUNCTION T.Desc); - ("$DESC", FUNCTION T.Desc') - (* - ; "DISABLESCROLL", KEYWORD "DISABLESCROLL" - ; "DISABLESUBEX", KEYWORD "DISABLESUBEX" -*); - ("DYNAMIC", KEYWORD "DYNAMIC"); - ("DYNEVAL", FUNCTION T.Dyneval); - ("$DYNEVAL", FUNCTION T.Dyneval'); - ("ELSE", ELSE); - ("ELSEIF", ELIF); - ("END", END); - ("EXEC", KEYWORD "EXEC"); - ("EXIT", KEYWORD "EXIT") - (* - ; "FCOLOR", KEYWORD "FCOLOR" - ; "$FNAME", KEYWORD "$FNAME" -*); - ("FREELIB", KEYWORD "FREELIB") - (* - ; "FSIZE", KEYWORD "FSIZE" -*); - ("FUNC", FUNCTION T.Func); - ("$FUNC", FUNCTION T.Func'); - ("GETOBJ", FUNCTION T.Getobj); - ("$GETOBJ", FUNCTION T.Getobj'); - ("GOSUB", KEYWORD "GOSUB"); - ("GOTO", KEYWORD "GOTO") (* - ; "GC", KEYWORD "GC" -*); - ("GS", KEYWORD "GOSUB"); - ("GT", KEYWORD "GOTO"); - ("IF", IF); - ("IIF", FUNCTION T.Iif); - ("$IIF", FUNCTION T.Iif'); - ("INCLIB", KEYWORD "INCLIB"); - ("INPUT", FUNCTION T.Input); - ("$INPUT", FUNCTION T.Input'); - ("INSTR", FUNCTION T.Instr); - ("ISNUM", FUNCTION T.Isnum); - ("ISPLAY", FUNCTION T.Isplay); - ("JUMP", KEYWORD "JUMP"); - ("KILLALL", KEYWORD "KILLALL"); - ("KILLOBJ", KEYWORD "KILLOBJ"); - ("KILLQST", KEYWORD "FREELIB"); - ("KILLVAR", KEYWORD "KILLVAR"); - ("LCASE", FUNCTION T.Lcase); - ("$LCASE", FUNCTION T.Lcase') - (* - ; "LCOLOR", KEYWORD "LCOLOR" -*); - ("LEN", FUNCTION T.Len); - ("LET", LET); - ("LOC", FUNCTION T.Loc); - ("MAINTXT", IDENT "MAINTXT"); - ("$MAINTXT", IDENT "MAINTXT"); - ("MAX", FUNCTION T.Max); - ("$MAX", FUNCTION T.Max'); - ("MENU", KEYWORD "MENU"); - ("MID", FUNCTION T.Mid); - ("$MID", FUNCTION T.Mid'); - ("MIN", FUNCTION T.Min); - ("$MIN", FUNCTION T.Min'); - ("MOD", MOD); - ("MSECSCOUNT", FUNCTION T.Msecscount); - ("MSG", KEYWORD "MSG"); - ("NL", KEYWORD "NL"); - ("*NL", KEYWORD "*NL"); - ("NO", NO) (* - ; "NOSAVE", KEYWORD "NOSAVE" -*); - ("OBJ", OBJ); - ("$ONACTSEL", IDENT "$ONACTSEL"); - ("$ONGLOAD", IDENT "$ONGLOAD"); - ("$ONGSAVE", IDENT "$ONGSAVE"); - ("$ONNEWLOC", IDENT "$ONNEWLOC"); - ("$ONOBJADD", IDENT "$ONOBJADD"); - ("$ONOBJDEL", IDENT "$ONOBJDEL"); - ("$ONOBJSEL", IDENT "$ONOBJSEL"); - ("OPENGAME", KEYWORD "OPENGAME"); - ("OPENQST", KEYWORD "OPENQST"); - ("OR", OR); - ("P", KEYWORD "P"); - ("*P", KEYWORD "*P"); - ("PL", KEYWORD "PL"); - ("*PL", KEYWORD "*PL"); - ("PLAY", KEYWORD "PLAY"); - ("QSPVER", FUNCTION T.Qspver); - ("$QSPVER", FUNCTION T.Qspver'); - ("RAND", FUNCTION T.Rand); - ("REFINT", KEYWORD "REFINT"); - ("REPLACE", FUNCTION T.Replace); - ("$REPLACE", FUNCTION T.Replace'); - ("RGB", FUNCTION T.Rgb); - ("RND", FUNCTION T.Rnd); - ("SAVEGAME", KEYWORD "SAVEGAME"); - ("SELACT", FUNCTION T.Selact); - ("$SELACT", IDENT "$SELACT"); - ("SELOBJ", IDENT "SELOBJ"); - ("$SELOBJ", IDENT "$SELOBJ"); - ("SET", SET); - ("SETTIMER", KEYWORD "SETTIMER"); - ("SHOWACTS", KEYWORD "SHOWACTS"); - ("SHOWINPUT", KEYWORD "SHOWINPUT"); - ("SHOWOBJS", KEYWORD "SHOWOBJS"); - ("SHOWSTAT", KEYWORD "SHOWSTAT"); - ("STATTXT", FUNCTION T.Stattxt); - ("$STATTXT", FUNCTION T.Stattxt'); - ("STR", FUNCTION T.Str); - ("$STR", FUNCTION T.Str'); - ("STRCOMP", FUNCTION T.Strcomp); - ("STRFIND", FUNCTION T.Strfind); - ("$STRFIND", FUNCTION T.Strfind'); - ("STRPOS", FUNCTION T.Strpos); - ("TRIM", FUNCTION T.Trim); - ("$TRIM", FUNCTION T.Trim'); - ("UCASE", FUNCTION T.Ucase); - ("$UCASE", FUNCTION T.Ucase'); - ("UNSEL", KEYWORD "UNSELECT"); - ("UNSELECT", KEYWORD "UNSELECT"); - ("USEHTML", IDENT "USEHTML"); - ("USERCOM", IDENT "USERCOM"); - ("$USERCOM", IDENT "$USERCOM"); - ("USER_TEXT", IDENT "USER_TEXT"); - ("$USER_TEXT", IDENT "USER_TEXT"); - ("USRTXT", IDENT "USER_TEXT"); - ("$USRTXT", IDENT "USER_TEXT"); - ("VAL", FUNCTION T.Val); - ("VIEW", KEYWORD "VIEW"); - ("WAIT", KEYWORD "WAIT"); - ("XGOTO", KEYWORD "XGOTO"); - ("XGT", KEYWORD "XGOTO"); - ] diff --git a/lib/interpreter.ml b/lib/interpreter.ml deleted file mode 100644 index b719600..0000000 --- a/lib/interpreter.ml +++ /dev/null @@ -1,67 +0,0 @@ -(** - This module provide a way to build the syntax parser with the menhir - incremental engine. This feature allow to see the state of the parser, and - get detailed error message but is not intended to be used directly. - - Refer to the menhir manual in order to see the values. - - The interresting function here is [of_lexbuf] which return the error code in - case of invalid syntax. - *) - -type error_code = UnrecoverableError | InvalidSyntax | MenhirCode of int - -module Interpreter (MI : MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE) = -struct - type error = { - code : error_code; - start_pos : Lexing.position; - end_pos : Lexing.position; - } - - module E = MenhirLib.ErrorReports - module L = MenhirLib.LexerUtil - - type step = MI.token * Lexing.position * Lexing.position - - let range_message (start_pos, end_pos) : error_code -> error = - fun code -> { code; start_pos; end_pos } - - let get_parse_error : Lexbuf.t -> 'a MI.env -> error = - fun buffer env -> - match MI.stack env with - | (lazy Nil) -> - (* The parser is in its initial state. We should not get an - error here *) - let positions = Lexbuf.positions buffer in - - range_message positions UnrecoverableError - | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) -> - range_message (start_pos, end_pos) (MenhirCode (MI.number state)) - - let rec _parse : - Lexbuf.t -> (unit -> step) -> 'a MI.checkpoint -> ('a, error) Result.t = - fun buffer get_token (checkpoint : 'a MI.checkpoint) -> - match checkpoint with - | MI.InputNeeded _env -> - let token, startp, endp = get_token () in - let checkpoint = MI.offer checkpoint (token, startp, endp) in - _parse buffer get_token checkpoint - | MI.Shifting _ | MI.AboutToReduce _ -> - let checkpoint = MI.resume checkpoint in - _parse buffer get_token checkpoint - | MI.HandlingError _env -> - let err = get_parse_error buffer _env in - Error err - | MI.Accepted v -> Ok v - | MI.Rejected -> - let positions = Lexbuf.positions buffer in - let err = range_message positions InvalidSyntax in - Error err - - type 'a builder = Lexing.position -> 'a MI.checkpoint - - let of_lexbuf : - (unit -> step) -> Lexbuf.t -> 'a MI.checkpoint -> ('a, error) result = - fun lexer buffer init -> _parse buffer lexer init -end diff --git a/lib/lexbuf.ml b/lib/lexbuf.ml deleted file mode 100644 index 3f0b186..0000000 --- a/lib/lexbuf.ml +++ /dev/null @@ -1,61 +0,0 @@ -type t = { - buffer : Sedlexing.lexbuf; - mutable start_p : Lexing.position option; - mutable expression_level : int; - reset_line : bool; -} - -let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer - -let start : t -> unit = - fun t -> - let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in - let () = - if not t.reset_line then - Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 } - in - t.start_p <- None; - t.expression_level <- 0 - -let positions : t -> Lexing.position * Lexing.position = - fun t -> Sedlexing.lexing_positions t.buffer - -let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer - -let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t = - fun ?(reset_line = true) t -> - { buffer = t; start_p = None; expression_level = 0; reset_line } - -let set_start_position : t -> Lexing.position -> unit = - fun t position -> t.start_p <- Some position - -let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position - = - fun f t -> - let lexer () = - (* Clear the previous registered start position if any *) - t.start_p <- None; - let token = f t in - let default, curr_p = positions t in - - let start_p = Option.value ~default t.start_p in - - (token, start_p, curr_p) - in - lexer - -(* The comment system is terrible. The same symbol can be used for : - - starting a comment - - inequality operation - In order to manage this, I try to identify the context in a very basic way, - using a counter for determining the token to send. -*) - -let incr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level + 1 - -let decr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level - 1 - -let reset_level : t -> unit = fun t -> t.expression_level <- 0 -let level : t -> int = fun t -> t.expression_level diff --git a/lib/lexbuf.mli b/lib/lexbuf.mli deleted file mode 100644 index 41f07d1..0000000 --- a/lib/lexbuf.mli +++ /dev/null @@ -1,25 +0,0 @@ -type t - -val from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t -(** Create a new buffer *) - -val start : t -> unit -(** Intialize a new run *) - -val buffer : t -> Sedlexing.lexbuf -(** Extract the sedlex buffer. Required in each rule. *) - -val positions : t -> Lexing.position * Lexing.position -(** Extract the starting and ending position for the matched token *) - -val content : t -> string -(** Extract the token matched by the rule *) - -val set_start_position : t -> Lexing.position -> unit -val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position -val incr_level : t -> unit -val decr_level : t -> unit -val reset_level : t -> unit - -val level : t -> int -(** Return the nested expression level *) diff --git a/lib/lexer.ml b/lib/lexer.ml deleted file mode 100644 index c643577..0000000 --- a/lib/lexer.ml +++ /dev/null @@ -1,212 +0,0 @@ -(** - Lexer using sedlex - *) - -open Tokens - -exception UnclosedQuote of { content : string; line : int } -exception LexError of Lexing.position * string -exception EOF - -let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = - Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) - -(* Extract the location name from the pattern *) -let location_name = Str.regexp {|.* \(.*\)|} - -let build_ident buffer = - let id = Lexbuf.content buffer |> String.uppercase_ascii in - try - let value = Hashtbl.find Idents.keyword_table id in - let _ = - match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () - in - value - with Not_found -> IDENT id - -let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = - fun rule lexbuf -> - let _, position = Lexbuf.positions lexbuf in - Lexbuf.set_start_position lexbuf position; - - try[@warning "-52"] - let token = rule (Buffer.create 256) lexbuf in - token - with Failure "lexing: empty token" -> - let position, _ = Lexbuf.positions lexbuf in - let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in - (raise (UnclosedQuote { line; content }) [@warning "+52"]) - -let space = [%sedlex.regexp? ' ' | '\t'] -let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"] -let coma = [%sedlex.regexp? ','] -let digit = [%sedlex.regexp? '0' .. '9'] -let letters = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '_'] -let spaces = [%sedlex.regexp? Plus space] -let ident = [%sedlex.regexp? ('$' | letters), Star (digit | letters)] -let location_ident = [%sedlex.regexp? letters | digit] -let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] -let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] - -let rec read_long_string level buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '{' -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level + 1) buf buffer - | '}' -> ( - match level with - | 0 -> Buffer.contents buf - | _ -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level - 1) buf buffer) - | eol -> - Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer - | any -> - Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer - | _ -> raise Not_found - -let rec read_dquoted_string buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | "\"\"" -> - Buffer.add_char buf '"'; - read_dquoted_string buf buffer - | '"' -> Buffer.contents buf - | any -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_dquoted_string buf buffer - | _ -> raise Not_found - -let rec read_quoted_string buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | "''" -> - Buffer.add_char buf '\''; - read_quoted_string buf buffer - | '\'' -> Buffer.contents buf - | eol -> - Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer - | any -> - Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer - | _ -> raise Not_found - -let rec skip_comment buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '{' -> - let _ = wait_balance (read_long_string 0) buffer in - skip_comment buffer - | '\'' -> - let _ = wait_balance read_quoted_string buffer in - skip_comment buffer - | '"' -> - let _ = wait_balance read_dquoted_string buffer in - skip_comment buffer - | eol -> - (* Ugly hack used in order to put the eol in the front of the next - parsing. *) - Sedlexing.rollback lexbuf; - COMMENT - | any -> skip_comment buffer - | _ -> raise Not_found - -(** Main lexer *) -let rec token : Lexbuf.t -> token = - fun buffer -> - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | 0Xfeff -> - (* Ignore the BOM *) - token buffer - | '#', Star space, location -> - (* Extract the location name *) - let ident = Lexbuf.content buffer in - let () = - match Str.string_match location_name ident 0 with - | false -> () - | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident) - in - - (* Restart the line number (new location here) *) - Lexbuf.start buffer; - - LOCATION_START ident - | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; - LOCATION_END - | Plus digit -> INTEGER (Lexbuf.content buffer) - | '+' -> PLUS - | '-' -> MINUS - | "+=" -> INCR - | "-=" -> DECR - | "*=" -> MULT_EQUAL - | '/' -> DIV - | '*' -> STAR - | ':' -> - (* We are leaving the block, the comment will be handled again *) - Lexbuf.decr_level buffer; - - COLUMN - | '[' -> L_BRACKET - | ']' -> R_BRACKET - | '(' -> - Lexbuf.incr_level buffer; - L_PAREN - | ')' -> - Lexbuf.decr_level buffer; - R_PAREN - | '<' -> LT - | '>' -> GT - | coma -> COMA - | '=' -> - Lexbuf.incr_level buffer; - EQUAL - | ident -> build_ident buffer - | eol -> - Lexbuf.reset_level buffer; - EOL - | '&' -> - Lexbuf.reset_level buffer; - AMPERSAND - | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer - | spaces -> token buffer - | '\'' -> LITERAL (wait_balance read_quoted_string buffer) - | '"' -> LITERAL (wait_balance read_dquoted_string buffer) - | '{' -> LITERAL (wait_balance (read_long_string 0) buffer) - | eof -> raise EOF - | _ -> - let position = fst @@ Sedlexing.lexing_positions lexbuf in - let tok = Lexbuf.content buffer in - - let msg = - Format.asprintf "Unexpected character %S at %a" tok pp_pos position - in - - raise @@ LexError (position, msg) - -let rec discard buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '\'' -> - ignore (wait_balance read_quoted_string buffer); - discard buffer - | '"' -> - ignore (wait_balance read_dquoted_string buffer); - discard buffer - | '{' -> - ignore (wait_balance (read_long_string 0) buffer); - discard buffer - | eof -> raise EOF - | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; - () - | '!' -> - ignore @@ skip_comment buffer; - discard buffer - | any -> discard buffer - | _ -> raise EOF diff --git a/lib/lexer.mli b/lib/lexer.mli deleted file mode 100644 index 585877c..0000000 --- a/lib/lexer.mli +++ /dev/null @@ -1,4 +0,0 @@ -exception EOF - -val token : Lexbuf.t -> Tokens.token -val discard : Lexbuf.t -> unit diff --git a/lib/parser.mly b/lib/parser.mly deleted file mode 100644 index 84c1af8..0000000 --- a/lib/parser.mly +++ /dev/null @@ -1,89 +0,0 @@ - -%{ - module T = Qsp_syntax.T -%} - -%parameter -%start main -%on_error_reduce expression instruction unary_operator assignation_operator - -%% - -main: - | before_location* - LOCATION_START - EOL+ - expressions = line_statement* - LOCATION_END - { - Analyzer.Location.location $loc expressions - } - -before_location: - | EOL {} - | COMMENT EOL { } - -(* All these statement should terminate with EOL *) -line_statement: - | COMMENT EOL+ { Analyzer.Instruction.comment $loc } - | COLUMN i=IDENT EOL* { Analyzer.Instruction.location $loc i } - | s = terminated(instruction, line_sep) - | s = terminated(inline_action, line_sep) - { s } - | a = action_bloc(IF, elif_else_body) - { let loc, expression, statements, loc_s, body = a in - let elifs, else_ = match body with - | None -> [], [] - | Some (elifs, else_) -> (elifs, else_) - in - Analyzer.Instruction.if_ - loc - (loc_s, expression, statements) - ~elifs - ~else_ - } - | a = action_bloc(ACT, empty_body) - { let loc, label, statements, _, _ = a in - Analyzer.Instruction.act loc ~label statements - } - -(** Represent an instruction which can either be on a single line, - or created in a block until an END - *) -%inline action_bloc(TOKEN, BODY): - | TOKEN - e = expression - COLUMN EOL+ - s = line_statement* - b = BODY - END TOKEN? - line_sep - { $loc, e, s, $loc(s), b } - -empty_body: - | { None } - -elif: - | ELIF - e = expression - COLUMN EOL+ - s = line_statement* - { $loc, e, s } - -else_: - | ELSE EOL+ - expressions = line_statement* - { expressions } - | { [] } - - -elif_else_body: - | elifs = elif* - else_ = else_ - { Some (elifs, else_) } - - -%inline line_sep: - | EOL+ - | AMPERSAND+ EOL* - {} diff --git a/lib/qparser/analyzer.ml b/lib/qparser/analyzer.ml new file mode 100644 index 0000000..da1adbf --- /dev/null +++ b/lib/qparser/analyzer.ml @@ -0,0 +1,42 @@ +(** + Run the QSP parser and apply the analyzer over it. + + See [syntax/S] + *) +let parse : + (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> + Lexbuf.t -> + ('a, Qsp_syntax.Report.t) Result.t = + fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a) -> + let module Parser = Parser.Make (S) in + let module IncrementalParser = + Interpreter.Interpreter (Parser.MenhirInterpreter) in + fun l -> + let lexer = Lexbuf.tokenize Lexer.token l in + + let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in + + IncrementalParser.of_lexbuf lexer l init + |> Result.map_error (fun e -> + let message = + match e.IncrementalParser.code with + | Interpreter.InvalidSyntax -> "Invalid Syntax" + | Interpreter.UnrecoverableError -> "UnrecoverableError" + | Interpreter.MenhirCode c -> + let message_content = + try Parser_messages.message c + with Not_found -> + String.concat "" [ "(Error code "; string_of_int c; ")" ] + in + + String.concat "" [ String.trim @@ message_content ] + in + let report = + Qsp_syntax.Report.error (e.start_pos, e.end_pos) message + in + + (* Discard the remaining file to read. The parser is now in a blank + state, it does not make sense to keep feeding it with the new + tokens. *) + Lexer.discard l; + report) diff --git a/lib/qparser/analyzer.mli b/lib/qparser/analyzer.mli new file mode 100644 index 0000000..30b6625 --- /dev/null +++ b/lib/qparser/analyzer.mli @@ -0,0 +1,8 @@ +val parse : + (module Qsp_syntax.S.Analyzer with type Location.repr = 'a) -> + Lexbuf.t -> + ('a, Qsp_syntax.Report.t) Result.t +(** Read the source and build a analyzis over it. + +This method make the link between the source file and how to read it +(encoding…) and the AST we want to build. *) diff --git a/lib/qparser/dune b/lib/qparser/dune new file mode 100644 index 0000000..f62c90e --- /dev/null +++ b/lib/qparser/dune @@ -0,0 +1,29 @@ +(library + (name qparser) + (libraries + str + menhirLib + qsp_syntax + ) + (preprocess (pps + sedlex.ppx + )) + ) + + +(rule + (targets parser_messages.ml) + (deps expression_parser.messages tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly) + (action (with-stdout-to %{targets} (run menhir --base parser.mly --compile-errors %{deps})))) + +(menhir + (modules tokens) + (flags --only-tokens) +) + + +(menhir + (modules tokens parser qsp_instruction qsp_expression) + (flags --table --external-tokens Tokens) + (merge_into parser) +) diff --git a/lib/qparser/explain.sh b/lib/qparser/explain.sh new file mode 100755 index 0000000..609d208 --- /dev/null +++ b/lib/qparser/explain.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +menhir --explain tokens.mly qsp_expression.mly --base qsp_expression.mly +menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly --base qsp_instruction.mly +menhir --explain tokens.mly qsp_expression.mly qsp_instruction.mly parser.mly --base parser.mly diff --git a/lib/qparser/expression_parser.messages b/lib/qparser/expression_parser.messages new file mode 100644 index 0000000..a493067 --- /dev/null +++ b/lib/qparser/expression_parser.messages @@ -0,0 +1,304 @@ +main: LOCATION_START EOL IDENT SET +## +## Ends in an error in state: 13. +## +## variable -> IDENT . option(delimited(L_BRACKET,option(expression),R_BRACKET)) [ STAR R_PAREN R_BRACKET PLUS OR MULT_EQUAL MOD MINUS LT INCR GT EXCLAMATION EQUAL EOL ELSE DIV DECR COMA COLUMN AND AMPERSAND ] +## +## The known suffix of the stack is as follows: +## IDENT +## + +Unexpected expression here. + +main: LOCATION_START EOL L_PAREN INTEGER SET +## +## Ends in an error in state: 69. +## +## expression -> L_PAREN expression . R_PAREN [ STAR R_PAREN R_BRACKET PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV COMA COLUMN AND AMPERSAND ] +## expression -> expression . EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EXCLAMATION expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . PLUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . MINUS expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . STAR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . DIV expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . MOD expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . AND expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . GT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . LT EQUAL expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EQUAL GT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . EQUAL LT expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## expression -> expression . OR expression [ STAR R_PAREN PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV AND ] +## +## The known suffix of the stack is as follows: +## L_PAREN expression +## + +Unexpected '('. Did you forgot a function before ? + +main: LOCATION_START EOL IF INTEGER SET +## +## Ends in an error in state: 87. +## +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> IF expression . COLUMN nonempty_list(EOL) list(line_statement) elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## onliner(IF) -> IF expression . COLUMN final_inline_instruction [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## IF expression +## + +The `IF` expression does not end properly. A `:` is expected before any instruction. + +main: LOCATION_START EOL COLUMN STAR +## +## Ends in an error in state: 92. +## +## line_statement -> COLUMN . IDENT list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## COLUMN +## + +A location is expected after ':' not an expression + +main: LOCATION_START EOL ACT INTEGER SET + +Invalid `ACT` label. You probably missed a ':' + +main: LOCATION_START EOL ACT IDENT COLUMN EOL LOCATION_END +## +## Ends in an error in state: 100. +## +## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> ACT expression COLUMN nonempty_list(EOL) . list(line_statement) empty_body END option(ACT) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## ACT expression COLUMN nonempty_list(EOL) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL +## + +A block starting with `ACT` is not closed by `END` +If there are nested blocks, the error will points the highest block. + +main: LOCATION_START EOL IF IDENT COLUMN ELSE R_PAREN +## +## Ends in an error in state: 106. +## +## inline_action -> onliner(IF) ELSE . inline_action [ EOL ELSE AMPERSAND ] +## option(preceded(ELSE,instruction)) -> ELSE . instruction [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## onliner(IF) ELSE +## + +Too manies instructions on a single line. + +main: LOCATION_START EOL IF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END +## +## Ends in an error in state: 155. +## +## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## line_statement -> IF expression COLUMN nonempty_list(EOL) list(line_statement) . elif_else_body END option(IF) nonempty_list(AMPERSAND) list(EOL) [ STAR SET PLUS OBJ NO MINUS L_PAREN LOCATION_END LITERAL LET KEYWORD INTEGER IF IDENT FUNCTION END ELSE ELIF COMMENT COLUMN ACT ] +## +## The known suffix of the stack is as follows: +## IF expression COLUMN nonempty_list(EOL) list(line_statement) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## + +Unclosed `IF` block. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE EOL LOCATION_END +## +## Ends in an error in state: 163. +## +## else_ -> ELSE nonempty_list(EOL) . list(line_statement) [ END ] +## +## The known suffix of the stack is as follows: +## ELSE nonempty_list(EOL) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 7, spurious reduction of production nonempty_list(EOL) -> EOL +## + +A block starting with `IF` is not closed by `END` +If there are nested blocks, the error will points the highest block. + +main: LOCATION_START EOL IDENT AMPERSAND END +## +## Ends in an error in state: 175. +## +## main -> list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) . LOCATION_END [ # ] +## +## The known suffix of the stack is as follows: +## list(before_location) LOCATION_START nonempty_list(EOL) list(line_statement) +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## + +Unexpected `END`. +Maybe you added an `END` after an inline `ACT` or `IF` ? + +main: STAR +## +## Ends in an error in state: 0. +## +## main' -> . main [ # ] +## +## The known suffix of the stack is as follows: +## +## + +Missing location name + +main: LOCATION_START EOL INTEGER SET +## +## Ends in an error in state: 132. +## +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL EOL ELSE DIV AND AMPERSAND ] +## single_instruction -> expression . [ EOL ELSE AMPERSAND ] +## +## The known suffix of the stack is as follows: +## expression +## + +Unexpected expression here. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF INTEGER SET +## +## Ends in an error in state: 157. +## +## elif -> ELIF expression . COLUMN nonempty_list(EOL) list(line_statement) [ END ELSE ELIF ] +## expression -> expression . EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EXCLAMATION expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . PLUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MINUS expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . STAR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . DIV expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . MOD expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . AND expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . GT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . LT EQUAL expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL GT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . EQUAL LT expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## expression -> expression . OR expression [ STAR PLUS OR MOD MINUS LT GT EXCLAMATION EQUAL DIV COLUMN AND ] +## +## The known suffix of the stack is as follows: +## ELIF expression +## + +The `ELIF` expression does not end properly. A `:` is expected before any instruction. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELIF IDENT COLUMN EOL IDENT AMPERSAND LOCATION_END +## +## Ends in an error in state: 173. +## +## list(elif) -> elif . list(elif) [ END ELSE ] +## +## The known suffix of the stack is as follows: +## elif +## +## WARNING: This example involves spurious reductions. +## This implies that, although the LR(1) items shown above provide an +## accurate view of the past (what has been recognized so far), they +## may provide an INCOMPLETE view of the future (what was expected next). +## In state 117, spurious reduction of production nonempty_list(AMPERSAND) -> AMPERSAND +## In state 149, spurious reduction of production list(EOL) -> +## In state 150, spurious reduction of production line_statement -> instruction nonempty_list(AMPERSAND) list(EOL) +## In state 145, spurious reduction of production list(line_statement) -> +## In state 146, spurious reduction of production list(line_statement) -> line_statement list(line_statement) +## In state 160, spurious reduction of production elif -> ELIF expression COLUMN nonempty_list(EOL) list(line_statement) +## + +Unclosed `ELIF` block. + +main: EOL STAR +## +## Ends in an error in state: 177. +## +## list(before_location) -> before_location . list(before_location) [ LOCATION_START ] +## +## The known suffix of the stack is as follows: +## before_location +## + +The context does not allow an instruction here. + +main: LOCATION_START EOL IF IDENT COLUMN EOL ELSE STAR +## +## Ends in an error in state: 162. +## +## else_ -> ELSE . nonempty_list(EOL) list(line_statement) [ END ] +## +## The known suffix of the stack is as follows: +## ELSE +## + +Unexpected operator after `ELSE` + +main: LOCATION_START EOL IDENT DECR INTEGER SET +main: LOCATION_START EOL SET IDENT DECR INTEGER SET +main: LOCATION_START EOL LET IDENT DECR INTEGER SET + +Missing separator between instructions diff --git a/lib/qparser/generate_errors.sh b/lib/qparser/generate_errors.sh new file mode 100755 index 0000000..3cff769 --- /dev/null +++ b/lib/qparser/generate_errors.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Generate the error file with the helper messages. This script is required +# when the syntax is modified because menhir will change the state number and +# we need to make it match with the comment in the messages. + +menhir --list-errors *.mly --base parser.mly > all_errors +menhir --merge-errors expression_parser.messages --merge-errors all_errors *.mly --base parser.mly > expression_parser.messages_new diff --git a/lib/qparser/grammar.txt b/lib/qparser/grammar.txt new file mode 100644 index 0000000..d7208ef --- /dev/null +++ b/lib/qparser/grammar.txt @@ -0,0 +1,100 @@ +location: + | LOCATION_START EOL* + line_statement* + LOCATION_END EOL* + EOF + +line_statement: + | COMMENT EOL+ // A comment + | COLUMN IDENT EOL* // A location + | instruction, line_sep + | inline_action line_sep + | ACT expression COLUMN EOL+ // ACT … END ACT + line_statement* + END ACT? // Yes you can have END or END ACT + line_sep + | IF expression COLUMN EOL+ // IF … END IF + line_statement* + elif* + else + END IF? // Yes you can have END or END IF + line_sep + +elif: + | ELIF + expression COLUMN EOL+ + line_statement* + +else: + | ELSE EOL+ + line_statement* + | + +line_sep: + | EOL+ + | AMPERSAND+ EOL* + + +instruction: + | expression + | let_assignation + | keyword argument(expression) + +keyword: + | STAR KEYWORD // A keyword starting with * + | KEYWORD + +let_assignation: + | assignation + variable + assignation_operator + expression + +assignation: + | + | LET + | SET + +assignation_operator: + | EQUAL + | INCR // += + | DECR // -= + +inline_action: + | ACT expression COLUMN // There is a recursive code here + | IF expression COLUMN // Because ACT: can contains an IF: etc + (ELSE, instruction)? // complicated to flatten here. + +expression: + | delimited(l_paren, expression, r_paren) + | unary_operator expression + | expression binary_operator expression + | literal + | integer + | variable + | function argument(expression) + +unary_operator: + | OBJ + | LOC + | NO + | MINUS + | PLUS + +binary_operator: + | EQUAL + | LT GT // Different + | EXCLAMATION // Neg, not a comment here + | PLUS + | MINUS + | STAR // Not the first char of keyword here + | DIV + | MOD + | GT + | LT + | GT EQUAL + | LT EQUAL + | EQUAL GT // Alternative syntax + | EQUAL LT // Alternative syntax + | AND + | OR diff --git a/lib/qparser/idents.ml b/lib/qparser/idents.ml new file mode 100644 index 0000000..baf23dc --- /dev/null +++ b/lib/qparser/idents.ml @@ -0,0 +1,179 @@ +open Tokens +module T = Qsp_syntax.T + +let keyword_table = Hashtbl.create 53 + +let _ = + List.iter + (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) + [ + ("ACT", ACT); + ("ADDLIB", KEYWORD "INCLIB"); + ("ADDOBJ", KEYWORD "ADDOBJ"); + ("ADD OBJ", KEYWORD "ADDOBJ"); + ("ADDQST", KEYWORD "INCLIB"); + ("AND", AND); + ("ARRCOMP", FUNCTION T.Arrcomp); + ("ARRPOS", FUNCTION T.Arrpos); + ("ARRSIZE", FUNCTION T.Arrsize) + (* + ; "BACKIMAGE", KEYWORD "BACKIMAGE" + ; "$BACKIMAGE", KEYWORD "BACKIMAGE" + ; "BCOLOR", KEYWORD "BCOLOR" *); + ("CLA", KEYWORD "CLA"); + ("CLEAR", KEYWORD "CLEAR"); + ("*CLEAR", KEYWORD "*CLEAR"); + ("CLOSE", KEYWORD "CLOSE"); + ("CLOSE ALL", KEYWORD "CLOSEALL"); + ("CLR", KEYWORD "CLEAR"); + ("*CLR", KEYWORD "*CLEAR"); + ("CLS", KEYWORD "CLS"); + ("CMDCLEAR", KEYWORD "CMDCLEAR"); + ("CMDCLR", KEYWORD "CMDCLEAR"); + ("COPYARR", KEYWORD "COPYARR"); + ("COUNTOBJ", FUNCTION T.Countobj); + ("CURACTS", IDENT "CURACTS"); + ("$CURACTS", IDENT "$CURACTS"); + ("CURLOC", IDENT "CURLOC"); + ("$CURLOC", IDENT "$CURLOC") + (* + ; "DEBUG", KEYWORD "DEBUG" +*); + ("DELACT", KEYWORD "DELACT"); + ("DEL ACT", KEYWORD "DELACT"); + ("DELLIB", KEYWORD "FREELIB"); + ("DELOBJ", KEYWORD "DELOBJ"); + ("DEL OBJ", KEYWORD "DELOBJ"); + ("DESC", FUNCTION T.Desc); + ("$DESC", FUNCTION T.Desc') + (* + ; "DISABLESCROLL", KEYWORD "DISABLESCROLL" + ; "DISABLESUBEX", KEYWORD "DISABLESUBEX" +*); + ("DYNAMIC", KEYWORD "DYNAMIC"); + ("DYNEVAL", FUNCTION T.Dyneval); + ("$DYNEVAL", FUNCTION T.Dyneval'); + ("ELSE", ELSE); + ("ELSEIF", ELIF); + ("END", END); + ("EXEC", KEYWORD "EXEC"); + ("EXIT", KEYWORD "EXIT") + (* + ; "FCOLOR", KEYWORD "FCOLOR" + ; "$FNAME", KEYWORD "$FNAME" +*); + ("FREELIB", KEYWORD "FREELIB") + (* + ; "FSIZE", KEYWORD "FSIZE" +*); + ("FUNC", FUNCTION T.Func); + ("$FUNC", FUNCTION T.Func'); + ("GETOBJ", FUNCTION T.Getobj); + ("$GETOBJ", FUNCTION T.Getobj'); + ("GOSUB", KEYWORD "GOSUB"); + ("GOTO", KEYWORD "GOTO") (* + ; "GC", KEYWORD "GC" +*); + ("GS", KEYWORD "GOSUB"); + ("GT", KEYWORD "GOTO"); + ("IF", IF); + ("IIF", FUNCTION T.Iif); + ("$IIF", FUNCTION T.Iif'); + ("INCLIB", KEYWORD "INCLIB"); + ("INPUT", FUNCTION T.Input); + ("$INPUT", FUNCTION T.Input'); + ("INSTR", FUNCTION T.Instr); + ("ISNUM", FUNCTION T.Isnum); + ("ISPLAY", FUNCTION T.Isplay); + ("JUMP", KEYWORD "JUMP"); + ("KILLALL", KEYWORD "KILLALL"); + ("KILLOBJ", KEYWORD "KILLOBJ"); + ("KILLQST", KEYWORD "FREELIB"); + ("KILLVAR", KEYWORD "KILLVAR"); + ("LCASE", FUNCTION T.Lcase); + ("$LCASE", FUNCTION T.Lcase') + (* + ; "LCOLOR", KEYWORD "LCOLOR" +*); + ("LEN", FUNCTION T.Len); + ("LET", LET); + ("LOC", FUNCTION T.Loc); + ("MAINTXT", IDENT "MAINTXT"); + ("$MAINTXT", IDENT "MAINTXT"); + ("MAX", FUNCTION T.Max); + ("$MAX", FUNCTION T.Max'); + ("MENU", KEYWORD "MENU"); + ("MID", FUNCTION T.Mid); + ("$MID", FUNCTION T.Mid'); + ("MIN", FUNCTION T.Min); + ("$MIN", FUNCTION T.Min'); + ("MOD", MOD); + ("MSECSCOUNT", FUNCTION T.Msecscount); + ("MSG", KEYWORD "MSG"); + ("NL", KEYWORD "NL"); + ("*NL", KEYWORD "*NL"); + ("NO", NO) (* + ; "NOSAVE", KEYWORD "NOSAVE" +*); + ("OBJ", OBJ); + ("$ONACTSEL", IDENT "$ONACTSEL"); + ("$ONGLOAD", IDENT "$ONGLOAD"); + ("$ONGSAVE", IDENT "$ONGSAVE"); + ("$ONNEWLOC", IDENT "$ONNEWLOC"); + ("$ONOBJADD", IDENT "$ONOBJADD"); + ("$ONOBJDEL", IDENT "$ONOBJDEL"); + ("$ONOBJSEL", IDENT "$ONOBJSEL"); + ("OPENGAME", KEYWORD "OPENGAME"); + ("OPENQST", KEYWORD "OPENQST"); + ("OR", OR); + ("P", KEYWORD "P"); + ("*P", KEYWORD "*P"); + ("PL", KEYWORD "PL"); + ("*PL", KEYWORD "*PL"); + ("PLAY", KEYWORD "PLAY"); + ("QSPVER", FUNCTION T.Qspver); + ("$QSPVER", FUNCTION T.Qspver'); + ("RAND", FUNCTION T.Rand); + ("REFINT", KEYWORD "REFINT"); + ("REPLACE", FUNCTION T.Replace); + ("$REPLACE", FUNCTION T.Replace'); + ("RGB", FUNCTION T.Rgb); + ("RND", FUNCTION T.Rnd); + ("SAVEGAME", KEYWORD "SAVEGAME"); + ("SELACT", FUNCTION T.Selact); + ("$SELACT", IDENT "$SELACT"); + ("SELOBJ", IDENT "SELOBJ"); + ("$SELOBJ", IDENT "$SELOBJ"); + ("SET", SET); + ("SETTIMER", KEYWORD "SETTIMER"); + ("SHOWACTS", KEYWORD "SHOWACTS"); + ("SHOWINPUT", KEYWORD "SHOWINPUT"); + ("SHOWOBJS", KEYWORD "SHOWOBJS"); + ("SHOWSTAT", KEYWORD "SHOWSTAT"); + ("STATTXT", FUNCTION T.Stattxt); + ("$STATTXT", FUNCTION T.Stattxt'); + ("STR", FUNCTION T.Str); + ("$STR", FUNCTION T.Str'); + ("STRCOMP", FUNCTION T.Strcomp); + ("STRFIND", FUNCTION T.Strfind); + ("$STRFIND", FUNCTION T.Strfind'); + ("STRPOS", FUNCTION T.Strpos); + ("TRIM", FUNCTION T.Trim); + ("$TRIM", FUNCTION T.Trim'); + ("UCASE", FUNCTION T.Ucase); + ("$UCASE", FUNCTION T.Ucase'); + ("UNSEL", KEYWORD "UNSELECT"); + ("UNSELECT", KEYWORD "UNSELECT"); + ("USEHTML", IDENT "USEHTML"); + ("USERCOM", IDENT "USERCOM"); + ("$USERCOM", IDENT "$USERCOM"); + ("USER_TEXT", IDENT "USER_TEXT"); + ("$USER_TEXT", IDENT "USER_TEXT"); + ("USRTXT", IDENT "USER_TEXT"); + ("$USRTXT", IDENT "USER_TEXT"); + ("VAL", FUNCTION T.Val); + ("VIEW", KEYWORD "VIEW"); + ("WAIT", KEYWORD "WAIT"); + ("XGOTO", KEYWORD "XGOTO"); + ("XGT", KEYWORD "XGOTO"); + ] diff --git a/lib/qparser/interpreter.ml b/lib/qparser/interpreter.ml new file mode 100644 index 0000000..b719600 --- /dev/null +++ b/lib/qparser/interpreter.ml @@ -0,0 +1,67 @@ +(** + This module provide a way to build the syntax parser with the menhir + incremental engine. This feature allow to see the state of the parser, and + get detailed error message but is not intended to be used directly. + + Refer to the menhir manual in order to see the values. + + The interresting function here is [of_lexbuf] which return the error code in + case of invalid syntax. + *) + +type error_code = UnrecoverableError | InvalidSyntax | MenhirCode of int + +module Interpreter (MI : MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE) = +struct + type error = { + code : error_code; + start_pos : Lexing.position; + end_pos : Lexing.position; + } + + module E = MenhirLib.ErrorReports + module L = MenhirLib.LexerUtil + + type step = MI.token * Lexing.position * Lexing.position + + let range_message (start_pos, end_pos) : error_code -> error = + fun code -> { code; start_pos; end_pos } + + let get_parse_error : Lexbuf.t -> 'a MI.env -> error = + fun buffer env -> + match MI.stack env with + | (lazy Nil) -> + (* The parser is in its initial state. We should not get an + error here *) + let positions = Lexbuf.positions buffer in + + range_message positions UnrecoverableError + | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) -> + range_message (start_pos, end_pos) (MenhirCode (MI.number state)) + + let rec _parse : + Lexbuf.t -> (unit -> step) -> 'a MI.checkpoint -> ('a, error) Result.t = + fun buffer get_token (checkpoint : 'a MI.checkpoint) -> + match checkpoint with + | MI.InputNeeded _env -> + let token, startp, endp = get_token () in + let checkpoint = MI.offer checkpoint (token, startp, endp) in + _parse buffer get_token checkpoint + | MI.Shifting _ | MI.AboutToReduce _ -> + let checkpoint = MI.resume checkpoint in + _parse buffer get_token checkpoint + | MI.HandlingError _env -> + let err = get_parse_error buffer _env in + Error err + | MI.Accepted v -> Ok v + | MI.Rejected -> + let positions = Lexbuf.positions buffer in + let err = range_message positions InvalidSyntax in + Error err + + type 'a builder = Lexing.position -> 'a MI.checkpoint + + let of_lexbuf : + (unit -> step) -> Lexbuf.t -> 'a MI.checkpoint -> ('a, error) result = + fun lexer buffer init -> _parse buffer lexer init +end diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml new file mode 100644 index 0000000..3f0b186 --- /dev/null +++ b/lib/qparser/lexbuf.ml @@ -0,0 +1,61 @@ +type t = { + buffer : Sedlexing.lexbuf; + mutable start_p : Lexing.position option; + mutable expression_level : int; + reset_line : bool; +} + +let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer + +let start : t -> unit = + fun t -> + let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in + let () = + if not t.reset_line then + Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 } + in + t.start_p <- None; + t.expression_level <- 0 + +let positions : t -> Lexing.position * Lexing.position = + fun t -> Sedlexing.lexing_positions t.buffer + +let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer + +let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t = + fun ?(reset_line = true) t -> + { buffer = t; start_p = None; expression_level = 0; reset_line } + +let set_start_position : t -> Lexing.position -> unit = + fun t position -> t.start_p <- Some position + +let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position + = + fun f t -> + let lexer () = + (* Clear the previous registered start position if any *) + t.start_p <- None; + let token = f t in + let default, curr_p = positions t in + + let start_p = Option.value ~default t.start_p in + + (token, start_p, curr_p) + in + lexer + +(* The comment system is terrible. The same symbol can be used for : + - starting a comment + - inequality operation + In order to manage this, I try to identify the context in a very basic way, + using a counter for determining the token to send. +*) + +let incr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level + 1 + +let decr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level - 1 + +let reset_level : t -> unit = fun t -> t.expression_level <- 0 +let level : t -> int = fun t -> t.expression_level diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli new file mode 100644 index 0000000..41f07d1 --- /dev/null +++ b/lib/qparser/lexbuf.mli @@ -0,0 +1,25 @@ +type t + +val from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t +(** Create a new buffer *) + +val start : t -> unit +(** Intialize a new run *) + +val buffer : t -> Sedlexing.lexbuf +(** Extract the sedlex buffer. Required in each rule. *) + +val positions : t -> Lexing.position * Lexing.position +(** Extract the starting and ending position for the matched token *) + +val content : t -> string +(** Extract the token matched by the rule *) + +val set_start_position : t -> Lexing.position -> unit +val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position +val incr_level : t -> unit +val decr_level : t -> unit +val reset_level : t -> unit + +val level : t -> int +(** Return the nested expression level *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml new file mode 100644 index 0000000..c643577 --- /dev/null +++ b/lib/qparser/lexer.ml @@ -0,0 +1,212 @@ +(** + Lexer using sedlex + *) + +open Tokens + +exception UnclosedQuote of { content : string; line : int } +exception LexError of Lexing.position * string +exception EOF + +let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = + Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) + +(* Extract the location name from the pattern *) +let location_name = Str.regexp {|.* \(.*\)|} + +let build_ident buffer = + let id = Lexbuf.content buffer |> String.uppercase_ascii in + try + let value = Hashtbl.find Idents.keyword_table id in + let _ = + match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () + in + value + with Not_found -> IDENT id + +let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = + fun rule lexbuf -> + let _, position = Lexbuf.positions lexbuf in + Lexbuf.set_start_position lexbuf position; + + try[@warning "-52"] + let token = rule (Buffer.create 256) lexbuf in + token + with Failure "lexing: empty token" -> + let position, _ = Lexbuf.positions lexbuf in + let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in + (raise (UnclosedQuote { line; content }) [@warning "+52"]) + +let space = [%sedlex.regexp? ' ' | '\t'] +let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"] +let coma = [%sedlex.regexp? ','] +let digit = [%sedlex.regexp? '0' .. '9'] +let letters = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '_'] +let spaces = [%sedlex.regexp? Plus space] +let ident = [%sedlex.regexp? ('$' | letters), Star (digit | letters)] +let location_ident = [%sedlex.regexp? letters | digit] +let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] +let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] + +let rec read_long_string level buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '{' -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_long_string (level + 1) buf buffer + | '}' -> ( + match level with + | 0 -> Buffer.contents buf + | _ -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_long_string (level - 1) buf buffer) + | eol -> + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string level buf buffer + | any -> + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string level buf buffer + | _ -> raise Not_found + +let rec read_dquoted_string buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "\"\"" -> + Buffer.add_char buf '"'; + read_dquoted_string buf buffer + | '"' -> Buffer.contents buf + | any -> + Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); + read_dquoted_string buf buffer + | _ -> raise Not_found + +let rec read_quoted_string buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "''" -> + Buffer.add_char buf '\''; + read_quoted_string buf buffer + | '\'' -> Buffer.contents buf + | eol -> + Buffer.add_string buf (Lexbuf.content buffer); + read_quoted_string buf buffer + | any -> + Buffer.add_string buf (Lexbuf.content buffer); + read_quoted_string buf buffer + | _ -> raise Not_found + +let rec skip_comment buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '{' -> + let _ = wait_balance (read_long_string 0) buffer in + skip_comment buffer + | '\'' -> + let _ = wait_balance read_quoted_string buffer in + skip_comment buffer + | '"' -> + let _ = wait_balance read_dquoted_string buffer in + skip_comment buffer + | eol -> + (* Ugly hack used in order to put the eol in the front of the next + parsing. *) + Sedlexing.rollback lexbuf; + COMMENT + | any -> skip_comment buffer + | _ -> raise Not_found + +(** Main lexer *) +let rec token : Lexbuf.t -> token = + fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | 0Xfeff -> + (* Ignore the BOM *) + token buffer + | '#', Star space, location -> + (* Extract the location name *) + let ident = Lexbuf.content buffer in + let () = + match Str.string_match location_name ident 0 with + | false -> () + | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident) + in + + (* Restart the line number (new location here) *) + Lexbuf.start buffer; + + LOCATION_START ident + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Lexbuf.reset_level buffer; + LOCATION_END + | Plus digit -> INTEGER (Lexbuf.content buffer) + | '+' -> PLUS + | '-' -> MINUS + | "+=" -> INCR + | "-=" -> DECR + | "*=" -> MULT_EQUAL + | '/' -> DIV + | '*' -> STAR + | ':' -> + (* We are leaving the block, the comment will be handled again *) + Lexbuf.decr_level buffer; + + COLUMN + | '[' -> L_BRACKET + | ']' -> R_BRACKET + | '(' -> + Lexbuf.incr_level buffer; + L_PAREN + | ')' -> + Lexbuf.decr_level buffer; + R_PAREN + | '<' -> LT + | '>' -> GT + | coma -> COMA + | '=' -> + Lexbuf.incr_level buffer; + EQUAL + | ident -> build_ident buffer + | eol -> + Lexbuf.reset_level buffer; + EOL + | '&' -> + Lexbuf.reset_level buffer; + AMPERSAND + | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer + | spaces -> token buffer + | '\'' -> LITERAL (wait_balance read_quoted_string buffer) + | '"' -> LITERAL (wait_balance read_dquoted_string buffer) + | '{' -> LITERAL (wait_balance (read_long_string 0) buffer) + | eof -> raise EOF + | _ -> + let position = fst @@ Sedlexing.lexing_positions lexbuf in + let tok = Lexbuf.content buffer in + + let msg = + Format.asprintf "Unexpected character %S at %a" tok pp_pos position + in + + raise @@ LexError (position, msg) + +let rec discard buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '\'' -> + ignore (wait_balance read_quoted_string buffer); + discard buffer + | '"' -> + ignore (wait_balance read_dquoted_string buffer); + discard buffer + | '{' -> + ignore (wait_balance (read_long_string 0) buffer); + discard buffer + | eof -> raise EOF + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Lexbuf.reset_level buffer; + () + | '!' -> + ignore @@ skip_comment buffer; + discard buffer + | any -> discard buffer + | _ -> raise EOF diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli new file mode 100644 index 0000000..585877c --- /dev/null +++ b/lib/qparser/lexer.mli @@ -0,0 +1,4 @@ +exception EOF + +val token : Lexbuf.t -> Tokens.token +val discard : Lexbuf.t -> unit diff --git a/lib/qparser/parser.mly b/lib/qparser/parser.mly new file mode 100644 index 0000000..84c1af8 --- /dev/null +++ b/lib/qparser/parser.mly @@ -0,0 +1,89 @@ + +%{ + module T = Qsp_syntax.T +%} + +%parameter +%start main +%on_error_reduce expression instruction unary_operator assignation_operator + +%% + +main: + | before_location* + LOCATION_START + EOL+ + expressions = line_statement* + LOCATION_END + { + Analyzer.Location.location $loc expressions + } + +before_location: + | EOL {} + | COMMENT EOL { } + +(* All these statement should terminate with EOL *) +line_statement: + | COMMENT EOL+ { Analyzer.Instruction.comment $loc } + | COLUMN i=IDENT EOL* { Analyzer.Instruction.location $loc i } + | s = terminated(instruction, line_sep) + | s = terminated(inline_action, line_sep) + { s } + | a = action_bloc(IF, elif_else_body) + { let loc, expression, statements, loc_s, body = a in + let elifs, else_ = match body with + | None -> [], [] + | Some (elifs, else_) -> (elifs, else_) + in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } + | a = action_bloc(ACT, empty_body) + { let loc, label, statements, _, _ = a in + Analyzer.Instruction.act loc ~label statements + } + +(** Represent an instruction which can either be on a single line, + or created in a block until an END + *) +%inline action_bloc(TOKEN, BODY): + | TOKEN + e = expression + COLUMN EOL+ + s = line_statement* + b = BODY + END TOKEN? + line_sep + { $loc, e, s, $loc(s), b } + +empty_body: + | { None } + +elif: + | ELIF + e = expression + COLUMN EOL+ + s = line_statement* + { $loc, e, s } + +else_: + | ELSE EOL+ + expressions = line_statement* + { expressions } + | { [] } + + +elif_else_body: + | elifs = elif* + else_ = else_ + { Some (elifs, else_) } + + +%inline line_sep: + | EOL+ + | AMPERSAND+ EOL* + {} diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly new file mode 100644 index 0000000..06cfadd --- /dev/null +++ b/lib/qparser/qsp_expression.mly @@ -0,0 +1,86 @@ +(* %start <(Elements.pos) Elements.exppression>expression *) + +%% + +%public arguments(X): + (** This rule allow the difference between elements with a single argument + (when the separator is required) which allow to write a spectif case when + we only have one element. + *) + | hd = X + COMA + tl = separated_nonempty_list(COMA, X) + { hd :: tl } + (** The case where we have nothing is easy to manage here *) + | + { [] } + (** The remaining case (only one argument) is handled outside of this + block: if we have a single argument we don’t have to bother with the + paren, they belongs to the expression. + *) + +%inline argument(X): + | a = delimited(L_PAREN, arguments(X), R_PAREN) { a } + | a = X { [ a ] } + +(** Declare an expression *) +%public expression: + | ex = delimited(L_PAREN, expression, R_PAREN) + { ex } + | op = unary_operator + expr = expression + { Analyzer.Expression.uoperator $loc op expr } + %prec NO + | + expr1 = expression + op = binary_operator + expr2 = expression + { Analyzer.Expression.boperator $loc op expr1 expr2 } + | v = LITERAL { Analyzer.Expression.literal $loc v } + | i = INTEGER { Analyzer.Expression.integer $loc i } + | v = variable { Analyzer.Expression.ident v } + %prec p_variable + | k = FUNCTION + arg = argument(expression) + { + (Analyzer.Expression.function_ $loc k arg) + } + +unary_operator: + | OBJ + | NO { T.No } + | MINUS { T.Neg } + | PLUS { T.Add } + +%inline binary_operator: + | EQUAL { T.Eq } + | LT GT { T.Neq } + | EXCLAMATION { T.Neq } + | PLUS { T.Plus } + | MINUS { T.Minus } + | STAR { T.Product } + | DIV { T.Div } + | MOD { T.Mod } + | GT { T.Gt } + | LT { T.Lt } + | AND { T.And } + | GT EQUAL { T.Gte } + | LT EQUAL { T.Lte } + | EQUAL GT { T.Gte } + | EQUAL LT { T.Lte } + | OR { T.Or } + +(** Declare a variable, either in the assignation (let var = …) or as a + reference is an expression + *) +%public variable: + | name = IDENT + brackets = delimited(L_BRACKET, expression?, R_BRACKET)? + { + let index = match brackets with + | None -> + (* No declaration, consider index at 0 *) + None + | Some other -> other in + Analyzer.Expression.{ pos = $loc ; name ; index } + } diff --git a/lib/qparser/qsp_instruction.mly b/lib/qparser/qsp_instruction.mly new file mode 100644 index 0000000..564e154 --- /dev/null +++ b/lib/qparser/qsp_instruction.mly @@ -0,0 +1,104 @@ +%% + +optionnal_delimited(opening, X, closing): + | v = delimited(opening, X, closing) { v } + | v = X { v } + +argument(X): + | a = optionnal_delimited(L_PAREN, arguments(X), R_PAREN) { a } + | a = X { [ a ] } + +(** At the opposite of an expression, an instruction does not return anything. *) +%public instruction: + | s = single_instruction { s } + +(** Action like act or if in a single line *) +%public inline_action: + | a = onliner(ACT) + { let loc, label, statements, _, _ = a in + Analyzer.Instruction.act loc ~label statements + } + | a = onliner(IF) + else_opt = preceded(ELSE, instruction)? + { let loc, expression, statements, loc_s, _body = a in + let elifs = [] + and else_ = Option.to_list else_opt in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } + | a = onliner(IF) + else_= preceded(ELSE, inline_action) + { let loc, expression, statements, loc_s, _body = a in + let elifs = [] + and else_ = [ else_ ] in + Analyzer.Instruction.if_ + loc + (loc_s, expression, statements) + ~elifs + ~else_ + } +single_instruction: + | expr = expression + { + Analyzer.Instruction.expression expr + } + | e = let_assignation { e } + | k = keyword + arg = argument(expression) + { + Analyzer.Instruction.call $loc k arg + } + +keyword: + | STAR k = KEYWORD { "*" ^ k } + | k = KEYWORD { k } + +let_assignation: + | assignation + variable = variable + op = assignation_operator + value = expression + { + Analyzer.Instruction.assign $loc variable op value + } + +%inline assignation: + | + | LET + | SET {} + +assignation_operator: + | EQUAL { T.Eq' } + | INCR { T.Inc } + | DECR { T.Decr } + | MULT_EQUAL { T.Mult } + +inline_instruction: + | hd = inline_instruction + tl = single_instruction + AMPERSAND+ + { tl :: hd } + | + { [] } + +final_inline_instruction: + | hd = inline_instruction + tl = instruction + | hd = inline_instruction + tl = inline_action + { tl :: hd } + | hd = inline_instruction + COMMENT + { (Analyzer.Instruction.comment $loc) :: hd } + | hd = inline_instruction + { hd } + +onliner(TOKEN): + | TOKEN + e = expression + COLUMN + s = rev (final_inline_instruction) + { $loc, e, s, $loc(s), None } diff --git a/lib/qparser/tokens.mly b/lib/qparser/tokens.mly new file mode 100644 index 0000000..9ac4b10 --- /dev/null +++ b/lib/qparser/tokens.mly @@ -0,0 +1,73 @@ +%token LOCATION_START +%token LOCATION_END + +%token PLUS +%token MINUS +%token INCR DECR +%token MULT_EQUAL +%token STAR +%token DIV +%token MOD + +%token AMPERSAND +%token COMA +%token EQUAL +%token COLUMN +%token L_BRACKET R_BRACKET +%token L_PAREN R_PAREN +%token LT GT +%token EXCLAMATION +%token AND OR + +%token EOF +%token EOL + +%token IDENT +%token LITERAL +%token INTEGER + +%token COMMENT + +%token ACT +%token IF +%token ELSE +%token ELIF +%token END +%token LET +%token SET +%token OBJ +%token LOC +%token NO +%token KEYWORD +%token FUNCTION + +(* +(b) if the token was declared left-associative, then the conflict is resolved +in favor of reduction; + +(c) if the token was declared right-associative, then the conflict is resolved +in favor of shifting. + *) + +(* Exclamation should have the lower priority because the comments shall never + take place of the statements + *) +%right NO +(* The priority for the variable should be lower than the equality priority + if I want to allow declare new variables *) +%left p_variable +%left OR +%left AND +%left EQUAL +%left GT LT +%left EXCLAMATION +%left PLUS MINUS +%left STAR DIV +%left MOD +%left FUNCTION +%left L_PAREN +%right R_PAREN +%left COMA +%left KEYWORD + +%% diff --git a/lib/qsp_expression.mly b/lib/qsp_expression.mly deleted file mode 100644 index 06cfadd..0000000 --- a/lib/qsp_expression.mly +++ /dev/null @@ -1,86 +0,0 @@ -(* %start <(Elements.pos) Elements.exppression>expression *) - -%% - -%public arguments(X): - (** This rule allow the difference between elements with a single argument - (when the separator is required) which allow to write a spectif case when - we only have one element. - *) - | hd = X - COMA - tl = separated_nonempty_list(COMA, X) - { hd :: tl } - (** The case where we have nothing is easy to manage here *) - | - { [] } - (** The remaining case (only one argument) is handled outside of this - block: if we have a single argument we don’t have to bother with the - paren, they belongs to the expression. - *) - -%inline argument(X): - | a = delimited(L_PAREN, arguments(X), R_PAREN) { a } - | a = X { [ a ] } - -(** Declare an expression *) -%public expression: - | ex = delimited(L_PAREN, expression, R_PAREN) - { ex } - | op = unary_operator - expr = expression - { Analyzer.Expression.uoperator $loc op expr } - %prec NO - | - expr1 = expression - op = binary_operator - expr2 = expression - { Analyzer.Expression.boperator $loc op expr1 expr2 } - | v = LITERAL { Analyzer.Expression.literal $loc v } - | i = INTEGER { Analyzer.Expression.integer $loc i } - | v = variable { Analyzer.Expression.ident v } - %prec p_variable - | k = FUNCTION - arg = argument(expression) - { - (Analyzer.Expression.function_ $loc k arg) - } - -unary_operator: - | OBJ - | NO { T.No } - | MINUS { T.Neg } - | PLUS { T.Add } - -%inline binary_operator: - | EQUAL { T.Eq } - | LT GT { T.Neq } - | EXCLAMATION { T.Neq } - | PLUS { T.Plus } - | MINUS { T.Minus } - | STAR { T.Product } - | DIV { T.Div } - | MOD { T.Mod } - | GT { T.Gt } - | LT { T.Lt } - | AND { T.And } - | GT EQUAL { T.Gte } - | LT EQUAL { T.Lte } - | EQUAL GT { T.Gte } - | EQUAL LT { T.Lte } - | OR { T.Or } - -(** Declare a variable, either in the assignation (let var = …) or as a - reference is an expression - *) -%public variable: - | name = IDENT - brackets = delimited(L_BRACKET, expression?, R_BRACKET)? - { - let index = match brackets with - | None -> - (* No declaration, consider index at 0 *) - None - | Some other -> other in - Analyzer.Expression.{ pos = $loc ; name ; index } - } diff --git a/lib/qsp_instruction.mly b/lib/qsp_instruction.mly deleted file mode 100644 index 564e154..0000000 --- a/lib/qsp_instruction.mly +++ /dev/null @@ -1,104 +0,0 @@ -%% - -optionnal_delimited(opening, X, closing): - | v = delimited(opening, X, closing) { v } - | v = X { v } - -argument(X): - | a = optionnal_delimited(L_PAREN, arguments(X), R_PAREN) { a } - | a = X { [ a ] } - -(** At the opposite of an expression, an instruction does not return anything. *) -%public instruction: - | s = single_instruction { s } - -(** Action like act or if in a single line *) -%public inline_action: - | a = onliner(ACT) - { let loc, label, statements, _, _ = a in - Analyzer.Instruction.act loc ~label statements - } - | a = onliner(IF) - else_opt = preceded(ELSE, instruction)? - { let loc, expression, statements, loc_s, _body = a in - let elifs = [] - and else_ = Option.to_list else_opt in - Analyzer.Instruction.if_ - loc - (loc_s, expression, statements) - ~elifs - ~else_ - } - | a = onliner(IF) - else_= preceded(ELSE, inline_action) - { let loc, expression, statements, loc_s, _body = a in - let elifs = [] - and else_ = [ else_ ] in - Analyzer.Instruction.if_ - loc - (loc_s, expression, statements) - ~elifs - ~else_ - } -single_instruction: - | expr = expression - { - Analyzer.Instruction.expression expr - } - | e = let_assignation { e } - | k = keyword - arg = argument(expression) - { - Analyzer.Instruction.call $loc k arg - } - -keyword: - | STAR k = KEYWORD { "*" ^ k } - | k = KEYWORD { k } - -let_assignation: - | assignation - variable = variable - op = assignation_operator - value = expression - { - Analyzer.Instruction.assign $loc variable op value - } - -%inline assignation: - | - | LET - | SET {} - -assignation_operator: - | EQUAL { T.Eq' } - | INCR { T.Inc } - | DECR { T.Decr } - | MULT_EQUAL { T.Mult } - -inline_instruction: - | hd = inline_instruction - tl = single_instruction - AMPERSAND+ - { tl :: hd } - | - { [] } - -final_inline_instruction: - | hd = inline_instruction - tl = instruction - | hd = inline_instruction - tl = inline_action - { tl :: hd } - | hd = inline_instruction - COMMENT - { (Analyzer.Instruction.comment $loc) :: hd } - | hd = inline_instruction - { hd } - -onliner(TOKEN): - | TOKEN - e = expression - COLUMN - s = rev (final_inline_instruction) - { $loc, e, s, $loc(s), None } diff --git a/lib/syntax/S.ml b/lib/syntax/S.ml new file mode 100644 index 0000000..3873eed --- /dev/null +++ b/lib/syntax/S.ml @@ -0,0 +1,91 @@ +(** + This module describe the type an analyzer must implement in order to be + used with the parser. + + The module is divided in three modules : + - Expression : the finest part of the QSP syntax. + - Instruction : if/act block, + - Location + + All the elements of the syntax are represented with a dedicated function + (instead of a big sum type). The module [Tree] provide an implementation + which build the AST. + + *) + +type pos = Lexing.position * Lexing.position +type ('a, 'b) variable = { pos : 'a; name : string; index : 'b option } + +(** Represent the evaluation over an expression *) +module type Expression = sig + type 'a obs + type repr + + type variable = { pos : pos; name : string; index : repr option } + (** + Describe a variable, using the name in capitalized text, and an optionnal + index. + + If missing, the index should be considered as [0]. + *) + + val ident : variable -> repr + + (* + Basic values, text, number… + *) + + val integer : pos -> string -> repr + val literal : pos -> string -> repr + + val function_ : pos -> T.function_ -> repr list -> repr + (** Call a function. The functions list is hardcoded in lib/lexer.mll *) + + val uoperator : pos -> T.uoperator -> repr -> repr + (** Unary operator like [-123] or [+'Text']*) + + val boperator : pos -> T.boperator -> repr -> repr -> repr + (** Binary operator, for a comparaison, or an operation *) +end + +module type Instruction = sig + type repr + type expression + type variable + + val call : pos -> string -> expression list -> repr + (** Call for an instruction like [GT] or [*CLR] *) + + val location : pos -> string -> repr + (** Label for a loop *) + + val comment : pos -> repr + (** Comment *) + + val expression : expression -> repr + (** Raw expression *) + + type clause = pos * expression * repr list + + val if_ : pos -> clause -> elifs:clause list -> else_:repr list -> repr + val act : pos -> label:expression -> repr list -> repr + val assign : pos -> variable -> T.assignation_operator -> expression -> repr +end + +module type Location = sig + type repr + type instruction + + val location : pos -> instruction list -> repr +end + +module type Analyzer = sig + module Expression : Expression + + module Instruction : + Instruction + with type expression = Expression.repr + and type variable = Expression.variable + + module Location : Location with type instruction = Instruction.repr +end diff --git a/lib/syntax/dune b/lib/syntax/dune new file mode 100644 index 0000000..666273f --- /dev/null +++ b/lib/syntax/dune @@ -0,0 +1,6 @@ +(library + (name qsp_syntax) + + (preprocess (pps + ppx_deriving.show ppx_deriving.enum + ppx_deriving.eq ))) diff --git a/lib/syntax/report.ml b/lib/syntax/report.ml new file mode 100644 index 0000000..9ad24c3 --- /dev/null +++ b/lib/syntax/report.ml @@ -0,0 +1,40 @@ +(** Report built over the differents analysis in the file *) + +type level = Error | Warn | Debug +[@@deriving show { with_path = false }, enum, eq] + +type pos = Lexing.position * Lexing.position + +let level_of_string : string -> (level, string) result = + fun level -> + match String.lowercase_ascii level with + | "error" -> Ok Error + | "warn" -> Ok Warn + | "debug" -> Ok Debug + | _ -> + Error + (Format.sprintf + "Unknown report level '%s'. Accepted values are error, warn, debug" + level) + +let pp_pos : Format.formatter -> pos -> unit = + fun f (start_pos, end_pos) -> + let start_c = start_pos.Lexing.pos_cnum - start_pos.Lexing.pos_bol + and end_c = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol + and start_line = start_pos.Lexing.pos_lnum + and end_line = end_pos.Lexing.pos_lnum in + + if start_line != end_line then + Format.fprintf f "Lines %d-%d" start_line end_line + else Format.fprintf f "Line %d %d:%d" start_line start_c end_c + +type t = { level : level; loc : pos; message : string } +[@@deriving show { with_path = false }] + +let warn : pos -> string -> t = + fun loc message -> { level = Warn; loc; message } + +let error : pos -> string -> t = + fun loc message -> { level = Error; loc; message } + +let message level loc message = { level; loc; message } diff --git a/lib/syntax/t.ml b/lib/syntax/t.ml new file mode 100644 index 0000000..9c25647 --- /dev/null +++ b/lib/syntax/t.ml @@ -0,0 +1,78 @@ +(** + This module contains the basic operators used in the QSP syntax. + *) + +type boperator = + | Eq + | Neq + | Plus + | Minus + | Product + | Div + | Gt + | Lt + | Gte + | Lte + | And + | Or + | Mod +[@@deriving eq, show] + +and uoperator = No | Neg | Add [@@deriving eq, show] + +and assignation_operator = Eq' | Inc (** += *) | Decr (** -= *) | Mult +[@@deriving eq, show] + +type function_ = + | Arrcomp + | Arrpos + | Arrsize + | Countobj + | Desc + | Desc' + | Dyneval + | Dyneval' + | Func + | Func' + | Getobj + | Getobj' + | Iif + | Iif' + | Input + | Input' + | Instr + | Isnum + | Isplay + | Lcase + | Lcase' + | Len + | Loc + | Max + | Max' + | Mid + | Mid' + | Min + | Min' + | Msecscount + | Qspver + | Qspver' + | Rand + | Replace + | Replace' + | Rgb + | Rnd + | Selact + | Stattxt + | Stattxt' + | Str + | Str' + | Strcomp + | Strfind + | Strfind' + | Strpos + | Trim + | Trim' + | Ucase + | Ucase' + | Val +[@@deriving eq, show] diff --git a/lib/syntax/tree.ml b/lib/syntax/tree.ml new file mode 100644 index 0000000..bb31253 --- /dev/null +++ b/lib/syntax/tree.ml @@ -0,0 +1,95 @@ +type pos = Lexing.position * Lexing.position + +module Ast = struct + type nonrec pos = pos + + type 'a variable = { pos : 'a; name : string; index : 'a expression option } + [@@deriving eq, show] + + and 'a expression = + | Integer of 'a * string + | Literal of 'a * string + | Ident of 'a variable + | BinaryOp of 'a * T.boperator * 'a expression * 'a expression + | Op of 'a * T.uoperator * 'a expression + | Function of 'a * T.function_ * 'a expression list + [@@deriving eq, show] + + and 'a condition = 'a * 'a expression * 'a statement list + + and 'a statement = + | If of { + loc : 'a; + then_ : 'a condition; + elifs : 'a condition list; + else_ : 'a statement list; + } + | Act of { loc : 'a; label : 'a expression; statements : 'a statement list } + | Declaration of ('a * 'a variable * T.assignation_operator * 'a expression) + | Expression of 'a expression + | Comment of 'a + | Call of 'a * string * 'a expression list + | Location of 'a * string + [@@deriving eq, show] +end + +(** Default implementation for the expression *) +module Expression : S.Expression with type repr = pos Ast.expression = struct + type 'a obs + type repr = pos Ast.expression + type variable = { pos : pos; name : string; index : repr option } + + let integer : pos -> string -> repr = fun pos i -> Ast.Integer (pos, i) + let literal : pos -> string -> repr = fun pos l -> Ast.Literal (pos, l) + + let function_ : pos -> T.function_ -> repr list -> repr = + fun pos name args -> Ast.Function (pos, name, args) + + let uoperator : pos -> T.uoperator -> repr -> repr = + fun pos op expression -> Ast.Op (pos, op, expression) + + let boperator : pos -> T.boperator -> repr -> repr -> repr = + fun pos op op1 op2 -> Ast.BinaryOp (pos, op, op1, op2) + + let ident : variable -> repr = + fun { pos; name; index } -> Ast.Ident { pos; name; index } +end + +module Instruction : + S.Instruction + with type expression = Expression.repr + and type repr = pos Ast.statement + and type variable = Expression.variable = struct + type repr = pos Ast.statement + type expression = Expression.repr + type variable = Expression.variable + + let call : pos -> string -> expression list -> repr = + fun pos name args -> Ast.Call (pos, name, args) + + let location : pos -> string -> repr = + fun loc label -> Ast.Location (loc, label) + + let comment : pos -> repr = fun pos -> Ast.Comment pos + let expression : expression -> repr = fun expr -> Ast.Expression expr + + type clause = pos * expression * repr list + + let if_ : pos -> clause -> elifs:clause list -> else_:repr list -> repr = + fun pos predicate ~elifs ~else_ -> + Ast.If { loc = pos; then_ = predicate; elifs; else_ } + + let act : pos -> label:expression -> repr list -> repr = + fun pos ~label statements -> Ast.Act { loc = pos; label; statements } + + let assign : pos -> variable -> T.assignation_operator -> expression -> repr = + fun pos_loc { pos; name; index } op expr -> + Ast.Declaration (pos_loc, { pos; name; index }, op, expr) +end + +module Location = struct + type instruction = pos Ast.statement + type repr = pos * instruction list + + let location : pos -> instruction list -> repr = fun pos block -> (pos, block) +end diff --git a/lib/syntax/tree.mli b/lib/syntax/tree.mli new file mode 100644 index 0000000..ca5a639 --- /dev/null +++ b/lib/syntax/tree.mli @@ -0,0 +1,51 @@ +(** + Implementation for S.Analyzer for building a complete Ast. + + Used in the unit test in order to check if the grammar is interpreted as + expected, not really usefull over a big qsp. + *) + +(** This module is the result of the evaluation. *) +module Ast : sig + type pos = Lexing.position * Lexing.position + + type 'a variable = { pos : 'a; name : string; index : 'a expression option } + [@@deriving eq, show] + (** A variable, used both in an expression (reference) or in a statement + (assignation) *) + + and 'a expression = + | Integer of 'a * string + | Literal of 'a * string + | Ident of 'a variable + | BinaryOp of 'a * T.boperator * 'a expression * 'a expression + | Op of 'a * T.uoperator * 'a expression + | Function of 'a * T.function_ * 'a expression list + [@@deriving eq, show] + + and 'a condition = 'a * 'a expression * 'a statement list + (** A condition in if or elseif statement *) + + and 'a statement = + | If of { + loc : 'a; + then_ : 'a condition; + elifs : 'a condition list; + else_ : 'a statement list; + } + | Act of { loc : 'a; label : 'a expression; statements : 'a statement list } + | Declaration of ('a * 'a variable * T.assignation_operator * 'a expression) + | Expression of 'a expression + | Comment of 'a + | Call of 'a * string * 'a expression list + | Location of 'a * string + [@@deriving eq, show] +end + +(** / **) + +include + S.Analyzer + with type Expression.repr = Ast.pos Ast.expression + and type Instruction.repr = Ast.pos Ast.statement + and type Location.repr = Ast.pos * Ast.pos Ast.statement list diff --git a/lib/syntax/type_of.ml b/lib/syntax/type_of.ml new file mode 100644 index 0000000..d578700 --- /dev/null +++ b/lib/syntax/type_of.ml @@ -0,0 +1,385 @@ +open StdLabels + +type pos = Lexing.position * Lexing.position +(** Extract the type for expression *) + +module Helper = struct + type t = Integer | Bool | String | Any + [@@deriving show { with_path = false }] + + type argument_repr = { pos : pos; t : t } + + type dyn_type = t -> t + (** Dynamic type is a type unknown during the code. + + For example, the equality operator accept either Integer or String, but + we expect that both sides of the equality uses the same type.*) + + (** Build a new dynamic type *) + let dyn_type : unit -> dyn_type = + fun () -> + let stored = ref None in + fun t -> + match !stored with + | None -> + stored := Some t; + t + | Some t -> t + + (** Declare an argument for a function. + + - Either we already know the type and we just have to compare. + - Either the type shall constrained by another one + - Or we have a variable number of arguments. *) + type argument = Fixed of t | Dynamic of dyn_type | Variable of argument + + let compare : + ?strict:bool -> + ?level:Report.level -> + t -> + argument_repr -> + Report.t list -> + Report.t list = + fun ?(strict = false) ?(level = Report.Warn) expected actual report -> + let equal = + match (expected, actual.t) with + | _, Any -> true + | Any, _ -> true + | String, String -> true + | Integer, Integer -> true + | Bool, Bool -> true + | Bool, Integer when not strict -> true + | Integer, Bool -> true + | String, Integer when not strict -> true + | String, Bool when not strict -> true + | _, _ -> false + in + if equal then report + else + let message = + Format.asprintf "The type %a is expected but got %a" pp expected pp + actual.t + in + Report.message level actual.pos message :: report + + let rec compare_parameter : + ?strict:bool -> + ?level:Report.level -> + argument -> + argument_repr -> + Report.t list -> + Report.t list = + fun ?(strict = false) ?(level = Report.Warn) expected param report -> + match expected with + | Fixed t -> compare ~level t param report + | Dynamic d -> + let type_ = d param.t in + compare ~strict ~level type_ param report + | Variable c -> compare_parameter ~level c param report + + (** Compare the arguments one by one *) + let compare_args : + ?strict:bool -> + ?level:Report.level -> + pos -> + argument list -> + argument_repr list -> + Report.t list -> + Report.t list = + fun ?(strict = false) ?(level = Report.Warn) pos expected actuals report -> + let tl, report = + List.fold_left actuals ~init:(expected, report) + ~f:(fun (expected, report) param -> + match expected with + | (Variable _ as hd) :: _ -> + let check = compare_parameter ~strict ~level hd param report in + (expected, check) + | hd :: tl -> + let check = compare_parameter ~strict ~level hd param report in + (tl, check) + | [] -> + let msg = Report.error param.pos "Unexpected argument" in + ([], msg :: report)) + in + match tl with + | [] | Variable _ :: _ -> report + | _ -> + let msg = Report.error pos "Not enougth arguments given" in + msg :: report +end + +module Expression = struct + type 'a obs + + type t = { + result : Helper.t; + report : Report.t list; (* See the comment below *) + pos : pos; + empty : bool; + } + + type repr = Report.t list -> t + (** The type repr is a function accepting the report as a first argement. + When the report is given, it will be reported into the tree and collected + in bottom-top. + + It’s easy to forget that the report is updated when the type is created. + The function takes the report in argument, and store the report in the + returned type. Maybe should I make a tupple instead in order to make it + explicit ? + *) + + type variable = { pos : pos; name : string; index : repr option } + + let arg_of_repr : t -> Helper.argument_repr = + fun { result; report; pos; empty } -> + ignore report; + ignore empty; + { pos; t = result } + + (** The variable has type string when starting with a '$' *) + let ident : variable -> repr = + fun var report -> + let empty = false in + match var.name.[0] with + | '$' -> { result = String; report; pos = var.pos; empty } + | _ -> { result = Integer; report; pos = var.pos; empty } + + let integer : pos -> string -> repr = + fun pos value report -> + let empty = + match int_of_string_opt value with Some 0 -> true | _ -> false + in + + { result = Integer; report; pos; empty } + + let literal : pos -> string -> repr = + fun pos value report -> + let empty = String.equal String.empty value in + { result = String; report; pos; empty } + + let function_ : pos -> T.function_ -> repr list -> repr = + fun pos function_ params _acc -> + (* Accumulate the expressions and get the results, the report is given in + the differents arguments, and we build a list with the type of the + parameters. *) + let types, report = + List.fold_left params ~init:([], _acc) ~f:(fun (types, report) param -> + let t = param report in + let arg = arg_of_repr t in + (arg :: types, t.report)) + in + let types = List.rev types + and default = { result = Any; report; pos; empty = false } in + + match function_ with + | Arrcomp | Arrpos | Arrsize | Countobj | Desc | Dyneval | Func | Getobj + | Instr | Isplay -> + { default with result = Integer } + | Desc' | Dyneval' | Func' | Getobj' -> { default with result = String } + | Iif | Iif' -> + let d = Helper.dyn_type () in + let expected = Helper.[ Fixed Bool; Dynamic d; Dynamic d ] in + let report = Helper.compare_args pos expected types report in + (* Extract the type for the expression *) + let result = d Helper.Bool in + { result; report; pos; empty = false } + | Input | Input' -> + (* Input should check the result if the variable is a num and raise a + message in this case.*) + let expected = Helper.[ Fixed String ] in + let report = Helper.compare_args pos expected types report in + { result = String; report; pos; empty = false } + | Isnum -> + let expected = Helper.[ Fixed String ] in + let report = Helper.compare_args pos expected types report in + { result = Bool; report; pos; empty = false } + | Lcase | Lcase' | Ucase | Ucase' -> + let expected = Helper.[ Fixed String ] in + let report = Helper.compare_args pos expected types report in + { result = String; report; pos; empty = false } + | Len -> + let expected = Helper.[ Fixed Any ] in + let report = Helper.compare_args pos expected types report in + { result = Integer; report; pos; empty = false } + | Loc -> + let expected = Helper.[ Fixed String ] in + let report = Helper.compare_args pos expected types report in + { result = Bool; report; pos; empty = false } + | Max | Max' | Min | Min' -> + let d = Helper.dyn_type () in + (* All the arguments must have the same type *) + let expected = Helper.[ Variable (Dynamic d) ] in + let report = Helper.compare_args pos expected types report in + let result = d Helper.Bool in + { result; report; pos; empty = false } + | Mid | Mid' -> + let expected = Helper.[ Fixed String; Variable (Fixed Integer) ] in + let report = Helper.compare_args pos expected types report in + { result = String; report; pos; empty = false } + | Msecscount -> { result = Integer; report; pos; empty = false } + | Rand -> + let expected = Helper.[ Variable (Fixed Integer) ] in + let report = Helper.compare_args pos expected types report in + { result = Integer; report; pos; empty = false } + | Replace -> { result = Integer; report; pos; empty = false } + | Replace' -> { result = String; report; pos; empty = false } + | Rgb -> { result = Integer; report; pos; empty = false } + | Qspver | Qspver' | Rnd -> + (* No arg *) + let report = Helper.compare_args pos [] types report in + { result = Integer; report; pos; empty = false } + | Selact -> { result = Integer; report; pos; empty = false } + | Stattxt -> { result = Integer; report; pos; empty = false } + | Stattxt' -> { result = String; report; pos; empty = false } + | Str | Str' -> + let expected = Helper.[ Variable (Fixed Integer) ] in + let report = Helper.compare_args pos expected types report in + { default with result = String; report } + | Strcomp -> { result = Integer; report; pos; empty = false } + | Strfind -> { result = Integer; report; pos; empty = false } + | Strfind' -> { result = String; report; pos; empty = false } + | Strpos -> { result = Integer; report; pos; empty = false } + | Trim -> { result = Integer; report; pos; empty = false } + | Trim' -> { result = String; report; pos; empty = false } + | Val -> + let expected = Helper.[ Fixed Any ] in + let report = Helper.compare_args pos expected types report in + { result = Integer; report; pos; empty = false } + + (** Unary operator like [-123] or [+'Text']*) + let uoperator : pos -> T.uoperator -> repr -> repr = + fun pos operator t1 report -> + let t = t1 report in + let report = t.report in + match operator with + | Add -> t + | Neg | No -> + let types = [ arg_of_repr t ] in + let expected = Helper.[ Fixed Integer ] in + let report = Helper.compare_args pos expected types report in + { result = Integer; report; pos; empty = false } + + let boperator : pos -> T.boperator -> repr -> repr -> repr = + fun pos operator t1 t2 report -> + let t1 = t1 report in + let t2 = t2 t1.report in + let report = t2.report in + let types = [ arg_of_repr t1; arg_of_repr t2 ] in + match operator with + | T.Plus -> + (* Operation over number *) + let d = Helper.(dyn_type ()) in + let expected = Helper.[ Dynamic d; Dynamic d ] in + let report = Helper.compare_args pos expected types report in + let result = d Helper.Integer in + { result; report; pos; empty = false } + | T.Eq | T.Neq -> + (* If the expression is '' or 0, we accept the comparaison as if + instead of raising a warning *) + if t1.empty || t2.empty then + { result = Bool; report; pos; empty = false } + else + let d = Helper.(Dynamic (dyn_type ())) in + let expected = [ d; d ] in + let report = + Helper.compare_args ~strict:true pos expected (List.rev types) + report + in + { result = Bool; report; pos; empty = false } + | Lt | Gte | Lte | Gt -> + let d = Helper.(Dynamic (dyn_type ())) in + let expected = [ d; d ] in + let report = Helper.compare_args pos expected types report in + { result = Bool; report; pos; empty = false } + | T.Mod | T.Minus | T.Product | T.Div -> + (* Operation over number *) + let expected = Helper.[ Fixed Integer; Fixed Integer ] in + let report = Helper.compare_args pos expected types report in + { result = Integer; report; pos; empty = false } + | T.And | T.Or -> + (* Operation over booleans *) + let expected = Helper.[ Fixed Bool; Fixed Bool ] in + let report = Helper.compare_args pos expected types report in + { result = Bool; report; pos; empty = false } +end + +module Instruction = struct + type repr = Report.t list -> Report.t list + type expression = Expression.repr + type variable = Expression.variable + + (** Call for an instruction like [GT] or [*CLR] *) + let call : pos -> string -> expression list -> repr = + fun _pos _ expressions report -> + List.fold_left expressions ~init:report ~f:(fun report expression -> + let result = expression report in + result.Expression.report) + + let location : pos -> string -> repr = fun _pos _ report -> report + + (** Comment *) + let comment : pos -> repr = fun _pos report -> report + + (** Raw expression *) + let expression : expression -> repr = + fun expression report -> (expression report).report + + type clause = pos * expression * repr list + + let if_ : pos -> clause -> elifs:clause list -> else_:repr list -> repr = + fun _pos clause ~elifs ~else_ report -> + (* Helper function *) + let fold_clause report (_pos, expr, instructions) : Report.t list = + let result = expr report in + let report = + Helper.compare Helper.Bool + (Expression.arg_of_repr result) + result.Expression.report + in + List.fold_left instructions ~init:report ~f:(fun report instruction -> + instruction report) + in + + (* Traverse the whole block recursively *) + let report = fold_clause report clause in + let report = List.fold_left elifs ~f:fold_clause ~init:report in + List.fold_left else_ ~init:report ~f:(fun report instruction -> + instruction report) + + let act : pos -> label:expression -> repr list -> repr = + fun _pos ~label instructions report -> + let result = label report in + let report = + Helper.compare Helper.String + (Expression.arg_of_repr result) + result.Expression.report + in + List.fold_left instructions ~init:report ~f:(fun report instruction -> + instruction report) + + let assign : pos -> variable -> T.assignation_operator -> expression -> repr = + fun pos variable _ expression report -> + let right_expression = expression report in + match right_expression.empty with + | true -> report + | false -> + let op1 = Expression.arg_of_repr (Expression.ident variable report) in + let report = right_expression.Expression.report in + let op2 = Expression.arg_of_repr right_expression in + + let d = Helper.dyn_type () in + (* Every part of the assignation should be the same type *) + let expected = Helper.[ Dynamic d; Dynamic d ] in + Helper.compare_args ~level:Report.Debug pos expected [ op1; op2 ] report +end + +module Location = struct + type repr = Instruction.repr + type instruction = Instruction.repr + + let location : pos -> instruction list -> repr = + fun _pos instructions report -> + List.fold_left instructions ~init:report ~f:(fun report instruction -> + instruction report) +end diff --git a/lib/tokens.mly b/lib/tokens.mly deleted file mode 100644 index 9ac4b10..0000000 --- a/lib/tokens.mly +++ /dev/null @@ -1,73 +0,0 @@ -%token LOCATION_START -%token LOCATION_END - -%token PLUS -%token MINUS -%token INCR DECR -%token MULT_EQUAL -%token STAR -%token DIV -%token MOD - -%token AMPERSAND -%token COMA -%token EQUAL -%token COLUMN -%token L_BRACKET R_BRACKET -%token L_PAREN R_PAREN -%token LT GT -%token EXCLAMATION -%token AND OR - -%token EOF -%token EOL - -%token IDENT -%token LITERAL -%token INTEGER - -%token COMMENT - -%token ACT -%token IF -%token ELSE -%token ELIF -%token END -%token LET -%token SET -%token OBJ -%token LOC -%token NO -%token KEYWORD -%token FUNCTION - -(* -(b) if the token was declared left-associative, then the conflict is resolved -in favor of reduction; - -(c) if the token was declared right-associative, then the conflict is resolved -in favor of shifting. - *) - -(* Exclamation should have the lower priority because the comments shall never - take place of the statements - *) -%right NO -(* The priority for the variable should be lower than the equality priority - if I want to allow declare new variables *) -%left p_variable -%left OR -%left AND -%left EQUAL -%left GT LT -%left EXCLAMATION -%left PLUS MINUS -%left STAR DIV -%left MOD -%left FUNCTION -%left L_PAREN -%right R_PAREN -%left COMA -%left KEYWORD - -%% -- cgit v1.2.3