diff options
author | Chimrod <> | 2024-09-13 09:25:38 +0200 |
---|---|---|
committer | Chimrod <> | 2024-09-13 09:25:38 +0200 |
commit | 8b4eb01afa698dd1e512f7a83fb761bfb146fdf7 (patch) | |
tree | 5c85e5f4d011a895955d8516f0cdb195acf82f24 | |
parent | 692d66ba7eb0ff55a46b68601b7cd81f825653cb (diff) |
Added a syntax check in the comments
-rw-r--r-- | lib/qparser/lexer.ml | 81 | ||||
-rw-r--r-- | lib/qparser/lexer.mli | 3 | ||||
-rw-r--r-- | test/qsp_parser_test.ml | 3 | ||||
-rw-r--r-- | test/syntax.ml | 34 | ||||
-rw-r--r-- | test/syntax_error.ml | 7 |
5 files changed, 107 insertions, 21 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index e3524cc..42f3c1c 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] -let rec read_long_string ?(nested = false) level buf buffer = +(** Read a quote started with '{' + + The function return the parsed string, but the closing token has been + rollbacked, leaving the state in [Lexbuf.EndString _]. + + The next call to [main] will call the associated function, effectively + leaving the string mode in the parser. + + @param nested tell with started another block of string inside this one *) +let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token + = + fun ?(nested = false) level buf buffer -> let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | "<<" -> @@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer = Lexbuf.leave_state buffer; Lexbuf.enter_state buffer (Lexbuf.EndString Lex_state.readLongStringWraper); + (* rollback the latest token *) Lexbuf.rollback buffer; LITERAL (Buffer.contents buf) | _ -> @@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder = (f.wrap (read_quoted_string f)) buf buffer | _ -> raise Not_found +(** Track the kind of nested string inside a multiline string inside a + comment. + + Some constructions are not allowed in this specific case (see later) +*) +type commentedString = None | Quote | DQuote + let rec skip_comment buffer = (* Simplified way to skip the content of a string until the end marker. (expect the string to be well formed) *) - let rec parse_until_end f = - let _ = wait_balance f buffer in + let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token = + fun f -> + let token = wait_balance f buffer in match Lexbuf.state buffer with | Some Lexbuf.Token -> Lexbuf.leave_state buffer; parse_until_end f - | Some (Lexbuf.EndString _) -> () + | Some (Lexbuf.EndString _) -> token | _ -> parse_until_end f in let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | '{' -> - parse_until_end (read_long_string 0); - let _ = Lex_state.readLongStringWraper.end_string buffer in + (* There are illegal constructions inside a comment containing {} + block. + + Every opening text marker shall have the corresponding closing token + (but inside a nested block we can have unmatched double quote). + + [! { ' }] this gives an error + [! { ' ' }] is ok + *) + let token = parse_until_end (read_long_string 0) in + let () = + match token with + | Tokens.LITERAL (content : string) -> ( + (* Ensure every opening quote is closed before the end of the + comment *) + let string_state = + String.fold_left content ~init:None ~f:(fun state c -> + match (state, c) with + | None, '\'' -> Quote + | None, '"' -> DQuote + | Quote, '\'' -> None + | DQuote, '"' -> None + | _ -> state) + in + match string_state with + | None -> () + | _ -> + Lexbuf.leave_state buffer; + raise UnclosedQuote) + | _ -> () + in + let () = + try ignore (Lex_state.readLongStringWraper.end_string buffer) + with _ -> () + in skip_comment buffer | '\'' -> - parse_until_end - (Lex_state.quotedStringWraper.wrap - (read_quoted_string Lex_state.quotedStringWraper)); + let _ = + parse_until_end + (Lex_state.quotedStringWraper.wrap + (read_quoted_string Lex_state.quotedStringWraper)) + in let _ = Lex_state.quotedStringWraper.end_string buffer in skip_comment buffer | '"' -> - parse_until_end - (Lex_state.dQuotedStringWraper.wrap - (read_quoted_string Lex_state.dQuotedStringWraper)); + let _ = + parse_until_end + (Lex_state.dQuotedStringWraper.wrap + (read_quoted_string Lex_state.dQuotedStringWraper)) + in let _ = Lex_state.dQuotedStringWraper.end_string buffer in skip_comment buffer | eol -> diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli index 299fa46..854bb1e 100644 --- a/lib/qparser/lexer.mli +++ b/lib/qparser/lexer.mli @@ -6,7 +6,10 @@ working. *) exception EOF + exception UnclosedQuote +(** Error reported when a string is not closed properly *) + exception LexError of string val discard : Lexbuf.t -> unit diff --git a/test/qsp_parser_test.ml b/test/qsp_parser_test.ml index ada04d3..43f9cb3 100644 --- a/test/qsp_parser_test.ml +++ b/test/qsp_parser_test.ml @@ -1,7 +1,8 @@ let () = Alcotest.run "qsp_parser" [ - Syntax.test; + Syntax.test_syntax; + Syntax.test_comments; Literals.test; Syntax_error.test; Get_type.test; diff --git a/test/syntax.ml b/test/syntax.ml index f15c72f..f922e4f 100644 --- a/test/syntax.ml +++ b/test/syntax.ml @@ -400,6 +400,18 @@ life is unfair." Oh yeah, {curly brackets also count}. This is still the same comment. |} [ Comment _position ] +let test_comment_string () = + _test_instruction {|! {}|} [ Comment _position ]; + _test_instruction {|! ''|} [ Comment _position ]; + _test_instruction {|! ""|} [ Comment _position ]; + _test_instruction {|! {''}|} [ Comment _position ]; + _test_instruction {|! {""}|} [ Comment _position ]; + _test_instruction {|! "{"|} [ Comment _position ]; + _test_instruction {|! '{'|} [ Comment _position ]; + _test_instruction {|! "'"|} [ Comment _position ]; + _test_instruction {|! '"'|} [ Comment _position ]; + () + (** This test ensure that the unary operator is applied to the whole expression *) let test_precedence () = @@ -882,7 +894,7 @@ let test_stattxt () = { Tree.Ast.pos = _position; name = "$STATTXT"; index = None } ); ] -let test = +let test_syntax = ( "Syntax", [ Alcotest.test_case "Location" `Quick test_empty_location; @@ -915,13 +927,6 @@ let test = Alcotest.test_case "Plus_litt" `Quick test_plus_litt; Alcotest.test_case "PlusChained" `Quick test_concat; Alcotest.test_case "Mod operator" `Quick test_mod; - Alcotest.test_case "Comment" `Quick test_comment; - Alcotest.test_case "Comment2" `Quick test_comment2; - Alcotest.test_case "Comment3" `Quick test_comment3; - Alcotest.test_case "Comment4" `Quick test_comment4; - Alcotest.test_case "Comment5" `Quick test_comment5; - Alcotest.test_case "Comment6" `Quick test_comment6; - Alcotest.test_case "Multiline Comment" `Quick test_long_comment; Alcotest.test_case "If" `Quick test_if; Alcotest.test_case "If_chained" `Quick test_if_chained; Alcotest.test_case "If_equality" `Quick test_if_equality; @@ -952,3 +957,16 @@ let test = Alcotest.test_case "Precedence8" `Quick test_precedence8; Alcotest.test_case "stattxt" `Quick test_stattxt; ] ) + +let test_comments = + ( "Comments", + [ + Alcotest.test_case "Simple Comment" `Quick test_comment; + Alcotest.test_case "& Comment" `Quick test_comment2; + Alcotest.test_case "Double Comment" `Quick test_comment3; + Alcotest.test_case "Comment vs operation" `Quick test_comment4; + Alcotest.test_case "Comment5" `Quick test_comment5; + Alcotest.test_case "Comment6" `Quick test_comment6; + Alcotest.test_case "Multiline Comment" `Quick test_long_comment; + Alcotest.test_case "Comments with strings" `Quick test_comment_string; + ] ) diff --git a/test/syntax_error.ml b/test/syntax_error.ml index 2187e89..b92cf28 100644 --- a/test/syntax_error.ml +++ b/test/syntax_error.ml @@ -94,6 +94,12 @@ let unclose_comment () = _test_instruction {| ! that's it|} { level = Error; loc = _position; message = "Unclosed string" } +(* Same but with nested string *) + +let unclose_comment2 () = + _test_instruction {| !{ that's it }|} + { level = Error; loc = _position; message = "Unclosed string" } + let syntax_error () = _test_instruction {|*clr $ cla|} { level = Error; loc = _position; message = "Unexpected character \"\"" } @@ -275,6 +281,7 @@ let test = Alcotest.test_case "act 1" `Quick act_no_column; Alcotest.test_case "no &" `Quick missing_ampersand; Alcotest.test_case "unclose_comment" `Quick unclose_comment; + Alcotest.test_case "unclose_comment2" `Quick unclose_comment2; Alcotest.test_case "Syntax error $" `Quick syntax_error; Alcotest.test_case "Missing operand" `Quick missing_operand; Alcotest.test_case "Unknown function" `Quick unknow_function; |