aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChimrod <>2024-09-13 09:25:38 +0200
committerChimrod <>2024-09-13 09:25:38 +0200
commit8b4eb01afa698dd1e512f7a83fb761bfb146fdf7 (patch)
tree5c85e5f4d011a895955d8516f0cdb195acf82f24
parent692d66ba7eb0ff55a46b68601b7cd81f825653cb (diff)
Added a syntax check in the comments
-rw-r--r--lib/qparser/lexer.ml81
-rw-r--r--lib/qparser/lexer.mli3
-rw-r--r--test/qsp_parser_test.ml3
-rw-r--r--test/syntax.ml34
-rw-r--r--test/syntax_error.ml7
5 files changed, 107 insertions, 21 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index e3524cc..42f3c1c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string ?(nested = false) level buf buffer =
+(** Read a quote started with '{'
+
+ The function return the parsed string, but the closing token has been
+ rollbacked, leaving the state in [Lexbuf.EndString _].
+
+ The next call to [main] will call the associated function, effectively
+ leaving the string mode in the parser.
+
+ @param nested tell with started another block of string inside this one *)
+let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token
+ =
+ fun ?(nested = false) level buf buffer ->
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "<<" ->
@@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer =
Lexbuf.leave_state buffer;
Lexbuf.enter_state buffer
(Lexbuf.EndString Lex_state.readLongStringWraper);
+ (* rollback the latest token *)
Lexbuf.rollback buffer;
LITERAL (Buffer.contents buf)
| _ ->
@@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
(f.wrap (read_quoted_string f)) buf buffer
| _ -> raise Not_found
+(** Track the kind of nested string inside a multiline string inside a
+ comment.
+
+ Some constructions are not allowed in this specific case (see later)
+*)
+type commentedString = None | Quote | DQuote
+
let rec skip_comment buffer =
(* Simplified way to skip the content of a string until the end marker.
(expect the string to be well formed) *)
- let rec parse_until_end f =
- let _ = wait_balance f buffer in
+ let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token =
+ fun f ->
+ let token = wait_balance f buffer in
match Lexbuf.state buffer with
| Some Lexbuf.Token ->
Lexbuf.leave_state buffer;
parse_until_end f
- | Some (Lexbuf.EndString _) -> ()
+ | Some (Lexbuf.EndString _) -> token
| _ -> parse_until_end f
in
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- parse_until_end (read_long_string 0);
- let _ = Lex_state.readLongStringWraper.end_string buffer in
+ (* There are illegal constructions inside a comment containing {}
+ block.
+
+ Every opening text marker shall have the corresponding closing token
+ (but inside a nested block we can have unmatched double quote).
+
+ [! { ' }] this gives an error
+ [! { ' ' }] is ok
+ *)
+ let token = parse_until_end (read_long_string 0) in
+ let () =
+ match token with
+ | Tokens.LITERAL (content : string) -> (
+ (* Ensure every opening quote is closed before the end of the
+ comment *)
+ let string_state =
+ String.fold_left content ~init:None ~f:(fun state c ->
+ match (state, c) with
+ | None, '\'' -> Quote
+ | None, '"' -> DQuote
+ | Quote, '\'' -> None
+ | DQuote, '"' -> None
+ | _ -> state)
+ in
+ match string_state with
+ | None -> ()
+ | _ ->
+ Lexbuf.leave_state buffer;
+ raise UnclosedQuote)
+ | _ -> ()
+ in
+ let () =
+ try ignore (Lex_state.readLongStringWraper.end_string buffer)
+ with _ -> ()
+ in
skip_comment buffer
| '\'' ->
- parse_until_end
- (Lex_state.quotedStringWraper.wrap
- (read_quoted_string Lex_state.quotedStringWraper));
+ let _ =
+ parse_until_end
+ (Lex_state.quotedStringWraper.wrap
+ (read_quoted_string Lex_state.quotedStringWraper))
+ in
let _ = Lex_state.quotedStringWraper.end_string buffer in
skip_comment buffer
| '"' ->
- parse_until_end
- (Lex_state.dQuotedStringWraper.wrap
- (read_quoted_string Lex_state.dQuotedStringWraper));
+ let _ =
+ parse_until_end
+ (Lex_state.dQuotedStringWraper.wrap
+ (read_quoted_string Lex_state.dQuotedStringWraper))
+ in
let _ = Lex_state.dQuotedStringWraper.end_string buffer in
skip_comment buffer
| eol ->
diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli
index 299fa46..854bb1e 100644
--- a/lib/qparser/lexer.mli
+++ b/lib/qparser/lexer.mli
@@ -6,7 +6,10 @@
working. *)
exception EOF
+
exception UnclosedQuote
+(** Error reported when a string is not closed properly *)
+
exception LexError of string
val discard : Lexbuf.t -> unit
diff --git a/test/qsp_parser_test.ml b/test/qsp_parser_test.ml
index ada04d3..43f9cb3 100644
--- a/test/qsp_parser_test.ml
+++ b/test/qsp_parser_test.ml
@@ -1,7 +1,8 @@
let () =
Alcotest.run "qsp_parser"
[
- Syntax.test;
+ Syntax.test_syntax;
+ Syntax.test_comments;
Literals.test;
Syntax_error.test;
Get_type.test;
diff --git a/test/syntax.ml b/test/syntax.ml
index f15c72f..f922e4f 100644
--- a/test/syntax.ml
+++ b/test/syntax.ml
@@ -400,6 +400,18 @@ life is unfair." Oh yeah, {curly brackets
also count}. This is still the same comment. |}
[ Comment _position ]
+let test_comment_string () =
+ _test_instruction {|! {}|} [ Comment _position ];
+ _test_instruction {|! ''|} [ Comment _position ];
+ _test_instruction {|! ""|} [ Comment _position ];
+ _test_instruction {|! {''}|} [ Comment _position ];
+ _test_instruction {|! {""}|} [ Comment _position ];
+ _test_instruction {|! "{"|} [ Comment _position ];
+ _test_instruction {|! '{'|} [ Comment _position ];
+ _test_instruction {|! "'"|} [ Comment _position ];
+ _test_instruction {|! '"'|} [ Comment _position ];
+ ()
+
(** This test ensure that the unary operator is applied to the whole expression
*)
let test_precedence () =
@@ -882,7 +894,7 @@ let test_stattxt () =
{ Tree.Ast.pos = _position; name = "$STATTXT"; index = None } );
]
-let test =
+let test_syntax =
( "Syntax",
[
Alcotest.test_case "Location" `Quick test_empty_location;
@@ -915,13 +927,6 @@ let test =
Alcotest.test_case "Plus_litt" `Quick test_plus_litt;
Alcotest.test_case "PlusChained" `Quick test_concat;
Alcotest.test_case "Mod operator" `Quick test_mod;
- Alcotest.test_case "Comment" `Quick test_comment;
- Alcotest.test_case "Comment2" `Quick test_comment2;
- Alcotest.test_case "Comment3" `Quick test_comment3;
- Alcotest.test_case "Comment4" `Quick test_comment4;
- Alcotest.test_case "Comment5" `Quick test_comment5;
- Alcotest.test_case "Comment6" `Quick test_comment6;
- Alcotest.test_case "Multiline Comment" `Quick test_long_comment;
Alcotest.test_case "If" `Quick test_if;
Alcotest.test_case "If_chained" `Quick test_if_chained;
Alcotest.test_case "If_equality" `Quick test_if_equality;
@@ -952,3 +957,16 @@ let test =
Alcotest.test_case "Precedence8" `Quick test_precedence8;
Alcotest.test_case "stattxt" `Quick test_stattxt;
] )
+
+let test_comments =
+ ( "Comments",
+ [
+ Alcotest.test_case "Simple Comment" `Quick test_comment;
+ Alcotest.test_case "& Comment" `Quick test_comment2;
+ Alcotest.test_case "Double Comment" `Quick test_comment3;
+ Alcotest.test_case "Comment vs operation" `Quick test_comment4;
+ Alcotest.test_case "Comment5" `Quick test_comment5;
+ Alcotest.test_case "Comment6" `Quick test_comment6;
+ Alcotest.test_case "Multiline Comment" `Quick test_long_comment;
+ Alcotest.test_case "Comments with strings" `Quick test_comment_string;
+ ] )
diff --git a/test/syntax_error.ml b/test/syntax_error.ml
index 2187e89..b92cf28 100644
--- a/test/syntax_error.ml
+++ b/test/syntax_error.ml
@@ -94,6 +94,12 @@ let unclose_comment () =
_test_instruction {| ! that's it|}
{ level = Error; loc = _position; message = "Unclosed string" }
+(* Same but with nested string *)
+
+let unclose_comment2 () =
+ _test_instruction {| !{ that's it }|}
+ { level = Error; loc = _position; message = "Unclosed string" }
+
let syntax_error () =
_test_instruction {|*clr $ cla|}
{ level = Error; loc = _position; message = "Unexpected character \"\"" }
@@ -275,6 +281,7 @@ let test =
Alcotest.test_case "act 1" `Quick act_no_column;
Alcotest.test_case "no &" `Quick missing_ampersand;
Alcotest.test_case "unclose_comment" `Quick unclose_comment;
+ Alcotest.test_case "unclose_comment2" `Quick unclose_comment2;
Alcotest.test_case "Syntax error $" `Quick syntax_error;
Alcotest.test_case "Missing operand" `Quick missing_operand;
Alcotest.test_case "Unknown function" `Quick unknow_function;