From 8b4eb01afa698dd1e512f7a83fb761bfb146fdf7 Mon Sep 17 00:00:00 2001
From: Chimrod <>
Date: Fri, 13 Sep 2024 09:25:38 +0200
Subject: Added a syntax check in the comments

---
 lib/qparser/lexer.ml  | 81 +++++++++++++++++++++++++++++++++++++++++++--------
 lib/qparser/lexer.mli |  3 ++
 2 files changed, 72 insertions(+), 12 deletions(-)

(limited to 'lib/qparser')

diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index e3524cc..42f3c1c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit]
 let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
 let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
 
-let rec read_long_string ?(nested = false) level buf buffer =
+(** Read a quote started with '{'
+
+    The function return the parsed string, but the closing token has been
+    rollbacked, leaving the state in [Lexbuf.EndString _].
+
+    The next call to [main] will call the associated function, effectively
+    leaving the string mode in the parser.
+
+    @param nested tell with started another block of string inside this one *)
+let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token
+    =
+ fun ?(nested = false) level buf buffer ->
   let lexbuf = Lexbuf.buffer buffer in
   match%sedlex lexbuf with
   | "<<" ->
@@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer =
           Lexbuf.leave_state buffer;
           Lexbuf.enter_state buffer
             (Lexbuf.EndString Lex_state.readLongStringWraper);
+          (* rollback the latest token *)
           Lexbuf.rollback buffer;
           LITERAL (Buffer.contents buf)
       | _ ->
@@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
       (f.wrap (read_quoted_string f)) buf buffer
   | _ -> raise Not_found
 
+(** Track the kind of nested string inside a multiline string inside a
+    comment.
+
+    Some constructions are not allowed in this specific case (see later)
+*)
+type commentedString = None | Quote | DQuote
+
 let rec skip_comment buffer =
   (* Simplified way to skip the content of a string until the end marker.
      (expect the string to be well formed) *)
-  let rec parse_until_end f =
-    let _ = wait_balance f buffer in
+  let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token =
+   fun f ->
+    let token = wait_balance f buffer in
     match Lexbuf.state buffer with
     | Some Lexbuf.Token ->
         Lexbuf.leave_state buffer;
         parse_until_end f
-    | Some (Lexbuf.EndString _) -> ()
+    | Some (Lexbuf.EndString _) -> token
     | _ -> parse_until_end f
   in
   let lexbuf = Lexbuf.buffer buffer in
   match%sedlex lexbuf with
   | '{' ->
-      parse_until_end (read_long_string 0);
-      let _ = Lex_state.readLongStringWraper.end_string buffer in
+      (* There are illegal constructions inside a comment containing {}
+         block.
+
+         Every opening text marker shall have the corresponding closing token
+         (but inside a nested block we can have unmatched double quote).
+
+         [! { ' }] this gives an error
+         [! { ' ' }] is ok
+      *)
+      let token = parse_until_end (read_long_string 0) in
+      let () =
+        match token with
+        | Tokens.LITERAL (content : string) -> (
+            (* Ensure every opening quote is closed before the end of the
+               comment *)
+            let string_state =
+              String.fold_left content ~init:None ~f:(fun state c ->
+                  match (state, c) with
+                  | None, '\'' -> Quote
+                  | None, '"' -> DQuote
+                  | Quote, '\'' -> None
+                  | DQuote, '"' -> None
+                  | _ -> state)
+            in
+            match string_state with
+            | None -> ()
+            | _ ->
+                Lexbuf.leave_state buffer;
+                raise UnclosedQuote)
+        | _ -> ()
+      in
+      let () =
+        try ignore (Lex_state.readLongStringWraper.end_string buffer)
+        with _ -> ()
+      in
       skip_comment buffer
   | '\'' ->
-      parse_until_end
-        (Lex_state.quotedStringWraper.wrap
-           (read_quoted_string Lex_state.quotedStringWraper));
+      let _ =
+        parse_until_end
+          (Lex_state.quotedStringWraper.wrap
+             (read_quoted_string Lex_state.quotedStringWraper))
+      in
       let _ = Lex_state.quotedStringWraper.end_string buffer in
       skip_comment buffer
   | '"' ->
-      parse_until_end
-        (Lex_state.dQuotedStringWraper.wrap
-           (read_quoted_string Lex_state.dQuotedStringWraper));
+      let _ =
+        parse_until_end
+          (Lex_state.dQuotedStringWraper.wrap
+             (read_quoted_string Lex_state.dQuotedStringWraper))
+      in
       let _ = Lex_state.dQuotedStringWraper.end_string buffer in
       skip_comment buffer
   | eol ->
diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli
index 299fa46..854bb1e 100644
--- a/lib/qparser/lexer.mli
+++ b/lib/qparser/lexer.mli
@@ -6,7 +6,10 @@
     working. *)
 
 exception EOF
+
 exception UnclosedQuote
+(** Error reported when a string is not closed properly *)
+
 exception LexError of string
 
 val discard : Lexbuf.t -> unit
-- 
cgit v1.2.3