Updated the way to process the strings

author: Chimrod <> 2023-10-27 09:59:28 +0200
committer: Chimrod <> 2023-11-02 11:06:12 +0100
commit: 8a7bdc73a7c65d23c79e1c470ba0fbff975b59a5 (patch)
tree: 2057dce34911c78046ce93469856ae2a0913c2ce /lib/qparser
parent: 4f5e33ef7b96d6daee29ff1088ea381b9302f846 (diff)
4 files changed, 42 insertions, 23 deletions
diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml
index af8c48a..9498f4a 100644
--- a/lib/qparser/lexbuf.ml
+++ b/lib/qparser/lexbuf.ml
@@ -13,6 +13,9 @@ type t = {
   reset_line : bool;
 }
 
+let state : t -> state option = fun t -> Stack.top_opt t.state
+let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
+let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
 let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
 
 let start : t -> unit =
@@ -22,6 +25,7 @@ let start : t -> unit =
     if not t.reset_line then
       Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
   in
+  Stack.clear t.state;
   t.start_p <- None
 
 let positions : t -> Lexing.position * Lexing.position =
@@ -61,14 +65,3 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
   lexer
 
 let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
-
-(** The comment system is terrible. The same symbol can be used for :
-     - starting a comment
-     - inequality operation
-   In order to manage this, I try to identify the context in a very basic way,
-   using a counter for determining the token to send.
-*)
-let state : t -> state option = fun t -> Stack.top_opt t.state
-
-let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
-let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli
index ec94d1b..5fda8ff 100644
--- a/lib/qparser/lexbuf.mli
+++ b/lib/qparser/lexbuf.mli
@@ -13,7 +13,11 @@ val buffer : t -> Sedlexing.lexbuf
 (** Extract the sedlex buffer. Required in each rule. *)
 
 val positions : t -> Lexing.position * Lexing.position
-(** Extract the starting and ending position for the matched token *)
+(** Extract the starting and ending position for the matched token. 
+
+    This function is used outside of the parser, in order to get the position
+    of the latest token in the case of an error.
+ *)
 
 val content : t -> string
 (** Extract the token matched by the rule *)
@@ -29,18 +33,28 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
 val rollback : t -> unit
 (** Rollback the latest token matched *)
 
-(** {1 State in expressions} *)
+(** {1 State in expressions} 
+
+ The comment system is terrible. The same symbol can be used for :
+     - starting a comment
+     - inequality operation
+
+   In order to manage this, I try to identify the context in a very basic way,
+   using a stack for determining the token to send.
+*)
 
 type state =
-  | Token
-  | String
-  | DString
-  | MString of int
-  | EndString
-  | Expression
+  | Token  (** Default state, parsing the tokens *)
+  | String  (** String enclosed by [''] *)
+  | DString  (** String enclosed by [""] *)
+  | MString of int  (** String enclosed by [{}] *)
+  | EndString  (** State raised just before closing the string *)
+  | Expression  (** Expression where [!] is an operator *)
 
 val state : t -> state option
-(** Get the current state for the lexer *)
+(** Get the current state for the lexer. 
+
+    @return [None] when in the default state *)
 
 val enter_state : t -> state -> unit
 (** Enter into a new state *)
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index abe47ac..7878299 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -63,6 +63,14 @@ let location_ident = [%sedlex.regexp? letters | digit]
 let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
 let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
 
+(** Change the state when we are ending a string. Send the text marker to the
+    parser in order to tell the string is over.
+
+    This can work because the state EndString is only raised when the same
+    token is fetched inside the appropriate string method lexer. The
+    [Lexbuf.rollback] function is called in order to let the same token occur
+    again.
+ *)
 let end_string : Lexbuf.t -> token =
  fun buffer ->
   let lexbuf = Lexbuf.buffer buffer in
@@ -92,6 +100,7 @@ let rec read_long_string level buf buffer =
           Lexbuf.rollback buffer;
           LITERAL (Buffer.contents buf)
       | _ ->
+          (* Nested string: do not end the string yet, keep reading *)
           Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
           read_long_string (level - 1) buf buffer)
   | eol ->
@@ -204,11 +213,11 @@ let rec token : Lexbuf.t -> token =
   | ')' ->
       Lexbuf.leave_state buffer;
       R_PAREN
-  | '<' -> LT
-  | '>' -> GT
   | ">>" ->
       Lexbuf.leave_state buffer;
       token buffer
+  | '<' -> LT
+  | '>' -> GT
   | coma -> COMA
   | '=' ->
       Lexbuf.enter_state buffer Lexbuf.Expression;
diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly
index 738c73c..58da39e 100644
--- a/lib/qparser/qsp_expression.mly
+++ b/lib/qparser/qsp_expression.mly
@@ -34,7 +34,10 @@
       op = binary_operator 
       expr2 = expression 
       { Analyzer.Expression.boperator $loc op expr1 expr2 }
-    | TEXT_MARKER v = LITERAL TEXT_MARKER { Analyzer.Expression.literal $loc v }
+    | TEXT_MARKER 
+      v = LITERAL 
+      TEXT_MARKER 
+      { Analyzer.Expression.literal $loc v }
     | i = INTEGER  { Analyzer.Expression.integer $loc i }
     | v = variable { Analyzer.Expression.ident v }
     %prec p_variable
author	Chimrod <>	2023-10-27 09:59:28 +0200
committer	Chimrod <>	2023-11-02 11:06:12 +0100
commit	8a7bdc73a7c65d23c79e1c470ba0fbff975b59a5 (patch)
tree	2057dce34911c78046ce93469856ae2a0913c2ce /lib/qparser
parent	4f5e33ef7b96d6daee29ff1088ea381b9302f846 (diff)