aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChimrod <>2023-10-27 09:59:28 +0200
committerChimrod <>2023-11-02 11:06:12 +0100
commit8a7bdc73a7c65d23c79e1c470ba0fbff975b59a5 (patch)
tree2057dce34911c78046ce93469856ae2a0913c2ce
parent4f5e33ef7b96d6daee29ff1088ea381b9302f846 (diff)
Updated the way to process the strings
-rw-r--r--lib/qparser/lexbuf.ml15
-rw-r--r--lib/qparser/lexbuf.mli32
-rw-r--r--lib/qparser/lexer.ml13
-rw-r--r--lib/qparser/qsp_expression.mly5
4 files changed, 42 insertions, 23 deletions
diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml
index af8c48a..9498f4a 100644
--- a/lib/qparser/lexbuf.ml
+++ b/lib/qparser/lexbuf.ml
@@ -13,6 +13,9 @@ type t = {
reset_line : bool;
}
+let state : t -> state option = fun t -> Stack.top_opt t.state
+let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
+let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
let start : t -> unit =
@@ -22,6 +25,7 @@ let start : t -> unit =
if not t.reset_line then
Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
in
+ Stack.clear t.state;
t.start_p <- None
let positions : t -> Lexing.position * Lexing.position =
@@ -61,14 +65,3 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
lexer
let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
-
-(** The comment system is terrible. The same symbol can be used for :
- - starting a comment
- - inequality operation
- In order to manage this, I try to identify the context in a very basic way,
- using a counter for determining the token to send.
-*)
-let state : t -> state option = fun t -> Stack.top_opt t.state
-
-let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
-let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli
index ec94d1b..5fda8ff 100644
--- a/lib/qparser/lexbuf.mli
+++ b/lib/qparser/lexbuf.mli
@@ -13,7 +13,11 @@ val buffer : t -> Sedlexing.lexbuf
(** Extract the sedlex buffer. Required in each rule. *)
val positions : t -> Lexing.position * Lexing.position
-(** Extract the starting and ending position for the matched token *)
+(** Extract the starting and ending position for the matched token.
+
+ This function is used outside of the parser, in order to get the position
+ of the latest token in the case of an error.
+ *)
val content : t -> string
(** Extract the token matched by the rule *)
@@ -29,18 +33,28 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
val rollback : t -> unit
(** Rollback the latest token matched *)
-(** {1 State in expressions} *)
+(** {1 State in expressions}
+
+ The comment system is terrible. The same symbol can be used for:
+ - starting a comment
+ - inequality operation
+
+ In order to manage this, I try to identify the context in a very basic way,
+ using a stack for determining the token to send.
+*)
type state =
- | Token
- | String
- | DString
- | MString of int
- | EndString
- | Expression
+ | Token (** Default state, parsing the tokens *)
+ | String (** String enclosed by [''] *)
+ | DString (** String enclosed by [""] *)
+ | MString of int (** String enclosed by [{}]*)
+ | EndString (** State raised just before closing the string *)
+ | Expression (** Expression where [!] is an operator *)
val state : t -> state option
-(** Get the current state for the lexer *)
+(** Get the current state for the lexer.
+
+ @return [None] when in the default state *)
val enter_state : t -> state -> unit
(** Enter into a new state *)
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index abe47ac..7878299 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -63,6 +63,14 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
+(** Change the state when we are ending a string. Send the text marker to the
+ parser in order to signal that the string is over.
+
+ This can work because the state EndString is only raised when the same
+ token is fetched inside the appropriate string method lexer. The
+ [Lexbuf.rollback] function is called in order to let the same token occur
+ again.
+ *)
let end_string : Lexbuf.t -> token =
fun buffer ->
let lexbuf = Lexbuf.buffer buffer in
@@ -92,6 +100,7 @@ let rec read_long_string level buf buffer =
Lexbuf.rollback buffer;
LITERAL (Buffer.contents buf)
| _ ->
+ (* We have nested strings. Do not terminate the string yet *)
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
read_long_string (level - 1) buf buffer)
| eol ->
@@ -204,11 +213,11 @@ let rec token : Lexbuf.t -> token =
| ')' ->
Lexbuf.leave_state buffer;
R_PAREN
- | '<' -> LT
- | '>' -> GT
| ">>" ->
Lexbuf.leave_state buffer;
token buffer
+ | '<' -> LT
+ | '>' -> GT
| coma -> COMA
| '=' ->
Lexbuf.enter_state buffer Lexbuf.Expression;
diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly
index 738c73c..58da39e 100644
--- a/lib/qparser/qsp_expression.mly
+++ b/lib/qparser/qsp_expression.mly
@@ -34,7 +34,10 @@
op = binary_operator
expr2 = expression
{ Analyzer.Expression.boperator $loc op expr1 expr2 }
- | TEXT_MARKER v = LITERAL TEXT_MARKER { Analyzer.Expression.literal $loc v }
+ | TEXT_MARKER
+ v = LITERAL
+ TEXT_MARKER
+ { Analyzer.Expression.literal $loc v }
| i = INTEGER { Analyzer.Expression.integer $loc i }
| v = variable { Analyzer.Expression.ident v }
%prec p_variable