From 8a7bdc73a7c65d23c79e1c470ba0fbff975b59a5 Mon Sep 17 00:00:00 2001 From: Chimrod <> Date: Fri, 27 Oct 2023 09:59:28 +0200 Subject: Updated the way to process the strings --- lib/qparser/lexbuf.ml | 15 ++++----------- lib/qparser/lexbuf.mli | 32 +++++++++++++++++++++++--------- lib/qparser/lexer.ml | 13 +++++++++++-- lib/qparser/qsp_expression.mly | 5 ++++- 4 files changed, 42 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml index af8c48a..9498f4a 100644 --- a/lib/qparser/lexbuf.ml +++ b/lib/qparser/lexbuf.ml @@ -13,6 +13,9 @@ type t = { reset_line : bool; } +let state : t -> state option = fun t -> Stack.top_opt t.state +let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state +let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer let start : t -> unit = @@ -22,6 +25,7 @@ let start : t -> unit = if not t.reset_line then Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 } in + Stack.clear t.state; t.start_p <- None let positions : t -> Lexing.position * Lexing.position = @@ -61,14 +65,3 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position lexer let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer - -(** The comment system is terrible. The same symbol can be used for : - - starting a comment - - inequality operation - In order to manage this, I try to identify the context in a very basic way, - using a counter for determining the token to send. 
-*) -let state : t -> state option = fun t -> Stack.top_opt t.state - -let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state -let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli index ec94d1b..5fda8ff 100644 --- a/lib/qparser/lexbuf.mli +++ b/lib/qparser/lexbuf.mli @@ -13,7 +13,11 @@ val buffer : t -> Sedlexing.lexbuf (** Extract the sedlex buffer. Required in each rule. *) val positions : t -> Lexing.position * Lexing.position -(** Extract the starting and ending position for the matched token *) +(** Extract the starting and ending position for the matched token. + + This function is used outside of the parser, in order to get the position + of the latest token in the case of an error. + *) val content : t -> string (** Extract the token matched by the rule *) @@ -29,18 +33,28 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position val rollback : t -> unit (** Rollback the latest token matched *) -(** {1 State in expressions} *) +(** {1 State in expressions} + + The comment system is terrible. The same symbol can be used for : + - starting a comment + - inequality operation + + In order to manage this, I try to identify the context in a very basic way, + using a stack for determining the token to send. +*) type state = - | Token - | String - | DString - | MString of int - | EndString - | Expression + | Token (** Default state, parsing the tokens *) + | String (** String enclosed by [''] *) + | DString (** String enclosed by [""] *) + | MString of int (** String enclosed by [{}]*) + | EndString (** State raised just before closing the string *) + | Expression (** Expression where [!] is an operator *) val state : t -> state option -(** Get the current state for the lexer *) +(** Get the current state for the lexer. 
+ + @return [None] when in the default state *) val enter_state : t -> state -> unit (** Enter into a new state *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index abe47ac..7878299 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -63,6 +63,14 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] +(** Change the state when we are ending a string. Send the text marker to the + parser in order to tell the string is over. + + This can work because the state EndString is only raised when the same + token is fetched inside the appropriate string method lexer. The + [Lexbuf.rollback] function is called in order to let the same token occur + again. + *) let end_string : Lexbuf.t -> token = fun buffer -> let lexbuf = Lexbuf.buffer buffer in @@ -92,6 +100,7 @@ let rec read_long_string level buf buffer = Lexbuf.rollback buffer; LITERAL (Buffer.contents buf) | _ -> + (* We have nested strings. 
Do not terminate the string yet *) Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); read_long_string (level - 1) buf buffer) | eol -> @@ -204,11 +213,11 @@ let rec token : Lexbuf.t -> token = | ')' -> Lexbuf.leave_state buffer; R_PAREN - | '<' -> LT - | '>' -> GT | ">>" -> Lexbuf.leave_state buffer; token buffer + | '<' -> LT + | '>' -> GT | coma -> COMA | '=' -> Lexbuf.enter_state buffer Lexbuf.Expression; diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly index 738c73c..58da39e 100644 --- a/lib/qparser/qsp_expression.mly +++ b/lib/qparser/qsp_expression.mly @@ -34,7 +34,10 @@ op = binary_operator expr2 = expression { Analyzer.Expression.boperator $loc op expr1 expr2 } - | TEXT_MARKER v = LITERAL TEXT_MARKER { Analyzer.Expression.literal $loc v } + | TEXT_MARKER + v = LITERAL + TEXT_MARKER + { Analyzer.Expression.literal $loc v } | i = INTEGER { Analyzer.Expression.integer $loc i } | v = variable { Analyzer.Expression.ident v } %prec p_variable -- cgit v1.2.3