From dc1ae31617bc4c3cfaefc518971bbb153149ca86 Mon Sep 17 00:00:00 2001 From: Chimrod <> Date: Thu, 26 Oct 2023 16:50:12 +0200 Subject: Added a state in the string evaluation --- lib/qparser/analyzer.ml | 2 +- lib/qparser/lexbuf.ml | 17 +++++++++++-- lib/qparser/lexbuf.mli | 16 +++++++++--- lib/qparser/lexer.ml | 57 +++++++++++++++++++++++++++++++++++++----- lib/qparser/lexer.mli | 5 +++- lib/qparser/qsp_expression.mly | 2 +- lib/qparser/tokens.mly | 1 + 7 files changed, 86 insertions(+), 14 deletions(-) (limited to 'lib/qparser') diff --git a/lib/qparser/analyzer.ml b/lib/qparser/analyzer.ml index 58a117f..a79535e 100644 --- a/lib/qparser/analyzer.ml +++ b/lib/qparser/analyzer.ml @@ -13,7 +13,7 @@ let parse : let module IncrementalParser = Interpreter.Interpreter (Parser.MenhirInterpreter) in fun l -> - let lexer = Lexbuf.tokenize Lexer.token l in + let lexer = Lexbuf.tokenize Lexer.main l in let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml index cd0add3..61f86cd 100644 --- a/lib/qparser/lexbuf.ml +++ b/lib/qparser/lexbuf.ml @@ -1,7 +1,10 @@ +type state = Token | String | DString | MString of int | EndString + type t = { buffer : Sedlexing.lexbuf; mutable start_p : Lexing.position option; mutable expression_level : int; + state : state Stack.t; reset_line : bool; } @@ -27,7 +30,13 @@ let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t = fun ?(reset_line = true) t -> - { buffer = t; start_p = None; expression_level = 0; reset_line } + { + buffer = t; + start_p = None; + expression_level = 0; + reset_line; + state = Stack.create (); + } let set_start_position : t -> Lexing.position -> unit = fun t position -> @@ -53,6 +62,11 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position in lexer +let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer +let state : t -> state option = fun t -> Stack.top_opt t.state +let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state +let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) + (* The comment system is terrible. The same symbol can be used for : - starting a comment - inequality operation @@ -68,4 +82,3 @@ let decr_level : t -> unit = let reset_level : t -> unit = fun t -> t.expression_level <- 0 let level : t -> int = fun t -> t.expression_level -let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli index 5a0bbcd..db81d2c 100644 --- a/lib/qparser/lexbuf.mli +++ b/lib/qparser/lexbuf.mli @@ -26,6 +26,19 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position (** Function to use in the parser in order to extract the token match, and the starting and ending position. *) +val rollback : t -> unit +(** Rollback the latest token matched *) + +type state = Token | String | DString | MString of int | EndString + +val state : t -> state option +(** Get the current state for the lexer *) + +val enter_state : t -> state -> unit +val leave_state : t -> unit + +(** {1 Level in expressions} *) + val level : t -> int (** The state track the nesting level in the expression. Depending of the level, the token [!] will be considered as a comment or a boolean @@ -34,6 +47,3 @@ val level : t -> int val incr_level : t -> unit val decr_level : t -> unit val reset_level : t -> unit - -val rollback : t -> unit -(** Rollback the latest token matched *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index f2d04bf..75072c6 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -53,6 +53,21 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] +let end_string : Lexbuf.t -> token = + fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | '}' -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | '\'' -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | '"' -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | _ -> raise Not_found + let rec read_long_string level buf buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with @@ -61,7 +76,11 @@ let rec read_long_string level buf buffer = read_long_string (level + 1) buf buffer | '}' -> ( match level with - | 0 -> Buffer.contents buf + | 0 -> + Lexbuf.leave_state buffer; + Lexbuf.enter_state buffer Lexbuf.EndString; + Lexbuf.rollback buffer; + LITERAL (Buffer.contents buf) | _ -> Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); read_long_string (level - 1) buf buffer) @@ -79,7 +98,11 @@ let rec read_dquoted_string buf buffer = | "\"\"" -> Buffer.add_char buf '"'; read_dquoted_string buf buffer - | '"' -> Buffer.contents buf + | '"' -> + Lexbuf.leave_state buffer; + Lexbuf.enter_state buffer Lexbuf.EndString; + Lexbuf.rollback buffer; + LITERAL (Buffer.contents buf) | any -> Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); read_dquoted_string buf buffer @@ -91,7 +114,11 @@ let rec read_quoted_string buf buffer = | "''" -> Buffer.add_char buf '\''; read_quoted_string buf buffer - | '\'' -> Buffer.contents buf + | '\'' -> + Lexbuf.leave_state buffer; + Lexbuf.enter_state buffer Lexbuf.EndString; + Lexbuf.rollback buffer; + LITERAL (Buffer.contents buf) | eol -> Buffer.add_string buf (Lexbuf.content buffer); read_quoted_string buf buffer @@ -105,12 +132,15 @@ let rec skip_comment buffer = match%sedlex lexbuf with | '{' -> let _ = wait_balance (read_long_string 0) buffer in + let _ = end_string buffer in skip_comment buffer | '\'' -> let _ = wait_balance read_quoted_string buffer in + let _ = end_string buffer in skip_comment buffer | '"' -> let _ = wait_balance read_dquoted_string buffer in + let _ = end_string buffer in skip_comment buffer | eol -> (* Ugly hack used in order to put the eol in the front of the next @@ -180,9 +210,16 @@ let rec token : Lexbuf.t -> token = AMPERSAND | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer | spaces -> token buffer - | '\'' -> LITERAL (wait_balance read_quoted_string buffer) - | '"' -> LITERAL (wait_balance read_dquoted_string buffer) - | '{' -> LITERAL (wait_balance (read_long_string 0) buffer) + | '\'' -> + Lexbuf.enter_state buffer Lexbuf.String; + TEXT_MARKER + | '"' -> + Lexbuf.enter_state buffer Lexbuf.DString; + TEXT_MARKER + | '{' -> + Lexbuf.enter_state buffer (Lexbuf.MString 0); + TEXT_MARKER + | '}' -> TEXT_MARKER | eof -> raise EOF | _ -> let tok = Lexbuf.content buffer in @@ -190,6 +227,14 @@ let rec token : Lexbuf.t -> token = raise @@ LexError msg +let main buffer = + match Lexbuf.state buffer with + | Some Lexbuf.String -> wait_balance read_quoted_string buffer + | Some Lexbuf.DString -> wait_balance read_dquoted_string buffer + | Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer + | Some Lexbuf.EndString -> end_string buffer + | _ -> token buffer + let rec discard buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli index 30766e0..26d59cb 100644 --- a/lib/qparser/lexer.mli +++ b/lib/qparser/lexer.mli @@ -2,5 +2,8 @@ exception EOF exception UnclosedQuote exception LexError of string -val token : Lexbuf.t -> Tokens.token val discard : Lexbuf.t -> unit +(** Discard the remaining element in the location until the end of this one. + Used in case of unrecoverable error *) + +val main : Lexbuf.t -> Tokens.token diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly index 362c717..738c73c 100644 --- a/lib/qparser/qsp_expression.mly +++ b/lib/qparser/qsp_expression.mly @@ -34,7 +34,7 @@ op = binary_operator expr2 = expression { Analyzer.Expression.boperator $loc op expr1 expr2 } - | v = LITERAL { Analyzer.Expression.literal $loc v } + | TEXT_MARKER v = LITERAL TEXT_MARKER { Analyzer.Expression.literal $loc v } | i = INTEGER { Analyzer.Expression.integer $loc i } | v = variable { Analyzer.Expression.ident v } %prec p_variable diff --git a/lib/qparser/tokens.mly b/lib/qparser/tokens.mly index fa74cc7..6b218ed 100644 --- a/lib/qparser/tokens.mly +++ b/lib/qparser/tokens.mly @@ -24,6 +24,7 @@ %token IDENT %token LITERAL %token INTEGER +%token TEXT_MARKER %token COMMENT -- cgit v1.2.3