From 05f74bee05c0c56da593a5e89069711d5993e3b1 Mon Sep 17 00:00:00 2001 From: Chimrod <> Date: Mon, 30 Oct 2023 08:22:39 +0100 Subject: Managed the strings in strings --- lib/qparser/lexer.ml | 168 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 117 insertions(+), 51 deletions(-) (limited to 'lib/qparser/lexer.ml') diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index 114846c..8f3645c 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -3,11 +3,17 @@ *) open Tokens +open StdLabels exception UnclosedQuote exception LexError of string exception EOF +let pr_err buffer = + let location, _ = Lexbuf.positions buffer in + let line = location.Lexing.pos_lnum and file = location.Lexing.pos_fname in + Format.eprintf "read_quoted_string : %s:%d\n" file line + (* Extract the location name from the pattern *) let location_name = Str.regexp {|.* \(.*\)|} @@ -62,12 +68,26 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] -let rec read_long_string level buf buffer = +let rec read_long_string ?(nested = false) level buf buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with + | "<<" -> + if not nested then ( + match Buffer.length buf with + | 0 -> + Lexbuf.enter_state buffer Lexbuf.Token; + ENTER_EMBED + | _ -> + let result = Tokens.LITERAL (Buffer.contents buf) in + Buffer.reset buf; + Lexbuf.rollback buffer; + result) + else ( + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string ~nested level buf buffer) | '{' -> Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level + 1) buf buffer + read_long_string ~nested (level + 1) buf buffer | '}' -> ( match level with | 0 -> @@ -79,64 +99,87 @@ let rec read_long_string level buf buffer = | _ -> (* We have nested strings. Do not terminate end *) Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level - 1) buf buffer) + read_long_string ~nested (level - 1) buf buffer) + | '\'' | '"' -> + Buffer.add_string buf (Lexbuf.content buffer); + read_long_string ~nested:true level buf buffer | eol -> Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer + read_long_string ~nested level buf buffer | any -> Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer - | _ -> raise Not_found + read_long_string ~nested level buf buffer + | _ -> + pr_err buffer; + raise Not_found (** Read the text inside a ['] *) -let rec read_quoted_string f buf buffer = +let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder = + fun f ?(nested = false) buf buffer -> let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with - | eol -> + | "<<" -> + if not nested then ( + match Buffer.length buf with + | 0 -> + Lexbuf.enter_state buffer Lexbuf.Token; + ENTER_EMBED + | _ -> + let result = Tokens.LITERAL (Buffer.contents buf) in + Buffer.reset buf; + Lexbuf.rollback buffer; + result) + else ( + Buffer.add_string buf (Lexbuf.content buffer); + (f.wrap (read_quoted_string f)) buf buffer) + | eol | any -> Buffer.add_string buf (Lexbuf.content buffer); - (f (read_quoted_string f)) buf buffer - | any -> - Buffer.add_string buf (Lexbuf.content buffer); - (f (read_quoted_string f)) buf buffer + (f.wrap (read_quoted_string f)) buf buffer | _ -> - let location, _ = Lexbuf.positions buffer in - let line = location.Lexing.pos_lnum - and file = location.Lexing.pos_fname in - Format.eprintf "read_quoted_string : %s:%d\n" file line; + pr_err buffer; raise Not_found let rec skip_comment buffer = + (* Simplified way to skip the content of a string until the end marker. + (expect the string to be well formed) *) + let rec parse_until_end f = + let _ = wait_balance f buffer in + match Lexbuf.state buffer with + | Some Lexbuf.Token -> + Lexbuf.leave_state buffer; + parse_until_end f + | Some (Lexbuf.EndString _) -> () + | _ -> parse_until_end f + in let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | '{' -> - let _ = wait_balance (read_long_string 0) buffer in + parse_until_end (read_long_string 0); let _ = Lex_state.readLongStringWraper.end_string buffer in skip_comment buffer | '\'' -> - let _ = - wait_balance - (Lex_state.quotedStringWraper.wrap - (read_quoted_string Lex_state.quotedStringWraper.wrap)) - buffer - in + parse_until_end + (Lex_state.quotedStringWraper.wrap + (read_quoted_string Lex_state.quotedStringWraper)); let _ = Lex_state.quotedStringWraper.end_string buffer in skip_comment buffer | '"' -> - let _ = - wait_balance - (Lex_state.dQuotedStringWraper.wrap - (read_quoted_string Lex_state.dQuotedStringWraper.wrap)) - buffer - in + parse_until_end + (Lex_state.dQuotedStringWraper.wrap + (read_quoted_string Lex_state.dQuotedStringWraper)); let _ = Lex_state.dQuotedStringWraper.end_string buffer in skip_comment buffer | eol -> (* Ugly hack used in order to put the eol in the front of the next - parsing. *) + parsing. + This is required because the eol is also a part of the syntax, and do + cannot be discard with the end of the comment. *) Lexbuf.rollback buffer; COMMENT | any -> skip_comment buffer - | _ -> raise Not_found + | _ -> + pr_err buffer; + raise Not_found (** Main lexer *) let rec parse_token : Lexbuf.t -> token = @@ -183,19 +226,20 @@ let rec parse_token : Lexbuf.t -> token = Lexbuf.leave_state buffer; R_PAREN | ">>" -> + (* Leave the expression if we have any*) + leave_expression buffer; + (* Now leave the token mode and return to the string *) Lexbuf.leave_state buffer; - parse_token buffer + LEAVE_EMBED | '<' -> LT | '>' -> GT | coma -> COMA | '=' -> Lexbuf.enter_state buffer Lexbuf.Expression; - EQUAL | ident -> build_ident buffer | eol -> leave_expression buffer; - EOL | '&' -> leave_expression buffer; @@ -214,31 +258,53 @@ let rec parse_token : Lexbuf.t -> token = let main buffer = match Lexbuf.state buffer with | Some (Lexbuf.String w) -> - wait_balance (w.wrap @@ read_quoted_string w.wrap) buffer + wait_balance (w.wrap @@ read_quoted_string w) buffer | Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer | Some (Lexbuf.EndString w) -> w.end_string buffer - | Some (Lexbuf.Token w) -> w.start_string parse_token buffer - | _ -> Lex_state.defaultWraper.start_string parse_token buffer + | _ -> + let parser = + parse_token |> Lex_state.defaultWraper.start_string + |> Lexbuf.overlay buffer + in + parser buffer let rec discard buffer = let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with - | '\'' -> - ignore - (wait_balance - (read_quoted_string Lex_state.quotedStringWraper.wrap) - buffer); - discard buffer - | '"' -> - ignore - (wait_balance - (read_quoted_string Lex_state.quotedStringWraper.wrap) - buffer); - discard buffer + | '\'' -> ( + match Lexbuf.state buffer with + | Some (Lexbuf.String _) -> + (* If we are inside a string, close it. *) + Lexbuf.leave_state buffer; + discard buffer + | _ -> + (* Otherwise wait skip until the end of the starting one *) + ignore + (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16) + buffer); + discard buffer) + | '"' -> ( + match Lexbuf.state buffer with + | Some (Lexbuf.String _) -> + Lexbuf.leave_state buffer; + discard buffer + | _ -> + ignore + (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16) + buffer); + discard buffer) + | '}' -> ( + match Lexbuf.state buffer with + | Some (Lexbuf.MString _) -> + Lexbuf.leave_state buffer; + discard buffer + | _ -> + ignore (read_long_string 0 (Buffer.create 16) buffer); + discard buffer) | '{' -> - ignore (wait_balance (read_long_string 0) buffer); + ignore (read_long_string 0 (Buffer.create 16) buffer); discard buffer - | eof -> raise EOF | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> leave_expression buffer; () -- cgit v1.2.3