diff options
author | Chimrod <> | 2023-10-28 16:47:23 +0200 |
---|---|---|
committer | Chimrod <> | 2023-11-02 11:06:12 +0100 |
commit | dd060261e35fcb8a57f03b01dbe84ab772a2a199 (patch) | |
tree | 8faf51bfe647ab844c976dfcbba4ad4d533f07b4 /lib/qparser/lexer.ml | |
parent | 872916a5661e31b655471ec0f9bf81a5474bc1ba (diff) |
Set up a context for parsing the literal strings
Diffstat (limited to 'lib/qparser/lexer.ml')
-rw-r--r-- | lib/qparser/lexer.ml | 120 |
1 files changed, 43 insertions, 77 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index 7878299..114846c 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -53,7 +53,6 @@ let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = with Not_found -> raise UnclosedQuote let space = [%sedlex.regexp? ' ' | '\t'] -let spaces = [%sedlex.regexp? Plus space] let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"] let coma = [%sedlex.regexp? ','] let digit = [%sedlex.regexp? '0' .. '9'] @@ -63,29 +62,6 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] -(** Change the state when we are ending a string. Send the text marker to the - parser in order to tell the string is over. - - This can work because the state EndString is only raised when the same - token is fetched inside the appropriate sting method lexer. The - [Lexbuf.rollback] function is called in order to let the same token occur - again. - *) -let end_string : Lexbuf.t -> token = - fun buffer -> - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '}' -> - Lexbuf.leave_state buffer; - TEXT_MARKER - | '\'' -> - Lexbuf.leave_state buffer; - TEXT_MARKER - | '"' -> - Lexbuf.leave_state buffer; - TEXT_MARKER - | _ -> raise Not_found - let rec read_long_string level buf buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with @@ -96,7 +72,8 @@ let rec read_long_string level buf buffer = match level with | 0 -> Lexbuf.leave_state buffer; - Lexbuf.enter_state buffer Lexbuf.EndString; + Lexbuf.enter_state buffer + (Lexbuf.EndString Lex_state.readLongStringWraper); Lexbuf.rollback buffer; LITERAL (Buffer.contents buf) | _ -> @@ -111,55 +88,47 @@ let rec read_long_string level buf buffer = read_long_string level buf buffer | _ -> raise Not_found -let rec read_dquoted_string buf buffer = +(** Read the text inside a ['] *) +let rec read_quoted_string f buf buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with - | "\"\"" -> - Buffer.add_char buf '"'; - read_dquoted_string buf buffer - | '"' -> - Lexbuf.leave_state buffer; - Lexbuf.enter_state buffer Lexbuf.EndString; - Lexbuf.rollback buffer; - LITERAL (Buffer.contents buf) - | any -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_dquoted_string buf buffer - | _ -> raise Not_found - -let rec read_quoted_string buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | "''" -> - Buffer.add_char buf '\''; - read_quoted_string buf buffer - | '\'' -> - Lexbuf.leave_state buffer; - Lexbuf.enter_state buffer Lexbuf.EndString; - Lexbuf.rollback buffer; - LITERAL (Buffer.contents buf) | eol -> Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer + (f (read_quoted_string f)) buf buffer | any -> Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer - | _ -> raise Not_found + (f (read_quoted_string f)) buf buffer + | _ -> + let location, _ = Lexbuf.positions buffer in + let line = location.Lexing.pos_lnum + and file = location.Lexing.pos_fname in + Format.eprintf "read_quoted_string : %s:%d\n" file line; + raise Not_found let rec skip_comment buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | '{' -> let _ = wait_balance (read_long_string 0) buffer in - let _ = end_string buffer in + let _ = Lex_state.readLongStringWraper.end_string buffer in skip_comment buffer | '\'' -> - let _ = wait_balance read_quoted_string buffer in - let _ = end_string buffer in + let _ = + wait_balance + (Lex_state.quotedStringWraper.wrap + (read_quoted_string Lex_state.quotedStringWraper.wrap)) + buffer + in + let _ = Lex_state.quotedStringWraper.end_string buffer in skip_comment buffer | '"' -> - let _ = wait_balance read_dquoted_string buffer in - let _ = end_string buffer in + let _ = + wait_balance + (Lex_state.dQuotedStringWraper.wrap + (read_quoted_string Lex_state.dQuotedStringWraper.wrap)) + buffer + in + let _ = Lex_state.dQuotedStringWraper.end_string buffer in skip_comment buffer | eol -> (* Ugly hack used in order to put the eol in the front of the next @@ -170,13 +139,13 @@ let rec skip_comment buffer = | _ -> raise Not_found (** Main lexer *) -let rec token : Lexbuf.t -> token = +let rec parse_token : Lexbuf.t -> token = fun buffer -> let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | 0Xfeff -> (* Ignore the BOM *) - token buffer + parse_token buffer | '#', Star space, location -> (* Extract the location name *) let ident = Lexbuf.content buffer in @@ -215,7 +184,7 @@ let rec token : Lexbuf.t -> token = R_PAREN | ">>" -> Lexbuf.leave_state buffer; - token buffer + parse_token buffer | '<' -> LT | '>' -> GT | coma -> COMA @@ -235,16 +204,6 @@ let rec token : Lexbuf.t -> token = match Lexbuf.state buffer with | Some Lexbuf.Expression -> EXCLAMATION | _ -> skip_comment buffer) - | spaces -> token buffer - | '\'' -> - Lexbuf.enter_state buffer Lexbuf.String; - TEXT_MARKER - | '"' -> - Lexbuf.enter_state buffer Lexbuf.DString; - TEXT_MARKER - | '{' -> - Lexbuf.enter_state buffer (Lexbuf.MString 0); - TEXT_MARKER | '}' -> TEXT_MARKER | eof -> raise EOF | _ -> @@ -254,20 +213,27 @@ let rec token : Lexbuf.t -> token = let main buffer = match Lexbuf.state buffer with - | Some Lexbuf.String -> wait_balance read_quoted_string buffer - | Some Lexbuf.DString -> wait_balance read_dquoted_string buffer + | Some (Lexbuf.String w) -> + wait_balance (w.wrap @@ read_quoted_string w.wrap) buffer | Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer - | Some Lexbuf.EndString -> end_string buffer - | _ -> token buffer + | Some (Lexbuf.EndString w) -> w.end_string buffer + | Some (Lexbuf.Token w) -> w.start_string parse_token buffer + | _ -> Lex_state.defaultWraper.start_string parse_token buffer let rec discard buffer = let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | '\'' -> - ignore (wait_balance read_quoted_string buffer); + ignore + (wait_balance + (read_quoted_string Lex_state.quotedStringWraper.wrap) + buffer); discard buffer | '"' -> - ignore (wait_balance read_dquoted_string buffer); + ignore + (wait_balance + (read_quoted_string Lex_state.quotedStringWraper.wrap) + buffer); discard buffer | '{' -> ignore (wait_balance (read_long_string 0) buffer); |