aboutsummaryrefslogtreecommitdiff
path: root/lib/qparser/lexer.ml
diff options
context:
space:
mode:
authorChimrod <>2023-10-30 08:22:39 +0100
committerChimrod <>2023-11-02 14:53:02 +0100
commit05f74bee05c0c56da593a5e89069711d5993e3b1 (patch)
treed09dbc7fb8228b8091a48f7d4bf5f095638f2bad /lib/qparser/lexer.ml
parenta1bb96e73f54eaa7c7e4af5d930e9d10074afb08 (diff)
Managed the strings in strings
Diffstat (limited to 'lib/qparser/lexer.ml')
-rw-r--r--lib/qparser/lexer.ml168
1 files changed, 117 insertions, 51 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index 114846c..8f3645c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -3,11 +3,17 @@
*)
open Tokens
+open StdLabels
exception UnclosedQuote
exception LexError of string
exception EOF
+let pr_err buffer =
+ let location, _ = Lexbuf.positions buffer in
+ let line = location.Lexing.pos_lnum and file = location.Lexing.pos_fname in
+ Format.eprintf "read_quoted_string : %s:%d\n" file line
+
(* Extract the location name from the pattern *)
let location_name = Str.regexp {|.* \(.*\)|}
@@ -62,12 +68,26 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string level buf buffer =
+let rec read_long_string ?(nested = false) level buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
+ | "<<" ->
+ if not nested then (
+ match Buffer.length buf with
+ | 0 ->
+ Lexbuf.enter_state buffer Lexbuf.Token;
+ ENTER_EMBED
+ | _ ->
+ let result = Tokens.LITERAL (Buffer.contents buf) in
+ Buffer.reset buf;
+ Lexbuf.rollback buffer;
+ result)
+ else (
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string ~nested level buf buffer)
| '{' ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level + 1) buf buffer
+ read_long_string ~nested (level + 1) buf buffer
| '}' -> (
match level with
| 0 ->
@@ -79,64 +99,87 @@ let rec read_long_string level buf buffer =
| _ ->
(* We have nested strings. Do not terminate end *)
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level - 1) buf buffer)
+ read_long_string ~nested (level - 1) buf buffer)
+ | '\'' | '"' ->
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string ~nested:true level buf buffer
| eol ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_long_string level buf buffer
+ read_long_string ~nested level buf buffer
| any ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_long_string level buf buffer
- | _ -> raise Not_found
+ read_long_string ~nested level buf buffer
+ | _ ->
+ pr_err buffer;
+ raise Not_found
(** Read the text inside a ['] *)
-let rec read_quoted_string f buf buffer =
+let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
+ fun f ?(nested = false) buf buffer ->
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
- | eol ->
+ | "<<" ->
+ if not nested then (
+ match Buffer.length buf with
+ | 0 ->
+ Lexbuf.enter_state buffer Lexbuf.Token;
+ ENTER_EMBED
+ | _ ->
+ let result = Tokens.LITERAL (Buffer.contents buf) in
+ Buffer.reset buf;
+ Lexbuf.rollback buffer;
+ result)
+ else (
+ Buffer.add_string buf (Lexbuf.content buffer);
+ (f.wrap (read_quoted_string f)) buf buffer)
+ | eol | any ->
Buffer.add_string buf (Lexbuf.content buffer);
- (f (read_quoted_string f)) buf buffer
- | any ->
- Buffer.add_string buf (Lexbuf.content buffer);
- (f (read_quoted_string f)) buf buffer
+ (f.wrap (read_quoted_string f)) buf buffer
| _ ->
- let location, _ = Lexbuf.positions buffer in
- let line = location.Lexing.pos_lnum
- and file = location.Lexing.pos_fname in
- Format.eprintf "read_quoted_string : %s:%d\n" file line;
+ pr_err buffer;
raise Not_found
let rec skip_comment buffer =
+ (* Simplified way to skip the content of a string until the end marker.
+ (expect the string to be well formed) *)
+ let rec parse_until_end f =
+ let _ = wait_balance f buffer in
+ match Lexbuf.state buffer with
+ | Some Lexbuf.Token ->
+ Lexbuf.leave_state buffer;
+ parse_until_end f
+ | Some (Lexbuf.EndString _) -> ()
+ | _ -> parse_until_end f
+ in
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- let _ = wait_balance (read_long_string 0) buffer in
+ parse_until_end (read_long_string 0);
let _ = Lex_state.readLongStringWraper.end_string buffer in
skip_comment buffer
| '\'' ->
- let _ =
- wait_balance
- (Lex_state.quotedStringWraper.wrap
- (read_quoted_string Lex_state.quotedStringWraper.wrap))
- buffer
- in
+ parse_until_end
+ (Lex_state.quotedStringWraper.wrap
+ (read_quoted_string Lex_state.quotedStringWraper));
let _ = Lex_state.quotedStringWraper.end_string buffer in
skip_comment buffer
| '"' ->
- let _ =
- wait_balance
- (Lex_state.dQuotedStringWraper.wrap
- (read_quoted_string Lex_state.dQuotedStringWraper.wrap))
- buffer
- in
+ parse_until_end
+ (Lex_state.dQuotedStringWraper.wrap
+ (read_quoted_string Lex_state.dQuotedStringWraper));
let _ = Lex_state.dQuotedStringWraper.end_string buffer in
skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
- parsing. *)
+ parsing.
+ This is required because the eol is also a part of the syntax, and do
+ cannot be discard with the end of the comment. *)
Lexbuf.rollback buffer;
COMMENT
| any -> skip_comment buffer
- | _ -> raise Not_found
+ | _ ->
+ pr_err buffer;
+ raise Not_found
(** Main lexer *)
let rec parse_token : Lexbuf.t -> token =
@@ -183,19 +226,20 @@ let rec parse_token : Lexbuf.t -> token =
Lexbuf.leave_state buffer;
R_PAREN
| ">>" ->
+ (* Leave the expression if we have any*)
+ leave_expression buffer;
+ (* Now leave the token mode and return to the string *)
Lexbuf.leave_state buffer;
- parse_token buffer
+ LEAVE_EMBED
| '<' -> LT
| '>' -> GT
| coma -> COMA
| '=' ->
Lexbuf.enter_state buffer Lexbuf.Expression;
-
EQUAL
| ident -> build_ident buffer
| eol ->
leave_expression buffer;
-
EOL
| '&' ->
leave_expression buffer;
@@ -214,31 +258,53 @@ let rec parse_token : Lexbuf.t -> token =
let main buffer =
match Lexbuf.state buffer with
| Some (Lexbuf.String w) ->
- wait_balance (w.wrap @@ read_quoted_string w.wrap) buffer
+ wait_balance (w.wrap @@ read_quoted_string w) buffer
| Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer
| Some (Lexbuf.EndString w) -> w.end_string buffer
- | Some (Lexbuf.Token w) -> w.start_string parse_token buffer
- | _ -> Lex_state.defaultWraper.start_string parse_token buffer
+ | _ ->
+ let parser =
+ parse_token |> Lex_state.defaultWraper.start_string
+ |> Lexbuf.overlay buffer
+ in
+ parser buffer
let rec discard buffer =
let lexbuf = Lexbuf.buffer buffer in
+
match%sedlex lexbuf with
- | '\'' ->
- ignore
- (wait_balance
- (read_quoted_string Lex_state.quotedStringWraper.wrap)
- buffer);
- discard buffer
- | '"' ->
- ignore
- (wait_balance
- (read_quoted_string Lex_state.quotedStringWraper.wrap)
- buffer);
- discard buffer
+ | '\'' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.String _) ->
+ (* If we are inside a string, close it. *)
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ (* Otherwise wait skip until the end of the starting one *)
+ ignore
+ (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16)
+ buffer);
+ discard buffer)
+ | '"' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.String _) ->
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ ignore
+ (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16)
+ buffer);
+ discard buffer)
+ | '}' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.MString _) ->
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ ignore (read_long_string 0 (Buffer.create 16) buffer);
+ discard buffer)
| '{' ->
- ignore (wait_balance (read_long_string 0) buffer);
+ ignore (read_long_string 0 (Buffer.create 16) buffer);
discard buffer
- | eof -> raise EOF
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
leave_expression buffer;
()