aboutsummaryrefslogtreecommitdiff
path: root/lib/qparser/lexer.ml
diff options
context:
space:
mode:
authorChimrod <>2023-10-28 16:47:23 +0200
committerChimrod <>2023-11-02 11:06:12 +0100
commitdd060261e35fcb8a57f03b01dbe84ab772a2a199 (patch)
tree8faf51bfe647ab844c976dfcbba4ad4d533f07b4 /lib/qparser/lexer.ml
parent872916a5661e31b655471ec0f9bf81a5474bc1ba (diff)
Set up a context for parsing the literal strings
Diffstat (limited to 'lib/qparser/lexer.ml')
-rw-r--r--lib/qparser/lexer.ml120
1 files changed, 43 insertions, 77 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index 7878299..114846c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -53,7 +53,6 @@ let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =
with Not_found -> raise UnclosedQuote
let space = [%sedlex.regexp? ' ' | '\t']
-let spaces = [%sedlex.regexp? Plus space]
let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"]
let coma = [%sedlex.regexp? ',']
let digit = [%sedlex.regexp? '0' .. '9']
@@ -63,29 +62,6 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-(** Change the state when we are ending a string. Send the text marker to the
- parser in order to tell the string is over.
-
- This can work because the state EndString is only raised when the same
- token is fetched inside the appropriate sting method lexer. The
- [Lexbuf.rollback] function is called in order to let the same token occur
- again.
- *)
-let end_string : Lexbuf.t -> token =
- fun buffer ->
- let lexbuf = Lexbuf.buffer buffer in
- match%sedlex lexbuf with
- | '}' ->
- Lexbuf.leave_state buffer;
- TEXT_MARKER
- | '\'' ->
- Lexbuf.leave_state buffer;
- TEXT_MARKER
- | '"' ->
- Lexbuf.leave_state buffer;
- TEXT_MARKER
- | _ -> raise Not_found
-
let rec read_long_string level buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
@@ -96,7 +72,8 @@ let rec read_long_string level buf buffer =
match level with
| 0 ->
Lexbuf.leave_state buffer;
- Lexbuf.enter_state buffer Lexbuf.EndString;
+ Lexbuf.enter_state buffer
+ (Lexbuf.EndString Lex_state.readLongStringWraper);
Lexbuf.rollback buffer;
LITERAL (Buffer.contents buf)
| _ ->
@@ -111,55 +88,47 @@ let rec read_long_string level buf buffer =
read_long_string level buf buffer
| _ -> raise Not_found
-let rec read_dquoted_string buf buffer =
+(** Read the text inside a ['] *)
+let rec read_quoted_string f buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
- | "\"\"" ->
- Buffer.add_char buf '"';
- read_dquoted_string buf buffer
- | '"' ->
- Lexbuf.leave_state buffer;
- Lexbuf.enter_state buffer Lexbuf.EndString;
- Lexbuf.rollback buffer;
- LITERAL (Buffer.contents buf)
- | any ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_dquoted_string buf buffer
- | _ -> raise Not_found
-
-let rec read_quoted_string buf buffer =
- let lexbuf = Lexbuf.buffer buffer in
- match%sedlex lexbuf with
- | "''" ->
- Buffer.add_char buf '\'';
- read_quoted_string buf buffer
- | '\'' ->
- Lexbuf.leave_state buffer;
- Lexbuf.enter_state buffer Lexbuf.EndString;
- Lexbuf.rollback buffer;
- LITERAL (Buffer.contents buf)
| eol ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_quoted_string buf buffer
+ (f (read_quoted_string f)) buf buffer
| any ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_quoted_string buf buffer
- | _ -> raise Not_found
+ (f (read_quoted_string f)) buf buffer
+ | _ ->
+ let location, _ = Lexbuf.positions buffer in
+ let line = location.Lexing.pos_lnum
+ and file = location.Lexing.pos_fname in
+ Format.eprintf "read_quoted_string : %s:%d\n" file line;
+ raise Not_found
let rec skip_comment buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
let _ = wait_balance (read_long_string 0) buffer in
- let _ = end_string buffer in
+ let _ = Lex_state.readLongStringWraper.end_string buffer in
skip_comment buffer
| '\'' ->
- let _ = wait_balance read_quoted_string buffer in
- let _ = end_string buffer in
+ let _ =
+ wait_balance
+ (Lex_state.quotedStringWraper.wrap
+ (read_quoted_string Lex_state.quotedStringWraper.wrap))
+ buffer
+ in
+ let _ = Lex_state.quotedStringWraper.end_string buffer in
skip_comment buffer
| '"' ->
- let _ = wait_balance read_dquoted_string buffer in
- let _ = end_string buffer in
+ let _ =
+ wait_balance
+ (Lex_state.dQuotedStringWraper.wrap
+ (read_quoted_string Lex_state.dQuotedStringWraper.wrap))
+ buffer
+ in
+ let _ = Lex_state.dQuotedStringWraper.end_string buffer in
skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
@@ -170,13 +139,13 @@ let rec skip_comment buffer =
| _ -> raise Not_found
(** Main lexer *)
-let rec token : Lexbuf.t -> token =
+let rec parse_token : Lexbuf.t -> token =
fun buffer ->
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| 0Xfeff ->
(* Ignore the BOM *)
- token buffer
+ parse_token buffer
| '#', Star space, location ->
(* Extract the location name *)
let ident = Lexbuf.content buffer in
@@ -215,7 +184,7 @@ let rec token : Lexbuf.t -> token =
R_PAREN
| ">>" ->
Lexbuf.leave_state buffer;
- token buffer
+ parse_token buffer
| '<' -> LT
| '>' -> GT
| coma -> COMA
@@ -235,16 +204,6 @@ let rec token : Lexbuf.t -> token =
match Lexbuf.state buffer with
| Some Lexbuf.Expression -> EXCLAMATION
| _ -> skip_comment buffer)
- | spaces -> token buffer
- | '\'' ->
- Lexbuf.enter_state buffer Lexbuf.String;
- TEXT_MARKER
- | '"' ->
- Lexbuf.enter_state buffer Lexbuf.DString;
- TEXT_MARKER
- | '{' ->
- Lexbuf.enter_state buffer (Lexbuf.MString 0);
- TEXT_MARKER
| '}' -> TEXT_MARKER
| eof -> raise EOF
| _ ->
@@ -254,20 +213,27 @@ let rec token : Lexbuf.t -> token =
let main buffer =
match Lexbuf.state buffer with
- | Some Lexbuf.String -> wait_balance read_quoted_string buffer
- | Some Lexbuf.DString -> wait_balance read_dquoted_string buffer
+ | Some (Lexbuf.String w) ->
+ wait_balance (w.wrap @@ read_quoted_string w.wrap) buffer
| Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer
- | Some Lexbuf.EndString -> end_string buffer
- | _ -> token buffer
+ | Some (Lexbuf.EndString w) -> w.end_string buffer
+ | Some (Lexbuf.Token w) -> w.start_string parse_token buffer
+ | _ -> Lex_state.defaultWraper.start_string parse_token buffer
let rec discard buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '\'' ->
- ignore (wait_balance read_quoted_string buffer);
+ ignore
+ (wait_balance
+ (read_quoted_string Lex_state.quotedStringWraper.wrap)
+ buffer);
discard buffer
| '"' ->
- ignore (wait_balance read_dquoted_string buffer);
+ ignore
+ (wait_balance
+ (read_quoted_string Lex_state.quotedStringWraper.wrap)
+ buffer);
discard buffer
| '{' ->
ignore (wait_balance (read_long_string 0) buffer);