aboutsummaryrefslogtreecommitdiff
path: root/lib/qparser/lexer.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/qparser/lexer.ml')
-rw-r--r--lib/qparser/lexer.ml81
1 files changed, 69 insertions, 12 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index e3524cc..42f3c1c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string ?(nested = false) level buf buffer =
+(** Read a quote started with '{'
+
+ The function return the parsed string, but the closing token has been
+ rollbacked, leaving the state in [Lexbuf.EndString _].
+
+ The next call to [main] will call the associated function, effectively
+ leaving the string mode in the parser.
+
+ @param nested tell with started another block of string inside this one *)
+let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token
+ =
+ fun ?(nested = false) level buf buffer ->
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "<<" ->
@@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer =
Lexbuf.leave_state buffer;
Lexbuf.enter_state buffer
(Lexbuf.EndString Lex_state.readLongStringWraper);
+ (* rollback the latest token *)
Lexbuf.rollback buffer;
LITERAL (Buffer.contents buf)
| _ ->
@@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
(f.wrap (read_quoted_string f)) buf buffer
| _ -> raise Not_found
+(** Track the kind of nested string inside a multiline string inside a
+ comment.
+
+ Some constructions are not allowed in this specific case (see later)
+*)
+type commentedString = None | Quote | DQuote
+
let rec skip_comment buffer =
(* Simplified way to skip the content of a string until the end marker.
(expect the string to be well formed) *)
- let rec parse_until_end f =
- let _ = wait_balance f buffer in
+ let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token =
+ fun f ->
+ let token = wait_balance f buffer in
match Lexbuf.state buffer with
| Some Lexbuf.Token ->
Lexbuf.leave_state buffer;
parse_until_end f
- | Some (Lexbuf.EndString _) -> ()
+ | Some (Lexbuf.EndString _) -> token
| _ -> parse_until_end f
in
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- parse_until_end (read_long_string 0);
- let _ = Lex_state.readLongStringWraper.end_string buffer in
+ (* There are illegal constructions inside a comment containing {}
+ block.
+
+ Every opening text marker shall have the corresponding closing token
+ (but inside a nested block we can have unmatched double quote).
+
+ [! { ' }] this gives an error
+ [! { ' ' }] is ok
+ *)
+ let token = parse_until_end (read_long_string 0) in
+ let () =
+ match token with
+ | Tokens.LITERAL (content : string) -> (
+ (* Ensure every opening quote is closed before the end of the
+ comment *)
+ let string_state =
+ String.fold_left content ~init:None ~f:(fun state c ->
+ match (state, c) with
+ | None, '\'' -> Quote
+ | None, '"' -> DQuote
+ | Quote, '\'' -> None
+ | DQuote, '"' -> None
+ | _ -> state)
+ in
+ match string_state with
+ | None -> ()
+ | _ ->
+ Lexbuf.leave_state buffer;
+ raise UnclosedQuote)
+ | _ -> ()
+ in
+ let () =
+ try ignore (Lex_state.readLongStringWraper.end_string buffer)
+ with _ -> ()
+ in
skip_comment buffer
| '\'' ->
- parse_until_end
- (Lex_state.quotedStringWraper.wrap
- (read_quoted_string Lex_state.quotedStringWraper));
+ let _ =
+ parse_until_end
+ (Lex_state.quotedStringWraper.wrap
+ (read_quoted_string Lex_state.quotedStringWraper))
+ in
let _ = Lex_state.quotedStringWraper.end_string buffer in
skip_comment buffer
| '"' ->
- parse_until_end
- (Lex_state.dQuotedStringWraper.wrap
- (read_quoted_string Lex_state.dQuotedStringWraper));
+ let _ =
+ parse_until_end
+ (Lex_state.dQuotedStringWraper.wrap
+ (read_quoted_string Lex_state.dQuotedStringWraper))
+ in
let _ = Lex_state.dQuotedStringWraper.end_string buffer in
skip_comment buffer
| eol ->