diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/qparser/lexer.ml | 81 | ||||
-rw-r--r-- | lib/qparser/lexer.mli | 3 |
2 files changed, 72 insertions, 12 deletions
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index e3524cc..42f3c1c 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit] let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] -let rec read_long_string ?(nested = false) level buf buffer = +(** Read a quote started with '{' + + The function return the parsed string, but the closing token has been + rollbacked, leaving the state in [Lexbuf.EndString _]. + + The next call to [main] will call the associated function, effectively + leaving the string mode in the parser. + + @param nested tell with started another block of string inside this one *) +let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token + = + fun ?(nested = false) level buf buffer -> let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | "<<" -> @@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer = Lexbuf.leave_state buffer; Lexbuf.enter_state buffer (Lexbuf.EndString Lex_state.readLongStringWraper); + (* rollback the latest token *) Lexbuf.rollback buffer; LITERAL (Buffer.contents buf) | _ -> @@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder = (f.wrap (read_quoted_string f)) buf buffer | _ -> raise Not_found +(** Track the kind of nested string inside a multiline string inside a + comment. + + Some constructions are not allowed in this specific case (see later) +*) +type commentedString = None | Quote | DQuote + let rec skip_comment buffer = (* Simplified way to skip the content of a string until the end marker. (expect the string to be well formed) *) - let rec parse_until_end f = - let _ = wait_balance f buffer in + let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token = + fun f -> + let token = wait_balance f buffer in match Lexbuf.state buffer with | Some Lexbuf.Token -> Lexbuf.leave_state buffer; parse_until_end f - | Some (Lexbuf.EndString _) -> () + | Some (Lexbuf.EndString _) -> token | _ -> parse_until_end f in let lexbuf = Lexbuf.buffer buffer in match%sedlex lexbuf with | '{' -> - parse_until_end (read_long_string 0); - let _ = Lex_state.readLongStringWraper.end_string buffer in + (* There are illegal constructions inside a comment containing {} + block. + + Every opening text marker shall have the corresponding closing token + (but inside a nested block we can have unmatched double quote). + + [! { ' }] this gives an error + [! { ' ' }] is ok + *) + let token = parse_until_end (read_long_string 0) in + let () = + match token with + | Tokens.LITERAL (content : string) -> ( + (* Ensure every opening quote is closed before the end of the + comment *) + let string_state = + String.fold_left content ~init:None ~f:(fun state c -> + match (state, c) with + | None, '\'' -> Quote + | None, '"' -> DQuote + | Quote, '\'' -> None + | DQuote, '"' -> None + | _ -> state) + in + match string_state with + | None -> () + | _ -> + Lexbuf.leave_state buffer; + raise UnclosedQuote) + | _ -> () + in + let () = + try ignore (Lex_state.readLongStringWraper.end_string buffer) + with _ -> () + in skip_comment buffer | '\'' -> - parse_until_end - (Lex_state.quotedStringWraper.wrap - (read_quoted_string Lex_state.quotedStringWraper)); + let _ = + parse_until_end + (Lex_state.quotedStringWraper.wrap + (read_quoted_string Lex_state.quotedStringWraper)) + in let _ = Lex_state.quotedStringWraper.end_string buffer in skip_comment buffer | '"' -> - parse_until_end - (Lex_state.dQuotedStringWraper.wrap - (read_quoted_string Lex_state.dQuotedStringWraper)); + let _ = + parse_until_end + (Lex_state.dQuotedStringWraper.wrap + (read_quoted_string Lex_state.dQuotedStringWraper)) + in let _ = Lex_state.dQuotedStringWraper.end_string buffer in skip_comment buffer | eol -> diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli index 299fa46..854bb1e 100644 --- a/lib/qparser/lexer.mli +++ b/lib/qparser/lexer.mli @@ -6,7 +6,10 @@ working. *) exception EOF + exception UnclosedQuote +(** Error reported when a string is not closed properly *) + exception LexError of string val discard : Lexbuf.t -> unit |