diff options
Diffstat (limited to 'lib/qparser')
| -rw-r--r-- | lib/qparser/lexer.ml | 81 | ||||
| -rw-r--r-- | lib/qparser/lexer.mli | 3 | 
2 files changed, 72 insertions, 12 deletions
| diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index e3524cc..42f3c1c 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -63,7 +63,18 @@ let location_ident = [%sedlex.regexp? letters | digit]  let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']  let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] -let rec read_long_string ?(nested = false) level buf buffer = +(** Read a quote started with '{' + +    The function return the parsed string, but the closing token has been +    rollbacked, leaving the state in [Lexbuf.EndString _]. + +    The next call to [main] will call the associated function, effectively +    leaving the string mode in the parser. + +    @param nested tell with started another block of string inside this one *) +let rec read_long_string : ?nested:bool -> int -> Buffer.t -> Lexbuf.t -> token +    = + fun ?(nested = false) level buf buffer ->    let lexbuf = Lexbuf.buffer buffer in    match%sedlex lexbuf with    | "<<" -> @@ -89,6 +100,7 @@ let rec read_long_string ?(nested = false) level buf buffer =            Lexbuf.leave_state buffer;            Lexbuf.enter_state buffer              (Lexbuf.EndString Lex_state.readLongStringWraper); +          (* rollback the latest token *)            Lexbuf.rollback buffer;            LITERAL (Buffer.contents buf)        | _ -> @@ -141,34 +153,79 @@ let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =        (f.wrap (read_quoted_string f)) buf buffer    | _ -> raise Not_found +(** Track the kind of nested string inside a multiline string inside a +    comment. + +    Some constructions are not allowed in this specific case (see later) +*) +type commentedString = None | Quote | DQuote +  let rec skip_comment buffer =    (* Simplified way to skip the content of a string until the end marker.       (expect the string to be well formed) *) -  let rec parse_until_end f = -    let _ = wait_balance f buffer in +  let rec parse_until_end : (Buffer.t -> Lexbuf.t -> token) -> token = +   fun f -> +    let token = wait_balance f buffer in      match Lexbuf.state buffer with      | Some Lexbuf.Token ->          Lexbuf.leave_state buffer;          parse_until_end f -    | Some (Lexbuf.EndString _) -> () +    | Some (Lexbuf.EndString _) -> token      | _ -> parse_until_end f    in    let lexbuf = Lexbuf.buffer buffer in    match%sedlex lexbuf with    | '{' -> -      parse_until_end (read_long_string 0); -      let _ = Lex_state.readLongStringWraper.end_string buffer in +      (* There are illegal constructions inside a comment containing {} +         block. + +         Every opening text marker shall have the corresponding closing token +         (but inside a nested block we can have unmatched double quote). + +         [! { ' }] this gives an error +         [! { ' ' }] is ok +      *) +      let token = parse_until_end (read_long_string 0) in +      let () = +        match token with +        | Tokens.LITERAL (content : string) -> ( +            (* Ensure every opening quote is closed before the end of the +               comment *) +            let string_state = +              String.fold_left content ~init:None ~f:(fun state c -> +                  match (state, c) with +                  | None, '\'' -> Quote +                  | None, '"' -> DQuote +                  | Quote, '\'' -> None +                  | DQuote, '"' -> None +                  | _ -> state) +            in +            match string_state with +            | None -> () +            | _ -> +                Lexbuf.leave_state buffer; +                raise UnclosedQuote) +        | _ -> () +      in +      let () = +        try ignore (Lex_state.readLongStringWraper.end_string buffer) +        with _ -> () +      in        skip_comment buffer    | '\'' -> -      parse_until_end -        (Lex_state.quotedStringWraper.wrap -           (read_quoted_string Lex_state.quotedStringWraper)); +      let _ = +        parse_until_end +          (Lex_state.quotedStringWraper.wrap +             (read_quoted_string Lex_state.quotedStringWraper)) +      in        let _ = Lex_state.quotedStringWraper.end_string buffer in        skip_comment buffer    | '"' -> -      parse_until_end -        (Lex_state.dQuotedStringWraper.wrap -           (read_quoted_string Lex_state.dQuotedStringWraper)); +      let _ = +        parse_until_end +          (Lex_state.dQuotedStringWraper.wrap +             (read_quoted_string Lex_state.dQuotedStringWraper)) +      in        let _ = Lex_state.dQuotedStringWraper.end_string buffer in        skip_comment buffer    | eol -> diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli index 299fa46..854bb1e 100644 --- a/lib/qparser/lexer.mli +++ b/lib/qparser/lexer.mli @@ -6,7 +6,10 @@      working. *)  exception EOF +  exception UnclosedQuote +(** Error reported when a string is not closed properly *) +  exception LexError of string  val discard : Lexbuf.t -> unit | 
