From dd060261e35fcb8a57f03b01dbe84ab772a2a199 Mon Sep 17 00:00:00 2001 From: Chimrod <> Date: Sat, 28 Oct 2023 16:47:23 +0200 Subject: Set up a context for parsing the literal strings --- lib/qparser/lex_state.ml | 154 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 lib/qparser/lex_state.ml (limited to 'lib/qparser/lex_state.ml') diff --git a/lib/qparser/lex_state.ml b/lib/qparser/lex_state.ml new file mode 100644 index 0000000..37400e7 --- /dev/null +++ b/lib/qparser/lex_state.ml @@ -0,0 +1,154 @@ +exception Out_of_context + +let space = [%sedlex.regexp? ' ' | '\t'] +let spaces = [%sedlex.regexp? Plus space] +let single_quote = [%sedlex.regexp? '\''] +let double_quote = [%sedlex.regexp? '"'] + +let leave_text end_wrapper buf buffer = + Lexbuf.leave_state buffer; + Lexbuf.enter_state buffer (Lexbuf.EndString end_wrapper); + Lexbuf.rollback buffer; + Tokens.LITERAL (Buffer.contents buf) + +let rec nestedQuotedStringWraper : Lexbuf.stringWraper = + let rec start_string f buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + (* There is no more way to add start a quoted string here *) + | spaces -> start_string f buffer + | double_quote -> + Lexbuf.enter_state buffer (Lexbuf.String dQuotedStringWraper); + Tokens.TEXT_MARKER + | '{' -> + Lexbuf.enter_state buffer (Lexbuf.MString 0); + Tokens.TEXT_MARKER + | _ -> f buffer + in + + { + start_string; + wrap = (fun _ -> raise Out_of_context); + end_string = + (fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "''" -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | _ -> raise Not_found); + } + +and quotedStringWraper : Lexbuf.stringWraper = + (* This function need to be recursirve in case of successive functions to + escape *) + let rec wrap f buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "''" -> + Buffer.add_char buf '\''; + wrap f buf buffer + | single_quote -> leave_text quotedStringWraper buf buffer + | _ -> f buf buffer + in + + let rec start_string f buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | spaces -> start_string f buffer + | "''" -> + Lexbuf.enter_state buffer (Lexbuf.String nestedQuotedStringWraper); + Tokens.TEXT_MARKER + | double_quote -> + Lexbuf.enter_state buffer (Lexbuf.String dQuotedStringWraper); + Tokens.TEXT_MARKER + | '{' -> + Lexbuf.enter_state buffer (Lexbuf.MString 0); + Tokens.TEXT_MARKER + | _ -> f buffer + in + + { + start_string; + wrap; + end_string = + (fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | single_quote -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | _ -> raise Not_found); + } + +and dQuotedStringWraper : Lexbuf.stringWraper = + let rec wrap f buf buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "\"\"" -> + Buffer.add_char buf '"'; + wrap f buf buffer + | double_quote -> leave_text dQuotedStringWraper buf buffer + | _ -> f buf buffer + in + + let rec start_string f buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | spaces -> start_string f buffer + | single_quote -> + Lexbuf.enter_state buffer (Lexbuf.String nestedQuotedStringWraper); + Tokens.TEXT_MARKER + | '{' -> + Lexbuf.enter_state buffer (Lexbuf.MString 0); + Tokens.TEXT_MARKER + | _ -> f buffer + in + + { + start_string; + wrap; + end_string = + (fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | double_quote -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | _ -> raise Not_found); + } + +let defaultWraper : Lexbuf.stringWraper = + let rec start_string f buffer = + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | spaces -> start_string f buffer + | single_quote -> + Lexbuf.enter_state buffer (Lexbuf.String quotedStringWraper); + Tokens.TEXT_MARKER + | double_quote -> + Lexbuf.enter_state buffer (Lexbuf.String dQuotedStringWraper); + Tokens.TEXT_MARKER + | '{' -> + Lexbuf.enter_state buffer (Lexbuf.MString 0); + Tokens.TEXT_MARKER + | _ -> f buffer + in + { + start_string; + wrap = (fun _f _buf _buffer -> raise Out_of_context); + end_string = (fun _buffer -> raise Out_of_context); + } + +let readLongStringWraper : Lexbuf.stringWraper = + { + defaultWraper with + end_string = + (fun buffer -> + let lexbuf = Lexbuf.buffer buffer in + match%sedlex lexbuf with + | "}" -> + Lexbuf.leave_state buffer; + TEXT_MARKER + | _ -> raise Not_found); + } -- cgit v1.2.3