aboutsummaryrefslogtreecommitdiff
path: root/lib/qparser
diff options
context:
space:
mode:
Diffstat (limited to 'lib/qparser')
-rw-r--r--lib/qparser/dune1
-rw-r--r--lib/qparser/lex_state.ml108
-rw-r--r--lib/qparser/lex_state.mli14
-rw-r--r--lib/qparser/lexbuf.ml23
-rw-r--r--lib/qparser/lexbuf.mli8
-rw-r--r--lib/qparser/lexer.ml168
6 files changed, 228 insertions, 94 deletions
diff --git a/lib/qparser/dune b/lib/qparser/dune
index f62c90e..8297268 100644
--- a/lib/qparser/dune
+++ b/lib/qparser/dune
@@ -4,6 +4,7 @@
str
menhirLib
qsp_syntax
+ sedlex
)
(preprocess (pps
sedlex.ppx
diff --git a/lib/qparser/lex_state.ml b/lib/qparser/lex_state.ml
index 37400e7..3cf757d 100644
--- a/lib/qparser/lex_state.ml
+++ b/lib/qparser/lex_state.ml
@@ -1,5 +1,14 @@
+(** This module provide functions used to parse the strings.
+
+ *)
+
exception Out_of_context
+let pr_err buffer =
+ let location, _ = Lexbuf.positions buffer in
+ let line = location.Lexing.pos_lnum and file = location.Lexing.pos_fname in
+ Format.eprintf "Error found at : %s:%d\n" file line
+
let space = [%sedlex.regexp? ' ' | '\t']
let spaces = [%sedlex.regexp? Plus space]
let single_quote = [%sedlex.regexp? '\'']
@@ -12,23 +21,21 @@ let leave_text end_wrapper buf buffer =
Tokens.LITERAL (Buffer.contents buf)
let rec nestedQuotedStringWraper : Lexbuf.stringWraper =
- let rec start_string f buffer =
+ let start_string f buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
(* There is no more way to add start a quoted string here *)
- | spaces -> start_string f buffer
- | double_quote ->
- Lexbuf.enter_state buffer (Lexbuf.String dQuotedStringWraper);
- Tokens.TEXT_MARKER
- | '{' ->
- Lexbuf.enter_state buffer (Lexbuf.MString 0);
- Tokens.TEXT_MARKER
| _ -> f buffer
in
{
start_string;
- wrap = (fun _ -> raise Out_of_context);
+ wrap =
+ (fun f ?(nested = false) buf buffer ->
+ let lexbuf = Lexbuf.buffer buffer in
+ match%sedlex lexbuf with
+ | "''" -> leave_text nestedQuotedStringWraper buf buffer
+ | _ -> f ~nested buf buffer);
end_string =
(fun buffer ->
let lexbuf = Lexbuf.buffer buffer in
@@ -39,38 +46,61 @@ let rec nestedQuotedStringWraper : Lexbuf.stringWraper =
| _ -> raise Not_found);
}
-and quotedStringWraper : Lexbuf.stringWraper =
- (* This function need to be recursirve in case of successive functions to
- escape *)
- let rec wrap f buf buffer =
+and nestedDquotedStringWraper : Lexbuf.stringWraper =
+ let start_string f buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
- | "''" ->
- Buffer.add_char buf '\'';
- wrap f buf buffer
- | single_quote -> leave_text quotedStringWraper buf buffer
- | _ -> f buf buffer
+ (* There is no more way to add start a quoted string here *)
+ | _ -> f buffer
in
+ let rec wrap (f : Lexbuf.buffer_builder) ?(nested = false) buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
+ match%sedlex lexbuf with
+ | double_quote ->
+ Buffer.add_char buf '"';
+ wrap ~nested:true f buf buffer
+ | double_quote, double_quote ->
+ leave_text nestedDquotedStringWraper buf buffer
+ | _ -> f ~nested buf buffer
+ in
+ {
+ start_string;
+ wrap;
+ end_string =
+ (fun buffer ->
+ let lexbuf = Lexbuf.buffer buffer in
+ match%sedlex lexbuf with
+ | double_quote, double_quote ->
+ Lexbuf.leave_state buffer;
+ TEXT_MARKER
+ | _ -> raise Not_found);
+ }
+
+and quotedStringWraper : Lexbuf.stringWraper =
let rec start_string f buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| spaces -> start_string f buffer
- | "''" ->
+ | single_quote -> raise Out_of_context
+ | single_quote, single_quote ->
Lexbuf.enter_state buffer (Lexbuf.String nestedQuotedStringWraper);
Tokens.TEXT_MARKER
- | double_quote ->
- Lexbuf.enter_state buffer (Lexbuf.String dQuotedStringWraper);
- Tokens.TEXT_MARKER
- | '{' ->
- Lexbuf.enter_state buffer (Lexbuf.MString 0);
- Tokens.TEXT_MARKER
| _ -> f buffer
in
+ let rec quoted_wrap (f : Lexbuf.buffer_builder) ?(nested = false) buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
+ match%sedlex lexbuf with
+ | single_quote, single_quote ->
+ Buffer.add_char buf '\'';
+ quoted_wrap ~nested f buf buffer
+ | single_quote -> leave_text quotedStringWraper buf buffer
+ | _ -> f ~nested buf buffer
+ in
{
start_string;
- wrap;
+ wrap = quoted_wrap;
end_string =
(fun buffer ->
let lexbuf = Lexbuf.buffer buffer in
@@ -78,29 +108,32 @@ and quotedStringWraper : Lexbuf.stringWraper =
| single_quote ->
Lexbuf.leave_state buffer;
TEXT_MARKER
- | _ -> raise Not_found);
+ | _ ->
+ pr_err buffer;
+ raise Not_found);
}
and dQuotedStringWraper : Lexbuf.stringWraper =
- let rec wrap f buf buffer =
+ let rec wrap (f : Lexbuf.buffer_builder) ?(nested = false) buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
- | "\"\"" ->
+ | single_quote ->
+ Buffer.add_char buf '\'';
+ wrap ~nested:true f buf buffer
+ | double_quote, double_quote ->
Buffer.add_char buf '"';
wrap f buf buffer
| double_quote -> leave_text dQuotedStringWraper buf buffer
- | _ -> f buf buffer
+ | _ -> f ~nested buf buffer
in
let rec start_string f buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| spaces -> start_string f buffer
- | single_quote ->
- Lexbuf.enter_state buffer (Lexbuf.String nestedQuotedStringWraper);
- Tokens.TEXT_MARKER
- | '{' ->
- Lexbuf.enter_state buffer (Lexbuf.MString 0);
+ | double_quote -> raise Out_of_context
+ | double_quote, double_quote ->
+ Lexbuf.enter_state buffer (Lexbuf.String nestedDquotedStringWraper);
Tokens.TEXT_MARKER
| _ -> f buffer
in
@@ -136,7 +169,10 @@ let defaultWraper : Lexbuf.stringWraper =
in
{
start_string;
- wrap = (fun _f _buf _buffer -> raise Out_of_context);
+ wrap =
+ (fun _f ?nested _buf _buffer ->
+ ignore nested;
+ raise Out_of_context);
end_string = (fun _buffer -> raise Out_of_context);
}
diff --git a/lib/qparser/lex_state.mli b/lib/qparser/lex_state.mli
index 1e69faf..a3d1ed0 100644
--- a/lib/qparser/lex_state.mli
+++ b/lib/qparser/lex_state.mli
@@ -1,14 +1,24 @@
(** This module keep a track of the different way to start, escape and end a
string in the lexer.
+ When a new string should be started ?
+ Which sequence identify the end of the string
+ How to handle the escaped characters inside this string
+ …
+
Depending on how the string was started (a single quote or double quote),
we have differents caracters for every of thoses actions.
-*)
+
+
+ The defaultWraper is used in any case, and other wrapper are stacked above
+ when needed. *)
val defaultWraper : Lexbuf.stringWraper
(** The default string lexer. Used when we start the lexing. *)
val quotedStringWraper : Lexbuf.stringWraper
-val nestedQuotedStringWraper : Lexbuf.stringWraper
val dQuotedStringWraper : Lexbuf.stringWraper
val readLongStringWraper : Lexbuf.stringWraper
+
+exception Out_of_context
+(** This exception should not be raised in a normal situation. *)
diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml
index 2433ea5..1d93f67 100644
--- a/lib/qparser/lexbuf.ml
+++ b/lib/qparser/lexbuf.ml
@@ -1,3 +1,5 @@
+open StdLabels
+
type t = {
buffer : Sedlexing.lexbuf;
mutable start_p : Lexing.position option;
@@ -6,7 +8,7 @@ type t = {
}
and lexer = t -> Tokens.token
-and buffer_builder = Buffer.t -> lexer
+and buffer_builder = ?nested:bool -> Buffer.t -> lexer
and stringWraper = {
start_string : lexer -> lexer;
@@ -22,15 +24,22 @@ and stringWraper = {
}
and state =
- | Token of stringWraper
+ | Token
| String of stringWraper
| MString of int
| EndString of stringWraper
| Expression
+let pp_state format = function
+ | Token -> Format.fprintf format "Token"
+ | String _ -> Format.fprintf format "String"
+ | MString _ -> Format.fprintf format "MString"
+ | EndString _ -> Format.fprintf format "EndString"
+ | Expression -> Format.fprintf format "Expression"
+
let state : t -> state option = fun t -> Stack.top_opt t.state
let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
-let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
+let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
let start : t -> unit =
@@ -80,3 +89,11 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
lexer
let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
+
+let overlay : t -> lexer -> lexer =
+ fun t lexer ->
+ let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
+ List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
+ match layer with
+ | String wraper | EndString wraper -> wraper.start_string acc
+ | _ -> acc)
diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli
index dd13da4..ac3b262 100644
--- a/lib/qparser/lexbuf.mli
+++ b/lib/qparser/lexbuf.mli
@@ -44,7 +44,7 @@ val rollback : t -> unit
*)
type lexer = t -> Tokens.token
-type buffer_builder = Buffer.t -> lexer
+and buffer_builder = ?nested:bool -> Buffer.t -> t -> Tokens.token
type stringWraper = {
start_string : lexer -> lexer;
@@ -60,13 +60,15 @@ type stringWraper = {
}
type state =
- | Token of stringWraper (** Default state, parsing the tokens *)
+ | Token (** Default state, parsing the tokens *)
| String of stringWraper (** String enclosed by [''] *)
| MString of int (** String enclosed by [{}]*)
| EndString of stringWraper
(** State raised just before closing the string *)
| Expression (** Expression where [!] is an operator *)
+val pp_state : Format.formatter -> state -> unit
+
val state : t -> state option
(** Get the current state for the lexer.
@@ -77,3 +79,5 @@ val enter_state : t -> state -> unit
val leave_state : t -> unit
(** Leave the current state *)
+
+val overlay : t -> lexer -> lexer
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index 114846c..8f3645c 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -3,11 +3,17 @@
*)
open Tokens
+open StdLabels
exception UnclosedQuote
exception LexError of string
exception EOF
+let pr_err buffer =
+ let location, _ = Lexbuf.positions buffer in
+ let line = location.Lexing.pos_lnum and file = location.Lexing.pos_fname in
+ Format.eprintf "read_quoted_string : %s:%d\n" file line
+
(* Extract the location name from the pattern *)
let location_name = Str.regexp {|.* \(.*\)|}
@@ -62,12 +68,26 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string level buf buffer =
+let rec read_long_string ?(nested = false) level buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
+ | "<<" ->
+ if not nested then (
+ match Buffer.length buf with
+ | 0 ->
+ Lexbuf.enter_state buffer Lexbuf.Token;
+ ENTER_EMBED
+ | _ ->
+ let result = Tokens.LITERAL (Buffer.contents buf) in
+ Buffer.reset buf;
+ Lexbuf.rollback buffer;
+ result)
+ else (
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string ~nested level buf buffer)
| '{' ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level + 1) buf buffer
+ read_long_string ~nested (level + 1) buf buffer
| '}' -> (
match level with
| 0 ->
@@ -79,64 +99,87 @@ let rec read_long_string level buf buffer =
| _ ->
(* We have nested strings. Do not terminate end *)
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level - 1) buf buffer)
+ read_long_string ~nested (level - 1) buf buffer)
+ | '\'' | '"' ->
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string ~nested:true level buf buffer
| eol ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_long_string level buf buffer
+ read_long_string ~nested level buf buffer
| any ->
Buffer.add_string buf (Lexbuf.content buffer);
- read_long_string level buf buffer
- | _ -> raise Not_found
+ read_long_string ~nested level buf buffer
+ | _ ->
+ pr_err buffer;
+ raise Not_found
(** Read the text inside a ['] *)
-let rec read_quoted_string f buf buffer =
+let rec read_quoted_string : Lexbuf.stringWraper -> Lexbuf.buffer_builder =
+ fun f ?(nested = false) buf buffer ->
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
- | eol ->
+ | "<<" ->
+ if not nested then (
+ match Buffer.length buf with
+ | 0 ->
+ Lexbuf.enter_state buffer Lexbuf.Token;
+ ENTER_EMBED
+ | _ ->
+ let result = Tokens.LITERAL (Buffer.contents buf) in
+ Buffer.reset buf;
+ Lexbuf.rollback buffer;
+ result)
+ else (
+ Buffer.add_string buf (Lexbuf.content buffer);
+ (f.wrap (read_quoted_string f)) buf buffer)
+ | eol | any ->
Buffer.add_string buf (Lexbuf.content buffer);
- (f (read_quoted_string f)) buf buffer
- | any ->
- Buffer.add_string buf (Lexbuf.content buffer);
- (f (read_quoted_string f)) buf buffer
+ (f.wrap (read_quoted_string f)) buf buffer
| _ ->
- let location, _ = Lexbuf.positions buffer in
- let line = location.Lexing.pos_lnum
- and file = location.Lexing.pos_fname in
- Format.eprintf "read_quoted_string : %s:%d\n" file line;
+ pr_err buffer;
raise Not_found
let rec skip_comment buffer =
+ (* Simplified way to skip the content of a string until the end marker.
+ (expect the string to be well formed) *)
+ let rec parse_until_end f =
+ let _ = wait_balance f buffer in
+ match Lexbuf.state buffer with
+ | Some Lexbuf.Token ->
+ Lexbuf.leave_state buffer;
+ parse_until_end f
+ | Some (Lexbuf.EndString _) -> ()
+ | _ -> parse_until_end f
+ in
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- let _ = wait_balance (read_long_string 0) buffer in
+ parse_until_end (read_long_string 0);
let _ = Lex_state.readLongStringWraper.end_string buffer in
skip_comment buffer
| '\'' ->
- let _ =
- wait_balance
- (Lex_state.quotedStringWraper.wrap
- (read_quoted_string Lex_state.quotedStringWraper.wrap))
- buffer
- in
+ parse_until_end
+ (Lex_state.quotedStringWraper.wrap
+ (read_quoted_string Lex_state.quotedStringWraper));
let _ = Lex_state.quotedStringWraper.end_string buffer in
skip_comment buffer
| '"' ->
- let _ =
- wait_balance
- (Lex_state.dQuotedStringWraper.wrap
- (read_quoted_string Lex_state.dQuotedStringWraper.wrap))
- buffer
- in
+ parse_until_end
+ (Lex_state.dQuotedStringWraper.wrap
+ (read_quoted_string Lex_state.dQuotedStringWraper));
let _ = Lex_state.dQuotedStringWraper.end_string buffer in
skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
- parsing. *)
+ parsing.
+ This is required because the eol is also a part of the syntax, and do
+ cannot be discard with the end of the comment. *)
Lexbuf.rollback buffer;
COMMENT
| any -> skip_comment buffer
- | _ -> raise Not_found
+ | _ ->
+ pr_err buffer;
+ raise Not_found
(** Main lexer *)
let rec parse_token : Lexbuf.t -> token =
@@ -183,19 +226,20 @@ let rec parse_token : Lexbuf.t -> token =
Lexbuf.leave_state buffer;
R_PAREN
| ">>" ->
+ (* Leave the expression if we have any*)
+ leave_expression buffer;
+ (* Now leave the token mode and return to the string *)
Lexbuf.leave_state buffer;
- parse_token buffer
+ LEAVE_EMBED
| '<' -> LT
| '>' -> GT
| coma -> COMA
| '=' ->
Lexbuf.enter_state buffer Lexbuf.Expression;
-
EQUAL
| ident -> build_ident buffer
| eol ->
leave_expression buffer;
-
EOL
| '&' ->
leave_expression buffer;
@@ -214,31 +258,53 @@ let rec parse_token : Lexbuf.t -> token =
let main buffer =
match Lexbuf.state buffer with
| Some (Lexbuf.String w) ->
- wait_balance (w.wrap @@ read_quoted_string w.wrap) buffer
+ wait_balance (w.wrap @@ read_quoted_string w) buffer
| Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer
| Some (Lexbuf.EndString w) -> w.end_string buffer
- | Some (Lexbuf.Token w) -> w.start_string parse_token buffer
- | _ -> Lex_state.defaultWraper.start_string parse_token buffer
+ | _ ->
+ let parser =
+ parse_token |> Lex_state.defaultWraper.start_string
+ |> Lexbuf.overlay buffer
+ in
+ parser buffer
let rec discard buffer =
let lexbuf = Lexbuf.buffer buffer in
+
match%sedlex lexbuf with
- | '\'' ->
- ignore
- (wait_balance
- (read_quoted_string Lex_state.quotedStringWraper.wrap)
- buffer);
- discard buffer
- | '"' ->
- ignore
- (wait_balance
- (read_quoted_string Lex_state.quotedStringWraper.wrap)
- buffer);
- discard buffer
+ | '\'' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.String _) ->
+ (* If we are inside a string, close it. *)
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ (* Otherwise wait skip until the end of the starting one *)
+ ignore
+ (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16)
+ buffer);
+ discard buffer)
+ | '"' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.String _) ->
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ ignore
+ (read_quoted_string Lex_state.quotedStringWraper (Buffer.create 16)
+ buffer);
+ discard buffer)
+ | '}' -> (
+ match Lexbuf.state buffer with
+ | Some (Lexbuf.MString _) ->
+ Lexbuf.leave_state buffer;
+ discard buffer
+ | _ ->
+ ignore (read_long_string 0 (Buffer.create 16) buffer);
+ discard buffer)
| '{' ->
- ignore (wait_balance (read_long_string 0) buffer);
+ ignore (read_long_string 0 (Buffer.create 16) buffer);
discard buffer
- | eof -> raise EOF
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
leave_expression buffer;
()