aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/qparser/analyzer.ml2
-rw-r--r--lib/qparser/lexbuf.ml17
-rw-r--r--lib/qparser/lexbuf.mli16
-rw-r--r--lib/qparser/lexer.ml57
-rw-r--r--lib/qparser/lexer.mli5
-rw-r--r--lib/qparser/qsp_expression.mly2
-rw-r--r--lib/qparser/tokens.mly1
7 files changed, 86 insertions, 14 deletions
diff --git a/lib/qparser/analyzer.ml b/lib/qparser/analyzer.ml
index 58a117f..a79535e 100644
--- a/lib/qparser/analyzer.ml
+++ b/lib/qparser/analyzer.ml
@@ -13,7 +13,7 @@ let parse :
let module IncrementalParser =
Interpreter.Interpreter (Parser.MenhirInterpreter) in
fun l ->
- let lexer = Lexbuf.tokenize Lexer.token l in
+ let lexer = Lexbuf.tokenize Lexer.main l in
let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in
diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml
index cd0add3..61f86cd 100644
--- a/lib/qparser/lexbuf.ml
+++ b/lib/qparser/lexbuf.ml
@@ -1,7 +1,10 @@
+type state = Token | String | DString | MString of int | EndString
+
type t = {
buffer : Sedlexing.lexbuf;
mutable start_p : Lexing.position option;
mutable expression_level : int;
+ state : state Stack.t;
reset_line : bool;
}
@@ -27,7 +30,13 @@ let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =
fun ?(reset_line = true) t ->
- { buffer = t; start_p = None; expression_level = 0; reset_line }
+ {
+ buffer = t;
+ start_p = None;
+ expression_level = 0;
+ reset_line;
+ state = Stack.create ();
+ }
let set_start_position : t -> Lexing.position -> unit =
fun t position ->
@@ -53,6 +62,11 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
in
lexer
+let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
+let state : t -> state option = fun t -> Stack.top_opt t.state
+let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
+let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state)
+
(* The comment system is terrible. The same symbol can be used for :
- starting a comment
- inequality operation
@@ -68,4 +82,3 @@ let decr_level : t -> unit =
let reset_level : t -> unit = fun t -> t.expression_level <- 0
let level : t -> int = fun t -> t.expression_level
-let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli
index 5a0bbcd..db81d2c 100644
--- a/lib/qparser/lexbuf.mli
+++ b/lib/qparser/lexbuf.mli
@@ -26,6 +26,19 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
(** Function to use in the parser in order to extract the token match, and the
starting and ending position. *)
+val rollback : t -> unit
+(** Rollback the latest token matched *)
+
+type state = Token | String | DString | MString of int | EndString
+
+val state : t -> state option
+(** Get the current state for the lexer *)
+
+val enter_state : t -> state -> unit
+val leave_state : t -> unit
+
+(** {1 Level in expressions} *)
+
val level : t -> int
(** The state track the nesting level in the expression. Depending of the
level, the token [!] will be considered as a comment or a boolean
@@ -34,6 +47,3 @@ val level : t -> int
val incr_level : t -> unit
val decr_level : t -> unit
val reset_level : t -> unit
-
-val rollback : t -> unit
-(** Rollback the latest token matched *)
diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml
index f2d04bf..75072c6 100644
--- a/lib/qparser/lexer.ml
+++ b/lib/qparser/lexer.ml
@@ -53,6 +53,21 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
+let end_string : Lexbuf.t -> token =
+ fun buffer ->
+ let lexbuf = Lexbuf.buffer buffer in
+ match%sedlex lexbuf with
+ | '}' ->
+ Lexbuf.leave_state buffer;
+ TEXT_MARKER
+ | '\'' ->
+ Lexbuf.leave_state buffer;
+ TEXT_MARKER
+ | '"' ->
+ Lexbuf.leave_state buffer;
+ TEXT_MARKER
+ | _ -> raise Not_found
+
let rec read_long_string level buf buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
@@ -61,7 +76,11 @@ let rec read_long_string level buf buffer =
read_long_string (level + 1) buf buffer
| '}' -> (
match level with
- | 0 -> Buffer.contents buf
+ | 0 ->
+ Lexbuf.leave_state buffer;
+ Lexbuf.enter_state buffer Lexbuf.EndString;
+ Lexbuf.rollback buffer;
+ LITERAL (Buffer.contents buf)
| _ ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
read_long_string (level - 1) buf buffer)
@@ -79,7 +98,11 @@ let rec read_dquoted_string buf buffer =
| "\"\"" ->
Buffer.add_char buf '"';
read_dquoted_string buf buffer
- | '"' -> Buffer.contents buf
+ | '"' ->
+ Lexbuf.leave_state buffer;
+ Lexbuf.enter_state buffer Lexbuf.EndString;
+ Lexbuf.rollback buffer;
+ LITERAL (Buffer.contents buf)
| any ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
read_dquoted_string buf buffer
@@ -91,7 +114,11 @@ let rec read_quoted_string buf buffer =
| "''" ->
Buffer.add_char buf '\'';
read_quoted_string buf buffer
- | '\'' -> Buffer.contents buf
+ | '\'' ->
+ Lexbuf.leave_state buffer;
+ Lexbuf.enter_state buffer Lexbuf.EndString;
+ Lexbuf.rollback buffer;
+ LITERAL (Buffer.contents buf)
| eol ->
Buffer.add_string buf (Lexbuf.content buffer);
read_quoted_string buf buffer
@@ -105,12 +132,15 @@ let rec skip_comment buffer =
match%sedlex lexbuf with
| '{' ->
let _ = wait_balance (read_long_string 0) buffer in
+ let _ = end_string buffer in
skip_comment buffer
| '\'' ->
let _ = wait_balance read_quoted_string buffer in
+ let _ = end_string buffer in
skip_comment buffer
| '"' ->
let _ = wait_balance read_dquoted_string buffer in
+ let _ = end_string buffer in
skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
@@ -180,9 +210,16 @@ let rec token : Lexbuf.t -> token =
AMPERSAND
| '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer
| spaces -> token buffer
- | '\'' -> LITERAL (wait_balance read_quoted_string buffer)
- | '"' -> LITERAL (wait_balance read_dquoted_string buffer)
- | '{' -> LITERAL (wait_balance (read_long_string 0) buffer)
+ | '\'' ->
+ Lexbuf.enter_state buffer Lexbuf.String;
+ TEXT_MARKER
+ | '"' ->
+ Lexbuf.enter_state buffer Lexbuf.DString;
+ TEXT_MARKER
+ | '{' ->
+ Lexbuf.enter_state buffer (Lexbuf.MString 0);
+ TEXT_MARKER
+ | '}' -> TEXT_MARKER
| eof -> raise EOF
| _ ->
let tok = Lexbuf.content buffer in
@@ -190,6 +227,14 @@ let rec token : Lexbuf.t -> token =
raise @@ LexError msg
+let main buffer =
+ match Lexbuf.state buffer with
+ | Some Lexbuf.String -> wait_balance read_quoted_string buffer
+ | Some Lexbuf.DString -> wait_balance read_dquoted_string buffer
+ | Some (Lexbuf.MString level) -> wait_balance (read_long_string level) buffer
+ | Some Lexbuf.EndString -> end_string buffer
+ | _ -> token buffer
+
let rec discard buffer =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
diff --git a/lib/qparser/lexer.mli b/lib/qparser/lexer.mli
index 30766e0..26d59cb 100644
--- a/lib/qparser/lexer.mli
+++ b/lib/qparser/lexer.mli
@@ -2,5 +2,8 @@ exception EOF
exception UnclosedQuote
exception LexError of string
-val token : Lexbuf.t -> Tokens.token
val discard : Lexbuf.t -> unit
+(** Discard the remaining element in the location until the end of this one.
+ Used in case of unrecoverable error *)
+
+val main : Lexbuf.t -> Tokens.token
diff --git a/lib/qparser/qsp_expression.mly b/lib/qparser/qsp_expression.mly
index 362c717..738c73c 100644
--- a/lib/qparser/qsp_expression.mly
+++ b/lib/qparser/qsp_expression.mly
@@ -34,7 +34,7 @@
op = binary_operator
expr2 = expression
{ Analyzer.Expression.boperator $loc op expr1 expr2 }
- | v = LITERAL { Analyzer.Expression.literal $loc v }
+ | TEXT_MARKER v = LITERAL TEXT_MARKER { Analyzer.Expression.literal $loc v }
| i = INTEGER { Analyzer.Expression.integer $loc i }
| v = variable { Analyzer.Expression.ident v }
%prec p_variable
diff --git a/lib/qparser/tokens.mly b/lib/qparser/tokens.mly
index fa74cc7..6b218ed 100644
--- a/lib/qparser/tokens.mly
+++ b/lib/qparser/tokens.mly
@@ -24,6 +24,7 @@
%token <string>IDENT
%token <string>LITERAL
%token <string>INTEGER
+%token TEXT_MARKER
%token COMMENT