diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/qparser/lexbuf.ml | 36 | ||||
-rw-r--r-- | lib/qparser/lexbuf.mli | 24 | ||||
-rw-r--r-- | lib/qparser/lexer.ml | 40 |
3 files changed, 53 insertions, 47 deletions
diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml index 61f86cd..af8c48a 100644 --- a/lib/qparser/lexbuf.ml +++ b/lib/qparser/lexbuf.ml @@ -1,9 +1,14 @@ -type state = Token | String | DString | MString of int | EndString +type state = + | Token + | String + | DString + | MString of int + | EndString + | Expression type t = { buffer : Sedlexing.lexbuf; mutable start_p : Lexing.position option; - mutable expression_level : int; state : state Stack.t; reset_line : bool; } @@ -17,8 +22,7 @@ let start : t -> unit = if not t.reset_line then Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 } in - t.start_p <- None; - t.expression_level <- 0 + t.start_p <- None let positions : t -> Lexing.position * Lexing.position = fun t -> @@ -30,13 +34,7 @@ let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t = fun ?(reset_line = true) t -> - { - buffer = t; - start_p = None; - expression_level = 0; - reset_line; - state = Stack.create (); - } + { buffer = t; start_p = None; reset_line; state = Stack.create () } let set_start_position : t -> Lexing.position -> unit = fun t position -> @@ -63,22 +61,14 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position lexer let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer -let state : t -> state option = fun t -> Stack.top_opt t.state -let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state -let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) -(* The comment system is terrible. The same symbol can be used for : +(** The comment system is terrible. The same symbol can be used for : - starting a comment - inequality operation In order to manage this, I try to identify the context in a very basic way, using a counter for determining the token to send. *) +let state : t -> state option = fun t -> Stack.top_opt t.state -let incr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level + 1 - -let decr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level - 1 - -let reset_level : t -> unit = fun t -> t.expression_level <- 0 -let level : t -> int = fun t -> t.expression_level +let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state +let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli index db81d2c..ec94d1b 100644 --- a/lib/qparser/lexbuf.mli +++ b/lib/qparser/lexbuf.mli @@ -29,21 +29,21 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position val rollback : t -> unit (** Rollback the latest token matched *) -type state = Token | String | DString | MString of int | EndString +(** {1 State in expressions} *) + +type state = + | Token + | String + | DString + | MString of int + | EndString + | Expression val state : t -> state option (** Get the current state for the lexer *) val enter_state : t -> state -> unit -val leave_state : t -> unit - -(** {1 Level in expressions} *) +(** Enter into a new state *) -val level : t -> int -(** The state track the nesting level in the expression. Depending of the - level, the token [!] will be considered as a comment or a boolean - operation. *) - -val incr_level : t -> unit -val decr_level : t -> unit -val reset_level : t -> unit +val leave_state : t -> unit +(** Leave the current state *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index 75072c6..abe47ac 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -11,6 +11,14 @@ exception EOF (* Extract the location name from the pattern *) let location_name = Str.regexp {|.* \(.*\)|} +(** Remove all the expression state when we are leaving the expression itself. *) +let rec leave_expression buffer = + match Lexbuf.state buffer with + | Some Lexbuf.Expression -> + Lexbuf.leave_state buffer; + leave_expression buffer + | _ -> () + (** Try to read the identifier and check if this is a function, a keyword, or just a variable. @@ -20,7 +28,9 @@ let build_ident buffer = try let value = Hashtbl.find Idents.keyword_table id in let _ = - match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () + match value with + | IF | ELIF -> Lexbuf.enter_state buffer Lexbuf.Expression + | _ -> () in value with Not_found -> @@ -172,7 +182,7 @@ let rec token : Lexbuf.t -> token = LOCATION_START ident | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; + leave_expression buffer; LOCATION_END | Plus digit -> INTEGER (Lexbuf.content buffer) | '+' -> PLUS @@ -184,31 +194,38 @@ let rec token : Lexbuf.t -> token = | '*' -> STAR | ':' -> (* We are leaving the block, the comment will be handled again *) - Lexbuf.decr_level buffer; - + Lexbuf.leave_state buffer; COLUMN | '[' -> L_BRACKET | ']' -> R_BRACKET | '(' -> - Lexbuf.incr_level buffer; + Lexbuf.enter_state buffer Lexbuf.Expression; L_PAREN | ')' -> - Lexbuf.decr_level buffer; + Lexbuf.leave_state buffer; R_PAREN | '<' -> LT | '>' -> GT + | ">>" -> + Lexbuf.leave_state buffer; + token buffer | coma -> COMA | '=' -> - Lexbuf.incr_level buffer; + Lexbuf.enter_state buffer Lexbuf.Expression; + EQUAL | ident -> build_ident buffer | eol -> - Lexbuf.reset_level buffer; + leave_expression buffer; + EOL | '&' -> - Lexbuf.reset_level buffer; + leave_expression buffer; AMPERSAND - | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer + | '!' -> ( + match Lexbuf.state buffer with + | Some Lexbuf.Expression -> EXCLAMATION + | _ -> skip_comment buffer) | spaces -> token buffer | '\'' -> Lexbuf.enter_state buffer Lexbuf.String; @@ -224,7 +241,6 @@ let rec token : Lexbuf.t -> token = | _ -> let tok = Lexbuf.content buffer in let msg = Format.asprintf "Unexpected character %S" tok in - raise @@ LexError msg let main buffer = @@ -249,7 +265,7 @@ let rec discard buffer = discard buffer | eof -> raise EOF | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; + leave_expression buffer; () | '!' -> ignore @@ skip_comment buffer; |