diff options
Diffstat (limited to 'lib/qparser')
| -rw-r--r-- | lib/qparser/lexbuf.ml | 36 | ||||
| -rw-r--r-- | lib/qparser/lexbuf.mli | 24 | ||||
| -rw-r--r-- | lib/qparser/lexer.ml | 40 | 
3 files changed, 53 insertions, 47 deletions
| diff --git a/lib/qparser/lexbuf.ml b/lib/qparser/lexbuf.ml index 61f86cd..af8c48a 100644 --- a/lib/qparser/lexbuf.ml +++ b/lib/qparser/lexbuf.ml @@ -1,9 +1,14 @@ -type state = Token | String | DString | MString of int | EndString +type state = +  | Token +  | String +  | DString +  | MString of int +  | EndString +  | Expression  type t = {    buffer : Sedlexing.lexbuf;    mutable start_p : Lexing.position option; -  mutable expression_level : int;    state : state Stack.t;    reset_line : bool;  } @@ -17,8 +22,7 @@ let start : t -> unit =      if not t.reset_line then        Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }    in -  t.start_p <- None; -  t.expression_level <- 0 +  t.start_p <- None  let positions : t -> Lexing.position * Lexing.position =   fun t -> @@ -30,13 +34,7 @@ let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer  let from_lexbuf : ?reset_line:bool -> Sedlexing.lexbuf -> t =   fun ?(reset_line = true) t -> -  { -    buffer = t; -    start_p = None; -    expression_level = 0; -    reset_line; -    state = Stack.create (); -  } +  { buffer = t; start_p = None; reset_line; state = Stack.create () }  let set_start_position : t -> Lexing.position -> unit =   fun t position -> @@ -63,22 +61,14 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position    lexer  let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer -let state : t -> state option = fun t -> Stack.top_opt t.state -let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state -let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) -(* The comment system is terrible. The same symbol can be used for : +(** The comment system is terrible. The same symbol can be used for :       - starting a comment       - inequality operation     In order to manage this, I try to identify the context in a very basic way,     using a counter for determining the token to send.  *) +let state : t -> state option = fun t -> Stack.top_opt t.state -let incr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level + 1 - -let decr_level : t -> unit = - fun t -> t.expression_level <- t.expression_level - 1 - -let reset_level : t -> unit = fun t -> t.expression_level <- 0 -let level : t -> int = fun t -> t.expression_level +let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state +let leave_state : t -> unit = fun t -> ignore (Stack.pop_opt t.state) diff --git a/lib/qparser/lexbuf.mli b/lib/qparser/lexbuf.mli index db81d2c..ec94d1b 100644 --- a/lib/qparser/lexbuf.mli +++ b/lib/qparser/lexbuf.mli @@ -29,21 +29,21 @@ val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position  val rollback : t -> unit  (** Rollback the latest token matched *) -type state = Token | String | DString | MString of int | EndString +(** {1 State in expressions} *) + +type state = +  | Token +  | String +  | DString +  | MString of int +  | EndString +  | Expression  val state : t -> state option  (** Get the current state for the lexer *)  val enter_state : t -> state -> unit -val leave_state : t -> unit - -(** {1 Level in expressions} *) +(** Enter into a new state *) -val level : t -> int -(** The state track the nesting level in the expression. Depending of the -    level, the token [!] will be considered as a comment or a boolean -    operation. *) - -val incr_level : t -> unit -val decr_level : t -> unit -val reset_level : t -> unit +val leave_state : t -> unit +(** Leave the current state *) diff --git a/lib/qparser/lexer.ml b/lib/qparser/lexer.ml index 75072c6..abe47ac 100644 --- a/lib/qparser/lexer.ml +++ b/lib/qparser/lexer.ml @@ -11,6 +11,14 @@ exception EOF  (* Extract the location name from the pattern *)  let location_name = Str.regexp {|.* \(.*\)|} +(** Remove all the expression state when we are leaving the expression itself. *) +let rec leave_expression buffer = +  match Lexbuf.state buffer with +  | Some Lexbuf.Expression -> +      Lexbuf.leave_state buffer; +      leave_expression buffer +  | _ -> () +  (** Try to read the identifier and check if this is a function, a keyword, or      just a variable.  @@ -20,7 +28,9 @@ let build_ident buffer =    try      let value = Hashtbl.find Idents.keyword_table id in      let _ = -      match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () +      match value with +      | IF | ELIF -> Lexbuf.enter_state buffer Lexbuf.Expression +      | _ -> ()      in      value    with Not_found -> @@ -172,7 +182,7 @@ let rec token : Lexbuf.t -> token =        LOCATION_START ident    | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> -      Lexbuf.reset_level buffer; +      leave_expression buffer;        LOCATION_END    | Plus digit -> INTEGER (Lexbuf.content buffer)    | '+' -> PLUS @@ -184,31 +194,38 @@ let rec token : Lexbuf.t -> token =    | '*' -> STAR    | ':' ->        (* We are leaving the block, the comment will be handled again *) -      Lexbuf.decr_level buffer; - +      Lexbuf.leave_state buffer;        COLUMN    | '[' -> L_BRACKET    | ']' -> R_BRACKET    | '(' -> -      Lexbuf.incr_level buffer; +      Lexbuf.enter_state buffer Lexbuf.Expression;        L_PAREN    | ')' -> -      Lexbuf.decr_level buffer; +      Lexbuf.leave_state buffer;        R_PAREN    | '<' -> LT    | '>' -> GT +  | ">>" -> +      Lexbuf.leave_state buffer; +      token buffer    | coma -> COMA    | '=' -> -      Lexbuf.incr_level buffer; +      Lexbuf.enter_state buffer Lexbuf.Expression; +        EQUAL    | ident -> build_ident buffer    | eol -> -      Lexbuf.reset_level buffer; +      leave_expression buffer; +        EOL    | '&' -> -      Lexbuf.reset_level buffer; +      leave_expression buffer;        AMPERSAND -  | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer +  | '!' -> ( +      match Lexbuf.state buffer with +      | Some Lexbuf.Expression -> EXCLAMATION +      | _ -> skip_comment buffer)    | spaces -> token buffer    | '\'' ->        Lexbuf.enter_state buffer Lexbuf.String; @@ -224,7 +241,6 @@ let rec token : Lexbuf.t -> token =    | _ ->        let tok = Lexbuf.content buffer in        let msg = Format.asprintf "Unexpected character %S" tok in -        raise @@ LexError msg  let main buffer = @@ -249,7 +265,7 @@ let rec discard buffer =        discard buffer    | eof -> raise EOF    | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> -      Lexbuf.reset_level buffer; +      leave_expression buffer;        ()    | '!' ->        ignore @@ skip_comment buffer; | 
