diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/idents.ml | 6 | ||||
| -rw-r--r-- | lib/lexbuf.ml | 32 | ||||
| -rw-r--r-- | lib/lexbuf.mli | 6 | ||||
| -rw-r--r-- | lib/lexer.ml | 73 | ||||
| -rw-r--r-- | lib/lexer.mli | 4 | 
5 files changed, 52 insertions, 69 deletions
| diff --git a/lib/idents.ml b/lib/idents.ml index dab0ba0..baf23dc 100644 --- a/lib/idents.ml +++ b/lib/idents.ml @@ -3,12 +3,6 @@ module T = Qsp_syntax.T  let keyword_table = Hashtbl.create 53 -let char_of_uchar : Uchar.t -> char = - fun u -> match Uchar.is_char u with true -> Uchar.to_char u | _ -> '?' - -let of_uchars : Uchar.t array -> string = - fun arr -> Array.to_seq arr |> Seq.map char_of_uchar |> String.of_seq -  let _ =    List.iter      (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) diff --git a/lib/lexbuf.ml b/lib/lexbuf.ml index 8ddba2d..6059c8a 100644 --- a/lib/lexbuf.ml +++ b/lib/lexbuf.ml @@ -1,7 +1,17 @@ -type t = { buffer : Sedlexing.lexbuf; mutable start_p : Lexing.position option } +type t = { +  buffer : Sedlexing.lexbuf; +  mutable start_p : Lexing.position option; +  mutable expression_level : int; +}  let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer -let start : t -> unit = fun t -> Sedlexing.start t.buffer + +let start : t -> unit = + fun t -> +  let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in +  Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }; +  t.start_p <- None; +  t.expression_level <- 0  let positions : t -> Lexing.position * Lexing.position =   fun t -> Sedlexing.lexing_positions t.buffer @@ -9,7 +19,7 @@ let positions : t -> Lexing.position * Lexing.position =  let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer  let from_lexbuf : Sedlexing.lexbuf -> t = - fun t -> { buffer = t; start_p = None } + fun t -> { buffer = t; start_p = None; expression_level = 0 }  let set_start_position : t -> Lexing.position -> unit =   fun t position -> t.start_p <- Some position @@ -28,3 +38,19 @@ let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position      (token, start_p, curr_p)    in    lexer + +(* The comment system is terrible. The same symbol can be used for : +     - starting a comment +     - inequality operation +   In order to manage this, I try to identify the context in a very basic way, +   using a counter for determining the token to send. +*) + +let incr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level + 1 + +let decr_level : t -> unit = + fun t -> t.expression_level <- t.expression_level - 1 + +let reset_level : t -> unit = fun t -> t.expression_level <- 0 +let level : t -> int = fun t -> t.expression_level diff --git a/lib/lexbuf.mli b/lib/lexbuf.mli index b058f3c..918c011 100644 --- a/lib/lexbuf.mli +++ b/lib/lexbuf.mli @@ -17,3 +17,9 @@ val content : t -> string  val set_start_position : t -> Lexing.position -> unit  val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position +val incr_level : t -> unit +val decr_level : t -> unit +val reset_level : t -> unit + +val level : t -> int +(** Return the nested expression level *) diff --git a/lib/lexer.ml b/lib/lexer.ml index 7014d85..c643577 100644 --- a/lib/lexer.ml +++ b/lib/lexer.ml @@ -14,49 +14,16 @@ let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } =  (* Extract the location name from the pattern *)  let location_name = Str.regexp {|.* \(.*\)|} -module type Encoding = sig -  val lexeme : Sedlexing.lexbuf -> string -end - -(* The comment system is terrible. The same symbol can be used for : -     - starting a comment -     - inequality operation -   In order to manage this, I try to identify the context in a very basic way, -   using a flag True False for determining the token to send. -*) -module Bucket = Ephemeron.K1.Bucket - -type bucket = (Sedlexing.lexbuf, int) Bucket.t - -let is_expression : bucket = Bucket.make () - -let incr_level lexbuf = -  match Bucket.find is_expression lexbuf with -  | None -> Bucket.add is_expression lexbuf 1 -  | Some v -> Bucket.add is_expression lexbuf (v + 1) - -let decr_level lexbuf = -  match Bucket.find is_expression lexbuf with -  | None -> () -  | Some v -> -      if v > 1 then Bucket.add is_expression lexbuf (v - 1) -      else Bucket.remove is_expression lexbuf - -let build_ident lexbuf = -  let id = -    Sedlexing.lexeme lexbuf |> Idents.of_uchars |> String.uppercase_ascii -  in +let build_ident buffer = +  let id = Lexbuf.content buffer |> String.uppercase_ascii in    try      let value = Hashtbl.find Idents.keyword_table id in -    let _ = match value with IF | ELIF -> incr_level lexbuf | _ -> () in +    let _ = +      match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () +    in      value    with Not_found -> IDENT id -let incr_level lexbuf = -  match Bucket.find is_expression lexbuf with -  | None -> Bucket.add is_expression lexbuf 1 -  | Some v -> Bucket.add is_expression lexbuf (v + 1) -  let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =   fun rule lexbuf ->    let _, position = Lexbuf.positions lexbuf in @@ -154,12 +121,9 @@ let rec token : Lexbuf.t -> token =    let lexbuf = Lexbuf.buffer buffer in    match%sedlex lexbuf with    | 0Xfeff -> -      Lexbuf.start buffer;        (* Ignore the BOM *)        token buffer    | '#', Star space, location -> -      let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in -        (* Extract the location name *)        let ident = Lexbuf.content buffer in        let () = @@ -169,12 +133,11 @@ let rec token : Lexbuf.t -> token =        in        (* Restart the line number (new location here) *) -      Sedlexing.start lexbuf; -      Sedlexing.set_position lexbuf { end_pos with Lexing.pos_lnum = 1 }; +      Lexbuf.start buffer;        LOCATION_START ident    | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> -      Bucket.remove is_expression lexbuf; +      Lexbuf.reset_level buffer;        LOCATION_END    | Plus digit -> INTEGER (Lexbuf.content buffer)    | '+' -> PLUS @@ -186,33 +149,31 @@ let rec token : Lexbuf.t -> token =    | '*' -> STAR    | ':' ->        (* We are leaving the block, the comment will be handled again *) -      decr_level lexbuf; +      Lexbuf.decr_level buffer; +        COLUMN    | '[' -> L_BRACKET    | ']' -> R_BRACKET    | '(' -> -      incr_level lexbuf; +      Lexbuf.incr_level buffer;        L_PAREN    | ')' -> -      decr_level lexbuf; +      Lexbuf.decr_level buffer;        R_PAREN    | '<' -> LT    | '>' -> GT    | coma -> COMA    | '=' -> -      incr_level lexbuf; +      Lexbuf.incr_level buffer;        EQUAL -  | ident -> build_ident lexbuf +  | ident -> build_ident buffer    | eol -> -      Bucket.add is_expression lexbuf 0; +      Lexbuf.reset_level buffer;        EOL    | '&' -> -      Bucket.add is_expression lexbuf 0; +      Lexbuf.reset_level buffer;        AMPERSAND -  | '!' -> ( -      match Bucket.find is_expression lexbuf with -      | Some i when i <> 0 -> EXCLAMATION -      | _ -> skip_comment buffer) +  | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer    | spaces -> token buffer    | '\'' -> LITERAL (wait_balance read_quoted_string buffer)    | '"' -> LITERAL (wait_balance read_dquoted_string buffer) @@ -242,7 +203,7 @@ let rec discard buffer =        discard buffer    | eof -> raise EOF    | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> -      Bucket.remove is_expression lexbuf; +      Lexbuf.reset_level buffer;        ()    | '!' ->        ignore @@ skip_comment buffer; diff --git a/lib/lexer.mli b/lib/lexer.mli index 65e9fa0..585877c 100644 --- a/lib/lexer.mli +++ b/lib/lexer.mli @@ -1,7 +1,3 @@ -module type Encoding = sig -  val lexeme : Sedlexing.lexbuf -> string -end -  exception EOF  val token : Lexbuf.t -> Tokens.token | 
