diff options
-rw-r--r-- | lib/encoding.ml | 3 | ||||
-rw-r--r-- | lib/lexbuf.ml | 30 | ||||
-rw-r--r-- | lib/lexbuf.mli | 13 | ||||
-rw-r--r-- | lib/lexer.ml | 3 |
4 files changed, 38 insertions, 11 deletions
diff --git a/lib/encoding.ml b/lib/encoding.ml deleted file mode 100644 index 30b9c4e..0000000 --- a/lib/encoding.ml +++ /dev/null @@ -1,3 +0,0 @@ -module type S = sig - val lexeme : Sedlexing.lexbuf -> string -end diff --git a/lib/lexbuf.ml b/lib/lexbuf.ml index 2ed9099..8ddba2d 100644 --- a/lib/lexbuf.ml +++ b/lib/lexbuf.ml @@ -1,14 +1,30 @@ -type t = Sedlexing.lexbuf +type t = { buffer : Sedlexing.lexbuf; mutable start_p : Lexing.position option } -let buffer : t -> Sedlexing.lexbuf = fun t -> t -let start : t -> unit = fun t -> Sedlexing.start t +let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer +let start : t -> unit = fun t -> Sedlexing.start t.buffer let positions : t -> Lexing.position * Lexing.position = - fun t -> Sedlexing.lexing_positions t + fun t -> Sedlexing.lexing_positions t.buffer -let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t -let from_lexbuf : Sedlexing.lexbuf -> t = fun t -> t +let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer + +let from_lexbuf : Sedlexing.lexbuf -> t = + fun t -> { buffer = t; start_p = None } + +let set_start_position : t -> Lexing.position -> unit = + fun t position -> t.start_p <- Some position let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position = - fun f lexbuf -> Sedlexing.with_tokenizer f lexbuf + fun f t -> + let lexer () = + (* Clear the previous registered start position if any *) + t.start_p <- None; + let token = f t in + let default, curr_p = positions t in + + let start_p = Option.value ~default t.start_p in + + (token, start_p, curr_p) + in + lexer diff --git a/lib/lexbuf.mli b/lib/lexbuf.mli index cf93c7e..b058f3c 100644 --- a/lib/lexbuf.mli +++ b/lib/lexbuf.mli @@ -1,8 +1,19 @@ type t +val from_lexbuf : Sedlexing.lexbuf -> t +(** Create a new buffer *) + val start : t -> unit +(** Initialize a new run *) + val buffer : t -> Sedlexing.lexbuf +(** Extract the sedlex buffer. Required in each rule. 
*) + val positions : t -> Lexing.position * Lexing.position +(** Extract the starting and ending position for the matched token *) + val content : t -> string -val from_lexbuf : Sedlexing.lexbuf -> t +(** Extract the token matched by the rule *) + +val set_start_position : t -> Lexing.position -> unit val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position diff --git a/lib/lexer.ml b/lib/lexer.ml index a91bfdb..7014d85 100644 --- a/lib/lexer.ml +++ b/lib/lexer.ml @@ -59,6 +59,9 @@ let incr_level lexbuf = let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = fun rule lexbuf -> + let _, position = Lexbuf.positions lexbuf in + Lexbuf.set_start_position lexbuf position; + try[@warning "-52"] let token = rule (Buffer.create 256) lexbuf in token |