aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChimrod <>2023-09-29 09:27:22 +0200
committerChimrod <>2023-09-29 10:00:21 +0200
commit1f79e8f1f0f59748497665ccee544163c5136562 (patch)
treeeacf55e9a2b5b8ace25d3f118f9b749ae9b53dbc
parent5e0b521a2ccce4bd19cf5d08176616f760180c11 (diff)
Fixed a wrong location for string
-rw-r--r--lib/encoding.ml3
-rw-r--r--lib/lexbuf.ml30
-rw-r--r--lib/lexbuf.mli13
-rw-r--r--lib/lexer.ml3
4 files changed, 38 insertions, 11 deletions
diff --git a/lib/encoding.ml b/lib/encoding.ml
deleted file mode 100644
index 30b9c4e..0000000
--- a/lib/encoding.ml
+++ /dev/null
@@ -1,3 +0,0 @@
-module type S = sig
- val lexeme : Sedlexing.lexbuf -> string
-end
diff --git a/lib/lexbuf.ml b/lib/lexbuf.ml
index 2ed9099..8ddba2d 100644
--- a/lib/lexbuf.ml
+++ b/lib/lexbuf.ml
@@ -1,14 +1,30 @@
-type t = Sedlexing.lexbuf
+type t = { buffer : Sedlexing.lexbuf; mutable start_p : Lexing.position option }
-let buffer : t -> Sedlexing.lexbuf = fun t -> t
-let start : t -> unit = fun t -> Sedlexing.start t
+let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
+let start : t -> unit = fun t -> Sedlexing.start t.buffer
let positions : t -> Lexing.position * Lexing.position =
- fun t -> Sedlexing.lexing_positions t
+ fun t -> Sedlexing.lexing_positions t.buffer
-let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t
-let from_lexbuf : Sedlexing.lexbuf -> t = fun t -> t
+let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
+
+let from_lexbuf : Sedlexing.lexbuf -> t =
+ fun t -> { buffer = t; start_p = None }
+
+let set_start_position : t -> Lexing.position -> unit =
+ fun t position -> t.start_p <- Some position
let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
=
- fun f lexbuf -> Sedlexing.with_tokenizer f lexbuf
+ fun f t ->
+ let lexer () =
+ (* Clear the previous registered start position if any *)
+ t.start_p <- None;
+ let token = f t in
+ let default, curr_p = positions t in
+
+ let start_p = Option.value ~default t.start_p in
+
+ (token, start_p, curr_p)
+ in
+ lexer
diff --git a/lib/lexbuf.mli b/lib/lexbuf.mli
index cf93c7e..b058f3c 100644
--- a/lib/lexbuf.mli
+++ b/lib/lexbuf.mli
@@ -1,8 +1,19 @@
type t
+val from_lexbuf : Sedlexing.lexbuf -> t
+(** Create a new buffer *)
+
val start : t -> unit
+(** Initialize a new run *)
+
val buffer : t -> Sedlexing.lexbuf
+(** Extract the sedlex buffer. Required in each rule. *)
+
val positions : t -> Lexing.position * Lexing.position
+(** Extract the starting and ending positions of the matched token *)
+
val content : t -> string
-val from_lexbuf : Sedlexing.lexbuf -> t
+(** Extract the token matched by the rule *)
+
+val set_start_position : t -> Lexing.position -> unit
val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
diff --git a/lib/lexer.ml b/lib/lexer.ml
index a91bfdb..7014d85 100644
--- a/lib/lexer.ml
+++ b/lib/lexer.ml
@@ -59,6 +59,9 @@ let incr_level lexbuf =
let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =
fun rule lexbuf ->
+ let _, position = Lexbuf.positions lexbuf in
+ Lexbuf.set_start_position lexbuf position;
+
try[@warning "-52"]
let token = rule (Buffer.create 256) lexbuf in
token