blob: 6059c8a44046d03a81a385ff604083e0ec4eaaa6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
type t = {
buffer : Sedlexing.lexbuf;
mutable start_p : Lexing.position option;
mutable expression_level : int;
}
let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
let start : t -> unit =
fun t ->
let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 };
t.start_p <- None;
t.expression_level <- 0
let positions : t -> Lexing.position * Lexing.position =
fun t -> Sedlexing.lexing_positions t.buffer
let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
let from_lexbuf : Sedlexing.lexbuf -> t =
fun t -> { buffer = t; start_p = None; expression_level = 0 }
let set_start_position : t -> Lexing.position -> unit =
fun t position -> t.start_p <- Some position
let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
=
fun f t ->
let lexer () =
(* Clear the previous registered start position if any *)
t.start_p <- None;
let token = f t in
let default, curr_p = positions t in
let start_p = Option.value ~default t.start_p in
(token, start_p, curr_p)
in
lexer
(* The comment system is terrible. The same symbol can be used for :
- starting a comment
- inequality operation
In order to manage this, I try to identify the context in a very basic way,
using a counter for determining the token to send.
*)
let incr_level : t -> unit =
fun t -> t.expression_level <- t.expression_level + 1
let decr_level : t -> unit =
fun t -> t.expression_level <- t.expression_level - 1
let reset_level : t -> unit = fun t -> t.expression_level <- 0
let level : t -> int = fun t -> t.expression_level
|