blob: dbed62230ea7a330ff64f466413d10cd9673c22d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
open StdLabels
type t = {
buffer : Sedlexing.lexbuf;
mutable start_p : Lexing.position option;
state : state Stack.t;
reset_line : bool;
mutable recovering : bool;
}
and lexer = t -> Tokens.token
and buffer_builder = ?nested:bool -> Buffer.t -> lexer
and stringWraper = {
start_string : lexer -> lexer;
(** Start a new string. This function is used insed the token lexer, in
order to identify how to start a new string *)
wrap : buffer_builder -> buffer_builder;
(** function used to escape the character and add it to the buffer. This
function is used inside the string lexer. *)
end_string : lexer;
(** Function used to match the end of the string. This function is used
after the string lexer, in order to identify the end patten for a
string *)
}
and state =
| Token
| String of stringWraper
| MString of int
| EndString of stringWraper
| Expression
let pp_state format = function
| Token -> Format.fprintf format "Token"
| String _ -> Format.fprintf format "String"
| MString _ -> Format.fprintf format "MString"
| EndString _ -> Format.fprintf format "EndString"
| Expression -> Format.fprintf format "Expression"
let state : t -> state option = fun t -> Stack.top_opt t.state
let enter_state : t -> state -> unit = fun t state -> Stack.push state t.state
let leave_state : t -> unit = fun t -> ignore @@ Stack.pop_opt t.state
let buffer : t -> Sedlexing.lexbuf = fun t -> t.buffer
let start : t -> unit =
fun t ->
let _start_pos, end_pos = Sedlexing.lexing_positions t.buffer in
let () =
if not t.reset_line then
Sedlexing.set_position t.buffer { end_pos with Lexing.pos_lnum = 1 }
in
Stack.clear t.state;
t.start_p <- None;
t.recovering <- false
let positions : t -> Lexing.position * Lexing.position =
fun t ->
let default, end_p = Sedlexing.lexing_positions t.buffer in
let start_p = Option.value ~default t.start_p in
(start_p, end_p)
let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t.buffer
let from_lexbuf :
?position:Lexing.position -> ?reset_line:bool -> Sedlexing.lexbuf -> t =
fun ?position ?(reset_line = true) t ->
Option.iter (Sedlexing.set_position t) position;
{
buffer = t;
start_p = None;
reset_line;
state = Stack.create ();
recovering = false;
}
let set_start_position : t -> Lexing.position -> unit =
fun t position ->
match t.start_p with
| None -> t.start_p <- Some position
| _ ->
(* We are already inside a block code, don’t stack it *)
()
let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
=
fun f t ->
let lexer () =
(* Clear the previous registered start position if any *)
t.start_p <- None;
let token = f t in
let default, curr_p = positions t in
let start_p = Option.value ~default t.start_p in
t.start_p <- None;
(token, start_p, curr_p)
in
lexer
let rollback : t -> unit = fun t -> Sedlexing.rollback t.buffer
let overlay : t -> lexer -> lexer =
fun t lexer ->
let rev_list = Stack.fold (fun acc a -> a :: acc) [] t.state in
List.fold_left rev_list ~init:lexer ~f:(fun (acc : lexer) layer ->
match layer with
| String wraper | EndString wraper -> wraper.start_string acc
| _ -> acc)
let start_recovery : t -> unit = fun t -> t.recovering <- true
let is_recovery : t -> bool = fun t -> t.recovering
|