blob: 0416e53eb54642915e3ad93932d86d2c8297ffd7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
(* Based on
* https://github.com/smolkaj/ocaml-parsing/blob/master/src/LexBuffer.ml *)
(** A custom lexbuffer that automatically keeps track of the source location.
This module is a thin wrapper arounds sedlexing's default buffer, which does
not provide this functionality. *)
type t = {
buf : Sedlexing.lexbuf;
mutable pos : Lexing.position;
mutable pos_mark : Lexing.position;
mutable last_char : int option;
mutable last_char_mark : int option;
}
(** the lex buffer type *)
let of_sedlex ?(file = "<n/a>") ?pos buf =
let pos =
match pos with
| None ->
{
Lexing.pos_fname = file;
pos_lnum = 1;
(* line number *)
pos_bol = 0;
(* offset of beginning of current line *)
pos_cnum = 0 (* total offset *);
}
| Some p -> p
in
{ buf; pos; pos_mark = pos; last_char = None; last_char_mark = None }
let of_ascii_string ?pos s = of_sedlex ?pos (Sedlexing.Latin1.from_string s)
let of_ascii_file file =
let chan = open_in file in
of_sedlex ~file (Sedlexing.Latin1.from_channel chan)
(** The next four functions are used by sedlex internally.
See https://www.lexifi.com/sedlex/libdoc/Sedlexing.html. *)
let mark lexbuf p =
lexbuf.pos_mark <- lexbuf.pos;
lexbuf.last_char_mark <- lexbuf.last_char;
Sedlexing.mark lexbuf.buf p
let backtrack lexbuf =
lexbuf.pos <- lexbuf.pos_mark;
lexbuf.last_char <- lexbuf.last_char_mark;
Sedlexing.backtrack lexbuf.buf
let start lexbuf =
lexbuf.pos_mark <- lexbuf.pos;
lexbuf.last_char_mark <- lexbuf.last_char;
Sedlexing.start lexbuf.buf
(** location of next character *)
let next_loc lexbuf = { lexbuf.pos with pos_cnum = lexbuf.pos.pos_cnum + 1 }
let cr = Char.code '\r'
(** next character *)
let next lexbuf =
let c = Sedlexing.next lexbuf.buf in
let pos = next_loc lexbuf in
let ch =
match c with
| None -> None
| Some c -> ( try Some (Uchar.to_char c) with Invalid_argument _ -> None )
in
( match ch with
| Some '\r' ->
lexbuf.pos <-
{ pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
| Some '\n' when not (lexbuf.last_char = Some cr) ->
lexbuf.pos <-
{ pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
| Some '\n' -> ()
| _ -> lexbuf.pos <- pos );
( match c with
| None -> lexbuf.last_char <- None
| Some c -> lexbuf.last_char <- Some (Uchar.to_int c) );
c
let raw lexbuf = Sedlexing.lexeme lexbuf.buf
let latin1 ?(skip = 0) ?(drop = 0) lexbuf =
let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
Sedlexing.Latin1.sub_lexeme lexbuf.buf skip len
let utf8 ?(skip = 0) ?(drop = 0) lexbuf =
let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
Sedlexing.Utf8.sub_lexeme lexbuf.buf skip len
let container_lnum_ref = ref 0
let fix_loc loc =
let fix_pos pos =
(* It looks like lex_buffer.ml returns a position with 2 extra
* chars for parsed lines after the first one. Bug? *)
let pos_cnum =
if pos.Lexing.pos_lnum > !container_lnum_ref then pos.Lexing.pos_cnum - 2
else pos.Lexing.pos_cnum
in
{ pos with Lexing.pos_cnum }
in
let loc_start = fix_pos loc.Location.loc_start in
let loc_end = fix_pos loc.Location.loc_end in
{ loc with Location.loc_start; loc_end }
let make_loc ?(loc_ghost = false) start_pos end_pos : Location.t =
{ Location.loc_start = start_pos; loc_end = end_pos; loc_ghost }
let make_loc_and_fix ?(loc_ghost = false) start_pos end_pos : Location.t =
make_loc ~loc_ghost start_pos end_pos |> fix_loc
|