summaryrefslogtreecommitdiff
path: root/css/lib/lex_buffer.ml
blob: 0416e53eb54642915e3ad93932d86d2c8297ffd7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
(* Based on
 * https://github.com/smolkaj/ocaml-parsing/blob/master/src/LexBuffer.ml *)

(** A custom lexbuffer that automatically keeps track of the source location.
    This module is a thin wrapper arounds sedlexing's default buffer, which does
    not provide this functionality. *)

type t = {
  buf : Sedlexing.lexbuf;
  mutable pos : Lexing.position;
  mutable pos_mark : Lexing.position;
  mutable last_char : int option;
  mutable last_char_mark : int option;
}
(** the lex buffer type *)

let of_sedlex ?(file = "<n/a>") ?pos buf =
  let pos =
    match pos with
    | None ->
        {
          Lexing.pos_fname = file;
          pos_lnum = 1;
          (* line number *)
          pos_bol = 0;
          (* offset of beginning of current line *)
          pos_cnum = 0 (* total offset *);
        }
    | Some p -> p
  in
  { buf; pos; pos_mark = pos; last_char = None; last_char_mark = None }

let of_ascii_string ?pos s = of_sedlex ?pos (Sedlexing.Latin1.from_string s)

let of_ascii_file file =
  let chan = open_in file in
  of_sedlex ~file (Sedlexing.Latin1.from_channel chan)

(** The next four functions are used by sedlex internally.
    See https://www.lexifi.com/sedlex/libdoc/Sedlexing.html.  *)
let mark lexbuf p =
  lexbuf.pos_mark <- lexbuf.pos;
  lexbuf.last_char_mark <- lexbuf.last_char;
  Sedlexing.mark lexbuf.buf p

let backtrack lexbuf =
  lexbuf.pos <- lexbuf.pos_mark;
  lexbuf.last_char <- lexbuf.last_char_mark;
  Sedlexing.backtrack lexbuf.buf

let start lexbuf =
  lexbuf.pos_mark <- lexbuf.pos;
  lexbuf.last_char_mark <- lexbuf.last_char;
  Sedlexing.start lexbuf.buf

(** location of next character *)
let next_loc lexbuf = { lexbuf.pos with pos_cnum = lexbuf.pos.pos_cnum + 1 }

let cr = Char.code '\r'

(** next character *)
let next lexbuf =
  let c = Sedlexing.next lexbuf.buf in
  let pos = next_loc lexbuf in
  let ch =
    match c with
    | None -> None
    | Some c -> ( try Some (Uchar.to_char c) with Invalid_argument _ -> None )
  in
  ( match ch with
  | Some '\r' ->
      lexbuf.pos <-
        { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
  | Some '\n' when not (lexbuf.last_char = Some cr) ->
      lexbuf.pos <-
        { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
  | Some '\n' -> ()
  | _ -> lexbuf.pos <- pos );
  ( match c with
  | None -> lexbuf.last_char <- None
  | Some c -> lexbuf.last_char <- Some (Uchar.to_int c) );
  c

let raw lexbuf = Sedlexing.lexeme lexbuf.buf

let latin1 ?(skip = 0) ?(drop = 0) lexbuf =
  let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
  Sedlexing.Latin1.sub_lexeme lexbuf.buf skip len

let utf8 ?(skip = 0) ?(drop = 0) lexbuf =
  let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
  Sedlexing.Utf8.sub_lexeme lexbuf.buf skip len

let container_lnum_ref = ref 0

let fix_loc loc =
  let fix_pos pos =
    (* It looks like lex_buffer.ml returns a position with 2 extra
     * chars for parsed lines after the first one. Bug? *)
    let pos_cnum =
      if pos.Lexing.pos_lnum > !container_lnum_ref then pos.Lexing.pos_cnum - 2
      else pos.Lexing.pos_cnum
    in
    { pos with Lexing.pos_cnum }
  in
  let loc_start = fix_pos loc.Location.loc_start in
  let loc_end = fix_pos loc.Location.loc_end in
  { loc with Location.loc_start; loc_end }

let make_loc ?(loc_ghost = false) start_pos end_pos : Location.t =
  { Location.loc_start = start_pos; loc_end = end_pos; loc_ghost }

let make_loc_and_fix ?(loc_ghost = false) start_pos end_pos : Location.t =
  make_loc ~loc_ghost start_pos end_pos |> fix_loc