aboutsummaryrefslogtreecommitdiff
path: root/lib/qparser/lexbuf.mli
blob: d6566422d611d4129f1a24b5aeffd732de1f79d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
(** Lexing buffer. *)

type t
(** The abstract state of the lexing buffer. *)

val from_lexbuf :
  ?position:Lexing.position -> ?reset_line:bool -> Sedlexing.lexbuf -> t
(** Create a new buffer.

    If [position] is given, start from this position in the file.
    [reset_line] presumably controls whether the line counter is restarted —
    NOTE(review): confirm against the implementation. *)

val start : t -> unit
(** Initialize a new run. *)

val buffer : t -> Sedlexing.lexbuf
(** Extract the underlying sedlex buffer. Required in each lexing rule. *)

val positions : t -> Lexing.position * Lexing.position
(** Extract the starting and ending positions for the matched token.

    This function is used outside of the parser, in order to get the position
    of the latest token in the case of an error. *)

val content : t -> string
(** Extract the token matched by the rule. *)

val set_start_position : t -> Lexing.position -> unit
(** Reset the starting position. Used while parsing a string to keep the
    beginning of the whole string. *)

val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
(** Function to use in the parser in order to extract the matched token
    together with its starting and ending positions. *)

val rollback : t -> unit
(** Roll back the latest matched token. *)

(** {1 State in expressions}

    The comment system is terrible. The same symbol can be used for:
    - starting a comment
    - an inequality operation

    In order to manage this, I try to identify the context in a very basic way,
    using a stack for determining the token to send. *)

type lexer = t -> Tokens.token
(** A lexing rule: consumes input from the buffer and produces a token. *)

and buffer_builder = ?nested:bool -> Buffer.t -> t -> Tokens.token
(** A rule that accumulates characters into a [Buffer.t] while lexing
    (used for string contents) before producing a token. *)

(* NOTE(review): the type name is misspelled ("Wraper"); renaming it here
   would break the implementation and every caller, so it is kept as-is. *)
type stringWraper = {
  start_string : lexer -> lexer;
      (** Start a new string. This function is used inside the token lexer, in
          order to identify how to start a new string *)
  wrap : buffer_builder -> buffer_builder;
      (** Function used to escape the character and add it to the buffer. This
          function is used inside the string lexer. *)
  end_string : lexer;
      (** Function used to match the end of the string. This function is used
          after the string lexer, in order to identify the end pattern for a
          string *)
}

type state =
  | Token  (** Default state, parsing the tokens *)
  | String of stringWraper  (** String enclosed by [''] *)
  | MString of int  (** String enclosed by [{}] *)
  | EndString of stringWraper
      (** State raised just before closing the string. The buffer is rolled
          back and the position is the closing symbol. *)
  | Expression  (** Expression where [!] is an operator *)

val pp_state : Format.formatter -> state -> unit

val state : t -> state option
(** Get the current state of the lexer.

    @return [None] when in the default state *)

val enter_state : t -> state -> unit
(** Enter into a new state. *)

val leave_state : t -> unit
(** Leave the current state. *)

val overlay : t -> lexer -> lexer

val start_recovery : t -> unit
(** Put the lexer in recovery mode. The lexer enters this mode after an error,
    in order to ignore further errors until a new location is reached. *)

val is_recovery : t -> bool
(** Check whether the lexer is in recovery mode. *)