(* src/lib/lexer.mll *)

{

 (* Bring the contents of Tokens into scope. The constructors used in the
    rule actions (Sep, A, B, ..., EOL) are not defined in this file, so they
    are presumably declared there -- confirm against the Tokens module. *)
 open Tokens

 (* Lexical error carrying a message.
    NOTE(review): no rule visible in this file raises it. *)
 exception Error of string

}

(* [letter] reads one token from the lexbuf.

   - ocamllex selects the longest match, so the two-character sequences
     "gu", "mm", "nn", "ph", "qu" and "ss" are preferred over their first
     letter alone whenever both characters are present.
   - The letter e with an acute accent is accepted both as the single byte
     '\233' and as the two-byte sequence '\xC3' '\xA9'; both give E_ACUTE.
   - Both '\n' and end of input produce EOL.
   - There is no catch-all clause: any other character (upper-case letters,
     digits, punctuation, ...) matches nothing, in which case the lexer
     generated by ocamllex raises Failure "lexing: empty token". *)
rule letter = parse
| '|'           { Sep }
| 'a'           { A }
| 'b'           { B }
| 'c'           { C }
| 'd'           { D }
| 'e'           { E }
| '\233'        { E_ACUTE }
| '\xC3' '\xA9' { E_ACUTE }
| 'f'           { F }
| 'g'           { G }
| "gu"          { G }
| 'h'           { H }
| 'i'           { I }
| 'j'           { J }
| 'k'           { K }
| 'l'           { L }
| 'm'           { M }
| "mm"          { M }
| 'n'           { N }
| "nn"          { N }
| 'o'           { O }
| 'p'           { P }
| "ph"          { F }
| 'q'           { Q }
| "qu"          { K }
| 'r'           { R }
| 's'           { S }
| "ss"          { SS }
| 't'           { T }
| 'u'           { U }
| 'v'           { V }
| 'w'           { W }
| 'x'           { X }
| 'y'           { Y }
| 'z'           { Z }
| ' '           { Space }
| '\n'          { EOL }
| eof           { EOL }

(* [line] extracts the next line of input from the lexbuf.
   The result is a pair: the text that was read, if any, and a Boolean
   that is true when a terminating '\n' was consumed and false once eof
   has been reached. *)

and line = parse
| ([^'\n']* '\n') as text
    (* A complete line; the returned text includes its final '\n'. *)
    { (Some text, true) }
| eof
    (* Nothing left to read. *)
    { (None, false) }
| ([^'\n']+ as text) eof
    (* Trailing data with no final '\n' before eof: this is the last line.
       The text is returned as it was read; no '\n' is appended. *)
    { (Some text, false) }