summaryrefslogtreecommitdiff
path: root/src/lib/lexer.mll
blob: 2fbffb5e448db505fc20dc36fb01e16c0feb2179 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{

 (* Presumably supplies the token constructors (Sep, A, ..., EOL, Space)
    and the Accent module used in the rule actions below: none of them
    is defined in this file section. *)
 open Prononciation

 (* Error carrying a message string.
    NOTE(review): no rule visible in this file raises it; presumably it is
    raised by code using this lexer. Confirm against callers. *)
 exception Error of string

}

(* End of input or a newline character. The [letter] rule uses it both on
   its own (EOL) and as the right-hand context of word-final patterns. *)
let ending = eof | '\n'

(* [letter] reads one token from the lexbuf.

   ocamllex selects the longest match and, when two clauses match the same
   length, the one written first. The multi-character clauses ("mm", "ph",
   "ient" ending, ...) therefore win over the single-letter clauses
   wherever they apply, regardless of their position in the list.

   Accented 'e' is accepted in two forms: as a single byte (decimal escape,
   Latin-1 code point) and as a string literal (presumably the UTF-8
   encoding, depending on how this file is saved). *)
rule letter = parse
| '|'            { Sep }
| 'a'            { A }
| 'b'            { B }
| 'c'            { C }
| 'd'            { D }
| 'e'            { E Accent.NONE }
(* Byte 232 (0xE8) is e-grave in Latin-1, so it must agree with the "è"
   clause below. *)
| '\232'         { E Accent.AGRAVE }
| "è"            { E Accent.AGRAVE }
(* NOTE(review): e-circumflex is mapped to AGRAVE; presumably deliberate
   because both are pronounced alike. Confirm. *)
| "ê"            { E Accent.AGRAVE }
(* Byte 233 (0xE9) is e-acute in Latin-1. *)
| '\233'         { E Accent.ACUTE }
| "é"            { E Accent.ACUTE }
| 'f'            { F }
| 'g'            { G }
| 'h'            { H }
| 'i'            { I }
| 'j'            { J }
| 'k'            { K }
| 'l'            { L }
| 'm'            { M }
(* Doubled consonants collapse to a single token. *)
| "mm"           { M }
| 'n'            { N }
| "nn"           { N }
| 'o'            { O }
| 'p'            { P }
(* "ph" yields the same token as 'f'. *)
| "ph"           { F }
| 'q'            { Q }
| 'r'            { R }
| 's'            { S }
| 't'            { T }
| 'u'            { U }
| 'v'            { V }
| 'w'            { W }
| 'x'            { X }
| 'y'            { Y }
| 'z'            { Z }
| ending         { EOL }
(* Word-final patterns: the trailing [ending] is consumed together with
   the letters, so no separate EOL token follows them. *)
| "erf" ending   { ERF_ }
| "el" ending    { EL_ }
(*| "ent" ending   { ENT_ }*)
| "ient" ending  { IENT_ }
| "ie" ending    { IE_ }
| "x" ending     { X_ }
(* Any other single character is passed through verbatim. *)
| _              { Space (Lexing.lexeme lexbuf) }

(* [line] extracts the next line of input from the lexbuf.
   The result is a pair: the text that was read, if any, and a flag that
   is [true] when the line ended with '\n' and [false] once eof was
   reached. *)

and line = parse
| ([^'\n']* '\n') as text
    (* A complete line; the terminating '\n' is part of [text]. *)
    { Some text, true }
| eof
    (* Input exhausted and nothing left to return. *)
    { None, false }
| ([^'\n']+ as text) eof
    (* Final line lacking its '\n': it is handed back unchanged (no
       newline is appended) and eof is reported. *)
    { Some text, false }