summaryrefslogtreecommitdiff
path: root/src/lib/lexer.mll
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@chimrod.com>2021-08-23 14:37:53 +0200
committerSébastien Dailly <sebastien@chimrod.com>2021-08-23 14:37:53 +0200
commit546afdcf2148087f3a90b69c23ea756550f64433 (patch)
treeac56c71393aacf0fade729e98eeecb1e87a88534 /src/lib/lexer.mll
Initial commit
Diffstat (limited to 'src/lib/lexer.mll')
-rw-r--r--src/lib/lexer.mll64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll
new file mode 100644
index 0000000..236e353
--- /dev/null
+++ b/src/lib/lexer.mll
@@ -0,0 +1,64 @@
+{
+
+ open Tokens
+ (* Lexing error carrying a message; no rule visible in this file raises it. *)
+ exception Error of string
+
+}
+
+(* [letter] reads one token per call: '|', ' ', a letter or a digraph (the longest match wins). *)
+rule letter = parse
+| '|' { Sep }
+| 'a' { A }
+| 'b' { B }
+| 'c' { C }
+| 'd' { D }
+| 'e' { E }
+| '\233' { E_ACUTE } (* byte 233: e-acute in Latin-1 *)
+| '\xC3' '\xA9' { E_ACUTE } (* bytes C3 A9: e-acute in UTF-8 *)
+| 'f' { F }
+| 'g' { G }
+| "gu" { G } (* digraph, same token as 'g' *)
+| 'h' { H }
+| 'i' { I }
+| 'j' { J }
+| 'k' { K }
+| 'l' { L }
+| 'm' { M }
+| "mm" { M } (* doubled letter, same token as 'm' *)
+| 'n' { N }
+| "nn" { N } (* doubled letter, same token as 'n' *)
+| 'o' { O }
+| 'p' { P }
+| "ph" { F } (* digraph, same token as 'f' *)
+| 'q' { Q }
+| "qu" { K } (* digraph, same token as 'k' *)
+| 'r' { R }
+| 's' { S }
+| "ss" { SS } (* doubled letter with its own token *)
+| 't' { T }
+| 'u' { U }
+| 'v' { V }
+| 'w' { W }
+| 'x' { X }
+| 'y' { Y }
+| 'z' { Z }
+| ' ' { Space }
+| '\n' { EOL }
+| eof { EOL } (* end of input also yields EOL; there is no catch-all clause for other input *)
+
+(* [line] extracts the next line of input from the buffer.
+   The result pairs the text that was read, if any, with a flag that is
+   [true] while more input may follow and [false] once eof was reached. *)
+
+and line = parse
+| eof
+    (* Nothing left to read. *)
+    { (None, false) }
+| ([^'\n']* '\n') as text
+    (* A complete line; the trailing '\n' is part of the returned text. *)
+    { (Some text, true) }
+| ([^'\n']+ as text) eof
+    (* Final line without a terminating '\n': it is returned unchanged
+       (no '\n' is appended) and the flag reports eof. *)
+    { (Some text, false) }