diff options
author | Sébastien Dailly <sebastien@chimrod.com> | 2021-08-23 14:37:53 +0200 |
---|---|---|
committer | Sébastien Dailly <sebastien@chimrod.com> | 2021-08-23 14:37:53 +0200 |
commit | 546afdcf2148087f3a90b69c23ea756550f64433 (patch) | |
tree | ac56c71393aacf0fade729e98eeecb1e87a88534 /src/lib/lexer.mll |
Initial commit
Diffstat (limited to 'src/lib/lexer.mll')
-rw-r--r-- | src/lib/lexer.mll | 64 |
1 files changed, 64 insertions, 0 deletions
(* src/lib/lexer.mll

   ocamllex specification with two entry points:

   - [letter] reads one token (a letter, a digraph, a separator, a space
     or an end of line) from the buffer;
   - [line] reads one whole line of raw text from the buffer.

   The token constructors (Sep, A, B, ..., Space, EOL) come from the
   [Tokens] module, which is not defined in this file. *)

{

  open Tokens

  (* Raised by [letter] when the input contains a character that none of
     its clauses recognises.  The payload is a human-readable message. *)
  exception Error of string

}

(* [letter lexbuf] consumes the next token from [lexbuf] and returns it.

   ocamllex always selects the longest match, so the two-character
   clauses ("gu", "mm", "nn", "ph", "qu", "ss" and the two-byte form of
   e-acute) take precedence over the single-character clause sharing the
   same first character.  Note the deliberate mappings: "gu" yields G,
   "ph" yields F, "qu" yields K, "mm" yields M and "nn" yields N.

   Both a newline and the end of input yield EOL.

   Raises [Error] on any other character. *)
rule letter = parse
| '|'           { Sep }
| 'a'           { A }
| 'b'           { B }
| 'c'           { C }
| 'd'           { D }
| 'e'           { E }
  (* e-acute as a single ISO-8859-1 byte (decimal 233 = 0xE9) ... *)
| '\233'        { E_ACUTE }
  (* ... and as its two-byte UTF-8 encoding (0xC3 0xA9). *)
| '\xC3' '\xA9' { E_ACUTE }
| 'f'           { F }
| 'g'           { G }
| "gu"          { G }
| 'h'           { H }
| 'i'           { I }
| 'j'           { J }
| 'k'           { K }
| 'l'           { L }
| 'm'           { M }
| "mm"          { M }
| 'n'           { N }
| "nn"          { N }
| 'o'           { O }
| 'p'           { P }
| "ph"          { F }
| 'q'           { Q }
| "qu"          { K }
| 'r'           { R }
| 's'           { S }
| "ss"          { SS }
| 't'           { T }
| 'u'           { U }
| 'v'           { V }
| 'w'           { W }
| 'x'           { X }
| 'y'           { Y }
| 'z'           { Z }
| ' '           { Space }
| '\n'          { EOL }
| eof           { EOL }
  (* Fallback, kept last so that it only applies when no clause above
     matches: report the offending character through [Error] instead of
     the generic [Failure "lexing: empty token"] produced by ocamllex. *)
| _ as c
    { raise (Error (Printf.sprintf "At offset %d: unexpected character %C."
                      (Lexing.lexeme_start lexbuf) c)) }

(* This rule looks for a single line, terminated with '\n' or eof.
   It returns a pair of an optional string (the line that was found)
   and a Boolean flag (false if eof was reached). *)

and line = parse
| ([^'\n']* '\n') as line
    (* Normal case: one line, no eof.  The returned string includes the
       terminating '\n'. *)
    { Some line, true }
| eof
    (* Normal case: no data, eof. *)
    { None, false }
| ([^'\n']+ as line) eof
    (* Special case: some data but missing '\n', then eof.
       Consider this as the last line.  It is returned as is, without a
       trailing '\n'; [letter] yields EOL at the end of input anyway. *)
    { Some line, false }