diff options
author | Sébastien Dailly <sebastien@chimrod.com> | 2021-08-31 13:37:19 +0200 |
---|---|---|
committer | Sébastien Dailly <sebastien@chimrod.com> | 2021-08-31 13:37:19 +0200 |
commit | 843230359b2157212c4e93b51994f0fde90d808b (patch) | |
tree | 1d22d4efb54f0e4c94564b8e8e1960a0a9fda8ef /src | |
parent | 6ccbcc2cadae41574e33226b9072a08354880d28 (diff) |
Added ending work in lexer
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/dune | 5 | ||||
-rw-r--r-- | src/lib/lexer.mll | 81 | ||||
-rw-r--r-- | src/lib/parser.mly | 1 | ||||
-rw-r--r-- | src/lib/prononciation.mly | 48 | ||||
-rw-r--r-- | src/lib/reader.ml | 6 | ||||
-rw-r--r-- | src/lib/sounds/sounds.ml | 18 | ||||
-rw-r--r-- | src/test/bw.conflicts | 25 | ||||
-rw-r--r-- | src/test/bw.mly | 18 | ||||
-rw-r--r-- | src/test/test.ml | 2 |
9 files changed, 154 insertions, 50 deletions
diff --git a/src/lib/dune b/src/lib/dune index ac2a45f..89f3ddf 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -10,9 +10,8 @@ (flags --only-tokens) ) (menhir - (modules tokens prononciation) - (merge_into prononciation) - (flags --external-tokens Tokens --table --explain --dump) ) + (modules prononciation) + (flags --table --explain --dump) ) (menhir (modules tokens parser) diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll index 07305d8..27a7a8f 100644 --- a/src/lib/lexer.mll +++ b/src/lib/lexer.mll @@ -1,49 +1,54 @@ { - open Tokens + open Prononciation exception Error of string } +let ending = eof | '\n' + rule letter = parse -| '|' { Sep } -| 'a' { A } -| 'b' { B } -| 'c' { C } -| 'd' { D } -| 'e' { E } -| '\232' { E_ACUTE } -| '\xC3' '\xA8' { E_AGRAVE } -| '\233' { E_ACUTE } -| '\xC3' '\xA9' { E_ACUTE } -| 'f' { F } -| 'g' { G } -| 'h' { H } -| 'i' { I } -| 'j' { J } -| 'k' { K } -| 'l' { L } -| 'm' { M } -| "mm" { M } -| 'n' { N } -| "nn" { N } -| 'o' { O } -| 'p' { P } -| 'q' { Q } -| 'r' { R } -| 'u' { U } -| 's' { S } -| 't' { T } -| 'u' { U } -| 'v' { V } -| 'w' { W } -| 'x' { X } -| 'y' { Y } -| 'z' { Z } -| ' ' { Space } -| '\n' { EOL } -| eof { EOL } +| '|' { Sep } +| 'a' { A } +| 'b' { B } +| 'c' { C } +| 'd' { D } +| 'e' { E } +| '\232' { E_ACUTE } +| "è" { E_AGRAVE } +| '\233' { E_ACUTE } +| "é" { E_ACUTE } +| 'f' { F } +| 'g' { G } +| 'h' { H } +| 'i' { I } +| 'j' { J } +| 'k' { K } +| 'l' { L } +| 'm' { M } +| "mm" { M } +| 'n' { N } +| "nn" { N } +| 'o' { O } +| 'p' { P } +| 'q' { Q } +| 'r' { R } +| 'u' { U } +| 's' { S } +| 't' { T } +| 'u' { U } +| 'v' { V } +| 'w' { W } +| 'x' { X } +| 'y' { Y } +| 'z' { Z } +| ' ' { Space } +| ending { EOL } +| "eaux" ending { AUX_ } +| "aux" ending { AUX_ } +| "ient" ending { IENT_ } +| "ent" ending { ENT_ } (* This rule looks for a single line, terminated with '\n' or eof. 
It returns a pair of an optional string (the line that was found) diff --git a/src/lib/parser.mly b/src/lib/parser.mly index 6bef7bf..92a8750 100644 --- a/src/lib/parser.mly +++ b/src/lib/parser.mly @@ -39,6 +39,7 @@ fricativ: | V { T.v } | X { T.ch } + | J { T.j } obstruent: | occlusiv { $1 } diff --git a/src/lib/prononciation.mly b/src/lib/prononciation.mly index 09124a6..bd97632 100644 --- a/src/lib/prononciation.mly +++ b/src/lib/prononciation.mly @@ -10,9 +10,52 @@ open Tokens %} +%token AUX_ +%token ENT_ +%token IENT_ +%token Sep + +%token A +%token B +%token C +%token D +%token E +%token E_ACUTE +%token E_AGRAVE +%token F +%token G +%token H +%token I +%token J +%token K +%token L +%token M +%token N +%token O +%token OU +%token Q +%token P +%token R +%token S +%token SZ +%token T +%token U +%token V +%token W (* semi voyel w *) +%token X +%token Y (* semi voyel j *) +%token Z +%token Space +%token EOL + +%nonassoc Low +%left R +%right High + %start<Tokens.token list> main %% + voyel: | A { A } | E { E } @@ -85,7 +128,10 @@ letters: | Z { Z :: [] } ending: - | EOL %prec Low { EOL::[] } + | AUX_ { O::S::EOL::[]} + | IENT_ { I::T::EOL::[]} + | ENT_ { E::T::EOL::[]} + | EOL { EOL::[] } main: | append(flatten(letters*), ending) { $1 } diff --git a/src/lib/reader.ml b/src/lib/reader.ml index f705b90..c5f8cda 100644 --- a/src/lib/reader.ml +++ b/src/lib/reader.ml @@ -18,12 +18,6 @@ let succeed (res : Sounds.t list) = let fail (_ : 'a I.checkpoint) = Error ("Syntax Error") -let get_element lexbuf checkpoint = - let token = Lexer.letter lexbuf in - let startp = lexbuf.lex_start_p - and endp = lexbuf.lex_curr_p in - I.offer checkpoint (token, startp, endp) - let rec loop get_element (checkpoint : Sounds.t list I.checkpoint) = match checkpoint with | I.InputNeeded _env -> diff --git a/src/lib/sounds/sounds.ml b/src/lib/sounds/sounds.ml index 85db338..f2a9d86 100644 --- a/src/lib/sounds/sounds.ml +++ b/src/lib/sounds/sounds.ml @@ -44,7 +44,7 @@ module type T = sig val 
z: t val sz: t val ch: t - (* val j: t *) + val j: t val n: t val m: t @@ -86,6 +86,9 @@ module T = struct | Consonant_V | Consonant_S | Consonant_Z + | Consonant_X + | Consonant_J + | Consonant_M | Consonant_N | Consonant_L @@ -131,6 +134,7 @@ module Repr = struct and v = "v" and ch = "S" + and j = "j" and s = "s" and z = "z" @@ -263,7 +267,14 @@ module S = struct let ch = { none with - repr = Repr.ch + code = Consonant_X + ; repr = Repr.ch + ; mutable_ = false } + + let j = + { none with + code = Consonant_J + ; repr = Repr.j ; mutable_ = false } let sz = @@ -374,6 +385,9 @@ module S = struct | Consonant_S, _ -> Repr.s | Consonant_Z, _ -> Repr.z + | Consonant_X, _ -> Repr.ch + | Consonant_J, _ -> Repr.j + | Consonant_M, _ -> Repr.m | Consonant_N, _ -> Repr.n | Consonant_L, _ -> Repr.l diff --git a/src/test/bw.conflicts b/src/test/bw.conflicts new file mode 100644 index 0000000..58385d2 --- /dev/null +++ b/src/test/bw.conflicts @@ -0,0 +1,25 @@ + +** Conflict (shift/reduce) in state 2. +** Token involved: W +** This state is reached from main after reading: + +seq W + +** The derivations that appear below have the following common factor: +** (The question mark symbol (?) represents the spot where the derivations begin to differ.) + +main +(?) + +** In state 2, looking ahead at W, shifting is permitted +** because of the following sub-derivation: + +seq ending EOL + W . W W + +** In state 2, looking ahead at W, reducing production +** seq -> seq W +** is permitted because of the following sub-derivation: + +seq ending EOL // lookahead token appears because ending can begin with W +seq W . 
diff --git a/src/test/bw.mly b/src/test/bw.mly new file mode 100644 index 0000000..8b022bd --- /dev/null +++ b/src/test/bw.mly @@ -0,0 +1,18 @@ +%token B +%token W +%token EOL + +%start<unit> main + +%% + +seq : {} + | seq B {} + | seq W {} + ; + +ending : {} + | W W W {} + ; + +main : seq ending EOL {} diff --git a/src/test/test.ml b/src/test/test.ml index a9980e4..a3f4598 100644 --- a/src/test/test.ml +++ b/src/test/test.ml @@ -48,6 +48,7 @@ let tests = ; "achat", "aSa(t)" ; "ani", "ani" ; "anta", "@ta" + ; "anneaux", "ano(s)" ; "arachide", "aRaSid°" ; "as", "a(s)" ; "asia", "azia" @@ -65,6 +66,7 @@ let tests = ; "loin", "Lw5" ; "groin", "gR[w5]" ; "hirondelle", "iR§dEL°" + ; "joues", "ju°(s)" ; "pacha", "paSa" ; "péché", "peSe" ; "persai", "pERsE" |