summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@chimrod.com>2021-08-31 13:37:19 +0200
committerSébastien Dailly <sebastien@chimrod.com>2021-08-31 13:37:19 +0200
commit843230359b2157212c4e93b51994f0fde90d808b (patch)
tree1d22d4efb54f0e4c94564b8e8e1960a0a9fda8ef
parent6ccbcc2cadae41574e33226b9072a08354880d28 (diff)
Added ending work in lexer
-rw-r--r--src/lib/dune5
-rw-r--r--src/lib/lexer.mll81
-rw-r--r--src/lib/parser.mly1
-rw-r--r--src/lib/prononciation.mly48
-rw-r--r--src/lib/reader.ml6
-rw-r--r--src/lib/sounds/sounds.ml18
-rw-r--r--src/test/bw.conflicts25
-rw-r--r--src/test/bw.mly18
-rw-r--r--src/test/test.ml2
9 files changed, 154 insertions, 50 deletions
diff --git a/src/lib/dune b/src/lib/dune
index ac2a45f..89f3ddf 100644
--- a/src/lib/dune
+++ b/src/lib/dune
@@ -10,9 +10,8 @@
(flags --only-tokens) )
(menhir
- (modules tokens prononciation)
- (merge_into prononciation)
- (flags --external-tokens Tokens --table --explain --dump) )
+ (modules prononciation)
+ (flags --table --explain --dump) )
(menhir
(modules tokens parser)
diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll
index 07305d8..27a7a8f 100644
--- a/src/lib/lexer.mll
+++ b/src/lib/lexer.mll
@@ -1,49 +1,54 @@
{
- open Tokens
+ open Prononciation
exception Error of string
}
+let ending = eof | '\n'
+
rule letter = parse
-| '|' { Sep }
-| 'a' { A }
-| 'b' { B }
-| 'c' { C }
-| 'd' { D }
-| 'e' { E }
-| '\232' { E_ACUTE }
-| '\xC3' '\xA8' { E_AGRAVE }
-| '\233' { E_ACUTE }
-| '\xC3' '\xA9' { E_ACUTE }
-| 'f' { F }
-| 'g' { G }
-| 'h' { H }
-| 'i' { I }
-| 'j' { J }
-| 'k' { K }
-| 'l' { L }
-| 'm' { M }
-| "mm" { M }
-| 'n' { N }
-| "nn" { N }
-| 'o' { O }
-| 'p' { P }
-| 'q' { Q }
-| 'r' { R }
-| 'u' { U }
-| 's' { S }
-| 't' { T }
-| 'u' { U }
-| 'v' { V }
-| 'w' { W }
-| 'x' { X }
-| 'y' { Y }
-| 'z' { Z }
-| ' ' { Space }
-| '\n' { EOL }
-| eof { EOL }
+| '|' { Sep }
+| 'a' { A }
+| 'b' { B }
+| 'c' { C }
+| 'd' { D }
+| 'e' { E }
+| '\232' { E_ACUTE }
+| "è" { E_AGRAVE }
+| '\233' { E_ACUTE }
+| "é" { E_ACUTE }
+| 'f' { F }
+| 'g' { G }
+| 'h' { H }
+| 'i' { I }
+| 'j' { J }
+| 'k' { K }
+| 'l' { L }
+| 'm' { M }
+| "mm" { M }
+| 'n' { N }
+| "nn" { N }
+| 'o' { O }
+| 'p' { P }
+| 'q' { Q }
+| 'r' { R }
+| 'u' { U }
+| 's' { S }
+| 't' { T }
+| 'u' { U }
+| 'v' { V }
+| 'w' { W }
+| 'x' { X }
+| 'y' { Y }
+| 'z' { Z }
+| ' ' { Space }
+| ending { EOL }
+| "eaux" ending { AUX_ }
+| "aux" ending { AUX_ }
+| "ient" ending { IENT_ }
+| "ent" ending { ENT_ }
(* This rule looks for a single line, terminated with '\n' or eof.
It returns a pair of an optional string (the line that was found)
diff --git a/src/lib/parser.mly b/src/lib/parser.mly
index 6bef7bf..92a8750 100644
--- a/src/lib/parser.mly
+++ b/src/lib/parser.mly
@@ -39,6 +39,7 @@ fricativ:
| V { T.v }
| X { T.ch }
+ | J { T.j }
obstruent:
| occlusiv { $1 }
diff --git a/src/lib/prononciation.mly b/src/lib/prononciation.mly
index 09124a6..bd97632 100644
--- a/src/lib/prononciation.mly
+++ b/src/lib/prononciation.mly
@@ -10,9 +10,52 @@
open Tokens
%}
+%token AUX_
+%token ENT_
+%token IENT_
+%token Sep
+
+%token A
+%token B
+%token C
+%token D
+%token E
+%token E_ACUTE
+%token E_AGRAVE
+%token F
+%token G
+%token H
+%token I
+%token J
+%token K
+%token L
+%token M
+%token N
+%token O
+%token OU
+%token Q
+%token P
+%token R
+%token S
+%token SZ
+%token T
+%token U
+%token V
+%token W (* semi voyel w *)
+%token X
+%token Y (* semi voyel j *)
+%token Z
+%token Space
+%token EOL
+
+%nonassoc Low
+%left R
+%right High
+
%start<Tokens.token list> main
%%
+
voyel:
| A { A }
| E { E }
@@ -85,7 +128,10 @@ letters:
| Z { Z :: [] }
ending:
- | EOL %prec Low { EOL::[] }
+ | AUX_ { O::S::EOL::[]}
+ | IENT_ { I::T::EOL::[]}
+ | ENT_ { E::T::EOL::[]}
+ | EOL { EOL::[] }
main:
| append(flatten(letters*), ending) { $1 }
diff --git a/src/lib/reader.ml b/src/lib/reader.ml
index f705b90..c5f8cda 100644
--- a/src/lib/reader.ml
+++ b/src/lib/reader.ml
@@ -18,12 +18,6 @@ let succeed (res : Sounds.t list) =
let fail (_ : 'a I.checkpoint) =
Error ("Syntax Error")
-let get_element lexbuf checkpoint =
- let token = Lexer.letter lexbuf in
- let startp = lexbuf.lex_start_p
- and endp = lexbuf.lex_curr_p in
- I.offer checkpoint (token, startp, endp)
-
let rec loop get_element (checkpoint : Sounds.t list I.checkpoint) =
match checkpoint with
| I.InputNeeded _env ->
diff --git a/src/lib/sounds/sounds.ml b/src/lib/sounds/sounds.ml
index 85db338..f2a9d86 100644
--- a/src/lib/sounds/sounds.ml
+++ b/src/lib/sounds/sounds.ml
@@ -44,7 +44,7 @@ module type T = sig
val z: t
val sz: t
val ch: t
- (* val j: t *)
+ val j: t
val n: t
val m: t
@@ -86,6 +86,9 @@ module T = struct
| Consonant_V
| Consonant_S
| Consonant_Z
+ | Consonant_X
+ | Consonant_J
+
| Consonant_M
| Consonant_N
| Consonant_L
@@ -131,6 +134,7 @@ module Repr = struct
and v = "v"
and ch = "S"
+ and j = "j"
and s = "s"
and z = "z"
@@ -263,7 +267,14 @@ module S = struct
let ch =
{ none with
- repr = Repr.ch
+ code = Consonant_X
+ ; repr = Repr.ch
+ ; mutable_ = false }
+
+ let j =
+ { none with
+ code = Consonant_J
+ ; repr = Repr.j
; mutable_ = false }
let sz =
@@ -374,6 +385,9 @@ module S = struct
| Consonant_S, _ -> Repr.s
| Consonant_Z, _ -> Repr.z
+ | Consonant_X, _ -> Repr.ch
+ | Consonant_J, _ -> Repr.j
+
| Consonant_M, _ -> Repr.m
| Consonant_N, _ -> Repr.n
| Consonant_L, _ -> Repr.l
diff --git a/src/test/bw.conflicts b/src/test/bw.conflicts
new file mode 100644
index 0000000..58385d2
--- /dev/null
+++ b/src/test/bw.conflicts
@@ -0,0 +1,25 @@
+
+** Conflict (shift/reduce) in state 2.
+** Token involved: W
+** This state is reached from main after reading:
+
+seq W
+
+** The derivations that appear below have the following common factor:
+** (The question mark symbol (?) represents the spot where the derivations begin to differ.)
+
+main
+(?)
+
+** In state 2, looking ahead at W, shifting is permitted
+** because of the following sub-derivation:
+
+seq ending EOL
+ W . W W
+
+** In state 2, looking ahead at W, reducing production
+** seq -> seq W
+** is permitted because of the following sub-derivation:
+
+seq ending EOL // lookahead token appears because ending can begin with W
+seq W .
diff --git a/src/test/bw.mly b/src/test/bw.mly
new file mode 100644
index 0000000..8b022bd
--- /dev/null
+++ b/src/test/bw.mly
@@ -0,0 +1,18 @@
+%token B
+%token W
+%token EOL
+
+%start<unit> main
+
+%%
+
+seq : {}
+ | seq B {}
+ | seq W {}
+ ;
+
+ending : {}
+ | W W W {}
+ ;
+
+main : seq ending EOL {}
diff --git a/src/test/test.ml b/src/test/test.ml
index a9980e4..a3f4598 100644
--- a/src/test/test.ml
+++ b/src/test/test.ml
@@ -48,6 +48,7 @@ let tests =
; "achat", "aSa(t)"
; "ani", "ani"
; "anta", "@ta"
+ ; "anneaux", "ano(s)"
; "arachide", "aRaSid°"
; "as", "a(s)"
; "asia", "azia"
@@ -65,6 +66,7 @@ let tests =
; "loin", "Lw5"
; "groin", "gR[w5]"
; "hirondelle", "iR§dEL°"
+ ; "joues", "ju°(s)"
; "pacha", "paSa"
; "péché", "peSe"
; "persai", "pERsE"