Added ending work in lexer

author: Sébastien Dailly <sebastien@chimrod.com> 2021-08-31 13:37:19 +0200
committer: Sébastien Dailly <sebastien@chimrod.com> 2021-08-31 13:37:19 +0200
commit: 843230359b2157212c4e93b51994f0fde90d808b (patch)
tree: 1d22d4efb54f0e4c94564b8e8e1960a0a9fda8ef /src
parent: 6ccbcc2cadae41574e33226b9072a08354880d28 (diff)
9 files changed, 154 insertions, 50 deletions
diff --git a/src/lib/dune b/src/lib/dune
index ac2a45f..89f3ddf 100644
--- a/src/lib/dune
+++ b/src/lib/dune
@@ -10,9 +10,8 @@
   (flags --only-tokens) )
 
 (menhir   
-  (modules tokens prononciation)
-  (merge_into prononciation)
-  (flags --external-tokens Tokens --table --explain --dump) )
+  (modules prononciation)
+  (flags --table --explain --dump) )
 
 (menhir   
   (modules tokens parser)
diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll
index 07305d8..27a7a8f 100644
--- a/src/lib/lexer.mll
+++ b/src/lib/lexer.mll
@@ -1,49 +1,54 @@
 {
 
- open Tokens
+ open Prononciation
 
  exception Error of string
 
 }
 
+let ending = eof | '\n'
+
 rule letter = parse
-| '|'           { Sep }
-| 'a'           { A }
-| 'b'           { B }
-| 'c'           { C }
-| 'd'           { D }
-| 'e'           { E }
-| '\232'        { E_ACUTE }
-| '\xC3' '\xA8' { E_AGRAVE }
-| '\233'        { E_ACUTE }
-| '\xC3' '\xA9' { E_ACUTE }
-| 'f'           { F }
-| 'g'           { G }
-| 'h'           { H }
-| 'i'           { I }
-| 'j'           { J }
-| 'k'           { K }
-| 'l'           { L }
-| 'm'           { M }
-| "mm"          { M }
-| 'n'           { N }
-| "nn"          { N }
-| 'o'           { O }
-| 'p'           { P }
-| 'q'           { Q }
-| 'r'           { R }
-| 'u'           { U }
-| 's'           { S }
-| 't'           { T }
-| 'u'           { U }
-| 'v'           { V }
-| 'w'           { W }
-| 'x'           { X }
-| 'y'           { Y }
-| 'z'           { Z }
-| ' '           { Space }
-| '\n'          { EOL }
-| eof           { EOL }
+| '|'            { Sep }
+| 'a'            { A }
+| 'b'            { B }
+| 'c'            { C }
+| 'd'            { D }
+| 'e'            { E }
+| '\232'         { E_ACUTE }
+| "è"            { E_AGRAVE }
+| '\233'         { E_ACUTE }
+| "é"            { E_ACUTE }
+| 'f'            { F }
+| 'g'            { G }
+| 'h'            { H }
+| 'i'            { I }
+| 'j'            { J }
+| 'k'            { K }
+| 'l'            { L }
+| 'm'            { M }
+| "mm"           { M }
+| 'n'            { N }
+| "nn"           { N }
+| 'o'            { O }
+| 'p'            { P }
+| 'q'            { Q }
+| 'r'            { R }
+| 'u'            { U }
+| 's'            { S }
+| 't'            { T }
+| 'u'            { U }
+| 'v'            { V }
+| 'w'            { W }
+| 'x'            { X }
+| 'y'            { Y }
+| 'z'            { Z }
+| ' '            { Space }
+| ending         { EOL }
+| "eaux" ending  { AUX_ }
+| "aux" ending   { AUX_ }
+| "ient" ending  { IENT_ }
+| "ent" ending   { ENT_ }
 
 (* This rule looks for a single line, terminated with '\n' or eof.
    It returns a pair of an optional string (the line that was found)
diff --git a/src/lib/parser.mly b/src/lib/parser.mly
index 6bef7bf..92a8750 100644
--- a/src/lib/parser.mly
+++ b/src/lib/parser.mly
@@ -39,6 +39,7 @@ fricativ:
   | V               { T.v }
 
   | X               { T.ch }
+  | J               { T.j  }
 
 obstruent:
   | occlusiv        { $1 }
diff --git a/src/lib/prononciation.mly b/src/lib/prononciation.mly
index 09124a6..bd97632 100644
--- a/src/lib/prononciation.mly
+++ b/src/lib/prononciation.mly
@@ -10,9 +10,52 @@
     open Tokens
 %}
 
+%token AUX_
+%token ENT_
+%token IENT_
+%token Sep
+
+%token A
+%token B
+%token C
+%token D
+%token E
+%token E_ACUTE
+%token E_AGRAVE
+%token F
+%token G
+%token H
+%token I
+%token J
+%token K
+%token L
+%token M
+%token N
+%token O
+%token OU
+%token Q
+%token P
+%token R
+%token S
+%token SZ
+%token T
+%token U
+%token V
+%token W (* semi voyel w *)
+%token X
+%token Y (* semi voyel j *)
+%token Z
+%token Space
+%token EOL
+
+%nonassoc Low
+%left R
+%right High
+
 %start<Tokens.token list> main
 %%
 
+
 voyel:
   | A           { A }
   | E           { E }
@@ -85,7 +128,10 @@ letters:
   | Z       { Z :: [] }
 
 ending: 
-  | EOL %prec Low            { EOL::[] }
+  | AUX_      { O::S::EOL::[]}
+  | IENT_     { I::T::EOL::[]}
+  | ENT_      { E::T::EOL::[]}
+  | EOL       { EOL::[] }
 
 main: 
   | append(flatten(letters*), ending)     { $1 }
diff --git a/src/lib/reader.ml b/src/lib/reader.ml
index f705b90..c5f8cda 100644
--- a/src/lib/reader.ml
+++ b/src/lib/reader.ml
@@ -18,12 +18,6 @@ let succeed (res : Sounds.t list) =
 let fail (_ : 'a I.checkpoint) =
   Error ("Syntax Error")
 
-let get_element lexbuf checkpoint =
-  let token = Lexer.letter lexbuf in
-  let startp = lexbuf.lex_start_p
-  and endp = lexbuf.lex_curr_p in
-  I.offer checkpoint (token, startp, endp)
-
 let rec loop get_element (checkpoint : Sounds.t list I.checkpoint) =
   match checkpoint with
   | I.InputNeeded _env ->
diff --git a/src/lib/sounds/sounds.ml b/src/lib/sounds/sounds.ml
index 85db338..f2a9d86 100644
--- a/src/lib/sounds/sounds.ml
+++ b/src/lib/sounds/sounds.ml
@@ -44,7 +44,7 @@ module type T = sig
   val z: t
   val sz: t
   val ch: t
-  (* val j: t *)
+  val j: t
 
   val n: t
   val m: t
@@ -86,6 +86,9 @@ module T = struct
     | Consonant_V
     | Consonant_S
     | Consonant_Z
+    | Consonant_X
+    | Consonant_J
+
     | Consonant_M
     | Consonant_N
     | Consonant_L
@@ -131,6 +134,7 @@ module Repr = struct
   and v = "v"
 
   and ch = "S"
+  and j = "j"
 
   and s = "s"
   and z = "z"
@@ -263,7 +267,14 @@ module S = struct
 
   let ch =
     { none with
-      repr = Repr.ch
+      code = Consonant_X
+    ; repr = Repr.ch
+    ; mutable_ = false }
+
+  let j =
+    { none with
+      code = Consonant_J
+    ; repr = Repr.j
     ; mutable_ = false }
 
   let sz =
@@ -374,6 +385,9 @@ module S = struct
       | Consonant_S, _ -> Repr.s
       | Consonant_Z, _ -> Repr.z
 
+      | Consonant_X, _ -> Repr.ch
+      | Consonant_J, _ -> Repr.j
+
       | Consonant_M, _ -> Repr.m
       | Consonant_N, _ -> Repr.n
       | Consonant_L, _ -> Repr.l
diff --git a/src/test/bw.conflicts b/src/test/bw.conflicts
new file mode 100644
index 0000000..58385d2
--- /dev/null
+++ b/src/test/bw.conflicts
@@ -0,0 +1,25 @@
+
+** Conflict (shift/reduce) in state 2.
+** Token involved: W
+** This state is reached from main after reading:
+
+seq W
+
+** The derivations that appear below have the following common factor:
+** (The question mark symbol (?) represents the spot where the derivations begin to differ.)
+
+main 
+(?)
+
+** In state 2, looking ahead at W, shifting is permitted
+** because of the following sub-derivation:
+
+seq ending EOL 
+    W . W W 
+
+** In state 2, looking ahead at W, reducing production
+** seq -> seq W
+** is permitted because of the following sub-derivation:
+
+seq ending EOL // lookahead token appears because ending can begin with W
+seq W . 
diff --git a/src/test/bw.mly b/src/test/bw.mly
new file mode 100644
index 0000000..8b022bd
--- /dev/null
+++ b/src/test/bw.mly
@@ -0,0 +1,18 @@
+%token B
+%token W
+%token EOL
+
+%start<unit> main
+
+%% 
+
+seq    :                {}
+       | seq B          {}
+       | seq W          {}
+       ;
+
+ending :                {}
+       | W W W          {}
+       ;
+
+main   : seq ending EOL {}
diff --git a/src/test/test.ml b/src/test/test.ml
index a9980e4..a3f4598 100644
--- a/src/test/test.ml
+++ b/src/test/test.ml
@@ -48,6 +48,7 @@ let tests =
   ; "achat",        "aSa(t)"
   ; "ani",          "ani"
   ; "anta",         "@ta"
+  ; "anneaux",      "ano(s)"
   ; "arachide",     "aRaSid°"
   ; "as",           "a(s)"
   ; "asia",         "azia"
@@ -65,6 +66,7 @@ let tests =
   ; "loin",         "Lw5"
   ; "groin",        "gR[w5]"
   ; "hirondelle",   "iR§dEL°"
+  ; "joues",        "ju°(s)"
   ; "pacha",        "paSa"
   ; "péché",        "peSe"
   ; "persai",       "pERsE"
author	Sébastien Dailly <sebastien@chimrod.com>	2021-08-31 13:37:19 +0200
committer	Sébastien Dailly <sebastien@chimrod.com>	2021-08-31 13:37:19 +0200
commit	843230359b2157212c4e93b51994f0fde90d808b (patch)
tree	1d22d4efb54f0e4c94564b8e8e1960a0a9fda8ef /src
parent	6ccbcc2cadae41574e33226b9072a08354880d28 (diff)