summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@chimrod.com>2021-08-24 15:04:25 +0200
committerSébastien Dailly <sebastien@chimrod.com>2021-08-24 15:04:25 +0200
commitdf92da631e9a10a099a72ba846f90adf99d180df (patch)
treec56cffa7045795c0cc494512c28fc34a63f800bc
parent546afdcf2148087f3a90b69c23ea756550f64433 (diff)
Added pre-processing
-rw-r--r--src/lib/dune5
-rw-r--r--src/lib/lexer.mll4
-rw-r--r--src/lib/parser.mly7
-rw-r--r--src/lib/process.ml2
-rw-r--r--src/lib/prononciation.mly72
-rw-r--r--src/lib/reader.ml53
-rw-r--r--src/lib/sounds.ml23
-rw-r--r--src/lib/tokens.mly9
-rw-r--r--src/test/test.ml6
9 files changed, 158 insertions, 23 deletions
diff --git a/src/lib/dune b/src/lib/dune
index 6693079..29b0668 100644
--- a/src/lib/dune
+++ b/src/lib/dune
@@ -10,6 +10,11 @@
(flags --only-tokens) )
(menhir
+ (modules tokens prononciation)
+ (merge_into prononciation)
+ (flags --external-tokens Tokens --table --explain --dump) )
+
+(menhir
(modules tokens parser)
(merge_into parser)
(flags --external-tokens Tokens --table --explain --dump) )
diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll
index 236e353..f8a7f2c 100644
--- a/src/lib/lexer.mll
+++ b/src/lib/lexer.mll
@@ -17,7 +17,6 @@ rule letter = parse
| '\xC3' '\xA9' { E_ACUTE }
| 'f' { F }
| 'g' { G }
-| "gu" { G }
| 'h' { H }
| 'i' { I }
| 'j' { J }
@@ -29,13 +28,10 @@ rule letter = parse
| "nn" { N }
| 'o' { O }
| 'p' { P }
-| "ph" { F }
| 'q' { Q }
-| "qu" { K }
| 'r' { R }
| 'u' { U }
| 's' { S }
-| "ss" { SS }
| 't' { T }
| 'u' { U }
| 'v' { V }
diff --git a/src/lib/parser.mly b/src/lib/parser.mly
index acb8e25..b4b7817 100644
--- a/src/lib/parser.mly
+++ b/src/lib/parser.mly
@@ -27,17 +27,17 @@ occlusiv:
| T { T.t }
| D { T.none }
- | C { T.k }
| K { T.k }
| G { T.none }
fricativ:
| S { T.s () }
- | SS { T.s () }
+ | SZ { T.sz () }
+ | Z { T.z () }
| F { T.f }
- | C H { T.ch () }
+ | X { T.ch () }
obstruent:
| occlusiv { $1 }
@@ -68,6 +68,7 @@ voyels:
| E_ACUTE E? { T.e `Closed , T.e `Closed }
| E U { T.eu `Opened , T.eu `Opened }
| O { T.o `Opened , T.o `Opened }
+ | A_NASAL { T.nasal (T.a `Opened), T.nasal (T.a `Opened) }
nasal_voyels:
| A N { T.a' () , T.a' () }
diff --git a/src/lib/process.ml b/src/lib/process.ml
index 9bfd45d..10b2945 100644
--- a/src/lib/process.ml
+++ b/src/lib/process.ml
@@ -56,7 +56,7 @@ module M(T:Sounds.T) = struct
match c with
| None -> init
| Some op -> match op.opening, op.ending with
- | hd::[], None when hd = T.s () ->
+ | hd::[], None when T.code hd = T.SZ ->
let c = Some { op with opening = [T.z ()] } in
(((v1, v2), c) , ending)
| _ -> init
diff --git a/src/lib/prononciation.mly b/src/lib/prononciation.mly
new file mode 100644
index 0000000..f865abd
--- /dev/null
+++ b/src/lib/prononciation.mly
@@ -0,0 +1,72 @@
+%{
+
+ (** This module transform the words into a semi-prononciation elements.
+
+ The letters should be quite close with the associate phonem, but some
+ modification can occur.
+
+ *)
+
+ open Tokens
+%}
+
+%start<Tokens.token list> main
+%%
+
+initial_voyel:
+ | A { A }
+ | E { E }
+ | I { I }
+ | O { O }
+ | U { U }
+ | E_ACUTE { E_ACUTE }
+
+voyel:
+ | initial_voyel { $1 }
+
+
+letters:
+ | { [] }
+ | letters voyel { $2 :: $1 }
+ | letters Space { Space :: $1 }
+ | letters Sep { Sep :: $1 }
+
+ | letters B { B :: $1 }
+ | letters C %prec Low { K :: $1 }
+ | letters C H { X :: $1 }
+ | letters C I { I :: S :: $1 }
+ | letters C E { E :: S :: $1 }
+ | letters C U I { I :: K :: $1 }
+ | letters C U E { E :: K :: $1 }
+ | letters D { D :: $1 }
+ | letters F { F :: $1 }
+ | letters G %prec Low { G :: $1 }
+ | letters G I { I :: J :: $1 }
+ | letters G E { E :: J :: $1 }
+ | letters G U I { I :: G :: $1 }
+ | letters G U E { E :: G :: $1 }
+
+ | letters J { J :: $1 }
+ | letters K { K :: $1 }
+ | letters L { L :: $1 }
+ | letters M { M :: $1 }
+ | letters N { N :: $1 }
+ | letters P { P :: $1 }
+ | letters P H { F :: $1 }
+
+ | letters Q U { K :: $1 }
+
+ | letters R { R :: $1 }
+ | letters S S { S :: $1 }
+ | letters S { SZ :: $1 }
+ | letters T { T :: $1 }
+
+ | letters V { V :: $1 }
+ | letters W { V :: $1 }
+ | letters X { S :: K :: $1 }
+ | letters Y { I :: $1 }
+
+ | letters Z { Z :: $1 }
+
+main:
+ letters EOL { EOL::$1 }
diff --git a/src/lib/reader.ml b/src/lib/reader.ml
index 4b123d5..20dc9cc 100644
--- a/src/lib/reader.ml
+++ b/src/lib/reader.ml
@@ -15,23 +15,54 @@ let sound_to_string
let succeed (res : Sounds.t list) =
Ok (sound_to_string res)
-let fail lexbuf (_ : 'a I.checkpoint) =
- Error (
- Printf.sprintf
- "At offset %d: syntax error."
- (Lexing.lexeme_start lexbuf)
- )
+let fail (_ : 'a I.checkpoint) =
+ Error ("Syntax Error")
-(* The parser has suspended itself because of a syntax error. Stop. *)
+let get_element lexbuf checkpoint =
+ let token = Lexer.letter lexbuf in
+ let startp = lexbuf.lex_start_p
+ and endp = lexbuf.lex_curr_p in
+ I.offer checkpoint (token, startp, endp)
-let loop lexbuf result =
- let supplier = I.lexer_lexbuf_to_supplier Lexer.letter lexbuf in
- I.loop_handle succeed (fail lexbuf) supplier result
+let rec loop get_element (checkpoint : Sounds.t list I.checkpoint) =
+ match checkpoint with
+ | I.InputNeeded _env ->
+ (* The parser needs a token. Request one from the lexer,
+ and offer it to the parser, which will produce a new
+ checkpoint. Then, repeat. *)
+ let checkpoint = get_element checkpoint in
+ loop get_element checkpoint
+ | I.Shifting _
+ | I.AboutToReduce _ ->
+ let checkpoint = I.resume checkpoint in
+ loop get_element checkpoint
+ | I.HandlingError _env ->
+ fail checkpoint
+ | I.Accepted v ->
+ (* The parser has succeeded and produced a semantic value. Print it. *)
+ succeed v
+ | I.Rejected ->
+ (* The parser rejects this input. This cannot happen, here, because
+ we stop as soon as the parser reports [HandlingError]. *)
+ assert false
+
+let build_processor ll =
+ let l = ref ll in
+ fun checkpoint ->
+ match !l with
+ | [] -> raise (Failure "Empty")
+ | hd::tl ->
+ l := tl;
+ I.offer checkpoint (hd, Lexing.dummy_pos, Lexing.dummy_pos)
let process (line : string) =
let lexbuf = Lexing.from_string line in
+ let content =
+ Prononciation.main Lexer.letter lexbuf
+ |> List.rev
+ |> build_processor in
try
- loop lexbuf (Parser.Incremental.main lexbuf.lex_curr_p)
+ loop content (Parser.Incremental.main lexbuf.lex_curr_p)
with
| Lexer.Error msg -> Error msg
diff --git a/src/lib/sounds.ml b/src/lib/sounds.ml
index ec1ddf2..0ee9f5c 100644
--- a/src/lib/sounds.ml
+++ b/src/lib/sounds.ml
@@ -22,6 +22,7 @@ module type T = sig
val k: t
val f: t
val s: unit -> t
+ val sz: unit -> t
val ch: unit -> t
val z: unit -> t
@@ -33,6 +34,12 @@ module type T = sig
val is_voyel : t -> bool
val is_nasal : t -> bool
+ type code =
+ | None
+ | SZ
+
+ val code : t -> code
+
end
module T = struct
@@ -40,12 +47,18 @@ module T = struct
| None
| Voyel
+ type code =
+ | None
+ | SZ
+
type t =
- { repr : string
+ { code : code
+ ; repr : string
; muted : bool
; kind : kind
; nasal : bool
}
+
end
module Repr = struct
@@ -68,7 +81,10 @@ module S = struct
{ repr = "."
; muted = false
; kind = None
- ; nasal = false }
+ ; nasal = false
+ ; code = None }
+
+ let code t = t.code
let nasal t =
match t.repr with
@@ -120,6 +136,9 @@ module S = struct
let s () =
{ none with repr = "s" }
+ let sz () =
+ { (s()) with code = SZ }
+
let ch () =
{ none with repr = "S" }
diff --git a/src/lib/tokens.mly b/src/lib/tokens.mly
index 428c744..5346005 100644
--- a/src/lib/tokens.mly
+++ b/src/lib/tokens.mly
@@ -1,5 +1,10 @@
%token Sep
+%token A_NASAL
+%token O_NASAL
+%token I_NASAL
+
+
%token A
%token B
%token C
@@ -20,7 +25,7 @@
%token P
%token R
%token S
-%token SS
+%token SZ
%token T
%token U
%token V
@@ -37,7 +42,7 @@
%right C H J Q V W X Y Z
%right P B T D K G
-%right S SS F
+%right S F
%right N M
%right L R
diff --git a/src/test/test.ml b/src/test/test.ml
index ca74a9e..5d1d09c 100644
--- a/src/test/test.ml
+++ b/src/test/test.ml
@@ -17,6 +17,11 @@ let process (optional_line : string option) expected =
| Ok response ->
if String.equal response expected then
()
+ (*
+ (Printf.fprintf stdout "%s : %s OK \n%!"
+ line
+ expected)
+ *)
else (
error := 1;
(Printf.fprintf stdout "%s : got %s / %s expected\n%!"
@@ -41,6 +46,7 @@ let tests =
; "abaissées", "abEse(s)"
; "abaissera", "abEs°Ra"
; "achat", "aSa(t)"
+ ; "as", "a(s)"
; "astiqué", "astike"
; "casait", "kazE(t)"
; "cassait", "kasE(t)"