From 546afdcf2148087f3a90b69c23ea756550f64433 Mon Sep 17 00:00:00 2001
From: Sébastien Dailly <sebastien@chimrod.com>
Date: Mon, 23 Aug 2021 14:37:53 +0200
Subject: Initial commit

---
 Makefile                |   5 ++
 dune-project            |  15 +++++
 src/bin/dune            |   6 ++
 src/bin/transcriptor.ml |  23 ++++++++
 src/lib/.mly            |  24 ++++++++
 src/lib/dune            |  17 ++++++
 src/lib/lexer.mll       |  64 ++++++++++++++++++++++
 src/lib/parser.mly      | 118 ++++++++++++++++++++++++++++++++++++++++
 src/lib/process.ml      | 115 +++++++++++++++++++++++++++++++++++++++
 src/lib/reader.ml       |  37 +++++++++++++
 src/lib/sounds.ml       | 142 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib/tokens.mly      |  46 ++++++++++++++++
 src/test/dune           |   9 +++
 src/test/test.ml        |  66 ++++++++++++++++++++++
 14 files changed, 687 insertions(+)
 create mode 100644 Makefile
 create mode 100644 dune-project
 create mode 100644 src/bin/dune
 create mode 100644 src/bin/transcriptor.ml
 create mode 100644 src/lib/.mly
 create mode 100644 src/lib/dune
 create mode 100644 src/lib/lexer.mll
 create mode 100644 src/lib/parser.mly
 create mode 100644 src/lib/process.ml
 create mode 100644 src/lib/reader.ml
 create mode 100644 src/lib/sounds.ml
 create mode 100644 src/lib/tokens.mly
 create mode 100644 src/test/dune
 create mode 100644 src/test/test.ml

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0e365db
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
+all:
+	dune build
+
+test:
+	dune runtest src/test
diff --git a/dune-project b/dune-project
new file mode 100644
index 0000000..fddf7a5
--- /dev/null
+++ b/dune-project
@@ -0,0 +1,15 @@
+(lang dune 2.9)
+(name transcriptor)
+(generate_opam_files true)
+
+(using menhir 2.0)
+
+
+(package
+  (name transcriptor)
+  (synopsis "Text transcriptor")
+  (depends
+   (menhirLib (>= 20210419))
+   )
+  )
+
diff --git a/src/bin/dune b/src/bin/dune
new file mode 100644
index 0000000..5d49b1d
--- /dev/null
+++ b/src/bin/dune
@@ -0,0 +1,6 @@
+(executables
+ (names 
+    transcriptor
+ )
+ (libraries translator)
+ )
diff --git a/src/bin/transcriptor.ml b/src/bin/transcriptor.ml
new file mode 100644
index 0000000..2378c5d
--- /dev/null
+++ b/src/bin/transcriptor.ml
@@ -0,0 +1,23 @@
+module T = Translator
+module P = T.Parser
+
+module Parser = P.Make(T.Sounds)
+module I =   Parser.MenhirInterpreter
+
+let process (optional_line : string option) =
+  match optional_line with
+  | None -> ()
+  | Some line ->
+    match T.Reader.process line with
+    | Ok response -> print_endline response
+    | Error err -> print_endline err
+
+let rec repeat channel =
+  (* Attempt to read one line. *)
+  let optional_line, continue = T.Lexer.line channel in
+  process optional_line;
+  if continue then
+    repeat channel
+
+let () =
+  repeat (Lexing.from_channel stdin)
diff --git a/src/lib/.mly b/src/lib/.mly
new file mode 100644
index 0000000..464a065
--- /dev/null
+++ b/src/lib/.mly
@@ -0,0 +1,24 @@
+%start<'a T.t List.t> main
+
+
+initial:
+  | { }
+
+medium:
+  | { }
+
+ending
+  | {}
+
+word:
+  | initial medium* ending 
+
+
+main:
+  | {}
+ | main word Spaces+ 
+  {
+
+
+  }
+
diff --git a/src/lib/dune b/src/lib/dune
new file mode 100644
index 0000000..6693079
--- /dev/null
+++ b/src/lib/dune
@@ -0,0 +1,17 @@
+(library
+ (name
+    translator
+ )
+ (libraries menhirLib)
+ )
+
+(menhir   
+  (modules tokens)   
+  (flags --only-tokens) )
+
+(menhir   
+  (modules tokens parser)
+  (merge_into parser)
+  (flags --external-tokens Tokens --table --explain --dump) )
+
+(ocamllex lexer)
diff --git a/src/lib/lexer.mll b/src/lib/lexer.mll
new file mode 100644
index 0000000..236e353
--- /dev/null
+++ b/src/lib/lexer.mll
@@ -0,0 +1,64 @@
+{
+
+ open Tokens
+
+ exception Error of string
+
+}
+
+rule letter = parse
+| '|'           { Sep }
+| 'a'           { A }
+| 'b'           { B }
+| 'c'           { C }
+| 'd'           { D }
+| 'e'           { E }
+| '\233'        { E_ACUTE }
+| '\xC3' '\xA9' { E_ACUTE }
+| 'f'           { F }
+| 'g'           { G }
+| "gu"          { G }
+| 'h'           { H }
+| 'i'           { I }
+| 'j'           { J }
+| 'k'           { K }
+| 'l'           { L }
+| 'm'           { M }
+| "mm"          { M }
+| 'n'           { N }
+| "nn"          { N }
+| 'o'           { O }
+| 'p'           { P }
+| "ph"          { F }
+| 'q'           { Q }
+| "qu"          { K }
+| 'r'           { R }
+| 'u'           { U }
+| 's'           { S }
+| "ss"          { SS }
+| 't'           { T }
+| 'u'           { U }
+| 'v'           { V }
+| 'w'           { W }
+| 'x'           { X }
+| 'y'           { Y }
+| 'z'           { Z }
+| ' '           { Space }
+| '\n'          { EOL }
+| eof           { EOL }
+
+(* This rule looks for a single line, terminated with '\n' or eof.
+   It returns a pair of an optional string (the line that was found)
+   and a Boolean flag (false if eof was reached). *)
+
+and line = parse
+| ([^'\n']* '\n') as line
+    (* Normal case: one line, no eof. *)
+    { Some line, true }
+| eof
+    (* Normal case: no data, eof. *)
+    { None, false }
+| ([^'\n']+ as line) eof
+    (* Special case: some data but missing '\n', then eof.
+       Consider this as the last line, and add the missing '\n'. *)
+    { Some (line), false }
diff --git a/src/lib/parser.mly b/src/lib/parser.mly
new file mode 100644
index 0000000..acb8e25
--- /dev/null
+++ b/src/lib/parser.mly
@@ -0,0 +1,118 @@
+(*
+
+See [1] for the theory behind the analysis
+
+
+[1] https://fr.m.wiktionary.org/wiki/Annexe:Prononciation/fran%C3%A7ais#Structure_syllabique
+
+
+   *)
+%parameter<T:Sounds.T>
+
+
+
+%{
+
+    module P = Process.M(T)
+
+%}
+%start<T.t List.t> main
+
+%%
+
+occlusiv:
+  | P               { T.p }
+  | B               { T.b }
+
+  | T               { T.t }
+  | D               { T.none }
+
+  | C               { T.k }
+  | K               { T.k }
+  | G               { T.none }
+
+fricativ:
+  | S               { T.s () }
+  | SS              { T.s () }
+
+  | F               { T.f }
+
+  | C H             { T.ch () }
+
+obstruent:
+  | occlusiv        { $1 }
+  | fricativ        { $1 }
+
+liquid:
+  | L               { T.l () }
+  | R               { T.r () }
+
+nasal:
+  | N               { T.n () }
+
+opening_consonant:
+  | occlusiv            { $1, None }
+  | fricativ            { $1, None }
+  | nasal               { $1, None }
+  | liquid              { $1, None }
+  | obstruent liquid    { $1, Some $2 }
+  | occlusiv fricativ   { $1, Some $2 }
+
+(* Each voyel as two associated sounds, depending there is a followng sound or
+   not *)
+voyels:
+  | A               { T.a  `Opened , T.a  `Opened }
+  | A I             { T.e  `Opened,  T.e  `Opened }
+  | I               { T.i  `Opened , T.i  `Opened }
+  | E               { T.schwa ()   , T.schwa () }
+  | E_ACUTE E?      { T.e  `Closed , T.e  `Closed }
+  | E U             { T.eu `Opened , T.eu `Opened }
+  | O               { T.o  `Opened , T.o  `Opened }
+
+nasal_voyels:
+  | A N             { T.a' ()      , T.a' ()      }
+  %prec Low
+
+ending_consonant: 
+  | S               { Some (T.s ()) }
+  | T               { None }
+  | R               { Some (T.r ()) }
+  | nasal           { Some $1 }
+
+ending_word: 
+  | X               { Some (T.muted (T.s ())) }
+  | S               { Some (T.muted (T.s ())) }
+  | R               { Some (T.muted (T.r ())) }
+  | T               { Some (T.muted T.t) }
+
+consonant_group:
+  | opening_consonant 
+    { 
+        { ending = None 
+        ; opening = [ fst $1 ]
+        ; following = snd $1  }
+    }
+  | ending_consonant
+    opening_consonant 
+    { 
+        { ending = Some $1 
+        ; opening = [ fst $2 ]
+        ; following = snd $2  }
+    }
+
+syllable:
+  | c = consonant_group?
+    v = voyels
+    { (v, c) }
+
+
+syllables:
+  | { [] }
+  | ss = syllables s = syllable { s::ss }
+    
+
+word:
+  | syllables ending_word? EOL { P.rebuild $2 $1 }
+
+main: 
+  | word { $1 }
diff --git a/src/lib/process.ml b/src/lib/process.ml
new file mode 100644
index 0000000..9bfd45d
--- /dev/null
+++ b/src/lib/process.ml
@@ -0,0 +1,115 @@
+open StdLabels
+module M(T:Sounds.T) = struct
+
+  type voyel = (T.t * T.t )
+
+  type consonants =
+    { ending : T.t option option
+    ; opening : T.t list
+    ; following : T.t option }
+
+  type group = voyel * consonants option
+
+  type modifier = (group * T.t option option)  -> (group * T.t option option)
+
+  (** Apply all the modifiers to the syllabic group in order to correct the
+      relation between the elements
+
+      This is just a fold_left list, and the order matter : for example
+      nasalisation shall be applied after the S vocalisation
+
+  *)
+  let apply_modifiers
+    : group * T.t option option -> modifier list -> group * T.t option option
+    = fun e m ->
+      List.fold_left m
+        ~init:e
+        ~f:(fun e f -> f e)
+
+  (** The Nasal modifier transform a voyel followed by N and a consonant
+      into a nasal voyel.
+
+      Does this min that nasal voyel are not a distinct element, but just a
+      merge from two distinct elements ? *)
+  let nasal
+    : modifier
+    = fun init ->
+      let (((v1, v2), c) , ending) = init in
+      let ending = Option.bind ending (fun x -> x) in
+      match ending with
+      | None -> init
+      | Some ending ->
+        match T.is_nasal ending with
+        | false -> init
+        | true ->
+          (* Remove the ending consonant, and transform the voyel into
+             the nasal form  *)
+          ( ( (T.nasal v1, T.nasal v2)
+            , c )
+          , None )
+
+  let vocalize_s
+    : modifier
+    = fun init ->
+      let (((v1, v2), c) , ending) = init in
+
+      match c with
+      | None -> init
+      | Some op -> match op.opening, op.ending with
+        | hd::[], None when hd = T.s () ->
+          let c = Some { op with opening = [T.z ()] } in
+          (((v1, v2), c) , ending)
+        | _ -> init
+
+  let rec _rebuild ~(m:modifier list) acc ending_consonant : group list -> T.t list
+    = function
+      | [] -> acc
+      | hd::tl ->
+
+        let modifier_ = vocalize_s :: nasal :: m in
+        let (voyel, consonants), ending_consonant =
+          apply_modifiers
+            (hd, ending_consonant)
+            modifier_  in
+
+        (* Add the last consonant and the voyel *)
+        let m, acc = match ending_consonant with
+          | None -> modifier_, (snd voyel)::acc
+          | Some s ->
+            let default = modifier_, (fst voyel) :: acc in
+            match s with
+            | None -> default
+            | Some s ->
+
+              modifier_, (fst voyel) :: s::acc  in
+
+        match consonants with
+        | None -> _rebuild ~m acc None tl
+        | Some {ending; opening; following} ->
+
+          let acc = match following with
+            | None -> acc
+            | Some s -> s::acc in
+
+          match opening with
+          | [] ->_rebuild ~m acc ending tl
+          | opening -> _rebuild ~m (opening @ acc) ending tl
+
+  (** Rebuild the list in the normal order
+
+      The voyels have to be choosen, depending either they are followed by a
+      consonant or not
+
+      Some consonants may be changed depending of the following voyel
+
+      The list has to be reversed
+
+      and so one
+
+  *)
+  let rebuild
+    : T.t option option -> group list -> T.t list
+    = fun  ending elems ->
+      _rebuild ~m:[] [] ending elems
+
+end
diff --git a/src/lib/reader.ml b/src/lib/reader.ml
new file mode 100644
index 0000000..4b123d5
--- /dev/null
+++ b/src/lib/reader.ml
@@ -0,0 +1,37 @@
+open StdLabels
+
+module P = Parser
+module Parser = P.Make(Sounds)
+module I =   Parser.MenhirInterpreter
+
+let sound_to_string
+  : Sounds.t list -> string
+  = fun t ->
+    let buff = Buffer.create 16 in
+    List.iter t
+      ~f:(fun f -> Buffer.add_string buff f.Sounds.repr);
+    Buffer.contents buff
+
+let succeed (res : Sounds.t list) =
+  Ok (sound_to_string res)
+
+let fail lexbuf (_ : 'a I.checkpoint) =
+  Error (
+    Printf.sprintf
+      "At offset %d: syntax error."
+      (Lexing.lexeme_start lexbuf)
+  )
+
+(* The parser has suspended itself because of a syntax error. Stop. *)
+
+let loop lexbuf result =
+  let supplier = I.lexer_lexbuf_to_supplier Lexer.letter lexbuf in
+  I.loop_handle succeed (fail lexbuf) supplier result
+
+let process (line : string) =
+  let lexbuf = Lexing.from_string line in
+  try
+    loop lexbuf (Parser.Incremental.main lexbuf.lex_curr_p)
+  with
+  | Lexer.Error msg -> Error msg
+
diff --git a/src/lib/sounds.ml b/src/lib/sounds.ml
new file mode 100644
index 0000000..ec1ddf2
--- /dev/null
+++ b/src/lib/sounds.ml
@@ -0,0 +1,142 @@
+module type T = sig
+
+  type t
+  val muted : t -> t
+
+  val a : [`Closed | `Opened] -> t
+  val e : [`Closed | `Opened] -> t
+  val eu : [`Closed | `Opened] -> t
+
+  val o : [`Closed | `Opened] -> t
+  val schwa : unit -> t
+  val i : [`Closed | `Opened] -> t
+
+
+  val nasal: t -> t
+
+  val none: t
+
+  val p: t
+  val b: t
+  val t: t
+  val k: t
+  val f: t
+  val s: unit -> t
+  val ch: unit -> t
+  val z: unit -> t
+
+  val n: unit -> t
+
+  val r: unit -> t
+  val l: unit -> t
+
+  val is_voyel : t -> bool
+  val is_nasal : t -> bool
+
+end
+
+module T = struct
+  type kind =
+    | None
+    | Voyel
+
+  type t =
+    { repr : string
+    ; muted : bool
+    ; kind : kind
+    ; nasal : bool
+    }
+end
+
+module Repr = struct
+
+  let a = "a"
+  and a_nasal = "@"
+
+  and o_nasal = "§"
+
+end
+
+module S = struct
+
+  include T
+
+  let is_voyel t = t.kind = Voyel
+  let is_nasal t = t.nasal
+
+  let none =
+    { repr = "."
+    ; muted = false
+    ; kind = None
+    ; nasal = false }
+
+  let nasal t =
+    match t.repr with
+    | "a" -> { t with repr = Repr.a_nasal ; nasal = true }
+    | "o" -> { t with repr = Repr.o_nasal ; nasal = true }
+    | _ -> t
+
+  let muted f =
+    { none with
+      repr = "(" ^ f.repr ^ ")"
+    ; muted = true }
+
+  let a _ =
+    { none with repr = Repr.a }
+
+  let e = function
+    | `Closed -> { none with repr = "e" }
+    | `Opened -> { none with repr = "E" }
+
+  let eu = function
+    | `Closed -> { none with repr = "2" }
+    | `Opened -> { none with repr = "9" }
+
+
+  let schwa () =
+    { none with repr = "°" }
+
+  let o _ =
+    { none with repr = "o" }
+
+  let i _ =
+    { none with repr = "i" }
+
+  let p =
+    { none with repr = "p" }
+
+  let b =
+    { none with repr = "b" }
+
+  let t =
+    { none with repr = "t" }
+
+  let k =
+    { none with repr = "k" }
+
+  let f =
+    { none with repr = "f" }
+
+  let s () =
+    { none with repr = "s" }
+
+  let ch () =
+    { none with repr = "S" }
+
+  let z () =
+    { none with repr = "z" }
+
+  let n () =
+    { none with
+      repr = "n"
+    ; nasal = true }
+
+  let l () =
+    { none with repr = "L" }
+
+  let r () =
+    { none with repr = "R" }
+
+end
+
+include S
diff --git a/src/lib/tokens.mly b/src/lib/tokens.mly
new file mode 100644
index 0000000..428c744
--- /dev/null
+++ b/src/lib/tokens.mly
@@ -0,0 +1,46 @@
+%token Sep
+
+%token A
+%token B
+%token C
+%token D
+%token E
+%token E_ACUTE
+%token F
+%token G
+%token H
+%token I
+%token J
+%token K
+%token L
+%token M
+%token N
+%token O
+%token Q
+%token P
+%token R
+%token S
+%token SS
+%token T
+%token U
+%token V
+%token W
+%token X
+%token Y
+%token Z
+%token Space
+%token EOL
+
+%nonassoc Low
+
+%right A E E_ACUTE I O U 
+
+%right C H J Q V W X Y Z
+%right P B T D K G
+%right S SS F
+
+%right N M
+%right L R
+%right High
+
+%%
diff --git a/src/test/dune b/src/test/dune
new file mode 100644
index 0000000..f8dbc72
--- /dev/null
+++ b/src/test/dune
@@ -0,0 +1,9 @@
+(test
+ (name
+    test
+ )
+ (libraries translator)
+ )
+
+
+
diff --git a/src/test/test.ml b/src/test/test.ml
new file mode 100644
index 0000000..ca74a9e
--- /dev/null
+++ b/src/test/test.ml
@@ -0,0 +1,66 @@
+open StdLabels
+
+module T = Translator
+let error = ref 0
+
+let process (optional_line : string option) expected =
+  match optional_line with
+  | None -> ()
+  | Some line ->
+    match T.Reader.process line with
+    | Error result ->
+      error := 1;
+      Printf.fprintf stdout
+        "%s : %s\n%!"
+        line
+        result
+    | Ok response ->
+      if String.equal response expected then
+        ()
+      else (
+        error := 1;
+        (Printf.fprintf stdout "%s : got %s / %s expected\n%!"
+           line
+           response
+           expected)
+      )
+
+
+let rec repeat input expected =
+  (* Attempt to read one line. *)
+  let optional_line, continue = T.Lexer.line input in
+  process optional_line expected;
+  if continue then
+    repeat input expected
+
+let tests =
+  [ "abaca",        "abaka"
+  ; "abaissa",      "abEsa"
+  ; "abaissait",    "abEsE(t)"
+  ; "abaissant",    "abEs@(t)"
+  ; "abaissées",    "abEse(s)"
+  ; "abaissera",    "abEs°Ra"
+  ; "achat",        "aSa(t)"
+  ; "astiqué",      "astike"
+  ; "casait",       "kazE(t)"
+  ; "cassait",      "kasE(t)"
+  ; "chanci",       "S@si"
+  ; "chat",         "Sa(t)"
+  ; "chipant",      "Sip@(t)"
+  ; "pacha",        "paSa"
+  ; "péché",        "peSe"
+  ; "persai",       "pERse"
+  ; "asia",         "azia"
+  ; "ani",          "ani"
+  ; "anta",         "@ta"
+  ; "plat",         "pLa(t)"
+  ]
+
+let () =
+  let () = List.iter tests
+      ~f:(fun (input, expected) ->
+          repeat (Lexing.from_string input) expected)
+  in
+
+  exit !error
+
-- 
cgit v1.2.3