From 95432043550bd4a41b4466395502bc3b748e6746 Mon Sep 17 00:00:00 2001
From: Sébastien Dailly <sebastien@dailly.me>
Date: Thu, 24 Feb 2022 09:26:23 +0100
Subject: Moved the wordlist into a set

---
 motus/lib/freq_analysis.ml  | 68 ++++++++++++++++++++++++++++++++
 motus/lib/freq_analysis.mli | 12 ++++++
 motus/lib/wordlist.ml       | 95 +++++++--------------------------------------
 motus/lib/wordlist.mli      | 33 ++++++++--------
 4 files changed, 112 insertions(+), 96 deletions(-)
 create mode 100644 motus/lib/freq_analysis.ml
 create mode 100644 motus/lib/freq_analysis.mli

(limited to 'motus/lib')

diff --git a/motus/lib/freq_analysis.ml b/motus/lib/freq_analysis.ml
new file mode 100644
index 0000000..12f5fef
--- /dev/null
+++ b/motus/lib/freq_analysis.ml
@@ -0,0 +1,68 @@
+open StdLabels
+
+let () = Random.self_init ()
+
+type t = (char * int) list
+
+let update_freq : (char, int) Hashtbl.t -> char -> unit =
+ fun freq c ->
+  match Hashtbl.find_opt freq c with
+  | None -> Hashtbl.add freq c 1
+  | Some value -> Hashtbl.replace freq c (value + 1)
+
+
+(** Evaluate the score for each char (lower is better) *)
+let analyse : Wordlist.t -> (char * int) list =
+ fun data ->
+  let freq = Hashtbl.create 26 in
+  Seq.iter
+    (fun word -> String.iter word ~f:(update_freq freq))
+    (Wordlist.words data);
+
+  let number_2 = Wordlist.list_size data / 2 in
+  Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
+
+
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
+let pick_next_word : Wordlist.t -> (char * int) list -> string =
+ fun data scores ->
+  let list_size = Wordlist.list_size data / 2 in
+
+  let p' : (string list * int) option -> string -> (string list * int) option =
+   fun prec word ->
+    (* evaluate the score for this word *)
+    let _, eval =
+      String.fold_left
+        ~f:(fun (scores, score) c ->
+          match List.assoc_opt c scores with
+          | None ->
+              (* if the character has no score associated, we consider that it
+                 does not provide any more information, and give it the max
+                 score available *)
+              (scores, score + list_size)
+          | Some v ->
+              let new_scores =
+                List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
+              in
+              (new_scores, score + v) )
+        ~init:(scores, 0)
+        word
+    in
+    match prec with
+    | None -> Some ([ word ], eval)
+    | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
+    | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
+    | _ -> prec
+  in
+  match Seq.fold_left p' None (Wordlist.words data) with
+  | None -> ""
+  | Some (words, _) ->
+      (* Pick a reandom word from the list *)
+      let elements = List.length words in
+      let number = Random.int elements in
+      List.nth words number
diff --git a/motus/lib/freq_analysis.mli b/motus/lib/freq_analysis.mli
new file mode 100644
index 0000000..bd7f1d8
--- /dev/null
+++ b/motus/lib/freq_analysis.mli
@@ -0,0 +1,12 @@
+type t
+
+val analyse : Wordlist.t -> t
+(** Evaluate the score for each char (lower is better) *)
+
+val pick_next_word : Wordlist.t -> t -> string
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml
index 45fc73a..7c400bb 100644
--- a/motus/lib/wordlist.ml
+++ b/motus/lib/wordlist.ml
@@ -1,90 +1,25 @@
 open StdLabels
+module S = Set.Make (String)
 
-let () = Random.self_init ()
+type t = S.t
 
-type t =
-  { number : int
-  ; element : string list
-  }
+let empty_data () = S.empty
 
-let empty_data () = { number = 0; element = [] }
+let add_words : Criteria.t list -> string Seq.t -> t =
+ fun filters words ->
+  Seq.filter
+    (fun word -> List.for_all ~f:(Criteria.check_filter word) filters)
+    words
+  |> S.of_seq
 
-let update_freq : (char, int) Hashtbl.t -> char -> unit =
- fun freq c ->
-  match Hashtbl.find_opt freq c with
-  | None -> Hashtbl.add freq c 1
-  | Some value -> Hashtbl.replace freq c (value + 1)
 
+let filter : Criteria.t list -> t -> t =
+ fun filters t ->
+  S.filter (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) t
 
-(** Evaluate the score for each char (lower is better) *)
-let extract_freq : t -> (char * int) list =
- fun data ->
-  let freq = Hashtbl.create 26 in
-  List.iter data.element ~f:(fun word ->
-      String.iter word ~f:(fun c -> update_freq freq c) );
 
-  let number_2 = data.number / 2 in
-  Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
-  (* Sort the list for a pretty printing *)
-  |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2)
+let words = S.to_seq
 
+let list_size = S.cardinal
 
-let add_word : Criteria.t list -> t -> string -> t =
- fun filters data word ->
-  match List.for_all ~f:(Criteria.check_filter word) filters with
-  | true -> { number = data.number + 1; element = word :: data.element }
-  | false -> data
-
-
-(** Get the word which with the most information in it.
-
-The information is the score given to each character, representing each
-frequency in the whole word list (lower is better). If the same letter is
-present many times, we consider that succeding letters does not give any more
-informations (do not consider the position here) *)
-let pick_next_word : t -> (char * int) list -> string * int =
- fun data scores ->
-  let p' : (string list * int) option -> string -> (string list * int) option =
-   fun prec word ->
-    (* evaluate the score for this word *)
-    let _, eval =
-      String.fold_left
-        ~f:(fun (scores, score) c ->
-          match List.assoc_opt c scores with
-          | None ->
-              (* if the character has no score associated, we consider that it
-                 does not provide any more information, and give it the max
-                 score available *)
-              (scores, score + (data.number / 2))
-          | Some v ->
-              let new_scores =
-                List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
-              in
-              (new_scores, score + v) )
-        ~init:(scores, 0)
-        word
-    in
-    match prec with
-    | None -> Some ([ word ], eval)
-    | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
-    | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
-    | _ -> prec
-  in
-  match List.fold_left ~f:p' ~init:None data.element with
-  | None -> ("", 0)
-  | Some (words, score) ->
-      (* Pick a reandom word from the list *)
-      let elements = List.length words in
-      let number = Random.int elements in
-      (List.nth words number, score)
-
-
-let remove_word : t -> string -> t =
- fun t word ->
-  let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in
-  { element; number = t.number - 1 }
-
-
-let words : t -> string list = fun { element; _ } -> element
-
-let list_size : t -> int = fun { number; _ } -> number
+let remove_word t w = S.remove w t
diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli
index 766fbdf..a56cab3 100644
--- a/motus/lib/wordlist.mli
+++ b/motus/lib/wordlist.mli
@@ -1,25 +1,26 @@
+(** The dictionnary *)
+
 type t
 
-val words : t -> string list
+val empty_data : unit -> t
+(** Create an empty dictionnary. The resulting dictionnay cannot be used, it is
+    juste here if required for initialisation *)
+
+val words : t -> string Seq.t
+(** Load all the words *)
 
 val list_size : t -> int
 (** Number of words in the list *)
 
-val empty_data : unit -> t
-
-val add_word : Criteria.t list -> t -> string -> t
-(** Add a new word in the list. Check are made against the differents criteria in order to ensure that the word is valid *)
-
-val remove_word : t -> string -> t
-(** Remove a word from this list *)
+val add_words : Criteria.t list -> string Seq.t -> t
+(** Create a word list from an initial sequence. 
 
-val extract_freq : t -> (char * int) list
-(** Evaluate the score for each char (lower is better) *)
+    Checks are made against the differents criteria in order to ensure that the
+    word is valid *)
 
-val pick_next_word : t -> (char * int) list -> string * int
-(** Get the word which with the most information in it.
+val filter : Criteria.t list -> t -> t
+(** Remove all the words that does not match the criterias. *)
 
-The information is the score given to each character, representing each
-frequency in the whole word list (lower is better). If the same letter is
-present many times, we consider that succeding letters does not give any more
-informations (do not consider the position here) *)
+val remove_word : t -> string -> t
+(** Remove a word from this list. This function is called when a proposition
+    from the application is not recognized by the game. *)
-- 
cgit v1.2.3