From 71286c4a0bbc0afd89622170a02908a9d978cc3e Mon Sep 17 00:00:00 2001
From: Sébastien Dailly <sebastien@dailly.me>
Date: Sat, 27 Aug 2022 11:38:46 +0200
Subject: Update the entropy evaluation in the sutom application

---
 motus/lib/entropy.ml | 163 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 106 insertions(+), 57 deletions(-)

diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml
index 5fcef6c..b192f16 100644
--- a/motus/lib/entropy.ml
+++ b/motus/lib/entropy.ml
@@ -9,53 +9,111 @@ let compare_words : string -> ref:string * CharSet.t -> Validity.t array option
  fun w1 ~ref ->
   let wordRef = fst ref in
   let l1 = String.length w1 in
-  if l1 <> String.length wordRef
-  then None
+  if l1 <> String.length wordRef then None
   else
     let result =
       Array.init l1 ~f:(fun i ->
-          let c1 = String.get w1 i
-          and c2 = String.get wordRef i in
+          let c1 = String.get w1 i and c2 = String.get wordRef i in
 
           let state =
-            if Char.equal c1 c2
-            then Validity.Wellplaced
-            else if CharSet.mem c1 (snd ref)
-            then Validity.Misplaced
+            if Char.equal c1 c2 then Validity.Wellplaced
+            else if CharSet.mem c1 (snd ref) then Validity.Misplaced
             else Validity.Missing
           in
-          state )
+          state)
     in
     Some result
 
+(** The module provide the entropy evaluation *)
+module E : sig
+  type t
 
-let get_entropy max_element words_number arr =
-  let entropy = ref 0. in
-  for idx = 0 to max_element - 1 do
-    let content = Float.of_int (Bigarray.Array1.get arr idx) in
-    if content > 0.
-    then
-      let ratio = content /. words_number in
-      entropy := !entropy -. (ratio *. Float.log2 ratio)
-  done;
-  !entropy
+  val create : cardinal:int -> length:int -> t
+  (** Create a new evaluation for the entropy evaluation. 
 
+  [cardinal] is the number of elements in the set 
+  [length] is the number of criteria to check with *)
 
+  val reset : t -> unit
+  (** Reinitialize the state (we use the same in the successive
+       iterations) *)
+
+  val copy : t -> t
+  (** Create a copy of the evaluation *)
+
+  val add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit
+  (** Add an element in the evaluation. The function provided should give the
+      index list match by this element *)
+
+  val get_entropy : t -> float
+  (** Get the entropy for the evaluation *)
+end = struct
+  type t = {
+      arr : (int, Bigarray.int_elt, Bigarray.c_layout) Bigarray.Array1.t
+    ; cardinal : float
+  }
+
+  let reset t = Bigarray.Array1.fill t.arr 0
+
+  let copy t =
+    let arr =
+      Bigarray.Array1.create Bigarray.Int Bigarray.C_layout
+        (Bigarray.Array1.dim t.arr)
+    in
+    Bigarray.Array1.blit t.arr arr;
+    { t with arr }
+
+  let create : cardinal:int -> length:int -> t =
+   fun ~cardinal ~length ->
+    {
+      arr = Bigarray.Array1.create Bigarray.Int Bigarray.C_layout length
+    ; cardinal = Float.of_int cardinal
+    }
+
+  (**  [get_entropy] will evaluate the entropy of the values in an array.  
+
+     The function return the quantity of information in the state.
+  *)
+  let get_entropy : t -> float =
+   fun t ->
+    let entropy = ref 0. in
+    for idx = 0 to Bigarray.Array1.dim t.arr - 1 do
+      let content = Float.of_int (Bigarray.Array1.get t.arr idx) in
+      if content > 0. then
+        let ratio = content /. t.cardinal in
+        entropy := !entropy -. (ratio *. Float.log2 ratio)
+    done;
+    !entropy
+
+  let add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit =
+   fun ~t ~f element ->
+    Seq.iter
+      (fun idx ->
+        let content = Bigarray.Array1.get t.arr idx in
+        Bigarray.Array1.set t.arr idx (succ content))
+      (f element)
+end
+
+(**  Get the word and the highest score by picking the next word from the
+     list.
+
+     [analyse ~catalog base words] will check each word from [words] and see if
+     it give a good score. This score is matched against each words from
+     [catalog]
+
+     The [base] argument is the number of letters in each word. *)
 let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t =
  fun base ~catalog words ->
   (* If we have only two elements, just pick one of them *)
-  if Wordlist.list_size words <= 2
-  then (0.5, Option.get @@ Wordlist.pick words)
+  if Wordlist.list_size words <= 2 then (0.5, Option.get @@ Wordlist.pick words)
   else
-    let words_number = Float.of_int (Wordlist.list_size catalog) in
-
     (* Each result from the game is stored as an integer, and we create an
        array with as many elements as we have possibilities. *)
     let max_element =
       Float.to_int @@ (Validity.elements ** Float.of_int base)
     in
     let arr =
-      Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element
+      E.create ~length:max_element ~cardinal:(Wordlist.list_size catalog)
     in
     match Wordlist.pick words with
     | None -> (0., "")
@@ -65,45 +123,36 @@ let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t =
             (fun (score, word, already_picked) word_ref ->
               (* Reinitialize the array (we use the same in the successive
                  iterations) *)
-              Bigarray.Array1.fill arr 0;
+              E.reset arr;
 
               let set_ref = String.to_seq word_ref |> CharSet.of_seq in
-              Seq.iter
-                (fun w2 ->
-                  let result = compare_words ~ref:(word_ref, set_ref) w2 in
-                  match result with
-                  | None -> ()
-                  | Some r ->
-                      let idx = Validity.index_of_result r in
-
-                      let content = Bigarray.Array1.get arr idx in
-                      Bigarray.Array1.set arr idx (succ content) )
-                (Wordlist.words words);
-
-              let entropy = get_entropy max_element words_number arr in
-
-              (* If get more information that we had, use this word. Choose this
-                 word to if it belongs to the final list (it gives a small chance
-                 to pick the right one…) *)
+              E.add_element ~t:arr set_ref ~f:(fun set_ref ->
+                  Seq.filter_map
+                    (fun w2 ->
+                      let result = compare_words ~ref:(word_ref, set_ref) w2 in
+                      Option.map Validity.index_of_result result)
+                    (Wordlist.words words));
+              let entropy = E.get_entropy arr in
+
+              (* If we get more information that we had, use this word.
+
+                 If this word does not add more informations that we already
+                 had, choose it anyway if it belongs to the final list (it gives
+                 a small chance to pick the right one…) *)
               let is_better, already_picked =
-                if entropy > score
-                then (true, false)
-                else if (not already_picked)
-                        && entropy == score
-                        && Wordlist.mem word_ref words
+                if entropy > score then (true, false)
+                else if
+                  (not already_picked) && entropy == score
+                  && Wordlist.mem word_ref words
                 then (true, true)
                 else (false, already_picked)
               in
 
-              if is_better
-              then (
-                Printf.printf
-                  "Q. of information when selecting %s : %f\n"
-                  word_ref
-                  entropy;
-                (entropy, word_ref, already_picked) )
-              else (score, word, already_picked) )
-            (-0., v, false)
-            (Wordlist.words catalog)
+              if is_better then (
+                Printf.printf "Q. of information when selecting %s : %f\n"
+                  word_ref entropy;
+                (entropy, word_ref, already_picked))
+              else (score, word, already_picked))
+            (-0., v, false) (Wordlist.words catalog)
         in
         (score, word)
-- 
cgit v1.2.3