aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/entropy.ml
blob: b192f166996b5c8c0de8166b5176ddd890152b34 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
open StdLabels

type t = float * string

module CharSet = Set.Make (Char)

let compare_words : string -> ref:string * CharSet.t -> Validity.t array option
    =
 fun w1 ~ref ->
  let wordRef = fst ref in
  let l1 = String.length w1 in
  if l1 <> String.length wordRef then None
  else
    let result =
      Array.init l1 ~f:(fun i ->
          let c1 = String.get w1 i and c2 = String.get wordRef i in

          let state =
            if Char.equal c1 c2 then Validity.Wellplaced
            else if CharSet.mem c1 (snd ref) then Validity.Misplaced
            else Validity.Missing
          in
          state)
    in
    Some result

(** The module provide the entropy evaluation *)
module E : sig
  type t

  val create : cardinal:int -> length:int -> t
  (** Create a new evaluation for the entropy evaluation. 

  [cardinal] is the number of elements in the set 
  [length] is the number of criteria to check with *)

  val reset : t -> unit
  (** Reinitialize the state (we use the same in the successive
       iterations) *)

  val copy : t -> t
  (** Create a copy of the evaluation *)

  val add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit
  (** Add an element in the evaluation. The function provided should give the
      index list match by this element *)

  val get_entropy : t -> float
  (** Get the entropy for the evaluation *)
end = struct
  type t = {
      arr : (int, Bigarray.int_elt, Bigarray.c_layout) Bigarray.Array1.t
    ; cardinal : float
  }

  let reset t = Bigarray.Array1.fill t.arr 0

  let copy t =
    let arr =
      Bigarray.Array1.create Bigarray.Int Bigarray.C_layout
        (Bigarray.Array1.dim t.arr)
    in
    Bigarray.Array1.blit t.arr arr;
    { t with arr }

  let create : cardinal:int -> length:int -> t =
   fun ~cardinal ~length ->
    {
      arr = Bigarray.Array1.create Bigarray.Int Bigarray.C_layout length
    ; cardinal = Float.of_int cardinal
    }

  (**  [get_entropy] will evaluate the entropy of the values in an array.  

     The function return the quantity of information in the state.
  *)
  let get_entropy : t -> float =
   fun t ->
    let entropy = ref 0. in
    for idx = 0 to Bigarray.Array1.dim t.arr - 1 do
      let content = Float.of_int (Bigarray.Array1.get t.arr idx) in
      if content > 0. then
        let ratio = content /. t.cardinal in
        entropy := !entropy -. (ratio *. Float.log2 ratio)
    done;
    !entropy

  let add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit =
   fun ~t ~f element ->
    Seq.iter
      (fun idx ->
        let content = Bigarray.Array1.get t.arr idx in
        Bigarray.Array1.set t.arr idx (succ content))
      (f element)
end

(**  Get the word and the highest score by picking the next word from the
     list.

     [analyse ~catalog base words] will check each word from [words] and see if
     it give a good score. This score is matched against each words from
     [catalog]

     The [base] argument is the number of letters in each word. *)
let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t =
 fun base ~catalog words ->
  (* If we have only two elements, just pick one of them *)
  if Wordlist.list_size words <= 2 then (0.5, Option.get @@ Wordlist.pick words)
  else
    (* Each result from the game is stored as an integer, and we create an
       array with as many elements as we have possibilities. *)
    let max_element =
      Float.to_int @@ (Validity.elements ** Float.of_int base)
    in
    let arr =
      E.create ~length:max_element ~cardinal:(Wordlist.list_size catalog)
    in
    match Wordlist.pick words with
    | None -> (0., "")
    | Some v ->
        let score, word, _ =
          Seq.fold_left
            (fun (score, word, already_picked) word_ref ->
              (* Reinitialize the array (we use the same in the successive
                 iterations) *)
              E.reset arr;

              let set_ref = String.to_seq word_ref |> CharSet.of_seq in
              E.add_element ~t:arr set_ref ~f:(fun set_ref ->
                  Seq.filter_map
                    (fun w2 ->
                      let result = compare_words ~ref:(word_ref, set_ref) w2 in
                      Option.map Validity.index_of_result result)
                    (Wordlist.words words));
              let entropy = E.get_entropy arr in

              (* If we get more information that we had, use this word.

                 If this word does not add more informations that we already
                 had, choose it anyway if it belongs to the final list (it gives
                 a small chance to pick the right one…) *)
              let is_better, already_picked =
                if entropy > score then (true, false)
                else if
                  (not already_picked) && entropy == score
                  && Wordlist.mem word_ref words
                then (true, true)
                else (false, already_picked)
              in

              if is_better then (
                Printf.printf "Q. of information when selecting %s : %f\n"
                  word_ref entropy;
                (entropy, word_ref, already_picked))
              else (score, word, already_picked))
            (-0., v, false) (Wordlist.words catalog)
        in
        (score, word)