aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/entropy.ml
blob: 5b86a9d41b9094f2a688dec9c7ab3d3de21c901c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
(** Result of an analysis: an entropy score paired with the word that
    obtained it (this is what [analyse] returns). *)
type t = float * string

(** [get_entropy max_element words_number arr] computes the Shannon entropy
    (in bits) of the histogram stored in the first [max_element] cells of
    [arr].

    Each strictly positive cell [c] contributes [-. p *. log2 p] with
    [p = c /. words_number]; cells that are zero or negative are skipped.

    The result is handed back inside a freshly allocated [float ref], so
    callers have to dereference it with [!]. *)
let get_entropy max_element words_number arr =
  (* Cells are visited in increasing index order so that the floating-point
     sum is always accumulated in the same order. *)
  let rec accumulate idx total =
    if idx >= max_element
    then total
    else
      let count = Float.of_int (Bigarray.Array1.get arr idx) in
      if count > 0.
      then
        let probability = count /. words_number in
        accumulate (idx + 1) (total -. (probability *. Float.log2 probability))
      else accumulate (idx + 1) total
  in
  ref (accumulate 0 0.)


(** [analyse base words] searches [words] for the candidate whose comparison
    results against every word of the list have the highest entropy.

    For each candidate, every word of [words] is compared to it with
    [Validity.compare_words]; each result is turned into a slot number by
    [Validity.index_of_result] and counted in a histogram. The histogram is
    then scored with [get_entropy].

    @param base exponent applied to [Validity.elements] to size the histogram
      (presumably the word length -- to be confirmed against [Validity]).
    @param words the word list to analyse.
    @return [(0., "")] when [Wordlist.pick words] yields nothing; otherwise
      the best [(entropy, word)] pair found. If no candidate scores strictly
      above [0.], the picked word is returned with a score of [0.].

    Side effect: a line is printed on standard output each time a candidate
    improves on the best score seen so far. *)
let analyse : int -> Wordlist.t -> t =
 fun base words ->
  let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in
  let words_number = Float.of_int (Wordlist.list_size words) in

  match Wordlist.pick words with
  | None -> (0., "")
  | Some v ->
      (* The histogram is allocated once and shared by every candidate.
         [Bigarray.Array1.create] does NOT initialise its elements, so the
         array has to be zeroed explicitly before counting (see below). *)
      let arr =
        Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element
      in

      Seq.fold_left
        (fun (score, word) word_ref ->
          (* Reset the counters: they hold either uninitialised memory (first
             iteration) or the counts of the previous candidate. *)
          Bigarray.Array1.fill arr 0;

          let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in

          (* Count how many words fall into each result slot. *)
          Seq.iter
            (fun w2 ->
              match Validity.compare_words ~ref:(word_ref, set_ref) w2 with
              | None -> ()
              | Some r ->
                  let idx = Validity.index_of_result r in
                  let content = Bigarray.Array1.get arr idx in
                  Bigarray.Array1.set arr idx (succ content) )
            (Wordlist.words words);

          (* Now evaluate the entropy of the histogram. *)
          let entropy = !(get_entropy max_element words_number arr) in

          (* Only a strictly better score replaces the current best one. *)
          if entropy > score
          then (
            Printf.printf "Entropy for selecting %s : %.2f\n" word_ref entropy;
            (entropy, word_ref) )
          else (score, word) )
        (0., v)
        (Wordlist.words words)