diff options
Diffstat (limited to 'motus/lib/entropy.ml')
-rw-r--r-- | motus/lib/entropy.ml | 55 |
1 files changed, 55 insertions, 0 deletions
(** Outcome of an analysis: an entropy score paired with the word that
    produced it. *)
type t = float * string

(** [get_entropy max_element words_number arr] sums, over the cells
    [0 .. max_element - 1] of [arr], the term
    [-. ratio *. Float.log2 ratio] where [ratio] is the cell's count divided
    by [words_number]. Cells holding [0] (or less) are skipped, so they do not
    contribute a [nan] through [log2 0.].

    The accumulator itself is returned, i.e. a [float ref]; callers have to
    dereference it. The return type is kept as is for compatibility with
    existing callers. *)
let get_entropy max_element words_number arr =
  let entropy = ref 0. in
  for idx = 0 to max_element - 1 do
    let count = Float.of_int (Bigarray.Array1.get arr idx) in
    if count > 0. then begin
      let ratio = count /. words_number in
      entropy := !entropy -. (ratio *. Float.log2 ratio)
    end
  done;
  entropy

(** [analyse base words] scans every word of [words] as a candidate reference
    and returns the best [(entropy, word)] pair found.

    For each candidate, every word of the list is compared against it with
    [Validity.compare_words]; each [Some] result is mapped to a slot with
    [Validity.index_of_result] and that slot's counter is incremented ([None]
    results are ignored). The entropy of the resulting histogram is then
    computed with {!get_entropy}, using the total size of the list as the
    denominator.

    The histogram has [Validity.elements ** base] slots (presumably [base] is
    the word length -- to be confirmed against the callers).

    Returns [(0., "")] when [Wordlist.pick] yields no word. Otherwise the
    search starts from [(0., v)] with [v] the picked word, and a candidate
    only replaces the current best when its entropy is strictly greater.

    Side effect: a line is printed on standard output each time a better
    candidate is found. *)
let analyse : int -> Wordlist.t -> t =
 fun base words ->
  let max_element = Float.to_int (Validity.elements ** Float.of_int base) in
  let words_number = Float.of_int (Wordlist.list_size words) in
  match Wordlist.pick words with
  | None -> (0., "")
  | Some v ->
    (* Build the histogram once and reuse it for every candidate word. *)
    let arr =
      Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element
    in
    Seq.fold_left
      (fun (score, word) word_ref ->
        (* Reinitialize the array (the same one is used in the successive
           iterations). This is mandatory: [Bigarray.Array1.create] does not
           initialise its content, and the counters below are built by
           incrementing whatever the cell already holds. *)
        Bigarray.Array1.fill arr 0;
        let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in
        Seq.iter
          (fun w2 ->
            match Validity.compare_words ~ref:(word_ref, set_ref) w2 with
            | None -> ()
            | Some r ->
              let idx = Validity.index_of_result r in
              let content = Bigarray.Array1.get arr idx in
              Bigarray.Array1.set arr idx (succ content))
          (Wordlist.words words);
        (* Now evaluate the entropy in the array *)
        let entropy = get_entropy max_element words_number arr in
        if !entropy > score then begin
          Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy;
          (!entropy, word_ref)
        end
        else (score, word))
      (0., v)
      (Wordlist.words words)