blob: 043d0c889d5a8de67501bce39de5a6b9d7434bcb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
open StdLabels
(** Result produced by [analyse]: a score paired with a word. In this file
    the score is the entropy computed for that word. *)
type t = float * string
(** Sets of characters. Used in this file to hold the letters of a word so
    that membership of a single letter can be tested. *)
module CharSet = Set.Make (Char)
(** [compare_words w1 ~ref:(word, letters)] compares [w1] with [word]
    position by position.

    Returns [None] when the two words do not have the same length. Otherwise
    returns [Some states], where [states] holds one entry per position of
    [w1]:
    - [Validity.Wellplaced] when both words carry the same character there;
    - [Validity.Misplaced] when the character of [w1] differs but belongs to
      [letters];
    - [Validity.Missing] in every other case.

    [letters] is supplied by the caller; this function does not derive it
    from [word]. *)
let compare_words : string -> ref:string * CharSet.t -> Validity.t array option
    =
 fun w1 ~ref ->
  let target, letters = ref in
  let len = String.length w1 in
  if len = String.length target
  then
    (* State of the character found at position [pos] of [w1]. *)
    let classify pos =
      let guessed = String.get w1 pos in
      let expected = String.get target pos in
      if Char.equal guessed expected
      then Validity.Wellplaced
      else if CharSet.mem guessed letters
      then Validity.Misplaced
      else Validity.Missing
    in
    Some (Array.init len ~f:classify)
  else None
(** [get_entropy max_element words_number arr] reads the first [max_element]
    cells of the integer bigarray [arr] and, for every strictly positive
    count [c], subtracts [r *. Float.log2 r] from a running total that starts
    at [0.], with [r = c /. words_number].

    The total is returned wrapped in a fresh [float ref]; callers must
    dereference it. Cells are visited in increasing index order. *)
let get_entropy max_element words_number arr =
  let rec accumulate idx total =
    if idx >= max_element
    then total
    else
      let count = Float.of_int (Bigarray.Array1.get arr idx) in
      let total =
        if count > 0.
        then
          let share = count /. words_number in
          total -. (share *. Float.log2 share)
        else total (* empty cells contribute nothing *)
      in
      accumulate (idx + 1) total
  in
  ref (accumulate 0 0.)
(** [analyse base ~catalog words] returns the word of [catalog] with the
    highest entropy, together with that entropy.

    For each word of [catalog], every word of [words] is compared against it
    with [compare_words]; the results are counted per
    [Validity.index_of_result] index and the counts are passed to
    [get_entropy].

    A candidate replaces the current best only when its entropy is strictly
    greater; each time this happens a line is printed on standard output.
    The fold starts from [(-0., v)], where [v] is the value given by
    [Wordlist.pick words]. When [Wordlist.pick words] yields [None], the
    result is [(0., "")] and nothing is evaluated.

    NOTE(review): the counts are gathered over [words] but divided by the
    size of [catalog]; confirm that this normalisation is intended. *)
let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t =
 fun base ~catalog words ->
  let max_element = Float.to_int (Validity.elements ** Float.of_int base) in
  let words_number = Float.of_int (Wordlist.list_size catalog) in
  (* A single counting array is allocated once and reused for every
     candidate. *)
  let arr =
    Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element
  in
  (* Reset [arr], then count how many words of [words] fall under each
     result index when compared against [candidate]. Words for which
     [compare_words] gives [None] are ignored.
     NOTE(review): assumes [Validity.index_of_result] stays below
     [max_element]; otherwise the bigarray access raises. *)
  let tally candidate =
    let letters = CharSet.of_seq (String.to_seq candidate) in
    Bigarray.Array1.fill arr 0;
    Seq.iter
      (fun other ->
        match compare_words ~ref:(candidate, letters) other with
        | None -> ()
        | Some pattern ->
            let slot = Validity.index_of_result pattern in
            let seen = Bigarray.Array1.get arr slot in
            Bigarray.Array1.set arr slot (succ seen))
      (Wordlist.words words)
  in
  (* Fold step: keep [candidate] only if it strictly beats the best score
     seen so far. *)
  let keep_best ((best_score, _) as best) candidate =
    tally candidate;
    let entropy = get_entropy max_element words_number arr in
    if !entropy > best_score
    then (
      Printf.printf "Entropy for selecting %s : %.2f\n" candidate !entropy;
      (!entropy, candidate))
    else best
  in
  match Wordlist.pick words with
  | None -> (0., "")
  | Some v -> Seq.fold_left keep_best (-0., v) (Wordlist.words catalog)
|