From 3235260a3dca98e96ab50458a5daf3baf3f238d0 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Sun, 27 Feb 2022 10:31:27 +0100 Subject: Do not evaluate the entropy when only 2 words are still presents --- motus/bin/motus.ml | 22 +++++++++++---- motus/js/initialize.ml | 7 +---- motus/lib/entropy.ml | 75 +++++++++++++++++++++++++++++--------------------- 3 files changed, 61 insertions(+), 43 deletions(-) (limited to 'motus') diff --git a/motus/bin/motus.ml b/motus/bin/motus.ml index 638e778..823a01b 100644 --- a/motus/bin/motus.ml +++ b/motus/bin/motus.ml @@ -69,14 +69,20 @@ let create_new_rules word result = !rules -let rec run len filters words = +type t = + { catalog : Wordlist.t + ; words : Wordlist.t + } + +let rec run : int -> Criteria.t list -> t -> unit = + fun len filters { catalog; words } -> let () = show_structure Format.std_formatter words filters in (* let next = Freq_analysis.analyse words |> Freq_analysis.pick_next_word words in *) - let _, next = Entropy.analyse len ~catalog:words words in + let _, next = Entropy.analyse len ~catalog words in let () = Format.fprintf Format.std_formatter "Next word will be : %s@\n" next @@ -86,14 +92,20 @@ let rec run len filters words = (* if the input is empty, remove the word from the list and restart *) match String.equal String.empty input with - | true -> run len filters (Wordlist.remove_word words next) + | true -> + let new_list = + { catalog = Wordlist.remove_word catalog next + ; words = Wordlist.remove_word words next + } + in + run len filters new_list | false -> let new_rules = Criteria.merge_lists ~init:filters (create_new_rules next input) |> List.sort_uniq Stdlib.compare in let words = Wordlist.filter new_rules words in - run len new_rules words + run len new_rules { catalog = Wordlist.remove_word catalog next; words } let init_rule rules word = @@ -122,4 +134,4 @@ let () = let words_channel = open_in (List.hd !dict) in let words = get_list words_channel initial_filter in close_in words_channel; - run !length initial_filter words + run !length initial_filter { catalog = words; words } diff --git a/motus/js/initialize.ml b/motus/js/initialize.ml index 9375880..b721343 100644 --- a/motus/js/initialize.ml +++ b/motus/js/initialize.ml @@ -28,14 +28,9 @@ let get_proposition : let elements = Wordlist.list_size wordlist in if elements > 2000 then Freq_analysis.analyse wordlist |> Freq_analysis.pick_next_word wordlist - else if elements > 1 - then + else let _, word = Entropy.analyse length ~catalog wordlist in word - else - match Wordlist.pick wordlist with - | Some w -> w - | None -> "" in match String.equal String.empty word with | true -> None diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml index 043d0c8..aec5d86 100644 --- a/motus/lib/entropy.ml +++ b/motus/lib/entropy.ml @@ -43,41 +43,52 @@ let get_entropy max_element words_number arr = let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t = fun base ~catalog words -> - let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in - let words_number = Float.of_int (Wordlist.list_size catalog) in + (* If we have only two elements, just pick one of them *) + if Wordlist.list_size words <= 2 + then (1.0, Option.get @@ Wordlist.pick words) + else + let max_element = + Float.to_int @@ (Validity.elements ** Float.of_int base) + in + let words_number = Float.of_int (Wordlist.list_size catalog) in - let arr = Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element in - match Wordlist.pick words with - | None -> (0., "") - | Some v -> - (* Build the array *) - Seq.fold_left - (fun (score, word) word_ref -> - (* Reinitialize the array (we use the same in the successive - iterations *) - let set_ref = String.to_seq word_ref |> CharSet.of_seq in + let arr = + Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element + in + match Wordlist.pick words with + | None -> (0., "") + | Some v -> + (* Build the array *) + Seq.fold_left + (fun (score, word) word_ref -> + (* Reinitialize the array (we use the same in the successive + iterations *) + let set_ref = String.to_seq word_ref |> CharSet.of_seq in - Bigarray.Array1.fill arr 0; + Bigarray.Array1.fill arr 0; - Seq.iter - (fun w2 -> - let result = compare_words ~ref:(word_ref, set_ref) w2 in - match result with - | None -> () - | Some r -> - let idx = Validity.index_of_result r in + Seq.iter + (fun w2 -> + let result = compare_words ~ref:(word_ref, set_ref) w2 in + match result with + | None -> () + | Some r -> + let idx = Validity.index_of_result r in - let content = Bigarray.Array1.get arr idx in - Bigarray.Array1.set arr idx (succ content) ) - (Wordlist.words words); + let content = Bigarray.Array1.get arr idx in + Bigarray.Array1.set arr idx (succ content) ) + (Wordlist.words words); - (* Now evaluate the entropy in the array *) - let entropy = get_entropy max_element words_number arr in + (* Now evaluate the entropy in the array *) + let entropy = get_entropy max_element words_number arr in - if !entropy > score - then ( - Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy; - (!entropy, word_ref) ) - else (score, word) ) - (-0., v) - (Wordlist.words catalog) + if !entropy > score + then ( + Printf.printf + "Entropy for selecting %s : %.2f\n" + word_ref + !entropy; + (!entropy, word_ref) ) + else (score, word) ) + (-0., v) + (Wordlist.words catalog) -- cgit v1.2.3