From 4eff667b92ff7ef4c3542650509c03fb0de5cbce Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Fri, 25 Feb 2022 19:15:29 +0100 Subject: Added an another engine for motus, using entropy instead of frequencies only --- motus/bin/motus.ml | 11 ++++++---- motus/js/initialize.ml | 16 +++++++++----- motus/js/next.ml | 10 ++++----- motus/js/reload.ml | 2 +- motus/lib/entropy.ml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ motus/lib/validity.ml | 41 +++++++++++++++++++++--------------- motus/lib/validity.mli | 7 +++++- motus/lib/wordlist.ml | 2 ++ motus/lib/wordlist.mli | 2 ++ motus/test/motus_test.ml | 10 +++++++-- 10 files changed, 121 insertions(+), 35 deletions(-) create mode 100644 motus/lib/entropy.ml diff --git a/motus/bin/motus.ml b/motus/bin/motus.ml index e32adca..a8a9188 100644 --- a/motus/bin/motus.ml +++ b/motus/bin/motus.ml @@ -69,11 +69,14 @@ let create_new_rules word result = !rules -let rec run filters words = +let rec run len filters words = let () = show_structure Format.std_formatter words filters in + (* let next = Freq_analysis.analyse words |> Freq_analysis.pick_next_word words in + *) + let _, next = Entropy.analyse len words in let () = Format.fprintf Format.std_formatter "Next word will be : %s@\n" next @@ -83,14 +86,14 @@ let rec run filters words = (* if the input is empty, remove the word from the list and restart *) match String.equal String.empty input with - | true -> run filters (Wordlist.remove_word words next) + | true -> run len filters (Wordlist.remove_word words next) | false -> let new_rules = Criteria.merge_lists ~init:filters (create_new_rules next input) |> List.sort_uniq Stdlib.compare in let words = Wordlist.filter new_rules words in - run new_rules words + run len new_rules words let init_rule rules word = @@ -119,4 +122,4 @@ let () = let words_channel = open_in (List.hd !dict) in let words = get_list words_channel initial_filter in close_in words_channel; - run initial_filter words + run !length initial_filter words diff --git a/motus/js/initialize.ml b/motus/js/initialize.ml index a088a0e..9d07a85 100644 --- a/motus/js/initialize.ml +++ b/motus/js/initialize.ml @@ -17,11 +17,17 @@ type t = The rule list is used to identify the letter already fixed by the previous results. *) -let get_proposition : Wordlist.t -> Criteria.t list -> State.proposition option - = - fun analysis rules -> +let get_proposition : + int -> Wordlist.t -> Criteria.t list -> State.proposition option = + fun length wordlist rules -> + Printf.printf "Number of elements : %d\n" (Wordlist.list_size wordlist); + let word = - Freq_analysis.analyse analysis |> Freq_analysis.pick_next_word analysis + if Wordlist.list_size wordlist > 2000 + then Freq_analysis.analyse wordlist |> Freq_analysis.pick_next_word wordlist + else + let _, word = Entropy.analyse length wordlist in + word in match String.equal String.empty word with | true -> None @@ -63,7 +69,7 @@ let process { sender; length; content; proposition } state = |> Wordlist.add_words rules in - let current_prop = get_proposition analysis rules + let current_prop = get_proposition length analysis rules and fields = FieldList.make length sender in ( match current_prop with | None -> state diff --git a/motus/js/next.ml b/motus/js/next.ml index 104b3e6..f3bb2fe 100644 --- a/motus/js/next.ml +++ b/motus/js/next.ml @@ -6,24 +6,24 @@ type t = unit let process : t -> State.state -> State.state = fun () state -> - (* Add the current proposition into the validated list *) + (* Get the news rules (and match the words only against the new ones )*) let rules = State.get_current_rules state.current_prop in + (* Update the word list with the new rules *) + let analysis = Motus_lib.Wordlist.filter rules state.analysis in + let rules = Motus_lib.Criteria.merge_lists ~init:state.rules rules |> List.sort_uniq ~cmp:Stdlib.compare in - (* Update the word list with the new rules *) - let analysis = Motus_lib.Wordlist.filter rules state.analysis in - let propositions = state.current_prop :: state.propositions and current_prop = [] in let new_state = { state with propositions; current_prop; rules; analysis } in (* Get the new proposition if any *) - let current_prop = Initialize.get_proposition analysis rules in + let current_prop = Initialize.get_proposition state.length analysis rules in match current_prop with | None -> new_state | Some prop -> diff --git a/motus/js/reload.ml b/motus/js/reload.ml index 2756b74..f0b581f 100644 --- a/motus/js/reload.ml +++ b/motus/js/reload.ml @@ -22,7 +22,7 @@ let process : t -> State.state -> State.state = in (* Get the new proposition if any *) let current_prop = - Initialize.get_proposition new_state.analysis state.rules + Initialize.get_proposition state.length new_state.analysis state.rules in match current_prop with | None -> new_state diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml new file mode 100644 index 0000000..5b86a9d --- /dev/null +++ b/motus/lib/entropy.ml @@ -0,0 +1,55 @@ +type t = float * string + +let get_entropy max_element words_number arr = + let entropy = ref 0. in + for idx = 0 to max_element - 1 do + let content = Float.of_int (Bigarray.Array1.get arr idx) in + if content > 0. + then + let ratio = content /. words_number in + entropy := !entropy -. (ratio *. Float.log2 ratio) + done; + entropy + + +let analyse : int -> Wordlist.t -> t = + fun base words -> + let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in + let words_number = Float.of_int (Wordlist.list_size words) in + + match Wordlist.pick words with + | None -> (0., "") + | Some v -> + (* Build the array *) + Seq.fold_left + (fun (score, word) word_ref -> + (* Reinitialize the array (we use the same in the successive + iterations *) + let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in + + let arr = + Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element + in + + Seq.iter + (fun w2 -> + let result = Validity.compare_words ~ref:(word_ref, set_ref) w2 in + match result with + | None -> () + | Some r -> + let idx = Validity.index_of_result r in + + let content = Bigarray.Array1.get arr idx in + Bigarray.Array1.set arr idx (succ content) ) + (Wordlist.words words); + + (* Now evaluate the entropy in the array *) + let entropy = get_entropy max_element words_number arr in + + if !entropy > score + then ( + Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy; + (!entropy, word_ref) ) + else (score, word) ) + (0., v) + (Wordlist.words words) diff --git a/motus/lib/validity.ml b/motus/lib/validity.ml index ae85f04..0fdc40c 100644 --- a/motus/lib/validity.ml +++ b/motus/lib/validity.ml @@ -12,25 +12,28 @@ type t = [@@@warning "+32"] -let m = Float.of_int (1 + max) +let elements = Float.of_int (1 + max) (** Get the index of a validity result *) let index_of_result : t array -> int = fun elems -> - let _, value = - Array.fold_left elems ~init:(0., 0.) ~f:(fun (pos, acc) content -> - let v = Float.of_int (to_enum content) in - let acc' = acc +. (v *. (m ** pos)) in - (pos +. Float.one, acc') ) - in - Float.to_int value + (* This seems to be more efficient than a foldLeft *) + let value = ref 0. in + Array.iteri + ~f:(fun pos content -> + let pos = Float.of_int pos in + let v = Float.of_int (to_enum content) in + let acc' = !value +. (v *. (elements ** pos)) in + value := acc' ) + elems; + Float.to_int !value let index_to_result : base:int -> int -> t array = fun ~base n -> let rec _f acc n i = - let next = Float.round (n /. m) - and rem = Float.(to_int @@ rem n m) in + let next = Float.round (n /. elements) + and rem = Float.(to_int @@ rem n elements) in match (rem, i) with | _, 0 -> Array.of_list acc @@ -45,29 +48,33 @@ let index_to_result : base:int -> int -> t array = (** Build a sequence of all the possible status for a given number of letters *) let sequence : int -> t array Seq.t = fun base -> - let max_element = Float.to_int @@ (m ** Float.of_int base) in + let max_element = Float.to_int @@ (elements ** Float.of_int base) in Seq.unfold (fun n -> - if n < max_element then Some (index_to_result ~base n, n + 1) else None ) + if n < max_element then Some (index_to_result ~base n, succ n) else None + ) 0 -let compare_words : string -> ref:string -> t array option = +module CharSet = Set.Make (Char) + +let compare_words : string -> ref:string * CharSet.t -> t array option = fun w1 ~ref -> + let wordRef = fst ref in let l1 = String.length w1 in - if l1 <> String.length ref + if l1 <> String.length wordRef then None else let result = Array.init l1 ~f:(fun i -> let c1 = String.get w1 i - and c2 = String.get ref i in + and c2 = String.get wordRef i in let state = if Char.equal c1 c2 then Wellplaced - else if String.contains ref c1 + else if CharSet.mem c2 (snd ref) then Misplaced else Missing in @@ -92,7 +99,7 @@ let to_criterias : string -> t array -> Criteria.t list = let l, _ = Array.fold_left t ~init:([], 0) ~f:(fun (acc, i) t -> let acc = to_criteria (String.get word i) i t acc in - (acc, i + 1) ) + (acc, succ i) ) in List.rev l diff --git a/motus/lib/validity.mli b/motus/lib/validity.mli index dfd876c..a3d8ae3 100644 --- a/motus/lib/validity.mli +++ b/motus/lib/validity.mli @@ -1,8 +1,13 @@ +module CharSet : Set.S with type elt = char + type t = | Wellplaced | Misplaced | Missing +val elements : float +(** Number of elements in the sum type *) + val sequence : int -> t array Seq.t (** Build a sequence of all the possible status for a given number of letters *) @@ -11,7 +16,7 @@ val index_of_result : t array -> int val index_to_result : base:int -> int -> t array -val compare_words : string -> ref:string -> t array option +val compare_words : string -> ref:string * CharSet.t -> t array option val to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml index 7c400bb..4a84ab0 100644 --- a/motus/lib/wordlist.ml +++ b/motus/lib/wordlist.ml @@ -23,3 +23,5 @@ let words = S.to_seq let list_size = S.cardinal let remove_word t w = S.remove w t + +let pick = S.choose_opt diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli index a56cab3..86881f2 100644 --- a/motus/lib/wordlist.mli +++ b/motus/lib/wordlist.mli @@ -9,6 +9,8 @@ val empty_data : unit -> t val words : t -> string Seq.t (** Load all the words *) +val pick : t -> string option + val list_size : t -> int (** Number of words in the list *) diff --git a/motus/test/motus_test.ml b/motus/test/motus_test.ml index 4ea952e..0586ffe 100644 --- a/motus/test/motus_test.ml +++ b/motus/test/motus_test.ml @@ -37,14 +37,20 @@ let tests = assert_equal 243 count ) ; ( "Compare words 1" >:: fun _ -> + let w = "Test" in + + let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in + assert_equal (Some Validity.[| Wellplaced; Wellplaced; Wellplaced; Wellplaced |]) - (Validity.compare_words "Test" ~ref:"Test") ) + (Validity.compare_words "Test" ~ref) ) ; ( "Compare words 2" >:: fun _ -> + let w = "ABC" in + let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in assert_equal (Some Validity.[| Missing; Misplaced; Wellplaced |]) - (Validity.compare_words "DAC" ~ref:"ABC") ) + (Validity.compare_words "DAC" ~ref) ) ] -- cgit v1.2.3