From 4eff667b92ff7ef4c3542650509c03fb0de5cbce Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Fri, 25 Feb 2022 19:15:29 +0100 Subject: Added an another engine for motus, using entropy instead of frequencies only --- motus/lib/entropy.ml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ motus/lib/validity.ml | 41 +++++++++++++++++++++---------------- motus/lib/validity.mli | 7 ++++++- motus/lib/wordlist.ml | 2 ++ motus/lib/wordlist.mli | 2 ++ 5 files changed, 89 insertions(+), 18 deletions(-) create mode 100644 motus/lib/entropy.ml (limited to 'motus/lib') diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml new file mode 100644 index 0000000..5b86a9d --- /dev/null +++ b/motus/lib/entropy.ml @@ -0,0 +1,55 @@ +type t = float * string + +let get_entropy max_element words_number arr = + let entropy = ref 0. in + for idx = 0 to max_element - 1 do + let content = Float.of_int (Bigarray.Array1.get arr idx) in + if content > 0. + then + let ratio = content /. words_number in + entropy := !entropy -. (ratio *. Float.log2 ratio) + done; + entropy + + +let analyse : int -> Wordlist.t -> t = + fun base words -> + let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in + let words_number = Float.of_int (Wordlist.list_size words) in + + match Wordlist.pick words with + | None -> (0., "") + | Some v -> + (* Build the array *) + Seq.fold_left + (fun (score, word) word_ref -> + (* Reinitialize the array (we use the same in the successive + iterations *) + let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in + + let arr = + Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element + in + + Seq.iter + (fun w2 -> + let result = Validity.compare_words ~ref:(word_ref, set_ref) w2 in + match result with + | None -> () + | Some r -> + let idx = Validity.index_of_result r in + + let content = Bigarray.Array1.get arr idx in + Bigarray.Array1.set arr idx (succ content) ) + (Wordlist.words words); + + (* Now evaluate the entropy in the array *) + let entropy = get_entropy max_element words_number arr in + + if !entropy > score + then ( + Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy; + (!entropy, word_ref) ) + else (score, word) ) + (0., v) + (Wordlist.words words) diff --git a/motus/lib/validity.ml b/motus/lib/validity.ml index ae85f04..0fdc40c 100644 --- a/motus/lib/validity.ml +++ b/motus/lib/validity.ml @@ -12,25 +12,28 @@ type t = [@@@warning "+32"] -let m = Float.of_int (1 + max) +let elements = Float.of_int (1 + max) (** Get the index of a validity result *) let index_of_result : t array -> int = fun elems -> - let _, value = - Array.fold_left elems ~init:(0., 0.) ~f:(fun (pos, acc) content -> - let v = Float.of_int (to_enum content) in - let acc' = acc +. (v *. (m ** pos)) in - (pos +. Float.one, acc') ) - in - Float.to_int value + (* This seems to be more efficient than a foldLeft *) + let value = ref 0. in + Array.iteri + ~f:(fun pos content -> + let pos = Float.of_int pos in + let v = Float.of_int (to_enum content) in + let acc' = !value +. (v *. (elements ** pos)) in + value := acc' ) + elems; + Float.to_int !value let index_to_result : base:int -> int -> t array = fun ~base n -> let rec _f acc n i = - let next = Float.round (n /. m) - and rem = Float.(to_int @@ rem n m) in + let next = Float.round (n /. elements) + and rem = Float.(to_int @@ rem n elements) in match (rem, i) with | _, 0 -> Array.of_list acc @@ -45,29 +48,33 @@ let index_to_result : base:int -> int -> t array = (** Build a sequence of all the possible status for a given number of letters *) let sequence : int -> t array Seq.t = fun base -> - let max_element = Float.to_int @@ (m ** Float.of_int base) in + let max_element = Float.to_int @@ (elements ** Float.of_int base) in Seq.unfold (fun n -> - if n < max_element then Some (index_to_result ~base n, n + 1) else None ) + if n < max_element then Some (index_to_result ~base n, succ n) else None + ) 0 -let compare_words : string -> ref:string -> t array option = +module CharSet = Set.Make (Char) + +let compare_words : string -> ref:string * CharSet.t -> t array option = fun w1 ~ref -> + let wordRef = fst ref in let l1 = String.length w1 in - if l1 <> String.length ref + if l1 <> String.length wordRef then None else let result = Array.init l1 ~f:(fun i -> let c1 = String.get w1 i - and c2 = String.get ref i in + and c2 = String.get wordRef i in let state = if Char.equal c1 c2 then Wellplaced - else if String.contains ref c1 + else if CharSet.mem c2 (snd ref) then Misplaced else Missing in @@ -92,7 +99,7 @@ let to_criterias : string -> t array -> Criteria.t list = let l, _ = Array.fold_left t ~init:([], 0) ~f:(fun (acc, i) t -> let acc = to_criteria (String.get word i) i t acc in - (acc, i + 1) ) + (acc, succ i) ) in List.rev l diff --git a/motus/lib/validity.mli b/motus/lib/validity.mli index dfd876c..a3d8ae3 100644 --- a/motus/lib/validity.mli +++ b/motus/lib/validity.mli @@ -1,8 +1,13 @@ +module CharSet : Set.S with type elt = char + type t = | Wellplaced | Misplaced | Missing +val elements : float +(** Number of elements in the sum type *) + val sequence : int -> t array Seq.t (** Build a sequence of all the possible status for a given number of letters *) @@ -11,7 +16,7 @@ val index_of_result : t array -> int val index_to_result : base:int -> int -> t array -val compare_words : string -> ref:string -> t array option +val compare_words : string -> ref:string * CharSet.t -> t array option val to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml index 7c400bb..4a84ab0 100644 --- a/motus/lib/wordlist.ml +++ b/motus/lib/wordlist.ml @@ -23,3 +23,5 @@ let words = S.to_seq let list_size = S.cardinal let remove_word t w = S.remove w t + +let pick = S.choose_opt diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli index a56cab3..86881f2 100644 --- a/motus/lib/wordlist.mli +++ b/motus/lib/wordlist.mli @@ -9,6 +9,8 @@ val empty_data : unit -> t val words : t -> string Seq.t (** Load all the words *) +val pick : t -> string option + val list_size : t -> int (** Number of words in the list *) -- cgit v1.2.3