aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2022-02-25 19:15:29 +0100
committerSébastien Dailly <sebastien@dailly.me>2022-02-25 19:15:29 +0100
commit4eff667b92ff7ef4c3542650509c03fb0de5cbce (patch)
tree6a6180d355d0ab78626ad5988c5203ee20344187
parent95432043550bd4a41b4466395502bc3b748e6746 (diff)
Added an another engine for motus, using entropy instead of frequencies only
-rw-r--r--motus/bin/motus.ml11
-rw-r--r--motus/js/initialize.ml16
-rw-r--r--motus/js/next.ml10
-rw-r--r--motus/js/reload.ml2
-rw-r--r--motus/lib/entropy.ml55
-rw-r--r--motus/lib/validity.ml41
-rw-r--r--motus/lib/validity.mli7
-rw-r--r--motus/lib/wordlist.ml2
-rw-r--r--motus/lib/wordlist.mli2
-rw-r--r--motus/test/motus_test.ml10
10 files changed, 121 insertions, 35 deletions
diff --git a/motus/bin/motus.ml b/motus/bin/motus.ml
index e32adca..a8a9188 100644
--- a/motus/bin/motus.ml
+++ b/motus/bin/motus.ml
@@ -69,11 +69,14 @@ let create_new_rules word result =
!rules
-let rec run filters words =
+let rec run len filters words =
let () = show_structure Format.std_formatter words filters in
+ (*
let next =
Freq_analysis.analyse words |> Freq_analysis.pick_next_word words
in
+ *)
+ let _, next = Entropy.analyse len words in
let () =
Format.fprintf Format.std_formatter "Next word will be : %s@\n" next
@@ -83,14 +86,14 @@ let rec run filters words =
(* if the input is empty, remove the word from the list and restart *)
match String.equal String.empty input with
- | true -> run filters (Wordlist.remove_word words next)
+ | true -> run len filters (Wordlist.remove_word words next)
| false ->
let new_rules =
Criteria.merge_lists ~init:filters (create_new_rules next input)
|> List.sort_uniq Stdlib.compare
in
let words = Wordlist.filter new_rules words in
- run new_rules words
+ run len new_rules words
let init_rule rules word =
@@ -119,4 +122,4 @@ let () =
let words_channel = open_in (List.hd !dict) in
let words = get_list words_channel initial_filter in
close_in words_channel;
- run initial_filter words
+ run !length initial_filter words
diff --git a/motus/js/initialize.ml b/motus/js/initialize.ml
index a088a0e..9d07a85 100644
--- a/motus/js/initialize.ml
+++ b/motus/js/initialize.ml
@@ -17,11 +17,17 @@ type t =
The rule list is used to identify the letter already fixed by the previous
results. *)
-let get_proposition : Wordlist.t -> Criteria.t list -> State.proposition option
- =
- fun analysis rules ->
+let get_proposition :
+ int -> Wordlist.t -> Criteria.t list -> State.proposition option =
+ fun length wordlist rules ->
+ Printf.printf "Number of elements : %d\n" (Wordlist.list_size wordlist);
+
let word =
- Freq_analysis.analyse analysis |> Freq_analysis.pick_next_word analysis
+ if Wordlist.list_size wordlist > 2000
+ then Freq_analysis.analyse wordlist |> Freq_analysis.pick_next_word wordlist
+ else
+ let _, word = Entropy.analyse length wordlist in
+ word
in
match String.equal String.empty word with
| true -> None
@@ -63,7 +69,7 @@ let process { sender; length; content; proposition } state =
|> Wordlist.add_words rules
in
- let current_prop = get_proposition analysis rules
+ let current_prop = get_proposition length analysis rules
and fields = FieldList.make length sender in
( match current_prop with
| None -> state
diff --git a/motus/js/next.ml b/motus/js/next.ml
index 104b3e6..f3bb2fe 100644
--- a/motus/js/next.ml
+++ b/motus/js/next.ml
@@ -6,24 +6,24 @@ type t = unit
let process : t -> State.state -> State.state =
fun () state ->
- (* Add the current proposition into the validated list *)
+ (* Get the news rules (and match the words only against the new ones )*)
let rules = State.get_current_rules state.current_prop in
+ (* Update the word list with the new rules *)
+ let analysis = Motus_lib.Wordlist.filter rules state.analysis in
+
let rules =
Motus_lib.Criteria.merge_lists ~init:state.rules rules
|> List.sort_uniq ~cmp:Stdlib.compare
in
- (* Update the word list with the new rules *)
- let analysis = Motus_lib.Wordlist.filter rules state.analysis in
-
let propositions = state.current_prop :: state.propositions
and current_prop = [] in
let new_state = { state with propositions; current_prop; rules; analysis } in
(* Get the new proposition if any *)
- let current_prop = Initialize.get_proposition analysis rules in
+ let current_prop = Initialize.get_proposition state.length analysis rules in
match current_prop with
| None -> new_state
| Some prop ->
diff --git a/motus/js/reload.ml b/motus/js/reload.ml
index 2756b74..f0b581f 100644
--- a/motus/js/reload.ml
+++ b/motus/js/reload.ml
@@ -22,7 +22,7 @@ let process : t -> State.state -> State.state =
in
(* Get the new proposition if any *)
let current_prop =
- Initialize.get_proposition new_state.analysis state.rules
+ Initialize.get_proposition state.length new_state.analysis state.rules
in
match current_prop with
| None -> new_state
diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml
new file mode 100644
index 0000000..5b86a9d
--- /dev/null
+++ b/motus/lib/entropy.ml
@@ -0,0 +1,55 @@
+type t = float * string
+
+let get_entropy max_element words_number arr =
+ let entropy = ref 0. in
+ for idx = 0 to max_element - 1 do
+ let content = Float.of_int (Bigarray.Array1.get arr idx) in
+ if content > 0.
+ then
+ let ratio = content /. words_number in
+ entropy := !entropy -. (ratio *. Float.log2 ratio)
+ done;
+ entropy
+
+
+let analyse : int -> Wordlist.t -> t =
+ fun base words ->
+ let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in
+ let words_number = Float.of_int (Wordlist.list_size words) in
+
+ match Wordlist.pick words with
+ | None -> (0., "")
+ | Some v ->
+ (* Build the array *)
+ Seq.fold_left
+ (fun (score, word) word_ref ->
+ (* Reinitialize the array (we use the same in the successive
+ iterations *)
+ let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in
+
+ let arr =
+ Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element
+ in
+
+ Seq.iter
+ (fun w2 ->
+ let result = Validity.compare_words ~ref:(word_ref, set_ref) w2 in
+ match result with
+ | None -> ()
+ | Some r ->
+ let idx = Validity.index_of_result r in
+
+ let content = Bigarray.Array1.get arr idx in
+ Bigarray.Array1.set arr idx (succ content) )
+ (Wordlist.words words);
+
+ (* Now evaluate the entropy in the array *)
+ let entropy = get_entropy max_element words_number arr in
+
+ if !entropy > score
+ then (
+ Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy;
+ (!entropy, word_ref) )
+ else (score, word) )
+ (0., v)
+ (Wordlist.words words)
diff --git a/motus/lib/validity.ml b/motus/lib/validity.ml
index ae85f04..0fdc40c 100644
--- a/motus/lib/validity.ml
+++ b/motus/lib/validity.ml
@@ -12,25 +12,28 @@ type t =
[@@@warning "+32"]
-let m = Float.of_int (1 + max)
+let elements = Float.of_int (1 + max)
(** Get the index of a validity result *)
let index_of_result : t array -> int =
fun elems ->
- let _, value =
- Array.fold_left elems ~init:(0., 0.) ~f:(fun (pos, acc) content ->
- let v = Float.of_int (to_enum content) in
- let acc' = acc +. (v *. (m ** pos)) in
- (pos +. Float.one, acc') )
- in
- Float.to_int value
+ (* This seems to be more efficient than a foldLeft *)
+ let value = ref 0. in
+ Array.iteri
+ ~f:(fun pos content ->
+ let pos = Float.of_int pos in
+ let v = Float.of_int (to_enum content) in
+ let acc' = !value +. (v *. (elements ** pos)) in
+ value := acc' )
+ elems;
+ Float.to_int !value
let index_to_result : base:int -> int -> t array =
fun ~base n ->
let rec _f acc n i =
- let next = Float.round (n /. m)
- and rem = Float.(to_int @@ rem n m) in
+ let next = Float.round (n /. elements)
+ and rem = Float.(to_int @@ rem n elements) in
match (rem, i) with
| _, 0 -> Array.of_list acc
@@ -45,29 +48,33 @@ let index_to_result : base:int -> int -> t array =
(** Build a sequence of all the possible status for a given number of letters *)
let sequence : int -> t array Seq.t =
fun base ->
- let max_element = Float.to_int @@ (m ** Float.of_int base) in
+ let max_element = Float.to_int @@ (elements ** Float.of_int base) in
Seq.unfold
(fun n ->
- if n < max_element then Some (index_to_result ~base n, n + 1) else None )
+ if n < max_element then Some (index_to_result ~base n, succ n) else None
+ )
0
-let compare_words : string -> ref:string -> t array option =
+module CharSet = Set.Make (Char)
+
+let compare_words : string -> ref:string * CharSet.t -> t array option =
fun w1 ~ref ->
+ let wordRef = fst ref in
let l1 = String.length w1 in
- if l1 <> String.length ref
+ if l1 <> String.length wordRef
then None
else
let result =
Array.init l1 ~f:(fun i ->
let c1 = String.get w1 i
- and c2 = String.get ref i in
+ and c2 = String.get wordRef i in
let state =
if Char.equal c1 c2
then Wellplaced
- else if String.contains ref c1
+ else if CharSet.mem c2 (snd ref)
then Misplaced
else Missing
in
@@ -92,7 +99,7 @@ let to_criterias : string -> t array -> Criteria.t list =
let l, _ =
Array.fold_left t ~init:([], 0) ~f:(fun (acc, i) t ->
let acc = to_criteria (String.get word i) i t acc in
- (acc, i + 1) )
+ (acc, succ i) )
in
List.rev l
diff --git a/motus/lib/validity.mli b/motus/lib/validity.mli
index dfd876c..a3d8ae3 100644
--- a/motus/lib/validity.mli
+++ b/motus/lib/validity.mli
@@ -1,8 +1,13 @@
+module CharSet : Set.S with type elt = char
+
type t =
| Wellplaced
| Misplaced
| Missing
+val elements : float
+(** Number of elements in the sum type *)
+
val sequence : int -> t array Seq.t
(** Build a sequence of all the possible status for a given number of letters *)
@@ -11,7 +16,7 @@ val index_of_result : t array -> int
val index_to_result : base:int -> int -> t array
-val compare_words : string -> ref:string -> t array option
+val compare_words : string -> ref:string * CharSet.t -> t array option
val to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml
index 7c400bb..4a84ab0 100644
--- a/motus/lib/wordlist.ml
+++ b/motus/lib/wordlist.ml
@@ -23,3 +23,5 @@ let words = S.to_seq
let list_size = S.cardinal
let remove_word t w = S.remove w t
+
+let pick = S.choose_opt
diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli
index a56cab3..86881f2 100644
--- a/motus/lib/wordlist.mli
+++ b/motus/lib/wordlist.mli
@@ -9,6 +9,8 @@ val empty_data : unit -> t
val words : t -> string Seq.t
(** Load all the words *)
+val pick : t -> string option
+
val list_size : t -> int
(** Number of words in the list *)
diff --git a/motus/test/motus_test.ml b/motus/test/motus_test.ml
index 4ea952e..0586ffe 100644
--- a/motus/test/motus_test.ml
+++ b/motus/test/motus_test.ml
@@ -37,14 +37,20 @@ let tests =
assert_equal 243 count )
; ( "Compare words 1"
>:: fun _ ->
+ let w = "Test" in
+
+ let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in
+
assert_equal
(Some Validity.[| Wellplaced; Wellplaced; Wellplaced; Wellplaced |])
- (Validity.compare_words "Test" ~ref:"Test") )
+ (Validity.compare_words "Test" ~ref) )
; ( "Compare words 2"
>:: fun _ ->
+ let w = "ABC" in
+ let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in
assert_equal
(Some Validity.[| Missing; Misplaced; Wellplaced |])
- (Validity.compare_words "DAC" ~ref:"ABC") )
+ (Validity.compare_words "DAC" ~ref) )
]