diff options
| author | Sébastien Dailly <sebastien@dailly.me> | 2022-02-25 19:15:29 +0100 | 
|---|---|---|
| committer | Sébastien Dailly <sebastien@dailly.me> | 2022-02-25 19:15:29 +0100 | 
| commit | 4eff667b92ff7ef4c3542650509c03fb0de5cbce (patch) | |
| tree | 6a6180d355d0ab78626ad5988c5203ee20344187 | |
| parent | 95432043550bd4a41b4466395502bc3b748e6746 (diff) | |
Added an another engine for motus, using entropy instead of frequencies only
| -rw-r--r-- | motus/bin/motus.ml | 11 | ||||
| -rw-r--r-- | motus/js/initialize.ml | 16 | ||||
| -rw-r--r-- | motus/js/next.ml | 10 | ||||
| -rw-r--r-- | motus/js/reload.ml | 2 | ||||
| -rw-r--r-- | motus/lib/entropy.ml | 55 | ||||
| -rw-r--r-- | motus/lib/validity.ml | 41 | ||||
| -rw-r--r-- | motus/lib/validity.mli | 7 | ||||
| -rw-r--r-- | motus/lib/wordlist.ml | 2 | ||||
| -rw-r--r-- | motus/lib/wordlist.mli | 2 | ||||
| -rw-r--r-- | motus/test/motus_test.ml | 10 | 
10 files changed, 121 insertions, 35 deletions
diff --git a/motus/bin/motus.ml b/motus/bin/motus.ml index e32adca..a8a9188 100644 --- a/motus/bin/motus.ml +++ b/motus/bin/motus.ml @@ -69,11 +69,14 @@ let create_new_rules word result =    !rules -let rec run filters words = +let rec run len filters words =    let () = show_structure Format.std_formatter words filters in +  (*    let next =      Freq_analysis.analyse words |> Freq_analysis.pick_next_word words    in +  *) +  let _, next = Entropy.analyse len words in    let () =      Format.fprintf Format.std_formatter "Next word will be : %s@\n" next @@ -83,14 +86,14 @@ let rec run filters words =    (* if the input is empty, remove the word from the list and restart *)    match String.equal String.empty input with -  | true -> run filters (Wordlist.remove_word words next) +  | true -> run len filters (Wordlist.remove_word words next)    | false ->        let new_rules =          Criteria.merge_lists ~init:filters (create_new_rules next input)          |> List.sort_uniq Stdlib.compare        in        let words = Wordlist.filter new_rules words in -      run new_rules words +      run len new_rules words  let init_rule rules word = @@ -119,4 +122,4 @@ let () =    let words_channel = open_in (List.hd !dict) in    let words = get_list words_channel initial_filter in    close_in words_channel; -  run initial_filter words +  run !length initial_filter words diff --git a/motus/js/initialize.ml b/motus/js/initialize.ml index a088a0e..9d07a85 100644 --- a/motus/js/initialize.ml +++ b/motus/js/initialize.ml @@ -17,11 +17,17 @@ type t =      The rule list is used to identify the letter already fixed by the previous      results. *) -let get_proposition : Wordlist.t -> Criteria.t list -> State.proposition option -    = - fun analysis rules -> +let get_proposition : +    int -> Wordlist.t -> Criteria.t list -> State.proposition option = + fun length wordlist rules -> +  Printf.printf "Number of elements : %d\n" (Wordlist.list_size wordlist); +    let word = -    Freq_analysis.analyse analysis |> Freq_analysis.pick_next_word analysis +    if Wordlist.list_size wordlist > 2000 +    then Freq_analysis.analyse wordlist |> Freq_analysis.pick_next_word wordlist +    else +      let _, word = Entropy.analyse length wordlist in +      word    in    match String.equal String.empty word with    | true -> None @@ -63,7 +69,7 @@ let process { sender; length; content; proposition } state =          |> Wordlist.add_words rules        in -      let current_prop = get_proposition analysis rules +      let current_prop = get_proposition length analysis rules        and fields = FieldList.make length sender in        ( match current_prop with        | None -> state diff --git a/motus/js/next.ml b/motus/js/next.ml index 104b3e6..f3bb2fe 100644 --- a/motus/js/next.ml +++ b/motus/js/next.ml @@ -6,24 +6,24 @@ type t = unit  let process : t -> State.state -> State.state =   fun () state -> -  (* Add the current proposition into the validated list *) +  (* Get the news rules (and match the words only against the new ones )*)    let rules = State.get_current_rules state.current_prop in +  (* Update the word list with the new rules *) +  let analysis = Motus_lib.Wordlist.filter rules state.analysis in +    let rules =      Motus_lib.Criteria.merge_lists ~init:state.rules rules      |> List.sort_uniq ~cmp:Stdlib.compare    in -  (* Update the word list with the new rules *) -  let analysis = Motus_lib.Wordlist.filter rules state.analysis in -    let propositions = state.current_prop :: state.propositions    and current_prop = [] in    let new_state = { state with propositions; current_prop; rules; analysis } in    (* Get the new proposition if any *) -  let current_prop = Initialize.get_proposition analysis rules in +  let current_prop = Initialize.get_proposition state.length analysis rules in    match current_prop with    | None -> new_state    | Some prop -> diff --git a/motus/js/reload.ml b/motus/js/reload.ml index 2756b74..f0b581f 100644 --- a/motus/js/reload.ml +++ b/motus/js/reload.ml @@ -22,7 +22,7 @@ let process : t -> State.state -> State.state =    in    (* Get the new proposition if any *)    let current_prop = -    Initialize.get_proposition new_state.analysis state.rules +    Initialize.get_proposition state.length new_state.analysis state.rules    in    match current_prop with    | None -> new_state diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml new file mode 100644 index 0000000..5b86a9d --- /dev/null +++ b/motus/lib/entropy.ml @@ -0,0 +1,55 @@ +type t = float * string + +let get_entropy max_element words_number arr = +  let entropy = ref 0. in +  for idx = 0 to max_element - 1 do +    let content = Float.of_int (Bigarray.Array1.get arr idx) in +    if content > 0. +    then +      let ratio = content /. words_number in +      entropy := !entropy -. (ratio *. Float.log2 ratio) +  done; +  entropy + + +let analyse : int -> Wordlist.t -> t = + fun base words -> +  let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in +  let words_number = Float.of_int (Wordlist.list_size words) in + +  match Wordlist.pick words with +  | None -> (0., "") +  | Some v -> +      (* Build the array  *) +      Seq.fold_left +        (fun (score, word) word_ref -> +          (* Reinitialize the array (we use the same in the successive +             iterations *) +          let set_ref = String.to_seq word_ref |> Validity.CharSet.of_seq in + +          let arr = +            Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element +          in + +          Seq.iter +            (fun w2 -> +              let result = Validity.compare_words ~ref:(word_ref, set_ref) w2 in +              match result with +              | None -> () +              | Some r -> +                  let idx = Validity.index_of_result r in + +                  let content = Bigarray.Array1.get arr idx in +                  Bigarray.Array1.set arr idx (succ content) ) +            (Wordlist.words words); + +          (* Now evaluate the entropy in the array *) +          let entropy = get_entropy max_element words_number arr in + +          if !entropy > score +          then ( +            Printf.printf "Entropy for selecting %s : %.2f\n" word_ref !entropy; +            (!entropy, word_ref) ) +          else (score, word) ) +        (0., v) +        (Wordlist.words words) diff --git a/motus/lib/validity.ml b/motus/lib/validity.ml index ae85f04..0fdc40c 100644 --- a/motus/lib/validity.ml +++ b/motus/lib/validity.ml @@ -12,25 +12,28 @@ type t =  [@@@warning "+32"] -let m = Float.of_int (1 + max) +let elements = Float.of_int (1 + max)  (** Get the index of a validity result *)  let index_of_result : t array -> int =   fun elems -> -  let _, value = -    Array.fold_left elems ~init:(0., 0.) ~f:(fun (pos, acc) content -> -        let v = Float.of_int (to_enum content) in -        let acc' = acc +. (v *. (m ** pos)) in -        (pos +. Float.one, acc') ) -  in -  Float.to_int value +  (* This seems to be more efficient than a foldLeft *) +  let value = ref 0. in +  Array.iteri +    ~f:(fun pos content -> +      let pos = Float.of_int pos in +      let v = Float.of_int (to_enum content) in +      let acc' = !value +. (v *. (elements ** pos)) in +      value := acc' ) +    elems; +  Float.to_int !value  let index_to_result : base:int -> int -> t array =   fun ~base n ->    let rec _f acc n i = -    let next = Float.round (n /. m) -    and rem = Float.(to_int @@ rem n m) in +    let next = Float.round (n /. elements) +    and rem = Float.(to_int @@ rem n elements) in      match (rem, i) with      | _, 0 -> Array.of_list acc @@ -45,29 +48,33 @@ let index_to_result : base:int -> int -> t array =  (** Build a sequence of all the possible status for a given number of letters *)  let sequence : int -> t array Seq.t =   fun base -> -  let max_element = Float.to_int @@ (m ** Float.of_int base) in +  let max_element = Float.to_int @@ (elements ** Float.of_int base) in    Seq.unfold      (fun n -> -      if n < max_element then Some (index_to_result ~base n, n + 1) else None ) +      if n < max_element then Some (index_to_result ~base n, succ n) else None +      )      0 -let compare_words : string -> ref:string -> t array option = +module CharSet = Set.Make (Char) + +let compare_words : string -> ref:string * CharSet.t -> t array option =   fun w1 ~ref -> +  let wordRef = fst ref in    let l1 = String.length w1 in -  if l1 <> String.length ref +  if l1 <> String.length wordRef    then None    else      let result =        Array.init l1 ~f:(fun i ->            let c1 = String.get w1 i -          and c2 = String.get ref i in +          and c2 = String.get wordRef i in            let state =              if Char.equal c1 c2              then Wellplaced -            else if String.contains ref c1 +            else if CharSet.mem c2 (snd ref)              then Misplaced              else Missing            in @@ -92,7 +99,7 @@ let to_criterias : string -> t array -> Criteria.t list =    let l, _ =      Array.fold_left t ~init:([], 0) ~f:(fun (acc, i) t ->          let acc = to_criteria (String.get word i) i t acc in -        (acc, i + 1) ) +        (acc, succ i) )    in    List.rev l diff --git a/motus/lib/validity.mli b/motus/lib/validity.mli index dfd876c..a3d8ae3 100644 --- a/motus/lib/validity.mli +++ b/motus/lib/validity.mli @@ -1,8 +1,13 @@ +module CharSet : Set.S with type elt = char +  type t =    | Wellplaced    | Misplaced    | Missing +val elements : float +(** Number of elements in the sum type *) +  val sequence : int -> t array Seq.t  (** Build a sequence of all the possible status for a given number of letters *) @@ -11,7 +16,7 @@ val index_of_result : t array -> int  val index_to_result : base:int -> int -> t array -val compare_words : string -> ref:string -> t array option +val compare_words : string -> ref:string * CharSet.t -> t array option  val to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml index 7c400bb..4a84ab0 100644 --- a/motus/lib/wordlist.ml +++ b/motus/lib/wordlist.ml @@ -23,3 +23,5 @@ let words = S.to_seq  let list_size = S.cardinal  let remove_word t w = S.remove w t + +let pick = S.choose_opt diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli index a56cab3..86881f2 100644 --- a/motus/lib/wordlist.mli +++ b/motus/lib/wordlist.mli @@ -9,6 +9,8 @@ val empty_data : unit -> t  val words : t -> string Seq.t  (** Load all the words *) +val pick : t -> string option +  val list_size : t -> int  (** Number of words in the list *) diff --git a/motus/test/motus_test.ml b/motus/test/motus_test.ml index 4ea952e..0586ffe 100644 --- a/motus/test/motus_test.ml +++ b/motus/test/motus_test.ml @@ -37,14 +37,20 @@ let tests =           assert_equal 243 count )         ; ( "Compare words 1"           >:: fun _ -> +         let w = "Test" in + +         let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in +           assert_equal             (Some Validity.[| Wellplaced; Wellplaced; Wellplaced; Wellplaced |]) -           (Validity.compare_words "Test" ~ref:"Test") ) +           (Validity.compare_words "Test" ~ref) )         ; ( "Compare words 2"           >:: fun _ -> +         let w = "ABC" in +         let ref = (w, Validity.CharSet.of_seq (String.to_seq w)) in           assert_equal             (Some Validity.[| Missing; Misplaced; Wellplaced |]) -           (Validity.compare_words "DAC" ~ref:"ABC") ) +           (Validity.compare_words "DAC" ~ref) )         ]  | 
