diff options
Diffstat (limited to 'motus/lib/freq_analysis.ml')
-rw-r--r-- | motus/lib/freq_analysis.ml | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/motus/lib/freq_analysis.ml b/motus/lib/freq_analysis.ml new file mode 100644 index 0000000..12f5fef --- /dev/null +++ b/motus/lib/freq_analysis.ml @@ -0,0 +1,68 @@ +open StdLabels + +let () = Random.self_init () + +type t = (char * int) list + +let update_freq : (char, int) Hashtbl.t -> char -> unit = + fun freq c -> + match Hashtbl.find_opt freq c with + | None -> Hashtbl.add freq c 1 + | Some value -> Hashtbl.replace freq c (value + 1) + + +(** Evaluate the score for each char (lower is better) *) +let analyse : Wordlist.t -> (char * int) list = + fun data -> + let freq = Hashtbl.create 26 in + Seq.iter + (fun word -> String.iter word ~f:(update_freq freq)) + (Wordlist.words data); + + let number_2 = Wordlist.list_size data / 2 in + Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] + + +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) +let pick_next_word : Wordlist.t -> (char * int) list -> string = + fun data scores -> + let list_size = Wordlist.list_size data / 2 in + + let p' : (string list * int) option -> string -> (string list * int) option = + fun prec word -> + (* evaluate the score for this word *) + let _, eval = + String.fold_left + ~f:(fun (scores, score) c -> + match List.assoc_opt c scores with + | None -> + (* if the character has no score associated, we consider that it + does not provide any more information, and give it the max + score available *) + (scores, score + list_size) + | Some v -> + let new_scores = + List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores + in + (new_scores, score + v) ) + ~init:(scores, 0) + word + in + match prec with + | None -> Some ([ word ], eval) + | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) + | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) + | _ -> prec + in + match Seq.fold_left p' None (Wordlist.words data) with + | None -> "" + | Some (words, _) -> + (* Pick a reandom word from the list *) + let elements = List.length words in + let number = Random.int elements in + List.nth words number |