aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/freq_analysis.ml
diff options
context:
space:
mode:
Diffstat (limited to 'motus/lib/freq_analysis.ml')
-rw-r--r--motus/lib/freq_analysis.ml68
1 files changed, 68 insertions, 0 deletions
diff --git a/motus/lib/freq_analysis.ml b/motus/lib/freq_analysis.ml
new file mode 100644
index 0000000..12f5fef
--- /dev/null
+++ b/motus/lib/freq_analysis.ml
@@ -0,0 +1,68 @@
+open StdLabels
+
+let () = Random.self_init ()
+
+type t = (char * int) list
+
+let update_freq : (char, int) Hashtbl.t -> char -> unit =
+ fun freq c ->
+ match Hashtbl.find_opt freq c with
+ | None -> Hashtbl.add freq c 1
+ | Some value -> Hashtbl.replace freq c (value + 1)
+
+
+(** Evaluate the score for each char (lower is better) *)
+let analyse : Wordlist.t -> (char * int) list =
+ fun data ->
+ let freq = Hashtbl.create 26 in
+ Seq.iter
+ (fun word -> String.iter word ~f:(update_freq freq))
+ (Wordlist.words data);
+
+ let number_2 = Wordlist.list_size data / 2 in
+ Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
+
+
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
+let pick_next_word : Wordlist.t -> (char * int) list -> string =
+ fun data scores ->
+ let list_size = Wordlist.list_size data / 2 in
+
+ let p' : (string list * int) option -> string -> (string list * int) option =
+ fun prec word ->
+ (* evaluate the score for this word *)
+ let _, eval =
+ String.fold_left
+ ~f:(fun (scores, score) c ->
+ match List.assoc_opt c scores with
+ | None ->
+ (* if the character has no score associated, we consider that it
+ does not provide any more information, and give it the max
+ score available *)
+ (scores, score + list_size)
+ | Some v ->
+ let new_scores =
+ List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
+ in
+ (new_scores, score + v) )
+ ~init:(scores, 0)
+ word
+ in
+ match prec with
+ | None -> Some ([ word ], eval)
+ | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
+ | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
+ | _ -> prec
+ in
+ match Seq.fold_left p' None (Wordlist.words data) with
+ | None -> ""
+ | Some (words, _) ->
+ (* Pick a reandom word from the list *)
+ let elements = List.length words in
+ let number = Random.int elements in
+ List.nth words number