open StdLabels let () = Random.self_init () type t = (char * int) list let update_freq : (char, int) Hashtbl.t -> char -> unit = fun freq c -> match Hashtbl.find_opt freq c with | None -> Hashtbl.add freq c 1 | Some value -> Hashtbl.replace freq c (value + 1) (** Evaluate the score for each char (lower is better) *) let analyse : Wordlist.t -> (char * int) list = fun data -> let freq = Hashtbl.create 26 in Seq.iter (fun word -> String.iter word ~f:(update_freq freq)) (Wordlist.words data); let number_2 = Wordlist.list_size data / 2 in Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] (** Get the word which with the most information in it. The information is the score given to each character, representing each frequency in the whole word list (lower is better). If the same letter is present many times, we consider that succeding letters does not give any more informations (do not consider the position here) *) let pick_next_word : Wordlist.t -> (char * int) list -> string = fun data scores -> let list_size = Wordlist.list_size data / 2 in let p' : (string list * int) option -> string -> (string list * int) option = fun prec word -> (* evaluate the score for this word *) let _, eval = String.fold_left ~f:(fun (scores, score) c -> match List.assoc_opt c scores with | None -> (* if the character has no score associated, we consider that it does not provide any more information, and give it the max score available *) (scores, score + list_size) | Some v -> let new_scores = List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores in (new_scores, score + v) ) ~init:(scores, 0) word in match prec with | None -> Some ([ word ], eval) | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) | _ -> prec in match Seq.fold_left p' None (Wordlist.words data) with | None -> "" | Some (words, _) -> (* Pick a reandom word from the list *) let elements = List.length words in let number = Random.int elements in List.nth words number