aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/wordlist.ml
diff options
context:
space:
mode:
Diffstat (limited to 'motus/lib/wordlist.ml')
-rw-r--r--motus/lib/wordlist.ml90
1 files changed, 90 insertions, 0 deletions
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml
new file mode 100644
index 0000000..45fc73a
--- /dev/null
+++ b/motus/lib/wordlist.ml
@@ -0,0 +1,90 @@
+open StdLabels
+
+let () = Random.self_init ()
+
+type t =
+ { number : int
+ ; element : string list
+ }
+
+let empty_data () = { number = 0; element = [] }
+
+let update_freq : (char, int) Hashtbl.t -> char -> unit =
+ fun freq c ->
+ match Hashtbl.find_opt freq c with
+ | None -> Hashtbl.add freq c 1
+ | Some value -> Hashtbl.replace freq c (value + 1)
+
+
+(** Evaluate the score for each char (lower is better) *)
+let extract_freq : t -> (char * int) list =
+ fun data ->
+ let freq = Hashtbl.create 26 in
+ List.iter data.element ~f:(fun word ->
+ String.iter word ~f:(fun c -> update_freq freq c) );
+
+ let number_2 = data.number / 2 in
+ Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
+ (* Sort the list for a pretty printing *)
+ |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2)
+
+
+let add_word : Criteria.t list -> t -> string -> t =
+ fun filters data word ->
+ match List.for_all ~f:(Criteria.check_filter word) filters with
+ | true -> { number = data.number + 1; element = word :: data.element }
+ | false -> data
+
+
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
+let pick_next_word : t -> (char * int) list -> string * int =
+ fun data scores ->
+ let p' : (string list * int) option -> string -> (string list * int) option =
+ fun prec word ->
+ (* evaluate the score for this word *)
+ let _, eval =
+ String.fold_left
+ ~f:(fun (scores, score) c ->
+ match List.assoc_opt c scores with
+ | None ->
+ (* if the character has no score associated, we consider that it
+ does not provide any more information, and give it the max
+ score available *)
+ (scores, score + (data.number / 2))
+ | Some v ->
+ let new_scores =
+ List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
+ in
+ (new_scores, score + v) )
+ ~init:(scores, 0)
+ word
+ in
+ match prec with
+ | None -> Some ([ word ], eval)
+ | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
+ | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
+ | _ -> prec
+ in
+ match List.fold_left ~f:p' ~init:None data.element with
+ | None -> ("", 0)
+ | Some (words, score) ->
+ (* Pick a reandom word from the list *)
+ let elements = List.length words in
+ let number = Random.int elements in
+ (List.nth words number, score)
+
+
+let remove_word : t -> string -> t =
+ fun t word ->
+ let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in
+ { element; number = t.number - 1 }
+
+
+let words : t -> string list = fun { element; _ } -> element
+
+let list_size : t -> int = fun { number; _ } -> number