diff options
Diffstat (limited to 'motus/lib/wordlist.ml')
-rw-r--r-- | motus/lib/wordlist.ml | 95 |
1 files changed, 15 insertions, 80 deletions
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml index 45fc73a..7c400bb 100644 --- a/motus/lib/wordlist.ml +++ b/motus/lib/wordlist.ml @@ -1,90 +1,25 @@ open StdLabels +module S = Set.Make (String) -let () = Random.self_init () +type t = S.t -type t = - { number : int - ; element : string list - } +let empty_data () = S.empty -let empty_data () = { number = 0; element = [] } +let add_words : Criteria.t list -> string Seq.t -> t = + fun filters words -> + Seq.filter + (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) + words + |> S.of_seq -let update_freq : (char, int) Hashtbl.t -> char -> unit = - fun freq c -> - match Hashtbl.find_opt freq c with - | None -> Hashtbl.add freq c 1 - | Some value -> Hashtbl.replace freq c (value + 1) +let filter : Criteria.t list -> t -> t = + fun filters t -> + S.filter (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) t -(** Evaluate the score for each char (lower is better) *) -let extract_freq : t -> (char * int) list = - fun data -> - let freq = Hashtbl.create 26 in - List.iter data.element ~f:(fun word -> - String.iter word ~f:(fun c -> update_freq freq c) ); - let number_2 = data.number / 2 in - Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] - (* Sort the list for a pretty printing *) - |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2) +let words = S.to_seq +let list_size = S.cardinal -let add_word : Criteria.t list -> t -> string -> t = - fun filters data word -> - match List.for_all ~f:(Criteria.check_filter word) filters with - | true -> { number = data.number + 1; element = word :: data.element } - | false -> data - - -(** Get the word which with the most information in it. - -The information is the score given to each character, representing each -frequency in the whole word list (lower is better). If the same letter is -present many times, we consider that succeding letters does not give any more -informations (do not consider the position here) *) -let pick_next_word : t -> (char * int) list -> string * int = - fun data scores -> - let p' : (string list * int) option -> string -> (string list * int) option = - fun prec word -> - (* evaluate the score for this word *) - let _, eval = - String.fold_left - ~f:(fun (scores, score) c -> - match List.assoc_opt c scores with - | None -> - (* if the character has no score associated, we consider that it - does not provide any more information, and give it the max - score available *) - (scores, score + (data.number / 2)) - | Some v -> - let new_scores = - List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores - in - (new_scores, score + v) ) - ~init:(scores, 0) - word - in - match prec with - | None -> Some ([ word ], eval) - | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) - | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) - | _ -> prec - in - match List.fold_left ~f:p' ~init:None data.element with - | None -> ("", 0) - | Some (words, score) -> - (* Pick a reandom word from the list *) - let elements = List.length words in - let number = Random.int elements in - (List.nth words number, score) - - -let remove_word : t -> string -> t = - fun t word -> - let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in - { element; number = t.number - 1 } - - -let words : t -> string list = fun { element; _ } -> element - -let list_size : t -> int = fun { number; _ } -> number +let remove_word t w = S.remove w t |