aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/wordlist.ml
diff options
context:
space:
mode:
Diffstat (limited to 'motus/lib/wordlist.ml')
-rw-r--r--motus/lib/wordlist.ml95
1 files changed, 15 insertions, 80 deletions
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml
index 45fc73a..7c400bb 100644
--- a/motus/lib/wordlist.ml
+++ b/motus/lib/wordlist.ml
@@ -1,90 +1,25 @@
open StdLabels
+module S = Set.Make (String)
-let () = Random.self_init ()
+type t = S.t
-type t =
- { number : int
- ; element : string list
- }
+let empty_data () = S.empty
-let empty_data () = { number = 0; element = [] }
+let add_words : Criteria.t list -> string Seq.t -> t =
+ fun filters words ->
+ Seq.filter
+ (fun word -> List.for_all ~f:(Criteria.check_filter word) filters)
+ words
+ |> S.of_seq
-let update_freq : (char, int) Hashtbl.t -> char -> unit =
- fun freq c ->
- match Hashtbl.find_opt freq c with
- | None -> Hashtbl.add freq c 1
- | Some value -> Hashtbl.replace freq c (value + 1)
+let filter : Criteria.t list -> t -> t =
+ fun filters t ->
+ S.filter (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) t
-(** Evaluate the score for each char (lower is better) *)
-let extract_freq : t -> (char * int) list =
- fun data ->
- let freq = Hashtbl.create 26 in
- List.iter data.element ~f:(fun word ->
- String.iter word ~f:(fun c -> update_freq freq c) );
- let number_2 = data.number / 2 in
- Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
- (* Sort the list for a pretty printing *)
- |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2)
+let words = S.to_seq
+let list_size = S.cardinal
-let add_word : Criteria.t list -> t -> string -> t =
- fun filters data word ->
- match List.for_all ~f:(Criteria.check_filter word) filters with
- | true -> { number = data.number + 1; element = word :: data.element }
- | false -> data
-
-
-(** Get the word which with the most information in it.
-
-The information is the score given to each character, representing each
-frequency in the whole word list (lower is better). If the same letter is
-present many times, we consider that succeding letters does not give any more
-informations (do not consider the position here) *)
-let pick_next_word : t -> (char * int) list -> string * int =
- fun data scores ->
- let p' : (string list * int) option -> string -> (string list * int) option =
- fun prec word ->
- (* evaluate the score for this word *)
- let _, eval =
- String.fold_left
- ~f:(fun (scores, score) c ->
- match List.assoc_opt c scores with
- | None ->
- (* if the character has no score associated, we consider that it
- does not provide any more information, and give it the max
- score available *)
- (scores, score + (data.number / 2))
- | Some v ->
- let new_scores =
- List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
- in
- (new_scores, score + v) )
- ~init:(scores, 0)
- word
- in
- match prec with
- | None -> Some ([ word ], eval)
- | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
- | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
- | _ -> prec
- in
- match List.fold_left ~f:p' ~init:None data.element with
- | None -> ("", 0)
- | Some (words, score) ->
- (* Pick a reandom word from the list *)
- let elements = List.length words in
- let number = Random.int elements in
- (List.nth words number, score)
-
-
-let remove_word : t -> string -> t =
- fun t word ->
- let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in
- { element; number = t.number - 1 }
-
-
-let words : t -> string list = fun { element; _ } -> element
-
-let list_size : t -> int = fun { number; _ } -> number
+let remove_word t w = S.remove w t