aboutsummaryrefslogtreecommitdiff
path: root/motus/lib
diff options
context:
space:
mode:
Diffstat (limited to 'motus/lib')
-rw-r--r--motus/lib/freq_analysis.ml68
-rw-r--r--motus/lib/freq_analysis.mli12
-rw-r--r--motus/lib/wordlist.ml95
-rw-r--r--motus/lib/wordlist.mli33
4 files changed, 112 insertions, 96 deletions
diff --git a/motus/lib/freq_analysis.ml b/motus/lib/freq_analysis.ml
new file mode 100644
index 0000000..12f5fef
--- /dev/null
+++ b/motus/lib/freq_analysis.ml
@@ -0,0 +1,68 @@
+open StdLabels
+
+let () = Random.self_init ()
+
+type t = (char * int) list
+
+let update_freq : (char, int) Hashtbl.t -> char -> unit =
+ fun freq c ->
+ match Hashtbl.find_opt freq c with
+ | None -> Hashtbl.add freq c 1
+ | Some value -> Hashtbl.replace freq c (value + 1)
+
+
+(** Evaluate the score for each char (lower is better) *)
+let analyse : Wordlist.t -> (char * int) list =
+ fun data ->
+ let freq = Hashtbl.create 26 in
+ Seq.iter
+ (fun word -> String.iter word ~f:(update_freq freq))
+ (Wordlist.words data);
+
+ let number_2 = Wordlist.list_size data / 2 in
+ Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
+
+
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
+let pick_next_word : Wordlist.t -> (char * int) list -> string =
+ fun data scores ->
+ let list_size = Wordlist.list_size data / 2 in
+
+ let p' : (string list * int) option -> string -> (string list * int) option =
+ fun prec word ->
+ (* evaluate the score for this word *)
+ let _, eval =
+ String.fold_left
+ ~f:(fun (scores, score) c ->
+ match List.assoc_opt c scores with
+ | None ->
+ (* if the character has no score associated, we consider that it
+ does not provide any more information, and give it the max
+ score available *)
+ (scores, score + list_size)
+ | Some v ->
+ let new_scores =
+ List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
+ in
+ (new_scores, score + v) )
+ ~init:(scores, 0)
+ word
+ in
+ match prec with
+ | None -> Some ([ word ], eval)
+ | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
+ | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
+ | _ -> prec
+ in
+ match Seq.fold_left p' None (Wordlist.words data) with
+ | None -> ""
+ | Some (words, _) ->
+ (* Pick a reandom word from the list *)
+ let elements = List.length words in
+ let number = Random.int elements in
+ List.nth words number
diff --git a/motus/lib/freq_analysis.mli b/motus/lib/freq_analysis.mli
new file mode 100644
index 0000000..bd7f1d8
--- /dev/null
+++ b/motus/lib/freq_analysis.mli
@@ -0,0 +1,12 @@
+type t
+
+val analyse : Wordlist.t -> t
+(** Evaluate the score for each char (lower is better) *)
+
+val pick_next_word : Wordlist.t -> t -> string
+(** Get the word which with the most information in it.
+
+The information is the score given to each character, representing each
+frequency in the whole word list (lower is better). If the same letter is
+present many times, we consider that succeding letters does not give any more
+informations (do not consider the position here) *)
diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml
index 45fc73a..7c400bb 100644
--- a/motus/lib/wordlist.ml
+++ b/motus/lib/wordlist.ml
@@ -1,90 +1,25 @@
open StdLabels
+module S = Set.Make (String)
-let () = Random.self_init ()
+type t = S.t
-type t =
- { number : int
- ; element : string list
- }
+let empty_data () = S.empty
-let empty_data () = { number = 0; element = [] }
+let add_words : Criteria.t list -> string Seq.t -> t =
+ fun filters words ->
+ Seq.filter
+ (fun word -> List.for_all ~f:(Criteria.check_filter word) filters)
+ words
+ |> S.of_seq
-let update_freq : (char, int) Hashtbl.t -> char -> unit =
- fun freq c ->
- match Hashtbl.find_opt freq c with
- | None -> Hashtbl.add freq c 1
- | Some value -> Hashtbl.replace freq c (value + 1)
+let filter : Criteria.t list -> t -> t =
+ fun filters t ->
+ S.filter (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) t
-(** Evaluate the score for each char (lower is better) *)
-let extract_freq : t -> (char * int) list =
- fun data ->
- let freq = Hashtbl.create 26 in
- List.iter data.element ~f:(fun word ->
- String.iter word ~f:(fun c -> update_freq freq c) );
- let number_2 = data.number / 2 in
- Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
- (* Sort the list for a pretty printing *)
- |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2)
+let words = S.to_seq
+let list_size = S.cardinal
-let add_word : Criteria.t list -> t -> string -> t =
- fun filters data word ->
- match List.for_all ~f:(Criteria.check_filter word) filters with
- | true -> { number = data.number + 1; element = word :: data.element }
- | false -> data
-
-
-(** Get the word which with the most information in it.
-
-The information is the score given to each character, representing each
-frequency in the whole word list (lower is better). If the same letter is
-present many times, we consider that succeding letters does not give any more
-informations (do not consider the position here) *)
-let pick_next_word : t -> (char * int) list -> string * int =
- fun data scores ->
- let p' : (string list * int) option -> string -> (string list * int) option =
- fun prec word ->
- (* evaluate the score for this word *)
- let _, eval =
- String.fold_left
- ~f:(fun (scores, score) c ->
- match List.assoc_opt c scores with
- | None ->
- (* if the character has no score associated, we consider that it
- does not provide any more information, and give it the max
- score available *)
- (scores, score + (data.number / 2))
- | Some v ->
- let new_scores =
- List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
- in
- (new_scores, score + v) )
- ~init:(scores, 0)
- word
- in
- match prec with
- | None -> Some ([ word ], eval)
- | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
- | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
- | _ -> prec
- in
- match List.fold_left ~f:p' ~init:None data.element with
- | None -> ("", 0)
- | Some (words, score) ->
- (* Pick a reandom word from the list *)
- let elements = List.length words in
- let number = Random.int elements in
- (List.nth words number, score)
-
-
-let remove_word : t -> string -> t =
- fun t word ->
- let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in
- { element; number = t.number - 1 }
-
-
-let words : t -> string list = fun { element; _ } -> element
-
-let list_size : t -> int = fun { number; _ } -> number
+let remove_word t w = S.remove w t
diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli
index 766fbdf..a56cab3 100644
--- a/motus/lib/wordlist.mli
+++ b/motus/lib/wordlist.mli
@@ -1,25 +1,26 @@
+(** The dictionnary *)
+
type t
-val words : t -> string list
+val empty_data : unit -> t
+(** Create an empty dictionnary. The resulting dictionnay cannot be used, it is
+ juste here if required for initialisation *)
+
+val words : t -> string Seq.t
+(** Load all the words *)
val list_size : t -> int
(** Number of words in the list *)
-val empty_data : unit -> t
-
-val add_word : Criteria.t list -> t -> string -> t
-(** Add a new word in the list. Check are made against the differents criteria in order to ensure that the word is valid *)
-
-val remove_word : t -> string -> t
-(** Remove a word from this list *)
+val add_words : Criteria.t list -> string Seq.t -> t
+(** Create a word list from an initial sequence.
-val extract_freq : t -> (char * int) list
-(** Evaluate the score for each char (lower is better) *)
+ Checks are made against the differents criteria in order to ensure that the
+ word is valid *)
-val pick_next_word : t -> (char * int) list -> string * int
-(** Get the word which with the most information in it.
+val filter : Criteria.t list -> t -> t
+(** Remove all the words that does not match the criterias. *)
-The information is the score given to each character, representing each
-frequency in the whole word list (lower is better). If the same letter is
-present many times, we consider that succeding letters does not give any more
-informations (do not consider the position here) *)
+val remove_word : t -> string -> t
+(** Remove a word from this list. This function is called when a proposition
+ from the application is not recognized by the game. *)