From 95432043550bd4a41b4466395502bc3b748e6746 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 24 Feb 2022 09:26:23 +0100 Subject: Moved the wordlist into a set --- motus/bin/motus.ml | 45 +++++++-------------- motus/js/initialize.ml | 12 +++--- motus/js/next.ml | 7 +--- motus/lib/freq_analysis.ml | 68 ++++++++++++++++++++++++++++++++ motus/lib/freq_analysis.mli | 12 ++++++ motus/lib/wordlist.ml | 95 +++++++-------------------------------------- motus/lib/wordlist.mli | 33 ++++++++-------- 7 files changed, 132 insertions(+), 140 deletions(-) create mode 100644 motus/lib/freq_analysis.ml create mode 100644 motus/lib/freq_analysis.mli diff --git a/motus/bin/motus.ml b/motus/bin/motus.ml index 2ee058d..e32adca 100644 --- a/motus/bin/motus.ml +++ b/motus/bin/motus.ml @@ -25,12 +25,6 @@ let show_structure : Format.formatter -> Wordlist.t -> Criteria.t list -> unit = Format.fprintf format "Got %d elements @\n" (Wordlist.list_size data); - Format.fprintf - format - "Frequencies : @[%a@]@\n" - (Format.pp_print_list (fun f (k, v) -> Format.fprintf f "%c -> %d" k v)) - (Wordlist.extract_freq data); - if Wordlist.list_size data < 20 then Format.fprintf @@ -38,22 +32,20 @@ let show_structure : Format.formatter -> Wordlist.t -> Criteria.t list -> unit = "Remaining words @[@;%a@]@\n" (Format.pp_print_list ~pp_sep:Format.pp_force_newline (fun f w -> Format.fprintf f "%s" w ) ) - (Wordlist.words data); + (List.of_seq @@ Wordlist.words data); Format.close_box () (** Get the initial list *) -let rec get_list : in_channel -> Wordlist.t -> Criteria.t list -> Wordlist.t = - fun channel data filters -> - let word = - try Some (String.lowercase_ascii (Stdlib.input_line channel)) with +let get_list : in_channel -> Criteria.t list -> Wordlist.t = + fun channel filters -> + let read channel = + try Some (String.lowercase_ascii (Stdlib.input_line channel), channel) with | End_of_file -> None in - match word with - | None -> data - | Some word -> - let data = Wordlist.add_word filters data word in - get_list channel data filters + + let s = Seq.unfold read channel in + Wordlist.add_words filters s (** Compare the proposed word and the result from the user in order to identify @@ -79,15 +71,12 @@ let create_new_rules word result = let rec run filters words = let () = show_structure Format.std_formatter words filters in - let freq = Wordlist.extract_freq words in - let next, score = Wordlist.pick_next_word words freq in + let next = + Freq_analysis.analyse words |> Freq_analysis.pick_next_word words + in let () = - Format.fprintf - Format.std_formatter - "Next word will be : %s (%d)@\n" - next - score + Format.fprintf Format.std_formatter "Next word will be : %s@\n" next in let input = Stdlib.read_line () in @@ -100,13 +89,7 @@ let rec run filters words = Criteria.merge_lists ~init:filters (create_new_rules next input) |> List.sort_uniq Stdlib.compare in - - let words = - List.fold_left - (Wordlist.add_word new_rules) - (Wordlist.empty_data ()) - (Wordlist.words words) - in + let words = Wordlist.filter new_rules words in run new_rules words @@ -134,6 +117,6 @@ let () = let initial_filter = Criteria.Lenght !length :: !rules in let words_channel = open_in (List.hd !dict) in - let words = get_list words_channel (Wordlist.empty_data ()) initial_filter in + let words = get_list words_channel initial_filter in close_in words_channel; run initial_filter words diff --git a/motus/js/initialize.ml b/motus/js/initialize.ml index 82f1288..a088a0e 100644 --- a/motus/js/initialize.ml +++ b/motus/js/initialize.ml @@ -21,7 +21,7 @@ let get_proposition : Wordlist.t -> Criteria.t list -> State.proposition option = fun analysis rules -> let word = - Wordlist.extract_freq analysis |> Wordlist.pick_next_word analysis |> fst + Freq_analysis.analyse analysis |> Freq_analysis.pick_next_word analysis in match String.equal String.empty word with | true -> None @@ -58,13 +58,11 @@ let process { sender; length; content; proposition } state = in let words = Jstr.cuts ~sep:(Jstr.v "\n") value in let analysis = - List.fold_left - ~f:(fun a w -> - let upper = Jstr.uppercased w in - Wordlist.add_word rules a (Jstr.to_string upper) ) - ~init:(Wordlist.empty_data ()) - words + List.to_seq words + |> Seq.map (fun w -> Jstr.(to_string (uppercased w))) + |> Wordlist.add_words rules in + let current_prop = get_proposition analysis rules and fields = FieldList.make length sender in ( match current_prop with diff --git a/motus/js/next.ml b/motus/js/next.ml index 72db9e4..104b3e6 100644 --- a/motus/js/next.ml +++ b/motus/js/next.ml @@ -15,12 +15,7 @@ let process : t -> State.state -> State.state = in (* Update the word list with the new rules *) - let analysis = - List.fold_left - ~f:(Motus_lib.Wordlist.add_word rules) - ~init:(Motus_lib.Wordlist.empty_data ()) - (Motus_lib.Wordlist.words state.analysis) - in + let analysis = Motus_lib.Wordlist.filter rules state.analysis in let propositions = state.current_prop :: state.propositions and current_prop = [] in diff --git a/motus/lib/freq_analysis.ml b/motus/lib/freq_analysis.ml new file mode 100644 index 0000000..12f5fef --- /dev/null +++ b/motus/lib/freq_analysis.ml @@ -0,0 +1,68 @@ +open StdLabels + +let () = Random.self_init () + +type t = (char * int) list + +let update_freq : (char, int) Hashtbl.t -> char -> unit = + fun freq c -> + match Hashtbl.find_opt freq c with + | None -> Hashtbl.add freq c 1 + | Some value -> Hashtbl.replace freq c (value + 1) + + +(** Evaluate the score for each char (lower is better) *) +let analyse : Wordlist.t -> (char * int) list = + fun data -> + let freq = Hashtbl.create 26 in + Seq.iter + (fun word -> String.iter word ~f:(update_freq freq)) + (Wordlist.words data); + + let number_2 = Wordlist.list_size data / 2 in + Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] + + +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) +let pick_next_word : Wordlist.t -> (char * int) list -> string = + fun data scores -> + let list_size = Wordlist.list_size data / 2 in + + let p' : (string list * int) option -> string -> (string list * int) option = + fun prec word -> + (* evaluate the score for this word *) + let _, eval = + String.fold_left + ~f:(fun (scores, score) c -> + match List.assoc_opt c scores with + | None -> + (* if the character has no score associated, we consider that it + does not provide any more information, and give it the max + score available *) + (scores, score + list_size) + | Some v -> + let new_scores = + List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores + in + (new_scores, score + v) ) + ~init:(scores, 0) + word + in + match prec with + | None -> Some ([ word ], eval) + | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) + | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) + | _ -> prec + in + match Seq.fold_left p' None (Wordlist.words data) with + | None -> "" + | Some (words, _) -> + (* Pick a reandom word from the list *) + let elements = List.length words in + let number = Random.int elements in + List.nth words number diff --git a/motus/lib/freq_analysis.mli b/motus/lib/freq_analysis.mli new file mode 100644 index 0000000..bd7f1d8 --- /dev/null +++ b/motus/lib/freq_analysis.mli @@ -0,0 +1,12 @@ +type t + +val analyse : Wordlist.t -> t +(** Evaluate the score for each char (lower is better) *) + +val pick_next_word : Wordlist.t -> t -> string +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml index 45fc73a..7c400bb 100644 --- a/motus/lib/wordlist.ml +++ b/motus/lib/wordlist.ml @@ -1,90 +1,25 @@ open StdLabels +module S = Set.Make (String) -let () = Random.self_init () +type t = S.t -type t = - { number : int - ; element : string list - } +let empty_data () = S.empty -let empty_data () = { number = 0; element = [] } +let add_words : Criteria.t list -> string Seq.t -> t = + fun filters words -> + Seq.filter + (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) + words + |> S.of_seq -let update_freq : (char, int) Hashtbl.t -> char -> unit = - fun freq c -> - match Hashtbl.find_opt freq c with - | None -> Hashtbl.add freq c 1 - | Some value -> Hashtbl.replace freq c (value + 1) +let filter : Criteria.t list -> t -> t = + fun filters t -> + S.filter (fun word -> List.for_all ~f:(Criteria.check_filter word) filters) t -(** Evaluate the score for each char (lower is better) *) -let extract_freq : t -> (char * int) list = - fun data -> - let freq = Hashtbl.create 26 in - List.iter data.element ~f:(fun word -> - String.iter word ~f:(fun c -> update_freq freq c) ); - let number_2 = data.number / 2 in - Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] - (* Sort the list for a pretty printing *) - |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2) +let words = S.to_seq +let list_size = S.cardinal -let add_word : Criteria.t list -> t -> string -> t = - fun filters data word -> - match List.for_all ~f:(Criteria.check_filter word) filters with - | true -> { number = data.number + 1; element = word :: data.element } - | false -> data - - -(** Get the word which with the most information in it. - -The information is the score given to each character, representing each -frequency in the whole word list (lower is better). If the same letter is -present many times, we consider that succeding letters does not give any more -informations (do not consider the position here) *) -let pick_next_word : t -> (char * int) list -> string * int = - fun data scores -> - let p' : (string list * int) option -> string -> (string list * int) option = - fun prec word -> - (* evaluate the score for this word *) - let _, eval = - String.fold_left - ~f:(fun (scores, score) c -> - match List.assoc_opt c scores with - | None -> - (* if the character has no score associated, we consider that it - does not provide any more information, and give it the max - score available *) - (scores, score + (data.number / 2)) - | Some v -> - let new_scores = - List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores - in - (new_scores, score + v) ) - ~init:(scores, 0) - word - in - match prec with - | None -> Some ([ word ], eval) - | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) - | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) - | _ -> prec - in - match List.fold_left ~f:p' ~init:None data.element with - | None -> ("", 0) - | Some (words, score) -> - (* Pick a reandom word from the list *) - let elements = List.length words in - let number = Random.int elements in - (List.nth words number, score) - - -let remove_word : t -> string -> t = - fun t word -> - let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in - { element; number = t.number - 1 } - - -let words : t -> string list = fun { element; _ } -> element - -let list_size : t -> int = fun { number; _ } -> number +let remove_word t w = S.remove w t diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli index 766fbdf..a56cab3 100644 --- a/motus/lib/wordlist.mli +++ b/motus/lib/wordlist.mli @@ -1,25 +1,26 @@ +(** The dictionnary *) + type t -val words : t -> string list +val empty_data : unit -> t +(** Create an empty dictionnary. The resulting dictionnay cannot be used, it is + juste here if required for initialisation *) + +val words : t -> string Seq.t +(** Load all the words *) val list_size : t -> int (** Number of words in the list *) -val empty_data : unit -> t - -val add_word : Criteria.t list -> t -> string -> t -(** Add a new word in the list. Check are made against the differents criteria in order to ensure that the word is valid *) - -val remove_word : t -> string -> t -(** Remove a word from this list *) +val add_words : Criteria.t list -> string Seq.t -> t +(** Create a word list from an initial sequence. -val extract_freq : t -> (char * int) list -(** Evaluate the score for each char (lower is better) *) + Checks are made against the differents criteria in order to ensure that the + word is valid *) -val pick_next_word : t -> (char * int) list -> string * int -(** Get the word which with the most information in it. +val filter : Criteria.t list -> t -> t +(** Remove all the words that does not match the criterias. *) -The information is the score given to each character, representing each -frequency in the whole word list (lower is better). If the same letter is -present many times, we consider that succeding letters does not give any more -informations (do not consider the position here) *) +val remove_word : t -> string -> t +(** Remove a word from this list. This function is called when a proposition + from the application is not recognized by the game. *) -- cgit v1.2.3