From c2bd6982e5ed845293a38ae600c239cd50924d76 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 24 Feb 2022 08:59:44 +0100 Subject: Update code, added tests --- motus/lib/dune | 1 + motus/lib/persistence.ml | 95 --------------------------------------------- motus/lib/persistence.mli | 24 ------------ motus/lib/validity.ml | 98 +++++++++++++++++++++++++++++++++++++++++++++++ motus/lib/validity.mli | 20 ++++++++++ motus/lib/wordlist.ml | 90 +++++++++++++++++++++++++++++++++++++++++++ motus/lib/wordlist.mli | 25 ++++++++++++ 7 files changed, 234 insertions(+), 119 deletions(-) delete mode 100644 motus/lib/persistence.ml delete mode 100644 motus/lib/persistence.mli create mode 100644 motus/lib/validity.ml create mode 100644 motus/lib/validity.mli create mode 100644 motus/lib/wordlist.ml create mode 100644 motus/lib/wordlist.mli (limited to 'motus/lib') diff --git a/motus/lib/dune b/motus/lib/dune index b958922..a892e2b 100644 --- a/motus/lib/dune +++ b/motus/lib/dune @@ -2,6 +2,7 @@ (name motus_lib ) + (preprocess (pps ppx_deriving.enum)) ) diff --git a/motus/lib/persistence.ml b/motus/lib/persistence.ml deleted file mode 100644 index 6116653..0000000 --- a/motus/lib/persistence.ml +++ /dev/null @@ -1,95 +0,0 @@ -open StdLabels - -let () = Random.self_init () - -type t = - { number : int - ; element : string list - ; freq : (char, int) Hashtbl.t - } - -let empty_data () = { number = 0; element = []; freq = Hashtbl.create 26 } - -(** Evaluate the score for each char (lower is better) *) -let extract_freq : t -> (char * int) list = - fun data -> - let number_2 = data.number / 2 in - Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) data.freq [] - (* Sort the list for a pretty printing *) - |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2) - - -let update_freq : (char, int) Hashtbl.t -> char -> unit = - fun freq c -> - match Hashtbl.find_opt freq c with - | None -> Hashtbl.add freq c 1 - | Some value -> Hashtbl.replace freq c (value + 1) - - -let add_word : Criteria.t list -> t -> string -> t = - fun filters data word -> - match List.for_all ~f:(Criteria.check_filter word) filters with - | true -> - let len = String.length word in - let chars = - List.init ~len ~f:(fun n -> String.get word n) - |> List.sort_uniq ~cmp:Char.compare - in - - List.iter ~f:(update_freq data.freq) chars; - { data with number = data.number + 1; element = word :: data.element } - | false -> data - - -(** Get the word which with the most information in it. - -The information is the score given to each character, representing each -frequency in the whole word list (lower is better). If the same letter is -present many times, we consider that succeding letters does not give any more -informations (do not consider the position here) *) -let pick_next_word : t -> (char * int) list -> string * int = - fun data scores -> - let p' : (string list * int) option -> string -> (string list * int) option = - fun prec word -> - (* evaluate the score for this word *) - let _, eval = - String.fold_left - ~f:(fun (scores, score) c -> - match List.assoc_opt c scores with - | None -> - (* if the character has no score associated, we consider that it - does not provide any more information, and give it the max - score available *) - (scores, score + (data.number / 2)) - | Some v -> - let new_scores = - List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores - in - (new_scores, score + v) ) - ~init:(scores, 0) - word - in - match prec with - | None -> Some ([ word ], eval) - | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) - | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) - | _ -> prec - in - match List.fold_left ~f:p' ~init:None data.element with - | None -> ("", 0) - | Some (words, score) -> - (* Pick a reandom word from the list *) - let elements = List.length words in - let number = Random.int elements in - (List.nth words number, score) - - -let remove_word : t -> string -> t = - fun t word -> - let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in - { t with element; number = t.number - 1 } - - -let words : t -> string list = fun { element; _ } -> element - -let list_size : t -> int = fun { number; _ } -> number diff --git a/motus/lib/persistence.mli b/motus/lib/persistence.mli deleted file mode 100644 index f5625e4..0000000 --- a/motus/lib/persistence.mli +++ /dev/null @@ -1,24 +0,0 @@ -type t - -val words : t -> string list - -val list_size : t -> int -(** Number of words in the list *) - -val empty_data : unit -> t - -val extract_freq : t -> (char * int) list -(** Evaluate the score for each char (lower is better) *) - -val add_word : Criteria.t list -> t -> string -> t -(** Add a new word in the list. Check are made against the differents criteria in order to ensure that the word is valid *) - -val pick_next_word : t -> (char * int) list -> string * int -(** Get the word which with the most information in it. - -The information is the score given to each character, representing each -frequency in the whole word list (lower is better). If the same letter is -present many times, we consider that succeding letters does not give any more -informations (do not consider the position here) *) - -val remove_word : t -> string -> t diff --git a/motus/lib/validity.ml b/motus/lib/validity.ml new file mode 100644 index 0000000..ae85f04 --- /dev/null +++ b/motus/lib/validity.ml @@ -0,0 +1,98 @@ +open StdLabels + +(* Enclose the type definition into a warning removval in order to hide some + auto-generated values *) +[@@@warning "-32"] + +type t = + | Wellplaced + | Misplaced + | Missing +[@@deriving enum] + +[@@@warning "+32"] + +let m = Float.of_int (1 + max) + +(** Get the index of a validity result *) +let index_of_result : t array -> int = + fun elems -> + let _, value = + Array.fold_left elems ~init:(0., 0.) ~f:(fun (pos, acc) content -> + let v = Float.of_int (to_enum content) in + let acc' = acc +. (v *. (m ** pos)) in + (pos +. Float.one, acc') ) + in + Float.to_int value + + +let index_to_result : base:int -> int -> t array = + fun ~base n -> + let rec _f acc n i = + let next = Float.round (n /. m) + and rem = Float.(to_int @@ rem n m) in + + match (rem, i) with + | _, 0 -> Array.of_list acc + | n, _ -> + ( match of_enum n with + | None -> Array.of_list acc + | Some v -> _f (v :: acc) next (i - 1) ) + in + _f [] (Float.of_int n) base + + +(** Build a sequence of all the possible status for a given number of letters *) +let sequence : int -> t array Seq.t = + fun base -> + let max_element = Float.to_int @@ (m ** Float.of_int base) in + + Seq.unfold + (fun n -> + if n < max_element then Some (index_to_result ~base n, n + 1) else None ) + 0 + + +let compare_words : string -> ref:string -> t array option = + fun w1 ~ref -> + let l1 = String.length w1 in + if l1 <> String.length ref + then None + else + let result = + Array.init l1 ~f:(fun i -> + let c1 = String.get w1 i + and c2 = String.get ref i in + + let state = + if Char.equal c1 c2 + then Wellplaced + else if String.contains ref c1 + then Misplaced + else Missing + in + state ) + in + Some result + + +let to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list = + fun c i t acc -> + match t with + | Wellplaced -> Criteria.add (Criteria.Contain (c, Some i)) acc + | Missing -> Criteria.add (Criteria.NotContain (c, None)) acc + | Misplaced -> + Criteria.add + (Criteria.NotContain (c, Some i)) + (Criteria.add (Criteria.Contain (c, None)) acc) + + +let to_criterias : string -> t array -> Criteria.t list = + fun word t -> + let l, _ = + Array.fold_left t ~init:([], 0) ~f:(fun (acc, i) t -> + let acc = to_criteria (String.get word i) i t acc in + (acc, i + 1) ) + in + + List.rev l diff --git a/motus/lib/validity.mli b/motus/lib/validity.mli new file mode 100644 index 0000000..dfd876c --- /dev/null +++ b/motus/lib/validity.mli @@ -0,0 +1,20 @@ +type t = + | Wellplaced + | Misplaced + | Missing + +val sequence : int -> t array Seq.t +(** Build a sequence of all the possible status for a given number of letters *) + +val index_of_result : t array -> int +(** Get the index of a validity result *) + +val index_to_result : base:int -> int -> t array + +val compare_words : string -> ref:string -> t array option + +val to_criteria : char -> int -> t -> Criteria.t list -> Criteria.t list + +val to_criterias : string -> t array -> Criteria.t list +(** Convert the validity result into a Criteria list, in the context of a given + word *) diff --git a/motus/lib/wordlist.ml b/motus/lib/wordlist.ml new file mode 100644 index 0000000..45fc73a --- /dev/null +++ b/motus/lib/wordlist.ml @@ -0,0 +1,90 @@ +open StdLabels + +let () = Random.self_init () + +type t = + { number : int + ; element : string list + } + +let empty_data () = { number = 0; element = [] } + +let update_freq : (char, int) Hashtbl.t -> char -> unit = + fun freq c -> + match Hashtbl.find_opt freq c with + | None -> Hashtbl.add freq c 1 + | Some value -> Hashtbl.replace freq c (value + 1) + + +(** Evaluate the score for each char (lower is better) *) +let extract_freq : t -> (char * int) list = + fun data -> + let freq = Hashtbl.create 26 in + List.iter data.element ~f:(fun word -> + String.iter word ~f:(fun c -> update_freq freq c) ); + + let number_2 = data.number / 2 in + Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq [] + (* Sort the list for a pretty printing *) + |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2) + + +let add_word : Criteria.t list -> t -> string -> t = + fun filters data word -> + match List.for_all ~f:(Criteria.check_filter word) filters with + | true -> { number = data.number + 1; element = word :: data.element } + | false -> data + + +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) +let pick_next_word : t -> (char * int) list -> string * int = + fun data scores -> + let p' : (string list * int) option -> string -> (string list * int) option = + fun prec word -> + (* evaluate the score for this word *) + let _, eval = + String.fold_left + ~f:(fun (scores, score) c -> + match List.assoc_opt c scores with + | None -> + (* if the character has no score associated, we consider that it + does not provide any more information, and give it the max + score available *) + (scores, score + (data.number / 2)) + | Some v -> + let new_scores = + List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores + in + (new_scores, score + v) ) + ~init:(scores, 0) + word + in + match prec with + | None -> Some ([ word ], eval) + | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval) + | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval) + | _ -> prec + in + match List.fold_left ~f:p' ~init:None data.element with + | None -> ("", 0) + | Some (words, score) -> + (* Pick a reandom word from the list *) + let elements = List.length words in + let number = Random.int elements in + (List.nth words number, score) + + +let remove_word : t -> string -> t = + fun t word -> + let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in + { element; number = t.number - 1 } + + +let words : t -> string list = fun { element; _ } -> element + +let list_size : t -> int = fun { number; _ } -> number diff --git a/motus/lib/wordlist.mli b/motus/lib/wordlist.mli new file mode 100644 index 0000000..766fbdf --- /dev/null +++ b/motus/lib/wordlist.mli @@ -0,0 +1,25 @@ +type t + +val words : t -> string list + +val list_size : t -> int +(** Number of words in the list *) + +val empty_data : unit -> t + +val add_word : Criteria.t list -> t -> string -> t +(** Add a new word in the list. Check are made against the differents criteria in order to ensure that the word is valid *) + +val remove_word : t -> string -> t +(** Remove a word from this list *) + +val extract_freq : t -> (char * int) list +(** Evaluate the score for each char (lower is better) *) + +val pick_next_word : t -> (char * int) list -> string * int +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) -- cgit v1.2.3