From 71286c4a0bbc0afd89622170a02908a9d978cc3e Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Sat, 27 Aug 2022 11:38:46 +0200 Subject: Update the entropy evaluation in the sutom application --- motus/lib/entropy.ml | 163 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 57 deletions(-) diff --git a/motus/lib/entropy.ml b/motus/lib/entropy.ml index 5fcef6c..b192f16 100644 --- a/motus/lib/entropy.ml +++ b/motus/lib/entropy.ml @@ -9,53 +9,111 @@ let compare_words : string -> ref:string * CharSet.t -> Validity.t array option fun w1 ~ref -> let wordRef = fst ref in let l1 = String.length w1 in - if l1 <> String.length wordRef - then None + if l1 <> String.length wordRef then None else let result = Array.init l1 ~f:(fun i -> - let c1 = String.get w1 i - and c2 = String.get wordRef i in + let c1 = String.get w1 i and c2 = String.get wordRef i in let state = - if Char.equal c1 c2 - then Validity.Wellplaced - else if CharSet.mem c1 (snd ref) - then Validity.Misplaced + if Char.equal c1 c2 then Validity.Wellplaced + else if CharSet.mem c1 (snd ref) then Validity.Misplaced else Validity.Missing in - state ) + state) in Some result +(** The module provide the entropy evaluation *) +module E : sig + type t -let get_entropy max_element words_number arr = - let entropy = ref 0. in - for idx = 0 to max_element - 1 do - let content = Float.of_int (Bigarray.Array1.get arr idx) in - if content > 0. - then - let ratio = content /. words_number in - entropy := !entropy -. (ratio *. Float.log2 ratio) - done; - !entropy + val create : cardinal:int -> length:int -> t + (** Create a new evaluation for the entropy evaluation. + [cardinal] is the number of elements in the set + [length] is the number of criteria to check with *) + val reset : t -> unit + (** Reinitialize the state (we use the same in the successive + iterations) *) + + val copy : t -> t + (** Create a copy of the evaluation *) + + val add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit + (** Add an element in the evaluation. The function provided should give the + index list match by this element *) + + val get_entropy : t -> float + (** Get the entropy for the evaluation *) +end = struct + type t = { + arr : (int, Bigarray.int_elt, Bigarray.c_layout) Bigarray.Array1.t + ; cardinal : float + } + + let reset t = Bigarray.Array1.fill t.arr 0 + + let copy t = + let arr = + Bigarray.Array1.create Bigarray.Int Bigarray.C_layout + (Bigarray.Array1.dim t.arr) + in + Bigarray.Array1.blit t.arr arr; + { t with arr } + + let create : cardinal:int -> length:int -> t = + fun ~cardinal ~length -> + { + arr = Bigarray.Array1.create Bigarray.Int Bigarray.C_layout length + ; cardinal = Float.of_int cardinal + } + + (** [get_entropy] will evaluate the entropy of the values in an array. + + The function return the quantity of information in the state. + *) + let get_entropy : t -> float = + fun t -> + let entropy = ref 0. in + for idx = 0 to Bigarray.Array1.dim t.arr - 1 do + let content = Float.of_int (Bigarray.Array1.get t.arr idx) in + if content > 0. then + let ratio = content /. t.cardinal in + entropy := !entropy -. (ratio *. Float.log2 ratio) + done; + !entropy + + let add_element : t:t -> f:('a -> int Seq.t) -> 'a -> unit = + fun ~t ~f element -> + Seq.iter + (fun idx -> + let content = Bigarray.Array1.get t.arr idx in + Bigarray.Array1.set t.arr idx (succ content)) + (f element) +end + +(** Get the word and the highest score by picking the next word from the + list. + + [analyse ~catalog base words] will check each word from [words] and see if + it give a good score. This score is matched against each words from + [catalog] + + The [base] argument is the number of letters in each word. *) let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t = fun base ~catalog words -> (* If we have only two elements, just pick one of them *) - if Wordlist.list_size words <= 2 - then (0.5, Option.get @@ Wordlist.pick words) + if Wordlist.list_size words <= 2 then (0.5, Option.get @@ Wordlist.pick words) else - let words_number = Float.of_int (Wordlist.list_size catalog) in - (* Each result from the game is stored as an integer, and we create an array with as many elements as we have possibilities. *) let max_element = Float.to_int @@ (Validity.elements ** Float.of_int base) in let arr = - Bigarray.Array1.create Bigarray.Int Bigarray.C_layout max_element + E.create ~length:max_element ~cardinal:(Wordlist.list_size catalog) in match Wordlist.pick words with | None -> (0., "") @@ -65,45 +123,36 @@ let analyse : int -> catalog:Wordlist.t -> Wordlist.t -> t = (fun (score, word, already_picked) word_ref -> (* Reinitialize the array (we use the same in the successive iterations) *) - Bigarray.Array1.fill arr 0; + E.reset arr; let set_ref = String.to_seq word_ref |> CharSet.of_seq in - Seq.iter - (fun w2 -> - let result = compare_words ~ref:(word_ref, set_ref) w2 in - match result with - | None -> () - | Some r -> - let idx = Validity.index_of_result r in - - let content = Bigarray.Array1.get arr idx in - Bigarray.Array1.set arr idx (succ content) ) - (Wordlist.words words); - - let entropy = get_entropy max_element words_number arr in - - (* If get more information that we had, use this word. Choose this - word to if it belongs to the final list (it gives a small chance - to pick the right one…) *) + E.add_element ~t:arr set_ref ~f:(fun set_ref -> + Seq.filter_map + (fun w2 -> + let result = compare_words ~ref:(word_ref, set_ref) w2 in + Option.map Validity.index_of_result result) + (Wordlist.words words)); + let entropy = E.get_entropy arr in + + (* If we get more information that we had, use this word. + + If this word does not add more informations that we already + had, choose it anyway if it belongs to the final list (it gives + a small chance to pick the right one…) *) let is_better, already_picked = - if entropy > score - then (true, false) - else if (not already_picked) - && entropy == score - && Wordlist.mem word_ref words + if entropy > score then (true, false) + else if + (not already_picked) && entropy == score + && Wordlist.mem word_ref words then (true, true) else (false, already_picked) in - if is_better - then ( - Printf.printf - "Q. of information when selecting %s : %f\n" - word_ref - entropy; - (entropy, word_ref, already_picked) ) - else (score, word, already_picked) ) - (-0., v, false) - (Wordlist.words catalog) + if is_better then ( + Printf.printf "Q. of information when selecting %s : %f\n" + word_ref entropy; + (entropy, word_ref, already_picked)) + else (score, word, already_picked)) + (-0., v, false) (Wordlist.words catalog) in (score, word) -- cgit v1.2.3