diff options
Diffstat (limited to 'motus/lib')
-rw-r--r-- | motus/lib/criteria.ml | 42 | ||||
-rw-r--r-- | motus/lib/dune | 7 | ||||
-rw-r--r-- | motus/lib/persistence.ml | 75 |
3 files changed, 124 insertions, 0 deletions
diff --git a/motus/lib/criteria.ml b/motus/lib/criteria.ml new file mode 100644 index 0000000..61bb539 --- /dev/null +++ b/motus/lib/criteria.ml @@ -0,0 +1,42 @@ +open StdLabels + +type t = + | Lenght of int + | Contain of char * int option + | NotContain of char * int option + +(** Return true if the word match the given filter *) +let check_filter : string -> t -> bool = + fun word f -> + match f with + | Lenght l -> l = String.length word + | Contain (c, pos) -> + ( match pos with + | None -> String.contains word c + | Some i -> Char.equal c (String.get word i) ) + | NotContain (c, pos) -> + ( match pos with + | None -> not (String.contains word c) + | Some i -> not (Char.equal c (String.get word i)) ) + + +let is_valid : t -> t -> bool = + fun t1 t2 -> + match (t1, t2) with + | Lenght _, Lenght _ -> false + | Contain (c1, _), NotContain (c2, None) -> not (Char.equal c1 c2) + | NotContain (c1, None), Contain (c2, _) -> not (Char.equal c1 c2) + | Contain (c1, Some i1), Contain (c2, Some i2) -> Char.equal c1 c2 || i1 <> i2 + | _ -> true + + +(** Add a new filter in the list if it is compatible with the existing ones *) +let add : t -> t list -> t list = + fun t filters -> + match List.for_all ~f:(is_valid t) filters with + | true -> t :: filters + | false -> filters + + +let merge_lists : init:t list -> t list -> t list = + fun ~init news -> List.fold_left ~f:(fun acc t -> add t acc) ~init news diff --git a/motus/lib/dune b/motus/lib/dune new file mode 100644 index 0000000..b958922 --- /dev/null +++ b/motus/lib/dune @@ -0,0 +1,7 @@ +(library + (name + motus_lib + ) + ) + + diff --git a/motus/lib/persistence.ml b/motus/lib/persistence.ml new file mode 100644 index 0000000..1bc20ad --- /dev/null +++ b/motus/lib/persistence.ml @@ -0,0 +1,75 @@ +type t = + { number : int + ; element : string list + ; freq : (char, int) Hashtbl.t + } + +let empty_data () = { number = 0; element = []; freq = Hashtbl.create 26 } + +(** Evaluate the score for each char (lower is better) *) +let extract_freq : t -> (char * int) list = + fun data -> + let number_2 = data.number / 2 in + Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) data.freq [] + (* Sort the list for a pretty printing *) + |> List.sort (fun v1 v2 -> snd v1 - snd v2) + + +let update_freq : (char, int) Hashtbl.t -> char -> unit = + fun freq c -> + match Hashtbl.find_opt freq c with + | None -> Hashtbl.add freq c 1 + | Some value -> Hashtbl.replace freq c (value + 1) + + +let add_word : Criteria.t list -> t -> string -> t = + fun filters data word -> + match List.for_all (Criteria.check_filter word) filters with + | true -> + let len = String.length word in + let chars = + List.init len (fun n -> String.get word n) + |> List.sort_uniq Char.compare + in + + List.iter (update_freq data.freq) chars; + { data with number = data.number + 1; element = word :: data.element } + | false -> data + + +(** Get the word which with the most information in it. + +The information is the score given to each character, representing each +frequency in the whole word list (lower is better). If the same letter is +present many times, we consider that succeding letters does not give any more +informations (do not consider the position here) *) +let pick_next_word : t -> (char * int) list -> string * int = + fun data scores -> + let p' : (string * int) option -> string -> (string * int) option = + fun prec word -> + (* evaluate the score for this word *) + let _, eval = + String.fold_left + (fun (scores, score) c -> + match List.assoc_opt c scores with + | None -> + (* if the character has no score associated, we consider that it + does not provide any more information, and give it the max + score available *) + (scores, score + (data.number / 2)) + | Some v -> + let new_scores = + List.filter (fun (c', _) -> not (Char.equal c c')) scores + in + (new_scores, score + v) ) + (scores, 0) + word + in + match prec with + | None -> Some (word, eval) + | Some (_, prec_score) when eval < prec_score -> Some (word, eval) + | _ -> prec + in + match List.fold_left p' None data.element with + | None -> ("", 0) + | Some r -> r |