aboutsummaryrefslogtreecommitdiff
path: root/motus/lib/wordlist.ml
blob: 45fc73aca0b5fad91be9f4840244472853194ab3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
open StdLabels

let () = Random.self_init ()

type t =
  { number : int
  ; element : string list
  }

let empty_data () = { number = 0; element = [] }

let update_freq : (char, int) Hashtbl.t -> char -> unit =
 fun freq c ->
  match Hashtbl.find_opt freq c with
  | None -> Hashtbl.add freq c 1
  | Some value -> Hashtbl.replace freq c (value + 1)


(** Evaluate the score for each char (lower is better) *)
let extract_freq : t -> (char * int) list =
 fun data ->
  let freq = Hashtbl.create 26 in
  List.iter data.element ~f:(fun word ->
      String.iter word ~f:(fun c -> update_freq freq c) );

  let number_2 = data.number / 2 in
  Hashtbl.fold (fun k v acc -> (k, abs (v - number_2)) :: acc) freq []
  (* Sort the list for a pretty printing *)
  |> List.sort ~cmp:(fun v1 v2 -> snd v1 - snd v2)


let add_word : Criteria.t list -> t -> string -> t =
 fun filters data word ->
  match List.for_all ~f:(Criteria.check_filter word) filters with
  | true -> { number = data.number + 1; element = word :: data.element }
  | false -> data


(** Get the word which with the most information in it.

The information is the score given to each character, representing each
frequency in the whole word list (lower is better). If the same letter is
present many times, we consider that succeding letters does not give any more
informations (do not consider the position here) *)
let pick_next_word : t -> (char * int) list -> string * int =
 fun data scores ->
  let p' : (string list * int) option -> string -> (string list * int) option =
   fun prec word ->
    (* evaluate the score for this word *)
    let _, eval =
      String.fold_left
        ~f:(fun (scores, score) c ->
          match List.assoc_opt c scores with
          | None ->
              (* if the character has no score associated, we consider that it
                 does not provide any more information, and give it the max
                 score available *)
              (scores, score + (data.number / 2))
          | Some v ->
              let new_scores =
                List.filter ~f:(fun (c', _) -> not (Char.equal c c')) scores
              in
              (new_scores, score + v) )
        ~init:(scores, 0)
        word
    in
    match prec with
    | None -> Some ([ word ], eval)
    | Some (_, prec_score) when eval < prec_score -> Some ([ word ], eval)
    | Some (w, prec_score) when eval = prec_score -> Some (word :: w, eval)
    | _ -> prec
  in
  match List.fold_left ~f:p' ~init:None data.element with
  | None -> ("", 0)
  | Some (words, score) ->
      (* Pick a reandom word from the list *)
      let elements = List.length words in
      let number = Random.int elements in
      (List.nth words number, score)


let remove_word : t -> string -> t =
 fun t word ->
  let element = List.filter ~f:(fun w -> not (String.equal w word)) t.element in
  { element; number = t.number - 1 }


let words : t -> string list = fun { element; _ } -> element

let list_size : t -> int = fun { number; _ } -> number