aboutsummaryrefslogtreecommitdiff
path: root/motus/bin/motus.ml
blob: 823a01bec15c505284a195993813c3edd540e218 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
open Motus_lib

(** Represent a decision rule *)
let format_filter : Format.formatter -> Criteria.t -> unit =
 fun formatter f ->
  match f with
  | Lenght l -> Format.fprintf formatter "Doit etre de longueur %d" l
  | Contain (c, None) -> Format.fprintf formatter "Doit contenir un %c" c
  | Contain (c, Some i) ->
      Format.fprintf formatter "Doit contenir un %c a la position %d" c i
  | NotContain (c, None) ->
      Format.fprintf formatter "Ne doit pas contenir un %c" c
  | NotContain (c, Some i) ->
      Format.fprintf formatter "Ne doit pas contenir un %c a la position %d" c i


(** Display the informations about the structure *)
let show_structure : Format.formatter -> Wordlist.t -> Criteria.t list -> unit =
 fun format data filters ->
  Format.fprintf
    format
    "Filtres en cours : @[<v>%a@]@\n"
    (Format.pp_print_list format_filter)
    filters;

  Format.fprintf format "Got %d elements @\n" (Wordlist.list_size data);

  if Wordlist.list_size data < 20
  then
    Format.fprintf
      format
      "Remaining words @[<v>@;%a@]@\n"
      (Format.pp_print_list ~pp_sep:Format.pp_force_newline (fun f w ->
           Format.fprintf f "%s" w ) )
      (List.of_seq @@ Wordlist.words data);
  Format.close_box ()


(** Get the initial list *)
let get_list : in_channel -> Criteria.t list -> Wordlist.t =
 fun channel filters ->
  let read channel =
    try Some (String.lowercase_ascii (Stdlib.input_line channel), channel) with
    | End_of_file -> None
  in

  let s = Seq.unfold read channel in
  Wordlist.add_words filters s


(** Compare the proposed word and the result from the user in order to identify
    the future rules to apply *)
let create_new_rules word result =
  let rules = ref []
  and max_length = min (String.length word) (String.length result) in
  for i = 0 to max_length - 1 do
    match (String.get word i, String.get result i) with
    (* A space means that the letter is not present *)
    | c, ' ' -> rules := Criteria.(add (NotContain (c, None))) !rules
    (* The same letter means that the we found the right caracter *)
    | c, c' when Char.equal c c' ->
        rules := Criteria.(add (Contain (c, Some i)) !rules)
    (* Anything else, we got the letter, but at the wrong place *)
    | c, _ ->
        rules :=
          Criteria.(
            add (Contain (c, None)) (add (NotContain (c, Some i)) !rules))
  done;
  !rules


type t =
  { catalog : Wordlist.t
  ; words : Wordlist.t
  }

let rec run : int -> Criteria.t list -> t -> unit =
 fun len filters { catalog; words } ->
  let () = show_structure Format.std_formatter words filters in
  (*
  let next =
    Freq_analysis.analyse words |> Freq_analysis.pick_next_word words
  in
  *)
  let _, next = Entropy.analyse len ~catalog words in

  let () =
    Format.fprintf Format.std_formatter "Next word will be : %s@\n" next
  in

  let input = Stdlib.read_line () in

  (* if the input is empty, remove the word from the list and restart *)
  match String.equal String.empty input with
  | true ->
      let new_list =
        { catalog = Wordlist.remove_word catalog next
        ; words = Wordlist.remove_word words next
        }
      in
      run len filters new_list
  | false ->
      let new_rules =
        Criteria.merge_lists ~init:filters (create_new_rules next input)
        |> List.sort_uniq Stdlib.compare
      in
      let words = Wordlist.filter new_rules words in
      run len new_rules { catalog = Wordlist.remove_word catalog next; words }


let init_rule rules word =
  String.iteri
    (fun p c ->
      match c with
      | ' ' -> ()
      | _ -> rules := Criteria.Contain (c, Some p) :: !rules )
    word


let () =
  let length = ref 5
  and dict = ref [] in
  let rules = ref [] in

  let anon_fun filename = dict := filename :: !dict in
  let speclist =
    [ ("-n", Arg.Set_int length, "Nombre de lettres")
    ; ("-p", Arg.String (init_rule rules), "Motif initial")
    ]
  in
  let () = Arg.parse speclist anon_fun "motus [-n <nb>] dictionnaire" in

  let initial_filter = Criteria.Lenght !length :: !rules in
  let words_channel = open_in (List.hd !dict) in
  let words = get_list words_channel initial_filter in
  close_in words_channel;
  run !length initial_filter { catalog = words; words }