open StdLabels
module IntSet = ImportContainers.IntSet
module Syntax = ImportConf.Syntax
module Table = ImportDataTypes.Table
module Path = ImportDataTypes.Path
module Expression = ImportExpression.T

(*
   Internally, the dependency chain is represented as a graph.

   Each file to process (csv or xlsx) is marked as a node, The link between
   each node cannot be represented in the graph because each index can imply
   multiple tables together (for exemple [join(source.A, other.B)] ). So the
   graph is only used in order to get the import process order, and each
   information is keeped in a separate table.

*)

type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list

type key = {
  name : string;
  expression : Path.column Expression.t;
  columns : ImportContainers.IntSet.t Lazy.t;
}

type t = {
  table : Table.t;
  columns : IntSet.t;
  keys : key list;
}

let table t = t.table
let columns t = t.columns
let keys t = t.keys

type build_map = t ImportContainers.Externals.t

(* The expression can be qualified with full path (when we are in the column
   definition) or only with a column (in a binding with an externanl table).

   This type is here to extract the correct information, according to the type
   we are dealing with :

   - [to_mapping] allow to pick the right Set in which we need to add the
     column pointed by the path (this can be [keys] or [columns]
   - [of_path] is need to extract the qualified source from any kind of path.
*)
type 'a expression_extractor = {
  to_mapping : t -> Path.column -> t;
  of_path : 'a -> string option * Path.column;
}

(** [add_path_in_map f parent path ] Extract the column from element [path] and
    process the column in the function [f]

    The [path] is abstract, but the function [f.of_path] can extract the needed
    elements in order to add it in the mapping.

    The function may raise [Unknown_source] if the the path describe an unknown
    table. *)
let add_path_in_map :
    f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map =
 fun ~f ~conf path map ->
  let table_source, column = f.of_path path in
  let table =
    try ImportConf.get_table_for_name conf table_source with
    | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source))
  in

  ImportContainers.Externals.update map
    ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v ->
      let mapping =
        match v with
        | None -> raise (ImportErrors.Unknown_source table.name)
        | Some mapping -> mapping
      in

      Some (f.to_mapping mapping column))

let add_expression_in_map :
    f:'a expression_extractor ->
    conf:Syntax.t ->
    'a Expression.t ->
    build_map ->
    build_map =
 fun ~f ~conf expr map ->
  Expression.fold_values expr ~init:map ~f:(fun map p ->
      add_path_in_map ~f ~conf p map)

let add_columns_in_map :
    f:'a expression_extractor ->
    conf:Syntax.t ->
    'a Expression.t list ->
    build_map ->
    build_map =
 fun ~f ~conf columns map ->
  let columns =
    List.fold_left columns ~init:map ~f:(fun map expression ->
        let new_map = add_expression_in_map ~f ~conf expression map in
        new_map)
  in
  columns

(* [add_dependancies ~conf source map path]
   add the dependancy from the table [source] to another one after analysing the
   expression and extracting the path contained inside.

   This function is called for each path declared inside the expression. *)
let add_dependancies :
    conf:Syntax.t -> Syntax.Extern.t -> deps -> Path.t -> deps =
 fun ~conf extern graph path ->
  let source_table = ImportConf.get_table_for_name conf path.Path.alias in

  let source = ImportContainers.Source.from_table source_table in
  let target = ImportContainers.Source.from_table extern.target in

  match ImportContainers.Source.equal target source with
  | true -> graph
  | _ -> (target, [ source ]) :: graph

let add_external_in_map :
    conf:Syntax.t -> Syntax.Extern.t -> build_map * deps -> build_map * deps =
 fun ~conf extern (map, graph) ->
  let dest = ImportContainers.KeyName.from_table extern.target in
  (* Pre-check that every source is already declared in the configuration. *)
  let _ =
    Expression.fold_values extern.intern_key ~init:() ~f:(fun () path ->
        try
          let _ = ImportConf.get_table_for_name conf path.Path.alias in
          ()
        with
        | Not_found -> (
            match path.alias with
            | Some table -> raise (ImportErrors.Unknown_source table)
            | None ->
                (* This is very unlikely. This error would be raised if we have
                   no source for this import *)
                let root = conf.source in
                raise (ImportErrors.Unknown_source root.name)))
  in

  (* Create the new key with all the expression and all the columns inside it *)
  let new_key =
    {
      name = extern.target.Table.name;
      expression = extern.extern_key;
      columns =
        lazy
          (Expression.fold_values extern.extern_key
             ~f:(fun acc k -> ImportContainers.IntSet.add k acc)
             ~init:ImportContainers.IntSet.empty);
    }
  in
  let build_map =
    ImportContainers.Externals.update map ~key:dest ~f:(function
      | None ->
          (* Create the entry for the external if it does not exists *)
          Some
            {
              table = extern.target;
              columns = IntSet.empty;
              keys = [ new_key ];
            }
      | Some mapping ->
          (* Or update the existing one with the key we just created *)
          Some { mapping with keys = new_key :: mapping.keys })
  in
  let graph =
    Expression.fold_values extern.intern_key ~init:graph
      ~f:(add_dependancies ~conf extern)
  in
  let build_map =
    add_expression_in_map extern.intern_key build_map ~conf
      ~f:
        {
          of_path =
            (fun Path.{ alias; column } ->
              let table = ImportConf.get_table_for_name conf alias in
              (Some table.name, column));
          to_mapping =
            (fun mapping column ->
              { mapping with columns = IntSet.add column mapping.columns });
        }
  in
  (build_map, graph)

let mapper =
  {
    to_mapping =
      (fun mapping column ->
        { mapping with columns = IntSet.add column mapping.columns });
    of_path = (fun ({ alias; column } : Path.t) -> (alias, column));
  }

let get_mapping : Syntax.t -> build_map * deps =
 fun conf ->
  let root = ImportContainers.Source.from_table (ImportConf.root_table conf)
  and root' =
    ImportContainers.KeyName.from_table (ImportConf.root_table conf)
  in
  let graph = [ (root, []) ] in

  (* For each external declared in the configuration file, add the columns to
     query *)
  let init =
    ( ImportContainers.Externals.singleton root'
        {
          table = ImportConf.root_table conf;
          columns = IntSet.empty;
          keys = [];
        },
      graph )
  in
  let map, graph =
    List.fold_left conf.externals ~init ~f:(fun map extern ->
        add_external_in_map ~conf extern map)
  in

  (* Now we don’t bother anymore with the graph and it’s dependency, we just
     collect all the columns in the differents expressions *)
  let map =
    map
    |> add_columns_in_map ~conf ~f:mapper conf.columns
    |> add_columns_in_map ~conf ~f:mapper conf.sort
    |> add_columns_in_map ~conf ~f:mapper conf.filters
    |> add_columns_in_map ~conf ~f:mapper conf.uniq
  in
  (map, graph)

let get_process_order : Syntax.t -> t list =
 fun map ->
  let map, graph = get_mapping map in

  match Tsort.sort graph with
  | Tsort.ErrorCycle l ->
      let name_of_key k =
        ImportContainers.(Externals.find (Source.name k) map).table.name
      in
      raise (ImportErrors.Cycle (List.map ~f:name_of_key l))
  | Sorted elements ->
      (* It’s OK, we know there is no cycles now, we can extract the files to
         load from this list.
      *)
      List.filter_map elements ~f:(fun v ->
          ImportContainers.Externals.find_opt
            (ImportContainers.Source.name v)
            map)
      (* This list can still have duplicated values, and we have to remove them
         still keeping the order.
      *)
      |> List.fold_left ~init:[] ~f:(fun acc element ->
             (* Prevent the same file to beeing loaded twice *)
             match List.mem element ~set:acc with
             | true -> acc
             | false -> element :: acc)