open StdLabels module IntSet = ImportContainers.IntSet module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path module Expression = ImportExpression.T (* Internally, the dependency chain is represented as a graph. Each file to process (csv or xlsx) is marked as a node, The link between each node cannot be represented in the graph because each index can imply multiple tables together (for exemple [join(source.A, other.B)] ). So the graph is only used in order to get the import process order, and each information is keeped in a separate table. *) type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list type key = { name : string; expression : Path.column Expression.t; columns : ImportContainers.IntSet.t Lazy.t; } type t = { table : Table.t; columns : IntSet.t; keys : key list; } let table t = t.table let columns t = t.columns let keys t = t.keys type build_map = t ImportContainers.Externals.t (* The expression can be qualified with full path (when we are in the column definition) or only with a column (in a binding with an externanl table). This type is here to extract the correct information, according to the type we are dealing with : - [to_mapping] allow to pick the right Set in which we need to add the column pointed by the path (this can be [keys] or [columns] - [of_path] is need to extract the qualified source from any kind of path. *) type 'a expression_extractor = { to_mapping : t -> Path.column -> t; of_path : 'a -> string option * Path.column; } (** [add_path_in_map f parent path ] Extract the column from element [path] and process the column in the function [f] The [path] is abstract, but the function [f.of_path] can extract the needed elements in order to add it in the mapping. The function may raise [Unknown_source] if the the path describe an unknown table. *) let add_path_in_map : f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map = fun ~f ~conf path map -> let table_source, column = f.of_path path in let table = try ImportConf.get_table_for_name conf table_source with | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source)) in ImportContainers.Externals.update map ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v -> let mapping = match v with | None -> raise (ImportErrors.Unknown_source table.name) | Some mapping -> mapping in Some (f.to_mapping mapping column)) let add_expression_in_map : f:'a expression_extractor -> conf:Syntax.t -> 'a Expression.t -> build_map -> build_map = fun ~f ~conf expr map -> Expression.fold_values expr ~init:map ~f:(fun map p -> add_path_in_map ~f ~conf p map) let add_columns_in_map : f:'a expression_extractor -> conf:Syntax.t -> 'a Expression.t list -> build_map -> build_map = fun ~f ~conf columns map -> let columns = List.fold_left columns ~init:map ~f:(fun map expression -> let new_map = add_expression_in_map ~f ~conf expression map in new_map) in columns (* [add_dependancies ~conf source map path] add the dependancy from the table [source] to another one after analysing the expression and extracting the path contained inside. This function is called for each path declared inside the expression. *) let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps = fun ~conf extern graph path -> let source_table = ImportConf.get_table_for_name conf path.Path.alias in let source = ImportContainers.Source.from_table source_table in let target = ImportContainers.Source.from_table extern.target in match ImportContainers.Source.equal target source with | true -> graph | _ -> (target, [ source ]) :: graph let add_external_in_map : conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps = fun ~conf extern (map, graph) -> let dest = ImportContainers.KeyName.from_table extern.target in (* Pre-check that every source is already declared in the configuration. *) let _ = Expression.fold_values extern.intern_key ~init:() ~f:(fun () path -> try let _ = ImportConf.get_table_for_name conf path.Path.alias in () with | Not_found -> ( match path.alias with | Some table -> raise (ImportErrors.Unknown_source table) | None -> (* This is very unlikely. This error would be raised if we have no source for this import *) let root = conf.source in raise (ImportErrors.Unknown_source root.name))) in (* Create the new key with all the expression and all the columns inside it *) let new_key = { name = extern.target.Table.name; expression = extern.extern_key; columns = lazy (Expression.fold_values extern.extern_key ~f:(fun acc k -> ImportContainers.IntSet.add k acc) ~init:ImportContainers.IntSet.empty); } in let build_map = ImportContainers.Externals.update map ~key:dest ~f:(function | None -> (* Create the entry for the external if it does not exists *) Some { table = extern.target; columns = IntSet.empty; keys = [ new_key ]; } | Some mapping -> (* Or update the existing one with the key we just created *) Some { mapping with keys = new_key :: mapping.keys }) in let graph = Expression.fold_values extern.intern_key ~init:graph ~f:(add_dependancies ~conf extern) in let build_map = add_expression_in_map extern.intern_key build_map ~conf ~f: { of_path = (fun Path.{ alias; column } -> let table = ImportConf.get_table_for_name conf alias in (Some table.name, column)); to_mapping = (fun mapping column -> { mapping with columns = IntSet.add column mapping.columns }); } in (build_map, graph) let mapper = { to_mapping = (fun mapping column -> { mapping with columns = IntSet.add column mapping.columns }); of_path = (fun ({ alias; column } : Path.t) -> (alias, column)); } let get_mapping : Syntax.t -> build_map * deps = fun conf -> let root = ImportContainers.Source.from_table (ImportConf.root_table conf) and root' = ImportContainers.KeyName.from_table (ImportConf.root_table conf) in let graph = [ (root, []) ] in (* For each external declared in the configuration file, add the columns to query *) let init = ( ImportContainers.Externals.singleton root' { table = ImportConf.root_table conf; columns = IntSet.empty; keys = []; }, graph ) in let map, graph = List.fold_left conf.externals ~init ~f:(fun map extern -> add_external_in_map ~conf extern map) in (* Now we don’t bother anymore with the graph and it’s dependency, we just collect all the columns in the differents expressions *) let map = map |> add_columns_in_map ~conf ~f:mapper conf.columns |> add_columns_in_map ~conf ~f:mapper conf.sort |> add_columns_in_map ~conf ~f:mapper conf.filters |> add_columns_in_map ~conf ~f:mapper conf.uniq in (map, graph) let get_process_order : Syntax.t -> t list = fun map -> let map, graph = get_mapping map in match Tsort.sort graph with | Tsort.ErrorCycle l -> let name_of_key k = ImportContainers.(Externals.find (Source.name k) map).table.name in raise (ImportErrors.Cycle (List.map ~f:name_of_key l)) | Sorted elements -> (* It’s OK, we know there is no cycles now, we can extract the files to load from this list. *) List.filter_map elements ~f:(fun v -> ImportContainers.Externals.find_opt (ImportContainers.Source.name v) map) (* This list can still have duplicated values, and we have to remove them still keeping the order. *) |> List.fold_left ~init:[] ~f:(fun acc element -> (* Prevent the same file to beeing loaded twice *) match List.mem element ~set:acc with | true -> acc | false -> element :: acc)