aboutsummaryrefslogtreecommitdiff
path: root/lib/analysers
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
committerSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
commit6b377719c10d5ab3343fd5221f99a4a21008e25a (patch)
treea7c1e9a820d339a2f161af3e09cf9e3161286796 /lib/analysers
Initial commitmain
Diffstat (limited to 'lib/analysers')
-rw-r--r--lib/analysers/dependency.ml256
-rw-r--r--lib/analysers/dependency.mli40
-rwxr-xr-xlib/analysers/dune12
-rw-r--r--lib/analysers/headers.ml55
-rw-r--r--lib/analysers/headers.mli11
-rw-r--r--lib/analysers/query.ml445
-rw-r--r--lib/analysers/query.mli27
7 files changed, 846 insertions, 0 deletions
diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
new file mode 100644
index 0000000..e81cc49
--- /dev/null
+++ b/lib/analysers/dependency.ml
@@ -0,0 +1,256 @@
+open StdLabels
+module IntSet = ImportContainers.IntSet
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+(*
+ Internally, the dependency chain is represented as a graph.
+
+ Each file to process (csv or xlsx) is marked as a node, The link between
+ each node cannot be represented in the graph because each index can imply
+ multiple tables together (for exemple [join(source.A, other.B)] ). So the
+ graph is only used in order to get the import process order, and each
+ information is keeped in a separate table.
+
+*)
+
+type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list
+
+type key = {
+ name : string;
+ expression : Path.column Expression.t;
+ columns : ImportContainers.IntSet.t Lazy.t;
+}
+
+type t = {
+ table : Table.t;
+ columns : IntSet.t;
+ keys : key list;
+}
+
+let table t = t.table
+let columns t = t.columns
+let keys t = t.keys
+
+type build_map = t ImportContainers.Externals.t
+
+(* The expression can be qualified with full path (when we are in the column
+ definition) or only with a column (in a binding with an externanl table).
+
+ This type is here to extract the correct information, according to the type
+ we are dealing with :
+
+ - [to_mapping] allow to pick the right Set in which we need to add the
+ column pointed by the path (this can be [keys] or [columns]
+ - [of_path] is need to extract the qualified source from any kind of path.
+*)
+type 'a expression_extractor = {
+ to_mapping : t -> Path.column -> t;
+ of_path : 'a -> string option * Path.column;
+}
+
+(** [add_path_in_map f parent path ] Extract the column from element [path] and
+ process the column in the function [f]
+
+ The [path] is abstract, but the function [f.of_path] can extract the needed
+ elements in order to add it in the mapping.
+
+ The function may raise [Unknown_source] if the the path describe an unknown
+ table. *)
+let add_path_in_map :
+ f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map =
+ fun ~f ~conf path map ->
+ let table_source, column = f.of_path path in
+ let table =
+ try ImportConf.get_table_for_name conf table_source with
+ | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source))
+ in
+
+ ImportContainers.Externals.update map
+ ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v ->
+ let mapping =
+ match v with
+ | None -> raise (ImportErrors.Unknown_source table.name)
+ | Some mapping -> mapping
+ in
+
+ Some (f.to_mapping mapping column))
+
+let add_expression_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t ->
+ build_map ->
+ build_map =
+ fun ~f ~conf expr map ->
+ Expression.fold_values expr ~init:map ~f:(fun map p ->
+ add_path_in_map ~f ~conf p map)
+
+let add_columns_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t list ->
+ build_map ->
+ build_map =
+ fun ~f ~conf columns map ->
+ let columns =
+ List.fold_left columns ~init:map ~f:(fun map expression ->
+ let new_map = add_expression_in_map ~f ~conf expression map in
+ new_map)
+ in
+ columns
+
+(* [add_dependancies ~conf source map path]
+ add the dependancy from the table [source] to another one after analysing the
+ expression and extracting the path contained inside.
+
+ This function is called for each path declared inside the expression. *)
+let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps
+ =
+ fun ~conf extern graph path ->
+ let source_table = ImportConf.get_table_for_name conf path.Path.alias in
+
+ let source = ImportContainers.Source.from_table source_table in
+ let target = ImportContainers.Source.from_table extern.target in
+
+ match ImportContainers.Source.equal target source with
+ | true -> graph
+ | _ -> (target, [ source ]) :: graph
+
+let add_external_in_map :
+ conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps =
+ fun ~conf extern (map, graph) ->
+ let dest = ImportContainers.KeyName.from_table extern.target in
+ (* Pre-check that every source is already declared in the configuration. *)
+ let _ =
+ Expression.fold_values extern.intern_key ~init:() ~f:(fun () path ->
+ try
+ let _ = ImportConf.get_table_for_name conf path.Path.alias in
+ ()
+ with
+ | Not_found -> (
+ match path.alias with
+ | Some table -> raise (ImportErrors.Unknown_source table)
+ | None ->
+ (* This is very unlikely. This error would be raised if we have
+ no source for this import *)
+ let root = conf.source in
+ raise (ImportErrors.Unknown_source root.name)))
+ in
+
+ (* Create the new key with all the expression and all the columns inside it *)
+ let new_key =
+ {
+ name = extern.target.Table.name;
+ expression = extern.extern_key;
+ columns =
+ lazy
+ (Expression.fold_values extern.extern_key
+ ~f:(fun acc k -> ImportContainers.IntSet.add k acc)
+ ~init:ImportContainers.IntSet.empty);
+ }
+ in
+ let build_map =
+ ImportContainers.Externals.update map ~key:dest ~f:(function
+ | None ->
+ (* Create the entry for the external if it does not exists *)
+ Some
+ {
+ table = extern.target;
+ columns = IntSet.empty;
+ keys = [ new_key ];
+ }
+ | Some mapping ->
+ (* Or update the existing one with the key we just created *)
+ Some { mapping with keys = new_key :: mapping.keys })
+ in
+ let graph =
+ Expression.fold_values extern.intern_key ~init:graph
+ ~f:(add_dependancies ~conf extern)
+ in
+ let build_map =
+ add_expression_in_map extern.intern_key build_map ~conf
+ ~f:
+ {
+ of_path =
+ (fun Path.{ alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ (Some table.name, column));
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ }
+ in
+ (build_map, graph)
+
+let mapper =
+ {
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ of_path = (fun ({ alias; column } : Path.t) -> (alias, column));
+ }
+
+let get_mapping : Syntax.t -> build_map * deps =
+ fun conf ->
+ let root = ImportContainers.Source.from_table (ImportConf.root_table conf)
+ and root' =
+ ImportContainers.KeyName.from_table (ImportConf.root_table conf)
+ in
+ let graph = [ (root, []) ] in
+
+ (* For each external declared in the configuration file, add the columns to
+ query *)
+ let init =
+ ( ImportContainers.Externals.singleton root'
+ {
+ table = ImportConf.root_table conf;
+ columns = IntSet.empty;
+ keys = [];
+ },
+ graph )
+ in
+ let map, graph =
+ List.fold_left conf.externals ~init ~f:(fun map extern ->
+ add_external_in_map ~conf extern map)
+ in
+
+ (* Now we don’t bother anymore with the graph and it’s dependency, we just
+ collect all the columns in the differents expressions *)
+ let map =
+ map
+ |> add_columns_in_map ~conf ~f:mapper conf.columns
+ |> add_columns_in_map ~conf ~f:mapper conf.sort
+ |> add_columns_in_map ~conf ~f:mapper conf.filters
+ |> add_columns_in_map ~conf ~f:mapper conf.uniq
+ in
+ (map, graph)
+
+let get_process_order : Syntax.t -> t list =
+ fun map ->
+ let map, graph = get_mapping map in
+
+ match Tsort.sort graph with
+ | Tsort.ErrorCycle l ->
+ let name_of_key k =
+ ImportContainers.(Externals.find (Source.name k) map).table.name
+ in
+ raise (ImportErrors.Cycle (List.map ~f:name_of_key l))
+ | Sorted elements ->
+ (* It’s OK, we know there is no cycles now, we can extract the files to
+ load from this list.
+ *)
+ List.filter_map elements ~f:(fun v ->
+ ImportContainers.Externals.find_opt
+ (ImportContainers.Source.name v)
+ map)
+ (* This list can still have duplicated values, and we have to remove them
+ still keeping the order.
+ *)
+ |> List.fold_left ~init:[] ~f:(fun acc element ->
+ (* Prevent the same file to beeing loaded twice *)
+ match List.mem element ~set:acc with
+ | true -> acc
+ | false -> element :: acc)
diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli
new file mode 100644
index 0000000..c89522a
--- /dev/null
+++ b/lib/analysers/dependency.mli
@@ -0,0 +1,40 @@
+type t
+
+val get_process_order : ImportConf.Syntax.t -> t list
+(** Extract the file list to process, following the identified dependancies.
+ Try to load first the document which does not required another spreadsheet,
+ and keep going in the topological order
+
+ Raise [Unknown_source file] if a source is referenced but is not declared.
+ The order matter : the exception will be raised in a source is referenced
+ before the declaration (when chaining differents externals) *)
+
+val table : t -> ImportDataTypes.Table.t
+(** Get the table to proceed. *)
+
+val columns : t -> ImportContainers.IntSet.t
+(** A set of columns loaded in this table. Thoses columns may not need
+ reported in the final export.
+
+ Each column is identified by is index in the Excel document.
+
+ This set does not include the columns used in the keys. They can be
+ fetched with the [keys] function *)
+
+type key = {
+ name : string; (** This is the name of the target table we are pointed to *)
+ expression : ImportDataTypes.Path.column ImportExpression.T.t;
+ (** The expression used as key *)
+ columns : ImportContainers.IntSet.t Lazy.t;
+ (** The list of columns used in the key. All the columns are referenced
+ in the expression. We can have many columns used inside a single
+ key when a function is used (for example for joining multiple
+ columns into a single key) *)
+}
+(** This type describe the join key in a table. The name is the refering table
+ using this key (the key name in the datable is key_"name" ), and the
+ expression describe how to build the key. *)
+
+val keys : t -> key list
+(** [keys] is the list of columns pointed by another one. They are
+ considered as join key between the diffrent tables. *)
diff --git a/lib/analysers/dune b/lib/analysers/dune
new file mode 100755
index 0000000..1bbc30f
--- /dev/null
+++ b/lib/analysers/dune
@@ -0,0 +1,12 @@
+(library
+ (name importAnalyser)
+ (libraries
+ importConf
+ importContainers
+ importCSV
+ importDataTypes
+ importExpression
+ importErrors
+ tsort
+ )
+)
diff --git a/lib/analysers/headers.ml b/lib/analysers/headers.ml
new file mode 100644
index 0000000..916dfee
--- /dev/null
+++ b/lib/analysers/headers.ml
@@ -0,0 +1,55 @@
+open StdLabels
+module I = ImportConf
+module E = ImportExpression.T
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+module SheeetMap = Map.Make (struct
+ type t = Table.t
+
+ (** We are sure we can’t have the same name for two different table. *)
+ let compare v1 v2 = String.compare (Table.name v1) (Table.name v2)
+end)
+
+type content = string array
+
+type t = content SheeetMap.t
+(** The map associate a line of headers for each table.
+
+ The header are always in string. *)
+
+(** Get the headers. The function has to be called after reading each document,
+ and will reformat the first line with the values from the cell. The
+ functions will not be evaluated (instead they will be displayed "as is".
+
+ When there is no value for this path, return empty string.
+ *)
+let columns : Syntax.t -> t -> string list =
+ fun conf t ->
+ (* We build here a custom printer which search in the array for the column
+ name.
+
+ This function will be given as argument in the expression printer. *)
+ let f : Path.t -> Buffer.t -> unit =
+ fun path b ->
+ let source = I.get_table_for_name conf path.alias in
+
+ match SheeetMap.find_opt source t with
+ | None -> ()
+ | Some arr -> (
+ try Buffer.add_string b (Array.get arr (path.column - 1)) with
+ | _ ->
+ prerr_endline
+ @@ Printf.sprintf "No header found for :%s.%s"
+ (Option.value ~default:(I.root_table conf).Table.name
+ path.alias)
+ (ImportCSV.Csv.column_to_string path.column))
+ in
+
+ List.map conf.Syntax.columns ~f:(fun column ->
+ let b = Buffer.create 4 in
+
+ ImportExpression.Headers.headers_of_expression b f column;
+
+ Buffer.contents b)
diff --git a/lib/analysers/headers.mli b/lib/analysers/headers.mli
new file mode 100644
index 0000000..03e384b
--- /dev/null
+++ b/lib/analysers/headers.mli
@@ -0,0 +1,11 @@
+module SheeetMap : Map.S with type key = ImportDataTypes.Table.t
+
+type t = string array SheeetMap.t
+
+val columns : ImportConf.Syntax.t -> t -> string list
+(** Get the headers. The function has to be called after reading each document,
+ and will reformat the first line with the values from the cell. The
+ functions will not be evaluated (instead they will be displayed "as is".
+
+ When there is no value for this path, return empty string.
+ *)
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
new file mode 100644
index 0000000..7a6dd2a
--- /dev/null
+++ b/lib/analysers/query.ml
@@ -0,0 +1,445 @@
+open StdLabels
+module Expression = ImportExpression
+module Q = Expression.Query
+module Syntax = ImportConf.Syntax
+module Table = ImportConf.Table
+module Path = ImportConf.Path
+
+let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
+
+(** The module allow to create fragment in the query which keep together the
+ binderd parameters and the text of the query.contents.
+
+ This is used a lot in order to create the CTE, where you need the create
+ fragment used both in the main request and partially in the CTE itself.
+
+ The content is mutable and all the functions are returning [unit]. *)
+module Chunk = struct
+ type t = {
+ b : Buffer.t;
+ parameters : ImportCSV.DataType.t Queue.t;
+ }
+
+ let create : unit -> t =
+ fun () -> { b = Buffer.create 16; parameters = Queue.create () }
+
+ let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t =
+ fun b parameters -> { b; parameters }
+
+ (* Append the element from [tail] at the end of [head]
+
+ Tail is destroyed during the operation.
+ *)
+ let append : head:t -> tail:t -> unit =
+ fun ~head ~tail ->
+ match Buffer.length tail.b with
+ | 0 -> ()
+ | _ ->
+ Buffer.add_buffer head.b tail.b;
+ Queue.transfer tail.parameters head.parameters;
+ ()
+
+ (** Add a litteral string in the sequence *)
+ let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v
+
+ let copy : t -> t =
+ fun t ->
+ let b = Buffer.create 16 and parameters = Queue.copy t.parameters in
+ Buffer.add_buffer b t.b;
+ { b; parameters }
+
+ let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit =
+ fun t p -> Queue.add_seq t.parameters p
+end
+
+let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit =
+ fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f
+
+(* Collect all the tables pointed by the expression. *)
+let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =
+ fun conf expression ->
+ Expression.T.fold_values expression ~init:[] ~f:(fun acc path ->
+ let table = ImportConf.get_table_for_name conf path.Path.alias in
+ let table_name = Table.name table in
+ (table, table_name) :: acc)
+ |> List.sort_uniq ~cmp:Stdlib.compare
+
+(** Represent a column in a safe way in a query *)
+let print_column : Table.t -> string -> string =
+ fun table column ->
+ String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ]
+
+let create_table : Dependency.t -> string =
+ fun mapping ->
+ let b = Buffer.create 64 in
+
+ Buffer.add_string b "CREATE TABLE '";
+ Buffer.add_string b (Table.name (Dependency.table mapping));
+ Buffer.add_string b "' (id INTEGER PRIMARY KEY";
+
+ List.iter (Dependency.keys mapping) ~f:(fun { Dependency.name; _ } ->
+ Buffer.add_string b ",'key_";
+ Buffer.add_string b name;
+ Buffer.add_string b "'");
+
+ ImportContainers.IntSet.iter (Dependency.columns mapping) ~f:(fun i ->
+ Buffer.add_string b ",'col_";
+ Buffer.add_string b (string_of_int i);
+ Buffer.add_string b "'");
+ Buffer.add_string b ")";
+
+ Buffer.contents b
+
+let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
+ fun ~conf buffer { alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ let table_name = table.Table.name in
+ Format.fprintf buffer "'%s'.col_%d" table_name column
+
+(** Extract the informations from the dependancies. We get two informations here :
+
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing
+ links later.
+ *)
+let query_of_external :
+ conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit =
+ fun ~conf ~join_buffer external_ ->
+ let extern_table = Table.name external_.target in
+
+ let formatter = Format.formatter_of_buffer join_buffer.b in
+ Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
+ external_.target.name
+ (prepare_key ~f:(fun f ->
+ let q =
+ Q.query_of_expression Q.BindParam f (show_path ~conf)
+ external_.intern_key
+ in
+
+ Chunk.add_parameters join_buffer (Queue.to_seq q)))
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+ Format.pp_print_flush formatter ()
+
+(** Create the from part of the query, adding all the reuired externals.
+
+ SQLite is able to optimize the query and do not load the table not used in
+ the select clause. *)
+let create_from_chunck : Syntax.t -> Chunk.t -> unit =
+ fun conf c ->
+ Chunk.add_string c "\nFROM '";
+ Chunk.add_string c (Table.name conf.source);
+ Chunk.add_string c "' AS '";
+ Chunk.add_string c conf.source.name;
+ Chunk.add_string c "'";
+
+ (* Add the externals in the query *)
+ List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c)
+
+(** Build a CTE query in order to use any group function inside the query.
+ Return the binded parameters used in the expression. The buffer given in
+ argument is also modified during the construction.
+
+ If filters is not None, the clauses are added to the CTE. *)
+let build_cte :
+ Syntax.t ->
+ expression:'a Expression.T.t ->
+ filters:Chunk.t option ->
+ Chunk.t =
+ fun conf ~expression ~filters ->
+ (* The binded parameters queue will be used later in the full query *)
+ let cte_chunk = Chunk.create () in
+
+ Chunk.add_string cte_chunk "WITH cte AS (SELECT ";
+ Chunk.add_string cte_chunk conf.source.name;
+ Chunk.add_string cte_chunk ".id, ";
+
+ let formatter = Format.formatter_of_buffer cte_chunk.b in
+
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression
+ in
+ Format.pp_print_flush formatter ();
+ Chunk.add_parameters cte_chunk (Queue.to_seq p);
+ (* The name is hardcoded here, and used in [Expression.Filters.window] *)
+ Chunk.add_string cte_chunk " AS group0";
+
+ let () = create_from_chunck conf cte_chunk in
+ let () =
+ match filters with
+ | None -> ()
+ | Some filters_chunk ->
+ Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk)
+ in
+
+ Chunk.add_string cte_chunk ")\n";
+ cte_chunk
+
+type filter_evaluation = {
+ content : Buffer.t;
+ parameters : ImportCSV.DataType.t Seq.t;
+ cte : (string * Chunk.t) option;
+}
+(** Build the filters to apply in the query. We make the difference here
+ between the predicates to apply directly in the query, and the filters
+ associated with a group, which are required to be transformed into a CTE
+in SQL, and are evaluated before. *)
+
+(** Evaluate the filters on the query *)
+let eval_filters : Syntax.t -> filter_evaluation =
+ fun conf ->
+ match conf.filters with
+ | [] ->
+ let empty_buffer = Buffer.create 0 in
+ { content = empty_buffer; parameters = Seq.empty; cte = None }
+ | filters -> (
+ (* Create a new queue in order to accumulate all the parameters to bind.
+ This filter will be given to both the CTE if any, or reused in the
+ main query when there is no CTE.
+ *)
+ let chunk_filters = Chunk.create () in
+ Chunk.add_string chunk_filters "\nWHERE ";
+
+ let group = Chunk.create () in
+
+ let with_cte, with_exr =
+ List.fold_left filters ~init:(None, false)
+ ~f:(fun (with_cte, with_exr) column ->
+ (* The function will return an option in second position which is
+ None when no Group function where found, and Some Expression
+ otherwise *)
+ let b = Buffer.create 16 in
+
+ let formatter = Format.formatter_of_buffer b in
+ let queue, group_found =
+ Expression.Filters.query_of_expression Q.BindParam formatter
+ (show_path ~conf) column
+ in
+ Format.pp_print_flush formatter ();
+ let clause = Chunk.create' b queue in
+
+ match (group_found, with_cte) with
+ | None, _ ->
+ Chunk.append ~head:chunk_filters ~tail:clause;
+ Chunk.add_string chunk_filters "\nAND ";
+ (with_cte, true)
+ | (Some _ as group'), None ->
+ (* We have a group here, we do not add it into the
+ filter_buffer right now.
+
+ This can occur only once, the second one will raise
+ an error. *)
+ Chunk.append ~head:group ~tail:clause;
+ (group', with_exr)
+ | Some _, Some _ -> raise ImportErrors.MisplacedWindow)
+ in
+
+ match with_cte with
+ | None ->
+ let content = chunk_filters.b in
+ truncate content 5;
+ {
+ (* There is no group clause in the query *)
+ content;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ cte = None;
+ }
+ | Some expression ->
+ let filters =
+ if with_exr then (
+ (* If we have additionnals filters from the group clause, we
+ have to report them in the CTE instead of the main query. *)
+ let c' = Chunk.copy chunk_filters in
+ truncate c'.b 5;
+ Some c')
+ else None
+ in
+
+ (* Create the common expression table *)
+ let cte_parameters = build_cte conf ~expression ~filters in
+ Chunk.append ~head:chunk_filters ~tail:group;
+
+ {
+ content = chunk_filters.b;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ (* The name is hardcoded here, and used in
+ [Expression.Filters.window] *)
+ cte = Some ("cte", cte_parameters);
+ })
+
+type query = {
+ q : string;
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+
+(** Build the query and return also the mapping in order to identify each
+ external links between files.
+
+ The select query will name each column with an alias, and the map allow to
+ find which source is pointed by this alias. *)
+let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
+ fun conf ->
+ (* If the filters contains a group expression, we need to transform this into
+ a CTE, which have to be evaluated before the main query. That’s why we are
+ evaluating the filters right now.*)
+ let filters = eval_filters conf in
+ let b = Buffer.create 256 in
+ let parameters = Queue.create () in
+
+ Option.iter
+ (fun (_, (cte : Chunk.t)) ->
+ Buffer.add_buffer b cte.b;
+ Queue.add_seq parameters (Queue.to_seq cte.parameters))
+ filters.cte;
+
+ (* For each column in the configuration file, add the corresponding element
+ in the query.
+
+ The Sqlite driver return the elements in an array, we create an array to
+ in order to manage the elements together.
+ *)
+ let headers = Array.make (List.length conf.columns) (Obj.magic None) in
+
+ let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter "SELECT %a"
+ (Format.pp_print_seq
+ ~pp_sep:(fun f () -> Format.fprintf f ",\n")
+ (fun formatter (i, column) ->
+ Array.set headers i column;
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer p parameters;
+ Format.fprintf formatter " AS result_%d" i))
+ columns
+ in
+ Format.pp_print_flush formatter ();
+
+ let () = create_from_chunck conf (Chunk.create' b parameters) in
+
+ (* If the query has a CTE, link it as well. We use an INNER JOIN here because
+ we want to be sure to get all the rows fetched by the CTE
+ *)
+ let () =
+ match filters.cte with
+ | None -> ()
+ | Some (name, _) ->
+ Buffer.add_string b "\nINNER JOIN '";
+ Buffer.add_string b name;
+ Buffer.add_string b "' ON ";
+ Buffer.add_string b name;
+ Buffer.add_string b ".id = ";
+ Buffer.add_string b conf.source.name;
+ Buffer.add_string b ".id"
+ in
+
+ Buffer.add_buffer b filters.content;
+ Queue.add_seq parameters filters.parameters;
+
+ let formatter = Format.formatter_of_buffer b in
+ (match conf.Syntax.uniq with
+ | [] -> ()
+ | uniq ->
+ Format.fprintf formatter "\nGROUP BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ uniq);
+ (match conf.Syntax.sort with
+ | [] -> ()
+ | sort ->
+ Format.fprintf formatter "\nORDER BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ sort);
+ Format.pp_print_flush formatter ();
+
+ ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
+
+let check_external : Syntax.t -> Syntax.extern -> query =
+ fun conf external_ ->
+ let extern_table = Table.name external_.target in
+
+ let parameters = Queue.create () in
+ let internal_key_buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer internal_key_buffer in
+ let internal_key_seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ external_.intern_key
+ in
+ Format.pp_print_flush formatter ();
+
+ let external_key_buffer = Buffer.create 16 in
+ let pointed_tables = pointed_tables conf external_.intern_key in
+ Buffer.add_string external_key_buffer
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+ (* We do a copy before the transfert because the Queue is reused later in the
+ query *)
+ Queue.transfer (Queue.copy internal_key_seq) parameters;
+
+ let join_content = Buffer.contents external_key_buffer in
+ let inner_content = Buffer.contents internal_key_buffer in
+ let b = Buffer.create 256 in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter
+ "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \
+ AND %s IS NOT NULL AND %s <> ''"
+ (fun formatter -> function
+ | [ (table, _name) ] ->
+ Format.fprintf formatter "%s, " (print_column table "id")
+ | _ -> Format.fprintf formatter "-1, ")
+ pointed_tables (* *)
+ inner_content (* *)
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.pp_print_text f ", ")
+ (fun formatter (table, name) ->
+ Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name))
+ pointed_tables (* *)
+ extern_table (* *)
+ external_.target.name
+ (prepare_key ~f:(fun b ->
+ Format.pp_print_text b (Buffer.contents internal_key_buffer)))
+ join_content (* *)
+ join_content (* *)
+ inner_content (* *)
+ inner_content
+ in
+
+ Format.pp_print_flush formatter ();
+
+ { q = Buffer.contents b; parameters = Queue.to_seq parameters }
+
+let build_key_insert : Buffer.t -> Dependency.key -> unit =
+ fun buffer { Dependency.expression; _ } ->
+ let show_column : Format.formatter -> Path.column -> unit =
+ fun formatter column -> Format.fprintf formatter ":col_%d" column
+ in
+
+ let formatter = Format.formatter_of_buffer buffer in
+
+ let () =
+ prepare_key formatter ~f:(fun formatter ->
+ Q.query_of_expression Q.NoParam formatter show_column expression)
+ in
+
+ Format.pp_print_flush formatter ();
+
+ ()
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
new file mode 100644
index 0000000..14d2807
--- /dev/null
+++ b/lib/analysers/query.mli
@@ -0,0 +1,27 @@
+module Syntax = ImportConf.Syntax
+
+val create_table : Dependency.t -> string
+
+type query = {
+ q : string; (** The query to execute *)
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+(** This type represent a query to execute.
+ [q] is the template to run, and shall be run with all the binded parameters.
+ *)
+
+val select :
+ Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
+
+val check_external : Syntax.t -> Syntax.extern -> query
+(** Create a query which select all the missing key in an external *)
+
+val build_key_insert : Buffer.t -> Dependency.key -> unit
+(* Build the fragment insert query.
+
+ This use prepare statement and apply the function at the insert time, in
+ order to have the key already in the expected format in the database.
+
+ The column are name :col_XX where XX is the index of the column in the
+ datasheet (starting from 1)
+*)