aboutsummaryrefslogtreecommitdiff
path: root/lib/analysers
diff options
context:
space:
mode:
Diffstat (limited to 'lib/analysers')
-rw-r--r--lib/analysers/dependency.ml8
-rw-r--r--lib/analysers/query.ml155
-rw-r--r--lib/analysers/query.mli7
3 files changed, 102 insertions, 68 deletions
diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
index e81cc49..9dd4736 100644
--- a/lib/analysers/dependency.ml
+++ b/lib/analysers/dependency.ml
@@ -52,7 +52,7 @@ type 'a expression_extractor = {
}
(** [add_path_in_map f parent path ] Extract the column from element [path] and
- process the column in the function [f]
+ process the column in the function [f]
The [path] is abstract, but the function [f.of_path] can extract the needed
elements in order to add it in the mapping.
@@ -107,8 +107,8 @@ let add_columns_in_map :
expression and extracting the path contained inside.
This function is called for each path declared inside the expression. *)
-let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps
- =
+let add_dependancies :
+ conf:Syntax.t -> Syntax.Extern.t -> deps -> Path.t -> deps =
fun ~conf extern graph path ->
let source_table = ImportConf.get_table_for_name conf path.Path.alias in
@@ -120,7 +120,7 @@ let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps
| _ -> (target, [ source ]) :: graph
let add_external_in_map :
- conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps =
+ conf:Syntax.t -> Syntax.Extern.t -> build_map * deps -> build_map * deps =
fun ~conf extern (map, graph) ->
let dest = ImportContainers.KeyName.from_table extern.target in
(* Pre-check that every source is already declared in the configuration. *)
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index 7a6dd2a..dff3b9d 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -96,14 +96,14 @@ let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
let table_name = table.Table.name in
Format.fprintf buffer "'%s'.col_%d" table_name column
-(** Extract the informations from the dependancies. We get two informations here :
+(** Extract the information from the dependencies. We get two pieces of
+ information here:
- - the join query in order to load the data from the external column
- - the column corresponding to the key in order to identify the missing
- links later.
- *)
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing links
+ later. *)
let query_of_external :
- conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit =
+ conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit =
fun ~conf ~join_buffer external_ ->
let extern_table = Table.name external_.target in
@@ -117,12 +117,12 @@ let query_of_external :
in
Chunk.add_parameters join_buffer (Queue.to_seq q)))
- (print_column external_.Syntax.target
- ("key_" ^ external_.Syntax.target.name));
+ (print_column external_.Syntax.Extern.target
+ ("key_" ^ external_.Syntax.Extern.target.name));
Format.pp_print_flush formatter ()
-(** Create the from part of the query, adding all the reuired externals.
+(** Create the from part of the query, adding all the required externals.
SQLite is able to optimize the query and do not load the table not used in
the select clause. *)
@@ -139,7 +139,7 @@ let create_from_chunck : Syntax.t -> Chunk.t -> unit =
(** Build a CTE query in order to use any group function inside the query.
Return the binded parameters used in the expression. The buffer given in
- argument is also modified during the construction.
+ argument is also modified during the construction.
If filters is not None, the clauses are added to the CTE. *)
let build_cte :
@@ -181,10 +181,10 @@ type filter_evaluation = {
parameters : ImportCSV.DataType.t Seq.t;
cte : (string * Chunk.t) option;
}
-(** Build the filters to apply in the query. We make the difference here
- between the predicates to apply directly in the query, and the filters
- associated with a group, which are required to be transformed into a CTE
-in SQL, and are evaluated before. *)
+(** Build the filters to apply in the query. We distinguish here between the
+ predicates to apply directly in the query, and the filters associated with a
+ group, which are required to be transformed into a CTE in SQL, and are
+ evaluated before. *)
(** Evaluate the filters on the query *)
let eval_filters : Syntax.t -> filter_evaluation =
@@ -274,7 +274,7 @@ type query = {
}
(** Build the query and return also the mapping in order to identify each
- external links between files.
+ external links between files.
The select query will name each column with an alias, and the map allow to
find which source is pointed by this alias. *)
@@ -371,61 +371,98 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
-let check_external : Syntax.t -> Syntax.extern -> query =
+let check_external : Syntax.t -> Syntax.Extern.t -> query =
fun conf external_ ->
- let extern_table = Table.name external_.target in
-
- let parameters = Queue.create () in
- let internal_key_buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer internal_key_buffer in
- let internal_key_seq =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
- external_.intern_key
+ let internal_chunk =
+ let internal_key_buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer internal_key_buffer in
+ let internal_key_seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ external_.Syntax.Extern.intern_key
+ in
+ Format.pp_print_flush formatter ();
+ Chunk.create' internal_key_buffer (Queue.copy internal_key_seq)
in
- Format.pp_print_flush formatter ();
let external_key_buffer = Buffer.create 16 in
- let pointed_tables = pointed_tables conf external_.intern_key in
Buffer.add_string external_key_buffer
- (print_column external_.Syntax.target
- ("key_" ^ external_.Syntax.target.name));
+ (print_column external_.Syntax.Extern.target
+ ("key_" ^ external_.Syntax.Extern.target.name));
+ let pointed_tables = pointed_tables conf external_.intern_key in
+
+ let parameters = Queue.create () in
(* We do a copy before the transfert because the Queue is reused later in the
query *)
- Queue.transfer (Queue.copy internal_key_seq) parameters;
+ Queue.transfer (Queue.copy internal_chunk.parameters) parameters;
+
+ (* We have to link all the tables referenced by the external, we cannot let
+ any table not linked with the source in the request (this would cause a
+ cartesian product request)
+
+ This is not the usual way to proceed (we start from the source and link the externals)
+ *)
+ let rec collect_links :
+ Syntax.Extern.t -> Syntax.Extern.t list -> Syntax.Extern.t list =
+ fun table init ->
+ let res =
+ (* Do not add the same external if the value is already present *)
+ let init =
+ match List.find_opt init ~f:(fun ext -> table == ext) with
+ | None -> table :: init
+ | Some _ -> init
+ in
+
+ Expression.T.fold_values ~init table.Syntax.Extern.intern_key
+ ~f:(fun acc expr ->
+ match expr.Path.alias with
+ | None -> acc
+ | Some _ as path -> (
+ let table = ImportConf.get_table_for_name conf path in
+ (* Look for this table in the externals *)
+ let external_opt =
+ List.find_opt conf.Syntax.externals ~f:(fun t ->
+ t.Syntax.Extern.target == table)
+ in
+ match external_opt with
+ | None -> acc
+ | Some ext -> collect_links ext acc))
+ in
+ res
+ in
+ let dependencies = collect_links external_ [] in
let join_content = Buffer.contents external_key_buffer in
- let inner_content = Buffer.contents internal_key_buffer in
- let b = Buffer.create 256 in
- let formatter = Format.formatter_of_buffer b in
+ let request = Chunk.create () in
+ Chunk.add_string request "SELECT ";
let () =
- Format.fprintf formatter
- "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \
- AND %s IS NOT NULL AND %s <> ''"
- (fun formatter -> function
- | [ (table, _name) ] ->
- Format.fprintf formatter "%s, " (print_column table "id")
- | _ -> Format.fprintf formatter "-1, ")
- pointed_tables (* *)
- inner_content (* *)
- (Format.pp_print_list
- ~pp_sep:(fun f () -> Format.pp_print_text f ", ")
- (fun formatter (table, name) ->
- Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name))
- pointed_tables (* *)
- extern_table (* *)
- external_.target.name
- (prepare_key ~f:(fun b ->
- Format.pp_print_text b (Buffer.contents internal_key_buffer)))
- join_content (* *)
- join_content (* *)
- inner_content (* *)
- inner_content
+ match pointed_tables with
+ | [] ->
+ (* No pointed table: just return -1 *)
+ Chunk.add_string request "-1"
+ | (table, _name) :: _ ->
+ (* At least one pointed table: extract the row number from the first one. *)
+ Chunk.add_string request (print_column table "id")
in
+ Chunk.add_string request ", ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " FROM\n'";
+ Chunk.add_string request (Table.name conf.source);
+ Chunk.add_string request "' AS '";
+ Chunk.add_string request conf.source.name;
+ Chunk.add_string request "'";
- Format.pp_print_flush formatter ();
-
- { q = Buffer.contents b; parameters = Queue.to_seq parameters }
+ (* Add the externals in the query *)
+ List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request);
+ Chunk.add_string request " WHERE ";
+ Chunk.add_string request join_content;
+ Chunk.add_string request " IS NULL AND ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " IS NOT NULL AND ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " <> ''";
+ let q = Buffer.contents request.b in
+ { q; parameters = Queue.to_seq request.parameters }
let build_key_insert : Buffer.t -> Dependency.key -> unit =
fun buffer { Dependency.expression; _ } ->
@@ -440,6 +477,4 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit =
Q.query_of_expression Q.NoParam formatter show_column expression)
in
- Format.pp_print_flush formatter ();
-
- ()
+ Format.pp_print_flush formatter ()
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
index 14d2807..520718a 100644
--- a/lib/analysers/query.mli
+++ b/lib/analysers/query.mli
@@ -6,14 +6,13 @@ type query = {
q : string; (** The query to execute *)
parameters : ImportCSV.DataType.t Seq.t;
}
-(** This type represent a query to execute.
- [q] is the template to run, and shall be run with all the binded parameters.
- *)
+(** This type represents a query to execute. [q] is the template to run, and
+ shall be run with all the bound parameters. *)
val select :
Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
-val check_external : Syntax.t -> Syntax.extern -> query
+val check_external : Syntax.t -> Syntax.Extern.t -> query
(** Create a query which select all the missing key in an external *)
val build_key_insert : Buffer.t -> Dependency.key -> unit