aboutsummaryrefslogtreecommitdiff
path: root/lib/analysers/query.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/analysers/query.ml')
-rw-r--r--lib/analysers/query.ml155
1 files changed, 95 insertions, 60 deletions
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index 7a6dd2a..dff3b9d 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -96,14 +96,14 @@ let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
let table_name = table.Table.name in
Format.fprintf buffer "'%s'.col_%d" table_name column
-(** Extract the informations from the dependancies. We get two informations here :
+(** Extract the information from the dependencies. We get two pieces of
+ information here:
- - the join query in order to load the data from the external column
- - the column corresponding to the key in order to identify the missing
- links later.
- *)
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing links
+ later. *)
let query_of_external :
- conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit =
+ conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit =
fun ~conf ~join_buffer external_ ->
let extern_table = Table.name external_.target in
@@ -117,12 +117,12 @@ let query_of_external :
in
Chunk.add_parameters join_buffer (Queue.to_seq q)))
- (print_column external_.Syntax.target
- ("key_" ^ external_.Syntax.target.name));
+ (print_column external_.Syntax.Extern.target
+ ("key_" ^ external_.Syntax.Extern.target.name));
Format.pp_print_flush formatter ()
-(** Create the from part of the query, adding all the reuired externals.
+(** Create the from part of the query, adding all the required externals.
SQLite is able to optimize the query and does not load the tables that are not
used in the select clause. *)
@@ -139,7 +139,7 @@ let create_from_chunck : Syntax.t -> Chunk.t -> unit =
(** Build a CTE query in order to use any group function inside the query.
Return the bound parameters used in the expression. The buffer given in
- argument is also modified during the construction.
+ argument is also modified during the construction.
If filters is not None, the clauses are added to the CTE. *)
let build_cte :
@@ -181,10 +181,10 @@ type filter_evaluation = {
parameters : ImportCSV.DataType.t Seq.t;
cte : (string * Chunk.t) option;
}
-(** Build the filters to apply in the query. We make the difference here
- between the predicates to apply directly in the query, and the filters
- associated with a group, which are required to be transformed into a CTE
-in SQL, and are evaluated before. *)
+(** Build the filters to apply in the query. We make the difference here between
+ the predicates to apply directly in the query, and the filters associated
+ with a group, which are required to be transformed into a CTE in SQL, and
+ are evaluated before. *)
(** Evaluate the filters on the query *)
let eval_filters : Syntax.t -> filter_evaluation =
@@ -274,7 +274,7 @@ type query = {
}
(** Build the query and return also the mapping in order to identify each
- external links between files.
+ external links between files.
The select query will name each column with an alias, and the map allows us to
find which source this alias points to. *)
@@ -371,61 +371,98 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
-let check_external : Syntax.t -> Syntax.extern -> query =
+let check_external : Syntax.t -> Syntax.Extern.t -> query =
fun conf external_ ->
- let extern_table = Table.name external_.target in
-
- let parameters = Queue.create () in
- let internal_key_buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer internal_key_buffer in
- let internal_key_seq =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
- external_.intern_key
+ let internal_chunk =
+ let internal_key_buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer internal_key_buffer in
+ let internal_key_seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ external_.Syntax.Extern.intern_key
+ in
+ Format.pp_print_flush formatter ();
+ Chunk.create' internal_key_buffer (Queue.copy internal_key_seq)
in
- Format.pp_print_flush formatter ();
let external_key_buffer = Buffer.create 16 in
- let pointed_tables = pointed_tables conf external_.intern_key in
Buffer.add_string external_key_buffer
- (print_column external_.Syntax.target
- ("key_" ^ external_.Syntax.target.name));
+ (print_column external_.Syntax.Extern.target
+ ("key_" ^ external_.Syntax.Extern.target.name));
+ let pointed_tables = pointed_tables conf external_.intern_key in
+
+ let parameters = Queue.create () in
(* We do a copy before the transfer because the Queue is reused later in the
query *)
- Queue.transfer (Queue.copy internal_key_seq) parameters;
+ Queue.transfer (Queue.copy internal_chunk.parameters) parameters;
+
+ (* We have to link all the tables referenced by the external; we cannot leave
+ any table not linked with the source in the request (this would cause a
+ cartesian product request).
+
+ This is not the usual way to proceed (we start from the source and link the externals)
+ *)
+ let rec collect_links :
+ Syntax.Extern.t -> Syntax.Extern.t list -> Syntax.Extern.t list =
+ fun table init ->
+ let res =
+ (* Do not add the same external if the value is already present *)
+ let init =
+ match List.find_opt init ~f:(fun ext -> table == ext) with
+ | None -> table :: init
+ | Some _ -> init
+ in
+
+ Expression.T.fold_values ~init table.Syntax.Extern.intern_key
+ ~f:(fun acc expr ->
+ match expr.Path.alias with
+ | None -> acc
+ | Some _ as path -> (
+ let table = ImportConf.get_table_for_name conf path in
+ (* Look for this table in the externals *)
+ let external_opt =
+ List.find_opt conf.Syntax.externals ~f:(fun t ->
+ t.Syntax.Extern.target == table)
+ in
+ match external_opt with
+ | None -> acc
+ | Some ext -> collect_links ext acc))
+ in
+ res
+ in
+ let dependencies = collect_links external_ [] in
let join_content = Buffer.contents external_key_buffer in
- let inner_content = Buffer.contents internal_key_buffer in
- let b = Buffer.create 256 in
- let formatter = Format.formatter_of_buffer b in
+ let request = Chunk.create () in
+ Chunk.add_string request "SELECT ";
let () =
- Format.fprintf formatter
- "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \
- AND %s IS NOT NULL AND %s <> ''"
- (fun formatter -> function
- | [ (table, _name) ] ->
- Format.fprintf formatter "%s, " (print_column table "id")
- | _ -> Format.fprintf formatter "-1, ")
- pointed_tables (* *)
- inner_content (* *)
- (Format.pp_print_list
- ~pp_sep:(fun f () -> Format.pp_print_text f ", ")
- (fun formatter (table, name) ->
- Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name))
- pointed_tables (* *)
- extern_table (* *)
- external_.target.name
- (prepare_key ~f:(fun b ->
- Format.pp_print_text b (Buffer.contents internal_key_buffer)))
- join_content (* *)
- join_content (* *)
- inner_content (* *)
- inner_content
+ match pointed_tables with
+ | [] ->
+ (* No pointed table: just return -1 *)
+ Chunk.add_string request "-1"
+ | (table, _name) :: _ ->
+ (* Use the first pointed table to extract the row number. *)
+ Chunk.add_string request (print_column table "id")
in
+ Chunk.add_string request ", ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " FROM\n'";
+ Chunk.add_string request (Table.name conf.source);
+ Chunk.add_string request "' AS '";
+ Chunk.add_string request conf.source.name;
+ Chunk.add_string request "'";
- Format.pp_print_flush formatter ();
-
- { q = Buffer.contents b; parameters = Queue.to_seq parameters }
+ (* Add the externals in the query *)
+ List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request);
+ Chunk.add_string request " WHERE ";
+ Chunk.add_string request join_content;
+ Chunk.add_string request " IS NULL AND ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " IS NOT NULL AND ";
+ Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
+ Chunk.add_string request " <> ''";
+ let q = Buffer.contents request.b in
+ { q; parameters = Queue.to_seq request.parameters }
let build_key_insert : Buffer.t -> Dependency.key -> unit =
fun buffer { Dependency.expression; _ } ->
@@ -440,6 +477,4 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit =
Q.query_of_expression Q.NoParam formatter show_column expression)
in
- Format.pp_print_flush formatter ();
-
- ()
+ Format.pp_print_flush formatter ()