From c284321b1073e06481c63e2c061a1600fa68254d Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 10 Apr 2025 20:27:59 +0200 Subject: Added filters expressions in the externals --- lib/analysers/chunk.ml | 87 ++++++++++++++++++++++++++++++++------------ lib/analysers/chunk.mli | 11 ++++-- lib/analysers/dependency.ml | 66 ++++++++++++++++++++++++--------- lib/analysers/dependency.mli | 5 ++- lib/analysers/filters.ml | 27 +++----------- lib/analysers/query.ml | 83 ++++++++++++++++++------------------------ 6 files changed, 163 insertions(+), 116 deletions(-) (limited to 'lib/analysers') diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml index cefa6d8..b09f311 100644 --- a/lib/analysers/chunk.ml +++ b/lib/analysers/chunk.ml @@ -43,9 +43,24 @@ let add_parameters : t -> ImportDataTypes.Value.t Seq.t -> unit = fun t p -> Queue.add_seq t.parameters p module Table = ImportDataTypes.Table -module Q = ImportExpression.Query open StdLabels +let add_expression : + repr:(Format.formatter -> 'a -> unit) -> + t -> + 'a ImportExpression.T.t -> + unit = + fun ~repr group expression -> + let formatter = Format.formatter_of_buffer group.b in + Format.pp_print_char formatter '('; + let queue = + ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam + formatter repr expression + in + Format.pp_print_char formatter ')'; + Format.pp_print_flush formatter (); + add_parameters group (Queue.to_seq queue) + (** Extract the informations from the dependancies. We get two informations here : @@ -57,20 +72,34 @@ let join_external : fun ~conf ~join_buffer external_ -> let extern_table = Table.name external_.target in - let formatter = Format.formatter_of_buffer join_buffer.b in - Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table - external_.target.name - (Printers.prepare_key ~f:(fun f -> - let q = - Q.query_of_expression Q.BindParam f (Printers.path ~conf) - external_.intern_key - in - - add_parameters join_buffer (Queue.to_seq q))) - (Table.print_column external_.ImporterSyntax.Extern.target - ("key_" ^ external_.ImporterSyntax.Extern.target.name)); - - Format.pp_print_flush formatter () + add_string join_buffer "\nLEFT JOIN '"; + add_string join_buffer extern_table; + add_string join_buffer "' AS '"; + add_string join_buffer external_.target.name; + add_string join_buffer "' ON "; + add_string join_buffer + (Format.asprintf "%t = %s" + (Printers.prepare_key ~f:(fun f -> + let q = + ImportExpression.Query.query_of_expression + ImportExpression.Query.BindParam f (Printers.path ~conf) + external_.intern_key + in + + add_parameters join_buffer (Queue.to_seq q))) + (Table.print_column external_.ImporterSyntax.Extern.target + ("key_" ^ external_.ImporterSyntax.Extern.target.name))); + + (* Add the filters given for this external in the query *) + let table = external_.ImporterSyntax.Extern.target + and filters = external_.ImporterSyntax.Extern.filters in + List.iter filters ~f:(fun f -> + add_string join_buffer " AND "; + add_expression + ~repr:(fun formatter column -> + Format.fprintf formatter "%s" + (Table.print_column table ("col_" ^ string_of_int column))) + join_buffer f) (** Create the from part of the query, adding all the required externals (even when not required) @@ -90,16 +119,26 @@ let create_from_statement_of_chunck : (* Add the externals in the query *) List.iter externals ~f:(join_external ~conf ~join_buffer:c) -let add_expression : - conf:ImporterSyntax.t -> +(** Add a list of expressions into the group *) +let add_expressions : + repr:(Format.formatter -> 'a -> unit) -> + sep:string -> t -> - ImportDataTypes.Path.t ImportExpression.T.t -> + 'a ImportExpression.T.t list -> unit = - fun ~conf group expression -> + fun ~repr ~sep group exppressions -> let formatter = Format.formatter_of_buffer group.b in - let queue = - ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam - formatter (Printers.path ~conf) expression + let () = + Format.pp_print_list + ~pp_sep:(fun f () -> Format.pp_print_string f sep) + (fun formatter column -> + Format.pp_print_char formatter '('; + let seq = + ImportExpression.Query.query_of_expression + ImportExpression.Query.BindParam formatter repr column + in + Format.pp_print_char formatter ')'; + Queue.transfer seq group.parameters) + formatter exppressions in - Format.pp_print_flush formatter (); - add_parameters group (Queue.to_seq queue) + Format.pp_print_flush formatter () diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli index 13a748a..ad9ca00 100644 --- a/lib/analysers/chunk.mli +++ b/lib/analysers/chunk.mli @@ -27,8 +27,13 @@ val create_from_statement_of_chunck : the select clause. *) val add_expression : - conf:ImporterSyntax.t -> + repr:(Format.formatter -> 'a -> unit) -> t -> 'a ImportExpression.T.t -> unit +(** Add an expression into an existing chunck *) + +val add_expressions : + repr:(Format.formatter -> 'a -> unit) -> + sep:string -> t -> - ImportDataTypes.Path.t ImportExpression.T.t -> + 'a ImportExpression.T.t list -> unit -(** Add an expression into an existing chunck *) +(** Add a list of expressions into an existing chunk *) diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml index 38bc23c..8c969fe 100644 --- a/lib/analysers/dependency.ml +++ b/lib/analysers/dependency.ml @@ -1,7 +1,5 @@ open StdLabels module IntSet = ImportContainers.IntSet -module Table = ImportDataTypes.Table -module Path = ImportDataTypes.Path module Expression = ImportExpression.T (* @@ -19,13 +17,14 @@ type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list type key = { name : string; - expression : Path.column Expression.t; + expression : ImportDataTypes.Path.column Expression.t; columns : ImportContainers.IntSet.t Lazy.t; + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] type t = { - table : Table.t; + table : ImportDataTypes.Table.t; columns : IntSet.t; keys : key list; } @@ -47,8 +46,8 @@ type build_map = t ImportContainers.Externals.t - [of_path] is need to extract the qualified source from any kind of path. *) type 'a expression_extractor = { - to_mapping : t -> Path.column -> t; - of_path : 'a -> string option * Path.column; + to_mapping : t -> ImportDataTypes.Path.column -> t; + of_path : 'a -> string option * ImportDataTypes.Path.column; } (** [add_path_in_map f parent path ] Extract the column from element [path] and @@ -112,9 +111,15 @@ let add_columns_in_map : This function is called for each path declared inside the expression. *) let add_dependancies : - conf:ImporterSyntax.t -> ImporterSyntax.Extern.t -> deps -> Path.t -> deps = + conf:ImporterSyntax.t -> + ImporterSyntax.Extern.t -> + deps -> + ImportDataTypes.Path.t -> + deps = fun ~conf extern graph path -> - let source_table = ImporterSyntax.get_table_for_name conf path.Path.alias in + let source_table = + ImporterSyntax.get_table_for_name conf path.ImportDataTypes.Path.alias + in let source = ImportContainers.Source.from_table source_table in let target = ImportContainers.Source.from_table extern.target in @@ -134,7 +139,10 @@ let add_external_in_map : let _ = Expression.fold_values extern.intern_key ~init:() ~f:(fun () path -> try - let _ = ImporterSyntax.get_table_for_name conf path.Path.alias in + let _ = + ImporterSyntax.get_table_for_name conf + path.ImportDataTypes.Path.alias + in () with | Not_found -> ( @@ -147,16 +155,19 @@ let add_external_in_map : raise (ImportErrors.Unknown_source root.name))) in + let columns () = + let f = fun acc k -> ImportContainers.IntSet.add k acc in + Expression.fold_values extern.extern_key ~f + ~init:ImportContainers.IntSet.empty + in + (* Create the new key with all the expression and all the columns inside it *) let new_key = { - name = extern.target.Table.name; + name = extern.target.ImportDataTypes.Table.name; expression = extern.extern_key; - columns = - lazy - (Expression.fold_values extern.extern_key - ~f:(fun acc k -> ImportContainers.IntSet.add k acc) - ~init:ImportContainers.IntSet.empty); + columns = Lazy.from_fun columns; + filters = extern.filters; } in let build_map = @@ -182,7 +193,7 @@ let add_external_in_map : ~f: { of_path = - (fun Path.{ alias; column } -> + (fun ImportDataTypes.Path.{ alias; column } -> let table = ImporterSyntax.get_table_for_name conf alias in (Some table.name, column)); to_mapping = @@ -197,7 +208,8 @@ let mapper = to_mapping = (fun mapping column -> { mapping with columns = IntSet.add column mapping.columns }); - of_path = (fun ({ alias; column } : Path.t) -> (alias, column)); + of_path = + (fun ({ alias; column } : ImportDataTypes.Path.t) -> (alias, column)); } let get_mapping : ImporterSyntax.t -> build_map * deps = @@ -221,7 +233,24 @@ let get_mapping : ImporterSyntax.t -> build_map * deps = in let map, graph = List.fold_left conf.externals ~init ~f:(fun map extern -> - add_external_in_map ~conf extern map) + let map, graph = add_external_in_map ~conf extern map in + + (* Also add the filters in the externals. The column are not defined as + a full path, with table and column, but only with a column. We need + to transform them to make them present as the same as the others *) + let table = + ImporterSyntax.get_table_for_name conf (Some extern.target.name) + in + let path_filters = + List.map extern.filters ~f:(fun expression -> + Expression.map + ~f:(fun column -> + ImportDataTypes.Path.{ alias = Some table.name; column }) + expression) + in + let map = add_columns_in_map ~conf ~f:mapper path_filters map in + + (map, graph)) in (* Now we don’t bother anymore with the graph and it’s dependency, we just @@ -233,6 +262,7 @@ let get_mapping : ImporterSyntax.t -> build_map * deps = |> add_columns_in_map ~conf ~f:mapper conf.filters |> add_columns_in_map ~conf ~f:mapper conf.uniq in + (map, graph) let get_process_order : ImporterSyntax.t -> t list = diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli index 522436c..1eb55c5 100644 --- a/lib/analysers/dependency.mli +++ b/lib/analysers/dependency.mli @@ -29,7 +29,10 @@ type key = { (** The list of columns used in the key. All the columns are referenced in the expression. We can have many columns used inside a single key when a function is used (for example for joining multiple columns into a - single key) *) + single key). + + The columns used in the filter are also declared as well. *) + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] (** This type describe the join key in a table. The name is the refering table diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml index 6b1d843..15e8cda 100644 --- a/lib/analysers/filters.ml +++ b/lib/analysers/filters.ml @@ -4,25 +4,6 @@ module Path = ImportDataTypes.Path module Expression = ImportExpression open StdLabels -(** Add a list of expressions into the group *) -let add_filters : - conf:ImporterSyntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = - fun ~conf group -> function - | [] -> () - | any -> - let rec f ~conf group = function - | [] -> () - | hd :: [] -> - Chunk.add_expression ~conf group hd; - Chunk.add_string group ")" - | hd :: tl -> - Chunk.add_expression ~conf group hd; - Chunk.add_string group ")\nAND ("; - f ~conf group tl - in - Chunk.add_string group "("; - f ~conf group any - type 'a cte_acc = { n : int; has_previous : bool; @@ -65,7 +46,7 @@ let print : Chunk.add_string query "SELECT "; Chunk.add_string query conf.source.name; Chunk.add_string query ".id, "; - Chunk.add_expression ~conf query expression; + Chunk.add_expression ~repr:(Printers.path ~conf) query expression; Chunk.add_string query " AS group_function"; Chunk.create_from_statement_of_chunck conf query; @@ -83,7 +64,8 @@ let print : | [] -> () | _ -> Chunk.add_string query " WHERE "; - add_filters ~conf query cte.ImporterSyntax.CTE.filters + Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf) + query cte.ImporterSyntax.CTE.filters end; Chunk.add_string query ")\n"; Some acc.n @@ -91,7 +73,8 @@ let print : (* Do not add the filters in the CTE (we don’t have any) but in the main query *) Chunk.add_string predicates "WHERE "; - add_filters ~conf predicates cte.ImporterSyntax.CTE.filters; + Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf) + predicates cte.ImporterSyntax.CTE.filters; acc.cte_index in { diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index f89f5f0..dac4d89 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -135,41 +135,24 @@ let select : ImporterSyntax.t -> query * Path.t ImportExpression.T.t array = let () = Chunk.create_from_statement_of_chunck conf request_header in Chunk.append ~head:request_header ~tail:filters; - let formatter = Format.formatter_of_buffer b in (match conf.ImporterSyntax.uniq with | [] -> () | uniq -> - Format.fprintf formatter "\nGROUP BY %a" - (Format.pp_print_list - ~pp_sep:(fun f () -> Format.fprintf f ", ") - (fun formatter column -> - let seq = - Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) - column - in - Queue.transfer seq parameters)) + Chunk.add_string request_header "\nGROUP BY "; + Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header uniq); (match conf.ImporterSyntax.sort with | [] -> () | sort -> - Format.fprintf formatter "\nORDER BY %a" - (Format.pp_print_list - ~pp_sep:(fun f () -> Format.fprintf f ", ") - (fun formatter column -> - let seq = - Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) - column - in - Queue.transfer seq parameters)) + Chunk.add_string request_header "\nORDER BY "; + Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header sort); - Format.pp_print_flush formatter (); - ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers) let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = fun conf external_ -> let internal_chunk = Chunk.create () in - Chunk.add_expression ~conf internal_chunk + Chunk.add_expression ~repr:(Printers.path ~conf) internal_chunk external_.ImporterSyntax.Extern.intern_key; let external_key_buffer = Buffer.create 16 in @@ -180,6 +163,7 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = let pointed_tables = pointed_tables conf external_.intern_key in let parameters = Queue.create () in + (* We do a copy before the transfert because the Queue is reused later in the query *) Queue.transfer (Queue.copy internal_chunk.parameters) parameters; @@ -195,44 +179,47 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = ImporterSyntax.Extern.t list -> ImporterSyntax.Extern.t list = fun table init -> - let res = - (* Do not add the same external if the value is already present *) - let init = - match List.find_opt init ~f:(fun ext -> table == ext) with - | None -> table :: init - | Some _ -> init - in - - Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key - ~f:(fun acc expr -> - match expr.Path.alias with - | None -> acc - | Some _ as path -> ( - let table = ImporterSyntax.get_table_for_name conf path in - (* Look for this table in the externals *) - let external_opt = - List.find_opt conf.ImporterSyntax.externals ~f:(fun t -> - t.ImporterSyntax.Extern.target == table) - in - match external_opt with - | None -> acc - | Some ext -> collect_links ext acc)) + (* Do not add the same external if the value is already present *) + let init = + match + List.find_opt init ~f:(fun ext -> ImporterSyntax.Extern.equal table ext) + with + | None -> table :: init + | Some _ -> init in - res + + Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key + ~f:(fun acc expr -> + match expr.Path.alias with + | None -> acc + | Some _ as path -> ( + let table = ImporterSyntax.get_table_for_name conf path in + (* Look for this table in the externals *) + let external_opt = + List.find_opt conf.ImporterSyntax.externals ~f:(fun t -> + t.ImporterSyntax.Extern.target == table) + in + match external_opt with + | None -> acc + | Some ext -> collect_links ext acc)) in let dependencies = collect_links external_ [] in let join_content = Buffer.contents external_key_buffer in let request = Chunk.create () in + Chunk.add_string request "SELECT "; + + (* Check if we can identify the line number in the file. It’s only possible + if we have a single source used as a key *) let () = match pointed_tables with - | [] -> - (* Otherwise, just return -1 *) - Chunk.add_string request "-1" | (table, _name) :: _ -> (* If we have a single source, extract the row number. *) Chunk.add_string request (Table.print_column table "id") + | [] -> + (* Otherwise, just return -1 *) + Chunk.add_string request "-1" in Chunk.add_string request ", "; Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk); -- cgit v1.2.3