diff options
author | Sébastien Dailly <sebastien@dailly.me> | 2025-03-01 08:39:02 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2025-03-06 20:57:10 +0100 |
commit | 81db1bfd580791910646525e30bc45af34533987 (patch) | |
tree | c610f53c284d3707a3d6fe49486b5c09e66dc41f | |
parent | 67320d8f04e1f302306b9aafdaaf4bafcf443839 (diff) |
Rewrite the way to handle filters
-rw-r--r-- | examples/checksum | 3 | ||||
-rw-r--r-- | lib/analysers/chunk.ml | 102 | ||||
-rw-r--r-- | lib/analysers/chunk.mli | 34 | ||||
-rw-r--r-- | lib/analysers/filters.ml | 133 | ||||
-rw-r--r-- | lib/analysers/filters.mli | 2 | ||||
-rw-r--r-- | lib/analysers/printers.ml | 12 | ||||
-rw-r--r-- | lib/analysers/printers.mli | 14 | ||||
-rw-r--r-- | lib/analysers/query.ml | 310 | ||||
-rw-r--r-- | lib/analysers/query.mli | 7 | ||||
-rw-r--r-- | lib/configuration/cte.ml | 53 | ||||
-rw-r--r-- | lib/configuration/cte.mli | 20 | ||||
-rw-r--r-- | lib/configuration/importConf.ml | 1 | ||||
-rw-r--r-- | lib/configuration/importConf.mli | 12 | ||||
-rw-r--r-- | lib/configuration/read_conf.ml | 69 | ||||
-rw-r--r-- | lib/data_types/table.ml | 5 | ||||
-rw-r--r-- | lib/expression/_readme.rst | 39 | ||||
-rw-r--r-- | lib/expression/filters.ml | 193 | ||||
-rw-r--r-- | lib/expression/filters.mli | 9 | ||||
-rwxr-xr-x | lib/helpers/helpers.ml | 43 | ||||
-rw-r--r-- | readme.rst | 17 | ||||
-rw-r--r-- | tests/analyser_filters.ml | 187 | ||||
-rw-r--r-- | tests/analyser_query_test.ml | 64 | ||||
-rw-r--r-- | tests/confLoader.ml | 6 | ||||
-rw-r--r-- | tests/configuration_toml.ml | 24 | ||||
-rw-r--r-- | tests/importer_test.ml | 1 | ||||
-rw-r--r-- | tests/sql_db.ml | 119 | ||||
-rw-r--r-- | tests/test_migration.ml | 27 |
27 files changed, 882 insertions, 624 deletions
diff --git a/examples/checksum b/examples/checksum new file mode 100644 index 0000000..b223bc4 --- /dev/null +++ b/examples/checksum @@ -0,0 +1,3 @@ +e158edc600b314e5451cbfbb42fa0a6c importer.csv +e85080e97cd3f1615069232c22e9b9cc example_csv.csv +d646adc7d22212908d7c12a88a19ea4b importer_groupe.csv diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml new file mode 100644 index 0000000..2fa4808 --- /dev/null +++ b/lib/analysers/chunk.ml @@ -0,0 +1,102 @@ +(** The module allow to create fragment in the query which keep together the + binderd parameters and the text of the query.contents. + + This is used a lot in order to create the CTE, where you need the create + fragment used both in the main request and partially in the CTE itself. + + The content is mutable and all the functions are returning [unit]. *) + +type t = { + b : Buffer.t; + parameters : ImportCSV.DataType.t Queue.t; +} + +let create : unit -> t = + fun () -> { b = Buffer.create 16; parameters = Queue.create () } + +let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = + fun b parameters -> { b; parameters } + +(* Append the element from [tail] at the end of [head] + + Tail is destroyed during the operation. + *) +let append : head:t -> tail:t -> unit = + fun ~head ~tail -> + match Buffer.length tail.b with + | 0 -> () + | _ -> + Buffer.add_buffer head.b tail.b; + Queue.transfer tail.parameters head.parameters; + () + +(** Add a litteral string in the sequence *) +let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v + +let copy : t -> t = + fun t -> + let b = Buffer.create 16 and parameters = Queue.copy t.parameters in + Buffer.add_buffer b t.b; + { b; parameters } + +let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = + fun t p -> Queue.add_seq t.parameters p + +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Q = ImportExpression.Query +open StdLabels + +(** Extract the informations from the dependancies. We get two informations here + : + + - the join query in order to load the data from the external column + - the column corresponding to the key in order to identify the missing links + later. *) +let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = + fun ~conf ~join_buffer external_ -> + let extern_table = Table.name external_.target in + + let formatter = Format.formatter_of_buffer join_buffer.b in + Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table + external_.target.name + (Printers.prepare_key ~f:(fun f -> + let q = + Q.query_of_expression Q.BindParam f (Printers.path ~conf) + external_.intern_key + in + + add_parameters join_buffer (Queue.to_seq q))) + (Table.print_column external_.Syntax.Extern.target + ("key_" ^ external_.Syntax.Extern.target.name)); + + Format.pp_print_flush formatter () + +(** Create the from part of the query, adding all the required externals (even + when not required) + + SQLite is able to optimize the query and do not load the table not used in + the select clause. *) +let create_from_statement_of_chunck : + ?externals:Syntax.Extern.t list -> Syntax.t -> t -> unit = + fun ?externals conf c -> + let externals = Option.value externals ~default:conf.externals in + add_string c "\nFROM '"; + add_string c (Table.name conf.source); + add_string c "' AS '"; + add_string c conf.source.name; + add_string c "'"; + + (* Add the externals in the query *) + List.iter externals ~f:(join_external ~conf ~join_buffer:c) + +let add_expression : + conf:Syntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit = + fun ~conf group expression -> + let formatter = Format.formatter_of_buffer group.b in + let queue = + ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam + formatter (Printers.path ~conf) expression + in + Format.pp_print_flush formatter (); + add_parameters group (Queue.to_seq queue) diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli new file mode 100644 index 0000000..d4f69e7 --- /dev/null +++ b/lib/analysers/chunk.mli @@ -0,0 +1,34 @@ +(** This module helps to create a query with prepared values. *) + +type t = { + b : Buffer.t; + parameters : ImportCSV.DataType.t Queue.t; +} + +val create : unit -> t +val create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t + +val append : head:t -> tail:t -> unit +(** Append the element from [tail] at the end of [head] + + Tail is destroyed during the operation. *) + +val add_string : t -> string -> unit +(** Add a litteral string in the sequence *) + +val copy : t -> t + +val create_from_statement_of_chunck : + ?externals:ImportConf.Syntax.Extern.t list -> ImportConf.Syntax.t -> t -> unit +(** Create the from part of the query, adding all the declared externals (even + when not required) + + SQLite is able to optimize the query and do not load the table not used in + the select clause. *) + +val add_expression : + conf:ImportConf.Syntax.t -> + t -> + ImportDataTypes.Path.t ImportExpression.T.t -> + unit +(** Add an expression into an existing chunck *) diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml new file mode 100644 index 0000000..4e8b175 --- /dev/null +++ b/lib/analysers/filters.ml @@ -0,0 +1,133 @@ +(** Build a fragment of the query match a filter *) + +module Syntax = ImportConf.Syntax +module Path = ImportDataTypes.Path +module Expression = ImportExpression +module CTE = ImportConf.CTE +open StdLabels + +(** Add a list of expressions into the group *) +let rec add_filters : + conf:Syntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = + fun ~conf group -> function + | [] -> () + | hd :: [] -> Chunk.add_expression ~conf group hd + | hd :: tl -> + Chunk.add_expression ~conf group hd; + Chunk.add_string group "\nAND "; + add_filters ~conf group tl + +type 'a cte_acc = { + n : int; + has_previous : bool; + acc : 'a; + cte_index : int option; + latest_expression : Path.t Expression.T.t list; +} + +let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit = + fun ~conf n b -> + let name = "filter" ^ string_of_int n in + (* We use an INNER JOIN here because we want to be sure to get all the rows + fetched by the CTE *) + Buffer.add_string b "\nINNER JOIN '"; + Buffer.add_string b name; + Buffer.add_string b "' ON "; + Buffer.add_string b name; + Buffer.add_string b ".id = "; + Buffer.add_string b conf.source.name; + Buffer.add_string b ".id\n" + +let print : + conf:Syntax.t -> + (Chunk.t * Chunk.t) cte_acc -> + CTE.t -> + (Chunk.t * Chunk.t) cte_acc = + fun ~conf acc cte -> + let predicates, query = acc.acc in + let n = acc.n in + let cte_index = + match cte.CTE.group with + | Some expression -> + begin + if acc.has_previous then Chunk.add_string query ", " + else Chunk.add_string query "WITH " + end; + Chunk.add_string query "filter"; + Chunk.add_string query (string_of_int n); + Chunk.add_string query " AS ("; + Chunk.add_string query "SELECT "; + Chunk.add_string query conf.source.name; + Chunk.add_string query ".id, "; + Chunk.add_expression ~conf query expression; + Chunk.add_string query " AS group_function"; + Chunk.create_from_statement_of_chunck conf query; + + if acc.has_previous then begin + let previous_name = "filter" ^ string_of_int (n - 1) in + add_inner ~conf (n - 1) query.Chunk.b; + + Chunk.add_string query "WHERE "; + Chunk.add_string query previous_name; + Chunk.add_string query ".group_function" + end; + + begin + match cte.CTE.filters with + | [] -> () + | _ -> + Chunk.add_string query " WHERE "; + add_filters ~conf query cte.CTE.filters + end; + Chunk.add_string query ")\n"; + Some acc.n + | None -> + (* Do not add the filters in the CTE (we don’t have any) but in the main + query *) + Chunk.add_string predicates "WHERE "; + add_filters ~conf predicates cte.CTE.filters; + acc.cte_index + in + { + acc with + has_previous = true; + n = acc.n + 1; + cte_index; + latest_expression = cte.CTE.filters; + } + +let generate_sql : conf:Syntax.t -> CTE.t list -> Chunk.t -> Chunk.t = + fun ~conf filters links' -> + let predicates = Chunk.create () and links = Chunk.create () in + let eval = + List.fold_left filters + ~init: + { + n = 0; + has_previous = false; + acc = (links, predicates); + cte_index = None; + latest_expression = []; + } + ~f:(print ~conf) + in + match (eval.cte_index, eval.latest_expression) with + | None, [] -> predicates + | None, _ -> + Chunk.add_string links' " "; + Chunk.append ~head:links' ~tail:links; + predicates + | Some n, [] -> + add_inner ~conf n links'.b; + Chunk.append ~head:links' ~tail:links; + Chunk.add_string links' "filter"; + Chunk.add_string links' (string_of_int n); + Chunk.add_string links' ".group_function"; + predicates + | Some n, _ -> + add_inner ~conf n links'.b; + Chunk.append ~head:links' ~tail:links; + Chunk.add_string links' " AND filter"; + Chunk.add_string links' (string_of_int n); + Chunk.add_string links' ".group_function"; + predicates diff --git a/lib/analysers/filters.mli b/lib/analysers/filters.mli new file mode 100644 index 0000000..7783799 --- /dev/null +++ b/lib/analysers/filters.mli @@ -0,0 +1,2 @@ +val generate_sql : + conf:ImportConf.Syntax.t -> ImportConf.CTE.t list -> Chunk.t -> Chunk.t diff --git a/lib/analysers/printers.ml b/lib/analysers/printers.ml new file mode 100644 index 0000000..1c73c13 --- /dev/null +++ b/lib/analysers/printers.ml @@ -0,0 +1,12 @@ +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +let path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = + fun ~conf buffer { alias; column } -> + let table = ImportConf.get_table_for_name conf alias in + Format.fprintf buffer "%s" + (Table.print_column table ("col_" ^ string_of_int column)) + +let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = + fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f diff --git a/lib/analysers/printers.mli b/lib/analysers/printers.mli new file mode 100644 index 0000000..102bb91 --- /dev/null +++ b/lib/analysers/printers.mli @@ -0,0 +1,14 @@ +val path : + conf:ImportConf.Syntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit +(** Represent a path in a SQL query. + + This function is given in the Expression.Query module. *) + +val prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit +(** Wrap an expression for beiing used as a key. What’s given to the formater + will be pre-processed with sql function to trim and uppercase the value. + + This function is used at two location in the code : + + - during the index creation + - when matching a value against this index *) diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index bcf4a72..e24da78 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -2,58 +2,8 @@ open StdLabels module Expression = ImportExpression module Q = Expression.Query module Syntax = ImportConf.Syntax -module Table = ImportConf.Table -module Path = ImportConf.Path - -let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n) - -(** The module allow to create fragment in the query which keep together the - binderd parameters and the text of the query.contents. - - This is used a lot in order to create the CTE, where you need the create - fragment used both in the main request and partially in the CTE itself. - - The content is mutable and all the functions are returning [unit]. *) -module Chunk = struct - type t = { - b : Buffer.t; - parameters : ImportCSV.DataType.t Queue.t; - } - - let create : unit -> t = - fun () -> { b = Buffer.create 16; parameters = Queue.create () } - - let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = - fun b parameters -> { b; parameters } - - (* Append the element from [tail] at the end of [head] - - Tail is destroyed during the operation. - *) - let append : head:t -> tail:t -> unit = - fun ~head ~tail -> - match Buffer.length tail.b with - | 0 -> () - | _ -> - Buffer.add_buffer head.b tail.b; - Queue.transfer tail.parameters head.parameters; - () - - (** Add a litteral string in the sequence *) - let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v - - let copy : t -> t = - fun t -> - let b = Buffer.create 16 and parameters = Queue.copy t.parameters in - Buffer.add_buffer b t.b; - { b; parameters } - - let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = - fun t p -> Queue.add_seq t.parameters p -end - -let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = - fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path (* Collect all the tables pointed by the expression. *) let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = @@ -64,11 +14,6 @@ let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = (table, table_name) :: acc) |> List.sort_uniq ~cmp:Stdlib.compare -(** Represent a column in a safe way in a query *) -let print_column : Table.t -> string -> string = - fun table column -> - String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ] - let create_table : Dependency.t -> string = fun mapping -> let b = Buffer.create 64 in @@ -90,184 +35,6 @@ let create_table : Dependency.t -> string = Buffer.contents b -let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = - fun ~conf buffer { alias; column } -> - let table = ImportConf.get_table_for_name conf alias in - let table_name = table.Table.name in - Format.fprintf buffer "'%s'.col_%d" table_name column - -(** Extract the informations from the dependancies. We get two informations here - : - - - the join query in order to load the data from the external column - - the column corresponding to the key in order to identify the missing links - later. *) -let query_of_external : - conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit = - fun ~conf ~join_buffer external_ -> - let extern_table = Table.name external_.target in - - let formatter = Format.formatter_of_buffer join_buffer.b in - Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table - external_.target.name - (prepare_key ~f:(fun f -> - let q = - Q.query_of_expression Q.BindParam f (show_path ~conf) - external_.intern_key - in - - Chunk.add_parameters join_buffer (Queue.to_seq q))) - (print_column external_.Syntax.Extern.target - ("key_" ^ external_.Syntax.Extern.target.name)); - - Format.pp_print_flush formatter () - -(** Create the from part of the query, adding all the reuired externals. - - SQLite is able to optimize the query and do not load the table not used in - the select clause. *) -let create_from_chunck : Syntax.t -> Chunk.t -> unit = - fun conf c -> - Chunk.add_string c "\nFROM '"; - Chunk.add_string c (Table.name conf.source); - Chunk.add_string c "' AS '"; - Chunk.add_string c conf.source.name; - Chunk.add_string c "'"; - - (* Add the externals in the query *) - List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c) - -(** Build a CTE query in order to use any group function inside the query. - Return the binded parameters used in the expression. The buffer given in - argument is also modified during the construction. - - If filters is not None, the clauses are added to the CTE. *) -let build_cte : - Syntax.t -> - expression:'a Expression.T.t -> - filters:Chunk.t option -> - Chunk.t = - fun conf ~expression ~filters -> - (* The binded parameters queue will be used later in the full query *) - let cte_chunk = Chunk.create () in - - Chunk.add_string cte_chunk "WITH cte AS (SELECT "; - Chunk.add_string cte_chunk conf.source.name; - Chunk.add_string cte_chunk ".id, "; - - let formatter = Format.formatter_of_buffer cte_chunk.b in - - let p = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression - in - Format.pp_print_flush formatter (); - Chunk.add_parameters cte_chunk (Queue.to_seq p); - (* The name is hardcoded here, and used in [Expression.Filters.window] *) - Chunk.add_string cte_chunk " AS group0"; - - let () = create_from_chunck conf cte_chunk in - let () = - match filters with - | None -> () - | Some filters_chunk -> - Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk) - in - - Chunk.add_string cte_chunk ")\n"; - cte_chunk - -type filter_evaluation = { - content : Buffer.t; - parameters : ImportCSV.DataType.t Seq.t; - cte : (string * Chunk.t) option; -} -(** Build the filters to apply in the query. We make the difference here between - the predicates to apply directly in the query, and the filters associated - with a group, which are required to be transformed into a CTE in SQL, and - are evaluated before. *) - -(** Evaluate the filters on the query *) -let eval_filters : Syntax.t -> filter_evaluation = - fun conf -> - match conf.filters with - | [] -> - let empty_buffer = Buffer.create 0 in - { content = empty_buffer; parameters = Seq.empty; cte = None } - | filters -> ( - (* Create a new queue in order to accumulate all the parameters to bind. - This filter will be given to both the CTE if any, or reused in the - main query when there is no CTE. - *) - let chunk_filters = Chunk.create () in - Chunk.add_string chunk_filters "\nWHERE "; - - let group = Chunk.create () in - - let with_cte, with_exr = - List.fold_left filters ~init:(None, false) - ~f:(fun (with_cte, with_exr) column -> - (* The function will return an option in second position which is - None when no Group function where found, and Some Expression - otherwise *) - let b = Buffer.create 16 in - - let formatter = Format.formatter_of_buffer b in - let queue, group_found = - Expression.Filters.query_of_expression Q.BindParam formatter - (show_path ~conf) column - in - Format.pp_print_flush formatter (); - let clause = Chunk.create' b queue in - - match (group_found, with_cte) with - | None, _ -> - Chunk.append ~head:chunk_filters ~tail:clause; - Chunk.add_string chunk_filters "\nAND "; - (with_cte, true) - | (Some _ as group'), None -> - (* We have a group here, we do not add it into the - filter_buffer right now. - - This can occur only once, the second one will raise - an error. *) - Chunk.append ~head:group ~tail:clause; - (group', with_exr) - | Some _, Some _ -> raise ImportErrors.MisplacedWindow) - in - - match with_cte with - | None -> - let content = chunk_filters.b in - truncate content 5; - { - (* There is no group clause in the query *) - content; - parameters = Queue.to_seq chunk_filters.parameters; - cte = None; - } - | Some expression -> - let filters = - if with_exr then ( - (* If we have additionnals filters from the group clause, we - have to report them in the CTE instead of the main query. *) - let c' = Chunk.copy chunk_filters in - truncate c'.b 5; - Some c') - else None - in - - (* Create the common expression table *) - let cte_parameters = build_cte conf ~expression ~filters in - Chunk.append ~head:chunk_filters ~tail:group; - - { - content = chunk_filters.b; - parameters = Queue.to_seq chunk_filters.parameters; - (* The name is hardcoded here, and used in - [Expression.Filters.window] *) - cte = Some ("cte", cte_parameters); - }) - type query = { q : string; parameters : ImportCSV.DataType.t Seq.t; @@ -323,23 +90,12 @@ let clean_window : find which source is pointed by this alias. *) let select : Syntax.t -> query * Path.t ImportExpression.T.t array = fun conf -> - (* If the filters contains a group expression, we need to transform this into - a CTE, which have to be evaluated before the main query. That’s why we are - evaluating the filters right now.*) - let filters = eval_filters conf in - let b = Buffer.create 256 in - let parameters = Queue.create () in - - Option.iter - (fun (_, (cte : Chunk.t)) -> - Buffer.add_buffer b cte.b; - Queue.add_seq parameters (Queue.to_seq cte.parameters)) - filters.cte; + let filter = ImportConf.CTE.of_filters conf.filters in (* For each column in the configuration file, add the corresponding element in the query. - The Sqlite driver return the elements in an array, we create an array to + The Sqlite driver return the elements in an array, we create an array too in order to manage the elements together. *) let headers = Array.make (List.length conf.columns) (Obj.magic None) in @@ -355,6 +111,10 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = let expression = c in (i, clean_window ~prefix:conf.uniq expression)) in + let filters = Chunk.create () in + let request_header = Filters.generate_sql ~conf filter filters in + let b = request_header.Chunk.b + and parameters = request_header.Chunk.parameters in let formatter = Format.formatter_of_buffer b in let () = Format.fprintf formatter "SELECT %a" @@ -363,7 +123,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = (fun formatter (i, column) -> Array.set headers i column; let p = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer p parameters; @@ -372,26 +132,8 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = in Format.pp_print_flush formatter (); - let () = create_from_chunck conf (Chunk.create' b parameters) in - - (* If the query has a CTE, link it as well. We use an INNER JOIN here because - we want to be sure to get all the rows fetched by the CTE - *) - let () = - match filters.cte with - | None -> () - | Some (name, _) -> - Buffer.add_string b "\nINNER JOIN '"; - Buffer.add_string b name; - Buffer.add_string b "' ON "; - Buffer.add_string b name; - Buffer.add_string b ".id = "; - Buffer.add_string b conf.source.name; - Buffer.add_string b ".id" - in - - Buffer.add_buffer b filters.content; - Queue.add_seq parameters filters.parameters; + let () = Chunk.create_from_statement_of_chunck conf request_header in + Chunk.append ~head:request_header ~tail:filters; let formatter = Format.formatter_of_buffer b in (match conf.Syntax.uniq with @@ -402,7 +144,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = ~pp_sep:(fun f () -> Format.fprintf f ", ") (fun formatter column -> let seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer seq parameters)) @@ -415,7 +157,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = ~pp_sep:(fun f () -> Format.fprintf f ", ") (fun formatter column -> let seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer seq parameters)) @@ -426,20 +168,12 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = let check_external : Syntax.t -> Syntax.Extern.t -> query = fun conf external_ -> - let internal_chunk = - let internal_key_buffer = Buffer.create 16 in - let formatter = Format.formatter_of_buffer internal_key_buffer in - let internal_key_seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) - external_.Syntax.Extern.intern_key - in - Format.pp_print_flush formatter (); - Chunk.create' internal_key_buffer (Queue.copy internal_key_seq) - in + let internal_chunk = Chunk.create () in + Chunk.add_expression ~conf internal_chunk external_.Syntax.Extern.intern_key; let external_key_buffer = Buffer.create 16 in Buffer.add_string external_key_buffer - (print_column external_.Syntax.Extern.target + (Table.print_column external_.Syntax.Extern.target ("key_" ^ external_.Syntax.Extern.target.name)); let pointed_tables = pointed_tables conf external_.intern_key in @@ -495,18 +229,12 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query = Chunk.add_string request "-1" | (table, _name) :: _ -> (* If we have a single source, extract the row number. *) - Chunk.add_string request (print_column table "id") + Chunk.add_string request (Table.print_column table "id") in Chunk.add_string request ", "; Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk); - Chunk.add_string request " FROM\n'"; - Chunk.add_string request (Table.name conf.source); - Chunk.add_string request "' AS '"; - Chunk.add_string request conf.source.name; - Chunk.add_string request "'"; - (* Add the externals in the query *) - List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request); + Chunk.create_from_statement_of_chunck ~externals:dependencies conf request; Chunk.add_string request " WHERE "; Chunk.add_string request join_content; Chunk.add_string request " IS NULL AND "; @@ -526,7 +254,7 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit = let formatter = Format.formatter_of_buffer buffer in let () = - prepare_key formatter ~f:(fun formatter -> + Printers.prepare_key formatter ~f:(fun formatter -> Q.query_of_expression Q.NoParam formatter show_column expression) in diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli index 520718a..d158867 100644 --- a/lib/analysers/query.mli +++ b/lib/analysers/query.mli @@ -1,5 +1,3 @@ -module Syntax = ImportConf.Syntax - val create_table : Dependency.t -> string type query = { @@ -10,9 +8,10 @@ type query = { shall be run with all the binded parameters. *) val select : - Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array + ImportConf.Syntax.t -> + query * ImportDataTypes.Path.t ImportExpression.T.t array -val check_external : Syntax.t -> Syntax.Extern.t -> query +val check_external : ImportConf.Syntax.t -> ImportConf.Syntax.Extern.t -> query (** Create a query which select all the missing key in an external *) val build_key_insert : Buffer.t -> Dependency.key -> unit diff --git a/lib/configuration/cte.ml b/lib/configuration/cte.ml new file mode 100644 index 0000000..ff43d6d --- /dev/null +++ b/lib/configuration/cte.ml @@ -0,0 +1,53 @@ +open StdLabels +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { + filters : Path.t Expression.t list; + group : Path.t Expression.t option; +} + +(** Ensure the group criteria in window functions match the global group by + criteria. + + Traverse the configuration tree until finding a group window. *) + +(** Check if the expression contains a group function *) +let matchWindowGroup : 'a ImportExpression.T.t -> bool = + fun expression -> + let exception Found in + let open ImportExpression.T in + let rec f = function + | Empty | Literal _ | Integer _ | Path _ -> () + | Expr e -> f e + | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) + -> List.iter ~f pp + | Window (_, _, _) -> raise Found + | BOperator (_, arg1, arg2) -> + f arg1; + f arg2 + | GEquality (_, arg1, args) -> + f arg1; + List.iter ~f args + in + try + f expression; + false + with + | Found -> true + +(** Transform a list of expression into a list of CTE to evaluate. *) +let of_filters : Path.t Expression.t list -> t list = + fun filters -> + let last_group, prev = + List.fold_left filters + ~init:({ filters = []; group = None }, []) + ~f:(fun (cte, acc) expr -> + begin + if matchWindowGroup expr then + ( { filters = []; group = None }, + { cte with group = Some expr } :: acc ) + else ({ cte with filters = expr :: cte.filters }, acc) + end) + in + List.rev (last_group :: prev) diff --git a/lib/configuration/cte.mli b/lib/configuration/cte.mli new file mode 100644 index 0000000..0f2b3e3 --- /dev/null +++ b/lib/configuration/cte.mli @@ -0,0 +1,20 @@ +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { + filters : Path.t Expression.t list; + group : Path.t Expression.t option; +} +(** Represent a filter to apply in the querry + + The CTE can have filters applied on the previous CTE (or directly in the + sources if there is any yet) and can hold a group (an only one). + + If there is a group, it must be applied after the others filters. + + The order in which the filters are presented in the configuration can change + the results ; it does not matter when we only have classicals filters, + because all cf them can be evaluated at the same time, but as soon we have a + group function, the result become dependant of the previous ones. *) + +val of_filters : Path.t Expression.t list -> t list diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml index ebbcb7c..8516008 100644 --- a/lib/configuration/importConf.ml +++ b/lib/configuration/importConf.ml @@ -1,5 +1,6 @@ open StdLabels module Syntax = Syntax +module CTE = Cte module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path module T = Read_conf diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli index 9ddc40c..40b985b 100644 --- a/lib/configuration/importConf.mli +++ b/lib/configuration/importConf.mli @@ -1,18 +1,18 @@ module Syntax = Syntax -module Table = ImportDataTypes.Table -module Path = ImportDataTypes.Path +module CTE = Cte val dummy_conf : Syntax.t -val root_table : Syntax.t -> Table.t +val root_table : Syntax.t -> ImportDataTypes.Table.t (** Get the root table, this table is the main table to load and each line in this table will be processed *) val t_of_toml : Otoml.t -> (Syntax.t, string) result -val get_table_for_name : Syntax.t -> string option -> Table.t +val get_table_for_name : Syntax.t -> string option -> ImportDataTypes.Table.t -val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.Extern.t list +val get_dependancies_for_table : + Syntax.t -> ImportDataTypes.Table.t -> Syntax.Extern.t list (** Get all the externals refered by the source *) val expression_from_string : - string -> (Path.t ImportExpression.T.t, string) result + string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index 69240c1..11f6726 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -126,44 +126,51 @@ end = struct let column = Expression_parser.Incremental.column_expr end +exception Divergent (** Ensure the group criteria in window functions match the global group by - criteria. + criteria. *) - Traverse the configuration tree until finding a group window. *) +exception NestedGroup +(** Raised when a group contains another one *) + +(** Traverse the configuration tree until finding a group window. *) let matchWindowGroup : eq:('a -> 'a -> bool) -> subset:'a ImportExpression.T.t list -> 'a ImportExpression.T.t -> - bool = + unit = fun ~eq ~subset expression -> - let exception Divergent in let open ImportExpression.T in - let rec f = function + let rec f isIngroup = function | Empty | Literal _ | Integer _ | Path _ -> () - | Expr e -> f e + | Expr e -> f isIngroup e | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) - -> List.iter ~f pp - | Window (_, pp1, _) -> - if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () - else raise_notrace Divergent + -> List.iter ~f:(f isIngroup) pp + | Window (expr, pp1, pp2) -> + let () = + if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () + else + match subset with + | [] -> () + | _ -> raise_notrace Divergent + in + let () = + match isIngroup with + | true -> raise NestedGroup + | false -> () + in + + ignore @@ ImportExpression.T.map_window ~f:(f true) expr; + List.iter ~f:(f true) pp1; + List.iter ~f:(f true) pp2 | BOperator (_, arg1, arg2) -> - f arg1; - f arg2 + f isIngroup arg1; + f isIngroup arg2 | GEquality (_, arg1, args) -> - f arg1; - List.iter ~f args + f isIngroup arg1; + List.iter ~f:(f isIngroup) args in - match subset with - | [] -> - (* Do not bother traversing the tree if there is no group by, just - return Ok *) - true - | _ -> ( - try - f expression; - true - with - | Divergent -> false) + f false expression module Make (S : Decoders.Decode.S) = struct let ( let* ) = S.( let* ) @@ -185,13 +192,15 @@ module Make (S : Decoders.Decode.S) = struct | Error e -> S.fail_with Decoders.Error.(make e) | Ok expr -> ( (* Now check that every window function include at least the uniq list *) - let valid_subset = matchWindowGroup ~eq ~subset:groups expr in - match valid_subset with - | true -> S.succeed expr - | false -> + match matchWindowGroup ~eq ~subset:groups expr with + | () -> S.succeed expr + | exception Divergent -> S.fail "The group function shall match the same arguments as the \ - \"uniq\" parameter") + \"uniq\" parameter" + | exception NestedGroup -> + S.fail + "A group function cannot contains another group function") method source = let* file = S.field "file" S.string diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml index 2dd956b..4831ac8 100644 --- a/lib/data_types/table.ml +++ b/lib/data_types/table.ml @@ -23,3 +23,8 @@ let name : t -> string = match source.tab with | 1 -> file_name | _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ] + +(** Represent a column in a safe way in a query *) +let print_column : t -> string -> string = + fun table column -> + String.concat ~sep:"" [ "'"; table.name; "'.'"; column; "'" ] diff --git a/lib/expression/_readme.rst b/lib/expression/_readme.rst new file mode 100644 index 0000000..729a950 --- /dev/null +++ b/lib/expression/_readme.rst @@ -0,0 +1,39 @@ +All thoses modules transforms an expression. + +Simple transformations +---------------------- + +:Ast: + + Regenerate the Ast from the expression + +:Repr: + + Generate a representable string + +:Type_of: + + Infer the type of an expression + +:Compose: + + Allow to use a module into another one. + +:Headers: + + Extract the headers to display in the result file + +Composed transformations +------------------------ + +:Filter: + + Generate the filters in the query. This module identify if one the + expression is actually a group window and handle a special case for this. + + This module relies on Ast + +:Query: + + Build an sql query. This module relies on Type_of + diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml deleted file mode 100644 index 42c794b..0000000 --- a/lib/expression/filters.ml +++ /dev/null @@ -1,193 +0,0 @@ -(** This module evaluate the sql query to use in order to filter an expression - - The result is built over [Query] except for the group function, which are - translated into a CTE in sql - *) - -open StdLabels -module Q = Query - -type 'a result = { - repr : Format.formatter -> nested:Query.QueryParameter.t -> unit; - group : 'a T.t option; -} - -module Filter = struct - type 'a repr = { - repr : 'a Q.Query.repr; - with_group : 'a T.t option; - } - - type 'a obs = 'a result - type 'a path_repr = 'a Q.Query.path_repr - - let observe : 'a Ast.obs * 'a repr -> 'a obs = - fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group } - - let empty : 'a Ast.obs -> 'a repr = - fun _ -> { repr = Q.Query.empty (); with_group = None } - - let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr = - fun (_, expr) _ -> - { repr = Q.Query.expr expr.repr; with_group = expr.with_group } - - let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr = - fun repr p _ -> { repr = Q.Query.path repr p; with_group = None } - - let literal : string -> 'a Ast.obs -> 'a repr = - fun l _ -> { repr = Q.Query.literal l; with_group = None } - - let integer : string -> 'a Ast.obs -> 'a repr = - fun l _ -> { repr = Q.Query.integer l; with_group = None } - - let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.nvl expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.concat expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun sep expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression - and with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.join sep expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let boperator : - T.binary_operator -> - 'a Ast.obs * 'a repr -> - 'a Ast.obs * 'a repr -> - 'a Ast.obs -> - 'a repr = - fun name (_, e1) (_, e2) _ -> - let with_group = - match (e1.with_group, e2.with_group) with - | Some e, None -> Some e - | None, Some e -> Some e - | None, None -> None - | _ -> raise ImportErrors.MisplacedWindow - in - { repr = Q.Query.boperator name e1.repr e2.repr; with_group } - - let gequality : - T.binary_operator -> - 'a Ast.obs * 'a repr -> - ('a Ast.obs * 'a repr) list -> - 'a Ast.obs -> - 'a repr = - fun name (_, e1) group _ -> - let group_repr = List.map ~f:(fun v -> (snd v).repr) group - and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in - - match with_group with - | None -> - { - repr = Q.Query.gequality name e1.repr group_repr; - with_group = e1.with_group; - } - | _ -> raise ImportErrors.MisplacedWindow - - let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun name expressions _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - in - match with_group with - | None -> { repr = Q.Query.funct name expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let function' : - T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun name expressions _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - in - match with_group with - | None -> - { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - (** Window functions are not handled in the filters, we save them as an AST - in order to process them in a separated handler. - - It is not allowed to build nested window functions. *) - let window : - ('a Ast.obs * 'a repr) T.window -> - ('a Ast.obs * 'a repr) list -> - ('a Ast.obs * 'a repr) list -> - 'a Ast.obs -> - 'a repr = - fun name expressions order ast -> - ignore name; - let with_group_expr = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - and with_group_order = - List.find_map ~f:(fun v -> (snd v).with_group) order - in - match (with_group_expr, with_group_order) with - | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow - | None, None -> - (* The column name used with the cte. The name is fixed here, and used - as is in [Analysers.Query.build_cte] and - [Analysers.Query.eval_filters] *) - let q = "cte.group0" in - { - with_group = Some ast; - repr = Q.Query.funct "expr" [ Q.Query.literal q ]; - } -end - -module ASTBuilder = - Compose.Expression - (Ast) - (struct - let v = () - end) - -module F : - Sym.SYM_EXPR - with type 'a obs = 'a result - and type 'a path_repr = Format.formatter -> 'a -> unit = - ASTBuilder.Make (Filter) - -module M = Sym.M (F) - -let query_of_expression : - type b. - b Q.binded_query -> - Format.formatter -> - (Format.formatter -> 'a -> unit) -> - 'a T.t -> - b * 'a T.t option = - fun parameter formatter printer expr -> - let repr = M.eval ~path_repr:printer expr in - match parameter with - | BindParam -> - let p = Queue.create () in - let parameter = Q.QueryParameter.Queue p in - let value = F.observe repr in - value.repr ~nested:parameter formatter; - (p, value.group) - | NoParam -> - let value = F.observe repr in - value.repr ~nested:Literal formatter; - ((), value.group) diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli deleted file mode 100644 index d462b5f..0000000 --- a/lib/expression/filters.mli +++ /dev/null @@ -1,9 +0,0 @@ -module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit -(** Query used inside the filter clauses *) - -val query_of_expression : - 'b Query.binded_query -> - Format.formatter -> - (Format.formatter -> 'a -> unit) -> - 'a T.t -> - 'b * 'a T.t option diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml index 9d6fcb8..7e0f2aa 100755 --- a/lib/helpers/helpers.ml +++ b/lib/helpers/helpers.ml @@ -1,45 +1,2 @@ module Toml = Toml
module Console = Console
-
-let date_from_csv : string -> CalendarLib.Date.t option =
- fun value ->
- let open CalendarLib.Date in
- try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
- | _ -> (
- (* If the date is a number, try from julian day *)
- match int_of_string_opt value with
- | None -> None
- | Some v -> Some (add (make 1899 12 30) (Period.day v)))
-
-let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
- fun f acc b ->
- match f acc b with
- | None -> acc
- | Some v -> v
-
-let try_opt exp =
- try Some (exp ()) with
- | _ -> None
-
-let repr_date formatter date =
- Format.fprintf formatter "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let s_repr_date date =
- Format.sprintf "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let repr_opt f channel = function
- | None -> ()
- | Some v -> f channel v
-
-let str_format f =
- let buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer buffer in
- f formatter;
- Format.pp_print_flush formatter ();
- Buffer.contents buffer
@@ -324,19 +324,12 @@ Filtrer les données Un filtre peut être mis en place pour ne conserver que les lignes souhaitées. Les conditions doivent être toutes validées pour que la ligne soit retenue. -.. note:: - - Il est possible de mettre une fonction de groupe dans les filtres, pour - n’extraire par exemple que la première ligne d’une série. Toutefois, cette - fonctionnalité est encore expérimentale. L’application peut lever une - erreur si la fonction est trop complexe pour etre interprétée. - - Si une fonction de groupe est présente, l’application ordonnera les - évaluations selon l’ordre suivant : +Les filtres sont appliqués dans leur ordre d’apparition. Cela peut changer le +résultat selon que l’on filtre : - 1. En premier lieu tous les filtres *simples* pour ne conserver que les - lignes correspondantes. - 2. Ensuite seulement la fonction de groupe présente dans la section `filter` +1. d’abord sur la plus grande valeur de la colonne A, puis les lignes avec la + colonne B ≠ 0 +2. d’abord sur la colonne B ≠ 0, puis la plus grande valeur de la colonne A. Exemple complet =============== diff --git a/tests/analyser_filters.ml b/tests/analyser_filters.ml new file mode 100644 index 0000000..c329af8 --- /dev/null +++ b/tests/analyser_filters.ml @@ -0,0 +1,187 @@ +module CTE = ImportConf.CTE +module Filters = ImportAnalyser.Filters +module Chunk = ImportAnalyser.Chunk + +(* Remove the externals links in the coonfiguration because the queries are + already complicated enough without *) +let conf = { ConfLoader.conf with externals = [] } + +let path' = + Expression_builder.path ImportDataTypes.Path.{ alias = None; column = 1 } + +(** Build a simple expression expressing a group. This expression will trigger + the creation of a CTE in the query. *) +let group_expression = + Expression_builder.(equal (max path' [ path' ] [ path' ]) integer_one) + +(** Basic test ensuring we create nothing when we have nothing to create :) *) +let empty_predicates () = + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf [] chunk_links in + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "No predicates" + (Chunk.create (), Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Simple numeric filter + + The path is identified as pointing to a numeric column, and the associated + query will replace null with 0. *) +let simple_filter () = + let filter = CTE.of_filters Expression_builder.[ equal integer_one path' ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates " WHERE 1=COALESCE('source'.'col_1',0)"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Simple predicate" + (expected_predicates, Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Combined filter + + The path is identified as pointing to a literal column, andn the associated + query will replace null with empty string. *) +let multiple_filters () = + let filter = + CTE.of_filters Expression_builder.[ integer_one; equal path' literal_zero ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + (* The predicates can be executed in reverse order, but it’s not an issue + because they all are applied at the same time in the projection *) + Chunk.add_string expected_predicates + " WHERE COALESCE('source'.'col_1','')=?\nAND 1"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Combined predicate" + (expected_predicates, Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Create a simple configuration with a group expression and no other filters +*) +let group_filter () = + let filter = CTE.of_filters [ group_expression ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Predicate with a group function" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +let expression_with_group () = + let filter = + CTE.of_filters + Expression_builder.[ equal integer_one path'; group_expression ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source' WHERE 1=COALESCE('source'.'col_1',0))\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The predicate expression is inside of the CTE" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +(** The last filter is given after the group. The predicate shall not be + included inside the CTE but in the main query. *) +let group_with_expression () = + let filter = + CTE.of_filters + Expression_builder.[ group_expression; equal integer_one path' ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE 1=COALESCE('source'.'col_1',0) AND filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The predicate expression is out of the CTE" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +(** Test the configuration with two group, each one generating it’s own CTE in + the query *) +let group_with_group () = + let filter = CTE.of_filters [ group_expression; group_expression ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter1' ON filter1.id = source.id\n\ + WHERE filter1.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n\ + , filter1 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source'\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function)\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The two CTE shall be generated" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +let tests = + Alcotest. + ( "filters", + [ + test_case "No filters" `Quick empty_predicates; + test_case "Simple filter" `Quick simple_filter; + test_case "Combined filter" `Quick multiple_filters; + test_case "Group filter" `Quick group_filter; + test_case "Expression and Group filter" `Quick expression_with_group; + test_case "Group filter and Expression" `Quick group_with_expression; + test_case "Group and Group" `Quick group_with_group; + ] ) diff --git a/tests/analyser_query_test.ml b/tests/analyser_query_test.ml index 0e23f11..c4ab6d8 100644 --- a/tests/analyser_query_test.ml +++ b/tests/analyser_query_test.ml @@ -32,12 +32,12 @@ let select = "Select" >:: fun _ -> let query, _ = Q.select conf in let expected_query = - {|SELECT COALESCE('source'.col_1,'') || ? || '' AS result_0, -'source'.col_2 AS result_1, -'last_file'.col_5 AS result_2 + {|SELECT COALESCE('source'.'col_1','') || ? || '' AS result_0, +'source'.'col_2' AS result_1, +'last_file'.'col_5' AS result_2 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file'|} in Alcotest.check Alcotest.string "" expected_query query.q @@ -47,11 +47,11 @@ let check_externals = let query = Q.check_external conf (List.hd conf.externals) in let expected_query = - "SELECT 'source'.'id', 'source'.col_1 FROM\n\ - 'source' AS 'source'\n\ - LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = \ - 'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND 'source'.col_1 \ - IS NOT NULL AND 'source'.col_1 <> ''" + "SELECT 'source'.'id', 'source'.'col_1'\n\ + FROM 'source' AS 'source'\n\ + LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = \ + 'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND \ + 'source'.'col_1' IS NOT NULL AND 'source'.'col_1' <> ''" in Alcotest.check Alcotest.string "" expected_query query.q @@ -81,8 +81,8 @@ let previous = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT LAG('previous'.col_5) OVER (PARTITION BY 'previous'.col_1 ORDER BY \ - 'previous'.col_3) AS result_0\n\ + "SELECT LAG('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1' \ + ORDER BY 'previous'.'col_3') AS result_0\n\ FROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -112,7 +112,7 @@ let sum = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) OVER (PARTITION BY 'previous'.col_1) AS \ + "SELECT SUM('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1') AS \ result_0\n\ FROM 'source' AS 'previous'" in @@ -137,7 +137,7 @@ let sum_total = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" + "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -166,7 +166,7 @@ let sum_unfiltered = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" + "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -206,16 +206,16 @@ let filter_group = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 + {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file') -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE filter0.group_function|} in Alcotest.check Alcotest.string "" expected contents.q @@ -238,18 +238,16 @@ let filter_group2 = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 + {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -WHERE COALESCE('source'.col_3,0)=0) -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE COALESCE('source'.col_3,0)=0 -AND (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE COALESCE('source'.'col_3',0)=0 AND filter0.group_function|} in Alcotest.check Alcotest.string "" expected contents.q diff --git a/tests/confLoader.ml b/tests/confLoader.ml index 4adedd6..692cda9 100644 --- a/tests/confLoader.ml +++ b/tests/confLoader.ml @@ -1,7 +1,9 @@ +let load' : string -> (ImportConf.Syntax.t, string) Result.t = + fun content -> Otoml.Parser.from_string content |> ImportConf.t_of_toml + (** Read the configuration in toml and return the internal representation *) let load : string -> ImportConf.Syntax.t = - fun content -> - Otoml.Parser.from_string content |> ImportConf.t_of_toml |> Result.get_ok + fun content -> Result.get_ok (load' content) let conf = load diff --git a/tests/configuration_toml.ml b/tests/configuration_toml.ml index dc1f769..e51c727 100644 --- a/tests/configuration_toml.ml +++ b/tests/configuration_toml.ml @@ -2,6 +2,29 @@ module Expression = ImportExpression.T module Path = ImportDataTypes.Path open Test_migration +let nested_group () = + let expected = + Error + "in field \"sheet\":\n\ + \ in field \"columns\":\n\ + \ while decoding a list:\n\ + \ element 0:\n\ + \ A group function cannot contains another group function, but got\n\ + \ \"max(:A, [counter([:A], [:A])], [])\" \n" + and result = + ConfLoader.load' + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + "max(:A, [counter([:A], [:A])], [])", +]|} + in + Alcotest.(check (result Test_migration.syntax string)) + "duplicate" expected result + let test_suit = [ ( "parse_extern" >:: fun _ -> @@ -63,6 +86,7 @@ let test_suit = let toml = Otoml.Parser.from_file "configuration/example_csv.toml" in let toml = ImportConf.t_of_toml toml in ignore toml ); + ("nested group", `Quick, nested_group); ] let tests = "configuration_toml" >::: test_suit diff --git a/tests/importer_test.ml b/tests/importer_test.ml index a7b7f0d..78b7f59 100644 --- a/tests/importer_test.ml +++ b/tests/importer_test.ml @@ -11,6 +11,7 @@ let _ = Expression_type_of.tests; Expression_query.tests; Analyser_dependency.tests; + Analyser_filters.tests; Analyser_query_test.tests; Sql_db.tests; ] diff --git a/tests/sql_db.ml b/tests/sql_db.ml index 75b8293..65a93ce 100644 --- a/tests/sql_db.ml +++ b/tests/sql_db.ml @@ -200,6 +200,121 @@ columns = [ ImportCSV.DataType. [ [| Integer 1; Integer 200 |]; [| Integer 2; Integer 100 |] ]) +let filter_group = + run_test "filter_group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) = :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok ImportCSV.DataType.[ [| Integer 3 |] ]) + +(** The first filter will prevent the max value to pop, and only the second one + will be reported *) +let filter_expression_and_group = + run_test "filter expression then group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + ":B <> 200", + "max(:B, [:C], [:B]) = :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok ImportCSV.DataType.[ [| Integer 2 |] ]) + +(** In this case, we first filter the line and keep only the max value, but the + second filter will match the result and will produce an empty list *) +let filter_group_and_expression = + run_test "filter group then expression" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) = :B", + ":B <> 200", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok []) + +(** In this case, each filter remove the line with the higher value in B. + + After the application of the two filters, the only remaining result is the + first line. *) +let filter_group_and_group = + run_test "filter group then group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) <> :B", + "max(:B, [:C], [:B]) <> :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok [ [| ImportCSV.DataType.Integer 1 |] ]) + let test_suit = [ simple_extraction; @@ -208,6 +323,10 @@ let test_suit = sum_unfiltered; sum_group; sum_group_uniq; + filter_group; + filter_expression_and_group; + filter_group_and_expression; + filter_group_and_group; ] let tests = "sql_db" >::: test_suit diff --git a/tests/test_migration.ml b/tests/test_migration.ml index 35480d1..e26f354 100644 --- a/tests/test_migration.ml +++ b/tests/test_migration.ml @@ -45,7 +45,7 @@ let int_container_testable = make_test (module ImportContainers.IntSet) let expression_testable = make_test (module struct - type t = ImportConf.Path.t ImportExpression.T.t [@@deriving show, eq] + type t = ImportDataTypes.Path.t ImportExpression.T.t [@@deriving show, eq] end) let dep_key_testable = @@ -53,3 +53,28 @@ let dep_key_testable = (module struct type t = ImportAnalyser.Dependency.key [@@deriving show, eq] end) + +let chunk = + make_test + (module struct + type t = ImportAnalyser.Chunk.t + + let pp formater t = + Format.fprintf formater "%s" (Buffer.contents t.ImportAnalyser.Chunk.b) + + let equal t1 t2 = + let to_string t = Buffer.contents t.ImportAnalyser.Chunk.b in + String.equal (to_string t1) (to_string t2) + end) + +let syntax = + make_test + (module struct + type t = ImportConf.Syntax.t + + let pp format t = + Format.fprintf format "%s" + (Otoml.Printer.to_string (ImportConf.Syntax.repr t)) + + let equal t1 t2 = t1 = t2 + end) |