aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2025-03-01 08:39:02 +0100
committerSébastien Dailly <sebastien@dailly.me>2025-03-06 20:57:10 +0100
commit81db1bfd580791910646525e30bc45af34533987 (patch)
treec610f53c284d3707a3d6fe49486b5c09e66dc41f /lib
parent67320d8f04e1f302306b9aafdaaf4bafcf443839 (diff)
Rewrite the way to handle filters
Diffstat (limited to 'lib')
-rw-r--r--lib/analysers/chunk.ml102
-rw-r--r--lib/analysers/chunk.mli34
-rw-r--r--lib/analysers/filters.ml133
-rw-r--r--lib/analysers/filters.mli2
-rw-r--r--lib/analysers/printers.ml12
-rw-r--r--lib/analysers/printers.mli14
-rw-r--r--lib/analysers/query.ml310
-rw-r--r--lib/analysers/query.mli7
-rw-r--r--lib/configuration/cte.ml53
-rw-r--r--lib/configuration/cte.mli20
-rw-r--r--lib/configuration/importConf.ml1
-rw-r--r--lib/configuration/importConf.mli12
-rw-r--r--lib/configuration/read_conf.ml69
-rw-r--r--lib/data_types/table.ml5
-rw-r--r--lib/expression/_readme.rst39
-rw-r--r--lib/expression/filters.ml193
-rw-r--r--lib/expression/filters.mli9
-rwxr-xr-xlib/helpers/helpers.ml43
18 files changed, 482 insertions, 576 deletions
diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml
new file mode 100644
index 0000000..2fa4808
--- /dev/null
+++ b/lib/analysers/chunk.ml
@@ -0,0 +1,102 @@
+(** The module allow to create fragment in the query which keep together the
+ binderd parameters and the text of the query.contents.
+
+ This is used a lot in order to create the CTE, where you need the create
+ fragment used both in the main request and partially in the CTE itself.
+
+ The content is mutable and all the functions are returning [unit]. *)
+
+type t = {
+ b : Buffer.t;
+ parameters : ImportCSV.DataType.t Queue.t;
+}
+
+let create : unit -> t =
+ fun () -> { b = Buffer.create 16; parameters = Queue.create () }
+
+let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t =
+ fun b parameters -> { b; parameters }
+
+(* Append the element from [tail] at the end of [head]
+
+ Tail is destroyed during the operation.
+ *)
+let append : head:t -> tail:t -> unit =
+ fun ~head ~tail ->
+ match Buffer.length tail.b with
+ | 0 -> ()
+ | _ ->
+ Buffer.add_buffer head.b tail.b;
+ Queue.transfer tail.parameters head.parameters;
+ ()
+
+(** Add a litteral string in the sequence *)
+let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v
+
+let copy : t -> t =
+ fun t ->
+ let b = Buffer.create 16 and parameters = Queue.copy t.parameters in
+ Buffer.add_buffer b t.b;
+ { b; parameters }
+
+let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit =
+ fun t p -> Queue.add_seq t.parameters p
+
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Q = ImportExpression.Query
+open StdLabels
+
+(** Extract the informations from the dependancies. We get two informations here
+ :
+
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing links
+ later. *)
+let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit =
+ fun ~conf ~join_buffer external_ ->
+ let extern_table = Table.name external_.target in
+
+ let formatter = Format.formatter_of_buffer join_buffer.b in
+ Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
+ external_.target.name
+ (Printers.prepare_key ~f:(fun f ->
+ let q =
+ Q.query_of_expression Q.BindParam f (Printers.path ~conf)
+ external_.intern_key
+ in
+
+ add_parameters join_buffer (Queue.to_seq q)))
+ (Table.print_column external_.Syntax.Extern.target
+ ("key_" ^ external_.Syntax.Extern.target.name));
+
+ Format.pp_print_flush formatter ()
+
+(** Create the from part of the query, adding all the required externals (even
+ when not required)
+
+ SQLite is able to optimize the query and do not load the table not used in
+ the select clause. *)
+let create_from_statement_of_chunck :
+ ?externals:Syntax.Extern.t list -> Syntax.t -> t -> unit =
+ fun ?externals conf c ->
+ let externals = Option.value externals ~default:conf.externals in
+ add_string c "\nFROM '";
+ add_string c (Table.name conf.source);
+ add_string c "' AS '";
+ add_string c conf.source.name;
+ add_string c "'";
+
+ (* Add the externals in the query *)
+ List.iter externals ~f:(join_external ~conf ~join_buffer:c)
+
+let add_expression :
+ conf:Syntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit =
+ fun ~conf group expression ->
+ let formatter = Format.formatter_of_buffer group.b in
+ let queue =
+ ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam
+ formatter (Printers.path ~conf) expression
+ in
+ Format.pp_print_flush formatter ();
+ add_parameters group (Queue.to_seq queue)
diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli
new file mode 100644
index 0000000..d4f69e7
--- /dev/null
+++ b/lib/analysers/chunk.mli
@@ -0,0 +1,34 @@
+(** This module helps to create a query with prepared values. *)
+
+type t = {
+ b : Buffer.t;
+ parameters : ImportCSV.DataType.t Queue.t;
+}
+
+val create : unit -> t
+val create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t
+
+val append : head:t -> tail:t -> unit
+(** Append the element from [tail] at the end of [head]
+
+ Tail is destroyed during the operation. *)
+
+val add_string : t -> string -> unit
+(** Add a litteral string in the sequence *)
+
+val copy : t -> t
+
+val create_from_statement_of_chunck :
+ ?externals:ImportConf.Syntax.Extern.t list -> ImportConf.Syntax.t -> t -> unit
+(** Create the from part of the query, adding all the declared externals (even
+ when not required)
+
+ SQLite is able to optimize the query and do not load the table not used in
+ the select clause. *)
+
+val add_expression :
+ conf:ImportConf.Syntax.t ->
+ t ->
+ ImportDataTypes.Path.t ImportExpression.T.t ->
+ unit
+(** Add an expression into an existing chunck *)
diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml
new file mode 100644
index 0000000..4e8b175
--- /dev/null
+++ b/lib/analysers/filters.ml
@@ -0,0 +1,133 @@
+(** Build a fragment of the query match a filter *)
+
+module Syntax = ImportConf.Syntax
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression
+module CTE = ImportConf.CTE
+open StdLabels
+
+(** Add a list of expressions into the group *)
+let rec add_filters :
+ conf:Syntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit =
+ fun ~conf group -> function
+ | [] -> ()
+ | hd :: [] -> Chunk.add_expression ~conf group hd
+ | hd :: tl ->
+ Chunk.add_expression ~conf group hd;
+ Chunk.add_string group "\nAND ";
+ add_filters ~conf group tl
+
+type 'a cte_acc = {
+ n : int;
+ has_previous : bool;
+ acc : 'a;
+ cte_index : int option;
+ latest_expression : Path.t Expression.T.t list;
+}
+
+let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit =
+ fun ~conf n b ->
+ let name = "filter" ^ string_of_int n in
+ (* We use an INNER JOIN here because we want to be sure to get all the rows
+ fetched by the CTE *)
+ Buffer.add_string b "\nINNER JOIN '";
+ Buffer.add_string b name;
+ Buffer.add_string b "' ON ";
+ Buffer.add_string b name;
+ Buffer.add_string b ".id = ";
+ Buffer.add_string b conf.source.name;
+ Buffer.add_string b ".id\n"
+
+let print :
+ conf:Syntax.t ->
+ (Chunk.t * Chunk.t) cte_acc ->
+ CTE.t ->
+ (Chunk.t * Chunk.t) cte_acc =
+ fun ~conf acc cte ->
+ let predicates, query = acc.acc in
+ let n = acc.n in
+ let cte_index =
+ match cte.CTE.group with
+ | Some expression ->
+ begin
+ if acc.has_previous then Chunk.add_string query ", "
+ else Chunk.add_string query "WITH "
+ end;
+ Chunk.add_string query "filter";
+ Chunk.add_string query (string_of_int n);
+ Chunk.add_string query " AS (";
+ Chunk.add_string query "SELECT ";
+ Chunk.add_string query conf.source.name;
+ Chunk.add_string query ".id, ";
+ Chunk.add_expression ~conf query expression;
+ Chunk.add_string query " AS group_function";
+ Chunk.create_from_statement_of_chunck conf query;
+
+ if acc.has_previous then begin
+ let previous_name = "filter" ^ string_of_int (n - 1) in
+ add_inner ~conf (n - 1) query.Chunk.b;
+
+ Chunk.add_string query "WHERE ";
+ Chunk.add_string query previous_name;
+ Chunk.add_string query ".group_function"
+ end;
+
+ begin
+ match cte.CTE.filters with
+ | [] -> ()
+ | _ ->
+ Chunk.add_string query " WHERE ";
+ add_filters ~conf query cte.CTE.filters
+ end;
+ Chunk.add_string query ")\n";
+ Some acc.n
+ | None ->
+ (* Do not add the filters in the CTE (we don’t have any) but in the main
+ query *)
+ Chunk.add_string predicates "WHERE ";
+ add_filters ~conf predicates cte.CTE.filters;
+ acc.cte_index
+ in
+ {
+ acc with
+ has_previous = true;
+ n = acc.n + 1;
+ cte_index;
+ latest_expression = cte.CTE.filters;
+ }
+
+let generate_sql : conf:Syntax.t -> CTE.t list -> Chunk.t -> Chunk.t =
+ fun ~conf filters links' ->
+ let predicates = Chunk.create () and links = Chunk.create () in
+ let eval =
+ List.fold_left filters
+ ~init:
+ {
+ n = 0;
+ has_previous = false;
+ acc = (links, predicates);
+ cte_index = None;
+ latest_expression = [];
+ }
+ ~f:(print ~conf)
+ in
+ match (eval.cte_index, eval.latest_expression) with
+ | None, [] -> predicates
+ | None, _ ->
+ Chunk.add_string links' " ";
+ Chunk.append ~head:links' ~tail:links;
+ predicates
+ | Some n, [] ->
+ add_inner ~conf n links'.b;
+ Chunk.append ~head:links' ~tail:links;
+ Chunk.add_string links' "filter";
+ Chunk.add_string links' (string_of_int n);
+ Chunk.add_string links' ".group_function";
+ predicates
+ | Some n, _ ->
+ add_inner ~conf n links'.b;
+ Chunk.append ~head:links' ~tail:links;
+ Chunk.add_string links' " AND filter";
+ Chunk.add_string links' (string_of_int n);
+ Chunk.add_string links' ".group_function";
+ predicates
diff --git a/lib/analysers/filters.mli b/lib/analysers/filters.mli
new file mode 100644
index 0000000..7783799
--- /dev/null
+++ b/lib/analysers/filters.mli
@@ -0,0 +1,2 @@
+val generate_sql :
+ conf:ImportConf.Syntax.t -> ImportConf.CTE.t list -> Chunk.t -> Chunk.t
diff --git a/lib/analysers/printers.ml b/lib/analysers/printers.ml
new file mode 100644
index 0000000..1c73c13
--- /dev/null
+++ b/lib/analysers/printers.ml
@@ -0,0 +1,12 @@
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+let path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
+ fun ~conf buffer { alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ Format.fprintf buffer "%s"
+ (Table.print_column table ("col_" ^ string_of_int column))
+
+let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit =
+ fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f
diff --git a/lib/analysers/printers.mli b/lib/analysers/printers.mli
new file mode 100644
index 0000000..102bb91
--- /dev/null
+++ b/lib/analysers/printers.mli
@@ -0,0 +1,14 @@
+val path :
+ conf:ImportConf.Syntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit
+(** Represent a path in a SQL query.
+
+ This function is given in the Expression.Query module. *)
+
+val prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit
+(** Wrap an expression for beiing used as a key. What’s given to the formater
+ will be pre-processed with sql function to trim and uppercase the value.
+
+ This function is used at two location in the code :
+
+ - during the index creation
+ - when matching a value against this index *)
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index bcf4a72..e24da78 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -2,58 +2,8 @@ open StdLabels
module Expression = ImportExpression
module Q = Expression.Query
module Syntax = ImportConf.Syntax
-module Table = ImportConf.Table
-module Path = ImportConf.Path
-
-let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
-
-(** The module allow to create fragment in the query which keep together the
- binderd parameters and the text of the query.contents.
-
- This is used a lot in order to create the CTE, where you need the create
- fragment used both in the main request and partially in the CTE itself.
-
- The content is mutable and all the functions are returning [unit]. *)
-module Chunk = struct
- type t = {
- b : Buffer.t;
- parameters : ImportCSV.DataType.t Queue.t;
- }
-
- let create : unit -> t =
- fun () -> { b = Buffer.create 16; parameters = Queue.create () }
-
- let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t =
- fun b parameters -> { b; parameters }
-
- (* Append the element from [tail] at the end of [head]
-
- Tail is destroyed during the operation.
- *)
- let append : head:t -> tail:t -> unit =
- fun ~head ~tail ->
- match Buffer.length tail.b with
- | 0 -> ()
- | _ ->
- Buffer.add_buffer head.b tail.b;
- Queue.transfer tail.parameters head.parameters;
- ()
-
- (** Add a litteral string in the sequence *)
- let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v
-
- let copy : t -> t =
- fun t ->
- let b = Buffer.create 16 and parameters = Queue.copy t.parameters in
- Buffer.add_buffer b t.b;
- { b; parameters }
-
- let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit =
- fun t p -> Queue.add_seq t.parameters p
-end
-
-let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit =
- fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
(* Collect all the tables pointed by the expression. *)
let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =
@@ -64,11 +14,6 @@ let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =
(table, table_name) :: acc)
|> List.sort_uniq ~cmp:Stdlib.compare
-(** Represent a column in a safe way in a query *)
-let print_column : Table.t -> string -> string =
- fun table column ->
- String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ]
-
let create_table : Dependency.t -> string =
fun mapping ->
let b = Buffer.create 64 in
@@ -90,184 +35,6 @@ let create_table : Dependency.t -> string =
Buffer.contents b
-let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
- fun ~conf buffer { alias; column } ->
- let table = ImportConf.get_table_for_name conf alias in
- let table_name = table.Table.name in
- Format.fprintf buffer "'%s'.col_%d" table_name column
-
-(** Extract the informations from the dependancies. We get two informations here
- :
-
- - the join query in order to load the data from the external column
- - the column corresponding to the key in order to identify the missing links
- later. *)
-let query_of_external :
- conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit =
- fun ~conf ~join_buffer external_ ->
- let extern_table = Table.name external_.target in
-
- let formatter = Format.formatter_of_buffer join_buffer.b in
- Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
- external_.target.name
- (prepare_key ~f:(fun f ->
- let q =
- Q.query_of_expression Q.BindParam f (show_path ~conf)
- external_.intern_key
- in
-
- Chunk.add_parameters join_buffer (Queue.to_seq q)))
- (print_column external_.Syntax.Extern.target
- ("key_" ^ external_.Syntax.Extern.target.name));
-
- Format.pp_print_flush formatter ()
-
-(** Create the from part of the query, adding all the reuired externals.
-
- SQLite is able to optimize the query and do not load the table not used in
- the select clause. *)
-let create_from_chunck : Syntax.t -> Chunk.t -> unit =
- fun conf c ->
- Chunk.add_string c "\nFROM '";
- Chunk.add_string c (Table.name conf.source);
- Chunk.add_string c "' AS '";
- Chunk.add_string c conf.source.name;
- Chunk.add_string c "'";
-
- (* Add the externals in the query *)
- List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c)
-
-(** Build a CTE query in order to use any group function inside the query.
- Return the binded parameters used in the expression. The buffer given in
- argument is also modified during the construction.
-
- If filters is not None, the clauses are added to the CTE. *)
-let build_cte :
- Syntax.t ->
- expression:'a Expression.T.t ->
- filters:Chunk.t option ->
- Chunk.t =
- fun conf ~expression ~filters ->
- (* The binded parameters queue will be used later in the full query *)
- let cte_chunk = Chunk.create () in
-
- Chunk.add_string cte_chunk "WITH cte AS (SELECT ";
- Chunk.add_string cte_chunk conf.source.name;
- Chunk.add_string cte_chunk ".id, ";
-
- let formatter = Format.formatter_of_buffer cte_chunk.b in
-
- let p =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression
- in
- Format.pp_print_flush formatter ();
- Chunk.add_parameters cte_chunk (Queue.to_seq p);
- (* The name is hardcoded here, and used in [Expression.Filters.window] *)
- Chunk.add_string cte_chunk " AS group0";
-
- let () = create_from_chunck conf cte_chunk in
- let () =
- match filters with
- | None -> ()
- | Some filters_chunk ->
- Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk)
- in
-
- Chunk.add_string cte_chunk ")\n";
- cte_chunk
-
-type filter_evaluation = {
- content : Buffer.t;
- parameters : ImportCSV.DataType.t Seq.t;
- cte : (string * Chunk.t) option;
-}
-(** Build the filters to apply in the query. We make the difference here between
- the predicates to apply directly in the query, and the filters associated
- with a group, which are required to be transformed into a CTE in SQL, and
- are evaluated before. *)
-
-(** Evaluate the filters on the query *)
-let eval_filters : Syntax.t -> filter_evaluation =
- fun conf ->
- match conf.filters with
- | [] ->
- let empty_buffer = Buffer.create 0 in
- { content = empty_buffer; parameters = Seq.empty; cte = None }
- | filters -> (
- (* Create a new queue in order to accumulate all the parameters to bind.
- This filter will be given to both the CTE if any, or reused in the
- main query when there is no CTE.
- *)
- let chunk_filters = Chunk.create () in
- Chunk.add_string chunk_filters "\nWHERE ";
-
- let group = Chunk.create () in
-
- let with_cte, with_exr =
- List.fold_left filters ~init:(None, false)
- ~f:(fun (with_cte, with_exr) column ->
- (* The function will return an option in second position which is
- None when no Group function where found, and Some Expression
- otherwise *)
- let b = Buffer.create 16 in
-
- let formatter = Format.formatter_of_buffer b in
- let queue, group_found =
- Expression.Filters.query_of_expression Q.BindParam formatter
- (show_path ~conf) column
- in
- Format.pp_print_flush formatter ();
- let clause = Chunk.create' b queue in
-
- match (group_found, with_cte) with
- | None, _ ->
- Chunk.append ~head:chunk_filters ~tail:clause;
- Chunk.add_string chunk_filters "\nAND ";
- (with_cte, true)
- | (Some _ as group'), None ->
- (* We have a group here, we do not add it into the
- filter_buffer right now.
-
- This can occur only once, the second one will raise
- an error. *)
- Chunk.append ~head:group ~tail:clause;
- (group', with_exr)
- | Some _, Some _ -> raise ImportErrors.MisplacedWindow)
- in
-
- match with_cte with
- | None ->
- let content = chunk_filters.b in
- truncate content 5;
- {
- (* There is no group clause in the query *)
- content;
- parameters = Queue.to_seq chunk_filters.parameters;
- cte = None;
- }
- | Some expression ->
- let filters =
- if with_exr then (
- (* If we have additionnals filters from the group clause, we
- have to report them in the CTE instead of the main query. *)
- let c' = Chunk.copy chunk_filters in
- truncate c'.b 5;
- Some c')
- else None
- in
-
- (* Create the common expression table *)
- let cte_parameters = build_cte conf ~expression ~filters in
- Chunk.append ~head:chunk_filters ~tail:group;
-
- {
- content = chunk_filters.b;
- parameters = Queue.to_seq chunk_filters.parameters;
- (* The name is hardcoded here, and used in
- [Expression.Filters.window] *)
- cte = Some ("cte", cte_parameters);
- })
-
type query = {
q : string;
parameters : ImportCSV.DataType.t Seq.t;
@@ -323,23 +90,12 @@ let clean_window :
find which source is pointed by this alias. *)
let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
fun conf ->
- (* If the filters contains a group expression, we need to transform this into
- a CTE, which have to be evaluated before the main query. That’s why we are
- evaluating the filters right now.*)
- let filters = eval_filters conf in
- let b = Buffer.create 256 in
- let parameters = Queue.create () in
-
- Option.iter
- (fun (_, (cte : Chunk.t)) ->
- Buffer.add_buffer b cte.b;
- Queue.add_seq parameters (Queue.to_seq cte.parameters))
- filters.cte;
+ let filter = ImportConf.CTE.of_filters conf.filters in
(* For each column in the configuration file, add the corresponding element
in the query.
- The Sqlite driver return the elements in an array, we create an array to
+ The Sqlite driver return the elements in an array, we create an array too
in order to manage the elements together.
*)
let headers = Array.make (List.length conf.columns) (Obj.magic None) in
@@ -355,6 +111,10 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
let expression = c in
(i, clean_window ~prefix:conf.uniq expression))
in
+ let filters = Chunk.create () in
+ let request_header = Filters.generate_sql ~conf filter filters in
+ let b = request_header.Chunk.b
+ and parameters = request_header.Chunk.parameters in
let formatter = Format.formatter_of_buffer b in
let () =
Format.fprintf formatter "SELECT %a"
@@ -363,7 +123,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
(fun formatter (i, column) ->
Array.set headers i column;
let p =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)
column
in
Queue.transfer p parameters;
@@ -372,26 +132,8 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
in
Format.pp_print_flush formatter ();
- let () = create_from_chunck conf (Chunk.create' b parameters) in
-
- (* If the query has a CTE, link it as well. We use an INNER JOIN here because
- we want to be sure to get all the rows fetched by the CTE
- *)
- let () =
- match filters.cte with
- | None -> ()
- | Some (name, _) ->
- Buffer.add_string b "\nINNER JOIN '";
- Buffer.add_string b name;
- Buffer.add_string b "' ON ";
- Buffer.add_string b name;
- Buffer.add_string b ".id = ";
- Buffer.add_string b conf.source.name;
- Buffer.add_string b ".id"
- in
-
- Buffer.add_buffer b filters.content;
- Queue.add_seq parameters filters.parameters;
+ let () = Chunk.create_from_statement_of_chunck conf request_header in
+ Chunk.append ~head:request_header ~tail:filters;
let formatter = Format.formatter_of_buffer b in
(match conf.Syntax.uniq with
@@ -402,7 +144,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
~pp_sep:(fun f () -> Format.fprintf f ", ")
(fun formatter column ->
let seq =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)
column
in
Queue.transfer seq parameters))
@@ -415,7 +157,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
~pp_sep:(fun f () -> Format.fprintf f ", ")
(fun formatter column ->
let seq =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)
column
in
Queue.transfer seq parameters))
@@ -426,20 +168,12 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
let check_external : Syntax.t -> Syntax.Extern.t -> query =
fun conf external_ ->
- let internal_chunk =
- let internal_key_buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer internal_key_buffer in
- let internal_key_seq =
- Q.query_of_expression Q.BindParam formatter (show_path ~conf)
- external_.Syntax.Extern.intern_key
- in
- Format.pp_print_flush formatter ();
- Chunk.create' internal_key_buffer (Queue.copy internal_key_seq)
- in
+ let internal_chunk = Chunk.create () in
+ Chunk.add_expression ~conf internal_chunk external_.Syntax.Extern.intern_key;
let external_key_buffer = Buffer.create 16 in
Buffer.add_string external_key_buffer
- (print_column external_.Syntax.Extern.target
+ (Table.print_column external_.Syntax.Extern.target
("key_" ^ external_.Syntax.Extern.target.name));
let pointed_tables = pointed_tables conf external_.intern_key in
@@ -495,18 +229,12 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query =
Chunk.add_string request "-1"
| (table, _name) :: _ ->
(* If we have a single source, extract the row number. *)
- Chunk.add_string request (print_column table "id")
+ Chunk.add_string request (Table.print_column table "id")
in
Chunk.add_string request ", ";
Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
- Chunk.add_string request " FROM\n'";
- Chunk.add_string request (Table.name conf.source);
- Chunk.add_string request "' AS '";
- Chunk.add_string request conf.source.name;
- Chunk.add_string request "'";
- (* Add the externals in the query *)
- List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request);
+ Chunk.create_from_statement_of_chunck ~externals:dependencies conf request;
Chunk.add_string request " WHERE ";
Chunk.add_string request join_content;
Chunk.add_string request " IS NULL AND ";
@@ -526,7 +254,7 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit =
let formatter = Format.formatter_of_buffer buffer in
let () =
- prepare_key formatter ~f:(fun formatter ->
+ Printers.prepare_key formatter ~f:(fun formatter ->
Q.query_of_expression Q.NoParam formatter show_column expression)
in
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
index 520718a..d158867 100644
--- a/lib/analysers/query.mli
+++ b/lib/analysers/query.mli
@@ -1,5 +1,3 @@
-module Syntax = ImportConf.Syntax
-
val create_table : Dependency.t -> string
type query = {
@@ -10,9 +8,10 @@ type query = {
shall be run with all the binded parameters. *)
val select :
- Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
+ ImportConf.Syntax.t ->
+ query * ImportDataTypes.Path.t ImportExpression.T.t array
-val check_external : Syntax.t -> Syntax.Extern.t -> query
+val check_external : ImportConf.Syntax.t -> ImportConf.Syntax.Extern.t -> query
(** Create a query which select all the missing key in an external *)
val build_key_insert : Buffer.t -> Dependency.key -> unit
diff --git a/lib/configuration/cte.ml b/lib/configuration/cte.ml
new file mode 100644
index 0000000..ff43d6d
--- /dev/null
+++ b/lib/configuration/cte.ml
@@ -0,0 +1,53 @@
+open StdLabels
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+type t = {
+ filters : Path.t Expression.t list;
+ group : Path.t Expression.t option;
+}
+
+(** Ensure the group criteria in window functions match the global group by
+ criteria.
+
+ Traverse the configuration tree until finding a group window. *)
+
+(** Check if the expression contains a group function *)
+let matchWindowGroup : 'a ImportExpression.T.t -> bool =
+ fun expression ->
+ let exception Found in
+ let open ImportExpression.T in
+ let rec f = function
+ | Empty | Literal _ | Integer _ | Path _ -> ()
+ | Expr e -> f e
+ | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
+ -> List.iter ~f pp
+ | Window (_, _, _) -> raise Found
+ | BOperator (_, arg1, arg2) ->
+ f arg1;
+ f arg2
+ | GEquality (_, arg1, args) ->
+ f arg1;
+ List.iter ~f args
+ in
+ try
+ f expression;
+ false
+ with
+ | Found -> true
+
+(** Transform a list of expression into a list of CTE to evaluate. *)
+let of_filters : Path.t Expression.t list -> t list =
+ fun filters ->
+ let last_group, prev =
+ List.fold_left filters
+ ~init:({ filters = []; group = None }, [])
+ ~f:(fun (cte, acc) expr ->
+ begin
+ if matchWindowGroup expr then
+ ( { filters = []; group = None },
+ { cte with group = Some expr } :: acc )
+ else ({ cte with filters = expr :: cte.filters }, acc)
+ end)
+ in
+ List.rev (last_group :: prev)
diff --git a/lib/configuration/cte.mli b/lib/configuration/cte.mli
new file mode 100644
index 0000000..0f2b3e3
--- /dev/null
+++ b/lib/configuration/cte.mli
@@ -0,0 +1,20 @@
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+type t = {
+ filters : Path.t Expression.t list;
+ group : Path.t Expression.t option;
+}
+(** Represent a filter to apply in the querry
+
+ The CTE can have filters applied on the previous CTE (or directly in the
+ sources if there is any yet) and can hold a group (an only one).
+
+ If there is a group, it must be applied after the others filters.
+
+ The order in which the filters are presented in the configuration can change
+ the results ; it does not matter when we only have classicals filters,
+ because all cf them can be evaluated at the same time, but as soon we have a
+ group function, the result become dependant of the previous ones. *)
+
+val of_filters : Path.t Expression.t list -> t list
diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml
index ebbcb7c..8516008 100644
--- a/lib/configuration/importConf.ml
+++ b/lib/configuration/importConf.ml
@@ -1,5 +1,6 @@
open StdLabels
module Syntax = Syntax
+module CTE = Cte
module Table = ImportDataTypes.Table
module Path = ImportDataTypes.Path
module T = Read_conf
diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli
index 9ddc40c..40b985b 100644
--- a/lib/configuration/importConf.mli
+++ b/lib/configuration/importConf.mli
@@ -1,18 +1,18 @@
module Syntax = Syntax
-module Table = ImportDataTypes.Table
-module Path = ImportDataTypes.Path
+module CTE = Cte
val dummy_conf : Syntax.t
-val root_table : Syntax.t -> Table.t
+val root_table : Syntax.t -> ImportDataTypes.Table.t
(** Get the root table, this table is the main table to load and each line in
this table will be processed *)
val t_of_toml : Otoml.t -> (Syntax.t, string) result
-val get_table_for_name : Syntax.t -> string option -> Table.t
+val get_table_for_name : Syntax.t -> string option -> ImportDataTypes.Table.t
-val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.Extern.t list
+val get_dependancies_for_table :
+ Syntax.t -> ImportDataTypes.Table.t -> Syntax.Extern.t list
(** Get all the externals refered by the source *)
val expression_from_string :
- string -> (Path.t ImportExpression.T.t, string) result
+ string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
index 69240c1..11f6726 100644
--- a/lib/configuration/read_conf.ml
+++ b/lib/configuration/read_conf.ml
@@ -126,44 +126,51 @@ end = struct
let column = Expression_parser.Incremental.column_expr
end
+exception Divergent
(** Ensure the group criteria in window functions match the global group by
- criteria.
+ criteria. *)
- Traverse the configuration tree until finding a group window. *)
+exception NestedGroup
+(** Raised when a group contains another one *)
+
+(** Traverse the configuration tree until finding a group window. *)
let matchWindowGroup :
eq:('a -> 'a -> bool) ->
subset:'a ImportExpression.T.t list ->
'a ImportExpression.T.t ->
- bool =
+ unit =
fun ~eq ~subset expression ->
- let exception Divergent in
let open ImportExpression.T in
- let rec f = function
+ let rec f isIngroup = function
| Empty | Literal _ | Integer _ | Path _ -> ()
- | Expr e -> f e
+ | Expr e -> f isIngroup e
| Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
- -> List.iter ~f pp
- | Window (_, pp1, _) ->
- if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
- else raise_notrace Divergent
+ -> List.iter ~f:(f isIngroup) pp
+ | Window (expr, pp1, pp2) ->
+ let () =
+ if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
+ else
+ match subset with
+ | [] -> ()
+ | _ -> raise_notrace Divergent
+ in
+ let () =
+ match isIngroup with
+ | true -> raise NestedGroup
+ | false -> ()
+ in
+
+ ignore @@ ImportExpression.T.map_window ~f:(f true) expr;
+ List.iter ~f:(f true) pp1;
+ List.iter ~f:(f true) pp2
| BOperator (_, arg1, arg2) ->
- f arg1;
- f arg2
+ f isIngroup arg1;
+ f isIngroup arg2
| GEquality (_, arg1, args) ->
- f arg1;
- List.iter ~f args
+ f isIngroup arg1;
+ List.iter ~f:(f isIngroup) args
in
- match subset with
- | [] ->
- (* Do not bother traversing the tree if there is no group by, just
- return Ok *)
- true
- | _ -> (
- try
- f expression;
- true
- with
- | Divergent -> false)
+ f false expression
module Make (S : Decoders.Decode.S) = struct
let ( let* ) = S.( let* )
@@ -185,13 +192,15 @@ module Make (S : Decoders.Decode.S) = struct
| Error e -> S.fail_with Decoders.Error.(make e)
| Ok expr -> (
(* Now check that every window function include at least the uniq list *)
- let valid_subset = matchWindowGroup ~eq ~subset:groups expr in
- match valid_subset with
- | true -> S.succeed expr
- | false ->
+ match matchWindowGroup ~eq ~subset:groups expr with
+ | () -> S.succeed expr
+ | exception Divergent ->
S.fail
"The group function shall match the same arguments as the \
- \"uniq\" parameter")
+ \"uniq\" parameter"
+ | exception NestedGroup ->
+ S.fail
+ "A group function cannot contains another group function")
method source =
let* file = S.field "file" S.string
diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml
index 2dd956b..4831ac8 100644
--- a/lib/data_types/table.ml
+++ b/lib/data_types/table.ml
@@ -23,3 +23,8 @@ let name : t -> string =
match source.tab with
| 1 -> file_name
| _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ]
+
+(** Represent a column in a safe way in a query *)
+let print_column : t -> string -> string =
+ fun table column ->
+ String.concat ~sep:"" [ "'"; table.name; "'.'"; column; "'" ]
diff --git a/lib/expression/_readme.rst b/lib/expression/_readme.rst
new file mode 100644
index 0000000..729a950
--- /dev/null
+++ b/lib/expression/_readme.rst
@@ -0,0 +1,39 @@
+All thoses modules transforms an expression.
+
+Simple transformations
+----------------------
+
+:Ast:
+
+ Regenerate the Ast from the expression
+
+:Repr:
+
+ Generate a representable string
+
+:Type_of:
+
+ Infer the type of an expression
+
+:Compose:
+
+ Allow to use a module into another one.
+
+:Headers:
+
+ Extract the headers to display in the result file
+
+Composed transformations
+------------------------
+
+:Filter:
+
+ Generate the filters in the query. This module identify if one the
+ expression is actually a group window and handle a special case for this.
+
+ This module relies on Ast
+
+:Query:
+
+ Build an sql query. This module relies on Type_of
+
diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml
deleted file mode 100644
index 42c794b..0000000
--- a/lib/expression/filters.ml
+++ /dev/null
@@ -1,193 +0,0 @@
-(** This module evaluate the sql query to use in order to filter an expression
-
- The result is built over [Query] except for the group function, which are
- translated into a CTE in sql
- *)
-
-open StdLabels
-module Q = Query
-
-type 'a result = {
- repr : Format.formatter -> nested:Query.QueryParameter.t -> unit;
- group : 'a T.t option;
-}
-
-module Filter = struct
- type 'a repr = {
- repr : 'a Q.Query.repr;
- with_group : 'a T.t option;
- }
-
- type 'a obs = 'a result
- type 'a path_repr = 'a Q.Query.path_repr
-
- let observe : 'a Ast.obs * 'a repr -> 'a obs =
- fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group }
-
- let empty : 'a Ast.obs -> 'a repr =
- fun _ -> { repr = Q.Query.empty (); with_group = None }
-
- let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr =
- fun (_, expr) _ ->
- { repr = Q.Query.expr expr.repr; with_group = expr.with_group }
-
- let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr =
- fun repr p _ -> { repr = Q.Query.path repr p; with_group = None }
-
- let literal : string -> 'a Ast.obs -> 'a repr =
- fun l _ -> { repr = Q.Query.literal l; with_group = None }
-
- let integer : string -> 'a Ast.obs -> 'a repr =
- fun l _ -> { repr = Q.Query.integer l; with_group = None }
-
- let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
- fun expression _ ->
- let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in
- let with_group =
- List.find_map ~f:(fun v -> (snd v).with_group) expression
- in
- match with_group with
- | None -> { repr = Q.Query.nvl expr_repr; with_group }
- | Some _ -> raise ImportErrors.MisplacedWindow
-
- let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
- fun expression _ ->
- let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in
- let with_group =
- List.find_map ~f:(fun v -> (snd v).with_group) expression
- in
- match with_group with
- | None -> { repr = Q.Query.concat expr_repr; with_group }
- | Some _ -> raise ImportErrors.MisplacedWindow
-
- let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
- fun sep expression _ ->
- let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression
- and with_group =
- List.find_map ~f:(fun v -> (snd v).with_group) expression
- in
- match with_group with
- | None -> { repr = Q.Query.join sep expr_repr; with_group }
- | Some _ -> raise ImportErrors.MisplacedWindow
-
- let boperator :
- T.binary_operator ->
- 'a Ast.obs * 'a repr ->
- 'a Ast.obs * 'a repr ->
- 'a Ast.obs ->
- 'a repr =
- fun name (_, e1) (_, e2) _ ->
- let with_group =
- match (e1.with_group, e2.with_group) with
- | Some e, None -> Some e
- | None, Some e -> Some e
- | None, None -> None
- | _ -> raise ImportErrors.MisplacedWindow
- in
- { repr = Q.Query.boperator name e1.repr e2.repr; with_group }
-
- let gequality :
- T.binary_operator ->
- 'a Ast.obs * 'a repr ->
- ('a Ast.obs * 'a repr) list ->
- 'a Ast.obs ->
- 'a repr =
- fun name (_, e1) group _ ->
- let group_repr = List.map ~f:(fun v -> (snd v).repr) group
- and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in
-
- match with_group with
- | None ->
- {
- repr = Q.Query.gequality name e1.repr group_repr;
- with_group = e1.with_group;
- }
- | _ -> raise ImportErrors.MisplacedWindow
-
- let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
- fun name expressions _ ->
- let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in
- let with_group =
- List.find_map ~f:(fun v -> (snd v).with_group) expressions
- in
- match with_group with
- | None -> { repr = Q.Query.funct name expr_repr; with_group }
- | Some _ -> raise ImportErrors.MisplacedWindow
-
- let function' :
- T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
- fun name expressions _ ->
- let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in
- let with_group =
- List.find_map ~f:(fun v -> (snd v).with_group) expressions
- in
- match with_group with
- | None ->
- { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group }
- | Some _ -> raise ImportErrors.MisplacedWindow
-
- (** Window functions are not handled in the filters, we save them as an AST
- in order to process them in a separated handler.
-
- It is not allowed to build nested window functions. *)
- let window :
- ('a Ast.obs * 'a repr) T.window ->
- ('a Ast.obs * 'a repr) list ->
- ('a Ast.obs * 'a repr) list ->
- 'a Ast.obs ->
- 'a repr =
- fun name expressions order ast ->
- ignore name;
- let with_group_expr =
- List.find_map ~f:(fun v -> (snd v).with_group) expressions
- and with_group_order =
- List.find_map ~f:(fun v -> (snd v).with_group) order
- in
- match (with_group_expr, with_group_order) with
- | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow
- | None, None ->
- (* The column name used with the cte. The name is fixed here, and used
- as is in [Analysers.Query.build_cte] and
- [Analysers.Query.eval_filters] *)
- let q = "cte.group0" in
- {
- with_group = Some ast;
- repr = Q.Query.funct "expr" [ Q.Query.literal q ];
- }
-end
-
-module ASTBuilder =
- Compose.Expression
- (Ast)
- (struct
- let v = ()
- end)
-
-module F :
- Sym.SYM_EXPR
- with type 'a obs = 'a result
- and type 'a path_repr = Format.formatter -> 'a -> unit =
- ASTBuilder.Make (Filter)
-
-module M = Sym.M (F)
-
-let query_of_expression :
- type b.
- b Q.binded_query ->
- Format.formatter ->
- (Format.formatter -> 'a -> unit) ->
- 'a T.t ->
- b * 'a T.t option =
- fun parameter formatter printer expr ->
- let repr = M.eval ~path_repr:printer expr in
- match parameter with
- | BindParam ->
- let p = Queue.create () in
- let parameter = Q.QueryParameter.Queue p in
- let value = F.observe repr in
- value.repr ~nested:parameter formatter;
- (p, value.group)
- | NoParam ->
- let value = F.observe repr in
- value.repr ~nested:Literal formatter;
- ((), value.group)
diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli
deleted file mode 100644
index d462b5f..0000000
--- a/lib/expression/filters.mli
+++ /dev/null
@@ -1,9 +0,0 @@
-module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit
-(** Query used inside the filter clauses *)
-
-val query_of_expression :
- 'b Query.binded_query ->
- Format.formatter ->
- (Format.formatter -> 'a -> unit) ->
- 'a T.t ->
- 'b * 'a T.t option
diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml
index 9d6fcb8..7e0f2aa 100755
--- a/lib/helpers/helpers.ml
+++ b/lib/helpers/helpers.ml
@@ -1,45 +1,2 @@
module Toml = Toml
module Console = Console
-
-let date_from_csv : string -> CalendarLib.Date.t option =
- fun value ->
- let open CalendarLib.Date in
- try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
- | _ -> (
- (* If the date is a number, try from julian day *)
- match int_of_string_opt value with
- | None -> None
- | Some v -> Some (add (make 1899 12 30) (Period.day v)))
-
-let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
- fun f acc b ->
- match f acc b with
- | None -> acc
- | Some v -> v
-
-let try_opt exp =
- try Some (exp ()) with
- | _ -> None
-
-let repr_date formatter date =
- Format.fprintf formatter "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let s_repr_date date =
- Format.sprintf "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let repr_opt f channel = function
- | None -> ()
- | Some v -> f channel v
-
-let str_format f =
- let buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer buffer in
- f formatter;
- Format.pp_print_flush formatter ();
- Buffer.contents buffer