diff options
author | Sébastien Dailly <sebastien@dailly.me> | 2025-03-01 08:39:02 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2025-03-06 20:57:10 +0100 |
commit | 81db1bfd580791910646525e30bc45af34533987 (patch) | |
tree | c610f53c284d3707a3d6fe49486b5c09e66dc41f /lib | |
parent | 67320d8f04e1f302306b9aafdaaf4bafcf443839 (diff) |
Rewrite the way to handle filters
Diffstat (limited to 'lib')
-rw-r--r-- | lib/analysers/chunk.ml | 102 | ||||
-rw-r--r-- | lib/analysers/chunk.mli | 34 | ||||
-rw-r--r-- | lib/analysers/filters.ml | 133 | ||||
-rw-r--r-- | lib/analysers/filters.mli | 2 | ||||
-rw-r--r-- | lib/analysers/printers.ml | 12 | ||||
-rw-r--r-- | lib/analysers/printers.mli | 14 | ||||
-rw-r--r-- | lib/analysers/query.ml | 310 | ||||
-rw-r--r-- | lib/analysers/query.mli | 7 | ||||
-rw-r--r-- | lib/configuration/cte.ml | 53 | ||||
-rw-r--r-- | lib/configuration/cte.mli | 20 | ||||
-rw-r--r-- | lib/configuration/importConf.ml | 1 | ||||
-rw-r--r-- | lib/configuration/importConf.mli | 12 | ||||
-rw-r--r-- | lib/configuration/read_conf.ml | 69 | ||||
-rw-r--r-- | lib/data_types/table.ml | 5 | ||||
-rw-r--r-- | lib/expression/_readme.rst | 39 | ||||
-rw-r--r-- | lib/expression/filters.ml | 193 | ||||
-rw-r--r-- | lib/expression/filters.mli | 9 | ||||
-rwxr-xr-x | lib/helpers/helpers.ml | 43 |
18 files changed, 482 insertions, 576 deletions
diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml new file mode 100644 index 0000000..2fa4808 --- /dev/null +++ b/lib/analysers/chunk.ml @@ -0,0 +1,102 @@ +(** The module allow to create fragment in the query which keep together the + binderd parameters and the text of the query.contents. + + This is used a lot in order to create the CTE, where you need the create + fragment used both in the main request and partially in the CTE itself. + + The content is mutable and all the functions are returning [unit]. *) + +type t = { + b : Buffer.t; + parameters : ImportCSV.DataType.t Queue.t; +} + +let create : unit -> t = + fun () -> { b = Buffer.create 16; parameters = Queue.create () } + +let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = + fun b parameters -> { b; parameters } + +(* Append the element from [tail] at the end of [head] + + Tail is destroyed during the operation. + *) +let append : head:t -> tail:t -> unit = + fun ~head ~tail -> + match Buffer.length tail.b with + | 0 -> () + | _ -> + Buffer.add_buffer head.b tail.b; + Queue.transfer tail.parameters head.parameters; + () + +(** Add a litteral string in the sequence *) +let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v + +let copy : t -> t = + fun t -> + let b = Buffer.create 16 and parameters = Queue.copy t.parameters in + Buffer.add_buffer b t.b; + { b; parameters } + +let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = + fun t p -> Queue.add_seq t.parameters p + +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Q = ImportExpression.Query +open StdLabels + +(** Extract the informations from the dependancies. We get two informations here + : + + - the join query in order to load the data from the external column + - the column corresponding to the key in order to identify the missing links + later. *) +let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = + fun ~conf ~join_buffer external_ -> + let extern_table = Table.name external_.target in + + let formatter = Format.formatter_of_buffer join_buffer.b in + Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table + external_.target.name + (Printers.prepare_key ~f:(fun f -> + let q = + Q.query_of_expression Q.BindParam f (Printers.path ~conf) + external_.intern_key + in + + add_parameters join_buffer (Queue.to_seq q))) + (Table.print_column external_.Syntax.Extern.target + ("key_" ^ external_.Syntax.Extern.target.name)); + + Format.pp_print_flush formatter () + +(** Create the from part of the query, adding all the required externals (even + when not required) + + SQLite is able to optimize the query and do not load the table not used in + the select clause. *) +let create_from_statement_of_chunck : + ?externals:Syntax.Extern.t list -> Syntax.t -> t -> unit = + fun ?externals conf c -> + let externals = Option.value externals ~default:conf.externals in + add_string c "\nFROM '"; + add_string c (Table.name conf.source); + add_string c "' AS '"; + add_string c conf.source.name; + add_string c "'"; + + (* Add the externals in the query *) + List.iter externals ~f:(join_external ~conf ~join_buffer:c) + +let add_expression : + conf:Syntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit = + fun ~conf group expression -> + let formatter = Format.formatter_of_buffer group.b in + let queue = + ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam + formatter (Printers.path ~conf) expression + in + Format.pp_print_flush formatter (); + add_parameters group (Queue.to_seq queue) diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli new file mode 100644 index 0000000..d4f69e7 --- /dev/null +++ b/lib/analysers/chunk.mli @@ -0,0 +1,34 @@ +(** This module helps to create a query with prepared values. *) + +type t = { + b : Buffer.t; + parameters : ImportCSV.DataType.t Queue.t; +} + +val create : unit -> t +val create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t + +val append : head:t -> tail:t -> unit +(** Append the element from [tail] at the end of [head] + + Tail is destroyed during the operation. *) + +val add_string : t -> string -> unit +(** Add a litteral string in the sequence *) + +val copy : t -> t + +val create_from_statement_of_chunck : + ?externals:ImportConf.Syntax.Extern.t list -> ImportConf.Syntax.t -> t -> unit +(** Create the from part of the query, adding all the declared externals (even + when not required) + + SQLite is able to optimize the query and do not load the table not used in + the select clause. *) + +val add_expression : + conf:ImportConf.Syntax.t -> + t -> + ImportDataTypes.Path.t ImportExpression.T.t -> + unit +(** Add an expression into an existing chunck *) diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml new file mode 100644 index 0000000..4e8b175 --- /dev/null +++ b/lib/analysers/filters.ml @@ -0,0 +1,133 @@ +(** Build a fragment of the query match a filter *) + +module Syntax = ImportConf.Syntax +module Path = ImportDataTypes.Path +module Expression = ImportExpression +module CTE = ImportConf.CTE +open StdLabels + +(** Add a list of expressions into the group *) +let rec add_filters : + conf:Syntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = + fun ~conf group -> function + | [] -> () + | hd :: [] -> Chunk.add_expression ~conf group hd + | hd :: tl -> + Chunk.add_expression ~conf group hd; + Chunk.add_string group "\nAND "; + add_filters ~conf group tl + +type 'a cte_acc = { + n : int; + has_previous : bool; + acc : 'a; + cte_index : int option; + latest_expression : Path.t Expression.T.t list; +} + +let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit = + fun ~conf n b -> + let name = "filter" ^ string_of_int n in + (* We use an INNER JOIN here because we want to be sure to get all the rows + fetched by the CTE *) + Buffer.add_string b "\nINNER JOIN '"; + Buffer.add_string b name; + Buffer.add_string b "' ON "; + Buffer.add_string b name; + Buffer.add_string b ".id = "; + Buffer.add_string b conf.source.name; + Buffer.add_string b ".id\n" + +let print : + conf:Syntax.t -> + (Chunk.t * Chunk.t) cte_acc -> + CTE.t -> + (Chunk.t * Chunk.t) cte_acc = + fun ~conf acc cte -> + let predicates, query = acc.acc in + let n = acc.n in + let cte_index = + match cte.CTE.group with + | Some expression -> + begin + if acc.has_previous then Chunk.add_string query ", " + else Chunk.add_string query "WITH " + end; + Chunk.add_string query "filter"; + Chunk.add_string query (string_of_int n); + Chunk.add_string query " AS ("; + Chunk.add_string query "SELECT "; + Chunk.add_string query conf.source.name; + Chunk.add_string query ".id, "; + Chunk.add_expression ~conf query expression; + Chunk.add_string query " AS group_function"; + Chunk.create_from_statement_of_chunck conf query; + + if acc.has_previous then begin + let previous_name = "filter" ^ string_of_int (n - 1) in + add_inner ~conf (n - 1) query.Chunk.b; + + Chunk.add_string query "WHERE "; + Chunk.add_string query previous_name; + Chunk.add_string query ".group_function" + end; + + begin + match cte.CTE.filters with + | [] -> () + | _ -> + Chunk.add_string query " WHERE "; + add_filters ~conf query cte.CTE.filters + end; + Chunk.add_string query ")\n"; + Some acc.n + | None -> + (* Do not add the filters in the CTE (we don’t have any) but in the main + query *) + Chunk.add_string predicates "WHERE "; + add_filters ~conf predicates cte.CTE.filters; + acc.cte_index + in + { + acc with + has_previous = true; + n = acc.n + 1; + cte_index; + latest_expression = cte.CTE.filters; + } + +let generate_sql : conf:Syntax.t -> CTE.t list -> Chunk.t -> Chunk.t = + fun ~conf filters links' -> + let predicates = Chunk.create () and links = Chunk.create () in + let eval = + List.fold_left filters + ~init: + { + n = 0; + has_previous = false; + acc = (links, predicates); + cte_index = None; + latest_expression = []; + } + ~f:(print ~conf) + in + match (eval.cte_index, eval.latest_expression) with + | None, [] -> predicates + | None, _ -> + Chunk.add_string links' " "; + Chunk.append ~head:links' ~tail:links; + predicates + | Some n, [] -> + add_inner ~conf n links'.b; + Chunk.append ~head:links' ~tail:links; + Chunk.add_string links' "filter"; + Chunk.add_string links' (string_of_int n); + Chunk.add_string links' ".group_function"; + predicates + | Some n, _ -> + add_inner ~conf n links'.b; + Chunk.append ~head:links' ~tail:links; + Chunk.add_string links' " AND filter"; + Chunk.add_string links' (string_of_int n); + Chunk.add_string links' ".group_function"; + predicates diff --git a/lib/analysers/filters.mli b/lib/analysers/filters.mli new file mode 100644 index 0000000..7783799 --- /dev/null +++ b/lib/analysers/filters.mli @@ -0,0 +1,2 @@ +val generate_sql : + conf:ImportConf.Syntax.t -> ImportConf.CTE.t list -> Chunk.t -> Chunk.t diff --git a/lib/analysers/printers.ml b/lib/analysers/printers.ml new file mode 100644 index 0000000..1c73c13 --- /dev/null +++ b/lib/analysers/printers.ml @@ -0,0 +1,12 @@ +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +let path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = + fun ~conf buffer { alias; column } -> + let table = ImportConf.get_table_for_name conf alias in + Format.fprintf buffer "%s" + (Table.print_column table ("col_" ^ string_of_int column)) + +let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = + fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f diff --git a/lib/analysers/printers.mli b/lib/analysers/printers.mli new file mode 100644 index 0000000..102bb91 --- /dev/null +++ b/lib/analysers/printers.mli @@ -0,0 +1,14 @@ +val path : + conf:ImportConf.Syntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit +(** Represent a path in a SQL query. + + This function is given in the Expression.Query module. *) + +val prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit +(** Wrap an expression for beiing used as a key. What’s given to the formater + will be pre-processed with sql function to trim and uppercase the value. + + This function is used at two location in the code : + + - during the index creation + - when matching a value against this index *) diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index bcf4a72..e24da78 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -2,58 +2,8 @@ open StdLabels module Expression = ImportExpression module Q = Expression.Query module Syntax = ImportConf.Syntax -module Table = ImportConf.Table -module Path = ImportConf.Path - -let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n) - -(** The module allow to create fragment in the query which keep together the - binderd parameters and the text of the query.contents. - - This is used a lot in order to create the CTE, where you need the create - fragment used both in the main request and partially in the CTE itself. - - The content is mutable and all the functions are returning [unit]. *) -module Chunk = struct - type t = { - b : Buffer.t; - parameters : ImportCSV.DataType.t Queue.t; - } - - let create : unit -> t = - fun () -> { b = Buffer.create 16; parameters = Queue.create () } - - let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = - fun b parameters -> { b; parameters } - - (* Append the element from [tail] at the end of [head] - - Tail is destroyed during the operation. - *) - let append : head:t -> tail:t -> unit = - fun ~head ~tail -> - match Buffer.length tail.b with - | 0 -> () - | _ -> - Buffer.add_buffer head.b tail.b; - Queue.transfer tail.parameters head.parameters; - () - - (** Add a litteral string in the sequence *) - let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v - - let copy : t -> t = - fun t -> - let b = Buffer.create 16 and parameters = Queue.copy t.parameters in - Buffer.add_buffer b t.b; - { b; parameters } - - let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = - fun t p -> Queue.add_seq t.parameters p -end - -let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = - fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path (* Collect all the tables pointed by the expression. *) let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = @@ -64,11 +14,6 @@ let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = (table, table_name) :: acc) |> List.sort_uniq ~cmp:Stdlib.compare -(** Represent a column in a safe way in a query *) -let print_column : Table.t -> string -> string = - fun table column -> - String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ] - let create_table : Dependency.t -> string = fun mapping -> let b = Buffer.create 64 in @@ -90,184 +35,6 @@ let create_table : Dependency.t -> string = Buffer.contents b -let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = - fun ~conf buffer { alias; column } -> - let table = ImportConf.get_table_for_name conf alias in - let table_name = table.Table.name in - Format.fprintf buffer "'%s'.col_%d" table_name column - -(** Extract the informations from the dependancies. We get two informations here - : - - - the join query in order to load the data from the external column - - the column corresponding to the key in order to identify the missing links - later. *) -let query_of_external : - conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit = - fun ~conf ~join_buffer external_ -> - let extern_table = Table.name external_.target in - - let formatter = Format.formatter_of_buffer join_buffer.b in - Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table - external_.target.name - (prepare_key ~f:(fun f -> - let q = - Q.query_of_expression Q.BindParam f (show_path ~conf) - external_.intern_key - in - - Chunk.add_parameters join_buffer (Queue.to_seq q))) - (print_column external_.Syntax.Extern.target - ("key_" ^ external_.Syntax.Extern.target.name)); - - Format.pp_print_flush formatter () - -(** Create the from part of the query, adding all the reuired externals. - - SQLite is able to optimize the query and do not load the table not used in - the select clause. *) -let create_from_chunck : Syntax.t -> Chunk.t -> unit = - fun conf c -> - Chunk.add_string c "\nFROM '"; - Chunk.add_string c (Table.name conf.source); - Chunk.add_string c "' AS '"; - Chunk.add_string c conf.source.name; - Chunk.add_string c "'"; - - (* Add the externals in the query *) - List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c) - -(** Build a CTE query in order to use any group function inside the query. - Return the binded parameters used in the expression. The buffer given in - argument is also modified during the construction. - - If filters is not None, the clauses are added to the CTE. *) -let build_cte : - Syntax.t -> - expression:'a Expression.T.t -> - filters:Chunk.t option -> - Chunk.t = - fun conf ~expression ~filters -> - (* The binded parameters queue will be used later in the full query *) - let cte_chunk = Chunk.create () in - - Chunk.add_string cte_chunk "WITH cte AS (SELECT "; - Chunk.add_string cte_chunk conf.source.name; - Chunk.add_string cte_chunk ".id, "; - - let formatter = Format.formatter_of_buffer cte_chunk.b in - - let p = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression - in - Format.pp_print_flush formatter (); - Chunk.add_parameters cte_chunk (Queue.to_seq p); - (* The name is hardcoded here, and used in [Expression.Filters.window] *) - Chunk.add_string cte_chunk " AS group0"; - - let () = create_from_chunck conf cte_chunk in - let () = - match filters with - | None -> () - | Some filters_chunk -> - Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk) - in - - Chunk.add_string cte_chunk ")\n"; - cte_chunk - -type filter_evaluation = { - content : Buffer.t; - parameters : ImportCSV.DataType.t Seq.t; - cte : (string * Chunk.t) option; -} -(** Build the filters to apply in the query. We make the difference here between - the predicates to apply directly in the query, and the filters associated - with a group, which are required to be transformed into a CTE in SQL, and - are evaluated before. *) - -(** Evaluate the filters on the query *) -let eval_filters : Syntax.t -> filter_evaluation = - fun conf -> - match conf.filters with - | [] -> - let empty_buffer = Buffer.create 0 in - { content = empty_buffer; parameters = Seq.empty; cte = None } - | filters -> ( - (* Create a new queue in order to accumulate all the parameters to bind. - This filter will be given to both the CTE if any, or reused in the - main query when there is no CTE. - *) - let chunk_filters = Chunk.create () in - Chunk.add_string chunk_filters "\nWHERE "; - - let group = Chunk.create () in - - let with_cte, with_exr = - List.fold_left filters ~init:(None, false) - ~f:(fun (with_cte, with_exr) column -> - (* The function will return an option in second position which is - None when no Group function where found, and Some Expression - otherwise *) - let b = Buffer.create 16 in - - let formatter = Format.formatter_of_buffer b in - let queue, group_found = - Expression.Filters.query_of_expression Q.BindParam formatter - (show_path ~conf) column - in - Format.pp_print_flush formatter (); - let clause = Chunk.create' b queue in - - match (group_found, with_cte) with - | None, _ -> - Chunk.append ~head:chunk_filters ~tail:clause; - Chunk.add_string chunk_filters "\nAND "; - (with_cte, true) - | (Some _ as group'), None -> - (* We have a group here, we do not add it into the - filter_buffer right now. - - This can occur only once, the second one will raise - an error. *) - Chunk.append ~head:group ~tail:clause; - (group', with_exr) - | Some _, Some _ -> raise ImportErrors.MisplacedWindow) - in - - match with_cte with - | None -> - let content = chunk_filters.b in - truncate content 5; - { - (* There is no group clause in the query *) - content; - parameters = Queue.to_seq chunk_filters.parameters; - cte = None; - } - | Some expression -> - let filters = - if with_exr then ( - (* If we have additionnals filters from the group clause, we - have to report them in the CTE instead of the main query. *) - let c' = Chunk.copy chunk_filters in - truncate c'.b 5; - Some c') - else None - in - - (* Create the common expression table *) - let cte_parameters = build_cte conf ~expression ~filters in - Chunk.append ~head:chunk_filters ~tail:group; - - { - content = chunk_filters.b; - parameters = Queue.to_seq chunk_filters.parameters; - (* The name is hardcoded here, and used in - [Expression.Filters.window] *) - cte = Some ("cte", cte_parameters); - }) - type query = { q : string; parameters : ImportCSV.DataType.t Seq.t; @@ -323,23 +90,12 @@ let clean_window : find which source is pointed by this alias. *) let select : Syntax.t -> query * Path.t ImportExpression.T.t array = fun conf -> - (* If the filters contains a group expression, we need to transform this into - a CTE, which have to be evaluated before the main query. That’s why we are - evaluating the filters right now.*) - let filters = eval_filters conf in - let b = Buffer.create 256 in - let parameters = Queue.create () in - - Option.iter - (fun (_, (cte : Chunk.t)) -> - Buffer.add_buffer b cte.b; - Queue.add_seq parameters (Queue.to_seq cte.parameters)) - filters.cte; + let filter = ImportConf.CTE.of_filters conf.filters in (* For each column in the configuration file, add the corresponding element in the query. - The Sqlite driver return the elements in an array, we create an array to + The Sqlite driver return the elements in an array, we create an array too in order to manage the elements together. *) let headers = Array.make (List.length conf.columns) (Obj.magic None) in @@ -355,6 +111,10 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = let expression = c in (i, clean_window ~prefix:conf.uniq expression)) in + let filters = Chunk.create () in + let request_header = Filters.generate_sql ~conf filter filters in + let b = request_header.Chunk.b + and parameters = request_header.Chunk.parameters in let formatter = Format.formatter_of_buffer b in let () = Format.fprintf formatter "SELECT %a" @@ -363,7 +123,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = (fun formatter (i, column) -> Array.set headers i column; let p = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer p parameters; @@ -372,26 +132,8 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = in Format.pp_print_flush formatter (); - let () = create_from_chunck conf (Chunk.create' b parameters) in - - (* If the query has a CTE, link it as well. We use an INNER JOIN here because - we want to be sure to get all the rows fetched by the CTE - *) - let () = - match filters.cte with - | None -> () - | Some (name, _) -> - Buffer.add_string b "\nINNER JOIN '"; - Buffer.add_string b name; - Buffer.add_string b "' ON "; - Buffer.add_string b name; - Buffer.add_string b ".id = "; - Buffer.add_string b conf.source.name; - Buffer.add_string b ".id" - in - - Buffer.add_buffer b filters.content; - Queue.add_seq parameters filters.parameters; + let () = Chunk.create_from_statement_of_chunck conf request_header in + Chunk.append ~head:request_header ~tail:filters; let formatter = Format.formatter_of_buffer b in (match conf.Syntax.uniq with @@ -402,7 +144,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = ~pp_sep:(fun f () -> Format.fprintf f ", ") (fun formatter column -> let seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer seq parameters)) @@ -415,7 +157,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = ~pp_sep:(fun f () -> Format.fprintf f ", ") (fun formatter column -> let seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) + Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) column in Queue.transfer seq parameters)) @@ -426,20 +168,12 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = let check_external : Syntax.t -> Syntax.Extern.t -> query = fun conf external_ -> - let internal_chunk = - let internal_key_buffer = Buffer.create 16 in - let formatter = Format.formatter_of_buffer internal_key_buffer in - let internal_key_seq = - Q.query_of_expression Q.BindParam formatter (show_path ~conf) - external_.Syntax.Extern.intern_key - in - Format.pp_print_flush formatter (); - Chunk.create' internal_key_buffer (Queue.copy internal_key_seq) - in + let internal_chunk = Chunk.create () in + Chunk.add_expression ~conf internal_chunk external_.Syntax.Extern.intern_key; let external_key_buffer = Buffer.create 16 in Buffer.add_string external_key_buffer - (print_column external_.Syntax.Extern.target + (Table.print_column external_.Syntax.Extern.target ("key_" ^ external_.Syntax.Extern.target.name)); let pointed_tables = pointed_tables conf external_.intern_key in @@ -495,18 +229,12 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query = Chunk.add_string request "-1" | (table, _name) :: _ -> (* If we have a single source, extract the row number. *) - Chunk.add_string request (print_column table "id") + Chunk.add_string request (Table.print_column table "id") in Chunk.add_string request ", "; Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk); - Chunk.add_string request " FROM\n'"; - Chunk.add_string request (Table.name conf.source); - Chunk.add_string request "' AS '"; - Chunk.add_string request conf.source.name; - Chunk.add_string request "'"; - (* Add the externals in the query *) - List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request); + Chunk.create_from_statement_of_chunck ~externals:dependencies conf request; Chunk.add_string request " WHERE "; Chunk.add_string request join_content; Chunk.add_string request " IS NULL AND "; @@ -526,7 +254,7 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit = let formatter = Format.formatter_of_buffer buffer in let () = - prepare_key formatter ~f:(fun formatter -> + Printers.prepare_key formatter ~f:(fun formatter -> Q.query_of_expression Q.NoParam formatter show_column expression) in diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli index 520718a..d158867 100644 --- a/lib/analysers/query.mli +++ b/lib/analysers/query.mli @@ -1,5 +1,3 @@ -module Syntax = ImportConf.Syntax - val create_table : Dependency.t -> string type query = { @@ -10,9 +8,10 @@ type query = { shall be run with all the binded parameters. *) val select : - Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array + ImportConf.Syntax.t -> + query * ImportDataTypes.Path.t ImportExpression.T.t array -val check_external : Syntax.t -> Syntax.Extern.t -> query +val check_external : ImportConf.Syntax.t -> ImportConf.Syntax.Extern.t -> query (** Create a query which select all the missing key in an external *) val build_key_insert : Buffer.t -> Dependency.key -> unit diff --git a/lib/configuration/cte.ml b/lib/configuration/cte.ml new file mode 100644 index 0000000..ff43d6d --- /dev/null +++ b/lib/configuration/cte.ml @@ -0,0 +1,53 @@ +open StdLabels +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { + filters : Path.t Expression.t list; + group : Path.t Expression.t option; +} + +(** Ensure the group criteria in window functions match the global group by + criteria. + + Traverse the configuration tree until finding a group window. *) + +(** Check if the expression contains a group function *) +let matchWindowGroup : 'a ImportExpression.T.t -> bool = + fun expression -> + let exception Found in + let open ImportExpression.T in + let rec f = function + | Empty | Literal _ | Integer _ | Path _ -> () + | Expr e -> f e + | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) + -> List.iter ~f pp + | Window (_, _, _) -> raise Found + | BOperator (_, arg1, arg2) -> + f arg1; + f arg2 + | GEquality (_, arg1, args) -> + f arg1; + List.iter ~f args + in + try + f expression; + false + with + | Found -> true + +(** Transform a list of expression into a list of CTE to evaluate. *) +let of_filters : Path.t Expression.t list -> t list = + fun filters -> + let last_group, prev = + List.fold_left filters + ~init:({ filters = []; group = None }, []) + ~f:(fun (cte, acc) expr -> + begin + if matchWindowGroup expr then + ( { filters = []; group = None }, + { cte with group = Some expr } :: acc ) + else ({ cte with filters = expr :: cte.filters }, acc) + end) + in + List.rev (last_group :: prev) diff --git a/lib/configuration/cte.mli b/lib/configuration/cte.mli new file mode 100644 index 0000000..0f2b3e3 --- /dev/null +++ b/lib/configuration/cte.mli @@ -0,0 +1,20 @@ +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { + filters : Path.t Expression.t list; + group : Path.t Expression.t option; +} +(** Represent a filter to apply in the querry + + The CTE can have filters applied on the previous CTE (or directly in the + sources if there is any yet) and can hold a group (an only one). + + If there is a group, it must be applied after the others filters. + + The order in which the filters are presented in the configuration can change + the results ; it does not matter when we only have classicals filters, + because all cf them can be evaluated at the same time, but as soon we have a + group function, the result become dependant of the previous ones. *) + +val of_filters : Path.t Expression.t list -> t list diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml index ebbcb7c..8516008 100644 --- a/lib/configuration/importConf.ml +++ b/lib/configuration/importConf.ml @@ -1,5 +1,6 @@ open StdLabels module Syntax = Syntax +module CTE = Cte module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path module T = Read_conf diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli index 9ddc40c..40b985b 100644 --- a/lib/configuration/importConf.mli +++ b/lib/configuration/importConf.mli @@ -1,18 +1,18 @@ module Syntax = Syntax -module Table = ImportDataTypes.Table -module Path = ImportDataTypes.Path +module CTE = Cte val dummy_conf : Syntax.t -val root_table : Syntax.t -> Table.t +val root_table : Syntax.t -> ImportDataTypes.Table.t (** Get the root table, this table is the main table to load and each line in this table will be processed *) val t_of_toml : Otoml.t -> (Syntax.t, string) result -val get_table_for_name : Syntax.t -> string option -> Table.t +val get_table_for_name : Syntax.t -> string option -> ImportDataTypes.Table.t -val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.Extern.t list +val get_dependancies_for_table : + Syntax.t -> ImportDataTypes.Table.t -> Syntax.Extern.t list (** Get all the externals refered by the source *) val expression_from_string : - string -> (Path.t ImportExpression.T.t, string) result + string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index 69240c1..11f6726 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -126,44 +126,51 @@ end = struct let column = Expression_parser.Incremental.column_expr end +exception Divergent (** Ensure the group criteria in window functions match the global group by - criteria. + criteria. *) - Traverse the configuration tree until finding a group window. *) +exception NestedGroup +(** Raised when a group contains another one *) + +(** Traverse the configuration tree until finding a group window. *) let matchWindowGroup : eq:('a -> 'a -> bool) -> subset:'a ImportExpression.T.t list -> 'a ImportExpression.T.t -> - bool = + unit = fun ~eq ~subset expression -> - let exception Divergent in let open ImportExpression.T in - let rec f = function + let rec f isIngroup = function | Empty | Literal _ | Integer _ | Path _ -> () - | Expr e -> f e + | Expr e -> f isIngroup e | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) - -> List.iter ~f pp - | Window (_, pp1, _) -> - if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () - else raise_notrace Divergent + -> List.iter ~f:(f isIngroup) pp + | Window (expr, pp1, pp2) -> + let () = + if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () + else + match subset with + | [] -> () + | _ -> raise_notrace Divergent + in + let () = + match isIngroup with + | true -> raise NestedGroup + | false -> () + in + + ignore @@ ImportExpression.T.map_window ~f:(f true) expr; + List.iter ~f:(f true) pp1; + List.iter ~f:(f true) pp2 | BOperator (_, arg1, arg2) -> - f arg1; - f arg2 + f isIngroup arg1; + f isIngroup arg2 | GEquality (_, arg1, args) -> - f arg1; - List.iter ~f args + f isIngroup arg1; + List.iter ~f:(f isIngroup) args in - match subset with - | [] -> - (* Do not bother traversing the tree if there is no group by, just - return Ok *) - true - | _ -> ( - try - f expression; - true - with - | Divergent -> false) + f false expression module Make (S : Decoders.Decode.S) = struct let ( let* ) = S.( let* ) @@ -185,13 +192,15 @@ module Make (S : Decoders.Decode.S) = struct | Error e -> S.fail_with Decoders.Error.(make e) | Ok expr -> ( (* Now check that every window function include at least the uniq list *) - let valid_subset = matchWindowGroup ~eq ~subset:groups expr in - match valid_subset with - | true -> S.succeed expr - | false -> + match matchWindowGroup ~eq ~subset:groups expr with + | () -> S.succeed expr + | exception Divergent -> S.fail "The group function shall match the same arguments as the \ - \"uniq\" parameter") + \"uniq\" parameter" + | exception NestedGroup -> + S.fail + "A group function cannot contains another group function") method source = let* file = S.field "file" S.string diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml index 2dd956b..4831ac8 100644 --- a/lib/data_types/table.ml +++ b/lib/data_types/table.ml @@ -23,3 +23,8 @@ let name : t -> string = match source.tab with | 1 -> file_name | _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ] + +(** Represent a column in a safe way in a query *) +let print_column : t -> string -> string = + fun table column -> + String.concat ~sep:"" [ "'"; table.name; "'.'"; column; "'" ] diff --git a/lib/expression/_readme.rst b/lib/expression/_readme.rst new file mode 100644 index 0000000..729a950 --- /dev/null +++ b/lib/expression/_readme.rst @@ -0,0 +1,39 @@ +All thoses modules transforms an expression. + +Simple transformations +---------------------- + +:Ast: + + Regenerate the Ast from the expression + +:Repr: + + Generate a representable string + +:Type_of: + + Infer the type of an expression + +:Compose: + + Allow to use a module into another one. + +:Headers: + + Extract the headers to display in the result file + +Composed transformations +------------------------ + +:Filter: + + Generate the filters in the query. This module identify if one the + expression is actually a group window and handle a special case for this. + + This module relies on Ast + +:Query: + + Build an sql query. This module relies on Type_of + diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml deleted file mode 100644 index 42c794b..0000000 --- a/lib/expression/filters.ml +++ /dev/null @@ -1,193 +0,0 @@ -(** This module evaluate the sql query to use in order to filter an expression - - The result is built over [Query] except for the group function, which are - translated into a CTE in sql - *) - -open StdLabels -module Q = Query - -type 'a result = { - repr : Format.formatter -> nested:Query.QueryParameter.t -> unit; - group : 'a T.t option; -} - -module Filter = struct - type 'a repr = { - repr : 'a Q.Query.repr; - with_group : 'a T.t option; - } - - type 'a obs = 'a result - type 'a path_repr = 'a Q.Query.path_repr - - let observe : 'a Ast.obs * 'a repr -> 'a obs = - fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group } - - let empty : 'a Ast.obs -> 'a repr = - fun _ -> { repr = Q.Query.empty (); with_group = None } - - let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr = - fun (_, expr) _ -> - { repr = Q.Query.expr expr.repr; with_group = expr.with_group } - - let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr = - fun repr p _ -> { repr = Q.Query.path repr p; with_group = None } - - let literal : string -> 'a Ast.obs -> 'a repr = - fun l _ -> { repr = Q.Query.literal l; with_group = None } - - let integer : string -> 'a Ast.obs -> 'a repr = - fun l _ -> { repr = Q.Query.integer l; with_group = None } - - let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.nvl expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.concat expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun sep expression _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression - and with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expression - in - match with_group with - | None -> { repr = Q.Query.join sep expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let boperator : - T.binary_operator -> - 'a Ast.obs * 'a repr -> - 'a Ast.obs * 'a repr -> - 'a Ast.obs -> - 'a repr = - fun name (_, e1) (_, e2) _ -> - let with_group = - match (e1.with_group, e2.with_group) with - | Some e, None -> Some e - | None, Some e -> Some e - | None, None -> None - | _ -> raise ImportErrors.MisplacedWindow - in - { repr = Q.Query.boperator name e1.repr e2.repr; with_group } - - let gequality : - T.binary_operator -> - 'a Ast.obs * 'a repr -> - ('a Ast.obs * 'a repr) list -> - 'a Ast.obs -> - 'a repr = - fun name (_, e1) group _ -> - let group_repr = List.map ~f:(fun v -> (snd v).repr) group - and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in - - match with_group with - | None -> - { - repr = Q.Query.gequality name e1.repr group_repr; - with_group = e1.with_group; - } - | _ -> raise ImportErrors.MisplacedWindow - - let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun name expressions _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - in - match with_group with - | None -> { repr = Q.Query.funct name expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - let function' : - T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = - fun name expressions _ -> - let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in - let with_group = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - in - match with_group with - | None -> - { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group } - | Some _ -> raise ImportErrors.MisplacedWindow - - (** Window functions are not handled in the filters, we save them as an AST - in order to process them in a separated handler. - - It is not allowed to build nested window functions. *) - let window : - ('a Ast.obs * 'a repr) T.window -> - ('a Ast.obs * 'a repr) list -> - ('a Ast.obs * 'a repr) list -> - 'a Ast.obs -> - 'a repr = - fun name expressions order ast -> - ignore name; - let with_group_expr = - List.find_map ~f:(fun v -> (snd v).with_group) expressions - and with_group_order = - List.find_map ~f:(fun v -> (snd v).with_group) order - in - match (with_group_expr, with_group_order) with - | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow - | None, None -> - (* The column name used with the cte. The name is fixed here, and used - as is in [Analysers.Query.build_cte] and - [Analysers.Query.eval_filters] *) - let q = "cte.group0" in - { - with_group = Some ast; - repr = Q.Query.funct "expr" [ Q.Query.literal q ]; - } -end - -module ASTBuilder = - Compose.Expression - (Ast) - (struct - let v = () - end) - -module F : - Sym.SYM_EXPR - with type 'a obs = 'a result - and type 'a path_repr = Format.formatter -> 'a -> unit = - ASTBuilder.Make (Filter) - -module M = Sym.M (F) - -let query_of_expression : - type b. - b Q.binded_query -> - Format.formatter -> - (Format.formatter -> 'a -> unit) -> - 'a T.t -> - b * 'a T.t option = - fun parameter formatter printer expr -> - let repr = M.eval ~path_repr:printer expr in - match parameter with - | BindParam -> - let p = Queue.create () in - let parameter = Q.QueryParameter.Queue p in - let value = F.observe repr in - value.repr ~nested:parameter formatter; - (p, value.group) - | NoParam -> - let value = F.observe repr in - value.repr ~nested:Literal formatter; - ((), value.group) diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli deleted file mode 100644 index d462b5f..0000000 --- a/lib/expression/filters.mli +++ /dev/null @@ -1,9 +0,0 @@ -module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit -(** Query used inside the filter clauses *) - -val query_of_expression : - 'b Query.binded_query -> - Format.formatter -> - (Format.formatter -> 'a -> unit) -> - 'a T.t -> - 'b * 'a T.t option diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml index 9d6fcb8..7e0f2aa 100755 --- a/lib/helpers/helpers.ml +++ b/lib/helpers/helpers.ml @@ -1,45 +1,2 @@ module Toml = Toml
module Console = Console
-
-let date_from_csv : string -> CalendarLib.Date.t option =
- fun value ->
- let open CalendarLib.Date in
- try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
- | _ -> (
- (* If the date is a number, try from julian day *)
- match int_of_string_opt value with
- | None -> None
- | Some v -> Some (add (make 1899 12 30) (Period.day v)))
-
-let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
- fun f acc b ->
- match f acc b with
- | None -> acc
- | Some v -> v
-
-let try_opt exp =
- try Some (exp ()) with
- | _ -> None
-
-let repr_date formatter date =
- Format.fprintf formatter "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let s_repr_date date =
- Format.sprintf "%02d/%02d/%d"
- (CalendarLib.Date.day_of_month date)
- CalendarLib.Date.(int_of_month @@ month date)
- (CalendarLib.Date.year date)
-
-let repr_opt f channel = function
- | None -> ()
- | Some v -> f channel v
-
-let str_format f =
- let buffer = Buffer.create 16 in
- let formatter = Format.formatter_of_buffer buffer in
- f formatter;
- Format.pp_print_flush formatter ();
- Buffer.contents buffer
|