From 6b377719c10d5ab3343fd5221f99a4a21008e25a Mon Sep 17 00:00:00 2001
From: Sébastien Dailly
Date: Thu, 14 Mar 2024 08:26:58 +0100
Subject: Initial commit

---
 lib/analysers/dependency.ml  | 256 +++++++++++++++++++++++++
 lib/analysers/dependency.mli |  40 ++++
 lib/analysers/dune           |  12 ++
 lib/analysers/headers.ml     |  55 ++++++
 lib/analysers/headers.mli    |  11 ++
 lib/analysers/query.ml       | 445 +++++++++++++++++++++++++++++++++++++++++++
 lib/analysers/query.mli      |  27 +++
 7 files changed, 846 insertions(+)
 create mode 100644 lib/analysers/dependency.ml
 create mode 100644 lib/analysers/dependency.mli
 create mode 100755 lib/analysers/dune
 create mode 100644 lib/analysers/headers.ml
 create mode 100644 lib/analysers/headers.mli
 create mode 100644 lib/analysers/query.ml
 create mode 100644 lib/analysers/query.mli

diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
new file mode 100644
index 0000000..e81cc49
--- /dev/null
+++ b/lib/analysers/dependency.ml
@@ -0,0 +1,256 @@
+open StdLabels
+module IntSet = ImportContainers.IntSet
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+(*
+   Internally, the dependency chain is represented as a graph.
+
+   Each file to process (csv or xlsx) is marked as a node. The links between
+   nodes cannot be represented in the graph alone, because a single index can
+   involve several tables at once (for example [join(source.A, other.B)]). So
+   the graph is only used to compute the import processing order, and the
+   detailed information is kept in a separate table.
+*)
+
+type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list
+
+type key = {
+  name : string;
+  expression : Path.column Expression.t;
+  columns : ImportContainers.IntSet.t Lazy.t;
+}
+
+type t = {
+  table : Table.t;
+  columns : IntSet.t;
+  keys : key list;
+}
+
+let table t = t.table
+let columns t = t.columns
+let keys t = t.keys
+
+type build_map = t ImportContainers.Externals.t
+
+(* The expression can be qualified with a full path (when we are in the column
+   definition) or only with a column (in a binding with an external table).
+
+   This type is here to extract the correct information, according to the type
+   we are dealing with:
+
+   - [to_mapping] picks the right set in which to add the column pointed to by
+     the path (this can be [keys] or [columns])
+   - [of_path] extracts the qualified source from any kind of path.
+*)
+type 'a expression_extractor = {
+  to_mapping : t -> Path.column -> t;
+  of_path : 'a -> string option * Path.column;
+}
+
+(** [add_path_in_map ~f ~conf path map] extracts the column from the element
+    [path] and processes it with the function [f].
+
+    The [path] is abstract, but the function [f.of_path] can extract the
+    needed elements in order to add them to the mapping.
+
+    The function may raise [Unknown_source] if the path describes an unknown
+    table.
*) +let add_path_in_map : + f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map = + fun ~f ~conf path map -> + let table_source, column = f.of_path path in + let table = + try ImportConf.get_table_for_name conf table_source with + | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source)) + in + + ImportContainers.Externals.update map + ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v -> + let mapping = + match v with + | None -> raise (ImportErrors.Unknown_source table.name) + | Some mapping -> mapping + in + + Some (f.to_mapping mapping column)) + +let add_expression_in_map : + f:'a expression_extractor -> + conf:Syntax.t -> + 'a Expression.t -> + build_map -> + build_map = + fun ~f ~conf expr map -> + Expression.fold_values expr ~init:map ~f:(fun map p -> + add_path_in_map ~f ~conf p map) + +let add_columns_in_map : + f:'a expression_extractor -> + conf:Syntax.t -> + 'a Expression.t list -> + build_map -> + build_map = + fun ~f ~conf columns map -> + let columns = + List.fold_left columns ~init:map ~f:(fun map expression -> + let new_map = add_expression_in_map ~f ~conf expression map in + new_map) + in + columns + +(* [add_dependancies ~conf source map path] + add the dependancy from the table [source] to another one after analysing the + expression and extracting the path contained inside. + + This function is called for each path declared inside the expression. *) +let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps + = + fun ~conf extern graph path -> + let source_table = ImportConf.get_table_for_name conf path.Path.alias in + + let source = ImportContainers.Source.from_table source_table in + let target = ImportContainers.Source.from_table extern.target in + + match ImportContainers.Source.equal target source with + | true -> graph + | _ -> (target, [ source ]) :: graph + +let add_external_in_map : + conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps = + fun ~conf extern (map, graph) -> + let dest = ImportContainers.KeyName.from_table extern.target in + (* Pre-check that every source is already declared in the configuration. *) + let _ = + Expression.fold_values extern.intern_key ~init:() ~f:(fun () path -> + try + let _ = ImportConf.get_table_for_name conf path.Path.alias in + () + with + | Not_found -> ( + match path.alias with + | Some table -> raise (ImportErrors.Unknown_source table) + | None -> + (* This is very unlikely. 
This error would be raised if we have + no source for this import *) + let root = conf.source in + raise (ImportErrors.Unknown_source root.name))) + in + + (* Create the new key with all the expression and all the columns inside it *) + let new_key = + { + name = extern.target.Table.name; + expression = extern.extern_key; + columns = + lazy + (Expression.fold_values extern.extern_key + ~f:(fun acc k -> ImportContainers.IntSet.add k acc) + ~init:ImportContainers.IntSet.empty); + } + in + let build_map = + ImportContainers.Externals.update map ~key:dest ~f:(function + | None -> + (* Create the entry for the external if it does not exists *) + Some + { + table = extern.target; + columns = IntSet.empty; + keys = [ new_key ]; + } + | Some mapping -> + (* Or update the existing one with the key we just created *) + Some { mapping with keys = new_key :: mapping.keys }) + in + let graph = + Expression.fold_values extern.intern_key ~init:graph + ~f:(add_dependancies ~conf extern) + in + let build_map = + add_expression_in_map extern.intern_key build_map ~conf + ~f: + { + of_path = + (fun Path.{ alias; column } -> + let table = ImportConf.get_table_for_name conf alias in + (Some table.name, column)); + to_mapping = + (fun mapping column -> + { mapping with columns = IntSet.add column mapping.columns }); + } + in + (build_map, graph) + +let mapper = + { + to_mapping = + (fun mapping column -> + { mapping with columns = IntSet.add column mapping.columns }); + of_path = (fun ({ alias; column } : Path.t) -> (alias, column)); + } + +let get_mapping : Syntax.t -> build_map * deps = + fun conf -> + let root = ImportContainers.Source.from_table (ImportConf.root_table conf) + and root' = + ImportContainers.KeyName.from_table (ImportConf.root_table conf) + in + let graph = [ (root, []) ] in + + (* For each external declared in the configuration file, add the columns to + query *) + let init = + ( ImportContainers.Externals.singleton root' + { + table = ImportConf.root_table conf; + columns = IntSet.empty; + keys = []; + }, + graph ) + in + let map, graph = + List.fold_left conf.externals ~init ~f:(fun map extern -> + add_external_in_map ~conf extern map) + in + + (* Now we don’t bother anymore with the graph and it’s dependency, we just + collect all the columns in the differents expressions *) + let map = + map + |> add_columns_in_map ~conf ~f:mapper conf.columns + |> add_columns_in_map ~conf ~f:mapper conf.sort + |> add_columns_in_map ~conf ~f:mapper conf.filters + |> add_columns_in_map ~conf ~f:mapper conf.uniq + in + (map, graph) + +let get_process_order : Syntax.t -> t list = + fun map -> + let map, graph = get_mapping map in + + match Tsort.sort graph with + | Tsort.ErrorCycle l -> + let name_of_key k = + ImportContainers.(Externals.find (Source.name k) map).table.name + in + raise (ImportErrors.Cycle (List.map ~f:name_of_key l)) + | Sorted elements -> + (* It’s OK, we know there is no cycles now, we can extract the files to + load from this list. + *) + List.filter_map elements ~f:(fun v -> + ImportContainers.Externals.find_opt + (ImportContainers.Source.name v) + map) + (* This list can still have duplicated values, and we have to remove them + still keeping the order. 
+ *) + |> List.fold_left ~init:[] ~f:(fun acc element -> + (* Prevent the same file to beeing loaded twice *) + match List.mem element ~set:acc with + | true -> acc + | false -> element :: acc) diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli new file mode 100644 index 0000000..c89522a --- /dev/null +++ b/lib/analysers/dependency.mli @@ -0,0 +1,40 @@ +type t + +val get_process_order : ImportConf.Syntax.t -> t list +(** Extract the file list to process, following the identified dependancies. + Try to load first the document which does not required another spreadsheet, + and keep going in the topological order + + Raise [Unknown_source file] if a source is referenced but is not declared. + The order matter : the exception will be raised in a source is referenced + before the declaration (when chaining differents externals) *) + +val table : t -> ImportDataTypes.Table.t +(** Get the table to proceed. *) + +val columns : t -> ImportContainers.IntSet.t +(** A set of columns loaded in this table. Thoses columns may not need + reported in the final export. + + Each column is identified by is index in the Excel document. + + This set does not include the columns used in the keys. They can be + fetched with the [keys] function *) + +type key = { + name : string; (** This is the name of the target table we are pointed to *) + expression : ImportDataTypes.Path.column ImportExpression.T.t; + (** The expression used as key *) + columns : ImportContainers.IntSet.t Lazy.t; + (** The list of columns used in the key. All the columns are referenced + in the expression. We can have many columns used inside a single + key when a function is used (for example for joining multiple + columns into a single key) *) +} +(** This type describe the join key in a table. The name is the refering table + using this key (the key name in the datable is key_"name" ), and the + expression describe how to build the key. *) + +val keys : t -> key list +(** [keys] is the list of columns pointed by another one. They are + considered as join key between the diffrent tables. *) diff --git a/lib/analysers/dune b/lib/analysers/dune new file mode 100755 index 0000000..1bbc30f --- /dev/null +++ b/lib/analysers/dune @@ -0,0 +1,12 @@ +(library + (name importAnalyser) + (libraries + importConf + importContainers + importCSV + importDataTypes + importExpression + importErrors + tsort + ) +) diff --git a/lib/analysers/headers.ml b/lib/analysers/headers.ml new file mode 100644 index 0000000..916dfee --- /dev/null +++ b/lib/analysers/headers.ml @@ -0,0 +1,55 @@ +open StdLabels +module I = ImportConf +module E = ImportExpression.T +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +module SheeetMap = Map.Make (struct + type t = Table.t + + (** We are sure we can’t have the same name for two different table. *) + let compare v1 v2 = String.compare (Table.name v1) (Table.name v2) +end) + +type content = string array + +type t = content SheeetMap.t +(** The map associate a line of headers for each table. + + The header are always in string. *) + +(** Get the headers. The function has to be called after reading each document, + and will reformat the first line with the values from the cell. The + functions will not be evaluated (instead they will be displayed "as is". + + When there is no value for this path, return empty string. 
+ *) +let columns : Syntax.t -> t -> string list = + fun conf t -> + (* We build here a custom printer which search in the array for the column + name. + + This function will be given as argument in the expression printer. *) + let f : Path.t -> Buffer.t -> unit = + fun path b -> + let source = I.get_table_for_name conf path.alias in + + match SheeetMap.find_opt source t with + | None -> () + | Some arr -> ( + try Buffer.add_string b (Array.get arr (path.column - 1)) with + | _ -> + prerr_endline + @@ Printf.sprintf "No header found for :%s.%s" + (Option.value ~default:(I.root_table conf).Table.name + path.alias) + (ImportCSV.Csv.column_to_string path.column)) + in + + List.map conf.Syntax.columns ~f:(fun column -> + let b = Buffer.create 4 in + + ImportExpression.Headers.headers_of_expression b f column; + + Buffer.contents b) diff --git a/lib/analysers/headers.mli b/lib/analysers/headers.mli new file mode 100644 index 0000000..03e384b --- /dev/null +++ b/lib/analysers/headers.mli @@ -0,0 +1,11 @@ +module SheeetMap : Map.S with type key = ImportDataTypes.Table.t + +type t = string array SheeetMap.t + +val columns : ImportConf.Syntax.t -> t -> string list +(** Get the headers. The function has to be called after reading each document, + and will reformat the first line with the values from the cell. The + functions will not be evaluated (instead they will be displayed "as is". + + When there is no value for this path, return empty string. + *) diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml new file mode 100644 index 0000000..7a6dd2a --- /dev/null +++ b/lib/analysers/query.ml @@ -0,0 +1,445 @@ +open StdLabels +module Expression = ImportExpression +module Q = Expression.Query +module Syntax = ImportConf.Syntax +module Table = ImportConf.Table +module Path = ImportConf.Path + +let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n) + +(** The module allow to create fragment in the query which keep together the + binderd parameters and the text of the query.contents. + + This is used a lot in order to create the CTE, where you need the create + fragment used both in the main request and partially in the CTE itself. + + The content is mutable and all the functions are returning [unit]. *) +module Chunk = struct + type t = { + b : Buffer.t; + parameters : ImportCSV.DataType.t Queue.t; + } + + let create : unit -> t = + fun () -> { b = Buffer.create 16; parameters = Queue.create () } + + let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = + fun b parameters -> { b; parameters } + + (* Append the element from [tail] at the end of [head] + + Tail is destroyed during the operation. + *) + let append : head:t -> tail:t -> unit = + fun ~head ~tail -> + match Buffer.length tail.b with + | 0 -> () + | _ -> + Buffer.add_buffer head.b tail.b; + Queue.transfer tail.parameters head.parameters; + () + + (** Add a litteral string in the sequence *) + let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v + + let copy : t -> t = + fun t -> + let b = Buffer.create 16 and parameters = Queue.copy t.parameters in + Buffer.add_buffer b t.b; + { b; parameters } + + let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = + fun t p -> Queue.add_seq t.parameters p +end + +let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = + fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f + +(* Collect all the tables pointed by the expression. 
*) +let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = + fun conf expression -> + Expression.T.fold_values expression ~init:[] ~f:(fun acc path -> + let table = ImportConf.get_table_for_name conf path.Path.alias in + let table_name = Table.name table in + (table, table_name) :: acc) + |> List.sort_uniq ~cmp:Stdlib.compare + +(** Represent a column in a safe way in a query *) +let print_column : Table.t -> string -> string = + fun table column -> + String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ] + +let create_table : Dependency.t -> string = + fun mapping -> + let b = Buffer.create 64 in + + Buffer.add_string b "CREATE TABLE '"; + Buffer.add_string b (Table.name (Dependency.table mapping)); + Buffer.add_string b "' (id INTEGER PRIMARY KEY"; + + List.iter (Dependency.keys mapping) ~f:(fun { Dependency.name; _ } -> + Buffer.add_string b ",'key_"; + Buffer.add_string b name; + Buffer.add_string b "'"); + + ImportContainers.IntSet.iter (Dependency.columns mapping) ~f:(fun i -> + Buffer.add_string b ",'col_"; + Buffer.add_string b (string_of_int i); + Buffer.add_string b "'"); + Buffer.add_string b ")"; + + Buffer.contents b + +let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = + fun ~conf buffer { alias; column } -> + let table = ImportConf.get_table_for_name conf alias in + let table_name = table.Table.name in + Format.fprintf buffer "'%s'.col_%d" table_name column + +(** Extract the informations from the dependancies. We get two informations here : + + - the join query in order to load the data from the external column + - the column corresponding to the key in order to identify the missing + links later. + *) +let query_of_external : + conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit = + fun ~conf ~join_buffer external_ -> + let extern_table = Table.name external_.target in + + let formatter = Format.formatter_of_buffer join_buffer.b in + Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table + external_.target.name + (prepare_key ~f:(fun f -> + let q = + Q.query_of_expression Q.BindParam f (show_path ~conf) + external_.intern_key + in + + Chunk.add_parameters join_buffer (Queue.to_seq q))) + (print_column external_.Syntax.target + ("key_" ^ external_.Syntax.target.name)); + + Format.pp_print_flush formatter () + +(** Create the from part of the query, adding all the reuired externals. + + SQLite is able to optimize the query and do not load the table not used in + the select clause. *) +let create_from_chunck : Syntax.t -> Chunk.t -> unit = + fun conf c -> + Chunk.add_string c "\nFROM '"; + Chunk.add_string c (Table.name conf.source); + Chunk.add_string c "' AS '"; + Chunk.add_string c conf.source.name; + Chunk.add_string c "'"; + + (* Add the externals in the query *) + List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c) + +(** Build a CTE query in order to use any group function inside the query. + Return the binded parameters used in the expression. The buffer given in + argument is also modified during the construction. + + If filters is not None, the clauses are added to the CTE. 
*) +let build_cte : + Syntax.t -> + expression:'a Expression.T.t -> + filters:Chunk.t option -> + Chunk.t = + fun conf ~expression ~filters -> + (* The binded parameters queue will be used later in the full query *) + let cte_chunk = Chunk.create () in + + Chunk.add_string cte_chunk "WITH cte AS (SELECT "; + Chunk.add_string cte_chunk conf.source.name; + Chunk.add_string cte_chunk ".id, "; + + let formatter = Format.formatter_of_buffer cte_chunk.b in + + let p = + Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression + in + Format.pp_print_flush formatter (); + Chunk.add_parameters cte_chunk (Queue.to_seq p); + (* The name is hardcoded here, and used in [Expression.Filters.window] *) + Chunk.add_string cte_chunk " AS group0"; + + let () = create_from_chunck conf cte_chunk in + let () = + match filters with + | None -> () + | Some filters_chunk -> + Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk) + in + + Chunk.add_string cte_chunk ")\n"; + cte_chunk + +type filter_evaluation = { + content : Buffer.t; + parameters : ImportCSV.DataType.t Seq.t; + cte : (string * Chunk.t) option; +} +(** Build the filters to apply in the query. We make the difference here + between the predicates to apply directly in the query, and the filters + associated with a group, which are required to be transformed into a CTE +in SQL, and are evaluated before. *) + +(** Evaluate the filters on the query *) +let eval_filters : Syntax.t -> filter_evaluation = + fun conf -> + match conf.filters with + | [] -> + let empty_buffer = Buffer.create 0 in + { content = empty_buffer; parameters = Seq.empty; cte = None } + | filters -> ( + (* Create a new queue in order to accumulate all the parameters to bind. + This filter will be given to both the CTE if any, or reused in the + main query when there is no CTE. + *) + let chunk_filters = Chunk.create () in + Chunk.add_string chunk_filters "\nWHERE "; + + let group = Chunk.create () in + + let with_cte, with_exr = + List.fold_left filters ~init:(None, false) + ~f:(fun (with_cte, with_exr) column -> + (* The function will return an option in second position which is + None when no Group function where found, and Some Expression + otherwise *) + let b = Buffer.create 16 in + + let formatter = Format.formatter_of_buffer b in + let queue, group_found = + Expression.Filters.query_of_expression Q.BindParam formatter + (show_path ~conf) column + in + Format.pp_print_flush formatter (); + let clause = Chunk.create' b queue in + + match (group_found, with_cte) with + | None, _ -> + Chunk.append ~head:chunk_filters ~tail:clause; + Chunk.add_string chunk_filters "\nAND "; + (with_cte, true) + | (Some _ as group'), None -> + (* We have a group here, we do not add it into the + filter_buffer right now. + + This can occur only once, the second one will raise + an error. *) + Chunk.append ~head:group ~tail:clause; + (group', with_exr) + | Some _, Some _ -> raise ImportErrors.MisplacedWindow) + in + + match with_cte with + | None -> + let content = chunk_filters.b in + truncate content 5; + { + (* There is no group clause in the query *) + content; + parameters = Queue.to_seq chunk_filters.parameters; + cte = None; + } + | Some expression -> + let filters = + if with_exr then ( + (* If we have additionnals filters from the group clause, we + have to report them in the CTE instead of the main query. 
*) + let c' = Chunk.copy chunk_filters in + truncate c'.b 5; + Some c') + else None + in + + (* Create the common expression table *) + let cte_parameters = build_cte conf ~expression ~filters in + Chunk.append ~head:chunk_filters ~tail:group; + + { + content = chunk_filters.b; + parameters = Queue.to_seq chunk_filters.parameters; + (* The name is hardcoded here, and used in + [Expression.Filters.window] *) + cte = Some ("cte", cte_parameters); + }) + +type query = { + q : string; + parameters : ImportCSV.DataType.t Seq.t; +} + +(** Build the query and return also the mapping in order to identify each + external links between files. + + The select query will name each column with an alias, and the map allow to + find which source is pointed by this alias. *) +let select : Syntax.t -> query * Path.t ImportExpression.T.t array = + fun conf -> + (* If the filters contains a group expression, we need to transform this into + a CTE, which have to be evaluated before the main query. That’s why we are + evaluating the filters right now.*) + let filters = eval_filters conf in + let b = Buffer.create 256 in + let parameters = Queue.create () in + + Option.iter + (fun (_, (cte : Chunk.t)) -> + Buffer.add_buffer b cte.b; + Queue.add_seq parameters (Queue.to_seq cte.parameters)) + filters.cte; + + (* For each column in the configuration file, add the corresponding element + in the query. + + The Sqlite driver return the elements in an array, we create an array to + in order to manage the elements together. + *) + let headers = Array.make (List.length conf.columns) (Obj.magic None) in + + let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in + let formatter = Format.formatter_of_buffer b in + let () = + Format.fprintf formatter "SELECT %a" + (Format.pp_print_seq + ~pp_sep:(fun f () -> Format.fprintf f ",\n") + (fun formatter (i, column) -> + Array.set headers i column; + let p = + Q.query_of_expression Q.BindParam formatter (show_path ~conf) + column + in + Queue.transfer p parameters; + Format.fprintf formatter " AS result_%d" i)) + columns + in + Format.pp_print_flush formatter (); + + let () = create_from_chunck conf (Chunk.create' b parameters) in + + (* If the query has a CTE, link it as well. 
We use an INNER JOIN here because + we want to be sure to get all the rows fetched by the CTE + *) + let () = + match filters.cte with + | None -> () + | Some (name, _) -> + Buffer.add_string b "\nINNER JOIN '"; + Buffer.add_string b name; + Buffer.add_string b "' ON "; + Buffer.add_string b name; + Buffer.add_string b ".id = "; + Buffer.add_string b conf.source.name; + Buffer.add_string b ".id" + in + + Buffer.add_buffer b filters.content; + Queue.add_seq parameters filters.parameters; + + let formatter = Format.formatter_of_buffer b in + (match conf.Syntax.uniq with + | [] -> () + | uniq -> + Format.fprintf formatter "\nGROUP BY %a" + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f ", ") + (fun formatter column -> + let seq = + Q.query_of_expression Q.BindParam formatter (show_path ~conf) + column + in + Queue.transfer seq parameters)) + uniq); + (match conf.Syntax.sort with + | [] -> () + | sort -> + Format.fprintf formatter "\nORDER BY %a" + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f ", ") + (fun formatter column -> + let seq = + Q.query_of_expression Q.BindParam formatter (show_path ~conf) + column + in + Queue.transfer seq parameters)) + sort); + Format.pp_print_flush formatter (); + + ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers) + +let check_external : Syntax.t -> Syntax.extern -> query = + fun conf external_ -> + let extern_table = Table.name external_.target in + + let parameters = Queue.create () in + let internal_key_buffer = Buffer.create 16 in + let formatter = Format.formatter_of_buffer internal_key_buffer in + let internal_key_seq = + Q.query_of_expression Q.BindParam formatter (show_path ~conf) + external_.intern_key + in + Format.pp_print_flush formatter (); + + let external_key_buffer = Buffer.create 16 in + let pointed_tables = pointed_tables conf external_.intern_key in + Buffer.add_string external_key_buffer + (print_column external_.Syntax.target + ("key_" ^ external_.Syntax.target.name)); + + (* We do a copy before the transfert because the Queue is reused later in the + query *) + Queue.transfer (Queue.copy internal_key_seq) parameters; + + let join_content = Buffer.contents external_key_buffer in + let inner_content = Buffer.contents internal_key_buffer in + let b = Buffer.create 256 in + let formatter = Format.formatter_of_buffer b in + let () = + Format.fprintf formatter + "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \ + AND %s IS NOT NULL AND %s <> ''" + (fun formatter -> function + | [ (table, _name) ] -> + Format.fprintf formatter "%s, " (print_column table "id") + | _ -> Format.fprintf formatter "-1, ") + pointed_tables (* *) + inner_content (* *) + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.pp_print_text f ", ") + (fun formatter (table, name) -> + Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name)) + pointed_tables (* *) + extern_table (* *) + external_.target.name + (prepare_key ~f:(fun b -> + Format.pp_print_text b (Buffer.contents internal_key_buffer))) + join_content (* *) + join_content (* *) + inner_content (* *) + inner_content + in + + Format.pp_print_flush formatter (); + + { q = Buffer.contents b; parameters = Queue.to_seq parameters } + +let build_key_insert : Buffer.t -> Dependency.key -> unit = + fun buffer { Dependency.expression; _ } -> + let show_column : Format.formatter -> Path.column -> unit = + fun formatter column -> Format.fprintf formatter ":col_%d" column + in + + let formatter = Format.formatter_of_buffer buffer in + + let 
() =
+    prepare_key formatter ~f:(fun formatter ->
+        Q.query_of_expression Q.NoParam formatter show_column expression)
+  in
+
+  Format.pp_print_flush formatter ();
+
+  ()
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
new file mode 100644
index 0000000..14d2807
--- /dev/null
+++ b/lib/analysers/query.mli
@@ -0,0 +1,27 @@
+module Syntax = ImportConf.Syntax
+
+val create_table : Dependency.t -> string
+
+type query = {
+  q : string;  (** The query to execute *)
+  parameters : ImportCSV.DataType.t Seq.t;
+}
+(** This type represents a query to execute. [q] is the template to run, and
+    it must be executed with all of its bound parameters. *)
+
+val select :
+  Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
+
+val check_external : Syntax.t -> Syntax.extern -> query
+(** Create a query which selects all the missing keys in an external *)
+
+val build_key_insert : Buffer.t -> Dependency.key -> unit
+(* Build the key fragment of the insert query.
+
+   This uses a prepared statement and applies the function at insert time, in
+   order to have the key already in the expected format in the database.
+
+   The columns are named :col_XX, where XX is the index of the column in the
+   datasheet (starting from 1).
+*)
-- 
cgit v1.2.3
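
Usage note (not part of the patch): the two analysers above are meant to be composed. [Dependency.get_process_order] yields the tables in loading order, and [Query] turns the same configuration into SQL text plus the parameters to bind. The sketch below is a minimal illustration based only on the signatures published in dependency.mli and query.mli; the helper name [dump_plan] and the way the configuration value [conf] is obtained are assumptions, and [Seq.length] requires OCaml 4.14 or later.

(* Minimal sketch: compose the analysers through their public interfaces.
   [conf] is assumed to be an already parsed configuration of type
   [ImportConf.Syntax.t]; how it is built is out of scope here. *)
let dump_plan (conf : ImportConf.Syntax.t) : unit =
  (* Files in loading order: documents with no dependency come first. *)
  let order = ImportAnalyser.Dependency.get_process_order conf in
  List.iter
    (fun dep ->
      (* One CREATE TABLE statement per table to import. *)
      print_endline (ImportAnalyser.Query.create_table dep))
    order;
  (* The final SELECT, together with the parameters to bind at run time. *)
  let (query : ImportAnalyser.Query.query), _headers =
    ImportAnalyser.Query.select conf
  in
  Printf.printf "%s\n-- %d parameter(s) to bind\n" query.q
    (Seq.length query.parameters)

Keeping the SQL text and its parameters together in [query] means a caller binds exactly what the generated statement expects, without re-deriving placeholder positions.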