diff options
33 files changed, 1208 insertions, 455 deletions
diff --git a/.ocamlformat b/.ocamlformat index 72fc0fd..eb55759 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,3 +1,4 @@ +ocaml-version = 5.3
profile = default
parens-tuple = always
sequence-style = terminator
diff --git a/bin/importer.ml b/bin/importer.ml index 0da2ab7..260d83b 100644 --- a/bin/importer.ml +++ b/bin/importer.ml @@ -33,6 +33,7 @@ module Args = struct let load_conf : string -> ImporterSyntax.t = fun file -> + let dirname = Filename.dirname file in match Filename.extension file with | _ -> ( let (conf : (ImporterSyntax.t, string) result) = @@ -51,7 +52,16 @@ module Args = struct Error error_msg in - ImportConf.t_of_toml configuration_file + + ImportConf.t_of_toml + ~context: + { + checkFile = + (fun f -> Sys.file_exists (Filename.concat dirname f)); + loadFile = + (fun f -> Otoml.Parser.from_file (Filename.concat dirname f)); + } + configuration_file in match conf with | Error e -> @@ -238,14 +248,6 @@ let () = let sqlfile = Filename.concat dirname (prefix ^ ".sqlite") in let conf = { conf with mapping_date = creation_date sqlfile } in - (* Ensure that all the files exists *) - List.iter process_order ~f:(fun (mapping : Analyse.t) -> - let source = Analyse.table mapping in - (* First, check *) - if not (Sys.file_exists source.Table.file) then begin - ignore @@ exists @@ Filename.concat dirname source.Table.file - end); - (* The configuration is loaded and valid, we create the errors log file *) let log_error = ImportErrors.log ~with_bom:conf.bom prefix dirname in diff --git a/examples/checksum b/examples/checksum index b223bc4..c47c0ba 100644 --- a/examples/checksum +++ b/examples/checksum @@ -1,3 +1,3 @@ -e158edc600b314e5451cbfbb42fa0a6c importer.csv -e85080e97cd3f1615069232c22e9b9cc example_csv.csv -d646adc7d22212908d7c12a88a19ea4b importer_groupe.csv +e158edc600b314e5451cbfbb42fa0a6c examples/importer.csv +d646adc7d22212908d7c12a88a19ea4b examples/importer_groupe.csv +614c3d20d6c19757fbb76a646809c2e1 examples/externals_filters.csv diff --git a/examples/dataset.toml b/examples/dataset.toml new file mode 100644 index 0000000..dd72cd2 --- /dev/null +++ b/examples/dataset.toml @@ -0,0 +1,2 @@ +[files] + source = "financial.xlsx" diff --git a/examples/externals_filters.toml b/examples/externals_filters.toml new file mode 100644 index 0000000..78e6603 --- /dev/null +++ b/examples/externals_filters.toml @@ -0,0 +1,22 @@ +dataset = "dataset.toml" + +[source] + name = "source" + +# This show how the filter in external works. +# Only the lines with Column C = 'France' will be +# matched. +[externals.source-target] + intern_key = ":source.A" + extern_key = ":A" + filters = [ + ":C = 'France'" + ] + +[sheet] + columns = [ + ":A", + ":C", + ":source-target.A", + ":source-target.C", + ] diff --git a/examples/importer.toml b/examples/importer.toml index 08e9e25..a8ee199 100644 --- a/examples/importer.toml +++ b/examples/importer.toml @@ -1,23 +1,25 @@ +dataset = "dataset.toml" + [source] - file = "financial.xlsx" name = "source" + # The file is looked up in the dataset -[externals.target] +[externals.source-target] intern_key = ":source.A ^ '-suffix'" extern_key = ":A ^ '-suffix'" - file = "financial.xlsx" allow_missing = false + # The file is looked up in the dataset [externals.a_financial] - intern_key = ":target.A" - extern_key = ":O" # This key is here to generate errors + intern_key = ":source-target.A" + extern_key = ":O" file = "financial.xlsx" allow_missing = false [sheet] columns = [ - ":target.A ^ '\\''", # Ensure the quote is escaped before sending to the sql engine - "join('-', :A, :target.E, :B)", + ":source-target.A ^ '\\''", # Ensure the quote is escaped before sending to the sql engine + "join('-', :A, :source-target.E, :B)", ":C", "counter([:C], [:A])", "sum(:F, [:B, :C, :D], [:B])", @@ -40,4 +42,3 @@ ] sort = [] - uniq = [] diff --git a/examples/importer_groupe.toml b/examples/importer_groupe.toml index eb2f7e6..ea2c7de 100644 --- a/examples/importer_groupe.toml +++ b/examples/importer_groupe.toml @@ -1,15 +1,12 @@ -# Cet exemple permet de montrer la combinaison de fonction de groupe avec des -# filtres. +# This file show how the groups can be used in filters. +# +# We search for the greatest value of units sold in France. In order to get the +# expected result, we need to filter first the country in France before +# looking for the greatest value. -# On recherche ici le plus grand nombre d’unitées vendues en France. -# Pour que le résultat soit conforme à l’attendu, il faut que l’application -# commence par filtrer les lignes qui concernent la France, avant de chercher -# la ligne contenant le plus grand nombre d’unités. - -version = 1 +dataset = "dataset.toml" [source] - file = "financial.xlsx" name = "source" [sheet] diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml index cefa6d8..b09f311 100644 --- a/lib/analysers/chunk.ml +++ b/lib/analysers/chunk.ml @@ -43,9 +43,24 @@ let add_parameters : t -> ImportDataTypes.Value.t Seq.t -> unit = fun t p -> Queue.add_seq t.parameters p module Table = ImportDataTypes.Table -module Q = ImportExpression.Query open StdLabels +let add_expression : + repr:(Format.formatter -> 'a -> unit) -> + t -> + 'a ImportExpression.T.t -> + unit = + fun ~repr group expression -> + let formatter = Format.formatter_of_buffer group.b in + Format.pp_print_char formatter '('; + let queue = + ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam + formatter repr expression + in + Format.pp_print_char formatter ')'; + Format.pp_print_flush formatter (); + add_parameters group (Queue.to_seq queue) + (** Extract the informations from the dependancies. We get two informations here : @@ -57,20 +72,34 @@ let join_external : fun ~conf ~join_buffer external_ -> let extern_table = Table.name external_.target in - let formatter = Format.formatter_of_buffer join_buffer.b in - Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table - external_.target.name - (Printers.prepare_key ~f:(fun f -> - let q = - Q.query_of_expression Q.BindParam f (Printers.path ~conf) - external_.intern_key - in - - add_parameters join_buffer (Queue.to_seq q))) - (Table.print_column external_.ImporterSyntax.Extern.target - ("key_" ^ external_.ImporterSyntax.Extern.target.name)); - - Format.pp_print_flush formatter () + add_string join_buffer "\nLEFT JOIN '"; + add_string join_buffer extern_table; + add_string join_buffer "' AS '"; + add_string join_buffer external_.target.name; + add_string join_buffer "' ON "; + add_string join_buffer + (Format.asprintf "%t = %s" + (Printers.prepare_key ~f:(fun f -> + let q = + ImportExpression.Query.query_of_expression + ImportExpression.Query.BindParam f (Printers.path ~conf) + external_.intern_key + in + + add_parameters join_buffer (Queue.to_seq q))) + (Table.print_column external_.ImporterSyntax.Extern.target + ("key_" ^ external_.ImporterSyntax.Extern.target.name))); + + (* Add the filters given for this external in the query *) + let table = external_.ImporterSyntax.Extern.target + and filters = external_.ImporterSyntax.Extern.filters in + List.iter filters ~f:(fun f -> + add_string join_buffer " AND "; + add_expression + ~repr:(fun formatter column -> + Format.fprintf formatter "%s" + (Table.print_column table ("col_" ^ string_of_int column))) + join_buffer f) (** Create the from part of the query, adding all the required externals (even when not required) @@ -90,16 +119,26 @@ let create_from_statement_of_chunck : (* Add the externals in the query *) List.iter externals ~f:(join_external ~conf ~join_buffer:c) -let add_expression : - conf:ImporterSyntax.t -> +(** Add a list of expressions into the group *) +let add_expressions : + repr:(Format.formatter -> 'a -> unit) -> + sep:string -> t -> - ImportDataTypes.Path.t ImportExpression.T.t -> + 'a ImportExpression.T.t list -> unit = - fun ~conf group expression -> + fun ~repr ~sep group exppressions -> let formatter = Format.formatter_of_buffer group.b in - let queue = - ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam - formatter (Printers.path ~conf) expression + let () = + Format.pp_print_list + ~pp_sep:(fun f () -> Format.pp_print_string f sep) + (fun formatter column -> + Format.pp_print_char formatter '('; + let seq = + ImportExpression.Query.query_of_expression + ImportExpression.Query.BindParam formatter repr column + in + Format.pp_print_char formatter ')'; + Queue.transfer seq group.parameters) + formatter exppressions in - Format.pp_print_flush formatter (); - add_parameters group (Queue.to_seq queue) + Format.pp_print_flush formatter () diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli index 13a748a..ad9ca00 100644 --- a/lib/analysers/chunk.mli +++ b/lib/analysers/chunk.mli @@ -27,8 +27,13 @@ val create_from_statement_of_chunck : the select clause. *) val add_expression : - conf:ImporterSyntax.t -> + repr:(Format.formatter -> 'a -> unit) -> t -> 'a ImportExpression.T.t -> unit +(** Add an expression into an existing chunck *) + +val add_expressions : + repr:(Format.formatter -> 'a -> unit) -> + sep:string -> t -> - ImportDataTypes.Path.t ImportExpression.T.t -> + 'a ImportExpression.T.t list -> unit -(** Add an expression into an existing chunck *) +(** Add a list of expressions into an existing chunk *) diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml index 38bc23c..8c969fe 100644 --- a/lib/analysers/dependency.ml +++ b/lib/analysers/dependency.ml @@ -1,7 +1,5 @@ open StdLabels module IntSet = ImportContainers.IntSet -module Table = ImportDataTypes.Table -module Path = ImportDataTypes.Path module Expression = ImportExpression.T (* @@ -19,13 +17,14 @@ type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list type key = { name : string; - expression : Path.column Expression.t; + expression : ImportDataTypes.Path.column Expression.t; columns : ImportContainers.IntSet.t Lazy.t; + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] type t = { - table : Table.t; + table : ImportDataTypes.Table.t; columns : IntSet.t; keys : key list; } @@ -47,8 +46,8 @@ type build_map = t ImportContainers.Externals.t - [of_path] is need to extract the qualified source from any kind of path. *) type 'a expression_extractor = { - to_mapping : t -> Path.column -> t; - of_path : 'a -> string option * Path.column; + to_mapping : t -> ImportDataTypes.Path.column -> t; + of_path : 'a -> string option * ImportDataTypes.Path.column; } (** [add_path_in_map f parent path ] Extract the column from element [path] and @@ -112,9 +111,15 @@ let add_columns_in_map : This function is called for each path declared inside the expression. *) let add_dependancies : - conf:ImporterSyntax.t -> ImporterSyntax.Extern.t -> deps -> Path.t -> deps = + conf:ImporterSyntax.t -> + ImporterSyntax.Extern.t -> + deps -> + ImportDataTypes.Path.t -> + deps = fun ~conf extern graph path -> - let source_table = ImporterSyntax.get_table_for_name conf path.Path.alias in + let source_table = + ImporterSyntax.get_table_for_name conf path.ImportDataTypes.Path.alias + in let source = ImportContainers.Source.from_table source_table in let target = ImportContainers.Source.from_table extern.target in @@ -134,7 +139,10 @@ let add_external_in_map : let _ = Expression.fold_values extern.intern_key ~init:() ~f:(fun () path -> try - let _ = ImporterSyntax.get_table_for_name conf path.Path.alias in + let _ = + ImporterSyntax.get_table_for_name conf + path.ImportDataTypes.Path.alias + in () with | Not_found -> ( @@ -147,16 +155,19 @@ let add_external_in_map : raise (ImportErrors.Unknown_source root.name))) in + let columns () = + let f = fun acc k -> ImportContainers.IntSet.add k acc in + Expression.fold_values extern.extern_key ~f + ~init:ImportContainers.IntSet.empty + in + (* Create the new key with all the expression and all the columns inside it *) let new_key = { - name = extern.target.Table.name; + name = extern.target.ImportDataTypes.Table.name; expression = extern.extern_key; - columns = - lazy - (Expression.fold_values extern.extern_key - ~f:(fun acc k -> ImportContainers.IntSet.add k acc) - ~init:ImportContainers.IntSet.empty); + columns = Lazy.from_fun columns; + filters = extern.filters; } in let build_map = @@ -182,7 +193,7 @@ let add_external_in_map : ~f: { of_path = - (fun Path.{ alias; column } -> + (fun ImportDataTypes.Path.{ alias; column } -> let table = ImporterSyntax.get_table_for_name conf alias in (Some table.name, column)); to_mapping = @@ -197,7 +208,8 @@ let mapper = to_mapping = (fun mapping column -> { mapping with columns = IntSet.add column mapping.columns }); - of_path = (fun ({ alias; column } : Path.t) -> (alias, column)); + of_path = + (fun ({ alias; column } : ImportDataTypes.Path.t) -> (alias, column)); } let get_mapping : ImporterSyntax.t -> build_map * deps = @@ -221,7 +233,24 @@ let get_mapping : ImporterSyntax.t -> build_map * deps = in let map, graph = List.fold_left conf.externals ~init ~f:(fun map extern -> - add_external_in_map ~conf extern map) + let map, graph = add_external_in_map ~conf extern map in + + (* Also add the filters in the externals. The column are not defined as + a full path, with table and column, but only with a column. We need + to transform them to make them present as the same as the others *) + let table = + ImporterSyntax.get_table_for_name conf (Some extern.target.name) + in + let path_filters = + List.map extern.filters ~f:(fun expression -> + Expression.map + ~f:(fun column -> + ImportDataTypes.Path.{ alias = Some table.name; column }) + expression) + in + let map = add_columns_in_map ~conf ~f:mapper path_filters map in + + (map, graph)) in (* Now we don’t bother anymore with the graph and it’s dependency, we just @@ -233,6 +262,7 @@ let get_mapping : ImporterSyntax.t -> build_map * deps = |> add_columns_in_map ~conf ~f:mapper conf.filters |> add_columns_in_map ~conf ~f:mapper conf.uniq in + (map, graph) let get_process_order : ImporterSyntax.t -> t list = diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli index 522436c..1eb55c5 100644 --- a/lib/analysers/dependency.mli +++ b/lib/analysers/dependency.mli @@ -29,7 +29,10 @@ type key = { (** The list of columns used in the key. All the columns are referenced in the expression. We can have many columns used inside a single key when a function is used (for example for joining multiple columns into a - single key) *) + single key). + + The columns used in the filter are also declared as well. *) + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] (** This type describe the join key in a table. The name is the refering table diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml index 7044798..15e8cda 100644 --- a/lib/analysers/filters.ml +++ b/lib/analysers/filters.ml @@ -4,17 +4,6 @@ module Path = ImportDataTypes.Path module Expression = ImportExpression open StdLabels -(** Add a list of expressions into the group *) -let rec add_filters : - conf:ImporterSyntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = - fun ~conf group -> function - | [] -> () - | hd :: [] -> Chunk.add_expression ~conf group hd - | hd :: tl -> - Chunk.add_expression ~conf group hd; - Chunk.add_string group "\nAND "; - add_filters ~conf group tl - type 'a cte_acc = { n : int; has_previous : bool; @@ -57,7 +46,7 @@ let print : Chunk.add_string query "SELECT "; Chunk.add_string query conf.source.name; Chunk.add_string query ".id, "; - Chunk.add_expression ~conf query expression; + Chunk.add_expression ~repr:(Printers.path ~conf) query expression; Chunk.add_string query " AS group_function"; Chunk.create_from_statement_of_chunck conf query; @@ -75,7 +64,8 @@ let print : | [] -> () | _ -> Chunk.add_string query " WHERE "; - add_filters ~conf query cte.ImporterSyntax.CTE.filters + Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf) + query cte.ImporterSyntax.CTE.filters end; Chunk.add_string query ")\n"; Some acc.n @@ -83,7 +73,8 @@ let print : (* Do not add the filters in the CTE (we don’t have any) but in the main query *) Chunk.add_string predicates "WHERE "; - add_filters ~conf predicates cte.ImporterSyntax.CTE.filters; + Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf) + predicates cte.ImporterSyntax.CTE.filters; acc.cte_index in { diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index f89f5f0..dac4d89 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -135,41 +135,24 @@ let select : ImporterSyntax.t -> query * Path.t ImportExpression.T.t array = let () = Chunk.create_from_statement_of_chunck conf request_header in Chunk.append ~head:request_header ~tail:filters; - let formatter = Format.formatter_of_buffer b in (match conf.ImporterSyntax.uniq with | [] -> () | uniq -> - Format.fprintf formatter "\nGROUP BY %a" - (Format.pp_print_list - ~pp_sep:(fun f () -> Format.fprintf f ", ") - (fun formatter column -> - let seq = - Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) - column - in - Queue.transfer seq parameters)) + Chunk.add_string request_header "\nGROUP BY "; + Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header uniq); (match conf.ImporterSyntax.sort with | [] -> () | sort -> - Format.fprintf formatter "\nORDER BY %a" - (Format.pp_print_list - ~pp_sep:(fun f () -> Format.fprintf f ", ") - (fun formatter column -> - let seq = - Q.query_of_expression Q.BindParam formatter (Printers.path ~conf) - column - in - Queue.transfer seq parameters)) + Chunk.add_string request_header "\nORDER BY "; + Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header sort); - Format.pp_print_flush formatter (); - ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers) let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = fun conf external_ -> let internal_chunk = Chunk.create () in - Chunk.add_expression ~conf internal_chunk + Chunk.add_expression ~repr:(Printers.path ~conf) internal_chunk external_.ImporterSyntax.Extern.intern_key; let external_key_buffer = Buffer.create 16 in @@ -180,6 +163,7 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = let pointed_tables = pointed_tables conf external_.intern_key in let parameters = Queue.create () in + (* We do a copy before the transfert because the Queue is reused later in the query *) Queue.transfer (Queue.copy internal_chunk.parameters) parameters; @@ -195,44 +179,47 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = ImporterSyntax.Extern.t list -> ImporterSyntax.Extern.t list = fun table init -> - let res = - (* Do not add the same external if the value is already present *) - let init = - match List.find_opt init ~f:(fun ext -> table == ext) with - | None -> table :: init - | Some _ -> init - in - - Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key - ~f:(fun acc expr -> - match expr.Path.alias with - | None -> acc - | Some _ as path -> ( - let table = ImporterSyntax.get_table_for_name conf path in - (* Look for this table in the externals *) - let external_opt = - List.find_opt conf.ImporterSyntax.externals ~f:(fun t -> - t.ImporterSyntax.Extern.target == table) - in - match external_opt with - | None -> acc - | Some ext -> collect_links ext acc)) + (* Do not add the same external if the value is already present *) + let init = + match + List.find_opt init ~f:(fun ext -> ImporterSyntax.Extern.equal table ext) + with + | None -> table :: init + | Some _ -> init in - res + + Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key + ~f:(fun acc expr -> + match expr.Path.alias with + | None -> acc + | Some _ as path -> ( + let table = ImporterSyntax.get_table_for_name conf path in + (* Look for this table in the externals *) + let external_opt = + List.find_opt conf.ImporterSyntax.externals ~f:(fun t -> + t.ImporterSyntax.Extern.target == table) + in + match external_opt with + | None -> acc + | Some ext -> collect_links ext acc)) in let dependencies = collect_links external_ [] in let join_content = Buffer.contents external_key_buffer in let request = Chunk.create () in + Chunk.add_string request "SELECT "; + + (* Check if we can identify the line number in the file. It’s only possible + if we have a single source used as a key *) let () = match pointed_tables with - | [] -> - (* Otherwise, just return -1 *) - Chunk.add_string request "-1" | (table, _name) :: _ -> (* If we have a single source, extract the row number. *) Chunk.add_string request (Table.print_column table "id") + | [] -> + (* Otherwise, just return -1 *) + Chunk.add_string request "-1" in Chunk.add_string request ", "; Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk); diff --git a/lib/configuration/expression_parser.mly b/lib/configuration/expression_parser.mly index 1761cce..9b97637 100644 --- a/lib/configuration/expression_parser.mly +++ b/lib/configuration/expression_parser.mly @@ -34,20 +34,25 @@ column_expr: path_: | COLUMN - column = IDENT - { ImportExpression.T.Path - ImportDataTypes.Path.{ alias = None - ; column = ImportDataTypes.Path.column_of_string column - } - } - - | COLUMN - table = IDENT - DOT - column = IDENT - { ImportExpression.T.Path - ImportDataTypes.Path.{ alias = Some table - ; column = ImportDataTypes.Path.column_of_string column} + (* The dot character is required as a separator between the table and the + colum, like in [:table.XX] but the table name can also contains [.] + (this is allowed in the toml configuration syntax, as long as the + identifier is quoted. + + So we have to handle cases like [:foo.bar.XX] + *) + path = separated_nonempty_list(DOT, IDENT) + { let reversed_path = List.rev path in + (* reversed_path is nonempty, and we can take head and tail safely *) + let tail = List.tl reversed_path in + let alias = match tail with + | [] -> None + | tl -> Some (String.concat "." (List.rev tl)) + in + + ImportExpression.T.Path + ImportDataTypes.Path.{ alias + ; column = ImportDataTypes.Path.column_of_string (List.hd reversed_path)} } column_: diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml index 2df24bd..4b49686 100644 --- a/lib/configuration/importConf.ml +++ b/lib/configuration/importConf.ml @@ -1,14 +1,22 @@ module TomlReader = Read_conf.Make (Helpers.Toml.Decode) -let t_of_toml : Otoml.t -> (ImporterSyntax.t, string) result = - fun toml -> +type loader_context = TomlReader.loader_context = { + checkFile : string -> bool; + loadFile : string -> Otoml.t; +} + +let t_of_toml : + context:loader_context -> Otoml.t -> (ImporterSyntax.t, string) result = + fun ~context toml -> let version = Otoml.find_or ~default:ImporterSyntax.latest_version toml (Otoml.get_integer ~strict:false) [ "version" ] in match version with - | n when n = ImporterSyntax.latest_version -> TomlReader.read toml + | n when n = ImporterSyntax.latest_version -> begin + TomlReader.read context toml + end | _ -> Printf.eprintf "Unsuported version : %d\n" version; exit 1 diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli index d2f65f2..7234499 100644 --- a/lib/configuration/importConf.mli +++ b/lib/configuration/importConf.mli @@ -1,4 +1,12 @@ -val t_of_toml : Otoml.t -> (ImporterSyntax.t, string) result +type loader_context = { + checkFile : string -> bool; + loadFile : string -> Otoml.t; +} + +val t_of_toml : + context:loader_context -> Otoml.t -> (ImporterSyntax.t, string) result +(** [fileChecker] is called when a file is declared in the configuration. An + arror will be raised if the computation return false *) val expression_from_string : string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index d406b0e..0771565 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -178,14 +178,66 @@ module Make (S : Decoders.Decode.S) = struct let ( >>= ) = S.( >>= ) let ( >|= ) = S.( >|= ) - class loader = + type loader_context = { + checkFile : string -> bool; + loadFile : string -> S.value; + } + + type dataSet = S.value * (string * (S.value * string)) list + + class loader (context : loader_context) = object (self) + method path_checker : (S.value * string) S.decoder -> string S.decoder = + fun check -> + Decoders.Decoder.bind + (fun (value, path) -> + if context.checkFile path then Decoders.Decoder.pure path + else + let message = "Expected a path to an existing file" in + let err = Decoders.Error.make ~context:value message in + S.fail_with err) + check + (** Check if a file given in the configuration exists: an error is raised + if the function [checkFile] retun false. + + In the unit tests, the effect is mapped to a function returning alway + true. *) + + method keep : type a. a S.decoder -> (S.value * a) S.decoder = + fun decoder value -> S.map (fun v -> (value, v)) decoder value + (** [keep decoder] transform a decoder and keep the initial value with the + decoded value. + + This helps to build custom error message, if we want to report an + error from a different place. *) + + method load_resources : dataSet option S.decoder = + (* Do not accept dash in the keys, as the caracter is used to alias + the same file in differents mappings *) + let no_dash_decoder = + let* s = S.string in + match String.exists s ~f:(Char.equal '-') with + | true -> S.fail "Expected a key without '-'" + | false -> S.succeed s + in + let list_files_decoders = + self#keep (S.key_value_pairs' no_dash_decoder (self#keep S.string)) + in + let get_field = S.field "files" list_files_decoders in + + let* result = + S.map context.loadFile (self#path_checker (self#keep S.string)) + in + let files = get_field result in + let dataSetResult = Result.map (fun v -> Some v) files in + S.from_result dataSetResult + (** Load an external file containing the list of files to include.*) + method parse_expression : type a. ?groups:a ImportExpression.T.t list -> eq:(a -> a -> bool) -> a ExpressionParser.path_builder -> - S.value -> - (a ImportExpression.T.t, S.value Decoders.Error.t) result = + a ImportExpression.T.t S.decoder = fun ?(groups = []) ~eq path -> S.string >>= fun v -> match ExpressionParser.of_string path v with @@ -202,41 +254,87 @@ module Make (S : Decoders.Decode.S) = struct S.fail "A group function cannot contains another group function") - method source = - let* file = S.field "file" S.string - and* name = S.field "name" S.string - and* tab = S.field_opt_or ~default:1 "tab" S.int in - S.succeed { Table.file; name; tab } - - method external_ name = - let* intern_key = - S.field "intern_key" - (self#parse_expression ~eq:Path.equal ExpressionParser.path) - and* extern_key = - S.field "extern_key" - (self#parse_expression ~eq:Int.equal ExpressionParser.column) - and* file = S.field "file" S.string - and* tab = S.field_opt_or ~default:1 "tab" S.int - and* allow_missing = - S.field_opt_or ~default:false "allow_missing" S.bool - in + method look_file : + dataset:dataSet option -> name:string -> string S.decoder = + fun ~dataset ~name -> + self#path_checker + (S.one_of + [ + (* If the file is declared, use it *) + ("file", self#keep (S.field "file" S.string)); + ( "dataset", + (* Otherwise search in the data set *) + + (* We work inside an option monad : + - Do we have a dataset + - Do we have a valid root name to look for + - De we have a match in the dataset + *) + let ( let* ) = Option.bind in + let element = + let* _, resources = dataset in + let* root = + match String.split_on_char ~sep:'-' name with + | hd :: _ -> Some hd + | _ -> None + in + let* elem = List.assoc_opt root resources in + Some elem + in + match (element, dataset) with + | Some value, _ -> S.succeed value + | None, Some (t, _) -> + let message = "Looking for \"" ^ name ^ "\"" in + S.fail_with (Decoders.Error.make ~context:t message) + | None, None -> S.fail "No dataset declared" ); + ]) - S.succeed - ImporterSyntax.Extern. - { - intern_key; - extern_key; - target = { name; file; tab }; - allow_missing; - match_rule = None; - } + method source : dataSet option -> Table.t S.decoder = + fun dataset -> + (* The file shall either be present in the external, or be declared + in the dataset *) + let* name = S.field "name" S.string in + let* file = self#look_file ~dataset ~name + and* tab = S.field_opt_or ~default:1 "tab" S.int in + S.succeed { Table.file; Table.name; Table.tab } + + method external' : + dataSet option -> string -> ImporterSyntax.Extern.t S.decoder = + fun dataset name -> + let* intern_key = + S.field "intern_key" + (self#parse_expression ~eq:Path.equal ExpressionParser.path) + and* extern_key = + S.field "extern_key" + (self#parse_expression ~eq:Int.equal ExpressionParser.column) + and* file = self#look_file ~dataset ~name + and* tab = S.field_opt_or ~default:1 "tab" S.int + and* allow_missing = + S.field_opt_or ~default:false "allow_missing" S.bool + and* filters = + S.field_opt_or ~default:[] "filters" + (S.list + (self#parse_expression ~eq:Int.equal ExpressionParser.column)) + in + + S.succeed + ImporterSyntax.Extern. + { + intern_key; + extern_key; + target = { name; file; tab }; + allow_missing; + filters; + } + (** Load the configuration for an external file to link *) method sheet = (* Check the uniq property first, beecause the group functions need to include the same expression (at least) *) let* uniq = S.field_opt_or ~default:[] "uniq" - @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path) + @@ S.list + @@ self#parse_expression ~eq:Path.equal ExpressionParser.path in let* columns = @@ -247,23 +345,27 @@ module Make (S : Decoders.Decode.S) = struct and* filters = S.field_opt_or ~default:[] "filters" @@ S.list - (self#parse_expression ~eq:Path.equal ~groups:uniq - ExpressionParser.path) + @@ self#parse_expression ~eq:Path.equal ~groups:uniq + ExpressionParser.path and* sort = S.field_opt_or ~default:[] "sort" @@ S.list - (self#parse_expression ~eq:Path.equal ~groups:uniq - ExpressionParser.path) + @@ self#parse_expression ~eq:Path.equal ~groups:uniq + ExpressionParser.path in S.succeed @@ fun version source externals locale -> ImporterSyntax. { version; source; externals; columns; filters; sort; uniq; locale } method conf = - let* source = S.field "source" self#source + let* dataset : dataSet option = + S.field_opt_or ~default:None "dataset" self#load_resources + in + + let* source = S.field "source" (self#source dataset) and* externals = S.field_opt_or ~default:[] "externals" - (S.key_value_pairs_seq self#external_) + @@ S.key_value_pairs_seq (self#external' dataset) and* locale = S.field_opt "locale" S.string in let* sheet = S.field "sheet" self#sheet >|= fun v -> v 1 source externals locale @@ -272,15 +374,8 @@ module Make (S : Decoders.Decode.S) = struct S.succeed sheet end - let read_file file = - S.decode_file (new loader)#conf file - |> Result.map_error (fun v -> - let formatter = Format.str_formatter in - Format.fprintf formatter "%a@." S.pp_error v; - Format.flush_str_formatter ()) - - let read toml = - S.decode_value (new loader)#conf toml + let read context toml = + S.decode_value (new loader context)#conf toml |> Result.map_error (fun v -> let formatter = Format.str_formatter in Format.fprintf formatter "%a@." S.pp_error v; diff --git a/lib/file_handler/dune b/lib/file_handler/dune index 80b2ff6..6d5def0 100755 --- a/lib/file_handler/dune +++ b/lib/file_handler/dune @@ -9,6 +9,7 @@ core
lwt
lwt.unix
+ ppx_deriving.runtime
helpers
importDataTypes
importContainers
diff --git a/lib/file_handler/state.ml b/lib/file_handler/state.ml index 7cf57da..e073fd3 100644 --- a/lib/file_handler/state.ml +++ b/lib/file_handler/state.ml @@ -151,7 +151,9 @@ let clear : ImporterSyntax.t -> unit ImportSQL.Db.result = fun ~log_error db mapping conf -> - ImportSQL.Db.clear_duplicates db (A.table mapping) (A.keys mapping) + let table = A.table mapping in + let is_root = ImportDataTypes.Table.equal table conf.source in + ImportSQL.Db.clear_duplicates ~is_root db table (A.keys mapping) ~f:(fun values -> let line = match snd @@ Array.get values 0 with diff --git a/lib/helpers/toml.ml b/lib/helpers/toml.ml index 1b7fb15..5f441dc 100644 --- a/lib/helpers/toml.ml +++ b/lib/helpers/toml.ml @@ -1,9 +1,118 @@ +open StdLabels + +let rec pp : + ?topLevel:bool -> ?path:string list -> Format.formatter -> Otoml.t -> unit = + fun ?(topLevel = true) ?(path = []) format -> function + | Otoml.TomlString v -> begin + match String.contains v '\n' with + | false -> + Format.pp_print_string format "\""; + Format.pp_print_string format v; + Format.pp_print_string format "\"" + | true -> + Format.pp_print_string format {|"""|}; + Format.pp_print_text format v; + Format.pp_print_string format {|"""|} + end + | Otoml.TomlInteger i -> Format.pp_print_int format i + | Otoml.TomlFloat f -> Format.pp_print_float format f + | Otoml.TomlBoolean b -> Format.pp_print_bool format b + | Otoml.TomlArray l -> begin + match (topLevel, l) with + | _, [] -> Format.pp_print_string format "[]" + | false, _ -> Format.pp_print_string format "..." + | true, l -> + Format.pp_print_string format "["; + Format.pp_print_break format 0 4; + Format.pp_open_vbox format 0; + Format.pp_print_list + ~pp_sep:(fun f () -> + Format.pp_print_string f ","; + Format.pp_print_cut f ()) + pp format l; + Format.pp_close_box format (); + Format.pp_print_cut format (); + Format.pp_print_string format "]" + end + | Otoml.TomlTable elements | Otoml.TomlInlineTable elements -> + pp_table ~path format elements + | Otoml.TomlTableArray t -> Format.pp_print_list pp format t + | Otoml.TomlOffsetDateTime _ + | Otoml.TomlLocalDate _ + | Otoml.TomlLocalDateTime _ + | Otoml.TomlLocalTime _ -> () + +and pp_key_values : Format.formatter -> string * Otoml.t -> unit = + fun format (name, value) -> + Format.fprintf format "%s = %a" name (pp ~topLevel:false ~path:[]) value + +and pp_table : + ?path:string list -> Format.formatter -> (string * Otoml.t) list -> unit = + fun ?(path = []) format elements -> + (* Create two lists, one for the subtables, and one for the values. + + As a table is valid until the next table, we then start to print the + preoperties before printintg the subtables inside the element. + *) + let subtables, properties = + List.partition elements ~f:(fun (_, v) -> + match v with + | Otoml.TomlTable _ -> true + | _ -> false) + in + + let () = + match properties with + | [] -> () + | _ -> + let isTopLevel = + match path with + | [] -> true + | _ -> false + in + if not isTopLevel then begin + let path = List.rev path in + Format.pp_print_cut format (); + Format.fprintf format "[%a]" + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.pp_print_string f ".") + Format.pp_print_string) + path; + Format.pp_print_break format 0 4; + Format.pp_open_vbox format 0 + end; + if isTopLevel then begin + Format.pp_print_list ~pp_sep:Format.pp_print_cut + (fun format v -> + match v with + | key, Otoml.TomlTable elements -> + pp_table ~path:(key :: path) format elements + | other -> pp_key_values format other) + format properties + end + else begin + Format.pp_print_string format "..." + end; + + if not isTopLevel then begin + Format.pp_close_box format () + end; + Format.pp_print_cut format () + in + (* Then go deeper inside each subtable *) + List.iter subtables ~f:(function + | name, Otoml.TomlTable v -> pp_table ~path:(name :: path) format v + | _ -> (* Because of the partition, this should not happen *) ()) + module Decode = struct module S = struct type value = Otoml.t let pp : Format.formatter -> value -> unit = - fun format t -> Format.pp_print_string format (Otoml.Printer.to_string t) + fun formatter v -> + Format.pp_open_vbox formatter 0; + pp formatter v; + Format.pp_close_box formatter () let of_string : string -> (value, string) result = Otoml.Parser.from_string_result @@ -22,7 +131,7 @@ module Decode = struct let get_key_value_pairs : value -> (value * value) list option = Otoml.get_opt (fun key -> - Otoml.get_table key |> List.map (fun (k, v) -> (Otoml.string k, v))) + Otoml.get_table key |> List.map ~f:(fun (k, v) -> (Otoml.string k, v))) let to_list : value list -> value = Otoml.array end diff --git a/lib/sql/db.ml b/lib/sql/db.ml index f2a2653..1d92a2e 100644 --- a/lib/sql/db.ml +++ b/lib/sql/db.ml @@ -286,17 +286,6 @@ let create_view : Sqlite3.db -> ImporterSyntax.t -> unit T.result = Ok () -(* - let query, _ = ImportAnalyser.Query.select output in - - let query = - { query with q = Printf.sprintf "CREATE VIEW result AS %s" query.q } - in - let** statement = execute_query db query in - - Sqlite3.step statement |> T.to_result - *) - let check_foreign : f:((string * ImportDataTypes.Value.t) array -> unit) -> Sqlite3.db -> @@ -316,66 +305,118 @@ let check_foreign : |> T.to_result let clear_duplicates : + is_root:bool -> f:((string * ImportDataTypes.Value.t) array -> unit) -> 'a t -> ImportDataTypes.Table.t -> ImportAnalyser.Dependency.key list -> unit T.result = - fun ~f db table keys -> + fun ~is_root ~f db table keys -> let table_name = Table.name table in - List.fold_left keys ~init:(Ok ()) - ~f:(fun state { ImportAnalyser.Dependency.name; _ } -> - let** _ = state in - - let select_query = - String.concat ~sep:"" - [ - "SELECT '"; - table_name; - "'.id, '"; - table_name; - "'.'key_"; - name; - "', '"; - name; - "' FROM '"; - table_name; - "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY '"; - table_name; - "'.'key_"; - name; - "' ORDER BY (id)) AS row_num from '"; - table_name; - "') other_table WHERE other_table.row_num <> 1 and '"; - table_name; - "'.id = other_table.id"; - ] - in - let stmt = Sqlite3.prepare db select_query in + let keys_name = + List.map keys ~f:(fun ImportAnalyser.Dependency.{ name; filters; _ } -> + ( name, + filters, + String.concat ~sep:"" [ "'"; table_name; "'.'key_"; name; "'" ] )) + in - ignore - @@ Sqlite3.iter stmt ~f:(fun data -> - let values = - Array.mapi data ~f:(fun i value -> - (Sqlite3.column_name stmt i, T.to_datatype value)) - in - f values); + let** _ = + List.fold_left keys_name ~init:(Ok ()) + ~f:(fun state (name, filters, key_name) -> + let table_external = + ImportDataTypes.Table.{ file = ""; tab = 0; name = table_name } + in - let delete_query = - Printf.sprintf - {|UPDATE '%s' -SET key_%s = NULL + (* If there are filters to apply in the external, prepare the predicates now *) + let join_buffer = ImportAnalyser.Chunk.create () in + let () = + match filters with + | [] -> () + | _ -> + ImportAnalyser.Chunk.add_string join_buffer " WHERE "; + ImportAnalyser.Chunk.add_expressions ~sep:"\nAND " + ~repr:(fun formatter column -> + Format.fprintf formatter "%s" + (Table.print_column table_external + ("col_" ^ string_of_int column))) + join_buffer filters + in + let values = + Queue.to_seq join_buffer.parameters + |> Seq.map (fun v -> T.of_datatype v) + |> List.of_seq + in + + let** _ = state in + + let select_query = + String.concat ~sep:"" + [ + "SELECT '"; + table_name; + "'.id, "; + key_name; + ", '"; + name; + "' FROM '"; + table_name; + "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY "; + key_name; + " ORDER BY (id)) AS row_num from '"; + table_name; + "'"; + Buffer.contents join_buffer.b; + ") other_table WHERE other_table.row_num <> 1 AND \ + 'other_table'.id = "; + Table.print_column table_external "id"; + ] + in + let stmt = Sqlite3.prepare db select_query in + let* _ = Sqlite3.bind_values stmt values in + + ignore + @@ Sqlite3.iter stmt ~f:(fun data -> + let values = + Array.mapi data ~f:(fun i value -> + (Sqlite3.column_name stmt i, T.to_datatype value)) + in + f values); + + let clear_query = + Printf.sprintf + {|UPDATE '%s' +SET 'key_%s' = NULL FROM ( - SELECT id, row_number() OVER(PARTITION BY key_%s ORDER BY (id)) AS row_num + SELECT id, row_number() OVER(PARTITION BY %s ORDER BY (id)) AS row_num, * from '%s' -) other_table +%s) other_table WHERE other_table.row_num <> 1 and '%s'.id = other_table.id|} - table_name name name table_name table_name - in + table_name name key_name table_name + (Buffer.contents join_buffer.b) + table_name + in - Sqlite3.exec db delete_query |> T.to_result) + let stmt = Sqlite3.prepare db clear_query in + let* _ = Sqlite3.bind_values stmt values in + let* _ = Sqlite3.step stmt in + Result.ok ()) + in + + (* Now remove from the database all the line having ALL the keys to null *) + match (is_root, keys_name) with + | true, _ | _, [] -> Result.ok () + | _ -> + let predicates = + List.map keys_name ~f:(fun (_, _, key) -> key ^ " IS NULL") + |> String.concat ~sep:" AND " + in + let delete_query = + Printf.sprintf {|DELETE FROM '%s' WHERE %s|} table_name predicates + in + let** _ = Sqlite3.exec db delete_query |> T.to_result in + Result.ok () type 'a result = ('a, exn) Result.t diff --git a/lib/sql/db.mli b/lib/sql/db.mli index 213fb27..b73479b 100644 --- a/lib/sql/db.mli +++ b/lib/sql/db.mli @@ -84,12 +84,16 @@ val check_foreign : unit result val clear_duplicates : + is_root:bool -> f:((string * ImportDataTypes.Value.t) array -> unit) -> 'a t -> ImportDataTypes.Table.t -> ImportAnalyser.Dependency.key list -> unit result -(** Remove all duplicated keys in the table by setting them to NULL. *) +(** Remove all duplicated keys in the table by setting them to NULL. + + This function will check each key referenced used in the table, and will + process each key separately.*) val insert_header : 'a t -> diff --git a/lib/sql/hashs.ml b/lib/sql/hashs.ml index eced2b4..4df8ca1 100644 --- a/lib/sql/hashs.ml +++ b/lib/sql/hashs.ml @@ -5,7 +5,6 @@ before inserting the values. *) open StdLabels -module Table = ImportDataTypes.Table let ( let* ) = Result.bind @@ -21,16 +20,21 @@ let evaluate : ImportAnalyser.Dependency.t -> int = (* Extract all the references to this table *) let keys = List.map (ImportAnalyser.Dependency.keys table) - ~f:(fun ImportAnalyser.Dependency.{ name; columns; expression } -> + ~f:(fun + ImportAnalyser.Dependency.{ name; columns; expression; filters } -> + (* It’s better to explicitly ignore the fields we want to exclude than + using the pattern _ in the fuction definition. + + This way, adding a new field in the type will raise a compilation error. *) ignore columns; - (name, expression)) + (name, expression, filters)) in Hashtbl.hash keys let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result = fun db table -> let source = ImportAnalyser.Dependency.table table in - let table_name = Table.name source in + let table_name = ImportDataTypes.Table.name source in let hash = evaluate table in @@ -56,7 +60,7 @@ let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result = let query : 'a T.t -> ImportDataTypes.Table.t -> int option T.result = fun db table -> - let table_name = Table.name table in + let table_name = ImportDataTypes.Table.name table in let query = String.concat ~sep:"" [ "SELECT hash FROM 'hashes' WHERE hashes.'table' = '"; table_name; "'" ] diff --git a/lib/syntax/importerSyntax.ml b/lib/syntax/importerSyntax.ml index 7788613..d91db09 100644 --- a/lib/syntax/importerSyntax.ml +++ b/lib/syntax/importerSyntax.ml @@ -14,13 +14,18 @@ let toml_of_table Table.{ file; tab; name } = Otoml.table values +let repr_expression_list ~repr l = + Otoml.array + (List.map l ~f:(fun v -> + Otoml.string (ImportExpression.Repr.repr ~top:true repr v))) + module Extern = struct type t = { intern_key : Path.t E.t; target : Table.t; extern_key : Path.column E.t; allow_missing : bool; - match_rule : string option; + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] (** Describe a relation beteween two tables *) @@ -38,6 +43,10 @@ module Extern = struct extern.extern_key ); ("file", Otoml.string extern.target.file); ("allow_missing", Otoml.boolean extern.allow_missing); + ( "filters", + repr_expression_list + ~repr:(fun s -> ":" ^ Path.column_to_string s) + extern.filters ); ] in @@ -66,19 +75,13 @@ type t = { } let repr t = - let repr_expression_list l = - Otoml.array - (List.map l ~f:(fun v -> - Otoml.string (ImportExpression.Repr.repr ~top:true Path.show v))) - in - let sheet = Otoml.table [ - ("columns", repr_expression_list t.columns); - ("filters", repr_expression_list t.filters); - ("sort", repr_expression_list t.sort); - ("uniq", repr_expression_list t.uniq); + ("columns", repr_expression_list ~repr:Path.show t.columns); + ("filters", repr_expression_list ~repr:Path.show t.filters); + ("sort", repr_expression_list ~repr:Path.show t.sort); + ("uniq", repr_expression_list ~repr:Path.show t.uniq); ] in @@ -128,7 +131,7 @@ let dummy_conf = { source = { file = ""; tab = 0; name = "" }; version = latest_version; - locale = Some "C"; + locale = None; externals = []; columns = []; filters = []; diff --git a/lib/syntax/importerSyntax.mli b/lib/syntax/importerSyntax.mli index 49b7364..47dabe6 100644 --- a/lib/syntax/importerSyntax.mli +++ b/lib/syntax/importerSyntax.mli @@ -4,7 +4,7 @@ module Extern : sig target : ImportDataTypes.Table.t; extern_key : ImportDataTypes.Path.column ImportExpression.T.t; allow_missing : bool; - match_rule : string option; + filters : ImportDataTypes.Path.column ImportExpression.T.t list; } [@@deriving show, eq] end @@ -145,16 +145,22 @@ Fichier de configuration Les informations générales -------------------------- -version +dataset + Il s’agit d’un chemin vers un fichier listant tous les fichiers à utiliser. + Quand cet clef est définie, l’application ira chercher les fichier aux + emplacements définis ici, et il n’est plus nécessaire de définir les clef + `file` dans le reste de la configuration. - Il s’agit de la version de la syntaxe du fichier de configuration. Valeur par - défaut : `1` + Son utilité prend son sens quand un nouveau jeu de données doit être traité, + et plusieurs règles doivent être exécutées : il suffit alors de changer les + chemins dans le dataset et uniquement dans ce fichier. source La clef `source` indique quel est le fichier source : pour chaque ligne présente dans ce fichier, une ligne sera générée en sortie. - :file: le fichier à charger + :file: le fichier à charger. Ce champ peut être ignoré si le dataset est + renseigné :tab: optionnellement l’onglet concerné :name: le nom sous lequel le fichier sera associé. @@ -186,7 +192,8 @@ fichier : intern_key Il s’agit de la colonne servant à faire la liaison dans la source. file - Le fichier à charger + Le fichier à charger. Ce champ peut être ignoré si le dataset est + renseigné. tab optionnellement l’onglet concerné extern_key @@ -198,6 +205,13 @@ allow_missing Cette clef optionnelle indique s’il faut autoriser les valeurs manquantes lors dans une liaison. Déclarer `allow_missing` va également autoriser les doublons sur les valeurs de clef. +filters + Il s’agit d’une liste d’expression devant être validées pour que la ligne + soit prise en compte. + + .. include:: examples/externals_filters.toml + :code: toml + :class: collapse Une fois la dépendance vers un fichier externe déclaré, il est possible d’utiliser les colonnes de ce fichier en les référençant directement. Cela @@ -208,16 +222,15 @@ afin de construire des chemins sur plusieurs niveaux : .. code:: toml - [externals.acheteur_annuaire] + [externals.annuaire] intern_key = ":I" - extern_key = ":A" file = "ANNUAIRE.xlsx" - - [externals.acheteur_societe] - intern_key = ":acheteur_annuaire.BJ" extern_key = ":A" - file = "SOCIETES.xlsx" + [externals.country] + intern_key = ":annuaire.BJ" + file = "referentials.xlsx" + extern_key = ":A" Les valeurs présentes dans ces colonnes sont pré-traitées pour éviter les erreurs générales lors des imports : les espaces en fin de texte sont @@ -331,6 +344,10 @@ résultat selon que l’on filtre : colonne B ≠ 0 2. d’abord sur la colonne B ≠ 0, puis la plus grande valeur de la colonne A. +.. include:: examples/importer_groupe.toml + :code: toml + :class: collapse + Exemple complet =============== @@ -396,10 +413,13 @@ partir de plusieurs tables. # La table des immatriculations. # Seules les lignes avec la colonne D = 4 nous intéressent [externals.immat] - intern_key = ":A ^ '_4'" + intern_key = ":A" file = "20220222_SO_IMMATRICULATION.xlsx" - extern_key = ":B ^ '_' ^ :D" + extern_key = ":B" allow_missing = true + filters = [ + ":D = 4" + ] [sheet] columns = [ diff --git a/tests/analyser_dependency.ml b/tests/analyser_dependency.ml index 511b706..00f21d7 100644 --- a/tests/analyser_dependency.ml +++ b/tests/analyser_dependency.ml @@ -40,6 +40,7 @@ let test_keys = name = "other"; expression = Expression.Path 3; columns = lazy (Cont.IntSet.singleton 3); + filters = []; }; ] in @@ -61,6 +62,7 @@ let test_keys_missing = name = "last_file"; expression = Expression.Path 3; columns = lazy (Cont.IntSet.singleton 3); + filters = []; }; ] in @@ -145,14 +147,14 @@ let test_unlinked = target = { file = "other.xlsx"; tab = 1; name = "circular" }; extern_key = Path 3; allow_missing = true; - match_rule = None; + filters = []; }; { intern_key = Path { alias = Some "circular"; column = 1 }; target = { file = "other2.xlsx"; tab = 1; name = "circular2" }; extern_key = Path 3; allow_missing = true; - match_rule = None; + filters = []; }; ]; columns = []; @@ -177,7 +179,7 @@ let conf_with_unlinked = target = { file = "other.xlsx"; tab = 1; name = "other" }; extern_key = Path 3; allow_missing = false; - match_rule = None; + filters = []; }; ]; columns = diff --git a/tests/analyser_filters.ml b/tests/analyser_filters.ml index 864cab7..9a54bde 100644 --- a/tests/analyser_filters.ml +++ b/tests/analyser_filters.ml @@ -35,7 +35,7 @@ let simple_filter () = let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in let expected_predicates = Chunk.create () in - Chunk.add_string expected_predicates " WHERE 1=COALESCE('source'.'col_1',0)"; + Chunk.add_string expected_predicates " WHERE (1=COALESCE('source'.'col_1',0))"; Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) "Simple predicate" @@ -58,7 +58,7 @@ let multiple_filters () = (* The predicates can be executed in reverse order, but it’s not an issue because they all are applied at the same time in the projection *) Chunk.add_string expected_predicates - " WHERE COALESCE('source'.'col_1','')=?\nAND 1"; + " WHERE (COALESCE('source'.'col_1','')=?)\nAND (1)"; Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) "Combined predicate" @@ -80,9 +80,9 @@ let group_filter () = let expected_links = Chunk.create () in Chunk.add_string expected_links - "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + "WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_1') OVER \ (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1) AS group_function\n\ FROM 'source' AS 'source')\n"; Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) @@ -106,10 +106,10 @@ let expression_with_group () = let expected_links = Chunk.create () in Chunk.add_string expected_links - "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + "WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_1') OVER \ (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ - FROM 'source' AS 'source' WHERE 1=COALESCE('source'.'col_1',0))\n"; + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1) AS group_function\n\ + FROM 'source' AS 'source' WHERE (1=COALESCE('source'.'col_1',0)))\n"; Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) "The predicate expression is inside of the CTE" @@ -130,13 +130,13 @@ let group_with_expression () = Chunk.add_string expected_predicates "\n\ INNER JOIN 'filter0' ON filter0.id = source.id\n\ - WHERE 1=COALESCE('source'.'col_1',0) AND filter0.group_function"; + WHERE (1=COALESCE('source'.'col_1',0)) AND filter0.group_function"; let expected_links = Chunk.create () in Chunk.add_string expected_links - "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + "WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_1') OVER \ (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1) AS group_function\n\ FROM 'source' AS 'source')\n"; Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) @@ -161,13 +161,13 @@ let group_with_group () = let expected_links = Chunk.create () in Chunk.add_string expected_links - "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + "WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_1') OVER \ (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1) AS group_function\n\ FROM 'source' AS 'source')\n\ - , filter1 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + , filter1 AS (SELECT source.id, (LAST_VALUE('source'.'col_1') OVER \ (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1) AS group_function\n\ FROM 'source' AS 'source'\n\ INNER JOIN 'filter0' ON filter0.id = source.id\n\ WHERE filter0.group_function)\n"; diff --git a/tests/analyser_query_test.ml b/tests/analyser_query_test.ml index fd8914b..37a748b 100644 --- a/tests/analyser_query_test.ml +++ b/tests/analyser_query_test.ml @@ -46,11 +46,11 @@ let check_externals = let query = Q.check_external conf (List.hd conf.externals) in let expected_query = - "SELECT 'source'.'id', 'source'.'col_1'\n\ + "SELECT 'source'.'id', ('source'.'col_1')\n\ FROM 'source' AS 'source'\n\ LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = \ 'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND \ - 'source'.'col_1' IS NOT NULL AND 'source'.'col_1' <> ''" + ('source'.'col_1') IS NOT NULL AND ('source'.'col_1') <> ''" in Alcotest.check Alcotest.string "" expected_query query.q @@ -179,6 +179,7 @@ let prepare_insert = name = "key_test"; expression = Concat [ Path 1; Literal "_"; Empty ]; columns = lazy (ImportContainers.IntSet.singleton 1); + filters = []; } in @@ -207,7 +208,7 @@ let filter_group = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function + {|WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS group_function FROM 'source' AS 'source' LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') @@ -239,7 +240,7 @@ let filter_group2 = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function + {|WITH filter0 AS (SELECT source.id, (LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS group_function FROM 'source' AS 'source' LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') @@ -248,11 +249,94 @@ FROM 'source' AS 'source' LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' INNER JOIN 'filter0' ON filter0.id = source.id -WHERE COALESCE('source'.'col_3',0)=0 AND filter0.group_function|} +WHERE (COALESCE('source'.'col_3',0)=0) AND filter0.group_function|} in Alcotest.check Alcotest.string "" expected contents.q +(** Add an external with a filter. Ensure the predicate is reported in the + query. *) +let external_filter = + "external_filter" >:: fun _ -> + let conf = + Syntax. + { + ConfLoader.conf with + externals = + [ + { + ConfLoader.external_other with + filters = [ Expression_builder.(equal (path 1) integer_one) ]; + }; + ]; + columns = [ Expression_builder.empty ]; + } + in + + let query, _ = ImportAnalyser.Query.select conf in + let expected_query = + {|SELECT '' AS result_0 +FROM 'source' AS 'source' +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' AND (COALESCE('other'.'col_1',0)=1)|} + in + Alcotest.check Test_migration.trimed_string "" expected_query query.q + +let order_by = + "order_by" >:: fun () -> + let conf = + Syntax. + { + ConfLoader.conf with + externals = []; + columns = + [ + Expression_builder.path + ImportDataTypes.Path.{ alias = None; column = 1 }; + ]; + sort = + [ + Expression_builder.path + ImportDataTypes.Path.{ alias = None; column = 1 }; + Expression_builder.integer_one; + ]; + } + in + let query, _ = ImportAnalyser.Query.select conf in + let expected_query = + {|SELECT 'source'.'col_1' AS result_0 +FROM 'source' AS 'source' +ORDER BY ('source'.'col_1'), (1)|} + in + Alcotest.check Test_migration.trimed_string "" expected_query query.q + +let group_by = + "order_by" >:: fun () -> + let conf = + Syntax. + { + ConfLoader.conf with + externals = []; + columns = + [ + Expression_builder.path + ImportDataTypes.Path.{ alias = None; column = 1 }; + ]; + uniq = + [ + Expression_builder.path + ImportDataTypes.Path.{ alias = None; column = 1 }; + Expression_builder.integer_one; + ]; + } + in + let query, _ = ImportAnalyser.Query.select conf in + let expected_query = + {|SELECT 'source'.'col_1' AS result_0 +FROM 'source' AS 'source' +GROUP BY ('source'.'col_1'), (1)|} + in + Alcotest.check Test_migration.trimed_string "" expected_query query.q + let test_suit = [ create_table; @@ -265,6 +349,9 @@ let test_suit = prepare_insert; filter_group; filter_group2; + external_filter; + order_by; + group_by; ] let tests = "analyser_query_test" >::: test_suit diff --git a/tests/confLoader.ml b/tests/confLoader.ml index 13f9840..b0be690 100644 --- a/tests/confLoader.ml +++ b/tests/confLoader.ml @@ -1,5 +1,23 @@ -let load' : string -> (ImporterSyntax.t, string) Result.t = - fun content -> Otoml.Parser.from_string content |> ImportConf.t_of_toml +(** During the test, we don’t care with the file existence *) +let context = + ImportConf. + { loadFile = (fun _ -> Otoml.array []); checkFile = (fun _ -> true) } + +let load' : + ?dataset:(string -> Otoml.t) -> + string -> + (ImporterSyntax.t, string) Result.t = + fun ?(dataset = fun _ -> Otoml.array []) content -> + let toml = Otoml.Parser.from_string content in + ImportConf.t_of_toml toml ~context:{ context with loadFile = dataset } + +let load_from_file : + ?dataset:(string -> Otoml.t) -> + string -> + (ImporterSyntax.t, string) Result.t = + fun ?(dataset = fun _ -> Otoml.array []) content -> + let toml = Otoml.Parser.from_file content in + ImportConf.t_of_toml toml ~context:{ context with loadFile = dataset } (** Read the configuration in toml and return the internal representation *) let load : string -> ImporterSyntax.t = @@ -43,7 +61,7 @@ let external_other = target = external_table_other; extern_key = Path 3; allow_missing = false; - match_rule = None; + filters = []; } let external_table_last = @@ -56,5 +74,5 @@ let external_last = target = external_table_last; extern_key = Path 3; allow_missing = true; - match_rule = None; + filters = []; } diff --git a/tests/configuration_expression.ml b/tests/configuration_expression.ml index cd28589..6478903 100644 --- a/tests/configuration_expression.ml +++ b/tests/configuration_expression.ml @@ -4,104 +4,72 @@ open Test_migration let result_testable = Alcotest.result Test_migration.expression_testable Alcotest.string +(** Helper used to test the equality between the litteral expression and it’s + AST *) +let test : string -> Path.t ImportExpression.T.t -> unit = + fun expr result -> + let expression = ImportConf.expression_from_string expr in + Alcotest.check result_testable "" (Ok result) expression + +let path_column = + "column as path" >:: fun () -> + test ":A" (Path { Path.alias = None; column = 1 }) + +let path_table = + "path with table" >:: fun () -> + test ":table.A" (Path { Path.alias = Some "table"; column = 1 }) + +let path_subtable = + "path with table" >:: fun () -> + test ":table.Name.A" (Path { Path.alias = Some "table.Name"; column = 1 }) + let parse_dquoted = "parse_dquoted" >:: fun _ -> - let expr = "match(\"\\(..\\)\", :B)" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (Function - ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ]))) - result + test "match(\"\\(..\\)\", :B)" + (Function + ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ])) let parse_quoted = "parse_quoted" >:: fun _ -> - let expr = "match('\\(..\\)', :B)" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (Function - ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ]))) - result + test "match('\\(..\\)', :B)" + (Function + ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ])) let concat = "concat" >:: fun _ -> - let expr = ":A ^ :B" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (Concat - [ - Path { alias = None; column = 1 }; Path { alias = None; column = 2 }; - ])) - result + test ":A ^ :B" + (Concat + [ Path { alias = None; column = 1 }; Path { alias = None; column = 2 } ]) let concat2 = "concat2" >:: fun _ -> - let expr = "'A' ^ '_' ^ 'B'" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok (Concat [ Literal "A"; Literal "_"; Literal "B" ])) - result + test "'A' ^ '_' ^ 'B'" (Concat [ Literal "A"; Literal "_"; Literal "B" ]) let litteral = "litteral" >:: fun _ -> (* The text is quoted in shall not be considered as a path *) - let expr = "':A'" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Literal ":A")) result + test "':A'" (Literal ":A") -let empty = - "empty" >:: fun _ -> - let expr = "" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok Empty) result +let empty = "empty" >:: fun _ -> test "" Empty let upper_nvl = - "upper_nvl" >:: fun _ -> - let expr = "NVL('','')" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Nvl [ Empty; Empty ])) result + "upper_nvl" >:: fun _ -> test "NVL('','')" (Nvl [ Empty; Empty ]) let lower_nvl = - "lower_nvl" >:: fun _ -> - let expr = "nvl('','')" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Nvl [ Empty; Empty ])) result - -let numeric = - "numeric" >:: fun _ -> - let expr = "123" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Integer "123")) result + "lower_nvl" >:: fun _ -> test "nvl('','')" (Nvl [ Empty; Empty ]) -let numeric_neg = - "numeric_neg" >:: fun _ -> - let expr = "-123" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Integer "-123")) result +let numeric = "numeric" >:: fun _ -> test "123" (Integer "123") +let numeric_neg = "numeric_neg" >:: fun _ -> test "-123" (Integer "-123") let op_priority = "operator_priority" >:: fun _ -> - let expr = "1 + 2 > 2" in - let result = ImportConf.expression_from_string expr - and expected = - ImportExpression.T.( - BOperator (GT, BOperator (Add, Integer "1", Integer "2"), Integer "2")) - in - - Alcotest.check result_testable "" (Ok expected) result + test "1 + 2 > 2" + (BOperator (GT, BOperator (Add, Integer "1", Integer "2"), Integer "2")) let op_priority2 = "operator_priority" >:: fun _ -> - let expr = "1 ^ 2 = 2" in - let result = ImportConf.expression_from_string expr - and expected = - ImportExpression.T.( - BOperator (Equal, Concat [ Integer "1"; Integer "2" ], Integer "2")) - in - - Alcotest.check result_testable "" (Ok expected) result + test "1 ^ 2 = 2" + (BOperator (Equal, Concat [ Integer "1"; Integer "2" ], Integer "2")) let join = "join" >:: fun _ -> @@ -119,29 +87,15 @@ let join = let join_empty = "join" >:: fun _ -> - let expr = "join('', :A, :B)" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (Join - ( "", - [ - Path { alias = None; column = 1 }; - Path { alias = None; column = 2 }; - ] ))) - result + test "join('', :A, :B)" + (Join + ( "", + [ + Path { alias = None; column = 1 }; Path { alias = None; column = 2 }; + ] )) -let upper = - "upper" >:: fun _ -> - let expr = "upper('')" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Function' (Upper, [ Empty ]))) result - -let trim = - "trim" >:: fun _ -> - let expr = "trim('')" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" (Ok (Function' (Trim, [ Empty ]))) result +let upper = "upper" >:: fun _ -> test "upper('')" (Function' (Upper, [ Empty ])) +let trim = "trim" >:: fun _ -> test "trim('')" (Function' (Trim, [ Empty ])) (** Extract the columns from a window function *) let fold_values = @@ -182,44 +136,24 @@ let bad_quote = let nested_expression = "nested_expression" >:: fun _ -> - let expr = "1 = (1 = 0)" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (BOperator - ( Equal, - Integer "1", - Expr (BOperator (Equal, Integer "1", Integer "0")) ))) - result + test "1 = (1 = 0)" + (BOperator + (Equal, Integer "1", Expr (BOperator (Equal, Integer "1", Integer "0")))) let priority_equality = "priority_equality" >:: fun _ -> - let expr = "1 = 1 = 0" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (BOperator - (Equal, Integer "1", BOperator (Equal, Integer "1", Integer "0")))) - result + test "1 = 1 = 0" + (BOperator (Equal, Integer "1", BOperator (Equal, Integer "1", Integer "0"))) let priority_operator_and = "priority_equality" >:: fun _ -> - let expr = "1 and 1 = 0" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (BOperator (And, Integer "1", BOperator (Equal, Integer "1", Integer "0")))) - result + test "1 and 1 = 0" + (BOperator (And, Integer "1", BOperator (Equal, Integer "1", Integer "0"))) let priority_operator_or = "priority_equality" >:: fun _ -> - let expr = "1 <> 1 or 0" in - let result = ImportConf.expression_from_string expr in - Alcotest.check result_testable "" - (Ok - (BOperator - (Or, BOperator (Different, Integer "1", Integer "1"), Integer "0"))) - result + test "1 <> 1 or 0" + (BOperator (Or, BOperator (Different, Integer "1", Integer "1"), Integer "0")) let unknown_function = "unknown function" >:: fun _ -> @@ -240,6 +174,9 @@ let wrong_arguments = let test_suit = [ + path_column; + path_table; + path_subtable; parse_dquoted; parse_quoted; concat; diff --git a/tests/configuration_toml.ml b/tests/configuration_toml.ml index 0a36faf..620a106 100644 --- a/tests/configuration_toml.ml +++ b/tests/configuration_toml.ml @@ -5,12 +5,12 @@ open Test_migration let nested_group () = let expected = Error - "in field \"sheet\":\n\ - \ in field \"columns\":\n\ - \ while decoding a list:\n\ - \ element 0:\n\ - \ A group function cannot contains another group function, but got\n\ - \ \"max(:A, [counter([:A], [:A])], [])\" \n" + {|in field "sheet": + in field "columns": + while decoding a list: + element 0: + A group function cannot contains another group function, but got + "max(:A, [counter([:A], [:A])], [])"|} and result = ConfLoader.load' {|[source] @@ -22,14 +22,325 @@ columns = [ "max(:A, [counter([:A], [:A])], [])", ]|} in - Alcotest.(check (result Test_migration.syntax string)) + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) "duplicate" expected result +(** Load a simple configuration *) +let load_configuration () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" +tab = 0 + +[sheet] +columns = []|} + and expected = Ok ImporterSyntax.dummy_conf in + Alcotest.(check (result Test_migration.syntax string)) + "Simple configuration" expected configuration + +let externals () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" +tab = 0 + +[externals.other] + intern_key = ":A" + file = "other.xlsx" + extern_key = ":C" + allow_missing = false + +[sheet] +columns = []|} + and expected = + Ok + { + ImporterSyntax.dummy_conf with + externals = [ ConfLoader.external_other ]; + } + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "Simple external" expected configuration + +(** There is an error in this configuration the key [intern_key] is missing in + the external *) +let external_with_missing_key () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" + +[externals.other] + file = "" + extern_key = "" + +[sheet] +columns = []|} + and expected = + Error + {|in field "externals": + Failed while decoding key-value pairs: + Expected an object with an attribute "intern_key", but got + file = "" + extern_key = ""|} + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "Missing key" expected configuration + +let sub_external () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" +tab = 0 + + +[externals.other-1] + intern_key = ":A" + file = "other.xlsx" + extern_key = ":C" + allow_missing = false + +[sheet] +columns = []|} + and expected = + Ok + { + ImporterSyntax.dummy_conf with + externals = + ConfLoader. + [ + { + external_other with + target = { external_table_other with name = "other-1" }; + }; + ]; + } + in + Alcotest.(check (result Test_migration.syntax string)) + "external with path" expected configuration + +let sub_external_with_missing_key () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" + +[externals.other-1] + file = "" + extern_key = "" + +[sheet] +columns = []|} + and expected = + Error + {|in field "externals": + Failed while decoding key-value pairs: + Expected an object with an attribute "intern_key", but got + file = "" + extern_key = ""|} + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "Missing intern_key" expected configuration + +(** The same configuration has external, and sub-element external *) +let sub_external_mixed () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" +tab = 0 + +[externals.other] + intern_key = ":A" + file = "other.xlsx" + extern_key = ":C" + allow_missing = false + +[externals.other-1] + intern_key = ":A" + file = "other.xlsx" + extern_key = ":C" + allow_missing = false + +[sheet] +columns = []|} + and expected = + Ok + { + ImporterSyntax.dummy_conf with + externals = + ConfLoader. + [ + external_other; + { + external_other with + target = { external_table_other with name = "other-1" }; + }; + ]; + } + in + Alcotest.(check (result Test_migration.syntax string)) + "external with path" expected configuration + +let missing_dataset () = + let configuration = + ConfLoader.load' {|[source] +name = "" +tab = 0 + +[sheet] +columns = []|} + and expected = + Error + {|in field "source": + I tried the following decoders but they all failed: + "file" decoder: + Expected an object with an attribute "file", but got name = "" + tab = 0 + + "dataset" decoder: No dataset declared, but got name = "" + tab = 0|} + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "No dataset provided" expected configuration + +let empty_dataset () = + let configuration = + ConfLoader.load' + ~dataset:(fun _ -> Otoml.TomlArray []) + {| + +dataset = "…" + +[source] +name = "" + +[sheet] +columns = []|} + and expected = + Error + {|in field "dataset": Expected an object with an attribute "files", but got []|} + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "Invalid Dataset" expected configuration + +let dataset_with_invalid_key () = + let configuration = + ConfLoader.load' + ~dataset:(fun _ -> + Otoml.( + TomlTable + [ ("files", TomlTable [ ("other-1", TomlString "other.xlsx") ]) ])) + {| + +dataset = "…" + +[source] +name = "" + +[sheet] +columns = []|} + and expected = + Error + {|in field "dataset": + in field "files": + Failed while decoding key-value pairs: + Expected a key without '-', but got "other-1"|} + in + Alcotest.(check (result Test_migration.syntax Test_migration.trimed_string)) + "Invalid Dataset: invalid key" expected configuration + +let external_dataset () = + let configuration = + ConfLoader.load' + ~dataset:(fun _ -> + Otoml.( + TomlTable + [ ("files", TomlTable [ ("other", TomlString "other.xlsx") ]) ])) + {| + +dataset = "…" + +[source] +name = "" +file = "" +tab = 0 + + +[externals.other-1] + # The file is not defined here + # And in the dataset, there is no "other-1", just "other": the application + # should be able to infer the information from "other" and apply it here. + intern_key = ":A" + extern_key = ":C" + allow_missing = false + +[sheet] +columns = []|} + and expected = + Ok + { + ImporterSyntax.dummy_conf with + externals = + ConfLoader. + [ + { + external_other with + target = { external_table_other with name = "other-1" }; + }; + ]; + } + in + Alcotest.(check (result Test_migration.syntax string)) + "Dataset with alias" expected configuration + +let external_filters () = + let configuration = + ConfLoader.load' + {|[source] +name = "" +file = "" +tab = 0 + +[externals.other] + intern_key = ":A" + file = "other.xlsx" + extern_key = ":C" + filters = [":B = 1"] + + +[sheet] +columns = []|} + and expected = + Ok + { + ImporterSyntax.dummy_conf with + externals = + ConfLoader. + [ + { + external_other with + filters = [ Expression_builder.(equal (path 2) integer_one) ]; + }; + ]; + } + in + Alcotest.(check (result Test_migration.syntax string)) + "Filters in external" expected configuration + let test_suit = [ ( "parse_extern" >:: fun _ -> - let toml = Otoml.Parser.from_file "configuration/simple.toml" in - let toml = ImportConf.t_of_toml toml in + let toml = ConfLoader.load_from_file "configuration/simple.toml" in match toml with | Error s -> raise (Failure s) | Ok result -> @@ -45,7 +356,7 @@ let test_suit = Path { alias = None; column = 1 }; Path { alias = None; column = 2 }; ] ); - match_rule = None; + filters = []; allow_missing = true; } in @@ -55,8 +366,7 @@ let test_suit = (Alcotest.list Test_migration.extern_testable) "" [ expected ] result.externals ); ( "parse_columns" >:: fun _ -> - let toml = Otoml.Parser.from_file "configuration/simple.toml" in - let toml = ImportConf.t_of_toml toml in + let toml = ConfLoader.load_from_file "configuration/simple.toml" in match toml with | Error s -> raise (Failure s) @@ -83,10 +393,20 @@ let test_suit = (Alcotest.list Test_migration.expression_testable) "" expected result.columns ); ( "parse_csv" >:: fun _ -> - let toml = Otoml.Parser.from_file "configuration/example_csv.toml" in - let toml = ImportConf.t_of_toml toml in + let toml = ConfLoader.load_from_file "configuration/example_csv.toml" in ignore toml ); ("nested group", `Quick, nested_group); + ("Basic configuration", `Quick, load_configuration); + ("Configuration with external", `Quick, externals); + ("Faulty configuration", `Quick, external_with_missing_key); + ("Sub external", `Quick, sub_external); + ("Faulty configuration", `Quick, sub_external_with_missing_key); + ("Mix in external and sub external", `Quick, sub_external_mixed); + ("Missing dataset", `Quick, missing_dataset); + ("Empty dataset", `Quick, empty_dataset); + ("Dataset with invalid key", `Quick, dataset_with_invalid_key); + ("External dataset", `Quick, external_dataset); + ("External with filter", `Quick, external_filters); ] let tests = "configuration_toml" >::: test_suit diff --git a/tests/test_migration.ml b/tests/test_migration.ml index 17e48cc..acf782d 100644 --- a/tests/test_migration.ml +++ b/tests/test_migration.ml @@ -42,6 +42,15 @@ let extern_testable = make_test (module ImporterSyntax.Extern) let table_testable = make_test (module ImportDataTypes.Table) let int_container_testable = make_test (module ImportContainers.IntSet) +let trimed_string = + make_test + (module struct + type t = string + + let equal s1 s2 = String.equal (String.trim s1) (String.trim s2) + let pp format t = Format.fprintf format "%s" (String.trim t) + end) + let expression_testable = make_test (module struct |