aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
committerSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
commit6b377719c10d5ab3343fd5221f99a4a21008e25a (patch)
treea7c1e9a820d339a2f161af3e09cf9e3161286796
Initial commitmain
-rw-r--r--.gitignore4
-rw-r--r--.ocamlformat11
-rwxr-xr-xbin/dune38
-rw-r--r--bin/importer.ml316
-rwxr-xr-xdune-project27
-rw-r--r--examples/example_csv.toml34
-rw-r--r--examples/financial.xlsxbin0 -> 71637 bytes
-rw-r--r--examples/importer.toml44
-rw-r--r--examples/importer_groupe.toml28
-rw-r--r--lib/analysers/dependency.ml256
-rw-r--r--lib/analysers/dependency.mli40
-rwxr-xr-xlib/analysers/dune12
-rw-r--r--lib/analysers/headers.ml55
-rw-r--r--lib/analysers/headers.mli11
-rw-r--r--lib/analysers/query.ml445
-rw-r--r--lib/analysers/query.mli27
-rwxr-xr-xlib/configuration/dune29
-rw-r--r--lib/configuration/expression_lexer.mll91
-rw-r--r--lib/configuration/expression_parser.messages123
-rw-r--r--lib/configuration/expression_parser.mly185
-rw-r--r--lib/configuration/importConf.ml90
-rw-r--r--lib/configuration/importConf.mli23
-rw-r--r--lib/configuration/of_json.ml134
-rw-r--r--lib/configuration/read_conf.ml216
-rw-r--r--lib/configuration/syntax.ml88
-rwxr-xr-xlib/containers/dune7
-rw-r--r--lib/containers/importContainers.ml61
-rw-r--r--lib/csv/csv.ml30
-rw-r--r--lib/csv/dataType.ml21
-rw-r--r--lib/csv/dataType.mli8
-rwxr-xr-xlib/csv/dune6
-rw-r--r--lib/data_types/dune10
-rw-r--r--lib/data_types/path.ml15
-rw-r--r--lib/data_types/readme.rst4
-rw-r--r--lib/data_types/table.ml19
-rw-r--r--lib/data_types/types.ml15
-rw-r--r--lib/errors/dune9
-rw-r--r--lib/errors/importErrors.ml98
-rw-r--r--lib/errors/importErrors.mli46
-rw-r--r--lib/expression/ast.ml31
-rw-r--r--lib/expression/compose.ml150
-rw-r--r--lib/expression/compose.mli59
-rwxr-xr-xlib/expression/dune9
-rw-r--r--lib/expression/filters.ml193
-rw-r--r--lib/expression/filters.mli9
-rw-r--r--lib/expression/headers.ml89
-rw-r--r--lib/expression/headers.mli7
-rw-r--r--lib/expression/lazier.ml71
-rw-r--r--lib/expression/query.ml335
-rw-r--r--lib/expression/query.mli27
-rw-r--r--lib/expression/repr.ml127
-rw-r--r--lib/expression/repr.mli6
-rw-r--r--lib/expression/sym.ml71
-rw-r--r--lib/expression/t.ml153
-rw-r--r--lib/expression/t.mli54
-rw-r--r--lib/expression/type_of.ml150
-rw-r--r--lib/expression/type_of.mli10
-rw-r--r--lib/file_handler/csv2sql.ml135
-rw-r--r--lib/file_handler/csv2sql.mli10
-rwxr-xr-xlib/file_handler/dune21
-rw-r--r--lib/file_handler/state.ml178
-rw-r--r--lib/file_handler/state.mli46
-rw-r--r--lib/file_handler/xlsx2sql.ml205
-rw-r--r--lib/file_handler/xlsx2sql.mli10
-rw-r--r--lib/helpers/console.ml16
-rw-r--r--lib/helpers/console.mli5
-rwxr-xr-xlib/helpers/dune8
-rwxr-xr-xlib/helpers/helpers.ml45
-rw-r--r--lib/helpers/toml.ml31
-rw-r--r--lib/helpers/toml.mli1
-rw-r--r--lib/sql/date.ml24
-rw-r--r--lib/sql/db.ml383
-rw-r--r--lib/sql/db.mli106
-rw-r--r--lib/sql/dune15
-rw-r--r--lib/sql/hashs.ml79
-rw-r--r--lib/sql/header.ml74
-rw-r--r--lib/sql/join.ml30
-rw-r--r--lib/sql/match.ml22
-rw-r--r--lib/sql/math.ml20
-rw-r--r--lib/sql/t.ml52
-rw-r--r--lib/sql/trim.ml9
-rw-r--r--lib/sql/year.ml19
-rw-r--r--lib/tools/dune10
-rwxr-xr-xlib/tools/git_head.sh11
-rw-r--r--readme.rst668
-rw-r--r--tests/analyser_dependency.ml247
-rw-r--r--tests/analyser_query_test.ml304
-rw-r--r--tests/confLoader.ml128
-rw-r--r--tests/configuration/example_csv.toml31
-rw-r--r--tests/configuration/simple.toml20
-rw-r--r--tests/configuration_expression.ml253
-rw-r--r--tests/configuration_toml.ml71
-rw-r--r--tests/dune17
-rw-r--r--tests/expression_builder.ml47
-rw-r--r--tests/expression_query.ml208
-rw-r--r--tests/expression_repr.ml37
-rw-r--r--tests/expression_type_of.ml65
-rw-r--r--tests/importCSV_test.ml29
-rw-r--r--tests/importConf_test.ml23
-rw-r--r--tests/importer_test.ml21
-rw-r--r--tests/sql_date.ml18
-rw-r--r--tests/sql_db.ml198
-rw-r--r--tests/sql_int.ml26
-rw-r--r--tests/sql_match.ml12
-rw-r--r--tests/sql_trim.ml11
105 files changed, 8226 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37d870f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+_build/
+examples/*.csv
+examples/*.sqlite
+*.opam
diff --git a/.ocamlformat b/.ocamlformat
new file mode 100644
index 0000000..72fc0fd
--- /dev/null
+++ b/.ocamlformat
@@ -0,0 +1,11 @@
+profile = default
+parens-tuple = always
+sequence-style = terminator
+
+single-case = sparse
+type-decl = sparse
+break-separators = after
+break-cases = fit-or-vertical
+
+# Keep the begin/end blocks
+exp-grouping=preserve
diff --git a/bin/dune b/bin/dune
new file mode 100755
index 0000000..fad168d
--- /dev/null
+++ b/bin/dune
@@ -0,0 +1,38 @@
+(env
+ (dev
+ (flags (:standard -warn-error -A))
+ )
+ (release
+ (ocamlopt_flags (-O3)))
+)
+
+(executable
+ (name importer)
+ (libraries
+ csv
+ lwt
+ lwt.unix
+ otoml
+ yojson
+ tools
+ helpers
+ importConf
+ importAnalyser
+ importContainers
+ importDataTypes
+ importCSV
+ importErrors
+ importExpression
+ importFileHandler
+ importSQL
+ )
+ (link_flags (:standard))
+(preprocess (pps ppx_yojson_conv))
+)
+
+(install
+ (files importer.exe)
+ (section bin)
+ (package importer))
+
+(dirs :standard \ examples)
diff --git a/bin/importer.ml b/bin/importer.ml
new file mode 100644
index 0000000..f737a46
--- /dev/null
+++ b/bin/importer.ml
@@ -0,0 +1,316 @@
+open StdLabels
+module Analyse = ImportAnalyser.Dependency
+module Headers = ImportAnalyser.Headers
+module Db = ImportSQL.Db
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+let ( let* ) = Result.bind
+
+let exists source =
+ match Sys.file_exists source with
+ | true -> source
+ | false ->
+ prerr_endline
+ @@ String.concat ~sep:" " [ "The file"; source; "does not exists" ];
+ exit 1
+
+let creation_date file =
+ try
+ let stat = Unix.stat file in
+ Some stat.Unix.st_mtime
+ with
+ | Unix.Unix_error _ -> None
+
+module Args = struct
+ type arguments = {
+ configuration : ImportConf.Syntax.t;
+ conf_name : string;
+ bom : bool;
+ print_conf : bool;
+ mapping_date : float option;
+ }
+
+ let load_conf : string -> ImportConf.Syntax.t =
+ fun file ->
+ match Filename.extension file with
+ | ".json" -> (
+ let configuration_file = Yojson.Safe.from_file (exists file) in
+ try ImportConf.t_of_yojson configuration_file with
+ | e ->
+ print_endline @@ ImportErrors.repr_error e;
+ exit 1)
+ | _ -> (
+ let (conf : (Db.Syntax.t, string) result) =
+ let* configuration_file =
+ try Ok (Otoml.Parser.from_file (exists file)) with
+ | Otoml.Parse_error (position, message) ->
+ let format_position () = function
+ | None -> ""
+ | Some (line, col) ->
+ Printf.sprintf "At line %d, col %d: " line col
+ in
+
+ let error_msg =
+ Printf.sprintf "%aError %s" format_position position message
+ in
+
+ Error error_msg
+ in
+ ImportConf.t_of_toml configuration_file
+ in
+ match conf with
+ | Error e ->
+ prerr_endline e;
+ exit 1
+ | Ok e -> e)
+
+ let load () =
+ let conf = ref ("", ImportConf.dummy_conf)
+ and bom = ref true
+ and usage = "importer [--conf configuration.toml]"
+ and print_conf = ref false in
+
+ let annon_fun _filename =
+ print_endline usage;
+ exit 1
+ and set_conf file = conf := (file, load_conf file) in
+ let speclist =
+ [
+ ( "--version",
+ Arg.Unit
+ (fun () ->
+ Printf.printf "Version %s\n" Tools.Git_hash.revision;
+ exit 0),
+ "\tDisplay the version of the application and exit" );
+ ("--conf", Arg.String set_conf, "Configuration file");
+ ("-c", Arg.String set_conf, "Configuration file");
+ ("--no-bom", Arg.Clear bom, "Do not insert a BOM in the CSV");
+ ( "--print-conf",
+ Arg.Set print_conf,
+ "Reformat the configuration file and exit" );
+ ]
+ in
+ let () = Arg.parse speclist annon_fun usage in
+ {
+ configuration = snd !conf;
+ bom = !bom;
+ conf_name = fst !conf;
+ print_conf = !print_conf;
+ mapping_date = None;
+ }
+end
+
+(** Print the result from the query.
+
+    Each value is given with the associated expression in the configuration;
+    the function is expected to convert the result into a string in order to
+    include the content in the output CSV.
+ *)
+let printer : Path.t ImportExpression.T.t * ImportCSV.DataType.t -> string =
+ fun (column, value) ->
+ ignore column;
+ ImportCSV.DataType.to_string value
+
+let bom = "\xEF\xBB\xBF"
+
+let process_table :
+ 'a Db.t ->
+ string ->
+ Csv.out_channel Lazy.t ->
+ Args.arguments ->
+ string array Headers.SheeetMap.t ->
+ Analyse.t ->
+ string array Headers.SheeetMap.t =
+ fun db dirname log_error conf map mapping ->
+ let source = Analyse.table mapping in
+ (* Load all the element in the database *)
+ let file = Filename.concat dirname source.file in
+
+ let file_date = creation_date file in
+ let recent =
+ match (conf.mapping_date, file_date) with
+ | Some r, Some t -> r > t
+ | _, _ -> false
+ in
+
+ match (recent, ImportSQL.Db.check_table_schema db mapping) with
+ | true, Ok true -> (
+      (* If the data extraction is more recent than the sheet, and if the
+         configuration did not change for this table, we do not reload the
+         table. *)
+ match ImportSQL.Db.query_headers db source with
+ | Ok v ->
+ let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in
+ Headers.SheeetMap.add source text_headers map
+ | Error _ -> map)
+ | _ ->
+ Printf.printf "Loading document %s %!" source.name;
+ let headers_opt =
+ let extension = String.lowercase_ascii (Filename.extension file) in
+ match extension with
+ | ".xlsx" ->
+ Lwt_main.run
+ @@ ImportFileHandler.Xlsx2sql.importInDatable ~dirname
+ ~conf:conf.configuration ~log_error mapping db
+ | ".csv" ->
+ Lwt_main.run
+ @@ ImportFileHandler.Csv2sql.importInDatable ~dirname
+ ~conf:conf.configuration ~log_error mapping db
+ | _ -> raise (ImportErrors.Unknown_source extension)
+ in
+ Helpers.Console.close_cursor ();
+
+ (* Get the headers *)
+ let headers =
+ match headers_opt with
+ | None -> map
+ | Some v ->
+ let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in
+ Headers.SheeetMap.add source text_headers map
+ in
+
+ (* For each external check if the values are loaded *)
+ let dependancies =
+ ImportConf.get_dependancies_for_table conf.configuration source
+ in
+ List.iter dependancies ~f:(fun ext ->
+ match ext.ImportConf.Syntax.allow_missing with
+ | true -> ()
+ | false -> (
+ Printf.printf "Checking dependancies for %s %!"
+ ext.ImportConf.Syntax.target.ImportDataTypes.Table.name;
+ try
+ ignore
+ @@ Db.check_foreign db conf.configuration ext ~f:(fun values ->
+ Helpers.Console.update_cursor ();
+
+ let row =
+ match snd (Array.get values 0) with
+ | ImportCSV.DataType.Integer i -> i
+ | _ -> -1
+ and value = snd (Array.get values 1) in
+ let error =
+ ImportErrors.
+ {
+ source;
+ sheet = source.Table.tab;
+ row;
+ value;
+ target = Some ext.ImportConf.Syntax.target;
+ exn =
+ Failure
+ (Printf.sprintf "Key '%s' not found"
+ (CSV.DataType.to_string value));
+ }
+ in
+
+ ImportErrors.output_error log_error error);
+ Helpers.Console.close_cursor ()
+ with
+ | Sqlite3.Error _ ->
+ (* We can have errors here if we have cycles in the
+ dependencies, but it’s OK at this step.*)
+ ()));
+ headers
+
+let () =
+ let conf = Args.load () in
+
+ (* Global configuration variables *)
+ let dirname = Filename.dirname conf.conf_name in
+ let basename = Filename.basename conf.conf_name in
+ let prefix = Filename.remove_extension basename in
+
+ (* Analyse the configuration *)
+ let process_order =
+ try Analyse.get_process_order conf.configuration with
+ | e ->
+ prerr_endline @@ ImportErrors.repr_error e;
+ exit 1
+ in
+
+ (* With the printconf option, we do not need to open any file *)
+ if conf.print_conf then (
+ let toml = ImportConf.Syntax.repr conf.configuration in
+ Otoml.Printer.to_channel ~collapse_tables:true stdout toml;
+ exit 0);
+
+ let sqlfile = Filename.concat dirname (prefix ^ ".sqlite") in
+ let conf = { conf with mapping_date = creation_date sqlfile } in
+
+ (* Ensure that all the files exists *)
+ List.iter process_order ~f:(fun (mapping : Analyse.t) ->
+ let source = Analyse.table mapping in
+ (* First, check *)
+ if not (Sys.file_exists source.Table.file) then begin
+ ignore @@ exists @@ Filename.concat dirname source.Table.file
+ end);
+
+ (* The configuration is loaded and valid, we create the errors log file *)
+ let log_error = ImportErrors.log ~with_bom:conf.bom prefix dirname in
+
+ (* Now check if we can open the output file. Opening the file "truncate" it.
+ I do not want to delete the file content before I am sure that I can
+ process it. This is why the file is opened after reading the configuration
+ file although we do not need the configuration file for it.
+ *)
+ let out_channel =
+ try
+ Out_channel.open_bin (Filename.concat dirname (String.cat prefix ".csv"))
+ with
+ | Sys_error e ->
+ prerr_endline e;
+ exit 1
+ in
+
+ Fun.protect
+ ~finally:(fun () ->
+ Out_channel.flush out_channel;
+ Out_channel.close_noerr out_channel)
+ (fun () ->
+ (* Add the BOM to the CSV *)
+ if conf.bom then output_string out_channel bom;
+
+ let out_csv =
+ Csv.to_channel ~separator:';' ~excel_tricks:false out_channel
+ in
+
+ (* Create the database *)
+ ignore
+ @@ Db.with_db sqlfile (fun db ->
+ let headers =
+ List.fold_left process_order ~init:Headers.SheeetMap.empty
+ ~f:(process_table db dirname log_error conf)
+ in
+
+ let first_line = Headers.columns conf.configuration headers in
+ Csv.output_record out_csv first_line;
+
+ (* Run the query *)
+ ignore @@ Db.create_view db conf.configuration;
+ Printf.printf "Extracting results %!";
+ match
+ Db.query
+ ~f:(fun v ->
+ let arr = Array.to_seq v |> Seq.map printer |> List.of_seq in
+
+ Helpers.Console.update_cursor ();
+ Csv.output_record out_csv arr)
+ db conf.configuration
+ with
+ | Ok () ->
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27)
+ (char_of_int 27) (char_of_int 27);
+ Ok ()
+ | Error e ->
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27)
+ (char_of_int 27) (char_of_int 27);
+ print_endline @@ ImportErrors.repr_error e;
+
+ Ok ()));
+
+ (* Close the error file *)
+ match Lazy.is_val log_error with
+ | false -> ()
+ | true -> Csv.close_out (Lazy.force log_error)
diff --git a/dune-project b/dune-project
new file mode 100755
index 0000000..ca435e6
--- /dev/null
+++ b/dune-project
@@ -0,0 +1,27 @@
+(lang dune 3.7)
+(using menhir 2.0)
+
+(generate_opam_files true)
+(implicit_transitive_deps true)
+(authors "Sébastien Dailly")
+(maintainers "Sébastien Dailly")
+
+(package
+ (name importer)
+ (depends
+ (ocaml (>= 4.14.0))
+ (sqlite3 (>= 5.1.0))
+ (SZXX (and (>= 2.1.0) (<= 4.0.0)))
+ (csv (>= 2.4))
+ (csv-lwt (>= 2.4))
+ (decoders (>= 1.0.0))
+ (ppx_yojson_conv (>= v0.14.0))
+ (ounit (>= 2.2.6))
+ (otoml (>= 1.0.1))
+ (re (>= 1.10.4))
+ (ppx_deriving (>= 5.2.1))
+ (tsort (>= 2.1.0))
+ (calendar (>= 3.0.0))
+ )
+ (allow_empty)
+)
diff --git a/examples/example_csv.toml b/examples/example_csv.toml
new file mode 100644
index 0000000..1374515
--- /dev/null
+++ b/examples/example_csv.toml
@@ -0,0 +1,34 @@
+version = 1
+
+[source]
+ file = "importer.csv"
+ name = "source"
+
+[externals.target]
+ intern_key = ":source.A"
+ extern_key = ":A"
+ file = "financial.xlsx"
+ allow_missing = false
+
+[externals.a_financial]
+ intern_key = ":target.A"
+ extern_key = ":O"
+ file = "financial.xlsx"
+ allow_missing = false
+
+[sheet]
+ columns = [
+ ":A",
+ "concat(\"-\", :A, :target.E, :B)",
+ ":C",
+ """counter(
+ [:C],
+ [:A])""",
+ ":E",
+ "match(\"\\(..\\)\", :B)",
+ ":D",
+ "counter([:D] ,[:A])"
+ ]
+ filters = []
+ sort = []
+ uniq = []
diff --git a/examples/financial.xlsx b/examples/financial.xlsx
new file mode 100644
index 0000000..6ed8522
--- /dev/null
+++ b/examples/financial.xlsx
Binary files differ
diff --git a/examples/importer.toml b/examples/importer.toml
new file mode 100644
index 0000000..4be6618
--- /dev/null
+++ b/examples/importer.toml
@@ -0,0 +1,44 @@
+version = 1
+
+[source]
+ file = "financial.xlsx"
+ name = "source"
+
+[externals.target]
+ intern_key = ":source.A ^ '-suffix'"
+ extern_key = ":A ^ '-suffix'"
+ file = "financial.xlsx"
+ allow_missing = false
+
+[externals.a_financial]
+ intern_key = ":target.A"
+ extern_key = ":O" # This key is here to generate errors
+ file = "financial.xlsx"
+ allow_missing = false
+
+[sheet]
+ columns = [
+ ":target.A ^ '\\''", # Ensure the quote is escaped before sending to the sql engine
+ "concat('-', :A, :target.E, :B)",
+ ":C",
+ "counter([:C], [:A])",
+ "sum(:F, [:B, :C, :D], [:B])",
+ "int(1) = counter([:C], [:A])",
+ ":E", "match('(..)', :B)",
+ ":D",
+ "counter([:D], [:A])",
+ "year(:N)",
+ # Apply a function on the year
+ """:Q
+ -
+ # We only keep the years after Y2K
+ 2000"""
+
+ ]
+ filters = [
+
+ "1 = counter( [:B], [:A])",
+
+ ]
+ sort = []
+ uniq = []
diff --git a/examples/importer_groupe.toml b/examples/importer_groupe.toml
new file mode 100644
index 0000000..eb2f7e6
--- /dev/null
+++ b/examples/importer_groupe.toml
@@ -0,0 +1,28 @@
+# Cet exemple permet de montrer la combinaison de fonction de groupe avec des
+# filtres.
+
+# On recherche ici le plus grand nombre d’unitées vendues en France.
+# Pour que le résultat soit conforme à l’attendu, il faut que l’application
+# commence par filtrer les lignes qui concernent la France, avant de chercher
+# la ligne contenant le plus grand nombre d’unités.
+
+version = 1
+
+[source]
+ file = "financial.xlsx"
+ name = "source"
+
+[sheet]
+ columns = [
+ ":A",
+ ":C",
+ ":D",
+ ":N",
+
+ ]
+ filters = [
+
+ ":C = 'France'",
+ ":A = max(:A, [:D], [:F])",
+
+ ]
diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
new file mode 100644
index 0000000..e81cc49
--- /dev/null
+++ b/lib/analysers/dependency.ml
@@ -0,0 +1,256 @@
+open StdLabels
+module IntSet = ImportContainers.IntSet
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+(*
+ Internally, the dependency chain is represented as a graph.
+
+  Each file to process (csv or xlsx) is marked as a node. The link between
+  each node cannot be represented in the graph because each index can imply
+  multiple tables together (for example [join(source.A, other.B)] ). So the
+  graph is only used in order to get the import process order, and each
+  piece of information is kept in a separate table.
+
+*)
+
+type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list
+
+type key = {
+ name : string;
+ expression : Path.column Expression.t;
+ columns : ImportContainers.IntSet.t Lazy.t;
+}
+
+type t = {
+ table : Table.t;
+ columns : IntSet.t;
+ keys : key list;
+}
+
+let table t = t.table
+let columns t = t.columns
+let keys t = t.keys
+
+type build_map = t ImportContainers.Externals.t
+
+(* The expression can be qualified with full path (when we are in the column
+   definition) or only with a column (in a binding with an external table).
+
+   This type is here to extract the correct information, according to the type
+   we are dealing with :
+
+   - [to_mapping] allows picking the right Set in which we need to add the
+   column pointed by the path (this can be [keys] or [columns])
+   - [of_path] is needed to extract the qualified source from any kind of path.
+*)
+type 'a expression_extractor = {
+ to_mapping : t -> Path.column -> t;
+ of_path : 'a -> string option * Path.column;
+}
+
+(** [add_path_in_map f parent path ] Extract the column from element [path] and
+ process the column in the function [f]
+
+ The [path] is abstract, but the function [f.of_path] can extract the needed
+ elements in order to add it in the mapping.
+
+    The function may raise [Unknown_source] if the path describes an unknown
+    table. *)
+let add_path_in_map :
+ f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map =
+ fun ~f ~conf path map ->
+ let table_source, column = f.of_path path in
+ let table =
+ try ImportConf.get_table_for_name conf table_source with
+ | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source))
+ in
+
+ ImportContainers.Externals.update map
+ ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v ->
+ let mapping =
+ match v with
+ | None -> raise (ImportErrors.Unknown_source table.name)
+ | Some mapping -> mapping
+ in
+
+ Some (f.to_mapping mapping column))
+
+let add_expression_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t ->
+ build_map ->
+ build_map =
+ fun ~f ~conf expr map ->
+ Expression.fold_values expr ~init:map ~f:(fun map p ->
+ add_path_in_map ~f ~conf p map)
+
+let add_columns_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t list ->
+ build_map ->
+ build_map =
+ fun ~f ~conf columns map ->
+ let columns =
+ List.fold_left columns ~init:map ~f:(fun map expression ->
+ let new_map = add_expression_in_map ~f ~conf expression map in
+ new_map)
+ in
+ columns
+
+(* [add_dependancies ~conf source map path]
+   adds the dependency from the table [source] to another one after analysing
+   the expression and extracting the path contained inside.
+
+ This function is called for each path declared inside the expression. *)
+let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps
+ =
+ fun ~conf extern graph path ->
+ let source_table = ImportConf.get_table_for_name conf path.Path.alias in
+
+ let source = ImportContainers.Source.from_table source_table in
+ let target = ImportContainers.Source.from_table extern.target in
+
+ match ImportContainers.Source.equal target source with
+ | true -> graph
+ | _ -> (target, [ source ]) :: graph
+
+let add_external_in_map :
+ conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps =
+ fun ~conf extern (map, graph) ->
+ let dest = ImportContainers.KeyName.from_table extern.target in
+ (* Pre-check that every source is already declared in the configuration. *)
+ let _ =
+ Expression.fold_values extern.intern_key ~init:() ~f:(fun () path ->
+ try
+ let _ = ImportConf.get_table_for_name conf path.Path.alias in
+ ()
+ with
+ | Not_found -> (
+ match path.alias with
+ | Some table -> raise (ImportErrors.Unknown_source table)
+ | None ->
+ (* This is very unlikely. This error would be raised if we have
+ no source for this import *)
+ let root = conf.source in
+ raise (ImportErrors.Unknown_source root.name)))
+ in
+
+ (* Create the new key with all the expression and all the columns inside it *)
+ let new_key =
+ {
+ name = extern.target.Table.name;
+ expression = extern.extern_key;
+ columns =
+ lazy
+ (Expression.fold_values extern.extern_key
+ ~f:(fun acc k -> ImportContainers.IntSet.add k acc)
+ ~init:ImportContainers.IntSet.empty);
+ }
+ in
+ let build_map =
+ ImportContainers.Externals.update map ~key:dest ~f:(function
+ | None ->
+          (* Create the entry for the external if it does not exist *)
+ Some
+ {
+ table = extern.target;
+ columns = IntSet.empty;
+ keys = [ new_key ];
+ }
+ | Some mapping ->
+ (* Or update the existing one with the key we just created *)
+ Some { mapping with keys = new_key :: mapping.keys })
+ in
+ let graph =
+ Expression.fold_values extern.intern_key ~init:graph
+ ~f:(add_dependancies ~conf extern)
+ in
+ let build_map =
+ add_expression_in_map extern.intern_key build_map ~conf
+ ~f:
+ {
+ of_path =
+ (fun Path.{ alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ (Some table.name, column));
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ }
+ in
+ (build_map, graph)
+
+let mapper =
+ {
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ of_path = (fun ({ alias; column } : Path.t) -> (alias, column));
+ }
+
+let get_mapping : Syntax.t -> build_map * deps =
+ fun conf ->
+ let root = ImportContainers.Source.from_table (ImportConf.root_table conf)
+ and root' =
+ ImportContainers.KeyName.from_table (ImportConf.root_table conf)
+ in
+ let graph = [ (root, []) ] in
+
+ (* For each external declared in the configuration file, add the columns to
+ query *)
+ let init =
+ ( ImportContainers.Externals.singleton root'
+ {
+ table = ImportConf.root_table conf;
+ columns = IntSet.empty;
+ keys = [];
+ },
+ graph )
+ in
+ let map, graph =
+ List.fold_left conf.externals ~init ~f:(fun map extern ->
+ add_external_in_map ~conf extern map)
+ in
+
+  (* Now we don’t bother anymore with the graph and its dependencies, we just
+     collect all the columns in the different expressions *)
+ let map =
+ map
+ |> add_columns_in_map ~conf ~f:mapper conf.columns
+ |> add_columns_in_map ~conf ~f:mapper conf.sort
+ |> add_columns_in_map ~conf ~f:mapper conf.filters
+ |> add_columns_in_map ~conf ~f:mapper conf.uniq
+ in
+ (map, graph)
+
+let get_process_order : Syntax.t -> t list =
+ fun map ->
+ let map, graph = get_mapping map in
+
+ match Tsort.sort graph with
+ | Tsort.ErrorCycle l ->
+ let name_of_key k =
+ ImportContainers.(Externals.find (Source.name k) map).table.name
+ in
+ raise (ImportErrors.Cycle (List.map ~f:name_of_key l))
+ | Sorted elements ->
+ (* It’s OK, we know there is no cycles now, we can extract the files to
+ load from this list.
+ *)
+ List.filter_map elements ~f:(fun v ->
+ ImportContainers.Externals.find_opt
+ (ImportContainers.Source.name v)
+ map)
+      (* This list can still have duplicated values, and we have to remove them
+         while keeping the order.
+ *)
+ |> List.fold_left ~init:[] ~f:(fun acc element ->
+          (* Prevent the same file from being loaded twice *)
+ match List.mem element ~set:acc with
+ | true -> acc
+ | false -> element :: acc)
diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli
new file mode 100644
index 0000000..c89522a
--- /dev/null
+++ b/lib/analysers/dependency.mli
@@ -0,0 +1,40 @@
+type t
+
+val get_process_order : ImportConf.Syntax.t -> t list
+(** Extract the file list to process, following the identified dependencies.
+    Try to load first the documents which do not require another spreadsheet,
+    and keep going in the topological order.
+
+    Raise [Unknown_source file] if a source is referenced but is not declared.
+    The order matters : the exception will be raised if a source is referenced
+    before the declaration (when chaining different externals) *)
+
+val table : t -> ImportDataTypes.Table.t
+(** Get the table to proceed. *)
+
+val columns : t -> ImportContainers.IntSet.t
+(** A set of columns loaded in this table. Those columns may not need to be
+    reported in the final export.
+
+    Each column is identified by its index in the Excel document.
+
+    This set does not include the columns used in the keys. They can be
+    fetched with the [keys] function *)
+
+type key = {
+ name : string; (** This is the name of the target table we are pointed to *)
+ expression : ImportDataTypes.Path.column ImportExpression.T.t;
+ (** The expression used as key *)
+ columns : ImportContainers.IntSet.t Lazy.t;
+ (** The list of columns used in the key. All the columns are referenced
+ in the expression. We can have many columns used inside a single
+ key when a function is used (for example for joining multiple
+ columns into a single key) *)
+}
+(** This type describes the join key in a table. The name is the referring
+    table using this key (the key name in the database is key_"name" ), and
+    the expression describes how to build the key. *)
+
+val keys : t -> key list
+(** [keys] is the list of columns pointed by another one. They are
+    considered as join keys between the different tables. *)
diff --git a/lib/analysers/dune b/lib/analysers/dune
new file mode 100755
index 0000000..1bbc30f
--- /dev/null
+++ b/lib/analysers/dune
@@ -0,0 +1,12 @@
+(library
+ (name importAnalyser)
+ (libraries
+ importConf
+ importContainers
+ importCSV
+ importDataTypes
+ importExpression
+ importErrors
+ tsort
+ )
+)
diff --git a/lib/analysers/headers.ml b/lib/analysers/headers.ml
new file mode 100644
index 0000000..916dfee
--- /dev/null
+++ b/lib/analysers/headers.ml
@@ -0,0 +1,55 @@
+open StdLabels
+module I = ImportConf
+module E = ImportExpression.T
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+module SheeetMap = Map.Make (struct
+ type t = Table.t
+
+  (** We are sure we can’t have the same name for two different tables. *)
+ let compare v1 v2 = String.compare (Table.name v1) (Table.name v2)
+end)
+
+type content = string array
+
+type t = content SheeetMap.t
+(** The map associates a line of headers with each table.
+
+    The headers are always strings. *)
+
+(** Get the headers. The function has to be called after reading each document,
+ and will reformat the first line with the values from the cell. The
+    functions will not be evaluated (instead they will be displayed "as is").
+
+    When there is no value for this path, returns an empty string.
+ *)
+let columns : Syntax.t -> t -> string list =
+ fun conf t ->
+ (* We build here a custom printer which search in the array for the column
+ name.
+
+ This function will be given as argument in the expression printer. *)
+ let f : Path.t -> Buffer.t -> unit =
+ fun path b ->
+ let source = I.get_table_for_name conf path.alias in
+
+ match SheeetMap.find_opt source t with
+ | None -> ()
+ | Some arr -> (
+ try Buffer.add_string b (Array.get arr (path.column - 1)) with
+ | _ ->
+ prerr_endline
+ @@ Printf.sprintf "No header found for :%s.%s"
+ (Option.value ~default:(I.root_table conf).Table.name
+ path.alias)
+ (ImportCSV.Csv.column_to_string path.column))
+ in
+
+ List.map conf.Syntax.columns ~f:(fun column ->
+ let b = Buffer.create 4 in
+
+ ImportExpression.Headers.headers_of_expression b f column;
+
+ Buffer.contents b)
diff --git a/lib/analysers/headers.mli b/lib/analysers/headers.mli
new file mode 100644
index 0000000..03e384b
--- /dev/null
+++ b/lib/analysers/headers.mli
@@ -0,0 +1,11 @@
+module SheeetMap : Map.S with type key = ImportDataTypes.Table.t
+
+type t = string array SheeetMap.t
+
+val columns : ImportConf.Syntax.t -> t -> string list
+(** Get the headers. The function has to be called after reading each document,
+ and will reformat the first line with the values from the cell. The
+    functions will not be evaluated (instead they will be displayed "as is").
+
+    When there is no value for this path, returns an empty string.
+ *)
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
new file mode 100644
index 0000000..7a6dd2a
--- /dev/null
+++ b/lib/analysers/query.ml
@@ -0,0 +1,445 @@
+open StdLabels
+module Expression = ImportExpression
+module Q = Expression.Query
+module Syntax = ImportConf.Syntax
+module Table = ImportConf.Table
+module Path = ImportConf.Path
+
+let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
+
+(** The module allows creating fragments in the query which keep together the
+ bound parameters and the text of the query.
+
+ This is used a lot in order to create the CTE, where you need to create
+ fragments used both in the main request and partially in the CTE itself.
+
+ The content is mutable and all the functions return [unit]. *)
+module Chunk = struct
+ type t = {
+ b : Buffer.t;
+ parameters : ImportCSV.DataType.t Queue.t;
+ }
+
+ let create : unit -> t =
+ fun () -> { b = Buffer.create 16; parameters = Queue.create () }
+
+ let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t =
+ fun b parameters -> { b; parameters }
+
+ (* Append the elements from [tail] at the end of [head]
+
+ Tail is destroyed during the operation.
+ *)
+ let append : head:t -> tail:t -> unit =
+ fun ~head ~tail ->
+ match Buffer.length tail.b with
+ | 0 -> ()
+ | _ ->
+ Buffer.add_buffer head.b tail.b;
+ Queue.transfer tail.parameters head.parameters;
+ ()
+
+ (** Add a literal string in the sequence *)
+ let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v
+
+ let copy : t -> t =
+ fun t ->
+ let b = Buffer.create 16 and parameters = Queue.copy t.parameters in
+ Buffer.add_buffer b t.b;
+ { b; parameters }
+
+ let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit =
+ fun t p -> Queue.add_seq t.parameters p
+end
+
+let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit =
+ fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f
+
+(* Collect all the tables pointed by the expression. *)
+let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =
+ fun conf expression ->
+ Expression.T.fold_values expression ~init:[] ~f:(fun acc path ->
+ let table = ImportConf.get_table_for_name conf path.Path.alias in
+ let table_name = Table.name table in
+ (table, table_name) :: acc)
+ |> List.sort_uniq ~cmp:Stdlib.compare
+
+(** Represent a column in a safe way in a query *)
+let print_column : Table.t -> string -> string =
+ fun table column ->
+ String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ]
+
+let create_table : Dependency.t -> string =
+ fun mapping ->
+ let b = Buffer.create 64 in
+
+ Buffer.add_string b "CREATE TABLE '";
+ Buffer.add_string b (Table.name (Dependency.table mapping));
+ Buffer.add_string b "' (id INTEGER PRIMARY KEY";
+
+ List.iter (Dependency.keys mapping) ~f:(fun { Dependency.name; _ } ->
+ Buffer.add_string b ",'key_";
+ Buffer.add_string b name;
+ Buffer.add_string b "'");
+
+ ImportContainers.IntSet.iter (Dependency.columns mapping) ~f:(fun i ->
+ Buffer.add_string b ",'col_";
+ Buffer.add_string b (string_of_int i);
+ Buffer.add_string b "'");
+ Buffer.add_string b ")";
+
+ Buffer.contents b
+
+let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
+ fun ~conf buffer { alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ let table_name = table.Table.name in
+ Format.fprintf buffer "'%s'.col_%d" table_name column
+
+(** Extract the information from the dependencies. We get two pieces of information here :
+
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing
+ links later.
+ *)
+let query_of_external :
+ conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit =
+ fun ~conf ~join_buffer external_ ->
+ let extern_table = Table.name external_.target in
+
+ let formatter = Format.formatter_of_buffer join_buffer.b in
+ Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
+ external_.target.name
+ (prepare_key ~f:(fun f ->
+ let q =
+ Q.query_of_expression Q.BindParam f (show_path ~conf)
+ external_.intern_key
+ in
+
+ Chunk.add_parameters join_buffer (Queue.to_seq q)))
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+ Format.pp_print_flush formatter ()
+
+(** Create the FROM part of the query, adding all the required externals.
+
+ SQLite is able to optimize the query and does not load the tables not used
+ in the select clause. *)
+let create_from_chunck : Syntax.t -> Chunk.t -> unit =
+ fun conf c ->
+ Chunk.add_string c "\nFROM '";
+ Chunk.add_string c (Table.name conf.source);
+ Chunk.add_string c "' AS '";
+ Chunk.add_string c conf.source.name;
+ Chunk.add_string c "'";
+
+ (* Add the externals in the query *)
+ List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c)
+
+(** Build a CTE query in order to use any group function inside the query.
+ Return the bound parameters used in the expression. The buffer given in
+ argument is also modified during the construction.
+
+ If filters is not None, the clauses are added to the CTE. *)
+let build_cte :
+ Syntax.t ->
+ expression:'a Expression.T.t ->
+ filters:Chunk.t option ->
+ Chunk.t =
+ fun conf ~expression ~filters ->
+ (* The bound parameters queue will be used later in the full query *)
+ let cte_chunk = Chunk.create () in
+
+ Chunk.add_string cte_chunk "WITH cte AS (SELECT ";
+ Chunk.add_string cte_chunk conf.source.name;
+ Chunk.add_string cte_chunk ".id, ";
+
+ let formatter = Format.formatter_of_buffer cte_chunk.b in
+
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression
+ in
+ Format.pp_print_flush formatter ();
+ Chunk.add_parameters cte_chunk (Queue.to_seq p);
+ (* The name is hardcoded here, and used in [Expression.Filters.window] *)
+ Chunk.add_string cte_chunk " AS group0";
+
+ let () = create_from_chunck conf cte_chunk in
+ let () =
+ match filters with
+ | None -> ()
+ | Some filters_chunk ->
+ Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk)
+ in
+
+ Chunk.add_string cte_chunk ")\n";
+ cte_chunk
+
+type filter_evaluation = {
+ content : Buffer.t;
+ parameters : ImportCSV.DataType.t Seq.t;
+ cte : (string * Chunk.t) option;
+}
+(** Build the filters to apply in the query. We make the difference here
+ between the predicates to apply directly in the query, and the filters
+ associated with a group, which are required to be transformed into a CTE
+ in SQL, and are evaluated before. *)
+
+(** Evaluate the filters on the query *)
+let eval_filters : Syntax.t -> filter_evaluation =
+ fun conf ->
+ match conf.filters with
+ | [] ->
+ let empty_buffer = Buffer.create 0 in
+ { content = empty_buffer; parameters = Seq.empty; cte = None }
+ | filters -> (
+ (* Create a new queue in order to accumulate all the parameters to bind.
+ This filter will be given to the CTE if any, or reused in the
+ main query when there is no CTE.
+ *)
+ let chunk_filters = Chunk.create () in
+ Chunk.add_string chunk_filters "\nWHERE ";
+
+ let group = Chunk.create () in
+
+ let with_cte, with_exr =
+ List.fold_left filters ~init:(None, false)
+ ~f:(fun (with_cte, with_exr) column ->
+ (* The function will return an option in second position which is
+ None when no Group function was found, and Some Expression
+ otherwise *)
+ let b = Buffer.create 16 in
+
+ let formatter = Format.formatter_of_buffer b in
+ let queue, group_found =
+ Expression.Filters.query_of_expression Q.BindParam formatter
+ (show_path ~conf) column
+ in
+ Format.pp_print_flush formatter ();
+ let clause = Chunk.create' b queue in
+
+ match (group_found, with_cte) with
+ | None, _ ->
+ Chunk.append ~head:chunk_filters ~tail:clause;
+ Chunk.add_string chunk_filters "\nAND ";
+ (with_cte, true)
+ | (Some _ as group'), None ->
+ (* We have a group here, we do not add it into the
+ filter_buffer right now.
+
+ This can occur only once, the second one will raise
+ an error. *)
+ Chunk.append ~head:group ~tail:clause;
+ (group', with_exr)
+ | Some _, Some _ -> raise ImportErrors.MisplacedWindow)
+ in
+
+ match with_cte with
+ | None ->
+ let content = chunk_filters.b in
+ truncate content 5;
+ {
+ (* There is no group clause in the query *)
+ content;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ cte = None;
+ }
+ | Some expression ->
+ let filters =
+ if with_exr then (
+ (* If we have additional filters from the group clause, we
+ have to report them in the CTE instead of the main query. *)
+ let c' = Chunk.copy chunk_filters in
+ truncate c'.b 5;
+ Some c')
+ else None
+ in
+
+ (* Create the common expression table *)
+ let cte_parameters = build_cte conf ~expression ~filters in
+ Chunk.append ~head:chunk_filters ~tail:group;
+
+ {
+ content = chunk_filters.b;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ (* The name is hardcoded here, and used in
+ [Expression.Filters.window] *)
+ cte = Some ("cte", cte_parameters);
+ })
+
+type query = {
+ q : string;
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+
+(** Build the query and also return the mapping in order to identify each
+ external link between files.
+
+ The select query will name each column with an alias, and the map allows
+ finding which source is pointed to by this alias. *)
+let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
+ fun conf ->
+ (* If the filters contain a group expression, we need to transform this into
+ a CTE, which has to be evaluated before the main query. That’s why we are
+ evaluating the filters right now. *)
+ let filters = eval_filters conf in
+ let b = Buffer.create 256 in
+ let parameters = Queue.create () in
+
+ Option.iter
+ (fun (_, (cte : Chunk.t)) ->
+ Buffer.add_buffer b cte.b;
+ Queue.add_seq parameters (Queue.to_seq cte.parameters))
+ filters.cte;
+
+ (* For each column in the configuration file, add the corresponding element
+ in the query.
+
+ The Sqlite driver returns the elements in an array, we create an array
+ in order to manage the elements together.
+ *)
+ let headers = Array.make (List.length conf.columns) (Obj.magic None) in
+
+ let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter "SELECT %a"
+ (Format.pp_print_seq
+ ~pp_sep:(fun f () -> Format.fprintf f ",\n")
+ (fun formatter (i, column) ->
+ Array.set headers i column;
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer p parameters;
+ Format.fprintf formatter " AS result_%d" i))
+ columns
+ in
+ Format.pp_print_flush formatter ();
+
+ let () = create_from_chunck conf (Chunk.create' b parameters) in
+
+ (* If the query has a CTE, link it as well. We use an INNER JOIN here because
+ we want to be sure to get all the rows fetched by the CTE
+ *)
+ let () =
+ match filters.cte with
+ | None -> ()
+ | Some (name, _) ->
+ Buffer.add_string b "\nINNER JOIN '";
+ Buffer.add_string b name;
+ Buffer.add_string b "' ON ";
+ Buffer.add_string b name;
+ Buffer.add_string b ".id = ";
+ Buffer.add_string b conf.source.name;
+ Buffer.add_string b ".id"
+ in
+
+ Buffer.add_buffer b filters.content;
+ Queue.add_seq parameters filters.parameters;
+
+ let formatter = Format.formatter_of_buffer b in
+ (match conf.Syntax.uniq with
+ | [] -> ()
+ | uniq ->
+ Format.fprintf formatter "\nGROUP BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ uniq);
+ (match conf.Syntax.sort with
+ | [] -> ()
+ | sort ->
+ Format.fprintf formatter "\nORDER BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ sort);
+ Format.pp_print_flush formatter ();
+
+ ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
+
+let check_external : Syntax.t -> Syntax.extern -> query =
+ fun conf external_ ->
+ let extern_table = Table.name external_.target in
+
+ let parameters = Queue.create () in
+ let internal_key_buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer internal_key_buffer in
+ let internal_key_seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ external_.intern_key
+ in
+ Format.pp_print_flush formatter ();
+
+ let external_key_buffer = Buffer.create 16 in
+ let pointed_tables = pointed_tables conf external_.intern_key in
+ Buffer.add_string external_key_buffer
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+ (* We do a copy before the transfer because the Queue is reused later in the
+ query *)
+ Queue.transfer (Queue.copy internal_key_seq) parameters;
+
+ let join_content = Buffer.contents external_key_buffer in
+ let inner_content = Buffer.contents internal_key_buffer in
+ let b = Buffer.create 256 in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter
+ "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \
+ AND %s IS NOT NULL AND %s <> ''"
+ (fun formatter -> function
+ | [ (table, _name) ] ->
+ Format.fprintf formatter "%s, " (print_column table "id")
+ | _ -> Format.fprintf formatter "-1, ")
+ pointed_tables (* *)
+ inner_content (* *)
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.pp_print_text f ", ")
+ (fun formatter (table, name) ->
+ Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name))
+ pointed_tables (* *)
+ extern_table (* *)
+ external_.target.name
+ (prepare_key ~f:(fun b ->
+ Format.pp_print_text b (Buffer.contents internal_key_buffer)))
+ join_content (* *)
+ join_content (* *)
+ inner_content (* *)
+ inner_content
+ in
+
+ Format.pp_print_flush formatter ();
+
+ { q = Buffer.contents b; parameters = Queue.to_seq parameters }
+
+let build_key_insert : Buffer.t -> Dependency.key -> unit =
+ fun buffer { Dependency.expression; _ } ->
+ let show_column : Format.formatter -> Path.column -> unit =
+ fun formatter column -> Format.fprintf formatter ":col_%d" column
+ in
+
+ let formatter = Format.formatter_of_buffer buffer in
+
+ let () =
+ prepare_key formatter ~f:(fun formatter ->
+ Q.query_of_expression Q.NoParam formatter show_column expression)
+ in
+
+ Format.pp_print_flush formatter ();
+
+ ()
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
new file mode 100644
index 0000000..14d2807
--- /dev/null
+++ b/lib/analysers/query.mli
@@ -0,0 +1,27 @@
+module Syntax = ImportConf.Syntax
+
+val create_table : Dependency.t -> string
+
+type query = {
+ q : string; (** The query to execute *)
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+(** This type represents a query to execute.
+ [q] is the template to run, and shall be run with all the bound parameters.
+ *)
+
+val select :
+ Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
+
+val check_external : Syntax.t -> Syntax.extern -> query
+(** Create a query which select all the missing key in an external *)
+
+val build_key_insert : Buffer.t -> Dependency.key -> unit
+(* Build the fragment insert query.
+
+ This uses a prepared statement and applies the function at insert time, in
+ order to have the key already in the expected format in the database.
+
+ The columns are named :col_XX where XX is the index of the column in the
+ datasheet (starting from 1)
+*)
diff --git a/lib/configuration/dune b/lib/configuration/dune
new file mode 100755
index 0000000..27d31a6
--- /dev/null
+++ b/lib/configuration/dune
@@ -0,0 +1,29 @@
+(library
+ (name importConf)
+ (libraries
+ decoders
+ otoml
+ menhirLib
+ importCSV
+ yojson
+ re
+ helpers
+ importDataTypes
+ importExpression
+ importErrors
+ )
+
+(preprocess (pps ppx_yojson_conv ppx_deriving.ord))
+)
+
+(rule
+ (targets expression_parser_messages.ml)
+ (deps expression_parser.messages expression_parser.mly)
+ (action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))
+
+(menhir
+ (modules expression_parser)
+ (flags --table)
+)
+
+(ocamllex expression_lexer)
diff --git a/lib/configuration/expression_lexer.mll b/lib/configuration/expression_lexer.mll
new file mode 100644
index 0000000..cbfc8dc
--- /dev/null
+++ b/lib/configuration/expression_lexer.mll
@@ -0,0 +1,91 @@
+{
+ open Expression_parser
+ module Expression = ImportExpression.T
+
+ exception UnclosedQuote of { content: string ; line : int}
+}
+
+
+let spaces = [ ' ' '\t' ]
+let letters = [^ '"' '\'' '(' ')' '[' ']' ':' '.' ',' '^' ' ' '\t' '\n' '\r' ]
+let digit = [ '0'-'9' ]
+let eol = [ '\r' '\n' ]
+
+let escaped = [ '\'' '\\']
+
+rule token = parse
+| eol { Lexing.new_line lexbuf; token lexbuf }
+| spaces { token lexbuf }
+| '\'' {
+ try read_quoted_string (Buffer.create 17) lexbuf
+ with Failure _ ->
+ let line = lexbuf.Lexing.lex_curr_p.pos_lnum
+ and content = Bytes.to_string lexbuf.Lexing.lex_buffer in
+ raise (UnclosedQuote {line; content})
+}
+| '"' { read_dquoted_string (Buffer.create 17) lexbuf }
+| '#' { skip_comment lexbuf }
+| '(' { L_PAREN }
+| ')' { R_PAREN }
+| '[' { L_BRACKET }
+| ']' { R_BRACKET }
+| ':' { COLUMN }
+| '.' { DOT }
+| ',' { COMA }
+| '^' { CONCAT_OPERATOR }
+| '+' { BINARY_OPERATOR (Expression.Add) }
+| '-' { BINARY_OPERATOR (Expression.Minus) }
+| '/' { BINARY_OPERATOR (Expression.Division) }
+| "and" { BOOL_OPERATOR (Expression.And) }
+| "or" { BOOL_OPERATOR (Expression.Or) }
+| '<' { INEQUALITY_OPERATOR (Expression.LT) }
+| '>' { INEQUALITY_OPERATOR (Expression.GT) }
+| "<>" { EQUALITY_OPERATOR (Expression.Different) }
+| '=' { EQUALITY_OPERATOR (Expression.Equal) }
+| digit+ as l { INTEGER l}
+| '-' digit+ as l { INTEGER l}
+| letters+ as l { IDENT l}
+| eof { EOF }
+
+and skip_comment = parse
+ | [^ '\r' '\n' ]
+ { skip_comment lexbuf }
+ | eol
+ { token lexbuf }
+
+(* Read the content until we got another one quote *)
+and read_quoted_string buf = parse
+ | [^ '\'' '\\' ]+
+ { Buffer.add_string buf (Lexing.lexeme lexbuf);
+ read_quoted_string buf lexbuf
+ }
+ | "\\\'"
+ { Buffer.add_char buf '\'';
+ read_quoted_string buf lexbuf
+ }
+ | '\\'
+ { Buffer.add_char buf '\\';
+ read_quoted_string buf lexbuf
+ }
+ | '\''
+ { LITERAL (Buffer.contents buf)
+ }
+
+(* Read the content until we got another one quote *)
+and read_dquoted_string buf = parse
+ | [^ '"' '\\' ]+
+ { Buffer.add_string buf (Lexing.lexeme lexbuf);
+ read_dquoted_string buf lexbuf
+ }
+ | "\\\""
+ { Buffer.add_char buf '"';
+ read_dquoted_string buf lexbuf
+ }
+ | '\\'
+ { Buffer.add_char buf '\\';
+ read_dquoted_string buf lexbuf
+ }
+ | '"'
+ {
+ LITERAL (Buffer.contents buf)
+ }
diff --git a/lib/configuration/expression_parser.messages b/lib/configuration/expression_parser.messages
new file mode 100644
index 0000000..ff7e757
--- /dev/null
+++ b/lib/configuration/expression_parser.messages
@@ -0,0 +1,123 @@
+column_expr: R_PAREN
+##
+
+Invalid expression
+
+path_expr: IDENT R_PAREN
+column_expr: IDENT R_PAREN
+column_expr: IDENT L_PAREN IDENT R_PAREN
+path_expr: IDENT L_PAREN IDENT R_PAREN
+##
+
+Misplaced function. Did you forgot to quote the text ?
+
+column_expr: IDENT L_PAREN EOF
+path_expr: IDENT L_PAREN EOF
+##
+
+Uncomplete expression
+
+column_expr: COLUMN R_PAREN
+path_expr: COLUMN R_PAREN
+##
+
+The path is missing.
+
+column_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL L_PAREN
+column_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN
+column_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+column_expr: IDENT L_PAREN L_PAREN LITERAL L_PAREN
+path_expr: IDENT L_PAREN LITERAL L_PAREN
+##
+
+A text is given where it was expected a function.
+
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+column_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+path_expr: LITERAL BINARY_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+##
+## Ends in an error in state: 61.
+##
+## separated_nonempty_list(COMA,expr_(path_,COMA)) -> expr_(path_,COMA) COMA . separated_nonempty_list(COMA,expr_(path_,COMA)) [ R_PAREN ]
+##
+## The known suffix of the stack is as follows:
+## expr_(path_,COMA) COMA
+##
+
+Uncomplete expression
+
+column_expr: IDENT L_PAREN LITERAL COMA R_PAREN
+path_expr: IDENT L_PAREN LITERAL COMA R_PAREN
+
+Misplaced coma
+
+column_expr: IDENT L_PAREN LITERAL EOF
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF
+path_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF
+path_expr: IDENT L_PAREN L_PAREN LITERAL EOF
+##
+
+Uncomplete expression. Did you forgot a ')' ?
+
+
+column_expr: LITERAL R_PAREN
+path_expr: LITERAL BINARY_OPERATOR LITERAL R_PAREN
+##
+
+Invalid expression
+
+path_expr: COLUMN IDENT L_PAREN
+##
+# Also apply to :
+# path_expr: COLUMN IDENT COLUMN
+
+Misplaced path
+
+path_expr: COLUMN IDENT DOT R_PAREN
+##
+
+Incomplete path: the table is missing
+
+column_expr: INTEGER BINARY_OPERATOR INTEGER R_PAREN
+##
+
+Unbalanced parens. Did you wanted to write ')' instead of '(' ?
+
+
+path_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN
+
+Unbalanced brackets. Did you wanted to write ']' instead of ')' ?
+
+column_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN
+path_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN
+column_expr: LITERAL CONCAT_OPERATOR R_PAREN
+path_expr: LITERAL CONCAT_OPERATOR R_PAREN
+column_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN
+path_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN
+column_expr: LITERAL BINARY_OPERATOR R_PAREN
+path_expr: LITERAL BINARY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER INEQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER INEQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN
+
+The operator expect two arguments. Only one is given
+
+column_expr: IDENT L_PAREN L_BRACKET R_PAREN
+path_expr: IDENT L_PAREN L_BRACKET R_PAREN
+column_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+path_expr: INTEGER CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+
+Mix between brackets and parens.
+
diff --git a/lib/configuration/expression_parser.mly b/lib/configuration/expression_parser.mly
new file mode 100644
index 0000000..1304c4d
--- /dev/null
+++ b/lib/configuration/expression_parser.mly
@@ -0,0 +1,185 @@
+%token <string>IDENT
+%token L_PAREN
+%token R_PAREN
+%token L_BRACKET R_BRACKET
+%token COLUMN
+%token DOT
+%token <string>LITERAL
+%token <string>INTEGER
+%token COMA
+%token EOF
+%token CONCAT_OPERATOR
+
+%token <ImportExpression.T.binary_operator>BINARY_OPERATOR
+%token <ImportExpression.T.binary_operator>INEQUALITY_OPERATOR
+%token <ImportExpression.T.binary_operator>EQUALITY_OPERATOR
+%token <ImportExpression.T.binary_operator>BOOL_OPERATOR
+
+%start <ImportDataTypes.Path.t ImportExpression.T.t> path_expr
+%start <ImportDataTypes.Path.column ImportExpression.T.t> column_expr
+
+%right BOOL_OPERATOR
+%right INEQUALITY_OPERATOR EQUALITY_OPERATOR
+%right CONCAT_OPERATOR BINARY_OPERATOR
+
+%{
+
+ let function_of_name param f =
+ match (String.lowercase_ascii f, param) with
+ | "nvl", _ ->
+ ImportExpression.T.Nvl param
+ | "join", (ImportExpression.T.Literal sep:: tl) ->
+ ImportExpression.T.Join (sep, tl)
+ | "join", (ImportExpression.T.Empty:: tl) ->
+ ImportExpression.T.Join ("", tl)
+ | "upper", _ ->
+ ImportExpression.T.Function' (ImportExpression.T.Upper, param)
+ | "trim", _ ->
+ ImportExpression.T.Function' (ImportExpression.T.Trim, param)
+ | other, _ ->
+ ImportExpression.T.Function (other, param)
+
+%}
+
+%%
+
+path_expr:
+ | expr_(path_, EOF) EOF { $1 }
+ | EOF { ImportExpression.T.Empty }
+column_expr:
+ | expr_(column_, EOF) EOF { $1 }
+ | EOF { ImportExpression.T.Empty }
+
+
+path_:
+ | COLUMN
+ column = IDENT
+ { ImportExpression.T.Path
+ Syntax.Path.{ alias = None
+ ; column = ImportCSV.Csv.column_of_string column
+ }
+ }
+
+ | COLUMN
+ table = IDENT
+ DOT
+ column = IDENT
+ { ImportExpression.T.Path
+ Syntax.Path.{ alias = Some table
+ ; column = ImportCSV.Csv.column_of_string column}
+ }
+
+column_:
+ | COLUMN
+ column = IDENT
+ { try ImportExpression.T.Path (ImportCSV.Csv.column_of_string column)
+ with _ -> ImportExpression.T.Literal column }
+
+arguments(PATH):
+ | L_PAREN
+ expr = separated_list(COMA, expr_(PATH, COMA))
+ R_PAREN
+ { expr }
+
+group(PATH):
+ | L_BRACKET
+ expr = separated_list(COMA, expr_(PATH, COMA))
+ R_BRACKET
+ { expr }
+
+fixed(PATH):
+ | d = INTEGER
+ { ImportExpression.T.Integer d }
+ | l = LITERAL
+ {
+ if String.equal String.empty l then
+ ImportExpression.T.Empty
+ else
+ ImportExpression.T.Literal l
+ }
+
+%inline boperator:
+ | e = BINARY_OPERATOR { e }
+ | e = INEQUALITY_OPERATOR { e }
+ | e = EQUALITY_OPERATOR { e }
+ | e = BOOL_OPERATOR { e }
+
+(* The expression evaluation receives as parameters :
+ 1. the way to build a path, as we have two distinct ways to build them in
+ the case of externals (the external_key does not allow a table name)
+ 2. a phantom type telling which kind of element will end the expression.
+ This can be EOF for the root expression, or COMA when inside a function.
+ This prevents merlin from optimizing those two paths, and allows more
+ precise error messages. *)
+expr_(PATH, ENDING_PHANTOM):
+ | L_PAREN
+ e = expr_(PATH, R_PAREN)
+ R_PAREN
+ { ImportExpression.T.Expr e
+ }
+ |
+ p1 = expr_(PATH, ENDING_PHANTOM)
+ CONCAT_OPERATOR
+ p2 = expr_(PATH, COMA)
+ { match p2 with
+ | ImportExpression.T.Concat args -> ImportExpression.T.Concat (p1::args)
+ | _ -> ImportExpression.T.Concat (p1::p2::[])
+ }
+ | p1 = expr_(PATH, ENDING_PHANTOM)
+
+ op = boperator
+ p2 = expr_(PATH, COMA)
+ { ImportExpression.T.BOperator (op, p1, p2) }
+
+ | p1 = expr_(PATH, ENDING_PHANTOM)
+ op = EQUALITY_OPERATOR
+ p2 = group(PATH)
+ { ImportExpression.T.GEquality(op, p1, p2) }
+
+
+
+ | p = PATH
+ { p }
+ | f = fixed(PATH)
+ { f }
+ | s = IDENT
+ args = arguments(PATH)
+ { function_of_name args s }
+ |
+ s = IDENT
+ L_PAREN
+ opt_arg = opt_arg(PATH, COMA)?
+ args1 = group(PATH)
+ COMA
+ args2 = group(PATH)
+ R_PAREN
+ { let window_name = ImportExpression.T.window_of_name s opt_arg in
+ ImportExpression.T.Window (window_name, args1, args2) }
+(*
+ | (* This case is here to describe a window function which has 2 arguments
+ level.
+ I’m not completely satisfied with it, as it prevents the ability to
+ create an expression block with parens around. *)
+ s = IDENT
+ L_PAREN
+ opt_arg = opt_arg(PATH, COMA)?
+ args1 = arguments(PATH)
+ COMA
+ args2 = arguments(PATH)
+ R_PAREN
+ { let window_name = ImportExpression.T.window_of_name s opt_arg in
+ let expr = ImportExpression.T.Window (window_name, args1, args2) in
+
+ let expr_repr = ImportExpression.Repr.repr ~top:true (fun _ -> "")
+ expr in
+ Printf.printf "Deprecated syntax in \"%s\" use [] instead of ()\n" expr_repr;
+
+
+ expr
+ }
+*)
+
+opt_arg(PATH, SEP):
+ | expr = expr_(PATH, COMA)
+ SEP
+ { expr }
diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml
new file mode 100644
index 0000000..586be3c
--- /dev/null
+++ b/lib/configuration/importConf.ml
@@ -0,0 +1,90 @@
+open StdLabels
+module Syntax = Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module T = Read_conf
+module Expression = ImportExpression.T
+
+let current_syntax = 1
+
+let t_of_yojson : Yojson.Safe.t -> Syntax.t =
+ fun json ->
+ let keys = Yojson.Safe.Util.keys json in
+ let version =
+ match List.find_opt keys ~f:(String.equal "version") with
+ | None ->
+ Printf.printf
+ "No version given. Your setup may break in the future.\n\
+ Please add « \"version\":%d » in your configuration.\n\n"
+ current_syntax;
+ `Int 1
+ | Some _ -> Yojson.Safe.Util.member "version" json
+ in
+
+ match version with
+ | `Int 1 -> Of_json.t_of_yojson json
+ | other ->
+ Printf.eprintf "Unsuported version : %s\n" (Yojson.Safe.to_string other);
+ exit 1
+
+module TomlReader = Read_conf.Make (Helpers.Toml.Decode)
+
+let t_of_toml : Otoml.t -> (Syntax.t, string) result =
+ fun toml ->
+ let version =
+ Otoml.find toml (Otoml.get_integer ~strict:false) [ "version" ]
+ in
+ match version with
+ | 1 -> TomlReader.read toml
+ | _ ->
+ Printf.eprintf "Unsuported version : %d\n" version;
+ exit 1
+
+let dummy_conf =
+ Syntax.
+ {
+ source = { file = ""; tab = 0; name = "" };
+ version = 1;
+ externals = [];
+ columns = [];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+
+let get_table_for_name : Syntax.t -> string option -> Table.t =
+ fun conf name ->
+ match name with
+ | None -> conf.source
+ | Some name ->
+ if String.equal name conf.source.name then conf.source
+ else
+ let ext =
+ List.find conf.externals ~f:(fun (ext : Syntax.extern) ->
+ String.equal name ext.target.name)
+ in
+ ext.target
+
+let root_table : Syntax.t -> Table.t = fun conf -> conf.source
+
+let get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list =
+ fun conf source ->
+ let is_root = source = conf.source in
+
+ List.filter conf.externals ~f:(fun (ext : Syntax.extern) ->
+ (* Enumerate the intern_key and check the source pointed to by each column *)
+ Expression.fold_values ext.intern_key ~init:false ~f:(fun acc expr ->
+ if acc then acc
+ else
+ match expr.Syntax.Path.alias with
+ | Some v -> String.equal v source.name
+ | None -> is_root))
+
+let print_path_expression t = ImportExpression.Repr.repr Path.repr t
+
+let print_extern t =
+ let toml = Syntax.toml_of_extern t in
+ Otoml.Printer.to_string toml
+
+let expression_from_string s =
+ Read_conf.ExpressionParser.of_string Read_conf.ExpressionParser.path s
diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli
new file mode 100644
index 0000000..3a8ae75
--- /dev/null
+++ b/lib/configuration/importConf.mli
@@ -0,0 +1,23 @@
+module Syntax = Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+val dummy_conf : Syntax.t
+
+val root_table : Syntax.t -> Table.t
+(** Get the root table, this table is the main table to load and each line in
+ this table will be processed *)
+
+val t_of_yojson : Yojson.Safe.t -> Syntax.t
+val t_of_toml : Otoml.t -> (Syntax.t, string) result
+val get_table_for_name : Syntax.t -> string option -> Table.t
+
+val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list
+(** Get all the externals refered by the source *)
+
+val print_path_expression : Path.t ImportExpression.T.t -> string
+
+val expression_from_string :
+ string -> (Path.t ImportExpression.T.t, string) result
+
+val print_extern : Syntax.extern -> string
diff --git a/lib/configuration/of_json.ml b/lib/configuration/of_json.ml
new file mode 100644
index 0000000..f9171b9
--- /dev/null
+++ b/lib/configuration/of_json.ml
@@ -0,0 +1,134 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+open Ppx_yojson_conv_lib.Yojson_conv.Primitives
+
+let current_syntax = 1
+
+(** Build an expression from its json representation.
+
+    [f] decodes a json value into a path: any json accepted by [f] becomes a
+    [Path] node, and a string rejected by [f] falls back to [Literal].
+
+    @raise ImportErrors.JsonError when the json matches no known shape. *)
+let rec expression_of_yojson :
+    (Yojson.Safe.t -> 'a) -> Yojson.Safe.t -> 'a Expression.t =
+ fun f expr ->
+  match expr with
+  | `Null -> Empty
+  | `List l -> Concat (List.map ~f:(expression_of_yojson f) l)
+  | `String s as json -> (
+      try Path (f json) with
+      | _ -> Literal s)
+  (* ["counter"] window, built from two expression lists (presumably the
+     partition and the order — confirm against [Expression.Window]) *)
+  | `Assoc [ (fn, `List [ `List l1; `List l2 ]) ]
+    when String.equal "counter" (String.lowercase_ascii fn) ->
+      Window
+        ( Expression.Counter,
+          List.map ~f:(expression_of_yojson f) l1,
+          List.map ~f:(expression_of_yojson f) l2 )
+  (* ["previous"] window: one expression followed by two expression lists *)
+  | `Assoc [ (fn, `List [ expr1; `List l2; `List l3 ]) ]
+    when String.equal "previous" (String.lowercase_ascii fn) ->
+      Window
+        ( Expression.Previous (expression_of_yojson f expr1),
+          List.map ~f:(expression_of_yojson f) l2,
+          List.map ~f:(expression_of_yojson f) l3 )
+  | `Assoc [ (fn, `List l) ] when String.equal "nvl" (String.lowercase_ascii fn)
+    -> Nvl (List.map ~f:(expression_of_yojson f) l)
+  (* Any other single-key object is treated as a generic function call *)
+  | `Assoc [ (fn, `List l) ] ->
+      Function
+        (String.lowercase_ascii fn, List.map ~f:(expression_of_yojson f) l)
+  | json -> (
+      try Path (f json) with
+      | _ ->
+          let str_json = Yojson.Safe.pretty_to_string json in
+          raise
+            (ImportErrors.JsonError { json = str_json; element = "Expression" })
+      )
+
+type 'a expression = 'a Expression.t
+type column = Path.column
+
+(* A column is given either as an integer or as a spreadsheet-style letter
+   name handled by [ImportCSV.Csv.column_of_string] *)
+let column_of_yojson : Yojson.Safe.t -> int = function
+  | `Int i -> i
+  | `String s -> ImportCSV.Csv.column_of_string s
+  | _ -> raise (Invalid_argument "column")
+
+(* Columns are always serialized back under their letter name *)
+let yojson_of_column i = `String (ImportCSV.Csv.column_to_string i)
+
+type path = Syntax.Path.t = {
+  alias : string option; [@default None] [@yojson_drop_default ( = )]
+  (* External file to load, when the information is missing, load in
+     the current file *)
+  column : column;
+}
+[@@deriving of_yojson]
+
+(** Accept the compact string form [":alias.column"] (alias optional, as in
+    [":A"]).  Any other shape is delegated to the derived decoder above:
+    this [let] intentionally shadows, without [rec], the
+    ppx-generated [path_of_yojson]. *)
+let path_of_yojson : Yojson.Safe.t -> path = function
+  | `String s ->
+      Scanf.sscanf s ":%s@.%s" (fun table column ->
+          if String.equal column "" then
+            (* No dot: the single component is the column itself *)
+            { alias = None; column = ImportCSV.Csv.column_of_string table }
+          else
+            {
+              alias = Some table;
+              column = ImportCSV.Csv.column_of_string column;
+            })
+  | other -> path_of_yojson other
+
+(** Inverse of [path_of_yojson]: always emits the compact
+    [":alias.column"] string form. *)
+let yojson_of_path : path -> Yojson.Safe.t =
+ fun { alias; column } ->
+  let table =
+    match alias with
+    | None -> ""
+    | Some table -> String.cat table "."
+  in
+
+  `String
+    (String.concat ~sep:""
+       [ ":"; table; ImportCSV.Csv.column_to_string column ])
+
+type table = Table.t = {
+  file : string;
+  tab : int; [@default 1] [@yojson_drop_default ( = )]
+  name : string;
+}
+[@@deriving of_yojson]
+
+(* Wire format of an external in the v1 json syntax: the source is a plain
+   name and the intern_key columns carry no alias of their own *)
+type extern = {
+  source : string option; [@default None] [@yojson_drop_default ( = )]
+  intern_key : column expression;
+  target : table;
+  extern_key : column expression;
+  allow_missing : bool; [@default false] [@yojson_drop_default ( = )]
+  match_rule : string option; [@default None] [@yojson_drop_default ( = )]
+}
+[@@deriving of_yojson]
+
+type syntax_v1_extern = Syntax.extern
+
+(* Lift a v1 extern into the internal syntax: the [source] alias is pushed
+   down into every column of [intern_key] *)
+let syntax_v1_extern_of_yojson yojson =
+  let e = extern_of_yojson yojson in
+  let intern_key : path Expression.t =
+    Expression.map e.intern_key ~f:(fun column ->
+        Syntax.Path.{ column; alias = e.source })
+  in
+  Syntax.
+    {
+      extern_key = e.extern_key;
+      intern_key;
+      target = e.target;
+      allow_missing = e.allow_missing;
+      match_rule = e.match_rule;
+    }
+
+(* Predicates are accepted but ignored by this version *)
+type predicate = unit
+
+let predicate_of_yojson _ = ()
+let yojson_of_predicate () = `Null
+
+type t = Syntax.t = {
+  version : int; [@default current_syntax]
+  source : table;
+  externals : syntax_v1_extern list; [@default []]
+  columns : path expression list;
+  filters : path expression list; [@default []] [@yojson_drop_default ( = )]
+  sort : path expression list; [@default []] [@yojson_drop_default ( = )]
+  uniq : path expression list; [@default []] [@yojson_drop_default ( = )]
+}
+[@@deriving of_yojson]
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
new file mode 100644
index 0000000..8d467a5
--- /dev/null
+++ b/lib/configuration/read_conf.ml
@@ -0,0 +1,216 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+module ExpressionParser : sig
+  type 'a path_builder
+
+  val path : Path.t path_builder
+  val column : Path.column path_builder
+
+  (** Parse an expression; on failure the [Error] carries a human readable
+      message pointing at the faulty part of the input. *)
+  val of_string :
+    'a path_builder -> string -> ('a ImportExpression.T.t, string) result
+end = struct
+  module MI = Expression_parser.MenhirInterpreter
+  module E = MenhirLib.ErrorReports
+  module L = MenhirLib.LexerUtil
+
+  (* Location of a parse error; positions are relative to the beginning of
+     the faulty line. *)
+  type error = {
+    message : string;
+    start_line : int;
+    start_pos : int;
+    end_pos : int;
+  }
+
+  (* Build an [error] value from two lexing positions and a message. *)
+  let range_message start_pos end_pos message =
+    let start_c = start_pos.Lexing.pos_cnum - start_pos.Lexing.pos_bol
+    and end_c = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol in
+    {
+      message;
+      start_line = start_pos.Lexing.pos_bol;
+      start_pos = start_c;
+      end_pos = end_c;
+    }
+
+  (** Extract the line in error from the whole expression, and print some
+      characters just under the faulty part *)
+  let get_line_error : error -> string -> string =
+   fun error content ->
+    let sub_text =
+      try
+        let end_pos = String.index_from content error.start_line '\n' in
+
+        String.sub content ~pos:error.start_line
+          ~len:(end_pos - error.start_line)
+      with
+      | Not_found ->
+          (* There is no new line, extract the ending part *)
+          let len = String.length content - error.start_line in
+          String.sub content ~pos:error.start_line ~len
+    in
+    (* I’m not sure how to produce it, but the error may be over two lines.
+       This line is here to prevent the underline to overflow. *)
+    let stop_pos = min error.end_pos (String.length sub_text) in
+    let error_length = stop_pos - error.start_pos in
+    String.concat ~sep:""
+      [
+        sub_text;
+        "\n";
+        String.make error.start_pos ' ';
+        String.make error_length '^';
+      ]
+
+  (* Build the message attached to the current parser state, falling back to
+     a generic one when [Expression_parser_messages] has no entry for it.
+     [lexbuf] only provides the default error location. *)
+  let get_parse_error lexbuf env : error =
+    match MI.stack env with
+    | (lazy Nil) ->
+        range_message lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p
+          "Invalid syntax\n"
+    | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) ->
+        let message =
+          try Expression_parser_messages.message (MI.number state) with
+          | Not_found -> "Invalid syntax (no specific message for this error)\n"
+        in
+
+        range_message start_pos end_pos message
+
+  type 'a path_builder =
+    Lexing.position -> 'a ImportExpression.T.t MI.checkpoint
+
+  (* Drive the incremental Menhir parser, feeding it one token at a time. *)
+  let rec _parse lexbuf (checkpoint : 'a ImportExpression.T.t MI.checkpoint) =
+    match checkpoint with
+    | MI.InputNeeded _env ->
+        let token = Expression_lexer.token lexbuf in
+        let startp = lexbuf.lex_start_p and endp = lexbuf.lex_curr_p in
+        let checkpoint = MI.offer checkpoint (token, startp, endp) in
+        _parse lexbuf checkpoint
+    | MI.Shifting _ | MI.AboutToReduce _ ->
+        let checkpoint = MI.resume checkpoint in
+        _parse lexbuf checkpoint
+    | MI.HandlingError env ->
+        let err = get_parse_error lexbuf env in
+        Error err
+    | MI.Accepted v -> Ok v
+    | MI.Rejected ->
+        let err =
+          range_message lexbuf.lex_start_p lexbuf.lex_curr_p
+            "invalid syntax (parser rejected the input)"
+        in
+        Error err
+
+  (* Entry point: parse [str_expression], turning every failure (parse error
+     or lexer exception) into a printable [Error] message. *)
+  let of_string :
+      'a path_builder -> string -> ('a ImportExpression.T.t, string) result =
+   fun f str_expression ->
+    try
+      let lexbuf = Lexing.from_string str_expression in
+      let init = f lexbuf.lex_curr_p in
+      match _parse lexbuf init with
+      | Ok res -> Ok res
+      | Error e ->
+          let message =
+            String.concat ~sep:"\n"
+              [ e.message; get_line_error e str_expression ]
+          in
+          Error message
+    with
+    | Expression_lexer.UnclosedQuote { line; content } ->
+        let message =
+          Printf.sprintf "Unclosed quote at line %d : \"%s\"" line content
+        in
+        Error message
+    | e ->
+        let message = Printexc.to_string e in
+        Error message
+
+  let path = Expression_parser.Incremental.path_expr
+  let column = Expression_parser.Incremental.column_expr
+end
+
+(** Configuration reader built on top of any [Decoders] backend (TOML in
+    practice, through [Decoders.Decode.S]). *)
+module Make (S : Decoders.Decode.S) = struct
+  (* Monadic / applicative operators of the underlying decoder *)
+  let ( let* ) = S.( let* )
+  let ( and* ) = S.( and* )
+  let ( >>= ) = S.( >>= )
+  let ( >|= ) = S.( >|= )
+
+  class loader =
+    object (self)
+      (* Decode a string field through the expression parser; a parse error
+         becomes a decoding failure carrying the parser message. *)
+      method parse_expression : type a.
+          a ExpressionParser.path_builder ->
+          S.value ->
+          (a ImportExpression.T.t, S.value Decoders.Error.t) result =
+        fun path ->
+          S.string >>= fun v ->
+          match ExpressionParser.of_string path v with
+          | Ok expr -> S.succeed expr
+          | Error e -> S.fail_with Decoders.Error.(make e)
+
+      (* The [source] table; [tab] defaults to the first sheet *)
+      method source =
+        let* file = S.field "file" S.string
+        and* name = S.field "name" S.string
+        and* tab = S.field_opt_or ~default:1 "tab" S.int in
+        S.succeed { Table.file; name; tab }
+
+      (* One external; [name] is the key under which it was declared.
+         This format has no [match_rule] field, so it is always [None]. *)
+      method external_ name =
+        let* intern_key =
+          S.field "intern_key" (self#parse_expression ExpressionParser.path)
+        and* extern_key =
+          S.field "extern_key" (self#parse_expression ExpressionParser.column)
+        and* file = S.field "file" S.string
+        and* tab = S.field_opt_or ~default:1 "tab" S.int
+        and* allow_missing =
+          S.field_opt_or ~default:false "allow_missing" S.bool
+        in
+
+        S.succeed
+          Syntax.
+            {
+              intern_key;
+              extern_key;
+              target = { name; file; tab };
+              allow_missing;
+              match_rule = None;
+            }
+
+      (* The [sheet] section.  The result is a function waiting for the
+         version, source and externals decoded elsewhere. *)
+      method sheet =
+        let* columns =
+          S.field "columns"
+          @@ S.list (self#parse_expression ExpressionParser.path)
+        and* filters =
+          S.field_opt_or ~default:[] "filters"
+          @@ S.list (self#parse_expression ExpressionParser.path)
+        and* sort =
+          S.field_opt_or ~default:[] "sort"
+          @@ S.list (self#parse_expression ExpressionParser.path)
+        and* uniq =
+          S.field_opt_or ~default:[] "uniq"
+          @@ S.list (self#parse_expression ExpressionParser.path)
+        in
+        S.succeed @@ fun version source externals ->
+        Syntax.{ version; source; externals; columns; filters; sort; uniq }
+
+      (* Whole configuration; the version is fixed to 1 for this reader *)
+      method conf =
+        let* source = S.field "source" self#source
+        and* externals =
+          S.field_opt_or ~default:[] "externals"
+            (S.key_value_pairs_seq self#external_)
+        in
+        let* sheet =
+          S.field "sheet" self#sheet >|= fun v -> v 1 source externals
+        in
+
+        S.succeed sheet
+    end
+
+  (* NOTE(review): [read_file] and [read] duplicate the error rendering and
+     both go through the shared [Format.str_formatter] buffer. *)
+  let read_file file =
+    S.decode_file (new loader)#conf file
+    |> Result.map_error (fun v ->
+           let formatter = Format.str_formatter in
+           Format.fprintf formatter "%a@." S.pp_error v;
+           Format.flush_str_formatter ())
+
+  let read toml =
+    S.decode_value (new loader)#conf toml
+    |> Result.map_error (fun v ->
+           let formatter = Format.str_formatter in
+           Format.fprintf formatter "%a@." S.pp_error v;
+           Format.flush_str_formatter ())
+end
diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml
new file mode 100644
index 0000000..8efdc59
--- /dev/null
+++ b/lib/configuration/syntax.ml
@@ -0,0 +1,88 @@
+open StdLabels
+module E = ImportExpression.T
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+(* TOML representation of a table; [tab] is omitted when it is the default
+   first sheet *)
+let toml_of_table Table.{ file; tab; name } =
+  let values = [ ("file", Otoml.string file); ("name", Otoml.string name) ] in
+  let values =
+    match tab with
+    | 1 -> values
+    | tab -> ("tab", Otoml.integer tab) :: values
+  in
+
+  Otoml.table values
+
+(** An external table joined to the root table: [intern_key] is evaluated in
+    the referencing table and matched against [extern_key] in [target]. *)
+type extern = {
+  intern_key : Path.t E.t;
+  target : Table.t;
+  extern_key : Path.column E.t;
+  allow_missing : bool;
+  match_rule : string option;
+}
+
+(* TOML representation of an external.  The table [name] is not written
+   here: it is used as the key of the enclosing table (see
+   [toml_of_externs]).  NOTE(review): [match_rule] is not serialized. *)
+let toml_of_extern extern =
+  let values =
+    [
+      ( "intern_key",
+        Otoml.string
+        @@ ImportExpression.Repr.repr ~top:true Path.repr extern.intern_key );
+      ( "extern_key",
+        Otoml.string
+        @@ ImportExpression.Repr.repr ~top:true
+             (fun v -> ":" ^ ImportCSV.Csv.column_to_string v)
+             extern.extern_key );
+      ("file", Otoml.string extern.target.file);
+      ("allow_missing", Otoml.boolean extern.allow_missing);
+    ]
+  in
+
+  (* As in [toml_of_table], the default first sheet is left implicit *)
+  let values =
+    match extern.target.tab with
+    | 1 -> values
+    | tab -> ("tab", Otoml.integer tab) :: values
+  in
+
+  Otoml.table values
+
+(* Externals are grouped in a single TOML table, keyed by target name *)
+let toml_of_externs externs =
+  List.map externs ~f:(fun e -> (e.target.name, toml_of_extern e))
+  |> Otoml.table
+
+(** A whole import configuration. *)
+type t = {
+  version : int;
+  source : Table.t;
+  externals : extern list;
+  columns : Path.t E.t list;
+  filters : Path.t E.t list;
+  sort : Path.t E.t list;
+  uniq : Path.t E.t list;
+}
+
+(** [repr t] rebuilds the TOML document corresponding to the
+    configuration [t]. *)
+let repr t =
+  let repr_expression_list l =
+    Otoml.array
+      (List.map l ~f:(fun v ->
+           Otoml.string (ImportExpression.Repr.repr ~top:true Path.repr v)))
+  in
+
+  let sheet =
+    Otoml.table
+      [
+        ("columns", repr_expression_list t.columns);
+        ("filters", repr_expression_list t.filters);
+        ("sort", repr_expression_list t.sort);
+        ("uniq", repr_expression_list t.uniq);
+      ]
+  in
+
+  let values =
+    [
+      ("version", Otoml.integer t.version);
+      ("source", toml_of_table t.source);
+      ("externals", toml_of_externs t.externals);
+      ("sheet", sheet);
+    ]
+  in
+
+  Otoml.table values
diff --git a/lib/containers/dune b/lib/containers/dune
new file mode 100755
index 0000000..46d0e24
--- /dev/null
+++ b/lib/containers/dune
@@ -0,0 +1,7 @@
+(library
+ (name importContainers)
+ (libraries
+ importDataTypes
+ importConf
+ )
+)
diff --git a/lib/containers/importContainers.ml b/lib/containers/importContainers.ml
new file mode 100644
index 0000000..bf65ba4
--- /dev/null
+++ b/lib/containers/importContainers.ml
@@ -0,0 +1,61 @@
+module Conf = ImportConf
+module Syntax = Conf.Syntax
+module Table = ImportDataTypes.Table
+
+(** This key is used to create the table of each externals in the
+    configuration.
+
+    This table allows to check if there are cycles between the references *)
+module KeyName : sig
+  type t
+
+  val compare : t -> t -> int
+  val from_table : Table.t -> t
+  val hash : t -> int
+  val equal : t -> t -> bool
+end = struct
+  type t = Table.t
+
+  let compare v1 v2 = String.compare (Table.name v1) (Table.name v2)
+  (* We use the alias given in the configuration as key, because we want to
+     be sure there is no cycle in the dependencies. It’s OK to have the same
+     file used in different sources, but the sources cannot induce cycles *)
+
+  let from_table (s : Table.t) = s
+
+  (* [equal] only considers the table name, so [hash] must do the same:
+     hashing the whole record would break the hash-table invariant
+     "equal x y implies hash x = hash y" for two tables sharing a name but
+     differing in file or tab. *)
+  let hash v = Hashtbl.hash (Table.name v)
+  let equal v1 v2 = String.equal (Table.name v1) (Table.name v2)
+end
+
+(** A source participating in the dependency graph, compared by its
+    configured name. *)
+module Source : sig
+  type t
+
+  val compare : t -> t -> int
+  val from_table : Table.t -> t
+  val hash : t -> int
+  val equal : t -> t -> bool
+  val name : t -> KeyName.t
+end = struct
+  type t = Table.t
+
+  let compare v1 v2 = String.compare v1.Table.name v2.Table.name
+  (* We use the alias given in the configuration as key, because we want to
+     be sure there is no cycle in the dependencies. It’s OK to have the same
+     file used in different sources, but the sources cannot induce cycles *)
+
+  let from_table (s : Table.t) = s
+
+  (* Keep [hash] consistent with [equal], which only compares names:
+     hashing the whole record would violate
+     "equal x y implies hash x = hash y". *)
+  let hash v = Hashtbl.hash v.Table.name
+  let equal v1 v2 = String.equal v1.Table.name v2.Table.name
+  let name t = KeyName.from_table t
+end
+
+module Externals = MoreLabels.Map.Make (KeyName)
+module IntSet = MoreLabels.Set.Make (Int)
+
+(* Debug helper: render a set as a comma-separated list of its elements
+   (with a trailing comma) *)
+let show_intSet set =
+  let b = Buffer.create 16 in
+  IntSet.iter
+    ~f:(fun v ->
+      Buffer.add_string b (string_of_int v);
+      Buffer.add_char b ',')
+    set;
+  Buffer.contents b
diff --git a/lib/csv/csv.ml b/lib/csv/csv.ml
new file mode 100644
index 0000000..db7329d
--- /dev/null
+++ b/lib/csv/csv.ml
@@ -0,0 +1,30 @@
+open StdLabels
+
+(* A column index: "A" is 1, "Z" is 26, "AA" is 27… (bijective base 26) *)
+type t = int
+
+(* 1-based rank of a letter; both cases are accepted.
+   @raise Invalid_argument on a non-letter character *)
+let column_of_char = function
+  | 'A' .. 'Z' as c -> Char.code c - (Char.code 'A' - 1)
+  | 'a' .. 'z' as c -> Char.code c - (Char.code 'a' - 1)
+  | c -> raise (Invalid_argument ("column: " ^ Char.escaped c))
+
+(** [column_of_string s] converts a letter-based column name into its
+    number, e.g. ["A"] gives [1].  An empty string gives [0]. *)
+let column_of_string : string -> int =
+ fun s ->
+  String.fold_left s ~init:0 ~f:(fun value c -> (value * 26) + column_of_char c)
+
+(** Accumulate the remaining for the successives divisions in a list. *)
+let rec _to_char ~b i =
+  if i > 0 then
+    let res = i mod 26 in
+    (* In bijective base 26 there is no zero digit: a remainder of 0 stands
+       for the letter 'Z' (26) *)
+    let res = if res = 0 then 26 else res in
+
+    let c = char_of_int @@ (res + 64) in
+    (* The modulo is accumulated in the list head, which is the expected
+       sequence *)
+    let b = c :: b in
+
+    _to_char ~b @@ ((i - res) / 26)
+  else b
+
+(* Inverse of [column_of_string]: 1 gives "A", 27 gives "AA" *)
+let column_to_string i =
+  let res = _to_char ~b:[] i in
+  List.to_seq res |> String.of_seq
diff --git a/lib/csv/dataType.ml b/lib/csv/dataType.ml
new file mode 100644
index 0000000..c582b9c
--- /dev/null
+++ b/lib/csv/dataType.ml
@@ -0,0 +1,21 @@
+(* Recognize strings shaped like a date (digits separated by slashes);
+   the match is only anchored at the start of the string *)
+let match_date = Re.Str.regexp {|[0-9]+/[0-9]+/[0-9]+|}
+
+(** A cell value read from a sheet. *)
+type t =
+  | Null
+  | Error of string
+  | Content of string
+  | Integer of int
+  | Float of float
+
+(** Render a value for csv output.  Text starting with ['0'] is wrapped in
+    an Excel formula (["=\"…\""]) so the leading zero is preserved, unless
+    it looks like a date. *)
+let to_string = function
+  | Null -> ""
+  | Error s -> s
+  | Integer i -> string_of_int i
+  | Float f -> string_of_float f
+  | Content c -> (
+      match String.starts_with ~prefix:"0" c with
+      | false -> c
+      | true ->
+          (* If the string is a date, do not escape it *)
+          if Re.Str.string_match match_date c 0 then c
+          else String.concat "" [ "=\""; c; "\"" ])
diff --git a/lib/csv/dataType.mli b/lib/csv/dataType.mli
new file mode 100644
index 0000000..ebb8bc7
--- /dev/null
+++ b/lib/csv/dataType.mli
@@ -0,0 +1,8 @@
+(** A cell value read from a sheet. *)
+type t =
+  | Null
+  | Error of string
+  | Content of string
+  | Integer of int
+  | Float of float
+
+(** Render a value for csv output (see the implementation for the escaping
+    of leading zeros). *)
+val to_string : t -> string
diff --git a/lib/csv/dune b/lib/csv/dune
new file mode 100755
index 0000000..b0f4a72
--- /dev/null
+++ b/lib/csv/dune
@@ -0,0 +1,6 @@
+(library
+ (name importCSV)
+ (libraries
+ re
+ )
+)
diff --git a/lib/data_types/dune b/lib/data_types/dune
new file mode 100644
index 0000000..e38310b
--- /dev/null
+++ b/lib/data_types/dune
@@ -0,0 +1,10 @@
+(library
+ (name importDataTypes)
+ (libraries
+ importCSV
+ )
+
+ (preprocess (pps ppx_deriving.ord))
+ )
+
+
diff --git a/lib/data_types/path.ml b/lib/data_types/path.ml
new file mode 100644
index 0000000..6684b5a
--- /dev/null
+++ b/lib/data_types/path.ml
@@ -0,0 +1,15 @@
+(** A column number, 1-based (see [ImportCSV.Csv]). *)
+type column = int [@@deriving ord]
+
+(** A reference to a column, optionally qualified by the external holding
+    it. *)
+type t = {
+  alias : string option;
+  (* External file to load, when the information is missing, load in
+     the current file *)
+  column : column;
+}
+[@@deriving ord]
+
+(** [repr path] prints the path in the [":alias.column"] form used by the
+    configuration language; the alias part is omitted when absent. *)
+let repr { alias; column } =
+  let column_text = ImportCSV.Csv.column_to_string column in
+  match alias with
+  | None -> ":" ^ column_text
+  | Some value -> ":" ^ value ^ "." ^ column_text
diff --git a/lib/data_types/readme.rst b/lib/data_types/readme.rst
new file mode 100644
index 0000000..ac609d2
--- /dev/null
+++ b/lib/data_types/readme.rst
@@ -0,0 +1,4 @@
+This module contains all the types used in the application.
+
+It does not depend on any other library, and does not cause any dependency
+cycle.
diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml
new file mode 100644
index 0000000..d807c5c
--- /dev/null
+++ b/lib/data_types/table.ml
@@ -0,0 +1,19 @@
+open StdLabels
+
+(** A sheet inside a spreadsheet file. *)
+type t = {
+  file : string;
+  tab : int;
+  name : string;
+}
+
+(** Get the internal name for the given table.
+
+    This value may differ from the association name given in the configuration. *)
+let name : t -> string =
+ fun source ->
+  (* The name derives from the file name without directory nor extension *)
+  let file_name =
+    source.file |> Filename.basename |> Filename.remove_extension
+  in
+  (* Only a non-default tab is made visible in the name *)
+  match source.tab with
+  | 1 -> file_name
+  | _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ]
diff --git a/lib/data_types/types.ml b/lib/data_types/types.ml
new file mode 100644
index 0000000..37fd90f
--- /dev/null
+++ b/lib/data_types/types.ml
@@ -0,0 +1,15 @@
+(** The value types manipulated by the importer expressions. *)
+type t =
+  | Number
+  | String
+  | Bool
+  | None
+  | Extern
+  | Float
+
+(* Human readable name of a type, used in error messages *)
+let string_of_t : t -> string = function
+  | Number -> "Number"
+  | String -> "String"
+  | Bool -> "Bool"
+  | None -> "None"
+  | Extern -> "Extern"
+  | Float -> "Float"
diff --git a/lib/errors/dune b/lib/errors/dune
new file mode 100644
index 0000000..ab71219
--- /dev/null
+++ b/lib/errors/dune
@@ -0,0 +1,9 @@
+(library
+ (name importErrors)
+ (libraries
+ csv
+ sqlite3
+ importCSV
+ importDataTypes
+ )
+)
diff --git a/lib/errors/importErrors.ml b/lib/errors/importErrors.ml
new file mode 100644
index 0000000..04f9deb
--- /dev/null
+++ b/lib/errors/importErrors.ml
@@ -0,0 +1,98 @@
+open StdLabels
+module CSV = ImportCSV
+module Table = ImportDataTypes.Table
+
+(* UTF-8 byte order mark, optionally written at the head of the error file *)
+let bom = "\xEF\xBB\xBF"
+
+(** One faulty row, with enough context to locate it in its source file. *)
+type xlsError = {
+  source : Table.t;
+  row : int;
+  sheet : int;
+  target : Table.t option;
+  value : CSV.DataType.t;
+  exn : exn;
+}
+
+exception
+  JsonError of {
+    json : string;
+    element : string;
+  }
+
+exception InvalidEncoding of string
+exception NullKey of int
+exception SqlError of Sqlite3.Rc.t
+exception MisplacedWindow
+exception Unknown_source of string
+exception Unknown_extension of string
+
+exception Cycle of string list
+(** Cycle between the dependencies *)
+
+exception
+  TypeError of {
+    expected : ImportDataTypes.Types.t;
+    actual : ImportDataTypes.Types.t;
+    expression : string;
+    subset : string;
+  }
+
+(** Render an exception as the single-line message written in the error
+    report. *)
+let repr_error = function
+  | SqlError s -> Printf.sprintf "%s Error" (Sqlite3.Rc.to_string s)
+  | JsonError { json; element } -> Printf.sprintf "%s : %s" element json
+  | NullKey k ->
+      Printf.sprintf "The key %s is null" (ImportCSV.Csv.column_to_string k)
+  | Unknown_source source ->
+      Printf.sprintf "The source %s is referenced without being declared"
+        source
+  | MisplacedWindow -> "A group function cannot appear here"
+  | TypeError { expected; actual; expression; subset } ->
+      Printf.sprintf
+        "In this expression %s has type %s but %s was expected:\n%s" subset
+        (ImportDataTypes.Types.string_of_t actual)
+        (ImportDataTypes.Types.string_of_t expected)
+        expression
+  | Unknown_extension ext -> Printf.sprintf "Unknown file extension %s" ext
+  | Cycle deps ->
+      Printf.sprintf "Cycle between the dependencies : %s"
+        (String.concat ~sep:"," deps)
+  | Failure e -> e
+  | e ->
+      (* Unexpected exception: keep the backtrace for debugging *)
+      Printexc.print_backtrace Stdlib.stdout;
+      Printexc.to_string e
+
+(* The error channel is lazy: the file is only created when a first error
+   has to be reported *)
+type t = Csv.out_channel Lazy.t
+
+(** Append one error as a csv record; forces the creation of the error file
+    on first use. *)
+let output_error : t -> xlsError -> unit =
+ fun csv error ->
+  let target =
+    match error.target with
+    | None -> ""
+    | Some value -> value.name
+  in
+
+  Csv.output_record (Lazy.force csv)
+    [
+      Table.name error.source;
+      string_of_int error.sheet;
+      string_of_int error.row;
+      target;
+      CSV.DataType.to_string error.value;
+      repr_error error.exn;
+    ]
+
+(** [log ~with_bom prefix dirname] prepares the error report
+    [dirname/prefix_errors.csv]; [with_bom] prepends a UTF-8 byte order
+    mark.  Nothing is written until the first error. *)
+let log : with_bom:bool -> string -> string -> t =
+ fun ~with_bom prefix dirname ->
+  lazy
+    (let file = Filename.concat dirname (String.cat prefix "_errors.csv") in
+
+     prerr_endline @@ String.cat "Errors found. See the file " file;
+
+     (* Open the error file *)
+     let channel = Out_channel.open_bin file in
+     if with_bom then output_string channel bom;
+     let log_error = Csv.to_channel ~separator:';' ~excel_tricks:true channel in
+     (* Generate a header for the error file *)
+     Csv.output_record log_error
+       [ "Source"; "Tab"; "Row"; "Target"; "Value"; "Error" ];
+     log_error)
diff --git a/lib/errors/importErrors.mli b/lib/errors/importErrors.mli
new file mode 100644
index 0000000..7d17a5d
--- /dev/null
+++ b/lib/errors/importErrors.mli
@@ -0,0 +1,46 @@
+module CSV = ImportCSV
+
+exception SqlError of Sqlite3.Rc.t
+exception MisplacedWindow
+
+exception Unknown_source of string
+(** Exception raised when a source used in a path is not declared as an
+    external *)
+
+exception Cycle of string list
+(** Cycle between the dependencies *)
+
+exception Unknown_extension of string
+
+exception
+  JsonError of {
+    json : string;
+    element : string;
+  }
+
+(** One faulty row, with enough context to locate it in its source file. *)
+type xlsError = {
+  source : ImportDataTypes.Table.t;
+  row : int;
+  sheet : int;
+  target : ImportDataTypes.Table.t option;
+  value : CSV.DataType.t;
+  exn : exn;
+}
+
+exception
+  TypeError of {
+    expected : ImportDataTypes.Types.t;
+    actual : ImportDataTypes.Types.t;
+    expression : string;
+    subset : string;
+  }
+
+exception InvalidEncoding of string
+exception NullKey of int
+
+(** Render an exception as a single-line error message. *)
+val repr_error : exn -> string
+
+(* The error file, created lazily on the first reported error *)
+type t = Csv.out_channel Lazy.t
+
+(** Append one error as a csv record. *)
+val output_error : t -> xlsError -> unit
+
+(** [log ~with_bom prefix dirname] prepares the error report in [dirname]. *)
+val log : with_bom:bool -> string -> string -> t
diff --git a/lib/expression/ast.ml b/lib/expression/ast.ml
new file mode 100644
index 0000000..ef083e9
--- /dev/null
+++ b/lib/expression/ast.ml
@@ -0,0 +1,31 @@
+(** This module rebuilds an AST from an evaluation *)
+
+(* Representation and observation coincide: the evaluation result *is* the
+   AST itself *)
+type 'a repr = 'a T.t
+type 'a obs = 'a T.t
+type 'a path_repr = unit
+
+let observe : 'a repr -> 'a obs = Fun.id
+let empty : unit -> 'a repr = fun () -> T.Empty
+let expr : 'a repr -> 'a repr = fun t -> T.Expr t
+let literal : string -> 'a repr = fun s -> T.Literal s
+let integer : string -> 'a repr = fun i -> T.Integer i
+
+(* The path printer is ignored here: the path value is stored as-is *)
+let path : 'a path_repr -> 'a -> 'a repr = fun _repr p -> T.Path p
+let concat : 'a repr list -> 'a repr = fun ll -> T.Concat ll
+
+let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun w groups order -> T.Window (w, groups, order)
+
+let nvl : 'a repr list -> 'a repr = fun ll -> T.Nvl ll
+let join : string -> 'a repr list -> 'a repr = fun s ll -> T.Join (s, ll)
+
+let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+ fun op e1 e2 -> T.BOperator (op, e1, e2)
+
+let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+ fun op e1 ll -> T.GEquality (op, e1, ll)
+
+let funct : string -> 'a repr list -> 'a repr =
+ fun name args -> T.Function (name, args)
+
+let function' : T.funct -> 'a repr list -> 'a repr =
+ fun f args -> T.Function' (f, args)
diff --git a/lib/expression/compose.ml b/lib/expression/compose.ml
new file mode 100644
index 0000000..028602b
--- /dev/null
+++ b/lib/expression/compose.ml
@@ -0,0 +1,150 @@
+open StdLabels
+
+(** Build an expression module with the result from another expression. The
+    signature of the functions is a bit different, as they all receive the
+    result from the previous evaluated element in argument. *)
+module Expression
+    (E : Sym.SYM_EXPR)
+    (R : sig
+      val v : 'a E.path_repr
+    end) =
+struct
+  (* The signature expected from the wrapped module: each constructor also
+     receives the observation of the [E] expression being built *)
+  module type SIG = sig
+    type 'a repr
+    type 'a obs
+    type 'a path_repr
+
+    val empty : 'a E.obs -> 'a repr
+    val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr
+    val literal : string -> 'a E.obs -> 'a repr
+    val integer : string -> 'a E.obs -> 'a repr
+    val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr
+    val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+    val window :
+      ('a E.obs * 'a repr) T.window ->
+      ('a E.obs * 'a repr) list ->
+      ('a E.obs * 'a repr) list ->
+      'a E.obs ->
+      'a repr
+
+    val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+    val boperator :
+      T.binary_operator ->
+      'a E.obs * 'a repr ->
+      'a E.obs * 'a repr ->
+      'a E.obs ->
+      'a repr
+
+    val gequality :
+      T.binary_operator ->
+      'a E.obs * 'a repr ->
+      ('a E.obs * 'a repr) list ->
+      'a E.obs ->
+      'a repr
+
+    val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val observe : 'a E.obs * 'a repr -> 'a obs
+  end
+
+  (* Pair each [E] representation with the [M] one, threading the [E]
+     observation into every [M] constructor *)
+  module Make (M : SIG) = struct
+    type 'a repr = 'a E.repr * 'a M.repr
+    type 'a obs = 'a M.obs
+    type 'a path_repr = 'a M.path_repr
+
+    (* Split a list of paired representations into the [E] side and the
+       (observed [E], [M]) side expected by the [M] constructors *)
+    let map' : 'a repr list -> 'a E.repr list * ('a E.obs * 'a M.repr) list =
+     fun ll ->
+      let e = List.map ~f:fst ll in
+      (e, List.map ll ~f:(fun (e, m) -> (E.observe e, m)))
+
+    let observe : 'a repr -> 'a obs = fun (t, v) -> M.observe (E.observe t, v)
+
+    let empty : unit -> 'a repr =
+     fun () ->
+      let e = E.empty () in
+      (e, M.empty (E.observe e))
+
+    let expr : 'a repr -> 'a repr =
+     fun (e, m) ->
+      let e' = E.expr e in
+      (e', M.expr (E.observe e, m) (E.observe e'))
+
+    let literal : string -> 'a repr =
+     fun litt ->
+      let e = E.literal litt in
+      (e, M.literal litt (E.observe e))
+
+    let integer : string -> 'a repr =
+     fun i ->
+      let e' = E.integer i in
+      (e', M.integer i (E.observe e'))
+
+    (* The [E] side always receives the fixed printer [R.v] *)
+    let path : 'b path_repr -> 'b -> 'a repr =
+     fun path_repr path ->
+      let e = E.path R.v path in
+      let m = M.path path_repr path (E.observe e) in
+      (e, m)
+
+    let concat : 'a repr list -> 'a repr =
+     fun reprs ->
+      let e, m = map' reprs in
+      let e' = E.concat e in
+      (e', M.concat m (E.observe e'))
+
+    let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+     fun window expressions order ->
+      let e_expressions, m_expressions = map' expressions
+      and e_order, m_order = map' order
+      and e_window = T.map_window window ~f:fst
+      and m_window = T.map_window window ~f:(fun (e, m) -> (E.observe e, m)) in
+
+      let e = E.window e_window e_expressions e_order in
+      (e, M.window m_window m_expressions m_order (E.observe e))
+
+    let nvl : 'a repr list -> 'a repr =
+     fun reprs ->
+      let e, m = List.split reprs in
+
+      let e' = E.nvl e in
+      let e = List.map ~f:E.observe e in
+      (e', M.nvl (List.combine e m) (E.observe e'))
+
+    let join : string -> 'a repr list -> 'a repr =
+     fun sep reprs ->
+      let e_reprs, m = map' reprs in
+
+      let e = E.join sep e_reprs in
+      (e, M.join sep m (E.observe e))
+
+    let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+     fun op (e1, m1) (e2, m2) ->
+      let e1' = E.observe e1
+      and e2' = E.observe e2
+      and e = E.boperator op e1 e2 in
+      let m' = M.boperator op (e1', m1) (e2', m2) (E.observe e) in
+      (e, m')
+
+    let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+     fun op (e1, m1) exprs ->
+      let e_reprs, m_reprs = map' exprs in
+      let e' = E.gequality op e1 e_reprs in
+      let m' = M.gequality op (E.observe e1, m1) m_reprs (E.observe e') in
+      (e', m')
+
+    let funct : string -> 'a repr list -> 'a repr =
+     fun sep reprs ->
+      let e_reprs, m = map' reprs in
+
+      let e = E.funct sep e_reprs in
+      (e, M.funct sep m (E.observe e))
+
+    let function' : T.funct -> 'a repr list -> 'a repr =
+     fun f reprs ->
+      let e_reprs, m = map' reprs in
+      let e = E.function' f e_reprs in
+      (e, M.function' f m (E.observe e))
+  end
+end
diff --git a/lib/expression/compose.mli b/lib/expression/compose.mli
new file mode 100644
index 0000000..4cced8c
--- /dev/null
+++ b/lib/expression/compose.mli
@@ -0,0 +1,59 @@
+(** Build an expression module with the result from another expression. The
+    signature of the functions is a bit different, as they all receive the
+    result from the previous evaluated element in argument. *)
+module Expression
+    (E : Sym.SYM_EXPR)
+    (_ : sig
+      val v : 'a E.path_repr
+    end) : sig
+  (** The signature for the module the adapt is a bit different for the
+      SYM_EXPR: every function takes an extra argument which is the Expression
+      we are wrapping, and every expression becomes a tuple with the same
+      expression represented in the composed type. *)
+  module type SIG = sig
+    type 'a repr
+    type 'a obs
+    type 'a path_repr
+
+    val empty : 'a E.obs -> 'a repr
+    val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr
+    val literal : string -> 'a E.obs -> 'a repr
+    val integer : string -> 'a E.obs -> 'a repr
+    val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr
+    val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+    val window :
+      ('a E.obs * 'a repr) T.window ->
+      ('a E.obs * 'a repr) list ->
+      ('a E.obs * 'a repr) list ->
+      'a E.obs ->
+      'a repr
+
+    val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+    val boperator :
+      T.binary_operator ->
+      'a E.obs * 'a repr ->
+      'a E.obs * 'a repr ->
+      'a E.obs ->
+      'a repr
+
+    val gequality :
+      T.binary_operator ->
+      'a E.obs * 'a repr ->
+      ('a E.obs * 'a repr) list ->
+      'a E.obs ->
+      'a repr
+
+    val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+    val observe : 'a E.obs * 'a repr -> 'a obs
+  end
+
+  (** The composed module evaluates both [E] and [M] at once. *)
+  module Make (M : SIG) :
+    Sym.SYM_EXPR
+      with type 'a obs = 'a M.obs
+       and type 'a repr = 'a E.repr * 'a M.repr
+       and type 'a path_repr = 'a M.path_repr
+end
diff --git a/lib/expression/dune b/lib/expression/dune
new file mode 100755
index 0000000..96e386e
--- /dev/null
+++ b/lib/expression/dune
@@ -0,0 +1,9 @@
+(library
+ (name importExpression)
+ (libraries
+ re
+ importCSV
+ importDataTypes
+ importErrors
+ )
+)
diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml
new file mode 100644
index 0000000..42c794b
--- /dev/null
+++ b/lib/expression/filters.ml
@@ -0,0 +1,193 @@
+(** This module evaluates the sql query to use in order to filter an
+    expression.
+
+    The result is built over [Query], except for the group functions, which
+    are translated into a CTE in sql. *)
+
+open StdLabels
+module Q = Query
+
+type 'a result = {
+  repr : Format.formatter -> nested:Query.QueryParameter.t -> unit;
+      (** Printer emitting the sql fragment for the filter *)
+  group : 'a T.t option;
+      (** The window expression extracted from the filter, if any; it is
+          evaluated separately as a CTE by the analysers *)
+}
+
+module Filter = struct
+  type 'a repr = {
+    repr : 'a Q.Query.repr;  (** The sql representation of the expression *)
+    with_group : 'a T.t option;
+        (** The window expression found inside this node, if any *)
+  }
+
+  type 'a obs = 'a result
+  type 'a path_repr = 'a Q.Query.path_repr
+
+  (** Split a list of sub-expressions into their query representations and
+      the first window expression found among them, if any. *)
+  let split :
+      ('a Ast.obs * 'a repr) list -> 'a Q.Query.repr list * 'a T.t option =
+   fun exprs ->
+    ( List.map ~f:(fun v -> (snd v).repr) exprs,
+      List.find_map ~f:(fun v -> (snd v).with_group) exprs )
+
+  (** Raise [MisplacedWindow] when a window expression occurs in a context
+      where it cannot be nested. *)
+  let no_group : 'a T.t option -> unit = function
+    | None -> ()
+    | Some _ -> raise ImportErrors.MisplacedWindow
+
+  let observe : 'a Ast.obs * 'a repr -> 'a obs =
+   fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group }
+
+  let empty : 'a Ast.obs -> 'a repr =
+   fun _ -> { repr = Q.Query.empty (); with_group = None }
+
+  let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr =
+   fun (_, expr) _ ->
+    { repr = Q.Query.expr expr.repr; with_group = expr.with_group }
+
+  let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr =
+   fun repr p _ -> { repr = Q.Query.path repr p; with_group = None }
+
+  let literal : string -> 'a Ast.obs -> 'a repr =
+   fun l _ -> { repr = Q.Query.literal l; with_group = None }
+
+  let integer : string -> 'a Ast.obs -> 'a repr =
+   fun l _ -> { repr = Q.Query.integer l; with_group = None }
+
+  let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+   fun expression _ ->
+    let expr_repr, with_group = split expression in
+    no_group with_group;
+    { repr = Q.Query.nvl expr_repr; with_group = None }
+
+  let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+   fun expression _ ->
+    let expr_repr, with_group = split expression in
+    no_group with_group;
+    { repr = Q.Query.concat expr_repr; with_group = None }
+
+  let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+   fun sep expression _ ->
+    let expr_repr, with_group = split expression in
+    no_group with_group;
+    { repr = Q.Query.join sep expr_repr; with_group = None }
+
+  let boperator :
+      T.binary_operator ->
+      'a Ast.obs * 'a repr ->
+      'a Ast.obs * 'a repr ->
+      'a Ast.obs ->
+      'a repr =
+   fun name (_, e1) (_, e2) _ ->
+    (* A window expression may occur on at most one side of the operator *)
+    let with_group =
+      match (e1.with_group, e2.with_group) with
+      | Some e, None | None, Some e -> Some e
+      | None, None -> None
+      | Some _, Some _ -> raise ImportErrors.MisplacedWindow
+    in
+    { repr = Q.Query.boperator name e1.repr e2.repr; with_group }
+
+  let gequality :
+      T.binary_operator ->
+      'a Ast.obs * 'a repr ->
+      ('a Ast.obs * 'a repr) list ->
+      'a Ast.obs ->
+      'a repr =
+   fun name (_, e1) group _ ->
+    (* No window is allowed inside the group; the left-hand side may carry
+       one, which is simply propagated *)
+    let group_repr, with_group = split group in
+    no_group with_group;
+    {
+      repr = Q.Query.gequality name e1.repr group_repr;
+      with_group = e1.with_group;
+    }
+
+  let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+   fun name expressions _ ->
+    let expr_repr, with_group = split expressions in
+    no_group with_group;
+    { repr = Q.Query.funct name expr_repr; with_group = None }
+
+  let function' :
+      T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+   fun name expressions _ ->
+    let expr_repr, with_group = split expressions in
+    no_group with_group;
+    {
+      repr = Q.Query.funct (T.name_of_function name) expr_repr;
+      with_group = None;
+    }
+
+  (** Window functions are not handled in the filters, we save them as an AST
+      in order to process them in a separated handler.
+
+      It is not allowed to build nested window functions. *)
+  let window :
+      ('a Ast.obs * 'a repr) T.window ->
+      ('a Ast.obs * 'a repr) list ->
+      ('a Ast.obs * 'a repr) list ->
+      'a Ast.obs ->
+      'a repr =
+   fun name expressions order ast ->
+    ignore name;
+    no_group (snd (split expressions));
+    no_group (snd (split order));
+    (* The column name used with the cte. The name is fixed here, and used
+       as is in [Analysers.Query.build_cte] and
+       [Analysers.Query.eval_filters] *)
+    let q = "cte.group0" in
+    {
+      with_group = Some ast;
+      repr = Q.Query.funct "expr" [ Q.Query.literal q ];
+    }
+end
+
+(* Wrap the [Ast] analyser so each evaluated node also carries its raw Ast
+   observation; the Ast analyser needs no path representation (unit). *)
+module ASTBuilder =
+  Compose.Expression
+    (Ast)
+    (struct
+      let v = ()
+    end)
+
+(* The filter analyser, sealed to the SYM_EXPR interface. *)
+module F :
+  Sym.SYM_EXPR
+    with type 'a obs = 'a result
+    and type 'a path_repr = Format.formatter -> 'a -> unit =
+  ASTBuilder.Make (Filter)
+
+module M = Sym.M (F)
+
+(** Print the sql filter fragment for [expr] on [formatter].
+
+    Returns the collected bind parameters (when [parameter] is [BindParam])
+    together with the window expression to evaluate in a separate CTE, if
+    the filter contains one. *)
+let query_of_expression :
+    type b.
+    b Q.binded_query ->
+    Format.formatter ->
+    (Format.formatter -> 'a -> unit) ->
+    'a T.t ->
+    b * 'a T.t option =
+ fun parameter formatter printer expr ->
+  (* Observe once; both branches need the same value *)
+  let value = F.observe (M.eval ~path_repr:printer expr) in
+  match parameter with
+  | BindParam ->
+      let queue = Queue.create () in
+      value.repr ~nested:(Q.QueryParameter.Queue queue) formatter;
+      (queue, value.group)
+  | NoParam ->
+      value.repr ~nested:Q.QueryParameter.Literal formatter;
+      ((), value.group)
diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli
new file mode 100644
index 0000000..d462b5f
--- /dev/null
+++ b/lib/expression/filters.mli
@@ -0,0 +1,9 @@
+module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit
+(** Query used inside the filter clauses *)
+
+val query_of_expression :
+  'b Query.binded_query ->
+  Format.formatter ->
+  (Format.formatter -> 'a -> unit) ->
+  'a T.t ->
+  'b
+(** [query_of_expression mode formatter path_printer expr] prints the sql
+    fragment for [expr] on [formatter]; the result carries the bound
+    parameters for [BindParam] mode. *)
diff --git a/lib/expression/headers.ml b/lib/expression/headers.ml
new file mode 100644
index 0000000..6371e4f
--- /dev/null
+++ b/lib/expression/headers.ml
@@ -0,0 +1,89 @@
+open StdLabels
+
+(* Drop the last [n] bytes of [buffer] (used to remove a trailing
+   separator). Requires the buffer to hold at least [n] bytes, otherwise
+   [Buffer.truncate] raises [Invalid_argument]. *)
+let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
+
+(* Printer producing a human readable form of an expression into a
+   buffer. *)
+module E :
+  Sym.SYM_CHUNK
+    with type 'a obs = buffer:Buffer.t -> unit
+    and type 'a path_repr = 'a -> Buffer.t -> unit = struct
+  type 'a repr = buffer:Buffer.t -> unit
+  type 'a obs = buffer:Buffer.t -> unit
+  type 'a path_repr = 'a -> Buffer.t -> unit
+
+  (* Print every element of [args] separated by [sep].
+
+     Interleaving the separator is safe for empty lists, unlike the
+     previous write-then-truncate approach which chopped bytes written
+     before the list (or raised [Invalid_argument] on a short buffer)
+     when the list was empty. *)
+  let print_list ~sep args ~buffer =
+    match args with
+    | [] -> ()
+    | hd :: tl ->
+        hd ~buffer;
+        List.iter tl ~f:(fun v ->
+            Buffer.add_string buffer sep;
+            v ~buffer)
+
+  (* A bracketed, comma separated group: [a, b, c] *)
+  let group : 'a repr list -> 'a repr =
+   fun args ~buffer ->
+    Buffer.add_string buffer "[";
+    print_list ~sep:", " args ~buffer;
+    Buffer.add_string buffer "]"
+
+  (* A parenthesized, comma separated argument list: (a, b, c) *)
+  let arguments : 'a repr list -> 'a repr =
+   fun expressions ~buffer ->
+    Buffer.add_string buffer "(";
+    print_list ~sep:", " expressions ~buffer;
+    Buffer.add_string buffer ")"
+
+  let observe x ~buffer = x ~buffer
+  let empty : unit -> 'a repr = fun _ ~buffer -> Buffer.add_string buffer "''"
+  let path printer p ~buffer = printer p buffer
+  let literal l ~buffer = Buffer.add_string buffer l
+  let integer l ~buffer = Buffer.add_string buffer l
+
+  let expr expr ~buffer =
+    Buffer.add_char buffer '(';
+    expr ~buffer;
+    Buffer.add_char buffer ')'
+
+  let nvl expression ~buffer =
+    Buffer.add_string buffer "nvl";
+    arguments ~buffer expression
+
+  let concat expression ~buffer = List.iter expression ~f:(fun v -> v ~buffer)
+
+  let join sep expression ~buffer = print_list ~sep expression ~buffer
+
+  (* The sort arguments are not shown in the headers *)
+  let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+   fun name expressions order ~buffer ->
+    ignore order;
+    let name = T.name_of_window name in
+    Buffer.add_string buffer name;
+    arguments ~buffer expressions
+
+  let boperator name e1 e2 ~buffer =
+    e1 ~buffer;
+    Buffer.add_string buffer (T.name_of_operator name);
+    e2 ~buffer
+
+  let gequality name e1 e2 ~buffer =
+    e1 ~buffer;
+    Buffer.add_string buffer (T.name_of_operator name);
+    group ~buffer e2
+
+  let funct name expressions ~buffer =
+    Buffer.add_string buffer name;
+    arguments ~buffer expressions
+
+  let function' name expressions ~buffer =
+    Buffer.add_string buffer (T.name_of_function name);
+    arguments ~buffer expressions
+end
+
+module M = Sym.M (E)
+
+(** Render a human readable form of [expr] into [buffer], using [printer]
+    for the paths. *)
+let headers_of_expression :
+    Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit =
+ fun buffer printer expr ->
+  E.observe (M.eval ~path_repr:printer expr) ~buffer
diff --git a/lib/expression/headers.mli b/lib/expression/headers.mli
new file mode 100644
index 0000000..1fafad0
--- /dev/null
+++ b/lib/expression/headers.mli
@@ -0,0 +1,7 @@
+val headers_of_expression :
+  Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit
+(** Print a human readable form of the expression into the buffer, using
+    the given printer for the paths. *)
+
+module E :
+  Sym.SYM_EXPR
+    with type 'a obs = buffer:Buffer.t -> unit
+    and type 'a path_repr = 'a -> Buffer.t -> unit
diff --git a/lib/expression/lazier.ml b/lib/expression/lazier.ml
new file mode 100644
index 0000000..d8b12d9
--- /dev/null
+++ b/lib/expression/lazier.ml
@@ -0,0 +1,71 @@
+open StdLabels
+
+(** Wrap the interpreter [S] so every representation is built lazily. *)
+module Make (S : Sym.SYM_EXPR) = struct
+  type 'a repr = 'a S.repr Lazy.t
+  type 'a obs = 'a S.obs Lazy.t
+  type 'a path_repr = 'a S.path_repr
+
+  (* Force every element of a list of lazy representations.
+     Eta-expanded to keep the function polymorphic (value restriction). *)
+  let force_all : 'a repr list -> 'a S.repr list =
+   fun l -> List.map ~f:Lazy.force l
+
+  let empty : unit -> 'a repr = fun () -> lazy (S.empty ())
+  let expr : 'a repr -> 'a repr = fun e -> Lazy.map S.expr e
+  let literal : string -> 'a repr = fun l -> lazy (S.literal l)
+  let integer : string -> 'a repr = fun i -> lazy (S.integer i)
+
+  let path : 'b path_repr -> 'b -> 'a repr =
+   fun repr path -> lazy (S.path repr path)
+
+  let concat : 'a repr list -> 'a repr =
+   fun exprs -> lazy (S.concat (force_all exprs))
+
+  let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+   fun w group sort ->
+    lazy
+      (S.window
+         (T.map_window ~f:Lazy.force w)
+         (force_all group) (force_all sort))
+
+  let nvl : 'a repr list -> 'a repr =
+   fun exprs -> lazy (S.nvl (force_all exprs))
+
+  let join : string -> 'a repr list -> 'a repr =
+   fun sep exprs -> lazy (S.join sep (force_all exprs))
+
+  let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+   fun op e1 e2 -> lazy (S.boperator op (Lazy.force e1) (Lazy.force e2))
+
+  let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+   fun op e exprs -> lazy (S.gequality op (Lazy.force e) (force_all exprs))
+
+  let funct : string -> 'a repr list -> 'a repr =
+   fun name exprs -> lazy (S.funct name (force_all exprs))
+
+  let function' : T.funct -> 'a repr list -> 'a repr =
+   fun f exprs -> lazy (S.function' f (force_all exprs))
+
+  let observe : 'a repr -> 'a obs = fun v -> Lazy.map S.observe v
+end
diff --git a/lib/expression/query.ml b/lib/expression/query.ml
new file mode 100644
index 0000000..5bd914a
--- /dev/null
+++ b/lib/expression/query.ml
@@ -0,0 +1,335 @@
+(**
+ This module create an sql query from an expression.
+ *)
+
+open StdLabels
+
+(** This type is used in the query builder (see [query_of_expression] just
+    below) in order to tell if we need to bind the parameters in the query,
+    or if we can use plain literals as is (with some risk at execution
+    time). *)
+type _ binded_query =
+  | BindParam : ImportCSV.DataType.t Queue.t binded_query
+  | NoParam : unit binded_query
+
+module QueryParameter = struct
+  (** Internally we distinguish the Literal chunks (which must be quoted),
+      the Queue chunks (bound as sql parameters) and the Raw ones (handed
+      to the sql engine as is).
+
+      The Raw can be generated from both BindParam or NoParam queries. *)
+  type t =
+    | Literal
+    | Queue of ImportCSV.DataType.t Queue.t
+    | Raw of t
+
+  (** Wrap the given parameter mode into the raw mode (idempotent). *)
+  let raw : t -> t = function
+    | Raw _ as already_raw -> already_raw
+    | mode -> Raw mode
+
+  (** Nest the parameter in order to use it inside another function call.
+
+      The rule is to get out of the Raw mode as soon as we dive into
+      another function. *)
+  let nest : t -> t = function
+    | Raw t -> t
+    | other -> other
+end
+
+(* Compose the type analyser with the query printer: each chunk is
+   evaluated together with its inferred type, so external references can
+   be coalesced/cast where needed. *)
+module TypeBuilder =
+  Compose.Expression
+    (Type_of)
+    (struct
+      let v = ignore
+    end)
+
+module Query = TypeBuilder.Make (struct
+  (* A chunk of sql: a printer parameterized by the way literal values
+     must be emitted (see [QueryParameter.t]). *)
+  type 'a repr = Format.formatter -> nested:QueryParameter.t -> unit
+  type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit
+  type 'a path_repr = Format.formatter -> 'a -> unit
+
+  let observe : 'a Type_of.obs * 'a repr -> 'a obs =
+   fun (_, x) formatter ~nested ->
+    let () = x formatter ~nested in
+    Format.pp_print_flush formatter ()
+
+  (** Unify an external reference with a given type, using the COALESCE
+      function *)
+  let unify :
+      with_:Type_of.t ->
+      nested:QueryParameter.t ->
+      Format.formatter ->
+      'a Type_of.obs * 'a repr ->
+      unit =
+   fun ~with_ ~nested format (type_of, expr) ->
+    match (type_of, with_) with
+    | ImportDataTypes.Types.Extern, Number
+    | ImportDataTypes.Types.Extern, Extern ->
+        Format.fprintf format "COALESCE(%a,0)"
+          (fun f expr -> expr f ~nested)
+          expr
+    | ImportDataTypes.Types.Extern, String ->
+        Format.fprintf format "COALESCE(%a,'')"
+          (fun f expr -> expr f ~nested)
+          expr
+    | _, Float ->
+        (* Forcing REAL avoids sql integer division truncation *)
+        Format.fprintf format "CAST(%a AS REAL)"
+          (fun f expr -> expr f ~nested)
+          expr
+    | _, _ -> expr ~nested format
+
+  let empty : 'a Type_of.obs -> 'a repr =
+   fun type_of formatter ~nested ->
+    ignore type_of;
+    ignore nested;
+    Format.fprintf formatter "''"
+
+  let expr : 'a Type_of.obs * 'a repr -> 'a Type_of.obs -> 'a repr =
+   fun expr type_of formatter ~nested ->
+    ignore type_of;
+    Format.fprintf formatter "(";
+    (snd expr) ~nested formatter;
+    Format.fprintf formatter ")"
+
+  (* A literal is rendered according to the parameter mode: quoted when
+     inlined, bound as [?] in BindParam mode, passed verbatim in Raw
+     mode. *)
+  let literal : string -> 'a Type_of.obs -> 'a repr =
+   fun l type_of formatter ~nested ->
+    ignore type_of;
+    match nested with
+    | QueryParameter.Literal ->
+        (* If the text is a true literal, we insert it directly. This is
+           only called from the [query_of_expression] function *)
+        Format.fprintf formatter "'%s'" l
+    | QueryParameter.Queue queue ->
+        Format.fprintf formatter "?";
+        Queue.add (ImportCSV.DataType.Content l) queue
+    | QueryParameter.Raw _ -> Format.fprintf formatter "%s" l
+
+  let integer : string -> 'a Type_of.obs -> 'a repr =
+   fun l type_of formatter ~nested ->
+    ignore type_of;
+    ignore nested;
+    Format.fprintf formatter "%s" l
+
+  let path : 'b path_repr -> 'b -> 'a Type_of.obs -> 'a repr =
+   fun repr p type_of formatter ~nested ->
+    ignore nested;
+    ignore type_of;
+    repr formatter p
+
+  (* Concatenation coerces every element to a string ([||] operator). *)
+  let concat : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+   fun expression type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+
+    Format.pp_print_list
+      ~pp_sep:(fun f () -> Format.fprintf f " || ")
+      (unify ~with_:ImportDataTypes.Types.String ~nested:nested')
+      formatter expression
+
+  (* Print a separated list of sub-expressions with the given parameter
+     mode. *)
+  let print_expression :
+      ?sep:string ->
+      QueryParameter.t ->
+      Format.formatter ->
+      ('a Type_of.obs * 'a repr) list ->
+      unit =
+   fun ?(sep = ", ") nested formatter expression ->
+    (Format.pp_print_list
+       ~pp_sep:(fun f () -> Format.fprintf f "%s" sep)
+       (fun f v -> (snd v) f ~nested))
+      formatter expression
+
+  (** Format the partition expression. This function is used internally and
+      only forms the expression inside the clause. *)
+  let group_windows :
+      QueryParameter.t ->
+      Format.formatter ->
+      ('a Type_of.obs * 'a repr) list
+      * ('a Type_of.obs * 'a repr) list
+      * string option ->
+      unit =
+   fun nested formatter (expressions, order, range) ->
+    match (expressions, order) with
+    | [], _ -> ()
+    | _, [] ->
+        Format.fprintf formatter " OVER (PARTITION BY %a%a)"
+          (print_expression nested) expressions
+          (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+          range
+    | _, _ ->
+        Format.fprintf formatter " OVER (PARTITION BY %a ORDER BY %a%a)"
+          (print_expression nested) expressions (print_expression nested) order
+          (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+          range
+
+  let window :
+      ('a Type_of.obs * 'a repr) T.window ->
+      ('a Type_of.obs * 'a repr) list ->
+      ('a Type_of.obs * 'a repr) list ->
+      'a Type_of.obs ->
+      'a repr =
+   fun name expressions order type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+
+    (* By default, the range is defined like this
+
+       [RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS]
+
+       this only build a range until the current row, but in some cases (min,
+       last), we want to scan the whole group in order to evaluate the value to
+       keep.
+    *)
+    let range = " RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in
+    match name with
+    | T.Min expr ->
+        (* NOTE(review): this format string ends with a trailing space,
+           unlike the Max branch below — confirm whether intentional. *)
+        Format.fprintf formatter "FIRST_VALUE(%a)%a "
+          (fun f v -> (snd v) f ~nested:nested')
+          expr (group_windows nested')
+          (expressions, order, Some range)
+    | T.Max expr ->
+        Format.fprintf formatter "LAST_VALUE(%a)%a"
+          (fun f v -> (snd v) f ~nested:nested')
+          expr (group_windows nested')
+          (expressions, order, Some range)
+    | T.Counter ->
+        (* If no order is given, return the number of elements in the
+           whole group *)
+        let operator =
+          match order with
+          | [] -> "COUNT"
+          | _ -> "ROW_NUMBER"
+        in
+        Format.fprintf formatter "%s()%a" operator (group_windows nested')
+          (expressions, order, None)
+    | T.Previous expr ->
+        Format.fprintf formatter "LAG(%a)%a"
+          (fun f v -> (snd v) f ~nested:nested')
+          expr (group_windows nested') (expressions, order, None)
+    | T.Sum expr ->
+        Format.fprintf formatter "SUM(%a)%a"
+          (fun f v -> (snd v) f ~nested:nested')
+          expr (group_windows nested') (expressions, order, None)
+
+  let nvl : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+   fun expression type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+    Format.fprintf formatter "COALESCE(%a)" (print_expression nested')
+      expression
+
+  let join :
+      string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+   fun sep expression type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+
+    (* Directly call the literal function for the first argument *)
+    Format.fprintf formatter "CONCAT(%a, %a)"
+      (fun f v -> (literal v ImportDataTypes.Types.String) f ~nested:nested')
+      sep (print_expression nested') expression
+
+  let boperator :
+      T.binary_operator ->
+      'a Type_of.obs * 'a repr ->
+      'a Type_of.obs * 'a repr ->
+      'a Type_of.obs ->
+      'a repr =
+   fun name e1 e2 type_of formatter ~nested ->
+    ignore type_of;
+    (* When dividing, we need to be sure that the type is a float,
+       otherwise SQL will truncate the result *)
+    let with_ =
+      match name with
+      | T.Division -> ImportDataTypes.Types.Float
+      | _ -> fst e2
+    in
+
+    let nested' = QueryParameter.nest nested in
+    Format.fprintf formatter "%a%s%a"
+      (unify ~with_ ~nested:nested')
+      e1
+      (* The operator *)
+      (T.name_of_operator name)
+      (unify ~with_:(fst e1) ~nested:nested')
+      e2
+
+  let gequality :
+      T.binary_operator ->
+      'a Type_of.obs * 'a repr ->
+      ('a Type_of.obs * 'a repr) list ->
+      'a Type_of.obs ->
+      'a repr =
+   fun name e1 group type_of ->
+    ignore type_of;
+    let group_type = List.map ~f:fst group in
+    fun formatter ~nested ->
+      let nested' = QueryParameter.nest nested in
+      (* NOTE(review): for operators other than Equal/Different the prefix
+         is empty but the closing ")" below is still printed — confirm
+         gequality is only ever built with Equal or Different. *)
+      let op_name =
+        match name with
+        | T.Equal -> " IN("
+        | T.Different -> " NOT IN("
+        | _ -> ""
+      in
+
+      Format.fprintf formatter "%a%s%a)"
+        (unify ~with_:(Type_of.group' group_type) ~nested:nested')
+        e1 op_name (print_expression nested') group
+
+  (* Raw sub-expression: everything inside is handed to the engine with
+     literals unquoted. *)
+  let exprs expressions formatter ~nested =
+    (* Literal expression, starting from now, all the quoted string are
+       directly given to the sql engine *)
+    let nested' = QueryParameter.raw nested in
+
+    Format.fprintf formatter "(%a)"
+      (print_expression ~sep:" " nested')
+      expressions
+
+  let rec funct :
+      string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+   fun name expressions type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+    match name with
+    | "expr" ->
+        (* Raw expression are parsed directly *)
+        exprs expressions formatter ~nested
+    | "if" ->
+        (* The if is renamed into IIF *)
+        funct "IIF" expressions type_of formatter ~nested
+    | _ ->
+        (* Default case *)
+        Format.fprintf formatter "%s(%a)" name (print_expression nested')
+          expressions
+
+  let function' :
+      T.funct -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+   fun name expressions type_of formatter ~nested ->
+    ignore type_of;
+    let nested' = QueryParameter.nest nested in
+    match name with
+    | Upper | Trim ->
+        Format.fprintf formatter "%s(%a)" (T.name_of_function name)
+          (print_expression nested') expressions
+end)
+
+module M = Sym.M (Query)
+
+(** Print the sql fragment for [expr] on [formatter].
+
+    With [BindParam], literals are collected in the returned queue and
+    replaced by placeholders; with [NoParam] they are inlined directly. *)
+let query_of_expression :
+    type b.
+    b binded_query ->
+    Format.formatter ->
+    (Format.formatter -> 'a -> unit) ->
+    'a T.t ->
+    b =
+ fun parameter formatter printer expr ->
+  let repr = M.eval ~path_repr:printer expr in
+  match parameter with
+  | BindParam ->
+      let queue = Queue.create () in
+      Query.observe repr formatter ~nested:(QueryParameter.Queue queue);
+      queue
+  | NoParam -> Query.observe repr formatter ~nested:QueryParameter.Literal
diff --git a/lib/expression/query.mli b/lib/expression/query.mli
new file mode 100644
index 0000000..fa789a9
--- /dev/null
+++ b/lib/expression/query.mli
@@ -0,0 +1,27 @@
+module QueryParameter : sig
+  (** Internally, we need to keep a different type for the Literal chunks
+      (which require to be quoted), and raw (which should be given as is to
+      the sql engine).
+
+      The Raw can be generated from both BindParam or NoParam queries. *)
+  type t =
+    | Literal
+    | Queue of ImportCSV.DataType.t Queue.t
+    | Raw of t
+end
+
+type _ binded_query =
+  | BindParam : ImportCSV.DataType.t Queue.t binded_query
+  | NoParam : unit binded_query
+(** Whether the generated query binds its literals ([BindParam], the queue
+    of bound values is returned) or inlines them ([NoParam]). *)
+
+val query_of_expression :
+  'b binded_query ->
+  Format.formatter ->
+  (Format.formatter -> 'a -> unit) ->
+  'a T.t ->
+  'b
+(** [query_of_expression mode formatter path_printer expr] prints the sql
+    fragment for [expr] on [formatter]; returns the queue of bound
+    parameters when [mode] is [BindParam]. *)
+
+module Query :
+  Sym.SYM_EXPR
+    with type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit
+    and type 'a path_repr = Format.formatter -> 'a -> unit
diff --git a/lib/expression/repr.ml b/lib/expression/repr.ml
new file mode 100644
index 0000000..4990236
--- /dev/null
+++ b/lib/expression/repr.ml
@@ -0,0 +1,127 @@
+open StdLabels
+
+(* NOTE(review): despite the name, this regexp matches a single quote. *)
+let escape_dquote = Re.Str.regexp "'"
+
+(* Escape single quotes in [content] before quoting it (see [literal]).
+   NOTE(review): the replacement template is the two characters [\'] —
+   confirm Str keeps the backslash as intended (sql usually doubles the
+   quote instead). *)
+let escape content = Re.Str.global_replace escape_dquote "\\'" content
+
+(* Printer giving back the textual (configuration-like) form of an
+   expression. [top] tells whether we are at the outermost level, where an
+   empty expression prints as nothing instead of ''. *)
+module E :
+  Sym.SYM_CHUNK
+    with type 'a obs = top:bool -> string
+    and type 'a path_repr = 'a -> string = struct
+  type 'a repr = top:bool -> string
+  type 'a obs = top:bool -> string
+  type 'a path_repr = 'a -> string
+
+  let observe x = x
+
+  (* A bracketed, comma separated group: [a, b, c] *)
+  let group : 'a repr list -> 'a repr =
+   fun args ~top ->
+    let args_repr = List.map ~f:(fun v -> v ~top) args in
+    let args = String.concat ~sep:", " args_repr in
+    "[" ^ args ^ "]"
+
+  (* A parenthesized, comma separated argument list: (a, b, c) *)
+  let arguments : 'a repr list -> 'a repr =
+   fun args ~top ->
+    let args_repr = List.map ~f:(fun v -> v ~top) args in
+    let args = String.concat ~sep:", " args_repr in
+    "(" ^ args ^ ")"
+
+  let empty : unit -> 'a repr =
+   fun () ~top ->
+    match top with
+    | false -> "''"
+    | true -> ""
+
+  (* Integer-looking literals stay unquoted; anything else is quoted with
+     single quotes escaped. *)
+  let literal : string -> 'a repr =
+   fun l ~top ->
+    if String.equal String.empty l then (empty ()) ~top
+    else
+      match int_of_string_opt l with
+      | Some _ -> l
+      | None -> "'" ^ escape l ^ "'"
+
+  let integer : string -> 'a repr =
+   fun l ~top -> if String.equal String.empty l then (empty ()) ~top else l
+
+  let expr : 'a repr -> 'a repr =
+   fun expr ~top ->
+    ignore top;
+    String.concat ~sep:"" [ "("; expr ~top:false; ")" ]
+
+  let path : 'b path_repr -> 'b -> 'a repr =
+   fun path_repr p ~top ->
+    ignore top;
+    path_repr p
+
+  let concat : 'a repr list -> 'a repr =
+   fun elems ~top ->
+    ignore top;
+    let top = false in
+    let strs = List.map elems ~f:(fun v -> v ~top) in
+    String.concat ~sep:" ^ " strs
+
+  (* A window prints as name(arg, [group…], [sort…]); the leading argument
+     depends on the window kind (Counter takes none). *)
+  let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+   fun name g1 sort ~top ->
+    ignore top;
+
+    let args1 = group ~top:false g1
+    and args2 = group ~top:false sort
+    and f_name = T.name_of_window name in
+    let args = [ args1; args2 ] in
+    let args =
+      match name with
+      | T.Counter -> args
+      | T.Min prefix_arg
+      | T.Max prefix_arg
+      | T.Previous prefix_arg
+      | T.Sum prefix_arg -> prefix_arg ~top:false :: args
+    in
+
+    f_name ^ "(" ^ String.concat ~sep:", " args ^ ")"
+
+  let nvl : 'a repr list -> 'a repr =
+   fun elems ~top ->
+    ignore top;
+    let args = arguments ~top:false elems in
+    "nvl" ^ args
+
+  (* The separator becomes the first argument of the join() call. *)
+  let join : string -> 'a repr list -> 'a repr =
+   fun sep elems ~top ->
+    ignore top;
+    let header = literal sep in
+    let args = arguments ~top:false (header :: elems) in
+    "join" ^ args
+
+  let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+   fun op arg1 arg2 ~top ->
+    ignore top;
+    let top = false in
+    let sep = T.name_of_operator op in
+    String.concat ~sep [ arg1 ~top; arg2 ~top ]
+
+  let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+   fun op arg1 arg2 ~top ->
+    ignore top;
+    let top = false in
+    let sep = T.name_of_operator op in
+    let args = group ~top:false arg2 in
+    String.concat ~sep [ arg1 ~top; args ]
+
+  let funct : string -> 'a repr list -> 'a repr =
+   fun f args ~top ->
+    ignore top;
+    let args = arguments ~top:false args in
+    f ^ args
+
+  let function' : T.funct -> 'a repr list -> 'a repr =
+   fun f args ~top ->
+    ignore top;
+    let args = arguments ~top:false args in
+    T.name_of_function f ^ args
+end
+
+module M = Sym.M (E)
+
+(** Give back the textual form of [expr], using [printer] for the paths;
+    [top] controls how an empty outermost expression is printed. *)
+let repr : ?top:bool -> ('a -> string) -> 'a T.t -> string =
+ fun ?(top = false) printer expr ->
+  E.observe (M.eval ~path_repr:printer expr) ~top
diff --git a/lib/expression/repr.mli b/lib/expression/repr.mli
new file mode 100644
index 0000000..4431655
--- /dev/null
+++ b/lib/expression/repr.mli
@@ -0,0 +1,6 @@
+val repr : ?top:bool -> ('a -> string) -> 'a T.t -> string
+(** Give back the textual form of the expression, using the given printer
+    for the paths. With [top] set, an empty outermost expression is
+    printed as nothing. *)
+
+module E :
+  Sym.SYM_EXPR
+    with type 'a obs = top:bool -> string
+    and type 'a path_repr = 'a -> string
diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml
new file mode 100644
index 0000000..0360e8e
--- /dev/null
+++ b/lib/expression/sym.ml
@@ -0,0 +1,71 @@
+(** The signature for an expression analyzer.
+
+ Every element is mapped to a function, using the tagless final pattern.
+
+ *)
+module type SYM_EXPR = sig
+  type 'a repr
+  type 'a obs
+  type 'a path_repr
+
+  (* Leaves *)
+  val empty : unit -> 'a repr
+  val expr : 'a repr -> 'a repr
+  val literal : string -> 'a repr
+  val integer : string -> 'a repr
+  val path : 'a path_repr -> 'a -> 'a repr
+  (* Combinators; [window] receives the partition and order expressions,
+     [gequality] compares one expression against a group of values. *)
+  val concat : 'a repr list -> 'a repr
+  val window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr
+  val nvl : 'a repr list -> 'a repr
+  val join : string -> 'a repr list -> 'a repr
+  val boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr
+  val gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr
+  val funct : string -> 'a repr list -> 'a repr
+  val function' : T.funct -> 'a repr list -> 'a repr
+  (* Extract the observable result from a representation. *)
+  val observe : 'a repr -> 'a obs
+end
+
+(* A [SYM_EXPR] which can additionally print bracketed groups and
+   parenthesized argument lists (used by the textual printers). *)
+module type SYM_CHUNK = sig
+  include SYM_EXPR
+
+  val group : 'a repr list -> 'a repr
+  val arguments : 'a repr list -> 'a repr
+end
+
+open StdLabels
+
+(** Evaluator folding a {!T.t} expression into the tagless-final
+    interpreter [Expr]; [path_repr] tells the interpreter how to render
+    the ['a] paths. *)
+module M (Expr : SYM_EXPR) = struct
+  let rec eval : path_repr:'a Expr.path_repr -> 'a T.t -> 'c Expr.repr =
+   fun ~path_repr t ->
+    match t with
+    | T.Expr expr -> Expr.expr (eval ~path_repr expr)
+    | T.Empty -> Expr.empty ()
+    | T.Literal s -> Expr.literal s
+    | T.Integer i -> Expr.integer i
+    | T.Concat elems -> Expr.concat (List.map elems ~f:(eval ~path_repr))
+    | T.Function (name, args) ->
+        Expr.funct name (List.map args ~f:(eval ~path_repr))
+    | T.Function' (name, args) ->
+        Expr.function' name (List.map args ~f:(eval ~path_repr))
+    | T.Nvl elems -> Expr.nvl (List.map elems ~f:(eval ~path_repr))
+    | T.Join (sep, args) -> Expr.join sep (List.map args ~f:(eval ~path_repr))
+    | T.Window (name, group, sort) ->
+        Expr.window
+          (eval_window ~path_repr name)
+          (List.map group ~f:(eval ~path_repr))
+          (List.map sort ~f:(eval ~path_repr))
+    | T.BOperator (op, arg1, arg2) ->
+        Expr.boperator op (eval ~path_repr arg1) (eval ~path_repr arg2)
+    | T.GEquality (op, arg1, arg2) ->
+        Expr.gequality op (eval ~path_repr arg1)
+          (List.map arg2 ~f:(eval ~path_repr))
+    | T.Path p -> Expr.path path_repr p
+
+  (* The window kind may itself carry an expression (e.g. Min/Max). *)
+  and eval_window :
+      path_repr:'a Expr.path_repr -> 'a T.t T.window -> 'a Expr.repr T.window =
+   fun ~path_repr -> function
+    | Min a -> Min (eval ~path_repr a)
+    | Max a -> Max (eval ~path_repr a)
+    | Counter -> Counter
+    | Previous a -> Previous (eval ~path_repr a)
+    | Sum a -> Sum (eval ~path_repr a)
+end
diff --git a/lib/expression/t.ml b/lib/expression/t.ml
new file mode 100644
index 0000000..7e61317
--- /dev/null
+++ b/lib/expression/t.ml
@@ -0,0 +1,153 @@
+open StdLabels
+
+(* Window functions, parameterized by the expression they apply to
+   (Counter takes none). *)
+type 'a window =
+  | Min of 'a
+  | Max of 'a
+  | Counter
+  | Previous of 'a
+  | Sum of 'a
+
+(* The expression tree; ['a] is the type of the external references
+   (paths). *)
+type 'a t =
+  | Empty
+  | Expr of 'a t
+  | Literal of string
+  | Integer of string
+  | Path of 'a
+  | Concat of 'a t list
+  | Function of string * 'a t list
+  | Nvl of 'a t list
+  | Join of string * 'a t list
+  | Window of ('a t window * 'a t list * 'a t list)
+  | BOperator of binary_operator * 'a t * 'a t
+  | GEquality of binary_operator * 'a t * 'a t list
+  | Function' of funct * 'a t list
+
+and binary_operator =
+  | Equal
+  | Different
+  | Add
+  | Minus
+  | Division
+  | LT
+  | GT
+  | And
+  | Or
+
+(* Functions with a dedicated constructor (see [name_of_function]). *)
+and funct =
+  | Upper
+  | Trim
+
+(* Sql name of each built-in function. *)
+let name_of_function f =
+  match f with
+  | Upper -> "UPPER"
+  | Trim -> "TRIM"
+
+(* Sql spelling of each binary operator; the textual operators keep their
+   surrounding spaces. *)
+let name_of_operator op =
+  match op with
+  | Equal -> "="
+  | Different -> "<>"
+  | Add -> "+"
+  | Minus -> "-"
+  | Division -> "/"
+  | LT -> "<"
+  | GT -> ">"
+  | And -> " and "
+  | Or -> " or "
+
+(* Configuration-facing name of each window function. *)
+let name_of_window w =
+  match w with
+  | Min _ -> "min"
+  | Max _ -> "max"
+  | Counter -> "counter"
+  | Previous _ -> "previous"
+  | Sum _ -> "sum"
+
+(* Apply [f] to the expression carried by the window kind, if any. *)
+let map_window : f:('a -> 'b) -> 'a window -> 'b window =
+ fun ~f window ->
+  match window with
+  | Min t -> Min (f t)
+  | Max t -> Max (f t)
+  | Counter -> Counter
+  | Previous t -> Previous (f t)
+  | Sum t -> Sum (f t)
+
+(** Extract the kind of the window function from the given name.
+    @raise Not_found when the name/argument combination is invalid. *)
+let window_of_name name opt =
+  match (name, opt) with
+  | "counter", None -> Counter
+  | "min", Some p -> Min p
+  | "max", Some p -> Max p
+  | "previous", Some p -> Previous p
+  | "sum", Some p -> Sum p
+  | _ -> raise Not_found
+
+(* Compare two expressions, using [f] to compare the Path leaves. *)
+let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int =
+ fun f e1 e2 ->
+  match (e1, e2) with
+  | Empty, Empty -> 0
+  | Literal l1, Literal l2 -> String.compare l1 l2
+  | Integer l1, Integer l2 -> String.compare l1 l2
+  | Path p1, Path p2 -> f p1 p2
+  | Concat elems1, Concat elems2 | Nvl elems1, Nvl elems2 ->
+      List.compare ~cmp:(cmp f) elems1 elems2
+  | Function (n1, elems1), Function (n2, elems2) ->
+      let name_cmp = String.compare n1 n2 in
+      if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp
+  | Window (s1, l11, l12), Window (s2, l21, l22) -> (
+      (* NOTE(review): [compare s1 s2] is the polymorphic compare on the
+         window kind; it walks any embedded expression without going
+         through [f] — confirm this is intended. *)
+      match compare s1 s2 with
+      | 0 ->
+          let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in
+          if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp
+      | other -> other)
+  | BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin
+      match compare n1 n2 with
+      | 0 -> begin
+          match cmp f arg11 arg21 with
+          | 0 -> cmp f arg12 arg22
+          | other -> other
+        end
+      | other -> other
+    end
+  (* Any other case *)
+  (* NOTE(review): Expr, Join, Function', GEquality and all mixed
+     constructor pairs fall through to the polymorphic compare, which
+     ignores [f] on Path leaves — confirm this is intended. *)
+  | other1, other2 -> Stdlib.compare other1 other2
+
+(* Fold [f] over every Path leaf of the expression (the traversal order is
+   an implementation detail). *)
+let fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b =
+ fun ~f ~init expression ->
+  let rec go acc expr =
+    match expr with
+    | Empty | Literal _ | Integer _ -> acc
+    | Expr e -> go acc e
+    | Path p -> f acc p
+    | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
+      -> List.fold_left ~f:go ~init:acc pp
+    | Window (window_f, pp1, pp2) ->
+        (* Each window function can have a distinct parameter first. *)
+        let acc =
+          match window_f with
+          | Counter -> acc
+          | Min key | Max key | Previous key | Sum key -> go acc key
+        in
+        List.fold_left ~f:go ~init:(List.fold_left ~f:go ~init:acc pp1) pp2
+    | BOperator (_, arg1, arg2) -> go (go acc arg1) arg2
+    | GEquality (_, arg1, args) ->
+        (* The group is folded before the left-hand side. *)
+        go (List.fold_left ~f:go ~init:acc args) arg1
+  in
+  go init expression
+
+(* Replace every Path leaf through [f], keeping the tree shape. Mainly
+   used in the configuration migration. *)
+let map : type a b. f:(a -> b) -> a t -> b t =
+ fun ~f expression ->
+  let rec aux expr =
+    match expr with
+    | Empty -> Empty
+    | Expr e -> Expr (aux e)
+    | Literal s -> Literal s
+    | Integer i -> Integer i
+    | Path p -> Path (f p)
+    | Concat pp -> Concat (List.map ~f:aux pp)
+    | Function (name, pp) -> Function (name, List.map ~f:aux pp)
+    | Function' (name, pp) -> Function' (name, List.map ~f:aux pp)
+    | Nvl pp -> Nvl (List.map ~f:aux pp)
+    | Join (sep, pp) -> Join (sep, List.map ~f:aux pp)
+    | Window (w, pp1, pp2) ->
+        Window (map_window ~f:aux w, List.map ~f:aux pp1, List.map ~f:aux pp2)
+    | BOperator (op, a1, a2) -> BOperator (op, aux a1, aux a2)
+    | GEquality (op, a1, args) -> GEquality (op, aux a1, List.map ~f:aux args)
+  in
+  aux expression
diff --git a/lib/expression/t.mli b/lib/expression/t.mli
new file mode 100644
index 0000000..840805d
--- /dev/null
+++ b/lib/expression/t.mli
@@ -0,0 +1,54 @@
+type 'a window =
+ | Min of 'a
+ | Max of 'a
+ | Counter
+ | Previous of 'a
+ | Sum of 'a
+
+type 'a t =
+ | Empty
+ | Expr of 'a t
+ | Literal of string
+ | Integer of string
+ | Path of 'a
+ | Concat of 'a t list
+ | Function of string * 'a t list
+ | Nvl of 'a t list
+ | Join of string * 'a t list
+ | Window of ('a t window * 'a t list * 'a t list)
+ | BOperator of binary_operator * 'a t * 'a t
+ | GEquality of binary_operator * 'a t * 'a t list
+ | Function' of funct * 'a t list
+
+and binary_operator =
+ | Equal
+ | Different
+ | Add
+ | Minus
+ | Division
+ | LT
+ | GT
+ | And
+ | Or
+
+and funct =
+ | Upper
+ | Trim
+
+val cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int
+(** Compare two expressions *)
+
+val fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b
+(** Fold over all the paths present inside the expression. Used for example to
+ identify all the columns to extract from the file.
+
+ The order is not guaranteed to follow the order from the expression *)
+
+val map : f:('a -> 'b) -> 'a t -> 'b t
+(** The map function. Mainly used in the configuration migration. *)
+
+val name_of_operator : binary_operator -> string
+val name_of_window : 'a window -> string
+val map_window : f:('a -> 'b) -> 'a window -> 'b window
+val window_of_name : string -> 'a option -> 'a window
+val name_of_function : funct -> string
diff --git a/lib/expression/type_of.ml b/lib/expression/type_of.ml
new file mode 100644
index 0000000..ce1a17e
--- /dev/null
+++ b/lib/expression/type_of.ml
@@ -0,0 +1,150 @@
+(**
+ This module evaluates the type of an expression.
+
+ The type is given by an analysis of all the components involved inside
+ the expression. It is used inside the [query] module in order to check if one
+ type needs conversion before being used.
+ *)
+
+open StdLabels
+
+module Lazy_Repr =
+ Compose.Expression
+ (Lazier.Make
+ (Repr.E))
+ (struct
+ let v _ = ""
+ end)
+
+type t = ImportDataTypes.Types.t
+
+(** Fold over the list of parameters and ensure all the elements are typed in
+the same way *)
+let group' : t list -> t =
+ fun elements ->
+ List.fold_left elements ~init:None
+ ~f:(fun (acc : ImportDataTypes.Types.t option) v ->
+ match acc with
+ | None -> Some v
+ | Some t when t = v -> acc
+ | _ -> Some Extern)
+ |> Option.value ~default:ImportDataTypes.Types.None
+
+include Lazy_Repr.Make (struct
+ type nonrec t = t
+ type 'a repr = t
+ type 'a obs = ImportDataTypes.Types.t
+ type 'a path_repr = 'a -> unit
+
+ let observe : 'a Repr.E.obs Lazy.t * 'a repr -> 'a obs = snd
+
+ let empty : 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ -> ImportDataTypes.Types.None
+
+ let expr : 'a Repr.E.obs Lazy.t * 'a repr -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun e _ -> snd e
+
+ let literal : string -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.String
+
+ let integer : string -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.Number
+
+ let path : 'b path_repr -> 'b -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ _ -> ImportDataTypes.Types.Extern
+
+ let concat :
+ ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.String
+
+ let window :
+ ('a Repr.E.obs Lazy.t * 'a repr) T.window ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions order _ ->
+ ignore order;
+ ignore expressions;
+ match name with
+ | T.Counter | T.Max _ | T.Min _ | T.Sum _ -> Number
+ | T.Previous expr -> snd expr
+
+ let nvl :
+ ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun v _ -> group' (List.map ~f:snd v)
+
+ let join :
+ string ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun _ _ _ -> ImportDataTypes.Types.String
+
+ let boperator :
+ T.binary_operator ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name _ _ _ ->
+ match name with
+ | T.Equal | T.Different | T.LT | T.GT -> Bool
+ | T.Add | T.Minus -> Number
+ | T.Division -> Float
+ | T.And | T.Or -> Bool
+
+ let gequality :
+ T.binary_operator ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name _ _ _ ->
+ match name with
+ | T.Equal | T.Different -> Bool
+ | _ -> None
+
+ let function' :
+ T.funct ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions _ ->
+ ignore expressions;
+ match name with
+ | Upper | Trim -> String
+
+ let check : expected:t -> actual:t -> string -> 'a Repr.E.obs Lazy.t -> t =
+ fun ~expected ~actual subset expr ->
+ if actual = expected then actual
+ else
+ let expression = (Lazy.force expr) ~top:false in
+ raise (ImportErrors.TypeError { expression; subset; expected; actual })
+
+ let funct :
+ string ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions repr ->
+ match name with
+ | "if" -> begin
+ match expressions with
+ | [] -> Extern
+ | (_, hd) :: arg1 :: _ when hd = Bool -> snd arg1
+ | (_, hd) :: _ ->
+ let expected = ImportDataTypes.Types.Bool and actual = hd in
+ check ~expected ~actual "the predicate" repr
+ end
+ | _ -> Extern
+end)
+
+let group :
+ ('a Lazier.Make(Repr.E).repr * t) list -> 'a Lazier.Make(Repr.E).repr * t =
+ fun v ->
+ let v' = group' (List.map v ~f:snd) in
+ let l = lazy (Repr.E.empty ()) in
+ (l, v')
+
+let arguments = group
diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli
new file mode 100644
index 0000000..7a11582
--- /dev/null
+++ b/lib/expression/type_of.mli
@@ -0,0 +1,10 @@
+(** This module tries to identify the type of an expression.
+
+The references to data coming from the spreadsheet cannot be evaluated and
+are marked as [Extern]. *)
+
+type t = ImportDataTypes.Types.t
+
+include Sym.SYM_CHUNK with type 'a obs = t and type 'a path_repr = 'a -> unit
+
+val group' : t list -> t
diff --git a/lib/file_handler/csv2sql.ml b/lib/file_handler/csv2sql.ml
new file mode 100644
index 0000000..42d84eb
--- /dev/null
+++ b/lib/file_handler/csv2sql.ml
@@ -0,0 +1,135 @@
+open StdLabels
+module A = ImportAnalyser.Dependency
+module CSV = ImportCSV
+module C = ImportContainers
+module Syntax = ImportConf.Syntax
+module Db = ImportSQL.Db
+
+type state = CSV.DataType.t array State.t
+
+let default_mapper :
+ (ImportCSV.DataType.t, ImportCSV.DataType.t array) State.mapper =
+ { get_row = Fun.id; get_value = Fun.id; default = ImportCSV.DataType.Null }
+
+let extract_values : string -> CSV.DataType.t =
+ fun value ->
+ (* Test first if the content is empty *)
+ if String.equal String.empty value then CSV.DataType.Null
+ else
+    (* else, try different conversions in order to see which one works *)
+ match int_of_string_opt value with
+ | Some i -> CSV.DataType.Integer i
+ | None -> (
+ match float_of_string_opt value with
+ | Some f -> CSV.DataType.Float f
+ | None ->
+          (* And finally keep the value as textual content *)
+ CSV.DataType.Content value)
+
+(** Initialize the state for the first row, count the column number and create
+ the table in the database *)
+let first_row : A.t -> _ Db.t -> state -> CSV.DataType.t list -> state =
+ fun mapping db acc row ->
+ (if acc.transaction then
+ match Db.commit db with
+ | Ok () -> ()
+ | Error e -> print_endline (ImportErrors.repr_error e));
+
+ ignore @@ Db.create_table db mapping;
+ let row = Array.of_list row in
+ match Db.prepare_insert db mapping with
+ | Ok stmt ->
+ {
+ acc with
+ header = Some row;
+ transaction = false;
+ insert_stmt = Some stmt;
+ row_number = acc.row_number + 1;
+ }
+ | _ -> { acc with header = Some row; transaction = false; insert_stmt = None }
+
+let read_csv_line :
+ log_error:ImportErrors.t -> A.t -> 'a Db.t -> state -> string list -> state
+ =
+ fun ~log_error mapping db acc row ->
+ let processed_row =
+ List.to_seq row |> Seq.map extract_values |> Array.of_seq
+ in
+ if acc.State.transaction then
+ State.run_row ~log_error ~mapper:default_mapper mapping db processed_row acc
+ else
+ match Db.begin_transaction db with
+ | Error e ->
+ print_endline (ImportErrors.repr_error e);
+ acc
+ | Ok () ->
+ let acc = { acc with transaction = true } in
+ State.run_row ~log_error ~mapper:default_mapper mapping db processed_row
+ acc
+
+let importInDatable :
+ log_error:ImportErrors.t ->
+ conf:Syntax.t ->
+ dirname:string ->
+ A.t ->
+ 'a Db.t ->
+ CSV.DataType.t array option Lwt.t =
+ fun ~log_error ~conf ~dirname mapping db ->
+ let file = Filename.concat dirname (A.table mapping).file in
+
+ let channel = Stdlib.open_in_bin file in
+
+ let csv_channel = Csv.of_channel ~separator:';' ~excel_tricks:true channel in
+
+ (* In the headers, we only keep the string.
+
+ This line could generate an error if the headers are not correctly defined.
+ *)
+ let header =
+ List.map ~f:(fun v -> CSV.DataType.Content v) (Csv.next csv_channel)
+ in
+
+ let state =
+ State.
+ {
+ transaction = false;
+ header = None;
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = 1;
+ sheet_number = 1;
+ delayed = [];
+ }
+ in
+ let state = first_row mapping db state header in
+
+ let state =
+ try
+ Csv.fold_left csv_channel ~init:state
+ ~f:(read_csv_line ~log_error mapping db)
+ with
+ | Csv.Failure (line, row, cause) as e ->
+ Printf.eprintf "Error %s on line %d — field : %s\n" cause line
+ (ImportCSV.Csv.column_to_string row);
+ raise e
+ in
+ ignore @@ State.clear ~log_error db mapping conf;
+ ignore @@ Db.commit db;
+
+ (* Finalize the statements created during the import *)
+ let () =
+ Option.iter (fun v -> ignore @@ Db.finalize v) state.insert_stmt;
+ Option.iter (fun v -> ignore @@ Db.finalize v) state.check_key_stmt
+ in
+
+ (* Insert all the headers *)
+ let _ =
+ Option.iter
+ (fun headers ->
+ let values = Array.mapi headers ~f:(fun i value -> (i, value)) in
+
+ ignore
+ @@ Db.insert_header db (ImportAnalyser.Dependency.table mapping) values)
+ state.header
+ in
+ Lwt.return state.header
diff --git a/lib/file_handler/csv2sql.mli b/lib/file_handler/csv2sql.mli
new file mode 100644
index 0000000..e09737b
--- /dev/null
+++ b/lib/file_handler/csv2sql.mli
@@ -0,0 +1,10 @@
+val importInDatable :
+ log_error:ImportErrors.t ->
+ conf:ImportConf.Syntax.t ->
+ dirname:string ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ ImportCSV.DataType.t array option Lwt.t
+(** Load a CSV file into an SQLite database.
+
+Return the header if at least one row was present *)
diff --git a/lib/file_handler/dune b/lib/file_handler/dune
new file mode 100755
index 0000000..6b247db
--- /dev/null
+++ b/lib/file_handler/dune
@@ -0,0 +1,21 @@
+(library
+ (name importFileHandler)
+ (libraries
+ csv
+ SZXX
+ sqlite3
+ base
+ core
+ lwt
+ lwt.unix
+ helpers
+ importAnalyser
+ importConf
+ importContainers
+ importCSV
+ importDataTypes
+ importErrors
+ importExpression
+ importSQL
+ )
+)
diff --git a/lib/file_handler/state.ml b/lib/file_handler/state.ml
new file mode 100644
index 0000000..5b43aff
--- /dev/null
+++ b/lib/file_handler/state.ml
@@ -0,0 +1,178 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+
+type 'a t = {
+ header : 'a option;
+ transaction : bool;
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+ row_number : int;
+ sheet_number : int;
+ delayed : 'a list;
+}
+
+type insert_result = {
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+}
+
+type ('a, 'b) mapper = {
+ get_row : 'b -> 'a Array.t;
+ get_value : 'a -> ImportCSV.DataType.t;
+ default : 'a;
+}
+
+module A = ImportAnalyser.Dependency
+
+let insert_row :
+ mapper:(_, 'row) mapper ->
+ A.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ _ t ->
+ (insert_result, ImportErrors.xlsError) result =
+ fun ~mapper mapping db row state ->
+ (* Extract all columns referenced in the keys or the columns to extract *)
+ let keys_id =
+ List.fold_left (A.keys mapping) ~init:ImportContainers.IntSet.empty
+ ~f:(fun acc (keys : A.key) ->
+ let columns = keys.A.columns in
+ ImportContainers.IntSet.union acc (Lazy.force columns))
+ and columns_id = A.columns mapping in
+ let ids = ImportContainers.IntSet.(union keys_id columns_id |> elements) in
+
+ (* Filter only the required columns in the row *)
+ let values =
+ List.map ids ~f:(fun i ->
+ let index = i - 1 in
+ let value =
+ try Array.get (mapper.get_row row) index with
+ | Stdlib.Invalid_argument _ ->
+            (* If we have more headers than data, assume the values are NULL.
+               This can happen when the whole line tail is empty: Excel can
+               give us a truncated line instead of a series of NULLs *)
+ mapper.default
+ in
+ (index, mapper.get_value value))
+ in
+ let keys = A.keys mapping in
+
+ let execution =
+ let ( let* ) = Result.bind in
+ let* check_key_stmt, result =
+ ImportSQL.Db.eval_key db state.check_key_stmt keys values
+ in
+ let no_null =
+      (* We check whether at least one key is not null — when every key is
+         null we ignore the line.
+
+         If multiple keys are present, we ensure there is at least one non
+         null here.
+      *)
+ match result with
+ | [] -> true
+ | _ ->
+ List.exists result ~f:(function
+ | Sqlite3.Data.FLOAT _ | Sqlite3.Data.INT _ -> true
+ | Sqlite3.Data.BLOB t | Sqlite3.Data.TEXT t ->
+ not (String.equal "" t)
+ | Sqlite3.Data.NONE | Sqlite3.Data.NULL -> false)
+ in
+ let* _ =
+ match no_null with
+ | true -> Ok ()
+ | false -> Error (Failure "The key is null")
+ in
+
+ let* statement =
+ match state.insert_stmt with
+ | None -> ImportSQL.Db.prepare_insert db mapping
+ | Some v -> Ok v
+ in
+ let* _ = ImportSQL.Db.insert db statement ~id:state.row_number values in
+ let* _ = ImportSQL.Db.reset statement in
+
+ Helpers.Console.update_cursor ();
+ Ok { insert_stmt = Some statement; check_key_stmt }
+ in
+
+ (* In case of error, wrap the exception to get the line *)
+ Result.map_error
+ (fun e ->
+ ImportErrors.
+ {
+ source = ImportAnalyser.Dependency.table mapping;
+ sheet = state.sheet_number;
+ row = state.row_number;
+ target = None;
+ value = CSV.DataType.Content (String.concat ~sep:", " []);
+ exn = e;
+ })
+ execution
+
+(** Load the row with all the information associated with this sheet.
+
+ If an error has already been raised during the sheet, ignore this row only. *)
+let run_row :
+ log_error:ImportErrors.t ->
+ mapper:(_, 'row) mapper ->
+ A.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ 'a t ->
+ 'a t =
+ fun ~log_error ~mapper mapping db row state ->
+ match insert_row ~mapper mapping db row state with
+ | Ok { insert_stmt; check_key_stmt } ->
+ {
+ state with
+ insert_stmt;
+ check_key_stmt;
+ row_number = state.row_number + 1;
+ }
+ | Error e ->
+ Option.iter (fun v -> ignore @@ ImportSQL.Db.finalize v) state.insert_stmt;
+ Option.iter
+ (fun v -> ignore @@ ImportSQL.Db.finalize v)
+ state.check_key_stmt;
+ ImportErrors.output_error log_error e;
+ {
+ state with
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = state.row_number + 1;
+ }
+
+let clear :
+ log_error:ImportErrors.t ->
+ 'a ImportSQL.Db.t ->
+ A.t ->
+ ImportConf.Syntax.t ->
+ unit ImportSQL.Db.result =
+ fun ~log_error db mapping conf ->
+ ImportSQL.Db.clear_duplicates db (A.table mapping) (A.keys mapping)
+ ~f:(fun values ->
+ let line =
+ match snd @@ Array.get values 0 with
+ | ImportCSV.DataType.Integer i -> i
+ | _ -> -1
+ and value = snd @@ Array.get values 1
+ and target =
+ match snd @@ Array.get values 2 with
+ | ImportCSV.DataType.Content s ->
+ Some (ImportConf.get_table_for_name conf (Some s))
+ | _ -> None
+ in
+ let error =
+ ImportErrors.
+ {
+ source = A.table mapping;
+ sheet = (A.table mapping).tab;
+ row = line;
+ target;
+ value;
+ exn = Failure "Duplicated key";
+ }
+ in
+
+ ImportErrors.output_error log_error error)
diff --git a/lib/file_handler/state.mli b/lib/file_handler/state.mli
new file mode 100644
index 0000000..f744c33
--- /dev/null
+++ b/lib/file_handler/state.mli
@@ -0,0 +1,46 @@
+type 'a t = {
+ header : 'a option;
+ transaction : bool;
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+ row_number : int;
+ sheet_number : int;
+ delayed : 'a list;
+}
+
+type insert_result = {
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+}
+
+type ('a, 'b) mapper = {
+ get_row : 'b -> 'a Array.t;
+ get_value : 'a -> ImportCSV.DataType.t;
+ default : 'a;
+}
+
+val insert_row :
+ mapper:(_, 'row) mapper ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ _ t ->
+ (insert_result, ImportErrors.xlsError) result
+(** Low level row insertion *)
+
+val run_row :
+ log_error:ImportErrors.t ->
+ mapper:(_, 'row) mapper ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ 'a t ->
+ 'a t
+
+val clear :
+ log_error:ImportErrors.t ->
+ 'a ImportSQL.Db.t ->
+ ImportAnalyser.Dependency.t ->
+ ImportConf.Syntax.t ->
+ unit ImportSQL.Db.result
+(** Clean up the table after the insertion, check for the duplicates and external references *)
diff --git a/lib/file_handler/xlsx2sql.ml b/lib/file_handler/xlsx2sql.ml
new file mode 100644
index 0000000..f2d8f12
--- /dev/null
+++ b/lib/file_handler/xlsx2sql.ml
@@ -0,0 +1,205 @@
+open StdLabels
+module A = ImportAnalyser.Dependency
+module C = ImportContainers
+module CSV = ImportCSV
+module Syntax = ImportConf.Syntax
+module Db = ImportSQL.Db
+
+let flags = Unix.[ O_RDONLY; O_NONBLOCK ]
+
+let extractors =
+ SZXX.Xlsx.
+ {
+ string = (fun _location s -> CSV.DataType.Content s);
+ error =
+ (fun _location s -> CSV.DataType.Error (Printf.sprintf "#ERROR# %s" s));
+ boolean =
+ (fun _location s ->
+ let value = String.(equal s "1") in
+ CSV.DataType.Content (string_of_bool value));
+ number =
+ (fun _location s ->
+ let f = Float.of_string s in
+ if Float.is_integer f then CSV.DataType.Integer (Float.to_int f)
+ else CSV.DataType.Float f);
+ date = (fun _location s -> CSV.DataType.Content s);
+ null = CSV.DataType.Null;
+ formula =
+ (fun _location ~formula s ->
+ ignore formula;
+ CSV.DataType.Content s);
+ }
+
+let feed_bigstring ic =
+ let open Lwt.Infix in
+ let len = Lwt_io.buffer_size ic in
+ let buf = Lwt_bytes.create len in
+ SZXX.Zip.Bigstring
+ (fun () ->
+ Lwt_io.read_into_bigstring ic buf 0 len >|= function
+ | 0 -> None
+ | len -> Some SZXX.Zip.{ buf; pos = 0; len })
+
+(* Evaluate if the row can be processed right now (does not contain
+ any delayed value) *)
+let is_delayed row =
+ Array.exists row.SZXX.Xlsx.data ~f:(function
+ | SZXX.Xlsx.Delayed _ -> true
+ | _ -> false)
+
+let default_mapper :
+ (ImportCSV.DataType.t, ImportCSV.DataType.t SZXX.Xlsx.row) State.mapper =
+ {
+ get_value =
+ (function
+ | ImportCSV.DataType.Content s ->
+ ImportCSV.DataType.Content (SZXX.Xml.unescape s)
+ | any -> any);
+ default = ImportCSV.DataType.Null;
+ get_row = (fun v -> v.SZXX.Xlsx.data);
+ }
+
+type state = CSV.DataType.t SZXX.Xlsx.status SZXX.Xlsx.row State.t
+
+let delayed_mapper =
+ State.
+ {
+ get_value =
+ (function
+ | SZXX.Xlsx.Available (CSV.DataType.Content s) ->
+ CSV.DataType.Content (SZXX.Xml.unescape s)
+ | SZXX.Xlsx.Available value -> value
+ | _ -> CSV.DataType.Null);
+ default = SZXX.Xlsx.Available CSV.DataType.Null;
+ get_row = (fun v -> v.SZXX.Xlsx.data);
+ }
+
+(** Initialize the state for the first row, count the column number and create
+ the table in the database *)
+let first_row : A.t -> _ Db.t -> state -> 'a SZXX.Xlsx.row -> state =
+ fun mapping db acc row ->
+ (if acc.transaction then
+ match Db.commit db with
+ | Ok () -> ()
+ | Error e -> print_endline (ImportErrors.repr_error e));
+
+ ignore @@ Db.create_table db mapping;
+ match Db.prepare_insert db mapping with
+ | Ok stmt ->
+ {
+ acc with
+ header = Some row;
+ transaction = false;
+ insert_stmt = Some stmt;
+ }
+ | _ -> { acc with header = Some row; transaction = false; insert_stmt = None }
+
+let importInDatable :
+ log_error:Csv.out_channel Lazy.t ->
+ conf:Syntax.t ->
+ dirname:string ->
+ A.t ->
+ 'a Db.t ->
+ CSV.DataType.t array option Lwt.t =
+ fun ~log_error ~conf ~dirname mapping db ->
+ let file = Filename.concat dirname (A.table mapping).file in
+
+ Lwt_io.with_file ~flags ~mode:Input file (fun ic ->
+ let open Lwt.Syntax in
+ let stream, sst_p, success =
+ SZXX.Xlsx.stream_rows ~only_sheet:(A.table mapping).tab
+ ~feed:(feed_bigstring ic) extractors
+ in
+ let* processed =
+ Lwt_stream.fold
+ (fun row acc ->
+ (* Create the table on the first line *)
+ if Int.equal 1 row.SZXX.Xlsx.row_number then
+ first_row mapping db acc row
+ else
+ match is_delayed row with
+ | true -> { acc with delayed = row :: acc.delayed }
+ | false -> (
+ let row_number = row.SZXX.Xlsx.row_number in
+ if acc.transaction then
+ State.run_row ~log_error ~mapper:delayed_mapper mapping db
+ row { acc with row_number }
+ else
+ match Db.begin_transaction db with
+ | Error e ->
+ print_endline (ImportErrors.repr_error e);
+ acc
+ | Ok () ->
+ let acc = { acc with transaction = true; row_number } in
+ State.run_row ~log_error ~mapper:delayed_mapper mapping
+ db row acc))
+ stream
+ {
+ transaction = false;
+ header = None;
+ delayed = [];
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = 1;
+ sheet_number = (A.table mapping).tab;
+ }
+ in
+ (* Wait to reach the sst *)
+ let* sst = sst_p in
+
+ if processed.transaction then ignore (Db.commit db);
+
+ (* Insert the missing elements *)
+ ignore @@ Db.begin_transaction db;
+ List.iter processed.delayed ~f:(fun row ->
+ let fully_available_row =
+ SZXX.Xlsx.unwrap_status extractors sst row
+ in
+
+ let row_number = row.SZXX.Xlsx.row_number in
+
+ match
+ State.insert_row ~mapper:default_mapper mapping db
+ fully_available_row
+ { processed with row_number }
+ with
+ | Ok _ -> ()
+ | Error e ->
+ ImportErrors.output_error log_error e;
+ ());
+
+ ignore @@ State.clear ~log_error db mapping conf;
+ ignore @@ Db.commit db;
+
+ (* Finalize the statements created during the import *)
+ let () =
+ Option.iter (fun v -> ignore @@ Db.finalize v) processed.insert_stmt;
+ Option.iter (fun v -> ignore @@ Db.finalize v) processed.check_key_stmt
+ in
+
+ let _ =
+ Option.iter
+ (fun headers ->
+ let res = SZXX.Xlsx.unwrap_status extractors sst headers in
+
+ let values = Array.mapi res.data ~f:(fun i value -> (i, value)) in
+
+ ignore
+ @@ Db.insert_header db
+ (ImportAnalyser.Dependency.table mapping)
+ values)
+ processed.header
+ in
+
+ let header =
+ Option.map
+ (fun header ->
+ let res = SZXX.Xlsx.unwrap_status extractors sst header in
+ res.data)
+ processed.header
+ in
+
+ (* Finalize the process *)
+ let* () = success in
+
+ Lwt.return header)
diff --git a/lib/file_handler/xlsx2sql.mli b/lib/file_handler/xlsx2sql.mli
new file mode 100644
index 0000000..e09737b
--- /dev/null
+++ b/lib/file_handler/xlsx2sql.mli
@@ -0,0 +1,10 @@
+val importInDatable :
+ log_error:ImportErrors.t ->
+ conf:ImportConf.Syntax.t ->
+ dirname:string ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ ImportCSV.DataType.t array option Lwt.t
+(** Load an Excel spreadsheet into an SQLite database.
+
+Return the header if at least one row was present *)
diff --git a/lib/helpers/console.ml b/lib/helpers/console.ml
new file mode 100644
index 0000000..838b25a
--- /dev/null
+++ b/lib/helpers/console.ml
@@ -0,0 +1,16 @@
+let cursors = [| '|'; '/'; '-'; '\\' |]
+let pos = ref 0
+
+let update_cursor () =
+ if Unix.(isatty stdout) then (
+ Printf.printf "%c[?25l%c[1D%c[0K%c" (char_of_int 27) (char_of_int 27)
+ (char_of_int 27) (Array.get cursors !pos);
+ pos := (!pos + 1) mod Array.length cursors)
+
+let close_cursor () =
+ if Unix.(isatty stdout) then
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) (char_of_int 27)
+ (char_of_int 27)
+
+let clear_line () =
+ if Unix.(isatty stdout) then Printf.printf "%c[2K\r%!" (char_of_int 27)
diff --git a/lib/helpers/console.mli b/lib/helpers/console.mli
new file mode 100644
index 0000000..289d55c
--- /dev/null
+++ b/lib/helpers/console.mli
@@ -0,0 +1,5 @@
+val update_cursor : unit -> unit
+val close_cursor : unit -> unit
+
+val clear_line : unit -> unit
+(** Clear the entire line *)
diff --git a/lib/helpers/dune b/lib/helpers/dune
new file mode 100755
index 0000000..8e30d2b
--- /dev/null
+++ b/lib/helpers/dune
@@ -0,0 +1,8 @@
+(library
+ (name helpers)
+ (libraries
+ calendar
+ decoders
+ otoml
+ )
+)
diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml
new file mode 100755
index 0000000..9d6fcb8
--- /dev/null
+++ b/lib/helpers/helpers.ml
@@ -0,0 +1,45 @@
+module Toml = Toml
+module Console = Console
+
+let date_from_csv : string -> CalendarLib.Date.t option =
+ fun value ->
+ let open CalendarLib.Date in
+ try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
+ | _ -> (
+ (* If the date is a number, try from julian day *)
+ match int_of_string_opt value with
+ | None -> None
+ | Some v -> Some (add (make 1899 12 30) (Period.day v)))
+
+let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
+ fun f acc b ->
+ match f acc b with
+ | None -> acc
+ | Some v -> v
+
+let try_opt exp =
+ try Some (exp ()) with
+ | _ -> None
+
+let repr_date formatter date =
+ Format.fprintf formatter "%02d/%02d/%d"
+ (CalendarLib.Date.day_of_month date)
+ CalendarLib.Date.(int_of_month @@ month date)
+ (CalendarLib.Date.year date)
+
+let s_repr_date date =
+ Format.sprintf "%02d/%02d/%d"
+ (CalendarLib.Date.day_of_month date)
+ CalendarLib.Date.(int_of_month @@ month date)
+ (CalendarLib.Date.year date)
+
+let repr_opt f channel = function
+ | None -> ()
+ | Some v -> f channel v
+
+let str_format f =
+ let buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer buffer in
+ f formatter;
+ Format.pp_print_flush formatter ();
+ Buffer.contents buffer
diff --git a/lib/helpers/toml.ml b/lib/helpers/toml.ml
new file mode 100644
index 0000000..1b7fb15
--- /dev/null
+++ b/lib/helpers/toml.ml
@@ -0,0 +1,31 @@
+module Decode = struct
+ module S = struct
+ type value = Otoml.t
+
+ let pp : Format.formatter -> value -> unit =
+ fun format t -> Format.pp_print_string format (Otoml.Printer.to_string t)
+
+ let of_string : string -> (value, string) result =
+ Otoml.Parser.from_string_result
+
+ let of_file : string -> (value, string) result =
+ Otoml.Parser.from_file_result
+
+ let get_string : value -> string option = Otoml.get_opt Otoml.get_string
+ let get_int : value -> int option = Otoml.get_opt Otoml.get_integer
+ let get_float : value -> float option = Otoml.get_opt Otoml.get_float
+ let get_bool : value -> bool option = Otoml.get_opt Otoml.get_boolean
+ let get_null : value -> unit option = fun _ -> None
+
+ let get_list : value -> value list option =
+ Otoml.get_opt @@ Otoml.get_array Fun.id
+
+ let get_key_value_pairs : value -> (value * value) list option =
+ Otoml.get_opt (fun key ->
+ Otoml.get_table key |> List.map (fun (k, v) -> (Otoml.string k, v)))
+
+ let to_list : value list -> value = Otoml.array
+ end
+
+ include Decoders.Decode.Make (S)
+end
diff --git a/lib/helpers/toml.mli b/lib/helpers/toml.mli
new file mode 100644
index 0000000..08d30b8
--- /dev/null
+++ b/lib/helpers/toml.mli
@@ -0,0 +1 @@
+module Decode : Decoders.Decode.S with type value = Otoml.t
diff --git a/lib/sql/date.ml b/lib/sql/date.ml
new file mode 100644
index 0000000..e8933c7
--- /dev/null
+++ b/lib/sql/date.ml
@@ -0,0 +1,24 @@
+(** Parse a text value into a date *)
+
+let first_day = CalendarLib.Date.make 1899 12 30
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun str data ->
+ match (str, data) with
+ | Sqlite3.Data.TEXT format_, Sqlite3.Data.TEXT content -> (
+ try
+ let date = CalendarLib.Printer.Date.from_fstring format_ content in
+ let period =
+ CalendarLib.Date.sub date first_day
+ |> CalendarLib.Date.Period.nb_days |> Int64.of_int
+ in
+ Sqlite3.Data.INT period
+ with
+ | Invalid_argument e ->
+ prerr_endline e;
+ Sqlite3.Data.NULL)
+ | _ ->
+ (* If the data is already a date, it should be preserved as is *)
+ data
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun2 db "date" f
diff --git a/lib/sql/db.ml b/lib/sql/db.ml
new file mode 100644
index 0000000..89431b1
--- /dev/null
+++ b/lib/sql/db.ml
@@ -0,0 +1,383 @@
+open StdLabels
+module CSV = ImportCSV
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+type 'a t = 'a T.t
+
+let ( let* ) res cont = Result.bind (T.to_result res) cont
+let ( let** ) res cont = Result.bind res cont
+let begin_transaction = T.begin_transaction
+let rollback = T.rollback
+let commit = T.commit
+let finalize = T.finalize
+let reset = T.reset
+let insert_header = Header.insert_header
+let query_headers = Header.query_headers
+
+let with_db : string -> (Sqlite3.db -> unit T.result) -> unit T.result =
+ fun filename f ->
+ let db = Sqlite3.db_open filename in
+
+ Match.register db;
+ Date.register db;
+ Year.register db;
+ Join.register db;
+ Math.register db;
+ Trim.register db;
+
+ (*let* _ = Sqlite3.exec db "PRAGMA foreign_keys = ON" |> to_result in*)
+ Sqlite3.( let& ) db f
+
+let create_table : 'a t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let name = Table.name source in
+
+ let* _ =
+ Sqlite3.exec db
+ (String.concat ~sep:"" [ "DROP TABLE IF EXISTS '"; name; "'" ])
+ in
+
+ let query = ImportAnalyser.Query.create_table table in
+ let* _ = Sqlite3.exec db query in
+ match Header.create_table db with
+ | Ok () -> Hashs.create_table db
+ | e -> e
+
+let update_hash : 'a t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db mapping ->
+ match Hashs.insert db mapping with
+ | Ok () -> Ok ()
+ | Error _ ->
+ let _ = Hashs.create_table db in
+ Hashs.insert db mapping
+
+let check_table_schema : 'a t -> ImportAnalyser.Dependency.t -> bool T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let name = Table.name source in
+ let query =
+ String.concat ~sep:""
+ [ "SELECT sql FROM sqlite_schema WHERE name = '"; name; "'" ]
+ in
+ let stmt = Sqlite3.prepare db query in
+
+ let rc, result =
+ Sqlite3.fold stmt ~init:false ~f:(fun value row ->
+ if Array.length row <> 1 then value
+ else
+ match Sqlite3.Data.to_string (Array.get row 0) with
+ | Some s ->
+ let query = ImportAnalyser.Query.create_table table in
+ String.equal s query
+ | None -> value)
+ in
+ let* _ = rc in
+
+ (* The schema is the same, now check the hash in case the indexes changed *)
+ let rc_hash = Hashs.query db source in
+ match rc_hash with
+ | Ok (Some i) ->
+ let hash = Hashs.evaluate table in
+ begin
+ if i == hash then Ok result
+ else
+ let _ = update_hash db table in
+ Ok false
+ end
+ | _ ->
+ let _ = update_hash db table in
+ Ok result
+
+let prepare_insert :
+ Sqlite3.db -> ImportAnalyser.Dependency.t -> Sqlite3.stmt T.result =
+ fun db mapping ->
+ (* Get the list of columns from the table configuration *)
+ let columns =
+ ImportAnalyser.Dependency.columns mapping
+ |> ImportContainers.IntSet.elements
+ in
+ let table_name = Table.name (ImportAnalyser.Dependency.table mapping) in
+
+ let open Buffer in
+ let buff = create 20 and value_buff = create 10 and col_buff = create 10 in
+
+ add_string col_buff "'id',";
+
+ (* Add the key name if present *)
+ List.iter (ImportAnalyser.Dependency.keys mapping)
+ ~f:(fun { ImportAnalyser.Dependency.name; _ } ->
+ add_string col_buff "'key_";
+
+ add_string col_buff name;
+ add_string col_buff "',");
+
+ add_string value_buff ":id,";
+
+ (* Add the key settings if presents *)
+ List.iter (ImportAnalyser.Dependency.keys mapping) ~f:(fun key ->
+ ImportAnalyser.Query.build_key_insert value_buff key;
+ add_string value_buff ",");
+
+ List.iter columns ~f:(fun id ->
+ add_string col_buff "'col_";
+ add_string col_buff (string_of_int id);
+ add_string col_buff "',";
+
+ let col_name = ":col_" ^ string_of_int id in
+
+ add_string value_buff col_name;
+ add_string value_buff ",");
+
+ truncate col_buff (length col_buff - 1);
+ truncate value_buff (length value_buff - 1);
+ add_string buff "INSERT INTO '";
+ add_string buff table_name;
+ add_string buff "' (";
+ add_buffer buff col_buff;
+ add_string buff " ) VALUES (";
+ add_buffer buff value_buff;
+ add_string buff " )";
+
+ let query = contents buff in
+
+ try Ok (Sqlite3.prepare db query) with
+ | e ->
+ print_endline "Error during this query :";
+ print_endline query;
+ Error e
+
+let eval_key :
+ 'a t ->
+ Sqlite3.stmt option ->
+ ImportAnalyser.Dependency.key list ->
+ (int * CSV.DataType.t) list ->
+ (Sqlite3.stmt option * Sqlite3.Data.t list) T.result =
+ fun db stmt keys values ->
+ match keys with
+ | [] -> Ok (None, [])
+ | _ ->
+ let buffer = Buffer.create 16 in
+ Buffer.add_string buffer "SELECT ";
+ List.iter keys ~f:(fun key ->
+ ImportAnalyser.Query.build_key_insert buffer key;
+ Buffer.add_string buffer ",");
+
+ Buffer.truncate buffer (Buffer.length buffer - 1);
+ let query = Buffer.contents buffer in
+
+ let statement = Sqlite3.prepare_or_reset db (ref stmt) query in
+
+ (* Extract all the column id used in the keys.
+ *)
+ let keys_id =
+ List.fold_left keys ~init:ImportContainers.IntSet.empty
+ ~f:(fun acc (keys : ImportAnalyser.Dependency.key) ->
+ let columns = Lazy.force keys.ImportAnalyser.Dependency.columns in
+ ImportContainers.IntSet.union acc columns)
+ in
+
+ let** _ =
+ List.fold_left values ~init:(Ok 1) ~f:(fun idx (id, value) ->
+ let** idx = idx in
+
+ (* Ensure the column is required in the keys *)
+ match ImportContainers.IntSet.mem (1 + id) keys_id with
+ | false -> Ok (idx + 1)
+ | true ->
+ let sql_data = T.of_datatype value in
+
+ let col_name = ":col_" ^ string_of_int (1 + id) in
+ let* _ = Sqlite3.bind_name statement col_name sql_data in
+ Ok (idx + 1))
+ in
+
+ let result, evaluated_keys =
+ Sqlite3.fold statement ~init:[] ~f:(fun _ v -> Array.to_list v)
+ in
+ let* _ = result in
+ Ok (Some statement, evaluated_keys)
+
+let insert :
+ Sqlite3.db ->
+ Sqlite3.stmt ->
+ id:int ->
+ (int * CSV.DataType.t) list ->
+ unit T.result =
+ fun db statement ~id values ->
+ let** _ = T.savepoint db "PREVIOUS" in
+ let* _ =
+ Sqlite3.bind_name statement ":id" (Sqlite3.Data.INT (Int64.of_int id))
+ in
+ let** _ =
+ List.fold_left values ~init:(Ok 1) ~f:(fun idx (id, value) ->
+ let** idx = idx in
+ let sql_data = T.of_datatype value in
+
+ let col_name = ":col_" ^ string_of_int (1 + id) in
+ let* _ = Sqlite3.bind_name statement col_name sql_data in
+
+ Ok (idx + 1))
+ in
+
+ match T.to_result (Sqlite3.step statement) with
+ | Ok () -> T.release db "PREVIOUS"
+ | Error e ->
+ (* I intentionally ignore any error here, as we are already in an
+ error case *)
+ ignore (Sqlite3.exec db "ROLLBACK TRANSACTION TO SAVEPOINT PREVIOUS");
+ Error e
+
+(** This simple function converts a query generated by the application into a
+ statement executed with sqlite.
+
+ The function expects a perfect match between the query and the parameters. *)
+let execute_query :
+ Sqlite3.db -> ImportAnalyser.Query.query -> Sqlite3.stmt T.result =
+ fun db query ->
+ let statement =
+ try Sqlite3.prepare db query.q with
+ | e ->
+ print_endline "Error during this query :";
+ print_endline query.q;
+ raise e
+ in
+
+ (* Add the binded parameters *)
+ let values =
+ Seq.map (fun v -> T.of_datatype v) query.parameters |> List.of_seq
+ in
+
+ let* _ = Sqlite3.bind_values statement values in
+
+ Ok statement
+
+let query :
+ f:((Path.t ImportExpression.T.t * CSV.DataType.t) array -> unit) ->
+ Sqlite3.db ->
+ Syntax.t ->
+ unit T.result =
+ fun ~f db output ->
+ (* Extract the query from the configuration. *)
+ let** query_analysis =
+ match ImportAnalyser.Query.select output with
+ | exception e -> Error e
+ | other -> Ok other
+ in
+
+ let query, columns = query_analysis in
+ let** statement = execute_query db query in
+
+ let* _ =
+ Sqlite3.iter statement ~f:(fun data ->
+ let values =
+ Array.map2 data columns ~f:(fun value column ->
+ (column, T.to_datatype value))
+ in
+ f values)
+ in
+ Ok ()
+
+let create_view : Sqlite3.db -> Syntax.t -> unit T.result =
+ fun db output ->
+ ignore output;
+ let* drop = Sqlite3.exec db "DROP VIEW IF EXISTS 'result'" in
+ ignore drop;
+
+ Ok ()
+
+(*
+ let query, _ = ImportAnalyser.Query.select output in
+
+ let query =
+ { query with q = Printf.sprintf "CREATE VIEW result AS %s" query.q }
+ in
+ let** statement = execute_query db query in
+
+ Sqlite3.step statement |> T.to_result
+ *)
+
+let check_foreign :
+ f:((string * CSV.DataType.t) array -> unit) ->
+ Sqlite3.db ->
+ Syntax.t ->
+ Syntax.extern ->
+ unit T.result =
+ fun ~f db conf external_ ->
+ let query = ImportAnalyser.Query.check_external conf external_ in
+
+ let** statement = execute_query db query in
+ Sqlite3.iter statement ~f:(fun data ->
+ let values =
+ Array.mapi data ~f:(fun i value ->
+ (Sqlite3.column_name statement i, T.to_datatype value))
+ in
+ f values)
+ |> T.to_result
+
+let clear_duplicates :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ ImportAnalyser.Dependency.key list ->
+ unit T.result =
+ fun ~f db table keys ->
+ let table_name = Table.name table in
+
+ List.fold_left keys ~init:(Ok ())
+ ~f:(fun state { ImportAnalyser.Dependency.name; _ } ->
+ let** _ = state in
+
+ let select_query =
+ String.concat ~sep:""
+ [
+ "SELECT '";
+ table_name;
+ "'.id, '";
+ table_name;
+ "'.'key_";
+ name;
+ "', '";
+ name;
+ "' FROM '";
+ table_name;
+ "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY '";
+ table_name;
+ "'.'key_";
+ name;
+ "' ORDER BY (id)) AS row_num from '";
+ table_name;
+ "') other_table WHERE other_table.row_num <> 1 and '";
+ table_name;
+ "'.id = other_table.id";
+ ]
+ in
+ let stmt = Sqlite3.prepare db select_query in
+
+ ignore
+ @@ Sqlite3.iter stmt ~f:(fun data ->
+ let values =
+ Array.mapi data ~f:(fun i value ->
+ (Sqlite3.column_name stmt i, T.to_datatype value))
+ in
+ f values);
+
+ let delete_query =
+ Printf.sprintf
+ {|UPDATE '%s'
+SET key_%s = NULL
+FROM
+(
+ SELECT id, row_number() OVER(PARTITION BY key_%s ORDER BY (id)) AS row_num
+ from '%s'
+) other_table
+WHERE other_table.row_num <> 1
+and '%s'.id = other_table.id|}
+ table_name name name table_name table_name
+ in
+
+ Sqlite3.exec db delete_query |> T.to_result)
+
+type 'a result = ('a, exn) Result.t
diff --git a/lib/sql/db.mli b/lib/sql/db.mli
new file mode 100644
index 0000000..465b159
--- /dev/null
+++ b/lib/sql/db.mli
@@ -0,0 +1,106 @@
+module Syntax = ImportConf.Syntax
+
+type 'a t
+type 'a result = ('a, exn) Result.t
+
+val with_db : string -> ('a t -> unit result) -> unit result
+
+val check_table_schema : 'a t -> ImportAnalyser.Dependency.t -> bool result
+(** Check if a table with the same structure already exists in the database.
+
+ This query allows reusing the same data without reloading the file if
+ nothing changed. *)
+
+val create_table : 'a t -> ImportAnalyser.Dependency.t -> unit result
+(** [create_table db table] will create a new table in the
+ db named after [table], and the columns from the configuration (see
+ [ImportAnalyser.Query.create_table])
+
+ Any previous table with the same name will be deleted. *)
+
+val prepare_insert : 'a t -> ImportAnalyser.Dependency.t -> Sqlite3.stmt result
+(** Create a statement to use in an insert.
+ [prepare_insert db table] will prepare a statement for inserting
+ the columns at the given index. *)
+
+val finalize : Sqlite3.stmt -> unit result
+(** Finalize the statement. The function shall be called once each insert are
+ done, or after an error in the insert. *)
+
+val reset : Sqlite3.stmt -> unit result
+
+val eval_key :
+ 'a t ->
+ Sqlite3.stmt option ->
+ ImportAnalyser.Dependency.key list ->
+ (int * ImportCSV.DataType.t) list ->
+ (Sqlite3.stmt option * Sqlite3.Data.t list) result
+(** Evaluate the keys in sqlite and get the results.
+
+ The function is intended to check if the values are null before inserting
+ them in a batch *)
+
+val insert :
+ 'a t ->
+ Sqlite3.stmt ->
+ id:int ->
+ (int * ImportCSV.DataType.t) list ->
+ unit result
+(** Insert a new row in the database.
+
+ [insert db statement ~id values] will add a new row in the given table with
+ [id]. The columns are identified with their index number (there is a
+ difference by one with the column number)
+
+ Thanks to SQLite Flexible Typing (https://www.sqlite.org/flextypegood.html)
+ each column can contain values typed differently which is how the spreadsheet
+ also works.
+
+ This function is expected to be run inside a transaction. *)
+
+val begin_transaction : 'a t -> unit result
+val commit : 'a t -> unit result
+val rollback : 'a t -> unit result
+
+val query :
+ f:
+ ((ImportDataTypes.Path.t ImportExpression.T.t * ImportCSV.DataType.t) array ->
+ unit) ->
+ 'a t ->
+ Syntax.t ->
+ unit result
+(** This is one of the most important functions in the application. It
+ transforms the configuration into an SQL query and fetches the
+ result from the sqlite engine.
+
+ The function [f] given in argument will be called for each line
+
+ *)
+
+val create_view : 'a t -> Syntax.t -> unit result
+(** Create a view which represent the result *)
+
+val check_foreign :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ Syntax.t ->
+ Syntax.extern ->
+ unit result
+
+val clear_duplicates :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ ImportAnalyser.Dependency.key list ->
+ unit result
+(** Remove all duplicated keys in the table by setting them to NULL. *)
+
+val insert_header :
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ (int * ImportCSV.DataType.t) array ->
+ unit T.result
+
+val query_headers :
+ 'a t -> ImportDataTypes.Table.t -> ImportCSV.DataType.t array T.result
+(** Get all the headers from the database (used or not) *)
diff --git a/lib/sql/dune b/lib/sql/dune
new file mode 100644
index 0000000..9f9f205
--- /dev/null
+++ b/lib/sql/dune
@@ -0,0 +1,15 @@
+(library
+ (name importSQL)
+ (libraries
+ re
+ sqlite3
+ calendar
+ importAnalyser
+ importCSV
+ importConf
+ importContainers
+ importDataTypes
+ importErrors
+ importExpression
+ )
+)
diff --git a/lib/sql/hashs.ml b/lib/sql/hashs.ml
new file mode 100644
index 0000000..af1f092
--- /dev/null
+++ b/lib/sql/hashs.ml
@@ -0,0 +1,79 @@
+(**
+ This module stores the hash of the indexes ([extern_key]) for each table in
+ order to update the file if the configuration changed.
+
+ The hashes are stored in a table named [hashes] and are evaluated just
+ before inserting the values.
+*)
+
+open StdLabels
+module Table = ImportDataTypes.Table
+
+let ( let* ) = Result.bind
+
+let create_table : 'a T.t -> unit T.result =
+ fun db ->
+ Sqlite3.exec db
+ "CREATE TABLE IF NOT EXISTS 'hashes' ('table' TEXT, 'hash' INTEGER, \
+ PRIMARY KEY ('table'))"
+ |> T.to_result
+
+let evaluate : ImportAnalyser.Dependency.t -> int =
+ fun table ->
+ (* Extract all the references to this table *)
+ let keys =
+ List.map (ImportAnalyser.Dependency.keys table)
+ ~f:(fun ImportAnalyser.Dependency.{ name; columns; expression } ->
+ ignore columns;
+ (name, expression))
+ in
+ Hashtbl.hash keys
+
+let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let table_name = Table.name source in
+
+ let hash = evaluate table in
+
+ let query =
+ String.concat ~sep:""
+ [
+ "INSERT INTO 'hashes' ('table', 'hash') VALUES ('";
+ table_name;
+ "', :hash) ON CONFLICT(hashes.'table') DO UPDATE SET 'hash' = :hash";
+ ]
+ in
+ let* statement =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+
+ let* _ = T.begin_transaction db in
+ let sql_data = Sqlite3.Data.INT (Int64.of_int hash) in
+
+ let* _ = Sqlite3.bind_name statement ":hash" sql_data |> T.to_result in
+ let* _ = T.to_result (Sqlite3.step statement) in
+ T.commit db
+
+let query : 'a T.t -> ImportDataTypes.Table.t -> int option T.result =
+ fun db table ->
+ let table_name = Table.name table in
+ let query =
+ String.concat ~sep:""
+ [ "SELECT hash FROM 'hashes' WHERE hashes.'table' = '"; table_name; "'" ]
+ in
+
+ let* stmt =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+ let state, res =
+ Sqlite3.fold stmt ~init:None ~f:(fun _ d ->
+ Some (T.to_datatype (Array.get d 0)))
+ in
+
+ let* _ = T.to_result state in
+ match res with
+ | Some (ImportCSV.DataType.Integer i) -> Ok (Some i)
+ | _ -> Ok None
diff --git a/lib/sql/header.ml b/lib/sql/header.ml
new file mode 100644
index 0000000..3cac5fb
--- /dev/null
+++ b/lib/sql/header.ml
@@ -0,0 +1,74 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+
+let ( let* ) = Result.bind
+
+let create_table : 'a T.t -> unit T.result =
+ fun db ->
+ Sqlite3.exec db
+ "CREATE TABLE IF NOT EXISTS 'header' ('table' TEXT, 'column' INTEGER, \
+ 'label', PRIMARY KEY ('table', 'column'))"
+ |> T.to_result
+
+let insert_header :
+ 'a T.t ->
+ ImportDataTypes.Table.t ->
+ (int * ImportCSV.DataType.t) array ->
+ unit T.result =
+ fun db table values ->
+ let table_name = Table.name table in
+
+ let query =
+ String.concat ~sep:""
+ [
+ "INSERT INTO 'header' ('table', 'column', 'label') VALUES ('";
+ table_name;
+ "', :column, :label) ON CONFLICT(header.'table', header.'column') DO \
+ UPDATE SET 'label' = :label";
+ ]
+ in
+
+ let statement = Sqlite3.prepare db query in
+
+ let* _ = T.begin_transaction db in
+ let* _ =
+ Array.fold_left values ~init:(Ok ()) ~f:(fun acc (column, value) ->
+ let* _ = acc in
+ let sql_data = T.of_datatype value in
+ let* _ = Sqlite3.bind_name statement ":label" sql_data |> T.to_result in
+
+ let* _ =
+ Sqlite3.bind_name statement ":column"
+ (Sqlite3.Data.INT (Int64.of_int column))
+ |> T.to_result
+ in
+ let* _ = T.to_result (Sqlite3.step statement) in
+ T.reset statement)
+ in
+ T.commit db
+
+let query_headers :
+ 'a T.t -> ImportDataTypes.Table.t -> ImportCSV.DataType.t array T.result =
+ fun db table ->
+ let table_name = Table.name table in
+ let query =
+ String.concat ~sep:""
+ [
+ "SELECT label FROM 'header' WHERE header.'table' = '";
+ table_name;
+ "' ORDER BY column DESC";
+ ]
+ in
+
+ let* stmt =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+ let state, res =
+ Sqlite3.fold stmt ~init:[] ~f:(fun acc d ->
+ let value = T.to_datatype (Array.get d 0) in
+ value :: acc)
+ in
+
+ let* _ = T.to_result state in
+ Ok (Array.of_list res)
diff --git a/lib/sql/join.ml b/lib/sql/join.ml
new file mode 100644
index 0000000..3f82b92
--- /dev/null
+++ b/lib/sql/join.ml
@@ -0,0 +1,30 @@
+module D = Sqlite3.Data
+
+let f : Sqlite3.Data.t array -> Sqlite3.Data.t =
+ fun arguments ->
+ if Array.length arguments < 2 then Sqlite3.Data.NULL
+ else
+ let sep = Array.get arguments 0 in
+
+ (* Shift all the elements into an list*)
+ let contents =
+ Array.to_seqi arguments
+ |> Seq.filter_map (fun (i, value) ->
+ if i = 0 then None
+ else
+ match value with
+ | D.INT i -> Some (Int64.to_string i)
+ | D.FLOAT f -> Some (Float.to_string f)
+ | D.NONE -> None
+ | D.NULL -> None
+ | D.TEXT s | D.BLOB s ->
+ if String.length s = 0 then None else Some (String.trim s))
+ |> List.of_seq
+ in
+
+ D.TEXT (String.concat (D.to_string_coerce sep) contents)
+
+let register : Sqlite3.db -> unit =
+ fun db ->
+ Sqlite3.create_funN db "join" f;
+ Sqlite3.create_funN db "concat" f
diff --git a/lib/sql/match.ml b/lib/sql/match.ml
new file mode 100644
index 0000000..82fc1da
--- /dev/null
+++ b/lib/sql/match.ml
@@ -0,0 +1,22 @@
+let f : Sqlite3.Data.t -> Sqlite3.Data.t -> Sqlite3.Data.t =
+ let memo = Hashtbl.create 16 in
+ fun str data_1 ->
+ match (str, data_1) with
+ | Sqlite3.Data.TEXT s, Sqlite3.Data.TEXT content -> (
+ let regex =
+ match Hashtbl.find_opt memo s with
+ | None ->
+ let regex = Re.Posix.compile_pat s in
+ Hashtbl.add memo s regex;
+ regex
+ | Some v -> v
+ in
+
+ match Re.exec_opt regex content with
+ | None -> Sqlite3.Data.NULL
+ | Some g ->
+ let matched = Re.Group.get g 1 in
+ Sqlite3.Data.TEXT matched)
+ | _, _ -> data_1
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun2 db "match" f
diff --git a/lib/sql/math.ml b/lib/sql/math.ml
new file mode 100644
index 0000000..576d9f6
--- /dev/null
+++ b/lib/sql/math.ml
@@ -0,0 +1,20 @@
+(** Math functions *)
+
+let int : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ (* If the data is already an int, do not change it *)
+ | Sqlite3.Data.INT _ -> data
+ | Sqlite3.Data.FLOAT content -> Sqlite3.Data.INT (Int64.of_float content)
+ | Sqlite3.Data.BLOB content | Sqlite3.Data.TEXT content -> begin
+ match Int64.of_string_opt content with
+ | Some i -> Sqlite3.Data.INT i
+ | None -> begin
+ match Float.of_string_opt content with
+ | Some f -> Sqlite3.Data.INT (Int64.of_float f)
+ | None -> Sqlite3.Data.NULL
+ end
+ end
+ | _ -> Sqlite3.Data.NULL
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "int" int
diff --git a/lib/sql/t.ml b/lib/sql/t.ml
new file mode 100644
index 0000000..202c535
--- /dev/null
+++ b/lib/sql/t.ml
@@ -0,0 +1,52 @@
+exception SqlError of Sqlite3.Rc.t
+
+type 'a t = Sqlite3.db
+type 'a result = ('a, exn) Result.t
+
+let to_result : Sqlite3.Rc.t -> unit result = function
+ | Sqlite3.Rc.OK | Sqlite3.Rc.DONE -> Ok ()
+ | res -> Error (SqlError res)
+
+let begin_transaction : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "BEGIN" in
+ Sqlite3.exec db query |> to_result
+
+let commit : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "COMMIT" in
+ Sqlite3.exec db query |> to_result
+
+let rollback : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "ROLLBACK" in
+ Sqlite3.exec db query |> to_result
+
+let savepoint : Sqlite3.db -> string -> unit result =
+ fun db name ->
+ let query = "SAVEPOINT " ^ name in
+ Sqlite3.exec db query |> to_result
+
+let release : Sqlite3.db -> string -> unit result =
+ fun db name ->
+ let query = "RELEASE SAVEPOINT " ^ name in
+ Sqlite3.exec db query |> to_result
+
+let finalize : Sqlite3.stmt -> unit result =
+ fun statement -> to_result (Sqlite3.finalize statement)
+
+let reset : Sqlite3.stmt -> unit result =
+ fun statement -> to_result (Sqlite3.reset statement)
+
+let of_datatype = function
+ | ImportCSV.DataType.Float f -> Sqlite3.Data.FLOAT f
+ | ImportCSV.DataType.Integer i -> Sqlite3.Data.INT (Int64.of_int i)
+ | ImportCSV.DataType.Null -> Sqlite3.Data.NULL
+ | ImportCSV.DataType.Error _ -> Sqlite3.Data.NULL
+ | ImportCSV.DataType.Content s -> Sqlite3.Data.TEXT s
+
+let to_datatype : Sqlite3.Data.t -> ImportCSV.DataType.t = function
+ | Sqlite3.Data.NONE | Sqlite3.Data.NULL -> ImportCSV.DataType.Null
+ | Sqlite3.Data.INT i -> ImportCSV.DataType.Integer (Int64.to_int i)
+ | Sqlite3.Data.FLOAT f -> ImportCSV.DataType.Float f
+ | Sqlite3.Data.TEXT t | Sqlite3.Data.BLOB t -> ImportCSV.DataType.Content t
diff --git a/lib/sql/trim.ml b/lib/sql/trim.ml
new file mode 100644
index 0000000..4e4bcf4
--- /dev/null
+++ b/lib/sql/trim.ml
@@ -0,0 +1,9 @@
+(* Override the trim function with one which also removes the carriage-return character *)
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ | Sqlite3.Data.TEXT content -> Sqlite3.Data.TEXT (String.trim content)
+ | _ -> data
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "trim" f
diff --git a/lib/sql/year.ml b/lib/sql/year.ml
new file mode 100644
index 0000000..1e9c599
--- /dev/null
+++ b/lib/sql/year.ml
@@ -0,0 +1,19 @@
+(** Extract the year from a serial date value *)
+
+open CalendarLib
+
+let first_day = CalendarLib.Date.make 1899 12 30
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ | Sqlite3.Data.INT content ->
+ let nb = Int64.to_int content in
+ let date = Date.add first_day (Date.Period.day nb) in
+ let year = CalendarLib.Date.year date in
+
+ Sqlite3.Data.INT (Int64.of_int year)
+ | _ -> data
+
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "year" f
diff --git a/lib/tools/dune b/lib/tools/dune
new file mode 100644
index 0000000..29ee744
--- /dev/null
+++ b/lib/tools/dune
@@ -0,0 +1,10 @@
+(library
+ (name tools))
+
+(rule
+ (target git_hash.ml)
+ (deps git_head.sh (universe))
+ (action
+ (with-stdout-to
+ %{target}
+ (bash "./git_head.sh"))))
diff --git a/lib/tools/git_head.sh b/lib/tools/git_head.sh
new file mode 100755
index 0000000..bfa738a
--- /dev/null
+++ b/lib/tools/git_head.sh
@@ -0,0 +1,11 @@
+#! /bin/sh
+# Include the git hash in an OCaml file.
+
+git diff-index --quiet HEAD --
+if [ $? -ne 0 ]; then
+ revision=": untracked"
+else
+ revision=$(git rev-parse --short HEAD)
+fi
+compile_date=$(date +%Y/%m/%d)
+echo "let revision = \"${revision} - compiled on ${compile_date}\""
diff --git a/readme.rst b/readme.rst
new file mode 100644
index 0000000..ab7b5df
--- /dev/null
+++ b/readme.rst
@@ -0,0 +1,668 @@
+.. -*- mode: rst -*-
+.. -*- coding: utf-8 -*-
+
+.. role:: toml(code)
+ :language: toml
+
+.. default-role:: toml
+
+============================================
+Outil de construction des fichiers d’imports
+============================================
+
+.. raw:: html
+
+ <style>
+ body { font-family: sans-serif; }
+ p, li { line-height: 1.5em; }
+ pre.code, code {
+ border: 1px solid #d2d3d7;
+ background-color: #f5f6f7;
+ }
+ pre.code { padding: 1em; margin: 2em 1.5em 2em 1.5em; }
+ code { display: inline-block; padding: 0.1em; }
+ table { border-collapse: collapse; }
+ thead { color: white; background-color: gray; }
+ td { border: none; }
+ table:not(.option-list) tr:not(:last-child):not(.field) { border-bottom: 1px solid lightgray; }
+
+ .collapse_wrap > input {
+ display: none;
+ height: 0px;
+ }
+
+ .collapse_wrap > label::before {
+ content: "\25B6";
+ padding-right: 10px;
+ }
+ .collapse_wrap input:checked ~ label::before {
+ content: "\25BC";
+ padding-right: 10px;
+ }
+
+ .align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+ }
+
+ .collapse_wrap .collapse {
+ overflow: hidden;
+ display: none;
+ transition: 0.5s;
+ box-shadow: 1px 2px 4px rgba(0, 0, 0, 0.3);
+ }
+
+ .collapse_wrap > input:checked ~ .collapse {
+ display: block;
+ height: unset;
+ overflow: auto;
+ }
+ </style>
+
+
+ <script>
+ document.addEventListener("DOMContentLoaded", function() {
+ const collapsables = document.getElementsByClassName("collapse");
+ for (let i = 0; i < collapsables.length; i++) {
+ var wrap = document.createElement("div");
+ wrap.classList.add("collapse_wrap");
+
+ var radio = document.createElement("input");
+ radio.setAttribute('type', 'checkbox');
+ radio.setAttribute('id', 'radio_' + i);
+
+ var label = document.createElement("label");
+ label.setAttribute('for', 'radio_' + i);
+ label.innerHTML = "Afficher";
+
+ wrap.appendChild(radio);
+ wrap.appendChild(label);
+
+ collapsables[i].parentNode.insertBefore(wrap, collapsables[i]);
+ wrap.appendChild(collapsables[i]);
+ }
+ });
+ </script>
+
+.. contents::
+ :depth: 2
+
+L’application permet de construire un fichier d’import à partir d’une série de
+règles indiquant comment assembler les données. L’outil est capable d’aller
+chercher les données dans plusieurs fichiers différents et appliquer des règles
+simples pour mettre en forme le résultat.
+
+Invocation
+==========
+
+--conf
+
+ Le chemin vers le fichier de configuration. Cet argument est obligatoire.
+
+--print-conf
+
+ Écrit la configuration chargée sur la sortie courante et selon la dernière
+ syntaxe de l’application.
+
+.. --no-bom
+..
+.. Ne pas insérer un BOM_ dans le fichier CSV. Cet indicateur permet
+.. d’informer Excel que le fichier CSV est en UTF-8, ce qui devrait être le
+.. cas. Dans le cas où cette chaine de caractère pose problème, il est
+.. possible de désactiver ce mécanisme.
+..
+.. .. _BOM: https://fr.wikipedia.org/wiki/Indicateur_d%27ordre_des_octets
+
+Sortie
+======
+
+L’exécution du programme va générer trois fichiers, qui sont tous basés sur le
+nom du fichier de configuration :
+
+
+sortie.csv
+
+ Il s’agit du fichier produit par l’application, et dont les colonnes sont
+ celles définies dans le fichier de configuration.
+
+sortie.sqlite
+
+ Il s’agit d’une base de données reprenant toutes les données utilisées pour
+ construire le résultat.
+
+sortie_error.csv
+
+ Il s’agit d’un fichier CSV généré si l’application rencontre des erreurs.
+ Celles-ci concernent les liaisons entre les différents fichiers qui n’ont
+ pas pu être réalisées
+
+Fichier de configuration
+========================
+
+Les informations générales
+--------------------------
+
+version
+ Il s’agit de la version de la syntaxe du fichier de configuration. Valeur
+ attendue : `1`
+
+source
+ La clef `source` indique quel est le fichier source : pour chaque ligne
+ présente dans ce fichier, une ligne sera générée en sortie.
+
+ :file: le fichier à charger
+ :tab: optionnellement l’onglet concerné
+ :name: le nom sous lequel le fichier sera associé.
+
+
+
+Le chargement des dépendances
+-----------------------------
+
+La recherche de valeurs dans d’autres fichiers Excel nécessite de déclarer les
+fichiers à charger. La liste `externals` décrit tous les fichiers qui doivent
+être chargés, et comment les données doivent être associées avec le fichier
+source.
+
+.. code:: toml
+
+ [externals.target]
+ intern_key = ":source.A"
+ extern_key = ":A"
+ file = "financial.xlsx"
+ allow_missing = false
+
+Les clefs suivantes sont nécessaires pour lier les données venant d’un autre
+fichier :
+
+intern_key
+ Il s’agit de la colonne servant à faire la liaison dans la source.
+file
+ Le fichier à charger
+tab
+ optionnellement l’onglet concerné
+extern_key
+ Il s’agit de la colonne servant à faire la liaison dans le fichier devant
+ être lié. La clef doit être unique, l’application supprimera les
+ doublons pour ne garder qu’une seule valeur par clef. Il n’est donc pas
+ possible de faire un produit cartésien.
+allow_missing
+ Cette clef optionnelle indique s’il faut autoriser les valeurs manquantes
+ lors d’une liaison. Déclarer `allow_missing` va également autoriser les
+ doublons sur les valeurs de clef.
+
+Une fois la dépendance vers un fichier externe déclarée, il est possible
+d’utiliser les colonnes de ce fichier en les référençant directement. Cela
+revient à faire un `RECHERCHEV` dans Excel à chaque fois.
+
+Il est possible d’utiliser une référence externe dans une autre source externe,
+afin de construire des chemins sur plusieurs niveaux :
+
+.. code:: toml
+
+ [externals.acheteur_annuaire]
+ intern_key = ":I"
+ extern_key = ":A"
+ file = "ANNUAIRE.xlsx"
+
+ [externals.acheteur_societe]
+ intern_key = ":acheteur_annuaire.BJ"
+ extern_key = ":A"
+ file = "SOCIETES.xlsx"
+
+
+Les valeurs présentes dans ces colonnes sont pré-traitées pour éviter les
+erreurs générales lors des imports : les espaces en fin de texte sont
+supprimés et le texte est passé en capitale.
+
+Définition des colonnes
+-----------------------
+
+Une fois les différents fichiers chargés, il est possible de construire le
+schéma du fichier de sortie en listant toutes les colonnes à générer :
+
+.. code:: toml
+
+ """function
+ ( :A ^ '_' ^ :target.E
+ , :E
+ )"""
+
+======================= ======================================= ======================
+Type d’élément Interprétation Exemple
+======================= ======================================= ======================
+Texte Un texte libre. Celui-ci doit être `'_'`
+ placé entre quote (`'` ou `"`).
+
+Un nombre La valeur correspondante `1234`
+
+Une référence Une colonne. Celle-ci peut être définie `:A`
+ via un fichier externe ou directement
+ par rapport au fichier source. `:target.E`
+
+ Elle ne doit pas être placée entre
+ quote. Le nom du fichier peut être omis
+ (et est alors considéré comme étant le
+ fichier source)
+
+Opérateur `^` Concaténation d’éléments `"contract_" ^ :A`
+
+Opérateur mathématique `+` `-` `/` `=` `<>` `:Q + 1`
+ `:A = 2000`
+
+Fonction Applique la fonction sur les `nvl(:I, :N, "Defaut")`
+ éléments donnés. La fonction ne doit
+ pas être placée entre quote `trim(:N)`
+
+ La fonction prend en paramètre
+ les arguments sous forme de `trim("prefixe_" ^ :C)`
+ liste, séparés par des virgules.
+
+======================= ======================================= ======================
+
+
+.. class:: collapse
+
+ :Operator: `+` | `-` | `<` | `>` | `/` | `^`
+ :Equality: `=` | `<>`
+
+ :Reference:
+ | `:`\ COLUMN
+ | `:`\ TABLE\ `.`\ COLUMN
+
+ :Expression:
+ | LITERAL
+ | NUMERIC
+ | *Reference*
+ | *Expression* *Operator* *Expression*
+ | *Expression* *Equality* *Expression*
+ | *Expression* *Equality* *Group*
+ | IDENTIFIER `(` *Expression*\ + `)`
+ | IDENTIFIER `(` *Expression*\ , *Group*, *Group* `)`
+ | IDENTIFIER `(` *Group*, *Group* `)`
+ :Group:
+ | `[` Expression* `]`
+
+Tri des données
+---------------
+
+.. code:: toml
+
+ sort = [
+ ":mouvements.B",
+ ":lib_titres.B",
+ "date('%d/%m/%Y', :mouvements.C)",
+ ":A",
+ ]
+
+Les données peuvent être triées à l’aide de la section `sort`. Il est possible
+de définir plusieurs critères de tri qui seront traités par ordre de priorité.
+Des fonctions peuvent également être utilisées (dans l’exemple ci-dessus, nous
+avons un tri sur des dates au format dd/mm/yyyy, si la donnée n’était pas
+convertie en date, elle serait triée sur le jour avant d’être triée sur
+l’année).
+
+Filtrer les données
+-------------------
+
+.. code:: toml
+
+ filters = [
+ ":t_operation.B <> ['Cession', 'Restitution', 'Prêt de titres']",
+ ":lib_titres.B <> 0",
+ ":societe.A <> 0",
+ ]
+
+Un filtre peut être mis en place pour ne conserver que les lignes souhaitées.
+Les conditions doivent être toutes validées pour que la ligne soit retenue.
+
+.. note::
+
+ Il est possible de mettre une fonction de groupe dans les filtres, pour
+ n’extraire par exemple que la première ligne d’une série. Toutefois, cette
+ fonctionnalité est encore expérimentale. L’application peut lever une
+ erreur si la fonction est trop complexe pour être interprétée.
+
+ Si une fonction de groupe est présente, l’application ordonnera les
+ évaluations selon l’ordre suivant :
+
+ 1. En premier lieu tous les filtres *simples* pour ne conserver que les
+ lignes correspondantes.
+ 2. Ensuite seulement la fonction de groupe présente dans la section `filter`
+
+Exemple complet
+===============
+
+Cet exemple (issu d’un cas réel), crée un fichier des personnes morales à
+partir de plusieurs tables.
+
+- Le fichier source donne des identifiants pour les formes juridiques (qu’il
+ faut donc aller dans une table à part) et les devises
+
+- le numéro de TVA communautaire doit être chargé dans la table
+ immatriculation, la colonne A doit correspondre au code de la société, et la
+ colonne D doit avoir la valeur `4`
+
+- l’adresse est reconstituée en concaténant cinq colonnes depuis la table
+ annuaire.
+
+- la ville et le code postal peuvent être présent sur deux tables : la table
+ annuaire si l’adresse n’a pas été modifiée depuis la saisie initiale de la
+ fiche, ou la source qui contient la dernière valeur, mais peut être nulle si
+ celle-ci n’a pas été modifiée…
+
+.. code:: toml
+
+ version = 1
+
+ [source]
+ file = "20220222_SOCIETES.xlsx"
+ name = "société"
+
+ # Des commentaires peuvent être ajoutés tout au long du fichier.
+ # Ici la table actif permet de faire une correspondance entre les valeurs
+ # du client et nos valeurs :
+ # 1 -> Actif
+ # 0 -> Inactif
+ [externals.actifs]
+ intern_key = ":E"
+ file = "actifs.xlsx"
+ extern_key = ":A"
+ allow_missing = false
+
+ [externals.legalform]
+ intern_key = ":J"
+ file = "mapping.xlsx"
+ tab = 2
+ extern_key = ":A"
+ allow_missing = false
+
+ [externals.currency]
+ intern_key = ":BF"
+ file = "mapping.xlsx"
+ tab = 6
+ extern_key = ":A"
+ allow_missing = false
+
+ # le fichier annuaire contient des informations supplémentaires
+ [externals.annuaire]
+ intern_key = ":A"
+ file = "20220222_ANNUAIRE.xlsx"
+ extern_key = ":BJ"
+ allow_missing = false
+
+ # La table des immatriculations.
+ # Seules les lignes avec la colonne D = 4 nous intéressent
+ [externals.immat]
+ intern_key = ":A ^ '_4'"
+ file = "20220222_SO_IMMATRICULATION.xlsx"
+ extern_key = ":B ^ '_' ^ :D"
+ allow_missing = true
+
+ [sheet]
+ columns = [
+ "'companyDirectory_' ^ :A",
+ ":G",
+ "'internalExternalList_Internal'",
+ ":actifs.B",
+ ":H",
+ ":I",
+ "",
+ ":legalform.S",
+ ":annuaire.I",
+ ":T",
+ ":BP",
+ ":currency.D",
+ "",
+ ":annuaire.CC",
+ ":CQ",
+ ":CO",
+ ":immat.C",
+ # Utilise la fonction join pour assembler les éléments
+ # De cette manière, si l’un des éléments est vide, il n’y aura pas de
+ # double espace (' ') dans le résultat
+ "join(' ', :annuaire.CP, :annuaire.CQ, :annuaire.CR, :annuaire.L, :annuaire.M)",
+ "nvl(:CM, :annuaire.N)",
+ "nvl(:CS, :annuaire.AB)", ":CR"
+ ]
+ filters = []
+ sort = []
+ uniq = []
+
+Comparaison des valeurs
+=======================
+
+Comparaison par groupe
+----------------------
+
+.. code:: toml
+
+ ":t_operation.B <> ['Cession', 'Restitution', 'Prêt de titres']"
+
+Les opérateurs de comparaison `=` et `<>` peuvent accepter un groupe de valeur
+plutôt qu’une valeur unique. Dans ce cas, la condition est vraie si l’une des
+valeurs est présente.
+
+Le cas des cellules vides
+-------------------------
+
+.. code:: toml
+
+ ":B > 0",
+ "'prefix_' ^ :B",
+
+Les cellules vides peuvent apparaître dans des colonnes où l’on s’attend à
+trouver des champs texte, ou des nombres. L’application va traiter ces valeurs
+vides en fonction du contexte et les considérer comme `0` si elles sont
+associées avec un numérique, ou `""` si elles sont associées à un texte.
+
+
+Fonctions disponibles
+=====================
+
+Liste non exhaustive.
+
+`if`
+ Choisit la valeur de la cellule en fonction d’une condition :
+
+ .. code:: toml
+
+ """if(
+ :E = 1
+ , 'Vrai'
+ , 'Faux'
+ )"""
+
+`nvl`
+ Sélectionne la première valeur non nulle de la liste
+
+ .. code:: toml
+
+ "nvl(:I, :N, 'Defaut')"
+
+Fonctions sur les nombres
+-------------------------
+
+`abs`
+ Renvoie la valeur absolue d’un nombre.
+
+`int`
+ Transforme une valeur en nombre entier.
+
+Fonctions sur le texte
+----------------------
+
+`join`
+ .. code:: toml
+
+ "join('-', :A, :target.E, :B)"
+
+ Concatène tous les champs en utilisant le premier paramètre comme
+ séparateur. Les valeurs vides sont ignorées, et dans ce cas le séparateur
+ n’est pas répété.
+
+`match`
+ Extrait un motif sur la base d’une `expression régulière`_.
+
+ .. code:: toml
+
+ "match('hello ([A-Za-z]+)', ':B')"
+
+.. _expression régulière:
+ https://fr.wikipedia.org/wiki/Expression_r%C3%A9guli%C3%A8re#Op%C3%A9rateurs
+
+`substring`
+ Extrait une sous-chaine en indiquant la position et la longueur.
+
+ .. code:: toml
+
+ "substring(:H, 1, 5)"
+
+`trim`
+ Supprime les espaces en début et fin de chaine
+
+ .. code:: toml
+
+ "trim(:C)"
+
+`upper`
+ Passe le texte en capitale
+
+ .. code:: toml
+
+ "upper(:A)"
+
+Fonctions sur les dates
+-----------------------
+
+Normalement, les dates sont représentées dans Excel comme un nombre. On peut
+donc utiliser les opérations standard (addition…) sans avoir à se poser de
+question sur leur résultat. Toutefois, il est parfois nécessaire de faire des
+opérations avancées.
+
+
+`date`
+ Convertit une date depuis le format texte. Le résultat de la fonction est le
+ nombre de jours écoulés depuis le 31 décembre 1899 (`1` correspond au 1er
+ janvier 1900). Cela correspond à la manière dont Excel enregistre une
+ valeur de type date.
+
+ .. code:: toml
+
+ "date('%d/%m/%Y', :B)"
+
+
+`year`
+ Extrait l’année (sur 4 chiffres) à partir d’une date. Si la date est donnée
+ directement, il vaut mieux utiliser `substring` qui est plus simple
+ d’accès. `year` devient utile s’il y a des calculs associés :
+
+ .. code:: toml
+
+ "year( date('%d/%m/%Y', :M) + date('%d/%m/%Y', '31/12/1999') )",
+
+
+Les fonctions de groupes
+------------------------
+
+Les fonctions suivantes permettent de grouper les données ou de les mettre en
+correspondance avec d’autres lignes du fichier.
+
+Elles se basent sur deux paramètres supplémentaires :
+
+1. le premier étant les colonnes à identifier pour faire le regroupement,
+2. le second étant l’ordre dans lequel les lignes doivent être triées.
+
+ `counter([regroupement, …],[tri, …])`
+
+ Le tri est ignoré si le regroupement est omis.
+
+
+`counter`
+ Crée un compteur qui s’incrémente tant que les lignes sont identiques.
+
+ L’exemple suivant va remettre le compteur à `1` dès que la société ou le
+ titre change, et utilisera la date pour ordonner les valeurs dans ce
+ groupe.
+
+ .. code:: toml
+
+ """counter(
+ [:societe.H, :lib_titres.B],
+ [date('%d/%m/%Y', :mouvements.C), :A]
+ )"""
+
+
+ La fonction peut également être utilisée pour ne conserver que les lignes
+ uniques au sein d’une plage donnée (via un post-traitement où l’on ne garde
+ que les valeurs à `1`)
+
+ .. code:: toml
+
+ "counter([:C], [:A]) = 1"
+
+ Il est possible de construire plusieurs compteurs sur des groupes et des
+ tris différents au sein d’un même fichier.
+
+ Si le critère de tri n’est pas donné, l’application va compter le nombre
+ d’éléments pour chaque groupe: toutes les cellules auront le même résultat.
+
+`previous`
+ La fonction `previous` permet d’aller chercher l’élément précédent dans les
+ données.
+
+ En fonction du critère de regroupement, la valeur renvoyée sera vide (si
+ l’on est sur un groupe différent), et la valeur précédente sera recherchée en
+ fonction du critère de tri donné.
+
+ `previous(expression, [regroupement, …], [tri, …])`
+
+ .. code:: toml
+
+ """previous(
+ :ths.G,
+ [:socs.EB, :socs.L, :ths.E],
+ [:ths.H])"""
+
+`sum`
+ La fonction `sum` permet de calculer la somme d’une colonne donnée sur un
+ critère de regroupement.
+
+ Si le critère de tri n’est pas donné, l’application va calculer la somme
+ totale en fonction du critère de regroupement : toutes les cellules auront
+ le même résultat.
+
+ `sum(expression, [regroupement, …], [])`
+
+ .. code:: toml
+
+ """sum(
+ :shares.K,
+ [:shares.A, :shares.D],
+ [])"""
+
+ Au contraire, si un tri est donné, l’application va accumuler la somme tout
+ au long des valeurs rencontrées, et selon l’ordre du tri.
+
+`min`
+ La fonction `min` permet d’aller chercher le premier élément du groupe
+ (c’est à dire le plus petit selon l’ordre de tri)
+
+
+ `min(expression, [regroupement, …], [tri, …])`
+
+ .. code:: toml
+
+ """min(
+ :mouvements.C,
+ [:societe.A, :I],
+ [:mouvements.C])"""
+
+ exemple d’utilisation : rechercher la première date d’achat.
+
+`max`
+ La fonction `max` permet d’aller chercher le dernier élément du groupe.
+
+ `max(expression, [regroupement, …], [tri, …])`
+
diff --git a/tests/analyser_dependency.ml b/tests/analyser_dependency.ml
new file mode 100644
index 0000000..dea7727
--- /dev/null
+++ b/tests/analyser_dependency.ml
@@ -0,0 +1,247 @@
+open OUnit2
+open StdLabels
+module A = ImportAnalyser.Dependency
+module Cont = ImportContainers
+module Syntax = ImportConf.Syntax
+module Expression = ImportExpression.T
+module Table = ImportDataTypes.Table
+open ConfLoader
+
+let test_order =
+ "Order" >:: fun _ ->
+ let order = A.get_process_order conf |> List.map ~f:A.table in
+ let expected_order =
+ [ external_table_last; external_table_other; external_table_source ]
+ in
+ assert_equal ~cmp:(cmp_list cmp_source) ~printer:show_sources expected_order
+ order
+
+let test_columns =
+ "Columns" >:: fun _ ->
+ let expected_colums = Cont.IntSet.of_list [ 1; 2 ] in
+ let columns =
+ A.get_process_order conf
+ |> List.find ~f:(fun v -> A.table v = external_table_source)
+ |> A.columns
+ in
+
+ assert_equal
+ ~cmp:(fun a b -> 0 = Cont.IntSet.compare a b)
+ ~printer:Cont.show_intSet expected_colums columns
+
+let test_keys =
+ "Keys" >:: fun _ ->
+ (* We should have one key in the table other, because it is referenced as a
+ source in last file *)
+ let expected_keys =
+ A.
+ [
+ {
+ name = "other";
+ expression = Expression.Path 3;
+ columns = lazy (Cont.IntSet.singleton 3);
+ };
+ ]
+ in
+
+ let keys =
+ A.get_process_order conf
+ |> List.find ~f:(fun v -> A.table v = external_table_other)
+ |> A.keys
+ in
+
+ assert_equal ~cmp:(cmp_list key_cmp) ~printer:keys_printer expected_keys keys
+
+let test_keys_missing =
+ "Keys missing" >:: fun _ ->
+ (* We have no key in last_file because the key is declared as missing *)
+ let expected_keys =
+ A.
+ [
+ {
+ name = "last_file";
+ expression = Expression.Path 3;
+ columns = lazy (Cont.IntSet.singleton 3);
+ };
+ ]
+ in
+
+ let keys =
+ A.get_process_order conf
+ |> List.find ~f:(fun v -> A.table v = external_table_last)
+ |> A.keys
+ in
+
+ assert_equal ~cmp:(cmp_list key_cmp) ~printer:keys_printer expected_keys keys
+
+let test_unknow_source =
+ "Unknown source" >:: fun _ ->
+ let conf = { conf with externals = [] } in
+ assert_raises (ImportErrors.Unknown_source "last_file") (fun () ->
+ A.get_process_order conf)
+
+let test_unordered =
+ "Unorderd references" >:: fun _ ->
+ (* Externals not described in the right order shall not raise any
+ error. *)
+ let conf =
+ load
+ {|version = 1
+[source]
+ file = "source.xlsx"
+ name = "source"
+
+[externals.last_file]
+ intern_key = ":other.A"
+ file = "last.xlsx"
+ extern_key = ":C"
+ allow_missing = true
+
+[externals.other]
+ intern_key = ":A"
+ file = "other.xlsx"
+ extern_key = ":C"
+ allow_missing = false
+
+[sheet]
+ columns = []|}
+ in
+ assert_raises (ImportErrors.Unknown_source "other") (fun () ->
+ A.get_process_order conf)
+
+let test_circular =
+ "Unlinked reference" >:: fun _ ->
+ (* A reference to itself should be understood *)
+ let conf =
+ load
+ {|version = 1
+[source]
+ file = "source.xlsx"
+ name = "source"
+
+[externals.circular]
+ intern_key = ":circular.A"
+ file = "last.xlsx"
+ extern_key = ":A"
+ allow_missing = true
+
+[sheet]
+ columns = []|}
+ in
+
+ let elements = A.get_process_order conf in
+ assert_equal ~printer:string_of_int 1 (List.length elements)
+
+let test_unlinked =
+ "Circular reference" >:: fun _ ->
+ (* An element linked to anything (except itself) should be ignored *)
+ let conf =
+ Syntax.
+ {
+ version = 1;
+ source = external_table_source;
+ externals =
+ [
+ {
+ intern_key = Path { alias = Some "circular2"; column = 1 };
+ target = { file = "other.xlsx"; tab = 1; name = "circular" };
+ extern_key = Path 3;
+ allow_missing = true;
+ match_rule = None;
+ };
+ {
+ intern_key = Path { alias = Some "circular"; column = 1 };
+ target = { file = "other2.xlsx"; tab = 1; name = "circular2" };
+ extern_key = Path 3;
+ allow_missing = true;
+ match_rule = None;
+ };
+ ];
+ columns = [];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+ in
+ assert_raises (ImportErrors.Unknown_source "circular2") (fun () ->
+ A.get_process_order conf |> List.map ~f:A.table)
+
+let conf_with_unlinked =
+ Syntax.
+ {
+ version = 1;
+ source = external_table_source;
+ externals =
+ [
+ {
+ intern_key = Path { alias = None; column = 1 };
+ target = { file = "other.xlsx"; tab = 1; name = "other" };
+ extern_key = Path 3;
+ allow_missing = false;
+ match_rule = None;
+ };
+ ];
+ columns =
+ [
+ Concat [ Path { alias = None; column = 1 }; Literal "_"; Empty ];
+ Path { alias = None; column = 2 };
+ ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+
+(** A table referenced only in a filter list shall be loaded correctly *)
+let test_order_filter =
+ "Order filter" >:: fun _ ->
+ let order =
+ {
+ conf_with_unlinked with
+ filters = [ Path { alias = Some "other"; column = 5 } ];
+ }
+ |> A.get_process_order |> List.map ~f:A.table
+ in
+ let expected_order = [ external_table_other; external_table_source ] in
+ assert_equal ~printer:show_sources expected_order order
+
+(** A table referenced only in the order list shall be loaded correctly *)
+let test_order_sort =
+ "Order sort" >:: fun _ ->
+ let order =
+ {
+ conf_with_unlinked with
+ sort = [ Path { alias = Some "other"; column = 5 } ];
+ }
+ |> A.get_process_order |> List.map ~f:A.table
+ in
+ let expected_order = [ external_table_other; external_table_source ] in
+ assert_equal ~printer:show_sources expected_order order
+
+(** A table referenced only in the uniq list shall be loaded correctly *)
+let test_order_uniq =
+ "Order uniq" >:: fun _ ->
+ let order =
+ {
+ conf_with_unlinked with
+ uniq = [ Path { alias = Some "other"; column = 5 } ];
+ }
+ |> A.get_process_order |> List.map ~f:A.table
+ in
+ let expected_order = [ external_table_other; external_table_source ] in
+ assert_equal ~printer:show_sources expected_order order
+
+let tests =
+ "analyser_dependency"
+ >::: [
+ test_order;
+ test_columns;
+ test_keys;
+ test_keys_missing;
+ test_unknow_source;
+ test_unordered;
+ test_circular;
+ test_unlinked;
+ test_order_filter;
+ test_order_sort;
+ test_order_uniq;
+ ]
diff --git a/tests/analyser_query_test.ml b/tests/analyser_query_test.ml
new file mode 100644
index 0000000..3559de4
--- /dev/null
+++ b/tests/analyser_query_test.ml
@@ -0,0 +1,304 @@
+open OUnit2
+open StdLabels
+module A = ImportAnalyser.Dependency
+module Q = ImportAnalyser.Query
+module C = ImportConf
+module Syntax = ImportConf.Syntax
+module Expr = Expression_builder
+
+let show_source (source : ImportDataTypes.Table.t) =
+ Printf.sprintf "%s:%d" source.ImportDataTypes.Table.file source.tab
+
+let show_sources sources =
+ let b = Buffer.create 16 in
+ Buffer.add_string b "[";
+ List.iter sources ~f:(fun source ->
+ Buffer.add_string b (show_source source);
+ Buffer.add_string b ",");
+
+ let len = Buffer.length b in
+ if len > 1 then Buffer.truncate b (len - 1);
+ Buffer.add_string b "]";
+
+ Buffer.contents b
+
+(** This is sample configuration used in the tests *)
+let conf =
+ Syntax.
+ {
+ version = 1;
+ source = { file = "source.xlsx"; tab = 1; name = "source" };
+ externals =
+ [
+ {
+ intern_key = Path { alias = None; column = 1 };
+ target = { file = "other.xlsx"; tab = 1; name = "other" };
+ extern_key = Path 3;
+ allow_missing = false;
+ match_rule = None;
+ };
+ {
+ intern_key = Path { alias = Some "other"; column = 1 };
+ target = { file = "last.xlsx"; tab = 1; name = "last_file" };
+ extern_key = Path 3;
+ allow_missing = true;
+ match_rule = None;
+ };
+ ];
+ columns =
+ [
+ Concat [ Path { alias = None; column = 1 }; Literal "_"; Empty ];
+ Path { alias = None; column = 2 };
+ Path { alias = Some "last_file"; column = 5 };
+ ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+
+let create_table =
+ "Create table" >:: fun _ ->
+ let out = A.get_process_order conf in
+
+ let query = Q.create_table (List.hd out) in
+
+ assert_equal ~printer:Fun.id
+ "CREATE TABLE 'last' (id INTEGER PRIMARY KEY,'key_last_file','col_5')" query
+
+let select =
+ "Select" >:: fun _ ->
+ let query, _ = Q.select conf in
+ let expected_query =
+ {|SELECT COALESCE('source'.col_1,'') || ? || '' AS result_0,
+'source'.col_2 AS result_1,
+'last_file'.col_5 AS result_2
+FROM 'source' AS 'source'
+LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other'
+LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'|}
+ in
+
+ assert_equal ~printer:Fun.id expected_query query.q
+
+let check_externals =
+ "Check external" >:: fun _ ->
+ let query = Q.check_external conf (List.hd conf.externals) in
+
+ let expected_query =
+ "SELECT 'source'.'id', 'source'.col_1 FROM\n\
+ 'source' AS 'source' LEFT JOIN 'other' AS 'other' ON \
+ rtrim(upper('source'.col_1)) = 'other'.'key_other' WHERE \
+ 'other'.'key_other' IS NULL AND 'source'.col_1 IS NOT NULL AND \
+ 'source'.col_1 <> ''"
+ in
+
+ assert_equal ~printer:Fun.id expected_query query.q
+
+let previous =
+ "Test window previous" >:: fun _ ->
+ (* This is sample configuration used in the tests *)
+ let conf =
+ Syntax.
+ {
+ version = 1;
+ source = { file = "source.xlsx"; tab = 1; name = "previous" };
+ externals = [];
+ columns =
+ [
+ Window
+ ( Previous (Path { alias = None; column = 5 }),
+ [ Path { alias = None; column = 1 } ],
+ [ Path { alias = None; column = 3 } ] );
+ ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+ in
+
+ let res, _ = ImportAnalyser.Query.select conf in
+ let query =
+ "SELECT LAG('previous'.col_5) OVER (PARTITION BY 'previous'.col_1 ORDER BY \
+ 'previous'.col_3) AS result_0\n\
+ FROM 'source' AS 'previous'"
+ in
+ assert_equal ~printer:Fun.id query res.q
+
+let sum =
+ "Test window sum" >:: fun _ ->
+ (* This is sample configuration used in the tests *)
+ let conf =
+ Syntax.
+ {
+ version = 1;
+ source = { file = "source.xlsx"; tab = 1; name = "previous" };
+ externals = [];
+ columns =
+ [
+ Window
+ ( Sum (Path { alias = None; column = 5 }),
+ [ Path { alias = None; column = 1 } ],
+ [] );
+ ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+ in
+
+ let res, _ = ImportAnalyser.Query.select conf in
+ let query =
+ "SELECT SUM('previous'.col_5) OVER (PARTITION BY 'previous'.col_1) AS \
+ result_0\n\
+ FROM 'source' AS 'previous'"
+ in
+ assert_equal ~printer:Fun.id query res.q
+
+let sum_total =
+ "Test sum over the whole range" >:: fun _ ->
+ (* This is sample configuration used in the tests *)
+ let conf =
+ Syntax.
+ {
+ version = 1;
+ source = { file = "source.xlsx"; tab = 1; name = "previous" };
+ externals = [];
+ columns = [ Window (Sum (Path { alias = None; column = 5 }), [], []) ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+ in
+
+ let res, _ = ImportAnalyser.Query.select conf in
+ let query =
+ "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'"
+ in
+ assert_equal ~printer:Fun.id query res.q
+
+let sum_unfiltered =
+ "Test sum over the whole range" >:: fun _ ->
+ (* This is sample configuration used in the tests *)
+ let conf =
+ Syntax.
+ {
+ version = 1;
+ source = { file = "source.xlsx"; tab = 1; name = "previous" };
+ externals = [];
+ columns =
+ [
+ Window
+ ( Sum (Path { alias = None; column = 5 }),
+ [],
+ [ Path { alias = None; column = 1 } ] );
+ ];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+ in
+
+ let res, _ = ImportAnalyser.Query.select conf in
+ let query =
+ "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'"
+ in
+ assert_equal ~printer:Fun.id query res.q
+
+let prepare_insert =
+ "Test prepare_insert" >:: fun _ ->
+ let key =
+ ImportAnalyser.Dependency.
+ {
+ name = "key_test";
+ expression = Concat [ Path 1; Literal "_"; Empty ];
+ columns = lazy (ImportContainers.IntSet.singleton 1);
+ }
+ in
+
+ let buffer = Buffer.create 16 in
+ let () = ImportAnalyser.Query.build_key_insert buffer key in
+ let contents = Buffer.contents buffer in
+
+ let expected = "rtrim(upper(COALESCE(:col_1,'') || '_' || ''))" in
+
+ assert_equal ~printer:Fun.id expected contents
+
+(** Test a request with a group in a filter.
+
+This generate a CTE expression in order to evaluate the group before loading
+the results from the query. *)
+let filter_group =
+ "Test filter_group" >:: fun _ ->
+ let c col = Expr.path ImportDataTypes.Path.{ alias = None; column = col } in
+ let conf =
+ {
+ conf with
+ columns = [ c 1 ];
+ filters = [ Expr.(max (c 3) [ c 1 ] [ c 1 ]) ];
+ }
+ in
+ let contents, _ = ImportAnalyser.Query.select conf in
+
+ let expected =
+ {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0
+FROM 'source' AS 'source'
+LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other'
+LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file')
+SELECT 'source'.col_1 AS result_0
+FROM 'source' AS 'source'
+LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other'
+LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'
+INNER JOIN 'cte' ON cte.id = source.id
+WHERE (cte.group0)|}
+ in
+
+ assert_equal ~printer:(fun s -> Printf.sprintf "\n%s" s) expected contents.q
+
+(** Test a request with a group in a filter.
+
+This generate a CTE expression in order to evaluate the group before loading
+the results from the query. *)
+let filter_group2 =
+ "Test filter_group" >:: fun _ ->
+ let c col = Expr.path ImportDataTypes.Path.{ alias = None; column = col } in
+ let conf =
+ {
+ conf with
+ columns = [ c 1 ];
+ filters =
+ [ Expr.(max (c 3) [ c 1 ] [ c 1 ]); Expr.equal (c 3) Expr.integer_zero ];
+ }
+ in
+ let contents, _ = ImportAnalyser.Query.select conf in
+
+ let expected =
+ {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0
+FROM 'source' AS 'source'
+LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other'
+LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'
+WHERE COALESCE('source'.col_3,0)=0)
+SELECT 'source'.col_1 AS result_0
+FROM 'source' AS 'source'
+LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other'
+LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'
+INNER JOIN 'cte' ON cte.id = source.id
+WHERE COALESCE('source'.col_3,0)=0
+AND (cte.group0)|}
+ in
+
+ assert_equal ~printer:(fun s -> Printf.sprintf "\n%s" s) expected contents.q
+
+let test_suit =
+ [
+ create_table;
+ select;
+ check_externals;
+ previous;
+ sum;
+ sum_total;
+ sum_unfiltered;
+ prepare_insert;
+ filter_group;
+ filter_group2;
+ ]
+
+let tests = "analyser_query_test" >::: test_suit
diff --git a/tests/confLoader.ml b/tests/confLoader.ml
new file mode 100644
index 0000000..266ff33
--- /dev/null
+++ b/tests/confLoader.ml
@@ -0,0 +1,128 @@
+open StdLabels
+
+(** Read the configuration in toml and return the internal representation *)
+let load : string -> ImportConf.Syntax.t =
+ fun content ->
+ Otoml.Parser.from_string content |> ImportConf.t_of_toml |> Result.get_ok
+
+let conf =
+ load
+ {|version = 1
+
+[source]
+ file = "source.xlsx"
+ name = "source"
+
+[externals.other]
+ intern_key = ":A"
+ file = "other.xlsx"
+ extern_key = ":C"
+ allow_missing = false
+
+[externals.last_file]
+ intern_key = ":other.A"
+ file = "last.xlsx"
+ extern_key = ":C"
+ allow_missing = true
+
+[sheet]
+ columns = [
+ ":A ^ '_'",
+ ":B",
+ ":last_file.E",
+ ]|}
+
+let external_table_source =
+ ImportDataTypes.Table.{ file = "source.xlsx"; tab = 1; name = "source" }
+
+let external_table_other =
+ ImportDataTypes.Table.{ file = "other.xlsx"; tab = 1; name = "other" }
+
+let external_other =
+ ImportConf.Syntax.
+ {
+ intern_key = Path { alias = None; column = 1 };
+ target = external_table_other;
+ extern_key = Path 3;
+ allow_missing = false;
+ match_rule = None;
+ }
+
+let external_table_last =
+ ImportDataTypes.Table.{ file = "last.xlsx"; tab = 1; name = "last_file" }
+
+let external_last =
+ ImportConf.Syntax.
+ {
+ intern_key = Path { alias = Some "other"; column = 1 };
+ target = external_table_last;
+ extern_key = Path 3;
+ allow_missing = true;
+ match_rule = None;
+ }
+
+let show_source (source : ImportDataTypes.Table.t) =
+ Printf.sprintf "%s:%d" source.ImportDataTypes.Table.file
+ source.ImportDataTypes.Table.tab
+
+(*
+ * Compare two external sources
+ *)
+
+let show_sources sources =
+ let b = Buffer.create 16 in
+ Buffer.add_string b "[";
+ List.iter sources ~f:(fun source ->
+ Buffer.add_string b (show_source source);
+ Buffer.add_string b ",");
+
+ let len = Buffer.length b in
+ if len > 1 then Buffer.truncate b (len - 1);
+ Buffer.add_string b "]";
+
+ Buffer.contents b
+
+and cmp_source : ImportDataTypes.Table.t -> ImportDataTypes.Table.t -> bool =
+ fun s1 s2 ->
+ String.equal s1.ImportDataTypes.Table.name s2.ImportDataTypes.Table.name
+ && String.equal s1.ImportDataTypes.Table.file s2.ImportDataTypes.Table.file
+ && s1.ImportDataTypes.Table.tab = s2.ImportDataTypes.Table.tab
+
+let cmp_list : ('a -> 'a -> bool) -> 'a list -> 'a list -> bool =
+ fun cmp elems1 elems2 -> List.for_all2 ~f:cmp elems1 elems2
+
+(*
+ * Compare keys in the dependencies
+ *)
+
+let key_printer : ImportAnalyser.Dependency.key -> string =
+ fun { name; expression; _ } ->
+ let path_name =
+ let buffer = Buffer.create 16 in
+ ImportExpression.Headers.headers_of_expression buffer
+ (fun col buffer ->
+ Buffer.add_string buffer (ImportCSV.Csv.column_to_string col))
+ expression;
+ Buffer.contents buffer
+ in
+ Printf.sprintf "%s, %s" name path_name
+
+and key_cmp a b =
+ 0
+ = ImportExpression.T.cmp
+ (fun a b -> a - b)
+ a.ImportAnalyser.Dependency.expression
+ b.ImportAnalyser.Dependency.expression
+
+let keys_printer : ImportAnalyser.Dependency.key list -> string =
+ fun contents ->
+ let b = Buffer.create 16 in
+ List.iter contents ~f:(fun v -> Buffer.add_string b (key_printer v));
+ Buffer.contents b
+
+(*
+ * Represents externals
+ *)
+
+let pp_externals : ImportConf.Syntax.extern list -> string =
+ fun ext -> ImportConf.Syntax.toml_of_externs ext |> Otoml.Printer.to_string
diff --git a/tests/configuration/example_csv.toml b/tests/configuration/example_csv.toml
new file mode 100644
index 0000000..24cee9b
--- /dev/null
+++ b/tests/configuration/example_csv.toml
@@ -0,0 +1,31 @@
+version = 1
+
+[source]
+ file = "importer.csv"
+ name = "source"
+
+[externals.target]
+ intern_key = ":source.A"
+ extern_key = ":A"
+ file = "financial.xlsx"
+ allow_missing = false
+
+[externals.a_financial]
+ intern_key = ":target.A"
+ extern_key = ":O"
+ file = "financial.xlsx"
+ allow_missing = false
+
+[sheet]
+ columns = [":A",
+ "concat(\"-\", :A, :target.E, :B)",
+ ":C",
+ "counter([:C],[:A])",
+ ":E",
+ "match(\"\\(..\\)\", :B)",
+ ":D",
+ "counter([:D],[:A])"
+ ]
+ filters = []
+ sort = []
+ uniq = []
diff --git a/tests/configuration/simple.toml b/tests/configuration/simple.toml
new file mode 100644
index 0000000..d41383a
--- /dev/null
+++ b/tests/configuration/simple.toml
@@ -0,0 +1,20 @@
+version = 1
+
+[source]
+name = "source_name"
+file = "source_file"
+tab = 1
+
+[externals.target]
+extern_key = "\"_B\""
+# Here, the values A & B are considered as column, and not litteral
+intern_key = "function(:A, :B)"
+allow_missing = true
+file = ""
+tab = 1
+
+[sheet]
+columns = [
+ "function(:target.A, :B, 'free\\' text')",
+ "counter([:target.A],[:target.A])"
+]
diff --git a/tests/configuration_expression.ml b/tests/configuration_expression.ml
new file mode 100644
index 0000000..a5c4755
--- /dev/null
+++ b/tests/configuration_expression.ml
@@ -0,0 +1,253 @@
+open StdLabels
+open OUnit2
+module Expression = ImportExpression.T
+module Path = ImportDataTypes.Path
+open Path
+
+let printer = function
+ | Ok e -> ImportExpression.Repr.repr ImportConf.Path.repr e
+ | Error msg -> msg
+
+let parse_dquoted =
+ "parse_dquoted" >:: fun _ ->
+ let expr = "match(\"\\(..\\)\", :B)" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (Function
+ ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ])))
+ result
+
+let parse_quoted =
+ "parse_quoted" >:: fun _ ->
+ let expr = "match('\\(..\\)', :B)" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (Function
+ ("match", [ Literal "\\(..\\)"; Path { alias = None; column = 2 } ])))
+ result
+
+let concat =
+ "concat" >:: fun _ ->
+ let expr = ":A ^ :B" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (Concat
+ [
+ Path { alias = None; column = 1 }; Path { alias = None; column = 2 };
+ ]))
+ result
+
+let concat2 =
+ "concat2" >:: fun _ ->
+ let expr = "'A' ^ '_' ^ 'B'" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok (Concat [ Literal "A"; Literal "_"; Literal "B" ]))
+ result
+
+let litteral =
+ "litteral" >:: fun _ ->
+ (* The text is quoted and shall not be considered as a path *)
+ let expr = "':A'" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Literal ":A")) result
+
+let empty =
+ "empty" >:: fun _ ->
+ let expr = "" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok Empty) result
+
+let upper_nvl =
+ "upper_nvl" >:: fun _ ->
+ let expr = "NVL('','')" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Nvl [ Empty; Empty ])) result
+
+let lower_nvl =
+ "lower_nvl" >:: fun _ ->
+ let expr = "nvl('','')" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Nvl [ Empty; Empty ])) result
+
+let numeric =
+ "numeric" >:: fun _ ->
+ let expr = "123" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Integer "123")) result
+
+let numeric_neg =
+ "numeric_neg" >:: fun _ ->
+ let expr = "-123" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Integer "-123")) result
+
+let op_priority =
+ "operator_priority" >:: fun _ ->
+ let expr = "1 + 2 > 2" in
+ let result = ImportConf.expression_from_string expr
+ and expected =
+ ImportExpression.T.(
+ BOperator (GT, BOperator (Add, Integer "1", Integer "2"), Integer "2"))
+ in
+
+ assert_equal ~printer (Ok expected) result
+
+let op_priority2 =
+ "operator_priority" >:: fun _ ->
+ let expr = "1 ^ 2 = 2" in
+ let result = ImportConf.expression_from_string expr
+ and expected =
+ ImportExpression.T.(
+ BOperator (Equal, Concat [ Integer "1"; Integer "2" ], Integer "2"))
+ in
+
+ assert_equal ~printer (Ok expected) result
+
+let join =
+ "join" >:: fun _ ->
+ let expr = "join('sep', :A, :B)" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (Join
+ ( "sep",
+ [
+ Path { alias = None; column = 1 };
+ Path { alias = None; column = 2 };
+ ] )))
+ result
+
+let join_empty =
+ "join" >:: fun _ ->
+ let expr = "join('', :A, :B)" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (Join
+ ( "",
+ [
+ Path { alias = None; column = 1 };
+ Path { alias = None; column = 2 };
+ ] )))
+ result
+
+let upper =
+ "upper" >:: fun _ ->
+ let expr = "upper('')" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Function' (Upper, [ Empty ]))) result
+
+let trim =
+ "trim" >:: fun _ ->
+ let expr = "trim('')" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer (Ok (Function' (Trim, [ Empty ]))) result
+
+(** Extract the columns from a window function *)
+let fold_values =
+ "fold_values" >:: fun _ ->
+ (* The expression we want to test *)
+ let expr =
+ Expression.Window
+ ( Previous (Path { alias = None; column = 1 }),
+ [ Path { alias = None; column = 2 } ],
+ [ Path { alias = None; column = 3 } ] )
+ in
+
+ (* Extract the columns from the expression. The result is sorted because
+ the order is not preserved during the extraction. *)
+ let result =
+ Expression.fold_values ~init:[] ~f:(fun acc v -> v :: acc) expr
+ |> List.sort ~cmp:Path.compare
+ in
+
+ let expected =
+ [
+ { alias = None; column = 1 };
+ { alias = None; column = 2 };
+ { alias = None; column = 3 };
+ ]
+ in
+
+ assert_equal expected result
+
+let bad_quote =
+ "bad_quote" >:: fun _ ->
+ let expr = "':source.A" in
+ let result = ImportConf.expression_from_string expr in
+
+ assert_equal ~printer (Error "Unclosed quote at line 1 : \"':source.A\"")
+ result
+
+let nested_expression =
+ "nested_expression" >:: fun _ ->
+ let expr = "1 = (1 = 0)" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (BOperator
+ ( Equal,
+ Integer "1",
+ Expr (BOperator (Equal, Integer "1", Integer "0")) )))
+ result
+
+let priority_equality =
+ "priority_equality" >:: fun _ ->
+ let expr = "1 = 1 = 0" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (BOperator
+ (Equal, Integer "1", BOperator (Equal, Integer "1", Integer "0"))))
+ result
+
+let priority_operator_and =
+ "priority_equality" >:: fun _ ->
+ let expr = "1 and 1 = 0" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (BOperator (And, Integer "1", BOperator (Equal, Integer "1", Integer "0"))))
+ result
+
+let priority_operator_or =
+ "priority_equality" >:: fun _ ->
+ let expr = "1 <> 1 or 0" in
+ let result = ImportConf.expression_from_string expr in
+ assert_equal ~printer
+ (Ok
+ (BOperator
+ (Or, BOperator (Different, Integer "1", Integer "1"), Integer "0")))
+ result
+
+let test_suit =
+ [
+ parse_dquoted;
+ parse_quoted;
+ concat;
+ concat2;
+ litteral;
+ empty;
+ upper_nvl;
+ lower_nvl;
+ numeric;
+ numeric_neg;
+ op_priority;
+ op_priority2;
+ join;
+ upper;
+ trim;
+ join_empty;
+ fold_values;
+ bad_quote;
+ nested_expression;
+ priority_equality;
+ priority_operator_and;
+ priority_operator_or;
+ ]
+
+let tests = "configuration_expression" >::: test_suit
diff --git a/tests/configuration_toml.ml b/tests/configuration_toml.ml
new file mode 100644
index 0000000..3c8bfc2
--- /dev/null
+++ b/tests/configuration_toml.ml
@@ -0,0 +1,71 @@
+open OUnit2
+module Expression = ImportExpression.T
+module Path = ImportDataTypes.Path
+
+let test_suit =
+ [
+ ( "parse_extern" >:: fun _ ->
+ let toml = Otoml.Parser.from_file "configuration/simple.toml" in
+ let toml = ImportConf.t_of_toml toml in
+ match toml with
+ | Error s -> raise (Failure s)
+ | Ok result ->
+ let open ImportConf.Syntax in
+ let expected =
+ {
+ target = { file = ""; tab = 1; name = "target" };
+ extern_key = Literal "_B";
+ intern_key =
+ Function
+ ( "function",
+ [
+ Path { alias = None; column = 1 };
+ Path { alias = None; column = 2 };
+ ] );
+ match_rule = None;
+ allow_missing = true;
+ }
+ in
+
+ let printer s =
+ String.concat "," (List.map ImportConf.print_extern s)
+ in
+
+ assert_equal ~printer [ expected ] result.externals );
+ ( "parse_columns" >:: fun _ ->
+ let toml = Otoml.Parser.from_file "configuration/simple.toml" in
+ let toml = ImportConf.t_of_toml toml in
+
+ match toml with
+ | Error s -> raise (Failure s)
+ | Ok result ->
+ let open Path in
+ let open Expression in
+ let expected =
+ [
+ Function
+ ( "function",
+ [
+ Path { alias = Some "target"; column = 1 };
+ Path { alias = None; column = 2 };
+ Literal "free' text";
+ ] );
+ Window
+ ( Counter,
+ [ Path { alias = Some "target"; column = 1 } ],
+ [ Path { alias = Some "target"; column = 1 } ] );
+ ]
+ in
+
+ List.iter2
+ (fun expected result ->
+ assert_equal ~printer:ImportConf.print_path_expression expected
+ result)
+ expected result.columns );
+ ( "parse_csv" >:: fun _ ->
+ let toml = Otoml.Parser.from_file "configuration/example_csv.toml" in
+ let toml = ImportConf.t_of_toml toml in
+ ignore toml );
+ ]
+
+let tests = "configuration_toml" >::: test_suit
diff --git a/tests/dune b/tests/dune
new file mode 100644
index 0000000..c777aef
--- /dev/null
+++ b/tests/dune
@@ -0,0 +1,17 @@
+(test
+ (name importer_test)
+ (deps (source_tree configuration))
+ (libraries
+ ounit2
+ otoml
+ ppx_deriving.runtime
+ sqlite3
+ importConf
+ importAnalyser
+ importContainers
+ importCSV
+ importDataTypes
+ importErrors
+ importExpression
+ importSQL
+))
diff --git a/tests/expression_builder.ml b/tests/expression_builder.ml
new file mode 100644
index 0000000..fd9a17f
--- /dev/null
+++ b/tests/expression_builder.ml
@@ -0,0 +1,47 @@
+module T = ImportExpression.T
+
+let empty : 'a T.t = T.Empty
+let path : 'a -> 'a T.t = fun v -> T.Path v
+let literal_test : 'a T.t = T.Literal "test"
+let literal_quoted : 'a T.t = T.Literal "'"
+let literal_zero : 'a T.t = T.Literal "0"
+let integer_zero : 'a T.t = T.Integer "0"
+let integer_one : 'a T.t = T.Integer "1"
+let concat : 'a T.t = T.Concat [ T.Empty; T.Literal "test" ]
+let expr : 'a T.t = T.Function ("expr", [ literal_test; T.Literal "NOT NULL" ])
+
+let equal : 'a T.t -> 'a T.t -> 'a T.t =
+ fun e1 e2 -> T.BOperator (T.Equal, e1, e2)
+
+let different : 'a T.t -> 'a T.t -> 'a T.t =
+ fun e1 e2 -> T.BOperator (T.Different, e1, e2)
+
+let divide : 'a T.t -> 'a T.t -> 'a T.t =
+ fun e1 e2 -> T.BOperator (T.Division, e1, e2)
+
+let nvl : 'a T.t -> 'a T.t -> 'a T.t = fun e1 e2 -> T.Nvl [ e1; e2 ]
+
+let if_ : 'a T.t -> 'a T.t -> 'a T.t -> 'a T.t =
+ fun pred e1 e2 -> T.Function ("if", [ pred; e1; e2 ])
+
+let in_ : 'a T.t -> 'a T.t list -> 'a T.t =
+ fun e1 group -> T.GEquality (T.Equal, e1, group)
+
+let not_in : 'a T.t -> 'a T.t list -> 'a T.t =
+ fun e1 group -> T.GEquality (T.Different, e1, group)
+
+let max : 'a T.t -> 'a T.t list -> 'a T.t list -> 'a T.t =
+ fun e group order -> T.Window (T.Max e, group, order)
+
+let counter : 'a T.t list -> 'a T.t list -> 'a T.t =
+ fun group order -> T.Window (T.Counter, group, order)
+
+let function' : T.funct -> 'a T.t list -> 'a T.t =
+ fun name param -> T.Function' (name, param)
+
+module Make (Sym : ImportExpression.Sym.SYM_EXPR) = struct
+ module M = ImportExpression.Sym.M (Sym)
+
+ let eval : 'a T.t -> path_repr:'b Sym.path_repr -> 'a Sym.repr =
+ fun v ~path_repr -> M.eval ~path_repr v
+end
diff --git a/tests/expression_query.ml b/tests/expression_query.ml
new file mode 100644
index 0000000..d260a76
--- /dev/null
+++ b/tests/expression_query.ml
@@ -0,0 +1,208 @@
+open OUnit2
+module T = ImportExpression.T
+module Expr = Expression_builder
+module M = Expr.Make (ImportExpression.Query.Query)
+
+let eval =
+ M.eval ~path_repr:(fun formatter n -> Format.fprintf formatter "%s" n)
+
+let printer = Fun.id
+
+let test_expr ?(nested = ImportExpression.Query.QueryParameter.Literal) expr =
+ let buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer buffer in
+ let () = ImportExpression.Query.Query.observe expr formatter ~nested in
+ Format.pp_print_flush formatter ();
+ Buffer.contents buffer
+
+let empty =
+ "empty" >:: fun _ ->
+ let expr = eval Expr.empty in
+ let content = test_expr expr and expected = "''" in
+
+ assert_equal ~printer expected content
+
+let litteral =
+ "literal" >:: fun _ ->
+ let expr = eval Expr.literal_test in
+ let content = test_expr expr and expected = "'test'" in
+
+ assert_equal ~printer expected content
+
+let litteral_quoted =
+ "literal_quoted" >:: fun _ ->
+ let expr = eval Expr.literal_quoted in
+ let content = test_expr expr and expected = "'\''" in
+
+ assert_equal ~printer expected content
+
+let litteral_raw =
+ "literal_raw" >:: fun _ ->
+ let expr = eval Expr.literal_test in
+ let nested = ImportExpression.Query.QueryParameter.(Raw Literal) in
+ let content = test_expr expr ~nested and expected = "test" in
+
+ assert_equal ~printer expected content
+
+let path =
+ "path" >:: fun _ ->
+ (* In the path, the given function do all the job *)
+ let expr = eval @@ Expr.path "test" in
+ let content = test_expr expr and expected = "test" in
+
+ assert_equal ~printer expected content
+
+let concat =
+ "concat" >:: fun _ ->
+ let expr = eval Expr.concat in
+ let content = test_expr expr and expected = "'' || 'test'" in
+
+ assert_equal ~printer expected content
+
+let nvl =
+ "nvl" >:: fun _ ->
+ let expr = eval @@ Expr.nvl Expr.empty Expr.literal_test in
+ let content = test_expr expr and expected = "COALESCE('', 'test')" in
+
+ assert_equal ~printer expected content
+
+let upper =
+ "upper" >:: fun _ ->
+ let expr = eval @@ Expr.function' T.Upper [ Expr.literal_test ] in
+ let content = test_expr expr and expected = "UPPER('test')" in
+
+ assert_equal ~printer expected content
+
+let join =
+ "join" >:: fun _ ->
+ let expr =
+ ImportExpression.Query.Query.(
+ join "," [ eval Expr.empty; eval Expr.literal_test ])
+ in
+ let content = test_expr expr and expected = "CONCAT(',', '', 'test')" in
+
+ assert_equal ~printer expected content
+
+let boperator_eq =
+ "boperator_eq" >:: fun _ ->
+ let expr = eval @@ Expr.equal Expr.empty Expr.literal_test in
+ let content = test_expr expr and expected = "''='test'" in
+
+ assert_equal ~printer expected content
+
+let boperator_div =
+ "boperator_div" >:: fun _ ->
+ let expr = eval @@ Expr.divide Expr.integer_one Expr.integer_zero in
+ let content = test_expr expr and expected = "CAST(1 AS REAL)/0" in
+
+ assert_equal ~printer expected content
+
+let boperator_neq =
+ "boperator_neq" >:: fun _ ->
+ let expr = eval @@ Expr.different Expr.empty Expr.literal_test in
+ let content = test_expr expr and expected = "''<>'test'" in
+
+ assert_equal ~printer expected content
+
+let expr =
+ "expr" >:: fun _ ->
+ let expr = eval Expr.expr in
+ let content = test_expr expr and expected = "(test NOT NULL)" in
+
+ assert_equal ~printer expected content
+
+let unify_int =
+ "unify_int" >:: fun _ ->
+ let expr = eval @@ Expr.equal (Expr.path "external") Expr.integer_zero in
+ let content = test_expr expr and expected = "COALESCE(external,0)=0" in
+
+ assert_equal ~printer expected content
+
+let unify_string =
+ "unify_string" >:: fun _ ->
+ let expr = eval @@ Expr.equal (Expr.path "external") Expr.literal_zero in
+ let content = test_expr expr and expected = "COALESCE(external,'')='0'" in
+
+ assert_equal ~printer expected content
+
+let in_string =
+ "in_string" >:: fun _ ->
+ let expr = eval @@ Expr.in_ (Expr.path "external") [ Expr.literal_zero ] in
+ let content = test_expr expr and expected = "COALESCE(external,'') IN('0')" in
+ assert_equal ~printer expected content
+
+let not_in_string =
+ "in_string" >:: fun _ ->
+ let expr = eval @@ Expr.not_in (Expr.path "external") [ Expr.literal_zero ] in
+ let content = test_expr expr
+ and expected = "COALESCE(external,'') NOT IN('0')" in
+ assert_equal ~printer expected content
+
+(* Evaluate the max function *)
+let max =
+ "max" >:: fun _ ->
+ let expr =
+ eval @@ Expr.(max (path ":C") [ path ":A" ] [ path ":A"; path ":B" ])
+ in
+
+ let content = test_expr expr
+ and expected =
+ "LAST_VALUE(:C) OVER (PARTITION BY :A ORDER BY :A, :B RANGE BETWEEN \
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)"
+ in
+
+ assert_equal ~printer expected content
+
+let in_int =
+ "in_int" >:: fun _ ->
+ let expr =
+ eval
+ @@ Expr.in_ (Expr.path "external") [ Expr.integer_zero; Expr.integer_one ]
+ in
+ let content = test_expr expr and expected = "COALESCE(external,0) IN(0, 1)" in
+ assert_equal ~printer expected content
+
+let counter_no_order =
+ "counter_no_order" >:: fun _ ->
+ let expr = eval @@ Expr.(counter [ path ":A" ] []) in
+
+ let content = test_expr expr
+ and expected = "COUNT() OVER (PARTITION BY :A)" in
+
+ assert_equal ~printer expected content
+
+let counter_order =
+ "counter_no_order" >:: fun _ ->
+ let expr = eval @@ Expr.(counter [ path ":A" ] [ path ":B" ]) in
+
+ let content = test_expr expr
+ and expected = "ROW_NUMBER() OVER (PARTITION BY :A ORDER BY :B)" in
+
+ assert_equal ~printer expected content
+
+let test_suit =
+ [
+ empty;
+ litteral;
+ litteral_quoted;
+ litteral_raw;
+ path;
+ concat;
+ nvl;
+ upper;
+ join;
+ boperator_eq;
+ boperator_neq;
+ boperator_div;
+ expr;
+ unify_int;
+ unify_string;
+ in_string;
+ not_in_string;
+ in_int;
+ max;
+ counter_no_order;
+ counter_order;
+ ]
+
+let tests = "expression_query" >::: test_suit
diff --git a/tests/expression_repr.ml b/tests/expression_repr.ml
new file mode 100644
index 0000000..9ad321a
--- /dev/null
+++ b/tests/expression_repr.ml
@@ -0,0 +1,37 @@
+open OUnit2
+module Expression = ImportExpression.T
+module Path = ImportDataTypes.Path
+module Expr = Expression_builder
+module M = Expr.Make (ImportExpression.Repr.E)
+
+let eval = M.eval ~path_repr:ImportCSV.Csv.column_to_string
+let test_expr expr = ImportExpression.Repr.E.observe ~top:true expr
+
+let printer = function
+ | Ok e -> ImportConf.print_path_expression e
+ | Error msg -> msg
+
+let print_literal =
+ "print_litteral" >:: fun _ ->
+ let result = test_expr @@ eval (Literal "Content") in
+ assert_equal ~printer:Fun.id "'Content'" result
+
+let print_quoted_literal =
+ "print_quoted_literal" >:: fun _ ->
+ let result = test_expr @@ eval Expression_builder.literal_quoted in
+ assert_equal ~printer:Fun.id "'\\''" result
+
+let print_dquoted_literal =
+ "print_dquoted_literal" >:: fun _ ->
+ let result = test_expr @@ eval (Literal "\"") in
+ assert_equal ~printer:Fun.id "'\"'" result
+
+let print_numeric =
+ "print_numeric" >:: fun _ ->
+ let result = test_expr @@ eval (Literal "123") in
+ assert_equal ~printer:Fun.id "123" result
+
+let test_suit =
+ [ print_literal; print_quoted_literal; print_dquoted_literal; print_numeric ]
+
+let tests = "expression_repr" >::: test_suit
diff --git a/tests/expression_type_of.ml b/tests/expression_type_of.ml
new file mode 100644
index 0000000..706b3e7
--- /dev/null
+++ b/tests/expression_type_of.ml
@@ -0,0 +1,65 @@
+open OUnit2
+module T = ImportExpression.T
+module Types = ImportDataTypes.Types
+module Expr = Expression_builder
+module M = Expr.Make (ImportExpression.Type_of)
+
+let eval = M.eval ~path_repr:(fun _ -> ())
+let printer = Types.string_of_t
+let test_expr expr = ImportExpression.Type_of.observe expr
+
+let empty =
+ "empty" >:: fun _ ->
+ let expr = eval Expr.empty in
+ let content = test_expr expr and expected = Types.None in
+
+ assert_equal ~printer expected content
+
+(** Control an if statement with a predicate which is not a boolean *)
+let invalid_if =
+ "invalid_if" >:: fun _ ->
+ (* The expression we want to test *)
+ let raw_expr =
+ Expr.if_ Expr.literal_test Expr.integer_zero Expr.integer_one
+ in
+
+ let exn =
+ ImportErrors.TypeError
+ {
+ expected = Types.Bool;
+ actual = Types.String;
+ expression = ImportExpression.Repr.repr Fun.id raw_expr;
+ subset = "the predicate";
+ }
+ in
+ assert_raises exn (fun () ->
+ let expr = eval raw_expr in
+ test_expr expr)
+
+(** The application should infer that the expression is a string *)
+let valid_if =
+ "valid_if" >:: fun _ ->
+ let expr =
+ eval
+ @@ Expr.if_
+ (Expr.equal Expr.integer_one Expr.integer_zero)
+ Expr.literal_test Expr.literal_test
+ in
+ let content = test_expr expr and expected = Types.String in
+ assert_equal ~printer expected content
+
+let upper =
+ "upper" >:: fun _ ->
+ let expr = eval @@ Expr.function' T.Upper [Expr.literal_test] in
+
+ let content = test_expr expr and expected = Types.String in
+ assert_equal ~printer expected content
+
+let in_int =
+ "in_int" >:: fun _ ->
+ let expr = eval @@ Expr.in_ (Expr.path "external") [ Expr.integer_one ] in
+ let content = test_expr expr and expected = Types.Bool in
+ assert_equal ~printer expected content
+
+let tests = "expression_type_of" >::: [ empty; invalid_if; valid_if; in_int ;
+upper ]
diff --git a/tests/importCSV_test.ml b/tests/importCSV_test.ml
new file mode 100644
index 0000000..bc21992
--- /dev/null
+++ b/tests/importCSV_test.ml
@@ -0,0 +1,29 @@
+open OUnit2
+open ImportCSV
+
+let test_suit =
+ [
+ ( "Column A" >:: fun _ ->
+ assert_equal
+ ~printer:(fun i -> Printf.sprintf "%d (%s)" i (Csv.column_to_string i))
+ 1 (Csv.column_of_string "A") );
+ ( "Column a" >:: fun _ ->
+ assert_equal
+ ~printer:(fun i -> Printf.sprintf "%d (%s)" i (Csv.column_to_string i))
+ 1 (Csv.column_of_string "a") );
+ ( "Column name" >:: fun _ ->
+ let () =
+ for i = 1 to 1_000 do
+ let column_name = Csv.column_to_string i in
+ let column_index = Csv.column_of_string column_name in
+
+ assert_equal
+ ~printer:(fun i ->
+ Printf.sprintf "%d (%s)" i (Csv.column_to_string i))
+ i column_index
+ done
+ in
+ () );
+ ]
+
+let tests = "importCSV_test" >::: test_suit
diff --git a/tests/importConf_test.ml b/tests/importConf_test.ml
new file mode 100644
index 0000000..c94eb91
--- /dev/null
+++ b/tests/importConf_test.ml
@@ -0,0 +1,23 @@
+open OUnit2
+open ConfLoader
+
+(** Test the dependencies extracted from the external named "source".
+
+ Refer to the default configuration used in [ConfLoader] to see the
+ configuration.
+ *)
+let test_get_dependencies_for_source =
+ "get_dependancies_for_table" >:: fun _ ->
+ let result = ImportConf.get_dependancies_for_table conf conf.source
+ and expected = [ external_other ] in
+ assert_equal ~printer:pp_externals expected result
+
+let test_get_dependencies_for_other =
+ "get_dependancies_for_table" >:: fun _ ->
+ let result = ImportConf.get_dependancies_for_table conf external_table_other
+ and expected = [ external_last ] in
+ assert_equal ~printer:pp_externals expected result
+
+let tests =
+ "importConf_test"
+ >::: [ test_get_dependencies_for_source; test_get_dependencies_for_other ]
diff --git a/tests/importer_test.ml b/tests/importer_test.ml
new file mode 100644
index 0000000..16ea663
--- /dev/null
+++ b/tests/importer_test.ml
@@ -0,0 +1,21 @@
+open OUnit2
+
+let _ =
+ run_test_tt_main
+ ("importer_tests"
+ >::: [
+ ImportCSV_test.tests;
+ Sql_date.tests;
+ Sql_match.tests;
+ Sql_int.tests;
+ Sql_trim.tests;
+ ImportConf_test.tests;
+ Configuration_toml.tests;
+ Configuration_expression.tests;
+ Expression_repr.tests;
+ Expression_type_of.tests;
+ Expression_query.tests;
+ Analyser_dependency.tests;
+ Analyser_query_test.tests;
+ Sql_db.tests;
+ ])
diff --git a/tests/sql_date.ml b/tests/sql_date.ml
new file mode 100644
index 0000000..4becdf5
--- /dev/null
+++ b/tests/sql_date.ml
@@ -0,0 +1,18 @@
+open OUnit2
+
+let test_suit =
+ [
+ ( "Parse date" >:: fun _ ->
+ let text_date = Sqlite3.Data.TEXT "2002-04-08 15:59:41.000"
+ and format_date = Sqlite3.Data.TEXT "%Y-%m-%d %H:%M:%S.000" in
+
+ assert_equal (Sqlite3.Data.INT 37354L)
+ (ImportSQL.Date.f format_date text_date) );
+ ( "Parse date as int" >:: fun _ ->
+ let int_date = Sqlite3.Data.INT 37354L
+ and format_date = Sqlite3.Data.TEXT "%Y-%m-%d %H:%M:%S.000" in
+
+ assert_equal int_date (ImportSQL.Date.f format_date int_date) );
+ ]
+
+let tests = "sql_date" >::: test_suit
diff --git a/tests/sql_db.ml b/tests/sql_db.ml
new file mode 100644
index 0000000..c966f4e
--- /dev/null
+++ b/tests/sql_db.ml
@@ -0,0 +1,198 @@
+(** Test the behavior of the sqlite with a in-memory database *)
+
+open OUnit2
+open StdLabels
+
+let ( let* ) res cont =
+ match res with
+ | Ok value -> cont value
+ | Error e -> raise e
+
+(** Test a process with a simple configuration in-memory *)
+let run_test ~configuration ~input ~expected name =
+ name >:: fun _ ->
+ (* We expect a valid configuration *)
+ let conf =
+ ImportConf.t_of_toml (Otoml.Parser.from_string configuration)
+ |> Result.get_ok
+ in
+
+ let exec db =
+ let table = List.hd @@ ImportAnalyser.Dependency.get_process_order conf in
+ let* () = ImportSQL.Db.create_table db table in
+
+ (* Prepare the statement in order to import data *)
+ let* stmt = ImportSQL.Db.prepare_insert db table in
+
+ (* Inject some data into the table *)
+ let result, _ =
+ List.fold_left ~init:(Ok (), 0) input ~f:(fun (_, i) data ->
+ let result =
+ let* () = ImportSQL.Db.insert ~id:i db stmt data in
+ let* () = ImportSQL.Db.reset stmt in
+ Ok ()
+ in
+ (result, i + 1))
+ in
+ let* () = result in
+
+ let* () = ImportSQL.Db.finalize stmt in
+
+ let expected = ref expected in
+
+ (* Collect the data *)
+ let* () =
+ ImportSQL.Db.query db conf ~f:(fun rows ->
+ match !expected with
+ | [] -> ()
+ | hd :: tl ->
+ expected := tl;
+ let () =
+ Array.iter2 rows hd ~f:(fun (_, value) expected ->
+ assert_equal ~printer:ImportCSV.DataType.to_string value
+ expected)
+ in
+ ())
+ in
+
+ Ok ()
+ in
+
+ (* Use a magic keyword for in-memory database *)
+ ignore @@ ImportSQL.Db.with_db ":memory:" exec
+
+(** Simple test used to check the process *)
+let simple_extraction =
+ run_test "simple_extraction"
+ ~configuration:
+ {|version = 1
+
+[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+columns = [
+ ":A ^ '_'",
+ ":B",
+ ":E"]|}
+ ~input:
+ [
+ [
+ (0, ImportCSV.DataType.Integer 123);
+ (1, ImportCSV.DataType.Integer 2);
+ (4, ImportCSV.DataType.Integer 5);
+ ];
+ ]
+ ~expected:
+ [
+ [|
+ ImportCSV.DataType.Content "123_";
+ ImportCSV.DataType.Integer 2;
+ ImportCSV.DataType.Integer 5;
+ |];
+ ]
+
+(** Ensure the behavior of the sum function when a filter is given. It is
+ expected to accumulate the values over each line *)
+let sum_sort =
+ run_test "sum_sort"
+ ~configuration:
+ {|version = 1
+
+[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+columns = [
+ ":A",
+ "sum(:C, [:B], [:A])",
+]|}
+ ~input:
+ [
+ [
+ (0, ImportCSV.DataType.Integer 1);
+ (1, ImportCSV.DataType.Content "A");
+ (2, ImportCSV.DataType.Integer 100);
+ ];
+ [
+ (0, ImportCSV.DataType.Integer 2);
+ (1, ImportCSV.DataType.Content "A");
+ (2, ImportCSV.DataType.Integer 100);
+ ];
+ [
+ (0, ImportCSV.DataType.Integer 3);
+ (1, ImportCSV.DataType.Content "A");
+ (2, ImportCSV.DataType.Integer 100);
+ ];
+ ]
+ ~expected:
+ [
+ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 100 |];
+ [| ImportCSV.DataType.Integer 2; ImportCSV.DataType.Integer 200 |];
+ [| ImportCSV.DataType.Integer 3; ImportCSV.DataType.Integer 300 |];
+ ]
+
+let sum_total =
+ run_test "sum_total"
+ ~configuration:
+ {|version = 1
+
+[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+columns = [
+ ":A",
+ "sum(:C, [], [])",
+]|}
+ ~input:
+ [
+ [
+ (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100);
+ ];
+ [
+ (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100);
+ ];
+ ]
+ ~expected:
+ [
+ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |];
+ [| ImportCSV.DataType.Integer 2; ImportCSV.DataType.Integer 200 |];
+ ]
+
+(** Ensure the behavior of the sum function when no filter is given. It is
+ expected to get the total sum for each line *)
+let sum_unfiltered =
+ run_test "sum_unfiltered"
+ ~configuration:
+ {|version = 1
+
+[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+columns = [
+ ":A",
+ "sum(:C, [], [:A])",
+]|}
+ ~input:
+ [
+ [
+ (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100);
+ ];
+ [
+ (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100);
+ ];
+ ]
+ ~expected:
+ [
+ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |];
+ [| ImportCSV.DataType.Integer 2; ImportCSV.DataType.Integer 200 |];
+ ]
+
+let test_suit = [ simple_extraction; sum_sort; sum_total; sum_unfiltered ]
+let tests = "sql_db" >::: test_suit
diff --git a/tests/sql_int.ml b/tests/sql_int.ml
new file mode 100644
index 0000000..87b1086
--- /dev/null
+++ b/tests/sql_int.ml
@@ -0,0 +1,26 @@
+open OUnit2
+
+let printer = function
+ | Sqlite3.Data.INT t -> Int64.to_string t
+ | Sqlite3.Data.NONE -> "None"
+ | Sqlite3.Data.NULL -> "Null"
+ | Sqlite3.Data.FLOAT f -> Float.to_string f
+ | Sqlite3.Data.TEXT t | Sqlite3.Data.BLOB t -> t
+
+let test_suit =
+ [
+ ( "Int_of_int" >:: fun _ ->
+ assert_equal (Sqlite3.Data.INT 37354L)
+ (ImportSQL.Math.int (Sqlite3.Data.INT 37354L)) );
+ ( "Int_of_string" >:: fun _ ->
+ assert_equal (Sqlite3.Data.INT 37354L)
+ (ImportSQL.Math.int (Sqlite3.Data.TEXT "37354")) );
+ ( "Int_of_string2" >:: fun _ ->
+ assert_equal ~printer (Sqlite3.Data.INT 37354L)
+ (ImportSQL.Math.int (Sqlite3.Data.TEXT "37354.0")) );
+ ( "Int_of_float" >:: fun _ ->
+ assert_equal (Sqlite3.Data.INT 37354L)
+ (ImportSQL.Math.int (Sqlite3.Data.FLOAT 37354.0)) );
+ ]
+
+let tests = "sql_int" >::: test_suit
diff --git a/tests/sql_match.ml b/tests/sql_match.ml
new file mode 100644
index 0000000..0314bb3
--- /dev/null
+++ b/tests/sql_match.ml
@@ -0,0 +1,12 @@
+open OUnit2
+
+let test_suit =
+ [
+ ( "Parse regex" >:: fun _ ->
+ let text = Sqlite3.Data.TEXT "hello world"
+ and regex = Sqlite3.Data.TEXT "hello ([A-Za-z]+)" in
+
+ assert_equal (Sqlite3.Data.TEXT "world") (ImportSQL.Match.f regex text) );
+ ]
+
+let tests = "sql_match" >::: test_suit
diff --git a/tests/sql_trim.ml b/tests/sql_trim.ml
new file mode 100644
index 0000000..77e755e
--- /dev/null
+++ b/tests/sql_trim.ml
@@ -0,0 +1,11 @@
+open OUnit2
+
+let test_suit =
+ [
+ ( "Trim" >:: fun _ ->
+ let text = Sqlite3.Data.TEXT " \nABC \n" in
+
+ assert_equal (Sqlite3.Data.TEXT "ABC") (ImportSQL.Trim.f text) );
+ ]
+
+let tests = "sql_trim" >::: test_suit