Diffstat (limited to 'lib')
-rw-r--r--  lib/analysers/dependency.ml | 256
-rw-r--r--  lib/analysers/dependency.mli | 40
-rwxr-xr-x  lib/analysers/dune | 12
-rw-r--r--  lib/analysers/headers.ml | 55
-rw-r--r--  lib/analysers/headers.mli | 11
-rw-r--r--  lib/analysers/query.ml | 445
-rw-r--r--  lib/analysers/query.mli | 27
-rwxr-xr-x  lib/configuration/dune | 29
-rw-r--r--  lib/configuration/expression_lexer.mll | 91
-rw-r--r--  lib/configuration/expression_parser.messages | 123
-rw-r--r--  lib/configuration/expression_parser.mly | 185
-rw-r--r--  lib/configuration/importConf.ml | 90
-rw-r--r--  lib/configuration/importConf.mli | 23
-rw-r--r--  lib/configuration/of_json.ml | 134
-rw-r--r--  lib/configuration/read_conf.ml | 216
-rw-r--r--  lib/configuration/syntax.ml | 88
-rwxr-xr-x  lib/containers/dune | 7
-rw-r--r--  lib/containers/importContainers.ml | 61
-rw-r--r--  lib/csv/csv.ml | 30
-rw-r--r--  lib/csv/dataType.ml | 21
-rw-r--r--  lib/csv/dataType.mli | 8
-rwxr-xr-x  lib/csv/dune | 6
-rw-r--r--  lib/data_types/dune | 10
-rw-r--r--  lib/data_types/path.ml | 15
-rw-r--r--  lib/data_types/readme.rst | 4
-rw-r--r--  lib/data_types/table.ml | 19
-rw-r--r--  lib/data_types/types.ml | 15
-rw-r--r--  lib/errors/dune | 9
-rw-r--r--  lib/errors/importErrors.ml | 98
-rw-r--r--  lib/errors/importErrors.mli | 46
-rw-r--r--  lib/expression/ast.ml | 31
-rw-r--r--  lib/expression/compose.ml | 150
-rw-r--r--  lib/expression/compose.mli | 59
-rwxr-xr-x  lib/expression/dune | 9
-rw-r--r--  lib/expression/filters.ml | 193
-rw-r--r--  lib/expression/filters.mli | 9
-rw-r--r--  lib/expression/headers.ml | 89
-rw-r--r--  lib/expression/headers.mli | 7
-rw-r--r--  lib/expression/lazier.ml | 71
-rw-r--r--  lib/expression/query.ml | 335
-rw-r--r--  lib/expression/query.mli | 27
-rw-r--r--  lib/expression/repr.ml | 127
-rw-r--r--  lib/expression/repr.mli | 6
-rw-r--r--  lib/expression/sym.ml | 71
-rw-r--r--  lib/expression/t.ml | 153
-rw-r--r--  lib/expression/t.mli | 54
-rw-r--r--  lib/expression/type_of.ml | 150
-rw-r--r--  lib/expression/type_of.mli | 10
-rw-r--r--  lib/file_handler/csv2sql.ml | 135
-rw-r--r--  lib/file_handler/csv2sql.mli | 10
-rwxr-xr-x  lib/file_handler/dune | 21
-rw-r--r--  lib/file_handler/state.ml | 178
-rw-r--r--  lib/file_handler/state.mli | 46
-rw-r--r--  lib/file_handler/xlsx2sql.ml | 205
-rw-r--r--  lib/file_handler/xlsx2sql.mli | 10
-rw-r--r--  lib/helpers/console.ml | 16
-rw-r--r--  lib/helpers/console.mli | 5
-rwxr-xr-x  lib/helpers/dune | 8
-rwxr-xr-x  lib/helpers/helpers.ml | 45
-rw-r--r--  lib/helpers/toml.ml | 31
-rw-r--r--  lib/helpers/toml.mli | 1
-rw-r--r--  lib/sql/date.ml | 24
-rw-r--r--  lib/sql/db.ml | 383
-rw-r--r--  lib/sql/db.mli | 106
-rw-r--r--  lib/sql/dune | 15
-rw-r--r--  lib/sql/hashs.ml | 79
-rw-r--r--  lib/sql/header.ml | 74
-rw-r--r--  lib/sql/join.ml | 30
-rw-r--r--  lib/sql/match.ml | 22
-rw-r--r--  lib/sql/math.ml | 20
-rw-r--r--  lib/sql/t.ml | 52
-rw-r--r--  lib/sql/trim.ml | 9
-rw-r--r--  lib/sql/year.ml | 19
-rw-r--r--  lib/tools/dune | 10
-rwxr-xr-x  lib/tools/git_head.sh | 11
75 files changed, 5290 insertions, 0 deletions
diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
new file mode 100644
index 0000000..e81cc49
--- /dev/null
+++ b/lib/analysers/dependency.ml
@@ -0,0 +1,256 @@
+open StdLabels
+module IntSet = ImportContainers.IntSet
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+(*
+ Internally, the dependency chain is represented as a graph.
+
+   Each file to process (csv or xlsx) is a node. The links between nodes
+   cannot be represented directly in the graph, because a single key can
+   involve several tables at once (for example [join(source.A, other.B)]).
+   So the graph is only used to get the import order, and the detailed
+   information is kept in a separate table.
+
+*)
+
+type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list
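+(* Illustrative shape (assuming an external [other] whose internal key reads a
+   column of [source]): the entry [(other, [ source ])] records that [other]
+   can only be processed once [source] has been loaded. *)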
+
+type key = {
+ name : string;
+ expression : Path.column Expression.t;
+ columns : ImportContainers.IntSet.t Lazy.t;
+}
+
+type t = {
+ table : Table.t;
+ columns : IntSet.t;
+ keys : key list;
+}
+
+let table t = t.table
+let columns t = t.columns
+let keys t = t.keys
+
+type build_map = t ImportContainers.Externals.t
+
+(* The expression can be qualified with a full path (when we are in the column
+   definition) or only with a column (in a binding with an external table).
+
+   This type is here to extract the correct information, according to the type
+   we are dealing with:
+
+   - [to_mapping] selects the right set in which to add the column pointed to
+     by the path (this can be [keys] or [columns])
+   - [of_path] is needed to extract the qualified source from any kind of path.
+*)
+type 'a expression_extractor = {
+ to_mapping : t -> Path.column -> t;
+ of_path : 'a -> string option * Path.column;
+}
+
+(** [add_path_in_map f parent path ] Extract the column from element [path] and
+ process the column in the function [f]
+
+ The [path] is abstract, but the function [f.of_path] can extract the needed
+ elements in order to add it in the mapping.
+
+    The function may raise [Unknown_source] if the path describes an unknown
+    table. *)
+let add_path_in_map :
+ f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map =
+ fun ~f ~conf path map ->
+ let table_source, column = f.of_path path in
+ let table =
+ try ImportConf.get_table_for_name conf table_source with
+ | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source))
+ in
+
+ ImportContainers.Externals.update map
+ ~key:(ImportContainers.KeyName.from_table table) ~f:(fun v ->
+ let mapping =
+ match v with
+ | None -> raise (ImportErrors.Unknown_source table.name)
+ | Some mapping -> mapping
+ in
+
+ Some (f.to_mapping mapping column))
+
+let add_expression_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t ->
+ build_map ->
+ build_map =
+ fun ~f ~conf expr map ->
+ Expression.fold_values expr ~init:map ~f:(fun map p ->
+ add_path_in_map ~f ~conf p map)
+
+let add_columns_in_map :
+ f:'a expression_extractor ->
+ conf:Syntax.t ->
+ 'a Expression.t list ->
+ build_map ->
+ build_map =
+ fun ~f ~conf columns map ->
+ let columns =
+ List.fold_left columns ~init:map ~f:(fun map expression ->
+ let new_map = add_expression_in_map ~f ~conf expression map in
+ new_map)
+ in
+ columns
+
+(* [add_dependancies ~conf source map path]
+   add the dependency from the table [source] to another one after analysing
+   the expression and extracting the paths it contains.
+
+ This function is called for each path declared inside the expression. *)
+let add_dependancies : conf:Syntax.t -> Syntax.extern -> deps -> Path.t -> deps
+ =
+ fun ~conf extern graph path ->
+ let source_table = ImportConf.get_table_for_name conf path.Path.alias in
+
+ let source = ImportContainers.Source.from_table source_table in
+ let target = ImportContainers.Source.from_table extern.target in
+
+ match ImportContainers.Source.equal target source with
+ | true -> graph
+ | _ -> (target, [ source ]) :: graph
+
+let add_external_in_map :
+ conf:Syntax.t -> Syntax.extern -> build_map * deps -> build_map * deps =
+ fun ~conf extern (map, graph) ->
+ let dest = ImportContainers.KeyName.from_table extern.target in
+ (* Pre-check that every source is already declared in the configuration. *)
+ let _ =
+ Expression.fold_values extern.intern_key ~init:() ~f:(fun () path ->
+ try
+ let _ = ImportConf.get_table_for_name conf path.Path.alias in
+ ()
+ with
+ | Not_found -> (
+ match path.alias with
+ | Some table -> raise (ImportErrors.Unknown_source table)
+ | None ->
+ (* This is very unlikely. This error would be raised if we have
+ no source for this import *)
+ let root = conf.source in
+ raise (ImportErrors.Unknown_source root.name)))
+ in
+
+ (* Create the new key with all the expression and all the columns inside it *)
+ let new_key =
+ {
+ name = extern.target.Table.name;
+ expression = extern.extern_key;
+ columns =
+ lazy
+ (Expression.fold_values extern.extern_key
+ ~f:(fun acc k -> ImportContainers.IntSet.add k acc)
+ ~init:ImportContainers.IntSet.empty);
+ }
+ in
+ let build_map =
+ ImportContainers.Externals.update map ~key:dest ~f:(function
+ | None ->
+          (* Create the entry for the external if it does not exist *)
+ Some
+ {
+ table = extern.target;
+ columns = IntSet.empty;
+ keys = [ new_key ];
+ }
+ | Some mapping ->
+ (* Or update the existing one with the key we just created *)
+ Some { mapping with keys = new_key :: mapping.keys })
+ in
+ let graph =
+ Expression.fold_values extern.intern_key ~init:graph
+ ~f:(add_dependancies ~conf extern)
+ in
+ let build_map =
+ add_expression_in_map extern.intern_key build_map ~conf
+ ~f:
+ {
+ of_path =
+ (fun Path.{ alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ (Some table.name, column));
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ }
+ in
+ (build_map, graph)
+
+let mapper =
+ {
+ to_mapping =
+ (fun mapping column ->
+ { mapping with columns = IntSet.add column mapping.columns });
+ of_path = (fun ({ alias; column } : Path.t) -> (alias, column));
+ }
+
+let get_mapping : Syntax.t -> build_map * deps =
+ fun conf ->
+ let root = ImportContainers.Source.from_table (ImportConf.root_table conf)
+ and root' =
+ ImportContainers.KeyName.from_table (ImportConf.root_table conf)
+ in
+ let graph = [ (root, []) ] in
+
+ (* For each external declared in the configuration file, add the columns to
+ query *)
+ let init =
+ ( ImportContainers.Externals.singleton root'
+ {
+ table = ImportConf.root_table conf;
+ columns = IntSet.empty;
+ keys = [];
+ },
+ graph )
+ in
+ let map, graph =
+ List.fold_left conf.externals ~init ~f:(fun map extern ->
+ add_external_in_map ~conf extern map)
+ in
+
+  (* Now we don’t bother anymore with the graph and its dependencies, we just
+     collect all the columns in the different expressions *)
+ let map =
+ map
+ |> add_columns_in_map ~conf ~f:mapper conf.columns
+ |> add_columns_in_map ~conf ~f:mapper conf.sort
+ |> add_columns_in_map ~conf ~f:mapper conf.filters
+ |> add_columns_in_map ~conf ~f:mapper conf.uniq
+ in
+ (map, graph)
+
+let get_process_order : Syntax.t -> t list =
+ fun map ->
+ let map, graph = get_mapping map in
+
+ match Tsort.sort graph with
+ | Tsort.ErrorCycle l ->
+ let name_of_key k =
+ ImportContainers.(Externals.find (Source.name k) map).table.name
+ in
+ raise (ImportErrors.Cycle (List.map ~f:name_of_key l))
+ | Sorted elements ->
+      (* It’s OK, we know there are no cycles now, we can extract the files to
+ load from this list.
+ *)
+ List.filter_map elements ~f:(fun v ->
+ ImportContainers.Externals.find_opt
+ (ImportContainers.Source.name v)
+ map)
+      (* This list can still have duplicated values, and we have to remove them
+         while keeping the order.
+ *)
+ |> List.fold_left ~init:[] ~f:(fun acc element ->
+           (* Prevent the same file from being loaded twice *)
+ match List.mem element ~set:acc with
+ | true -> acc
+ | false -> element :: acc)
diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli
new file mode 100644
index 0000000..c89522a
--- /dev/null
+++ b/lib/analysers/dependency.mli
@@ -0,0 +1,40 @@
+type t
+
+val get_process_order : ImportConf.Syntax.t -> t list
+(** Extract the list of files to process, following the identified
+    dependencies. Load first the documents which do not require another
+    spreadsheet, and keep going in topological order.
+
+    Raise [Unknown_source file] if a source is referenced but is not declared.
+    The order matters: the exception will be raised if a source is referenced
+    before its declaration (when chaining different externals). *)
+
+val table : t -> ImportDataTypes.Table.t
+(** Get the table to proceed. *)
+
+val columns : t -> ImportContainers.IntSet.t
+(** A set of columns loaded in this table. These columns may not need to be
+    reported in the final export.
+
+    Each column is identified by its index in the Excel document.
+
+ This set does not include the columns used in the keys. They can be
+ fetched with the [keys] function *)
+
+type key = {
+  name : string;  (** The name of the target table this key points to *)
+ expression : ImportDataTypes.Path.column ImportExpression.T.t;
+ (** The expression used as key *)
+ columns : ImportContainers.IntSet.t Lazy.t;
+ (** The list of columns used in the key. All the columns are referenced
+ in the expression. We can have many columns used inside a single
+ key when a function is used (for example for joining multiple
+ columns into a single key) *)
+}
+(** This type describes the join key in a table. The name is the referring
+    table using this key (the key name in the table is key_"name"), and the
+    expression describes how to build the key. *)
+
+val keys : t -> key list
+(** [keys] is the list of columns pointed to by another table. They are
+    considered as join keys between the different tables. *)
diff --git a/lib/analysers/dune b/lib/analysers/dune
new file mode 100755
index 0000000..1bbc30f
--- /dev/null
+++ b/lib/analysers/dune
@@ -0,0 +1,12 @@
+(library
+ (name importAnalyser)
+ (libraries
+ importConf
+ importContainers
+ importCSV
+ importDataTypes
+ importExpression
+ importErrors
+ tsort
+ )
+)
diff --git a/lib/analysers/headers.ml b/lib/analysers/headers.ml
new file mode 100644
index 0000000..916dfee
--- /dev/null
+++ b/lib/analysers/headers.ml
@@ -0,0 +1,55 @@
+open StdLabels
+module I = ImportConf
+module E = ImportExpression.T
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+module SheeetMap = Map.Make (struct
+ type t = Table.t
+
+  (** We are sure we can’t have the same name for two different tables. *)
+ let compare v1 v2 = String.compare (Table.name v1) (Table.name v2)
+end)
+
+type content = string array
+
+type t = content SheeetMap.t
+(** The map associates a line of headers with each table.
+
+    The headers are always strings. *)
+
+(** Get the headers. The function has to be called after reading each document,
+    and will format the first line using the values from the cells. The
+    functions will not be evaluated (instead they will be displayed "as is").
+
+    When there is no value for this path, return an empty string. *)
+let columns : Syntax.t -> t -> string list =
+ fun conf t ->
+  (* We build here a custom printer which searches the array for the column
+     name.
+
+     This function will be given as an argument to the expression printer. *)
+ let f : Path.t -> Buffer.t -> unit =
+ fun path b ->
+ let source = I.get_table_for_name conf path.alias in
+
+ match SheeetMap.find_opt source t with
+ | None -> ()
+ | Some arr -> (
+ try Buffer.add_string b (Array.get arr (path.column - 1)) with
+ | _ ->
+ prerr_endline
+ @@ Printf.sprintf "No header found for :%s.%s"
+ (Option.value ~default:(I.root_table conf).Table.name
+ path.alias)
+ (ImportCSV.Csv.column_to_string path.column))
+ in
+
+ List.map conf.Syntax.columns ~f:(fun column ->
+ let b = Buffer.create 4 in
+
+ ImportExpression.Headers.headers_of_expression b f column;
+
+ Buffer.contents b)
diff --git a/lib/analysers/headers.mli b/lib/analysers/headers.mli
new file mode 100644
index 0000000..03e384b
--- /dev/null
+++ b/lib/analysers/headers.mli
@@ -0,0 +1,11 @@
+module SheeetMap : Map.S with type key = ImportDataTypes.Table.t
+
+type t = string array SheeetMap.t
+
+val columns : ImportConf.Syntax.t -> t -> string list
+(** Get the headers. The function has to be called after reading each document,
+    and will format the first line using the values from the cells. The
+    functions will not be evaluated (instead they will be displayed "as is").
+
+    When there is no value for this path, return an empty string. *)
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
new file mode 100644
index 0000000..7a6dd2a
--- /dev/null
+++ b/lib/analysers/query.ml
@@ -0,0 +1,445 @@
+open StdLabels
+module Expression = ImportExpression
+module Q = Expression.Query
+module Syntax = ImportConf.Syntax
+module Table = ImportConf.Table
+module Path = ImportConf.Path
+
+let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
+
+(** This module allows building fragments of the query which keep together the
+    bound parameters and the text of the query.
+
+    This is used a lot in order to create the CTE, where you need to create
+    fragments used both in the main request and partially in the CTE itself.
+
+    The content is mutable and all the functions return [unit]. *)
+module Chunk = struct
+ type t = {
+ b : Buffer.t;
+ parameters : ImportCSV.DataType.t Queue.t;
+ }
+
+ let create : unit -> t =
+ fun () -> { b = Buffer.create 16; parameters = Queue.create () }
+
+ let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t =
+ fun b parameters -> { b; parameters }
+
+  (* Append the content of [tail] at the end of [head].
+
+     [tail] is emptied during the operation.
+  *)
+ let append : head:t -> tail:t -> unit =
+ fun ~head ~tail ->
+ match Buffer.length tail.b with
+ | 0 -> ()
+ | _ ->
+ Buffer.add_buffer head.b tail.b;
+ Queue.transfer tail.parameters head.parameters;
+ ()
+
+  (** Add a literal string in the sequence *)
+ let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v
+
+ let copy : t -> t =
+ fun t ->
+ let b = Buffer.create 16 and parameters = Queue.copy t.parameters in
+ Buffer.add_buffer b t.b;
+ { b; parameters }
+
+ let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit =
+ fun t p -> Queue.add_seq t.parameters p
+end
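+(* Minimal usage sketch (comment only, not used by the build):
+
+     let head = Chunk.create () in
+     Chunk.add_string head "SELECT col_1";
+     let tail = Chunk.create () in
+     Chunk.add_string tail " FROM 'source'";
+     Chunk.append ~head ~tail;
+
+   [head] now holds the concatenated text and any transferred bound
+   parameters, while [tail] has been emptied. *)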
+
+let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit =
+ fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f
+
+(* Collect all the tables pointed by the expression. *)
+let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =
+ fun conf expression ->
+ Expression.T.fold_values expression ~init:[] ~f:(fun acc path ->
+ let table = ImportConf.get_table_for_name conf path.Path.alias in
+ let table_name = Table.name table in
+ (table, table_name) :: acc)
+ |> List.sort_uniq ~cmp:Stdlib.compare
+
+(** Represent a column in a safe way in a query *)
+let print_column : Table.t -> string -> string =
+ fun table column ->
+ String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ]
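+(* For example, a table named [people] and the column [col_3] are rendered as
+   'people'.'col_3' in the generated SQL (illustrative names). *)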
+
+let create_table : Dependency.t -> string =
+ fun mapping ->
+ let b = Buffer.create 64 in
+
+ Buffer.add_string b "CREATE TABLE '";
+ Buffer.add_string b (Table.name (Dependency.table mapping));
+ Buffer.add_string b "' (id INTEGER PRIMARY KEY";
+
+ List.iter (Dependency.keys mapping) ~f:(fun { Dependency.name; _ } ->
+ Buffer.add_string b ",'key_";
+ Buffer.add_string b name;
+ Buffer.add_string b "'");
+
+ ImportContainers.IntSet.iter (Dependency.columns mapping) ~f:(fun i ->
+ Buffer.add_string b ",'col_";
+ Buffer.add_string b (string_of_int i);
+ Buffer.add_string b "'");
+ Buffer.add_string b ")";
+
+ Buffer.contents b
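+(* Sketch of the generated statement for a table [people] with a single key
+   toward [orders] and the columns 2 and 5 (illustrative names):
+
+     CREATE TABLE 'people' (id INTEGER PRIMARY KEY,'key_orders','col_2','col_5') *)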
+
+let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit =
+ fun ~conf buffer { alias; column } ->
+ let table = ImportConf.get_table_for_name conf alias in
+ let table_name = table.Table.name in
+ Format.fprintf buffer "'%s'.col_%d" table_name column
+
+(** Extract the information from the dependencies. We get two pieces of
+    information here:
+
+ - the join query in order to load the data from the external column
+ - the column corresponding to the key in order to identify the missing
+ links later.
+ *)
+let query_of_external :
+ conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.extern -> unit =
+ fun ~conf ~join_buffer external_ ->
+ let extern_table = Table.name external_.target in
+
+ let formatter = Format.formatter_of_buffer join_buffer.b in
+ Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
+ external_.target.name
+ (prepare_key ~f:(fun f ->
+ let q =
+ Q.query_of_expression Q.BindParam f (show_path ~conf)
+ external_.intern_key
+ in
+
+ Chunk.add_parameters join_buffer (Queue.to_seq q)))
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+ Format.pp_print_flush formatter ()
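+(* Illustrative clause, assuming an external named [other] whose internal key
+   is simply column 1 of [source]:
+
+     LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' *)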
+
+(** Create the FROM part of the query, adding all the required externals.
+
+    SQLite is able to optimize the query and does not load the tables that are
+    not used in the select clause. *)
+let create_from_chunck : Syntax.t -> Chunk.t -> unit =
+ fun conf c ->
+ Chunk.add_string c "\nFROM '";
+ Chunk.add_string c (Table.name conf.source);
+ Chunk.add_string c "' AS '";
+ Chunk.add_string c conf.source.name;
+ Chunk.add_string c "'";
+
+ (* Add the externals in the query *)
+ List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c)
+
+(** Build a CTE query in order to use any group function inside the query.
+    Return the bound parameters used in the expression. The buffer given in
+ argument is also modified during the construction.
+
+ If filters is not None, the clauses are added to the CTE. *)
+let build_cte :
+ Syntax.t ->
+ expression:'a Expression.T.t ->
+ filters:Chunk.t option ->
+ Chunk.t =
+ fun conf ~expression ~filters ->
+ (* The binded parameters queue will be used later in the full query *)
+ let cte_chunk = Chunk.create () in
+
+ Chunk.add_string cte_chunk "WITH cte AS (SELECT ";
+ Chunk.add_string cte_chunk conf.source.name;
+ Chunk.add_string cte_chunk ".id, ";
+
+ let formatter = Format.formatter_of_buffer cte_chunk.b in
+
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression
+ in
+ Format.pp_print_flush formatter ();
+ Chunk.add_parameters cte_chunk (Queue.to_seq p);
+ (* The name is hardcoded here, and used in [Expression.Filters.window] *)
+ Chunk.add_string cte_chunk " AS group0";
+
+ let () = create_from_chunck conf cte_chunk in
+ let () =
+ match filters with
+ | None -> ()
+ | Some filters_chunk ->
+ Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk)
+ in
+
+ Chunk.add_string cte_chunk ")\n";
+ cte_chunk
+
+type filter_evaluation = {
+ content : Buffer.t;
+ parameters : ImportCSV.DataType.t Seq.t;
+ cte : (string * Chunk.t) option;
+}
+(** Build the filters to apply in the query. We make the distinction here
+    between the predicates to apply directly in the query, and the filters
+    associated with a group, which have to be transformed into a CTE in SQL
+    and are evaluated beforehand. *)
+
+(** Evaluate the filters on the query *)
+let eval_filters : Syntax.t -> filter_evaluation =
+ fun conf ->
+ match conf.filters with
+ | [] ->
+ let empty_buffer = Buffer.create 0 in
+ { content = empty_buffer; parameters = Seq.empty; cte = None }
+ | filters -> (
+      (* Create a new queue in order to accumulate all the parameters to bind.
+         This filter will be given to the CTE if there is one, or reused in the
+         main query when there is no CTE.
+      *)
+ let chunk_filters = Chunk.create () in
+ Chunk.add_string chunk_filters "\nWHERE ";
+
+ let group = Chunk.create () in
+
+ let with_cte, with_exr =
+ List.fold_left filters ~init:(None, false)
+ ~f:(fun (with_cte, with_exr) column ->
+            (* The function returns an option in second position, which is
+               None when no group function was found, and Some expression
+               otherwise *)
+ let b = Buffer.create 16 in
+
+ let formatter = Format.formatter_of_buffer b in
+ let queue, group_found =
+ Expression.Filters.query_of_expression Q.BindParam formatter
+ (show_path ~conf) column
+ in
+ Format.pp_print_flush formatter ();
+ let clause = Chunk.create' b queue in
+
+ match (group_found, with_cte) with
+ | None, _ ->
+ Chunk.append ~head:chunk_filters ~tail:clause;
+ Chunk.add_string chunk_filters "\nAND ";
+ (with_cte, true)
+ | (Some _ as group'), None ->
+ (* We have a group here, we do not add it into the
+ filter_buffer right now.
+
+ This can occur only once, the second one will raise
+ an error. *)
+ Chunk.append ~head:group ~tail:clause;
+ (group', with_exr)
+ | Some _, Some _ -> raise ImportErrors.MisplacedWindow)
+ in
+
+ match with_cte with
+ | None ->
+ let content = chunk_filters.b in
+ truncate content 5;
+ {
+ (* There is no group clause in the query *)
+ content;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ cte = None;
+ }
+ | Some expression ->
+ let filters =
+ if with_exr then (
+          (* If we have additional filters from the group clause, we
+ have to report them in the CTE instead of the main query. *)
+ let c' = Chunk.copy chunk_filters in
+ truncate c'.b 5;
+ Some c')
+ else None
+ in
+
+ (* Create the common expression table *)
+ let cte_parameters = build_cte conf ~expression ~filters in
+ Chunk.append ~head:chunk_filters ~tail:group;
+
+ {
+ content = chunk_filters.b;
+ parameters = Queue.to_seq chunk_filters.parameters;
+ (* The name is hardcoded here, and used in
+ [Expression.Filters.window] *)
+ cte = Some ("cte", cte_parameters);
+ })
+
+type query = {
+ q : string;
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+
+(** Build the query and also return the mapping used to identify the external
+    links between files.
+
+    The select query names each column with an alias, and the map allows
+    finding which source is pointed to by each alias. *)
+let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
+ fun conf ->
+  (* If the filters contain a group expression, we need to transform it into a
+     CTE, which has to be evaluated before the main query. That’s why we
+     evaluate the filters right now. *)
+ let filters = eval_filters conf in
+ let b = Buffer.create 256 in
+ let parameters = Queue.create () in
+
+ Option.iter
+ (fun (_, (cte : Chunk.t)) ->
+ Buffer.add_buffer b cte.b;
+ Queue.add_seq parameters (Queue.to_seq cte.parameters))
+ filters.cte;
+
+ (* For each column in the configuration file, add the corresponding element
+ in the query.
+
+     The SQLite driver returns the elements in an array; we create an array in
+     order to manage the elements together.
+ *)
+ let headers = Array.make (List.length conf.columns) (Obj.magic None) in
+
+ let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter "SELECT %a"
+ (Format.pp_print_seq
+ ~pp_sep:(fun f () -> Format.fprintf f ",\n")
+ (fun formatter (i, column) ->
+ Array.set headers i column;
+ let p =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer p parameters;
+ Format.fprintf formatter " AS result_%d" i))
+ columns
+ in
+ Format.pp_print_flush formatter ();
+
+ let () = create_from_chunck conf (Chunk.create' b parameters) in
+
+ (* If the query has a CTE, link it as well. We use an INNER JOIN here because
+ we want to be sure to get all the rows fetched by the CTE
+ *)
+ let () =
+ match filters.cte with
+ | None -> ()
+ | Some (name, _) ->
+ Buffer.add_string b "\nINNER JOIN '";
+ Buffer.add_string b name;
+ Buffer.add_string b "' ON ";
+ Buffer.add_string b name;
+ Buffer.add_string b ".id = ";
+ Buffer.add_string b conf.source.name;
+ Buffer.add_string b ".id"
+ in
+
+ Buffer.add_buffer b filters.content;
+ Queue.add_seq parameters filters.parameters;
+
+ let formatter = Format.formatter_of_buffer b in
+ (match conf.Syntax.uniq with
+ | [] -> ()
+ | uniq ->
+ Format.fprintf formatter "\nGROUP BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ uniq);
+ (match conf.Syntax.sort with
+ | [] -> ()
+ | sort ->
+ Format.fprintf formatter "\nORDER BY %a"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f ", ")
+ (fun formatter column ->
+ let seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ column
+ in
+ Queue.transfer seq parameters))
+ sort);
+ Format.pp_print_flush formatter ();
+
+ ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
+
+let check_external : Syntax.t -> Syntax.extern -> query =
+ fun conf external_ ->
+ let extern_table = Table.name external_.target in
+
+ let parameters = Queue.create () in
+ let internal_key_buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer internal_key_buffer in
+ let internal_key_seq =
+ Q.query_of_expression Q.BindParam formatter (show_path ~conf)
+ external_.intern_key
+ in
+ Format.pp_print_flush formatter ();
+
+ let external_key_buffer = Buffer.create 16 in
+ let pointed_tables = pointed_tables conf external_.intern_key in
+ Buffer.add_string external_key_buffer
+ (print_column external_.Syntax.target
+ ("key_" ^ external_.Syntax.target.name));
+
+  (* We do a copy before the transfer because the Queue is reused later in the
+ query *)
+ Queue.transfer (Queue.copy internal_key_seq) parameters;
+
+ let join_content = Buffer.contents external_key_buffer in
+ let inner_content = Buffer.contents internal_key_buffer in
+ let b = Buffer.create 256 in
+ let formatter = Format.formatter_of_buffer b in
+ let () =
+ Format.fprintf formatter
+ "SELECT %a%s FROM%a LEFT JOIN '%s' AS '%s' ON %t = %s WHERE %s IS NULL \
+ AND %s IS NOT NULL AND %s <> ''"
+ (fun formatter -> function
+ | [ (table, _name) ] ->
+ Format.fprintf formatter "%s, " (print_column table "id")
+ | _ -> Format.fprintf formatter "-1, ")
+ pointed_tables (* *)
+ inner_content (* *)
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.pp_print_text f ", ")
+ (fun formatter (table, name) ->
+ Format.fprintf formatter "\n'%s' AS '%s'" name table.Table.name))
+ pointed_tables (* *)
+ extern_table (* *)
+ external_.target.name
+ (prepare_key ~f:(fun b ->
+ Format.pp_print_text b (Buffer.contents internal_key_buffer)))
+ join_content (* *)
+ join_content (* *)
+ inner_content (* *)
+ inner_content
+ in
+
+ Format.pp_print_flush formatter ();
+
+ { q = Buffer.contents b; parameters = Queue.to_seq parameters }
+
+let build_key_insert : Buffer.t -> Dependency.key -> unit =
+ fun buffer { Dependency.expression; _ } ->
+ let show_column : Format.formatter -> Path.column -> unit =
+ fun formatter column -> Format.fprintf formatter ":col_%d" column
+ in
+
+ let formatter = Format.formatter_of_buffer buffer in
+
+ let () =
+ prepare_key formatter ~f:(fun formatter ->
+ Q.query_of_expression Q.NoParam formatter show_column expression)
+ in
+
+ Format.pp_print_flush formatter ();
+
+ ()
diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli
new file mode 100644
index 0000000..14d2807
--- /dev/null
+++ b/lib/analysers/query.mli
@@ -0,0 +1,27 @@
+module Syntax = ImportConf.Syntax
+
+val create_table : Dependency.t -> string
+
+type query = {
+ q : string; (** The query to execute *)
+ parameters : ImportCSV.DataType.t Seq.t;
+}
+(** This type represents a query to execute.
+    [q] is the template to run, and shall be run with all the bound
+    parameters. *)
+
+val select :
+ Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array
+
+val check_external : Syntax.t -> Syntax.extern -> query
+(** Create a query which selects all the missing keys in an external *)
+
+val build_key_insert : Buffer.t -> Dependency.key -> unit
+(* Build the fragment of the insert query.
+
+   This uses a prepared statement and applies the function at insert time, in
+   order to have the key already in the expected format in the database.
+
+   The columns are named :col_XX where XX is the index of the column in the
+   datasheet (starting from 1)
+*)
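+(* Illustrative shape of the fragment for a key built from column 2:
+   rtrim(upper(:col_2)) *)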
diff --git a/lib/configuration/dune b/lib/configuration/dune
new file mode 100755
index 0000000..27d31a6
--- /dev/null
+++ b/lib/configuration/dune
@@ -0,0 +1,29 @@
+(library
+ (name importConf)
+ (libraries
+ decoders
+ otoml
+ menhirLib
+ importCSV
+ yojson
+ re
+ helpers
+ importDataTypes
+ importExpression
+ importErrors
+ )
+
+(preprocess (pps ppx_yojson_conv ppx_deriving.ord))
+)
+
+(rule
+ (targets expression_parser_messages.ml)
+ (deps expression_parser.messages expression_parser.mly)
+ (action (with-stdout-to %{targets} (run menhir --compile-errors %{deps}))))
+
+(menhir
+ (modules expression_parser)
+ (flags --table)
+)
+
+(ocamllex expression_lexer)
diff --git a/lib/configuration/expression_lexer.mll b/lib/configuration/expression_lexer.mll
new file mode 100644
index 0000000..cbfc8dc
--- /dev/null
+++ b/lib/configuration/expression_lexer.mll
@@ -0,0 +1,91 @@
+{
+ open Expression_parser
+ module Expression = ImportExpression.T
+
+ exception UnclosedQuote of { content: string ; line : int}
+}
+
+
+let spaces = [ ' ' '\t' ]
+let letters = [^ '"' '\'' '(' ')' '[' ']' ':' '.' ',' '^' ' ' '\t' '\n' '\r' ]
+let digit = [ '0'-'9' ]
+let eol = [ '\r' '\n' ]
+
+let escaped = [ '\'' '\\']
+
+rule token = parse
+| eol { Lexing.new_line lexbuf; token lexbuf }
+| spaces { token lexbuf }
+| '\'' {
+ try read_quoted_string (Buffer.create 17) lexbuf
+ with Failure _ ->
+ let line = lexbuf.Lexing.lex_curr_p.pos_lnum
+ and content = Bytes.to_string lexbuf.Lexing.lex_buffer in
+ raise (UnclosedQuote {line; content})
+}
+| '"' { read_dquoted_string (Buffer.create 17) lexbuf }
+| '#' { skip_comment lexbuf }
+| '(' { L_PAREN }
+| ')' { R_PAREN }
+| '[' { L_BRACKET }
+| ']' { R_BRACKET }
+| ':' { COLUMN }
+| '.' { DOT }
+| ',' { COMA }
+| '^' { CONCAT_OPERATOR }
+| '+' { BINARY_OPERATOR (Expression.Add) }
+| '-' { BINARY_OPERATOR (Expression.Minus) }
+| '/' { BINARY_OPERATOR (Expression.Division) }
+| "and" { BOOL_OPERATOR (Expression.And) }
+| "or" { BOOL_OPERATOR (Expression.Or) }
+| '<' { INEQUALITY_OPERATOR (Expression.LT) }
+| '>' { INEQUALITY_OPERATOR (Expression.GT) }
+| "<>" { EQUALITY_OPERATOR (Expression.Different) }
+| '=' { EQUALITY_OPERATOR (Expression.Equal) }
+| digit+ as l { INTEGER l}
+| '-' digit+ as l { INTEGER l}
+| letters+ as l { IDENT l}
+| eof { EOF }
+
+and skip_comment = parse
+ | [^ '\r' '\n' ]
+ { skip_comment lexbuf }
+ | eol
+ { token lexbuf }
+
+(* Read the content until we reach the closing quote *)
+and read_quoted_string buf = parse
+ | [^ '\'' '\\' ]+
+ { Buffer.add_string buf (Lexing.lexeme lexbuf);
+ read_quoted_string buf lexbuf
+ }
+ | "\\\'"
+ { Buffer.add_char buf '\'';
+ read_quoted_string buf lexbuf
+ }
+ | '\\'
+ { Buffer.add_char buf '\\';
+ read_quoted_string buf lexbuf
+ }
+ | '\''
+ { LITERAL (Buffer.contents buf)
+ }
+
+(* Read the content until we reach the closing quote *)
+and read_dquoted_string buf = parse
+ | [^ '"' '\\' ]+
+ { Buffer.add_string buf (Lexing.lexeme lexbuf);
+ read_dquoted_string buf lexbuf
+ }
+ | "\\\""
+ { Buffer.add_char buf '"';
+ read_dquoted_string buf lexbuf
+ }
+ | '\\'
+ { Buffer.add_char buf '\\';
+ read_dquoted_string buf lexbuf
+ }
+ | '"'
+ {
+ LITERAL (Buffer.contents buf)
+ }
diff --git a/lib/configuration/expression_parser.messages b/lib/configuration/expression_parser.messages
new file mode 100644
index 0000000..ff7e757
--- /dev/null
+++ b/lib/configuration/expression_parser.messages
@@ -0,0 +1,123 @@
+column_expr: R_PAREN
+##
+
+Invalid expression
+
+path_expr: IDENT R_PAREN
+column_expr: IDENT R_PAREN
+column_expr: IDENT L_PAREN IDENT R_PAREN
+path_expr: IDENT L_PAREN IDENT R_PAREN
+##
+
+Misplaced function. Did you forget to quote the text?
+
+column_expr: IDENT L_PAREN EOF
+path_expr: IDENT L_PAREN EOF
+##
+
+Incomplete expression
+
+column_expr: COLUMN R_PAREN
+path_expr: COLUMN R_PAREN
+##
+
+The path is missing.
+
+column_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL L_PAREN
+column_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN
+column_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+path_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN
+column_expr: IDENT L_PAREN L_PAREN LITERAL L_PAREN
+path_expr: IDENT L_PAREN LITERAL L_PAREN
+##
+
+A text is given where a function was expected.
+
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+column_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+path_expr: LITERAL BINARY_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN
+##
+## Ends in an error in state: 61.
+##
+## separated_nonempty_list(COMA,expr_(path_,COMA)) -> expr_(path_,COMA) COMA . separated_nonempty_list(COMA,expr_(path_,COMA)) [ R_PAREN ]
+##
+## The known suffix of the stack is as follows:
+## expr_(path_,COMA) COMA
+##
+
+Incomplete expression
+
+column_expr: IDENT L_PAREN LITERAL COMA R_PAREN
+path_expr: IDENT L_PAREN LITERAL COMA R_PAREN
+
+Misplaced coma
+
+column_expr: IDENT L_PAREN LITERAL EOF
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF
+path_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF
+path_expr: IDENT L_PAREN L_PAREN LITERAL EOF
+##
+
+Incomplete expression. Did you forget a ')'?
+
+
+column_expr: LITERAL R_PAREN
+path_expr: LITERAL BINARY_OPERATOR LITERAL R_PAREN
+##
+
+Invalid expression
+
+path_expr: COLUMN IDENT L_PAREN
+##
+# Also apply to :
+# path_expr: COLUMN IDENT COLUMN
+
+Misplaced path
+
+path_expr: COLUMN IDENT DOT R_PAREN
+##
+
+Incomplete path: the table is missing
+
+column_expr: INTEGER BINARY_OPERATOR INTEGER R_PAREN
+##
+
+Unbalanced parens. Did you want to write ')' instead of '('?
+
+
+path_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN
+
+Unbalanced brackets. Did you want to write ']' instead of ')'?
+
+column_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN
+path_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN
+column_expr: LITERAL CONCAT_OPERATOR R_PAREN
+path_expr: LITERAL CONCAT_OPERATOR R_PAREN
+column_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN
+path_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN
+column_expr: LITERAL BINARY_OPERATOR R_PAREN
+path_expr: LITERAL BINARY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER INEQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER INEQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN
+column_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN
+path_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN
+
+The operator expects two arguments. Only one is given.
+
+column_expr: IDENT L_PAREN L_BRACKET R_PAREN
+path_expr: IDENT L_PAREN L_BRACKET R_PAREN
+column_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN
+column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+path_expr: INTEGER CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET
+
+Mix between brackets and parens.
+
diff --git a/lib/configuration/expression_parser.mly b/lib/configuration/expression_parser.mly
new file mode 100644
index 0000000..1304c4d
--- /dev/null
+++ b/lib/configuration/expression_parser.mly
@@ -0,0 +1,185 @@
+%token <string>IDENT
+%token L_PAREN
+%token R_PAREN
+%token L_BRACKET R_BRACKET
+%token COLUMN
+%token DOT
+%token <string>LITERAL
+%token <string>INTEGER
+%token COMA
+%token EOF
+%token CONCAT_OPERATOR
+
+%token <ImportExpression.T.binary_operator>BINARY_OPERATOR
+%token <ImportExpression.T.binary_operator>INEQUALITY_OPERATOR
+%token <ImportExpression.T.binary_operator>EQUALITY_OPERATOR
+%token <ImportExpression.T.binary_operator>BOOL_OPERATOR
+
+%start <ImportDataTypes.Path.t ImportExpression.T.t> path_expr
+%start <ImportDataTypes.Path.column ImportExpression.T.t> column_expr
+
+%right BOOL_OPERATOR
+%right INEQUALITY_OPERATOR EQUALITY_OPERATOR
+%right CONCAT_OPERATOR BINARY_OPERATOR
+
+%{
+
+ let function_of_name param f =
+ match (String.lowercase_ascii f, param) with
+ | "nvl", _ ->
+ ImportExpression.T.Nvl param
+ | "join", (ImportExpression.T.Literal sep:: tl) ->
+ ImportExpression.T.Join (sep, tl)
+ | "join", (ImportExpression.T.Empty:: tl) ->
+ ImportExpression.T.Join ("", tl)
+ | "upper", _ ->
+ ImportExpression.T.Function' (ImportExpression.T.Upper, param)
+ | "trim", _ ->
+ ImportExpression.T.Function' (ImportExpression.T.Trim, param)
+ | other, _ ->
+ ImportExpression.T.Function (other, param)
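+  (* Illustrative mapping: [nvl(...)] becomes [Nvl ...], [upper(...)] becomes
+     [Function' (Upper, ...)], and an unrecognised name such as [foo(...)]
+     falls back to [Function ("foo", ...)]. *)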
+
+%}
+
+%%
+
+path_expr:
+ | expr_(path_, EOF) EOF { $1 }
+ | EOF { ImportExpression.T.Empty }
+column_expr:
+ | expr_(column_, EOF) EOF { $1 }
+ | EOF { ImportExpression.T.Empty }
+
+
+path_:
+ | COLUMN
+ column = IDENT
+ { ImportExpression.T.Path
+ Syntax.Path.{ alias = None
+ ; column = ImportCSV.Csv.column_of_string column
+ }
+ }
+
+ | COLUMN
+ table = IDENT
+ DOT
+ column = IDENT
+ { ImportExpression.T.Path
+ Syntax.Path.{ alias = Some table
+ ; column = ImportCSV.Csv.column_of_string column}
+ }
+
+column_:
+ | COLUMN
+ column = IDENT
+ { try ImportExpression.T.Path (ImportCSV.Csv.column_of_string column)
+ with _ -> ImportExpression.T.Literal column }
+
+arguments(PATH):
+ | L_PAREN
+ expr = separated_list(COMA, expr_(PATH, COMA))
+ R_PAREN
+ { expr }
+
+group(PATH):
+ | L_BRACKET
+ expr = separated_list(COMA, expr_(PATH, COMA))
+ R_BRACKET
+ { expr }
+
+fixed(PATH):
+ | d = INTEGER
+ { ImportExpression.T.Integer d }
+ | l = LITERAL
+ {
+ if String.equal String.empty l then
+ ImportExpression.T.Empty
+ else
+ ImportExpression.T.Literal l
+ }
+
+%inline boperator:
+ | e = BINARY_OPERATOR { e }
+ | e = INEQUALITY_OPERATOR { e }
+ | e = EQUALITY_OPERATOR { e }
+ | e = BOOL_OPERATOR { e }
+
+(* The expression evaluation receives as parameters:
+   1. the way to build a path, as we have two distinct ways to build them in
+      the case of externals (the external_key does not allow a table name)
+   2. a phantom type telling which kind of element will end the expression.
+      This can be EOF for the root expression, or COMA when inside a function.
+      This prevents merlin from optimizing those two paths, and allows more
+      precise error messages. *)
+expr_(PATH, ENDING_PHANTOM):
+ | L_PAREN
+ e = expr_(PATH, R_PAREN)
+ R_PAREN
+ { ImportExpression.T.Expr e
+ }
+ |
+ p1 = expr_(PATH, ENDING_PHANTOM)
+ CONCAT_OPERATOR
+ p2 = expr_(PATH, COMA)
+ { match p2 with
+ | ImportExpression.T.Concat args -> ImportExpression.T.Concat (p1::args)
+ | _ -> ImportExpression.T.Concat (p1::p2::[])
+ }
+ | p1 = expr_(PATH, ENDING_PHANTOM)
+
+ op = boperator
+ p2 = expr_(PATH, COMA)
+ { ImportExpression.T.BOperator (op, p1, p2) }
+
+ | p1 = expr_(PATH, ENDING_PHANTOM)
+ op = EQUALITY_OPERATOR
+ p2 = group(PATH)
+ { ImportExpression.T.GEquality(op, p1, p2) }
+
+
+
+ | p = PATH
+ { p }
+ | f = fixed(PATH)
+ { f }
+ | s = IDENT
+ args = arguments(PATH)
+ { function_of_name args s }
+ |
+ s = IDENT
+ L_PAREN
+ opt_arg = opt_arg(PATH, COMA)?
+ args1 = group(PATH)
+ COMA
+ args2 = group(PATH)
+ R_PAREN
+ { let window_name = ImportExpression.T.window_of_name s opt_arg in
+ ImportExpression.T.Window (window_name, args1, args2) }
+(*
+  | (* This case is here to describe a window function which has two levels of
+       arguments.
+       I’m not completely satisfied with it, as it prevents creating an
+       expression block with parens around it. *)
+ s = IDENT
+ L_PAREN
+ opt_arg = opt_arg(PATH, COMA)?
+ args1 = arguments(PATH)
+ COMA
+ args2 = arguments(PATH)
+ R_PAREN
+ { let window_name = ImportExpression.T.window_of_name s opt_arg in
+ let expr = ImportExpression.T.Window (window_name, args1, args2) in
+
+ let expr_repr = ImportExpression.Repr.repr ~top:true (fun _ -> "")
+ expr in
+ Printf.printf "Deprecated syntax in \"%s\" use [] instead of ()\n" expr_repr;
+
+
+ expr
+ }
+*)
+
+opt_arg(PATH, SEP):
+ | expr = expr_(PATH, COMA)
+ SEP
+ { expr }
diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml
new file mode 100644
index 0000000..586be3c
--- /dev/null
+++ b/lib/configuration/importConf.ml
@@ -0,0 +1,90 @@
+open StdLabels
+module Syntax = Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module T = Read_conf
+module Expression = ImportExpression.T
+
+let current_syntax = 1
+
+let t_of_yojson : Yojson.Safe.t -> Syntax.t =
+ fun json ->
+ let keys = Yojson.Safe.Util.keys json in
+ let version =
+ match List.find_opt keys ~f:(String.equal "version") with
+ | None ->
+ Printf.printf
+ "No version given. Your setup may break in the future.\n\
+ Please add « \"version\":%d » in your configuration.\n\n"
+ current_syntax;
+ `Int 1
+ | Some _ -> Yojson.Safe.Util.member "version" json
+ in
+
+ match version with
+ | `Int 1 -> Of_json.t_of_yojson json
+ | other ->
+      Printf.eprintf "Unsupported version : %s\n" (Yojson.Safe.to_string other);
+ exit 1
+
+module TomlReader = Read_conf.Make (Helpers.Toml.Decode)
+
+let t_of_toml : Otoml.t -> (Syntax.t, string) result =
+ fun toml ->
+ let version =
+ Otoml.find toml (Otoml.get_integer ~strict:false) [ "version" ]
+ in
+ match version with
+ | 1 -> TomlReader.read toml
+ | _ ->
+      Printf.eprintf "Unsupported version : %d\n" version;
+ exit 1
+
+let dummy_conf =
+ Syntax.
+ {
+ source = { file = ""; tab = 0; name = "" };
+ version = 1;
+ externals = [];
+ columns = [];
+ filters = [];
+ sort = [];
+ uniq = [];
+ }
+
+let get_table_for_name : Syntax.t -> string option -> Table.t =
+ fun conf name ->
+ match name with
+ | None -> conf.source
+ | Some name ->
+ if String.equal name conf.source.name then conf.source
+ else
+ let ext =
+ List.find conf.externals ~f:(fun (ext : Syntax.extern) ->
+ String.equal name ext.target.name)
+ in
+ ext.target
+
+let root_table : Syntax.t -> Table.t = fun conf -> conf.source
+
+let get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list =
+ fun conf source ->
+ let is_root = source = conf.source in
+
+ List.filter conf.externals ~f:(fun (ext : Syntax.extern) ->
+ (* Enumerate the intern_key and check the source pointed by each column *)
+ Expression.fold_values ext.intern_key ~init:false ~f:(fun acc expr ->
+ if acc then acc
+ else
+ match expr.Syntax.Path.alias with
+ | Some v -> String.equal v source.name
+ | None -> is_root))
+
+let print_path_expression t = ImportExpression.Repr.repr Path.repr t
+
+let print_extern t =
+ let toml = Syntax.toml_of_extern t in
+ Otoml.Printer.to_string toml
+
+let expression_from_string s =
+ Read_conf.ExpressionParser.of_string Read_conf.ExpressionParser.path s
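+(* Illustrative call: [expression_from_string ":A ^ 'x'"] is expected to yield
+   [Ok (Concat [ Path { alias = None; column = 1 }; Literal "x" ])], assuming
+   column A maps to index 1. *)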
diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli
new file mode 100644
index 0000000..3a8ae75
--- /dev/null
+++ b/lib/configuration/importConf.mli
@@ -0,0 +1,23 @@
+module Syntax = Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+val dummy_conf : Syntax.t
+
+val root_table : Syntax.t -> Table.t
+(** Get the root table. This is the main table to load, and each line in this
+    table will be processed *)
+
+val t_of_yojson : Yojson.Safe.t -> Syntax.t
+val t_of_toml : Otoml.t -> (Syntax.t, string) result
+val get_table_for_name : Syntax.t -> string option -> Table.t
+
+val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list
+(** Get all the externals referred to by the source *)
+
+val print_path_expression : Path.t ImportExpression.T.t -> string
+
+val expression_from_string :
+ string -> (Path.t ImportExpression.T.t, string) result
+
+val print_extern : Syntax.extern -> string
diff --git a/lib/configuration/of_json.ml b/lib/configuration/of_json.ml
new file mode 100644
index 0000000..f9171b9
--- /dev/null
+++ b/lib/configuration/of_json.ml
@@ -0,0 +1,134 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+open Ppx_yojson_conv_lib.Yojson_conv.Primitives
+
+let current_syntax = 1
+
+let rec expression_of_yojson :
+ (Yojson.Safe.t -> 'a) -> Yojson.Safe.t -> 'a Expression.t =
+ fun f expr ->
+ match expr with
+ | `Null -> Empty
+ | `List l -> Concat (List.map ~f:(expression_of_yojson f) l)
+ | `String s as json -> (
+ try Path (f json) with
+ | _ -> Literal s)
+ | `Assoc [ (fn, `List [ `List l1; `List l2 ]) ]
+ when String.equal "counter" (String.lowercase_ascii fn) ->
+ Window
+ ( Expression.Counter,
+ List.map ~f:(expression_of_yojson f) l1,
+ List.map ~f:(expression_of_yojson f) l2 )
+ | `Assoc [ (fn, `List [ expr1; `List l2; `List l3 ]) ]
+ when String.equal "previous" (String.lowercase_ascii fn) ->
+ Window
+ ( Expression.Previous (expression_of_yojson f expr1),
+ List.map ~f:(expression_of_yojson f) l2,
+ List.map ~f:(expression_of_yojson f) l3 )
+ | `Assoc [ (fn, `List l) ] when String.equal "nvl" (String.lowercase_ascii fn)
+ -> Nvl (List.map ~f:(expression_of_yojson f) l)
+ | `Assoc [ (fn, `List l) ] ->
+ Function
+ (String.lowercase_ascii fn, List.map ~f:(expression_of_yojson f) l)
+ | json -> (
+ try Path (f json) with
+ | _ ->
+ let str_json = Yojson.Safe.pretty_to_string json in
+ raise
+ (ImportErrors.JsonError { json = str_json; element = "Expression" })
+ )
+
+type 'a expression = 'a Expression.t
+type column = Path.column
+
+let column_of_yojson : Yojson.Safe.t -> int = function
+ | `Int i -> i
+ | `String s -> ImportCSV.Csv.column_of_string s
+ | _ -> raise (Invalid_argument "column")
+
+let yojson_of_column i = `String (ImportCSV.Csv.column_to_string i)
+
+type path = Syntax.Path.t = {
+ alias : string option; [@default None] [@yojson_drop_default ( = )]
+  (* External file to load; when the information is missing, load from the
+     current file *)
+ column : column;
+}
+[@@deriving of_yojson]
+
+let path_of_yojson : Yojson.Safe.t -> path = function
+ | `String s ->
+ Scanf.sscanf s ":%s@.%s" (fun table column ->
+ if String.equal column "" then
+ { alias = None; column = ImportCSV.Csv.column_of_string table }
+ else
+ {
+ alias = Some table;
+ column = ImportCSV.Csv.column_of_string column;
+ })
+ | other -> path_of_yojson other
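+(* Illustrative parses: ":B" yields [{ alias = None; column = <B> }] while
+   ":other.C" yields [{ alias = Some "other"; column = <C> }]; the column
+   letters go through [ImportCSV.Csv.column_of_string]. *)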
+
+let yojson_of_path : path -> Yojson.Safe.t =
+ fun { alias; column } ->
+ let table =
+ match alias with
+ | None -> ""
+ | Some table -> String.cat table "."
+ in
+
+ `String
+ (String.concat ~sep:""
+ [ ":"; table; ImportCSV.Csv.column_to_string column ])
+
+type table = Table.t = {
+ file : string;
+ tab : int; [@default 1] [@yojson_drop_default ( = )]
+ name : string;
+}
+[@@deriving of_yojson]
+
+type extern = {
+ source : string option; [@default None] [@yojson_drop_default ( = )]
+ intern_key : column expression;
+ target : table;
+ extern_key : column expression;
+ allow_missing : bool; [@default false] [@yojson_drop_default ( = )]
+ match_rule : string option; [@default None] [@yojson_drop_default ( = )]
+}
+[@@deriving of_yojson]
+
+type syntax_v1_extern = Syntax.extern
+
+let syntax_v1_extern_of_yojson yojson =
+ let e = extern_of_yojson yojson in
+ let intern_key : path Expression.t =
+ Expression.map e.intern_key ~f:(fun column ->
+ Syntax.Path.{ column; alias = e.source })
+ in
+ Syntax.
+ {
+ extern_key = e.extern_key;
+ intern_key;
+ target = e.target;
+ allow_missing = e.allow_missing;
+ match_rule = e.match_rule;
+ }
+
+type predicate = unit
+
+let predicate_of_yojson _ = ()
+let yojson_of_predicate () = `Null
+
+type t = Syntax.t = {
+ version : int; [@default current_syntax]
+ source : table;
+ externals : syntax_v1_extern list; [@default []]
+ columns : path expression list;
+ filters : path expression list; [@default []] [@yojson_drop_default ( = )]
+ sort : path expression list; [@default []] [@yojson_drop_default ( = )]
+ uniq : path expression list; [@default []] [@yojson_drop_default ( = )]
+}
+[@@deriving of_yojson]
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
new file mode 100644
index 0000000..8d467a5
--- /dev/null
+++ b/lib/configuration/read_conf.ml
@@ -0,0 +1,216 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+module ExpressionParser : sig
+ type 'a path_builder
+
+ val path : Path.t path_builder
+ val column : Path.column path_builder
+
+ val of_string :
+ 'a path_builder -> string -> ('a ImportExpression.T.t, string) result
+end = struct
+ module MI = Expression_parser.MenhirInterpreter
+ module E = MenhirLib.ErrorReports
+ module L = MenhirLib.LexerUtil
+
+ type error = {
+ message : string;
+ start_line : int;
+ start_pos : int;
+ end_pos : int;
+ }
+
+ let range_message start_pos end_pos message =
+ let start_c = start_pos.Lexing.pos_cnum - start_pos.Lexing.pos_bol
+ and end_c = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol in
+ {
+ message;
+ start_line = start_pos.Lexing.pos_bol;
+ start_pos = start_c;
+ end_pos = end_c;
+ }
+
+ (** Extract the line in error from the whole expression, and print some
+ characters just under the faulty part *)
+ let get_line_error : error -> string -> string =
+ fun error content ->
+ let sub_text =
+ try
+ let end_pos = String.index_from content error.start_line '\n' in
+
+ String.sub content ~pos:error.start_line
+ ~len:(end_pos - error.start_line)
+ with
+ | Not_found ->
+ (* There is no new line, extract the ending part *)
+ let len = String.length content - error.start_line in
+ String.sub content ~pos:error.start_line ~len
+ in
+ (* I’m not sure how to produce it, but the error may be over two lines.
+       This line is here to prevent the underline from overflowing. *)
+ let stop_pos = min error.end_pos (String.length sub_text) in
+ let error_length = stop_pos - error.start_pos in
+ String.concat ~sep:""
+ [
+ sub_text;
+ "\n";
+ String.make error.start_pos ' ';
+ String.make error_length '^';
+ ]
+
+ let get_parse_error default_position env : error =
+ match MI.stack env with
+ | (lazy Nil) ->
+ range_message default_position.Lexing.lex_start_p
+ default_position.Lexing.lex_curr_p "Invalid syntax\n"
+ | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) ->
+ let message =
+ try Expression_parser_messages.message (MI.number state) with
+        | Not_found -> "Invalid syntax (no specific message for this error)\n"
+ in
+
+ range_message start_pos end_pos message
+
+ type 'a path_builder =
+ Lexing.position -> 'a ImportExpression.T.t MI.checkpoint
+
+ let rec _parse lexbuf (checkpoint : 'a ImportExpression.T.t MI.checkpoint) =
+ match checkpoint with
+ | MI.InputNeeded _env ->
+ let token = Expression_lexer.token lexbuf in
+ let startp = lexbuf.lex_start_p and endp = lexbuf.lex_curr_p in
+ let checkpoint = MI.offer checkpoint (token, startp, endp) in
+ _parse lexbuf checkpoint
+ | MI.Shifting _ | MI.AboutToReduce _ ->
+ let checkpoint = MI.resume checkpoint in
+ _parse lexbuf checkpoint
+ | MI.HandlingError _env ->
+ let err = get_parse_error lexbuf _env in
+ Error err
+ | MI.Accepted v -> Ok v
+ | MI.Rejected ->
+ let err =
+ range_message lexbuf.lex_start_p lexbuf.lex_curr_p
+ "invalid syntax (parser rejected the input)"
+ in
+ Error err
+
+ let of_string :
+ 'a path_builder -> string -> ('a ImportExpression.T.t, string) result =
+ fun f str_expression ->
+ try
+ let lexbuf = Lexing.from_string str_expression in
+ let init = f lexbuf.lex_curr_p in
+ match _parse lexbuf init with
+ | Ok res -> Ok res
+ | Error e ->
+ let message =
+ String.concat ~sep:"\n"
+ [ e.message; get_line_error e str_expression ]
+ in
+ Error message
+ with
+ | Expression_lexer.UnclosedQuote { line; content } ->
+ let message =
+ Printf.sprintf "Unclosed quote at line %d : \"%s\"" line content
+ in
+ Error message
+ | e ->
+ let message = Printexc.to_string e in
+ Error message
+
+ let path = Expression_parser.Incremental.path_expr
+ let column = Expression_parser.Incremental.column_expr
+end
+
+module Make (S : Decoders.Decode.S) = struct
+ let ( let* ) = S.( let* )
+ let ( and* ) = S.( and* )
+ let ( >>= ) = S.( >>= )
+ let ( >|= ) = S.( >|= )
+
+ class loader =
+ object (self)
+ method parse_expression : type a.
+ a ExpressionParser.path_builder ->
+ S.value ->
+ (a ImportExpression.T.t, S.value Decoders.Error.t) result =
+ fun path ->
+ S.string >>= fun v ->
+ match ExpressionParser.of_string path v with
+ | Ok expr -> S.succeed expr
+ | Error e -> S.fail_with Decoders.Error.(make e)
+
+ method source =
+ let* file = S.field "file" S.string
+ and* name = S.field "name" S.string
+ and* tab = S.field_opt_or ~default:1 "tab" S.int in
+ S.succeed { Table.file; name; tab }
+
+ method external_ name =
+ let* intern_key =
+ S.field "intern_key" (self#parse_expression ExpressionParser.path)
+ and* extern_key =
+ S.field "extern_key" (self#parse_expression ExpressionParser.column)
+ and* file = S.field "file" S.string
+ and* tab = S.field_opt_or ~default:1 "tab" S.int
+ and* allow_missing =
+ S.field_opt_or ~default:false "allow_missing" S.bool
+ in
+
+ S.succeed
+ Syntax.
+ {
+ intern_key;
+ extern_key;
+ target = { name; file; tab };
+ allow_missing;
+ match_rule = None;
+ }
+
+ method sheet =
+ let* columns =
+ S.field "columns"
+ @@ S.list (self#parse_expression ExpressionParser.path)
+ and* filters =
+ S.field_opt_or ~default:[] "filters"
+ @@ S.list (self#parse_expression ExpressionParser.path)
+ and* sort =
+ S.field_opt_or ~default:[] "sort"
+ @@ S.list (self#parse_expression ExpressionParser.path)
+ and* uniq =
+ S.field_opt_or ~default:[] "uniq"
+ @@ S.list (self#parse_expression ExpressionParser.path)
+ in
+ S.succeed @@ fun version source externals ->
+ Syntax.{ version; source; externals; columns; filters; sort; uniq }
+
+ method conf =
+ let* source = S.field "source" self#source
+ and* externals =
+ S.field_opt_or ~default:[] "externals"
+ (S.key_value_pairs_seq self#external_)
+ in
+ let* sheet =
+ S.field "sheet" self#sheet >|= fun v -> v 1 source externals
+ in
+
+ S.succeed sheet
+ end
+
+ let read_file file =
+ S.decode_file (new loader)#conf file
+ |> Result.map_error (fun v ->
+ let formatter = Format.str_formatter in
+ Format.fprintf formatter "%a@." S.pp_error v;
+ Format.flush_str_formatter ())
+
+ let read toml =
+ S.decode_value (new loader)#conf toml
+ |> Result.map_error (fun v ->
+ let formatter = Format.str_formatter in
+ Format.fprintf formatter "%a@." S.pp_error v;
+ Format.flush_str_formatter ())
+end
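+
+(* Illustrative usage sketch (assumption): [Make] accepts any module
+   implementing [Decoders.Decode.S]. Assuming such a TOML decoder is
+   available under the hypothetical name [Toml_decoder], a configuration
+   file could be read as follows:
+
+   module Reader = Make (Toml_decoder)
+   let conf = Reader.read_file "importer.toml"
+   (* conf : (Syntax.t, string) result *)
+*)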
diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml
new file mode 100644
index 0000000..8efdc59
--- /dev/null
+++ b/lib/configuration/syntax.ml
@@ -0,0 +1,88 @@
+open StdLabels
+module E = ImportExpression.T
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+let toml_of_table Table.{ file; tab; name } =
+ let values = [ ("file", Otoml.string file); ("name", Otoml.string name) ] in
+ let values =
+ match tab with
+ | 1 -> values
+ | tab -> ("tab", Otoml.integer tab) :: values
+ in
+
+ Otoml.table values
+
+type extern = {
+ intern_key : Path.t E.t;
+ target : Table.t;
+ extern_key : Path.column E.t;
+ allow_missing : bool;
+ match_rule : string option;
+}
+
+let toml_of_extern extern =
+ let values =
+ [
+ ( "intern_key",
+ Otoml.string
+ @@ ImportExpression.Repr.repr ~top:true Path.repr extern.intern_key );
+ ( "extern_key",
+ Otoml.string
+ @@ ImportExpression.Repr.repr ~top:true
+ (fun v -> ":" ^ ImportCSV.Csv.column_to_string v)
+ extern.extern_key );
+ ("file", Otoml.string extern.target.file);
+ ("allow_missing", Otoml.boolean extern.allow_missing);
+ ]
+ in
+
+ let values =
+ match extern.target.tab with
+ | 1 -> values
+ | tab -> ("tab", Otoml.integer tab) :: values
+ in
+
+ Otoml.table values
+
+let toml_of_externs externs =
+ List.map externs ~f:(fun e -> (e.target.name, toml_of_extern e))
+ |> Otoml.table
+
+type t = {
+ version : int;
+ source : Table.t;
+ externals : extern list;
+ columns : Path.t E.t list;
+ filters : Path.t E.t list;
+ sort : Path.t E.t list;
+ uniq : Path.t E.t list;
+}
+
+let repr t =
+ let repr_expression_list l =
+ Otoml.array
+ (List.map l ~f:(fun v ->
+ Otoml.string (ImportExpression.Repr.repr ~top:true Path.repr v)))
+ in
+
+ let sheet =
+ Otoml.table
+ [
+ ("columns", repr_expression_list t.columns);
+ ("filters", repr_expression_list t.filters);
+ ("sort", repr_expression_list t.sort);
+ ("uniq", repr_expression_list t.uniq);
+ ]
+ in
+
+ let values =
+ [
+ ("version", Otoml.integer t.version);
+ ("source", toml_of_table t.source);
+ ("externals", toml_of_externs t.externals);
+ ("sheet", sheet);
+ ]
+ in
+
+ Otoml.table values
diff --git a/lib/containers/dune b/lib/containers/dune
new file mode 100755
index 0000000..46d0e24
--- /dev/null
+++ b/lib/containers/dune
@@ -0,0 +1,7 @@
+(library
+ (name importContainers)
+ (libraries
+ importDataTypes
+ importConf
+ )
+)
diff --git a/lib/containers/importContainers.ml b/lib/containers/importContainers.ml
new file mode 100644
index 0000000..bf65ba4
--- /dev/null
+++ b/lib/containers/importContainers.ml
@@ -0,0 +1,61 @@
+module Conf = ImportConf
+module Syntax = Conf.Syntax
+module Table = ImportDataTypes.Table
+
+(** This key is used to create the table of each external in the
+ configuration.
+
+ This table allows checking whether there are cycles between the references *)
+module KeyName : sig
+ type t
+
+ val compare : t -> t -> int
+ val from_table : Table.t -> t
+ val hash : t -> int
+ val equal : t -> t -> bool
+end = struct
+ type t = Table.t
+
+ let compare v1 v2 = String.compare (Table.name v1) (Table.name v2)
+ (* We use the alias given in the configuration as key, because we want to
+ be sure there is no cycle in the dependencies. It’s OK to have the same
+ file used in different sources, but the sources cannot induce cycles *)
+
+ let from_table (s : Table.t) = s
+ let hash = Hashtbl.hash
+ let equal v1 v2 = String.equal (Table.name v1) (Table.name v2)
+end
+
+module Source : sig
+ type t
+
+ val compare : t -> t -> int
+ val from_table : Table.t -> t
+ val hash : t -> int
+ val equal : t -> t -> bool
+ val name : t -> KeyName.t
+end = struct
+ type t = Table.t
+
+ let compare v1 v2 = String.compare v1.Table.name v2.Table.name
+ (* We use the alias given in the configuration as key, because we want to
+ be sure there is no cycle in the dependencies. It’s OK to have the same
+ file used in different sources, but the sources cannot induce cycles *)
+
+ let from_table (s : Table.t) = s
+ let hash = Hashtbl.hash
+ let equal v1 v2 = String.equal v1.Table.name v2.Table.name
+ let name t = KeyName.from_table t
+end
+
+module Externals = MoreLabels.Map.Make (KeyName)
+module IntSet = MoreLabels.Set.Make (Int)
+
+let show_intSet set =
+ let b = Buffer.create 16 in
+ IntSet.iter
+ ~f:(fun v ->
+ Buffer.add_string b (string_of_int v);
+ Buffer.add_char b ',')
+ set;
+ Buffer.contents b
diff --git a/lib/csv/csv.ml b/lib/csv/csv.ml
new file mode 100644
index 0000000..db7329d
--- /dev/null
+++ b/lib/csv/csv.ml
@@ -0,0 +1,30 @@
+open StdLabels
+
+type t = int
+
+let column_of_char = function
+ | 'A' .. 'Z' as c -> Char.code c - (Char.code 'A' - 1)
+ | 'a' .. 'z' as c -> Char.code c - (Char.code 'a' - 1)
+ | c -> raise (Invalid_argument ("column: " ^ Char.escaped c))
+
+let column_of_string : string -> int =
+ fun s ->
+ String.fold_left s ~init:0 ~f:(fun value c -> (value * 26) + column_of_char c)
+
+(** Accumulate the remainders of the successive divisions in a list. *)
+let rec _to_char ~b i =
+ if i > 0 then
+ let res = i mod 26 in
+ let res = if res = 0 then 26 else res in
+
+ let c = char_of_int @@ (res + 64) in
+ (* The remainder is accumulated at the head of the list, which yields the
+ expected character order *)
+ let b = c :: b in
+
+ _to_char ~b @@ ((i - res) / 26)
+ else b
+
+let column_to_string i =
+ let res = _to_char ~b:[] i in
+ List.to_seq res |> String.of_seq
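+
+(* Example (illustrative): columns are numbered starting from 1, so the two
+   conversions round-trip on spreadsheet-style names:
+
+   let () =
+     assert (column_of_string "A" = 1);
+     assert (column_of_string "AB" = 28);
+     assert (column_to_string 28 = "AB");
+     assert (column_to_string 26 = "Z")
+*)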
diff --git a/lib/csv/dataType.ml b/lib/csv/dataType.ml
new file mode 100644
index 0000000..c582b9c
--- /dev/null
+++ b/lib/csv/dataType.ml
@@ -0,0 +1,21 @@
+let match_date = Re.Str.regexp {|[0-9]+/[0-9]+/[0-9]+|}
+
+type t =
+ | Null
+ | Error of string
+ | Content of string
+ | Integer of int
+ | Float of float
+
+let to_string = function
+ | Null -> ""
+ | Error s -> s
+ | Integer i -> string_of_int i
+ | Float f -> string_of_float f
+ | Content c -> (
+ match String.starts_with ~prefix:"0" c with
+ | false -> c
+ | true ->
+ (* If the string is a date, do not escape it *)
+ if Re.Str.string_match match_date c 0 then c
+ else String.concat "" [ "=\""; c; "\"" ])
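+
+(* Example (illustrative): values starting with '0' are escaped with the
+   Excel formula trick, unless they look like a date:
+
+   let () =
+     assert (to_string (Content "0123") = {|="0123"|});
+     assert (to_string (Content "01/02/2024") = "01/02/2024");
+     assert (to_string (Integer 42) = "42")
+*)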
diff --git a/lib/csv/dataType.mli b/lib/csv/dataType.mli
new file mode 100644
index 0000000..ebb8bc7
--- /dev/null
+++ b/lib/csv/dataType.mli
@@ -0,0 +1,8 @@
+type t =
+ | Null
+ | Error of string
+ | Content of string
+ | Integer of int
+ | Float of float
+
+val to_string : t -> string
diff --git a/lib/csv/dune b/lib/csv/dune
new file mode 100755
index 0000000..b0f4a72
--- /dev/null
+++ b/lib/csv/dune
@@ -0,0 +1,6 @@
+(library
+ (name importCSV)
+ (libraries
+ re
+ )
+)
diff --git a/lib/data_types/dune b/lib/data_types/dune
new file mode 100644
index 0000000..e38310b
--- /dev/null
+++ b/lib/data_types/dune
@@ -0,0 +1,10 @@
+(library
+ (name importDataTypes)
+ (libraries
+ importCSV
+ )
+
+ (preprocess (pps ppx_deriving.ord))
+ )
+
+
diff --git a/lib/data_types/path.ml b/lib/data_types/path.ml
new file mode 100644
index 0000000..6684b5a
--- /dev/null
+++ b/lib/data_types/path.ml
@@ -0,0 +1,15 @@
+type column = int [@@deriving ord]
+
+type t = {
+ alias : string option;
+ (* External file to load; when the information is missing, load from
+ the current file *)
+ column : column;
+}
+[@@deriving ord]
+
+let repr { alias; column } =
+ let column_text = ImportCSV.Csv.column_to_string column in
+ match alias with
+ | None -> ":" ^ column_text
+ | Some value -> ":" ^ value ^ "." ^ column_text
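+
+(* Example (illustrative): the textual form prefixes the column with the
+   optional source alias:
+
+   let () =
+     assert (repr { alias = None; column = 1 } = ":A");
+     assert (repr { alias = Some "other"; column = 2 } = ":other.B")
+*)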
diff --git a/lib/data_types/readme.rst b/lib/data_types/readme.rst
new file mode 100644
index 0000000..ac609d2
--- /dev/null
+++ b/lib/data_types/readme.rst
@@ -0,0 +1,4 @@
+This module contains all the types used in the application.
+
+It does not depend on any other library, and does not cause any dependency
+cycle.
diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml
new file mode 100644
index 0000000..d807c5c
--- /dev/null
+++ b/lib/data_types/table.ml
@@ -0,0 +1,19 @@
+open StdLabels
+
+type t = {
+ file : string;
+ tab : int;
+ name : string;
+}
+
+(** Get the internal name for the given table.
+
+ This value may differ from the association name given in the configuration. *)
+let name : t -> string =
+ fun source ->
+ let file_name =
+ source.file |> Filename.basename |> Filename.remove_extension
+ in
+ match source.tab with
+ | 1 -> file_name
+ | _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ]
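+
+(* Example (illustrative): the internal name is derived from the file name
+   and, for any tab other than the first one, the tab number:
+
+   let () =
+     assert (name { file = "/tmp/orders.xlsx"; tab = 1; name = "orders" } = "orders");
+     assert (name { file = "/tmp/orders.xlsx"; tab = 3; name = "orders" } = "orders_3")
+*)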
diff --git a/lib/data_types/types.ml b/lib/data_types/types.ml
new file mode 100644
index 0000000..37fd90f
--- /dev/null
+++ b/lib/data_types/types.ml
@@ -0,0 +1,15 @@
+type t =
+ | Number
+ | String
+ | Bool
+ | None
+ | Extern
+ | Float
+
+let string_of_t : t -> string = function
+ | Number -> "Number"
+ | String -> "String"
+ | Bool -> "Bool"
+ | None -> "None"
+ | Extern -> "Extern"
+ | Float -> "Float"
diff --git a/lib/errors/dune b/lib/errors/dune
new file mode 100644
index 0000000..ab71219
--- /dev/null
+++ b/lib/errors/dune
@@ -0,0 +1,9 @@
+(library
+ (name importErrors)
+ (libraries
+ csv
+ sqlite3
+ importCSV
+ importDataTypes
+ )
+)
diff --git a/lib/errors/importErrors.ml b/lib/errors/importErrors.ml
new file mode 100644
index 0000000..04f9deb
--- /dev/null
+++ b/lib/errors/importErrors.ml
@@ -0,0 +1,98 @@
+open StdLabels
+module CSV = ImportCSV
+module Table = ImportDataTypes.Table
+
+let bom = "\xEF\xBB\xBF"
+
+type xlsError = {
+ source : Table.t;
+ row : int;
+ sheet : int;
+ target : Table.t option;
+ value : CSV.DataType.t;
+ exn : exn;
+}
+
+exception
+ JsonError of {
+ json : string;
+ element : string;
+ }
+
+exception InvalidEncoding of string
+exception NullKey of int
+exception SqlError of Sqlite3.Rc.t
+exception MisplacedWindow
+exception Unknown_source of string
+exception Unknown_extension of string
+
+exception Cycle of string list
+(** Cycle between the dependencies *)
+
+exception
+ TypeError of {
+ expected : ImportDataTypes.Types.t;
+ actual : ImportDataTypes.Types.t;
+ expression : string;
+ subset : string;
+ }
+
+let repr_error = function
+ | SqlError s -> Printf.sprintf "%s Error" (Sqlite3.Rc.to_string s)
+ | JsonError { json; element } -> Printf.sprintf "%s : %s" element json
+ | NullKey k ->
+ Printf.sprintf "The key %s is null" (ImportCSV.Csv.column_to_string k)
+ | Unknown_source source ->
+ Printf.sprintf "The source %s is referenced without beiing declared"
+ source
+ | MisplacedWindow -> "A group function cannot appear here"
+ | TypeError { expected; actual; expression; subset } ->
+ Printf.sprintf
+ "In this expression %s has type %s but %s was expected:\n%s" subset
+ (ImportDataTypes.Types.string_of_t actual)
+ (ImportDataTypes.Types.string_of_t expected)
+ expression
+ | Unknown_extension ext -> Printf.sprintf "Unknown file extension %s" ext
+ | Cycle deps ->
+ Printf.sprintf "Cycle between the dependencies : %s"
+ (String.concat ~sep:"," deps)
+ | Failure e -> e
+ | e ->
+ Printexc.print_backtrace Stdlib.stdout;
+ Printexc.to_string e
+
+type t = Csv.out_channel Lazy.t
+
+let output_error : t -> xlsError -> unit =
+ fun csv error ->
+ let target =
+ match error.target with
+ | None -> ""
+ | Some value -> value.name
+ in
+
+ Csv.output_record (Lazy.force csv)
+ [
+ Table.name error.source;
+ string_of_int error.sheet;
+ string_of_int error.row;
+ target;
+ CSV.DataType.to_string error.value;
+ repr_error error.exn;
+ ]
+
+let log : with_bom:bool -> string -> string -> t =
+ fun ~with_bom prefix dirname ->
+ lazy
+ (let file = Filename.concat dirname (String.cat prefix "_errors.csv") in
+
+ prerr_endline @@ String.cat "Errors found. See the file " file;
+
+ (* Open the error file *)
+ let channel = Out_channel.open_bin file in
+ if with_bom then output_string channel bom;
+ let log_error = Csv.to_channel ~separator:';' ~excel_tricks:true channel in
+ (* Generate a header for the error file *)
+ Csv.output_record log_error
+ [ "Source"; "Tab"; "Row"; "Target"; "Value"; "Error" ];
+ log_error)
diff --git a/lib/errors/importErrors.mli b/lib/errors/importErrors.mli
new file mode 100644
index 0000000..7d17a5d
--- /dev/null
+++ b/lib/errors/importErrors.mli
@@ -0,0 +1,46 @@
+module CSV = ImportCSV
+
+exception SqlError of Sqlite3.Rc.t
+exception MisplacedWindow
+
+exception Unknown_source of string
+(** Exception raised when a source used in a path is not declared as an
+ external *)
+
+exception Cycle of string list
+(** Cycle between the dependencies *)
+
+exception Unknown_extension of string
+
+exception
+ JsonError of {
+ json : string;
+ element : string;
+ }
+
+type xlsError = {
+ source : ImportDataTypes.Table.t;
+ row : int;
+ sheet : int;
+ target : ImportDataTypes.Table.t option;
+ value : CSV.DataType.t;
+ exn : exn;
+}
+
+exception
+ TypeError of {
+ expected : ImportDataTypes.Types.t;
+ actual : ImportDataTypes.Types.t;
+ expression : string;
+ subset : string;
+ }
+
+exception InvalidEncoding of string
+exception NullKey of int
+
+val repr_error : exn -> string
+
+type t = Csv.out_channel Lazy.t
+
+val output_error : t -> xlsError -> unit
+val log : with_bom:bool -> string -> string -> t
diff --git a/lib/expression/ast.ml b/lib/expression/ast.ml
new file mode 100644
index 0000000..ef083e9
--- /dev/null
+++ b/lib/expression/ast.ml
@@ -0,0 +1,31 @@
+(** This module rebuilds an AST from an evaluation *)
+
+type 'a repr = 'a T.t
+type 'a obs = 'a T.t
+type 'a path_repr = unit
+
+let observe : 'a repr -> 'a obs = Fun.id
+let empty : unit -> 'a repr = fun () -> T.Empty
+let expr : 'a repr -> 'a repr = fun t -> T.Expr t
+let literal : string -> 'a repr = fun s -> T.Literal s
+let integer : string -> 'a repr = fun i -> T.Integer i
+let path : 'a path_repr -> 'a -> 'a repr = fun _repr p -> T.Path p
+let concat : 'a repr list -> 'a repr = fun ll -> T.Concat ll
+
+let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun w groups order -> T.Window (w, groups, order)
+
+let nvl : 'a repr list -> 'a repr = fun ll -> T.Nvl ll
+let join : string -> 'a repr list -> 'a repr = fun s ll -> T.Join (s, ll)
+
+let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+ fun op e1 e2 -> T.BOperator (op, e1, e2)
+
+let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+ fun op e1 ll -> T.GEquality (op, e1, ll)
+
+let funct : string -> 'a repr list -> 'a repr =
+ fun name args -> T.Function (name, args)
+
+let function' : T.funct -> 'a repr list -> 'a repr =
+ fun f args -> T.Function' (f, args)
diff --git a/lib/expression/compose.ml b/lib/expression/compose.ml
new file mode 100644
index 0000000..028602b
--- /dev/null
+++ b/lib/expression/compose.ml
@@ -0,0 +1,150 @@
+open StdLabels
+
+(** Build an expression module with the result from another expression. The
+ signature of the functions is a bit different, as they all receive the
+ result of the previously evaluated element as an argument. *)
+module Expression
+ (E : Sym.SYM_EXPR)
+ (R : sig
+ val v : 'a E.path_repr
+ end) =
+struct
+ module type SIG = sig
+ type 'a repr
+ type 'a obs
+ type 'a path_repr
+
+ val empty : 'a E.obs -> 'a repr
+ val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr
+ val literal : string -> 'a E.obs -> 'a repr
+ val integer : string -> 'a E.obs -> 'a repr
+ val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr
+ val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+ val window :
+ ('a E.obs * 'a repr) T.window ->
+ ('a E.obs * 'a repr) list ->
+ ('a E.obs * 'a repr) list ->
+ 'a E.obs ->
+ 'a repr
+
+ val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+ val boperator :
+ T.binary_operator ->
+ 'a E.obs * 'a repr ->
+ 'a E.obs * 'a repr ->
+ 'a E.obs ->
+ 'a repr
+
+ val gequality :
+ T.binary_operator ->
+ 'a E.obs * 'a repr ->
+ ('a E.obs * 'a repr) list ->
+ 'a E.obs ->
+ 'a repr
+
+ val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val observe : 'a E.obs * 'a repr -> 'a obs
+ end
+
+ module Make (M : SIG) = struct
+ type 'a repr = 'a E.repr * 'a M.repr
+ type 'a obs = 'a M.obs
+ type 'a path_repr = 'a M.path_repr
+
+ let map' : 'a repr list -> 'a E.repr list * ('a E.obs * 'a M.repr) list =
+ fun ll ->
+ let e = List.map ~f:fst ll in
+ (e, List.map ll ~f:(fun (e, m) -> (E.observe e, m)))
+
+ let observe : 'a repr -> 'a obs = fun (t, v) -> M.observe (E.observe t, v)
+
+ let empty : unit -> 'a repr =
+ fun () ->
+ let e = E.empty () in
+ (e, M.empty (E.observe e))
+
+ let expr : 'a repr -> 'a repr =
+ fun (e, m) ->
+ let e' = E.expr e in
+ (e', M.expr (E.observe e, m) (E.observe e'))
+
+ let literal : string -> 'a repr =
+ fun litt ->
+ let e = E.literal litt in
+ (e, M.literal litt (E.observe e))
+
+ let integer : string -> 'a repr =
+ fun i ->
+ let e' = E.integer i in
+ (e', M.integer i (E.observe e'))
+
+ let path : 'b path_repr -> 'b -> 'a repr =
+ fun path_repr path ->
+ let e = E.path R.v path in
+ let m = M.path path_repr path (E.observe e) in
+ (e, m)
+
+ let concat : 'a repr list -> 'a repr =
+ fun reprs ->
+ let e, m = map' reprs in
+ let e' = E.concat e in
+ (e', M.concat m (E.observe e'))
+
+ let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun window expressions order ->
+ let e_expressions, m_expressions = map' expressions
+ and e_order, m_order = map' order
+ and e_window = T.map_window window ~f:fst
+ and m_window = T.map_window window ~f:(fun (e, m) -> (E.observe e, m)) in
+
+ let e = E.window e_window e_expressions e_order in
+ (e, M.window m_window m_expressions m_order (E.observe e))
+
+ let nvl : 'a repr list -> 'a repr =
+ fun reprs ->
+ let e, m = List.split reprs in
+
+ let e' = E.nvl e in
+ let e = List.map ~f:E.observe e in
+ (e', M.nvl (List.combine e m) (E.observe e'))
+
+ let join : string -> 'a repr list -> 'a repr =
+ fun sep reprs ->
+ let e_reprs, m = map' reprs in
+
+ let e = E.join sep e_reprs in
+ (e, M.join sep m (E.observe e))
+
+ let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+ fun op (e1, m1) (e2, m2) ->
+ let e1' = E.observe e1
+ and e2' = E.observe e2
+ and e = E.boperator op e1 e2 in
+ let m' = M.boperator op (e1', m1) (e2', m2) (E.observe e) in
+ (e, m')
+
+ let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+ fun op (e1, m1) exprs ->
+ let e_reprs, m_reprs = map' exprs in
+ let e' = E.gequality op e1 e_reprs in
+ let m' = M.gequality op (E.observe e1, m1) m_reprs (E.observe e') in
+ (e', m')
+
+ let funct : string -> 'a repr list -> 'a repr =
+ fun sep reprs ->
+ let e_reprs, m = map' reprs in
+
+ let e = E.funct sep e_reprs in
+ (e, M.funct sep m (E.observe e))
+
+ let function' : T.funct -> 'a repr list -> 'a repr =
+ fun f reprs ->
+ let e_reprs, m = map' reprs in
+ let e = E.function' f e_reprs in
+ (e, M.function' f m (E.observe e))
+ end
+end
diff --git a/lib/expression/compose.mli b/lib/expression/compose.mli
new file mode 100644
index 0000000..4cced8c
--- /dev/null
+++ b/lib/expression/compose.mli
@@ -0,0 +1,59 @@
+(** Build an expression module with the result from another expression. The
+ signature of the functions is a bit different, as they all receive the
+ result of the previously evaluated element as an argument. *)
+module Expression
+ (E : Sym.SYM_EXPR)
+ (_ : sig
+ val v : 'a E.path_repr
+ end) : sig
+ (** The signature for the module to adapt is a bit different from
+ SYM_EXPR: every function takes an extra argument, which is the expression
+ we are wrapping, and every expression becomes a tuple holding the same
+ expression represented in the composed type. *)
+ module type SIG = sig
+ type 'a repr
+ type 'a obs
+ type 'a path_repr
+
+ val empty : 'a E.obs -> 'a repr
+ val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr
+ val literal : string -> 'a E.obs -> 'a repr
+ val integer : string -> 'a E.obs -> 'a repr
+ val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr
+ val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+ val window :
+ ('a E.obs * 'a repr) T.window ->
+ ('a E.obs * 'a repr) list ->
+ ('a E.obs * 'a repr) list ->
+ 'a E.obs ->
+ 'a repr
+
+ val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+
+ val boperator :
+ T.binary_operator ->
+ 'a E.obs * 'a repr ->
+ 'a E.obs * 'a repr ->
+ 'a E.obs ->
+ 'a repr
+
+ val gequality :
+ T.binary_operator ->
+ 'a E.obs * 'a repr ->
+ ('a E.obs * 'a repr) list ->
+ 'a E.obs ->
+ 'a repr
+
+ val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr
+ val observe : 'a E.obs * 'a repr -> 'a obs
+ end
+
+ module Make (M : SIG) :
+ Sym.SYM_EXPR
+ with type 'a obs = 'a M.obs
+ and type 'a repr = 'a E.repr * 'a M.repr
+ and type 'a path_repr = 'a M.path_repr
+end
diff --git a/lib/expression/dune b/lib/expression/dune
new file mode 100755
index 0000000..96e386e
--- /dev/null
+++ b/lib/expression/dune
@@ -0,0 +1,9 @@
+(library
+ (name importExpression)
+ (libraries
+ re
+ importCSV
+ importDataTypes
+ importErrors
+ )
+)
diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml
new file mode 100644
index 0000000..42c794b
--- /dev/null
+++ b/lib/expression/filters.ml
@@ -0,0 +1,193 @@
+(** This module evaluates the SQL query to use in order to filter an expression.
+
+ The result is built over [Query], except for the group functions, which are
+ translated into a CTE in SQL.
+ *)
+
+open StdLabels
+module Q = Query
+
+type 'a result = {
+ repr : Format.formatter -> nested:Query.QueryParameter.t -> unit;
+ group : 'a T.t option;
+}
+
+module Filter = struct
+ type 'a repr = {
+ repr : 'a Q.Query.repr;
+ with_group : 'a T.t option;
+ }
+
+ type 'a obs = 'a result
+ type 'a path_repr = 'a Q.Query.path_repr
+
+ let observe : 'a Ast.obs * 'a repr -> 'a obs =
+ fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group }
+
+ let empty : 'a Ast.obs -> 'a repr =
+ fun _ -> { repr = Q.Query.empty (); with_group = None }
+
+ let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr =
+ fun (_, expr) _ ->
+ { repr = Q.Query.expr expr.repr; with_group = expr.with_group }
+
+ let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr =
+ fun repr p _ -> { repr = Q.Query.path repr p; with_group = None }
+
+ let literal : string -> 'a Ast.obs -> 'a repr =
+ fun l _ -> { repr = Q.Query.literal l; with_group = None }
+
+ let integer : string -> 'a Ast.obs -> 'a repr =
+ fun l _ -> { repr = Q.Query.integer l; with_group = None }
+
+ let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+ fun expression _ ->
+ let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in
+ let with_group =
+ List.find_map ~f:(fun v -> (snd v).with_group) expression
+ in
+ match with_group with
+ | None -> { repr = Q.Query.nvl expr_repr; with_group }
+ | Some _ -> raise ImportErrors.MisplacedWindow
+
+ let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+ fun expression _ ->
+ let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in
+ let with_group =
+ List.find_map ~f:(fun v -> (snd v).with_group) expression
+ in
+ match with_group with
+ | None -> { repr = Q.Query.concat expr_repr; with_group }
+ | Some _ -> raise ImportErrors.MisplacedWindow
+
+ let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+ fun sep expression _ ->
+ let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression
+ and with_group =
+ List.find_map ~f:(fun v -> (snd v).with_group) expression
+ in
+ match with_group with
+ | None -> { repr = Q.Query.join sep expr_repr; with_group }
+ | Some _ -> raise ImportErrors.MisplacedWindow
+
+ let boperator :
+ T.binary_operator ->
+ 'a Ast.obs * 'a repr ->
+ 'a Ast.obs * 'a repr ->
+ 'a Ast.obs ->
+ 'a repr =
+ fun name (_, e1) (_, e2) _ ->
+ let with_group =
+ match (e1.with_group, e2.with_group) with
+ | Some e, None -> Some e
+ | None, Some e -> Some e
+ | None, None -> None
+ | _ -> raise ImportErrors.MisplacedWindow
+ in
+ { repr = Q.Query.boperator name e1.repr e2.repr; with_group }
+
+ let gequality :
+ T.binary_operator ->
+ 'a Ast.obs * 'a repr ->
+ ('a Ast.obs * 'a repr) list ->
+ 'a Ast.obs ->
+ 'a repr =
+ fun name (_, e1) group _ ->
+ let group_repr = List.map ~f:(fun v -> (snd v).repr) group
+ and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in
+
+ match with_group with
+ | None ->
+ {
+ repr = Q.Query.gequality name e1.repr group_repr;
+ with_group = e1.with_group;
+ }
+ | _ -> raise ImportErrors.MisplacedWindow
+
+ let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+ fun name expressions _ ->
+ let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in
+ let with_group =
+ List.find_map ~f:(fun v -> (snd v).with_group) expressions
+ in
+ match with_group with
+ | None -> { repr = Q.Query.funct name expr_repr; with_group }
+ | Some _ -> raise ImportErrors.MisplacedWindow
+
+ let function' :
+ T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr =
+ fun name expressions _ ->
+ let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in
+ let with_group =
+ List.find_map ~f:(fun v -> (snd v).with_group) expressions
+ in
+ match with_group with
+ | None ->
+ { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group }
+ | Some _ -> raise ImportErrors.MisplacedWindow
+
+ (** Window functions are not handled in the filters; we save them as an AST
+ in order to process them in a separate handler.
+
+ Building nested window functions is not allowed. *)
+ let window :
+ ('a Ast.obs * 'a repr) T.window ->
+ ('a Ast.obs * 'a repr) list ->
+ ('a Ast.obs * 'a repr) list ->
+ 'a Ast.obs ->
+ 'a repr =
+ fun name expressions order ast ->
+ ignore name;
+ let with_group_expr =
+ List.find_map ~f:(fun v -> (snd v).with_group) expressions
+ and with_group_order =
+ List.find_map ~f:(fun v -> (snd v).with_group) order
+ in
+ match (with_group_expr, with_group_order) with
+ | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow
+ | None, None ->
+ (* The column name used with the cte. The name is fixed here, and used
+ as is in [Analysers.Query.build_cte] and
+ [Analysers.Query.eval_filters] *)
+ let q = "cte.group0" in
+ {
+ with_group = Some ast;
+ repr = Q.Query.funct "expr" [ Q.Query.literal q ];
+ }
+end
+
+module ASTBuilder =
+ Compose.Expression
+ (Ast)
+ (struct
+ let v = ()
+ end)
+
+module F :
+ Sym.SYM_EXPR
+ with type 'a obs = 'a result
+ and type 'a path_repr = Format.formatter -> 'a -> unit =
+ ASTBuilder.Make (Filter)
+
+module M = Sym.M (F)
+
+let query_of_expression :
+ type b.
+ b Q.binded_query ->
+ Format.formatter ->
+ (Format.formatter -> 'a -> unit) ->
+ 'a T.t ->
+ b * 'a T.t option =
+ fun parameter formatter printer expr ->
+ let repr = M.eval ~path_repr:printer expr in
+ match parameter with
+ | BindParam ->
+ let p = Queue.create () in
+ let parameter = Q.QueryParameter.Queue p in
+ let value = F.observe repr in
+ value.repr ~nested:parameter formatter;
+ (p, value.group)
+ | NoParam ->
+ let value = F.observe repr in
+ value.repr ~nested:Literal formatter;
+ ((), value.group)
diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli
new file mode 100644
index 0000000..d462b5f
--- /dev/null
+++ b/lib/expression/filters.mli
@@ -0,0 +1,9 @@
+module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit
+(** Query used inside the filter clauses *)
+
+val query_of_expression :
+ 'b Query.binded_query ->
+ Format.formatter ->
+ (Format.formatter -> 'a -> unit) ->
+ 'a T.t ->
+ 'b * 'a T.t option
diff --git a/lib/expression/headers.ml b/lib/expression/headers.ml
new file mode 100644
index 0000000..6371e4f
--- /dev/null
+++ b/lib/expression/headers.ml
@@ -0,0 +1,89 @@
+open StdLabels
+
+let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n)
+
+module E :
+ Sym.SYM_CHUNK
+ with type 'a obs = buffer:Buffer.t -> unit
+ and type 'a path_repr = 'a -> Buffer.t -> unit = struct
+ type 'a repr = buffer:Buffer.t -> unit
+ type 'a obs = buffer:Buffer.t -> unit
+ type 'a path_repr = 'a -> Buffer.t -> unit
+
+ let group : 'a repr list -> 'a repr =
+ fun args ~buffer ->
+ Buffer.add_string buffer "[";
+ List.iter args ~f:(fun v ->
+ v ~buffer;
+ Buffer.add_string buffer ", ");
+
+ truncate buffer 2;
+ Buffer.add_string buffer "]"
+
+ let arguments : 'a repr list -> 'a repr =
+ fun expressions ~buffer ->
+ Buffer.add_string buffer "(";
+ List.iter expressions ~f:(fun v ->
+ v ~buffer;
+ Buffer.add_string buffer ", ");
+
+ truncate buffer 2;
+ Buffer.add_string buffer ")"
+
+ let observe x ~buffer = x ~buffer
+ let empty : unit -> 'a repr = fun _ ~buffer -> Buffer.add_string buffer "''"
+ let path printer p ~buffer = printer p buffer
+ let literal l ~buffer = Buffer.add_string buffer l
+ let integer l ~buffer = Buffer.add_string buffer l
+
+ let expr expr ~buffer =
+ Buffer.add_char buffer '(';
+ expr ~buffer;
+ Buffer.add_char buffer ')'
+
+ let nvl expression ~buffer =
+ Buffer.add_string buffer "nvl";
+ arguments ~buffer expression
+
+ let concat expression ~buffer = List.iter expression ~f:(fun v -> v ~buffer)
+
+ let join sep expression ~buffer =
+ List.iter expression ~f:(fun v ->
+ v ~buffer;
+ Buffer.add_string buffer sep);
+ truncate buffer (String.length sep)
+
+ let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun name expressions order ~buffer ->
+ ignore order;
+ let name = T.name_of_window name in
+
+ Buffer.add_string buffer name;
+ arguments ~buffer expressions
+
+ let boperator name e1 e2 ~buffer =
+ e1 ~buffer;
+ Buffer.add_string buffer (T.name_of_operator name);
+ e2 ~buffer
+
+ let gequality name e1 e2 ~buffer =
+ e1 ~buffer;
+ Buffer.add_string buffer (T.name_of_operator name);
+ group ~buffer e2
+
+ let funct name expressions ~buffer =
+ Buffer.add_string buffer name;
+ arguments ~buffer expressions
+
+ let function' name expressions ~buffer =
+ Buffer.add_string buffer (T.name_of_function name);
+ arguments ~buffer expressions
+end
+
+module M = Sym.M (E)
+
+let headers_of_expression :
+ Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit =
+ fun buffer printer expr ->
+ let repr = M.eval expr ~path_repr:printer in
+ E.observe repr ~buffer
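+
+(* Illustrative usage sketch (assumption): [path_printer] below is a
+   hypothetical printer that writes a column reference into the buffer; any
+   printer with the same signature works:
+
+   let header_of expr =
+     let buffer = Buffer.create 16 in
+     let path_printer p buffer =
+       Buffer.add_string buffer (ImportDataTypes.Path.repr p)
+     in
+     headers_of_expression buffer path_printer expr;
+     Buffer.contents buffer
+*)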
diff --git a/lib/expression/headers.mli b/lib/expression/headers.mli
new file mode 100644
index 0000000..1fafad0
--- /dev/null
+++ b/lib/expression/headers.mli
@@ -0,0 +1,7 @@
+val headers_of_expression :
+ Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit
+
+module E :
+ Sym.SYM_EXPR
+ with type 'a obs = buffer:Buffer.t -> unit
+ and type 'a path_repr = 'a -> Buffer.t -> unit
diff --git a/lib/expression/lazier.ml b/lib/expression/lazier.ml
new file mode 100644
index 0000000..d8b12d9
--- /dev/null
+++ b/lib/expression/lazier.ml
@@ -0,0 +1,71 @@
+open StdLabels
+
+(** Make a module lazy *)
+module Make (S : Sym.SYM_EXPR) = struct
+ type 'a repr = 'a S.repr Lazy.t
+ type 'a obs = 'a S.obs Lazy.t
+ type 'a path_repr = 'a S.path_repr
+
+ let empty : unit -> 'a repr = fun () -> lazy (S.empty ())
+
+ let expr : 'a repr -> 'a repr =
+ fun expr -> Lazy.map (fun expr -> S.expr expr) expr
+
+ let literal : string -> 'a repr = fun l -> lazy (S.literal l)
+ let integer : string -> 'a repr = fun i -> lazy (S.integer i)
+
+ let path : 'b path_repr -> 'b -> 'a repr =
+ fun repr path -> lazy (S.path repr path)
+
+ let concat : 'a repr list -> 'a repr =
+ fun exprs ->
+ lazy
+ (let exprs' = List.map ~f:Lazy.force exprs in
+ S.concat exprs')
+
+ let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun w group sort ->
+ lazy
+ (let w' = T.map_window ~f:Lazy.force w
+ and group' = List.map ~f:Lazy.force group
+ and sort' = List.map ~f:Lazy.force sort in
+ S.window w' group' sort')
+
+ let nvl : 'a repr list -> 'a repr =
+ fun exprs ->
+ lazy
+ (let exprs' = List.map ~f:Lazy.force exprs in
+ S.nvl exprs')
+
+ let join : string -> 'a repr list -> 'a repr =
+ fun sep exprs ->
+ lazy
+ (let exprs' = List.map ~f:Lazy.force exprs in
+ S.join sep exprs')
+
+ let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+ fun op e1 e2 ->
+ lazy
+ (let e1' = Lazy.force e1 and e2' = Lazy.force e2 in
+ S.boperator op e1' e2')
+
+ let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+ fun op e exprs ->
+ lazy
+ (let e' = Lazy.force e and exprs' = List.map ~f:Lazy.force exprs in
+ S.gequality op e' exprs')
+
+ let funct : string -> 'a repr list -> 'a repr =
+ fun name exprs ->
+ lazy
+ (let exprs' = List.map ~f:Lazy.force exprs in
+ S.funct name exprs')
+
+ let function' : T.funct -> 'a repr list -> 'a repr =
+ fun f exprs ->
+ lazy
+ (let exprs' = List.map ~f:Lazy.force exprs in
+ S.function' f exprs')
+
+ let observe : 'a repr -> 'a obs = fun v -> Lazy.map S.observe v
+end
diff --git a/lib/expression/query.ml b/lib/expression/query.ml
new file mode 100644
index 0000000..5bd914a
--- /dev/null
+++ b/lib/expression/query.ml
@@ -0,0 +1,335 @@
+(**
+ This module creates an SQL query from an expression.
+ *)
+
+open StdLabels
+
+(** This type is used in the query builder (see [query_of_expression] just
+ below) in order to tell whether we need to bind the parameters in the query,
+ or whether we can use plain literals as is (with some risk at execution time). *)
+type _ binded_query =
+ | BindParam : ImportCSV.DataType.t Queue.t binded_query
+ | NoParam : unit binded_query
+
+module QueryParameter = struct
+ (** Internally, we need to keep a different type for the Literal chunks
+ (which require quoting) and the Raw ones (which should be given as is to
+ the SQL engine).
+
+ A Raw value can be generated from both BindParam and NoParam queries. *)
+ type t =
+ | Literal
+ | Queue of ImportCSV.DataType.t Queue.t
+ | Raw of t
+
+ (** Wrap the given parameter mode into the raw mode *)
+ let raw : t -> t = function
+ | Raw t -> Raw t
+ | Literal -> Raw Literal
+ | Queue q -> Raw (Queue q)
+
+ (** Nest the parameter in order to use it inside another function call.
+
+ The rule is to get out of the Raw mode as soon as we dive into another
+ function. *)
+ let nest : t -> t = function
+ | Raw t -> t
+ | other -> other
+end
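+
+(* Example (illustrative): [raw] wraps the current mode, and [nest] unwraps
+   it again as soon as we enter a nested function call:
+
+   let () =
+     QueryParameter.(
+       assert (raw Literal = Raw Literal);
+       assert (nest (Raw Literal) = Literal);
+       assert (nest Literal = Literal))
+*)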
+
+module TypeBuilder =
+ Compose.Expression
+ (Type_of)
+ (struct
+ let v = ignore
+ end)
+
+module Query = TypeBuilder.Make (struct
+ type 'a repr = Format.formatter -> nested:QueryParameter.t -> unit
+ type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit
+ type 'a path_repr = Format.formatter -> 'a -> unit
+
+ let observe : 'a Type_of.obs * 'a repr -> 'a obs =
+ fun (_, x) formatter ~nested ->
+ let () = x formatter ~nested in
+ Format.pp_print_flush formatter ()
+
+ (** Unify an external reference with a given type, using the COALESCE
+ function *)
+ let unify :
+ with_:Type_of.t ->
+ nested:QueryParameter.t ->
+ Format.formatter ->
+ 'a Type_of.obs * 'a repr ->
+ unit =
+ fun ~with_ ~nested format (type_of, expr) ->
+ match (type_of, with_) with
+ | ImportDataTypes.Types.Extern, Number
+ | ImportDataTypes.Types.Extern, Extern ->
+ Format.fprintf format "COALESCE(%a,0)"
+ (fun f expr -> expr f ~nested)
+ expr
+ | ImportDataTypes.Types.Extern, String ->
+ Format.fprintf format "COALESCE(%a,'')"
+ (fun f expr -> expr f ~nested)
+ expr
+ | _, Float ->
+ Format.fprintf format "CAST(%a AS REAL)"
+ (fun f expr -> expr f ~nested)
+ expr
+ | _, _ -> expr ~nested format
+
+ let empty : 'a Type_of.obs -> 'a repr =
+ fun type_of formatter ~nested ->
+ ignore type_of;
+ ignore nested;
+ Format.fprintf formatter "''"
+
+ let expr : 'a Type_of.obs * 'a repr -> 'a Type_of.obs -> 'a repr =
+ fun expr type_of formatter ~nested ->
+ ignore type_of;
+ Format.fprintf formatter "(";
+ (snd expr) ~nested formatter;
+ Format.fprintf formatter ")"
+
+ let literal : string -> 'a Type_of.obs -> 'a repr =
+ fun l type_of formatter ~nested ->
+ ignore type_of;
+ match nested with
+ | QueryParameter.Literal ->
+ (* If the text is a true literal, we insert it directly. This is
+ only called from the [query_of_expression] function *)
+ Format.fprintf formatter "'%s'" l
+ | QueryParameter.Queue queue ->
+ Format.fprintf formatter "?";
+ Queue.add (ImportCSV.DataType.Content l) queue
+ | QueryParameter.Raw _ -> Format.fprintf formatter "%s" l
+
+ let integer : string -> 'a Type_of.obs -> 'a repr =
+ fun l type_of formatter ~nested ->
+ ignore type_of;
+ ignore nested;
+ Format.fprintf formatter "%s" l
+
+ let path : 'b path_repr -> 'b -> 'a Type_of.obs -> 'a repr =
+ fun repr p type_of formatter ~nested ->
+ ignore nested;
+ ignore type_of;
+ repr formatter p
+
+ let concat : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f " || ")
+ (unify ~with_:ImportDataTypes.Types.String ~nested:nested')
+ formatter expression
+
+ let print_expression :
+ ?sep:string ->
+ QueryParameter.t ->
+ Format.formatter ->
+ ('a Type_of.obs * 'a repr) list ->
+ unit =
+ fun ?(sep = ", ") nested formatter expression ->
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f "%s" sep)
+ (fun f v -> (snd v) f ~nested))
+ formatter expression
+
+ (** Format the partition expression. This function is used internally and
+ only formats the expression inside the OVER clause. *)
+ let group_windows :
+ QueryParameter.t ->
+ Format.formatter ->
+ ('a Type_of.obs * 'a repr) list
+ * ('a Type_of.obs * 'a repr) list
+ * string option ->
+ unit =
+ fun nested formatter (expressions, order, range) ->
+ match (expressions, order) with
+ | [], _ -> ()
+ | _, [] ->
+ Format.fprintf formatter " OVER (PARTITION BY %a%a)"
+ (print_expression nested) expressions
+ (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+ range
+ | _, _ ->
+ Format.fprintf formatter " OVER (PARTITION BY %a ORDER BY %a%a)"
+ (print_expression nested) expressions (print_expression nested) order
+ (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+ range
+
+ let window :
+ ('a Type_of.obs * 'a repr) T.window ->
+ ('a Type_of.obs * 'a repr) list ->
+ ('a Type_of.obs * 'a repr) list ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name expressions order type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ (* By default, the range is defined like this
+
+ [RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS]
+
+ this only builds a range up to the current row, but in some cases (min,
+ last), we want to scan the whole group in order to evaluate the value to
+ keep.
+ *)
+ let range = " RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in
+ match name with
+ | T.Min expr ->
+ Format.fprintf formatter "FIRST_VALUE(%a)%a "
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested')
+ (expressions, order, Some range)
+ | T.Max expr ->
+ Format.fprintf formatter "LAST_VALUE(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested')
+ (expressions, order, Some range)
+ | T.Counter ->
+ (* If no order is given, return the number of elements in the
+ whole group *)
+ let operator =
+ match order with
+ | [] -> "COUNT"
+ | _ -> "ROW_NUMBER"
+ in
+ Format.fprintf formatter "%s()%a" operator (group_windows nested')
+ (expressions, order, None)
+ | T.Previous expr ->
+ Format.fprintf formatter "LAG(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested') (expressions, order, None)
+ | T.Sum expr ->
+ Format.fprintf formatter "SUM(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested') (expressions, order, None)
+
+ let nvl : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ Format.fprintf formatter "COALESCE(%a)" (print_expression nested')
+ expression
+
+ let join :
+ string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun sep expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ (* Directly call the literal function for the first argument *)
+ Format.fprintf formatter "CONCAT(%a, %a)"
+ (fun f v -> (literal v ImportDataTypes.Types.String) f ~nested:nested')
+ sep (print_expression nested') expression
+
+ let boperator :
+ T.binary_operator ->
+ 'a Type_of.obs * 'a repr ->
+ 'a Type_of.obs * 'a repr ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name e1 e2 type_of formatter ~nested ->
+ ignore type_of;
+ (* When dividing, we need to be sure that the type is a float,
+ otherwise SQL will truncate the result *)
+ let with_ =
+ match name with
+ | T.Division -> ImportDataTypes.Types.Float
+ | _ -> fst e2
+ in
+
+ let nested' = QueryParameter.nest nested in
+ Format.fprintf formatter "%a%s%a"
+ (unify ~with_ ~nested:nested')
+ e1
+ (* The operator *)
+ (T.name_of_operator name)
+ (unify ~with_:(fst e1) ~nested:nested')
+ e2
+
+ let gequality :
+ T.binary_operator ->
+ 'a Type_of.obs * 'a repr ->
+ ('a Type_of.obs * 'a repr) list ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name e1 group type_of ->
+ ignore type_of;
+ let group_type = List.map ~f:fst group in
+ fun formatter ~nested ->
+ let nested' = QueryParameter.nest nested in
+ let op_name =
+ match name with
+ | T.Equal -> " IN("
+ | T.Different -> " NOT IN("
+ | _ -> ""
+ in
+
+ Format.fprintf formatter "%a%s%a)"
+ (unify ~with_:(Type_of.group' group_type) ~nested:nested')
+ e1 op_name (print_expression nested') group
+
+ let exprs expressions formatter ~nested =
+ (* Literal expression: from now on, all the quoted strings are
+ given directly to the SQL engine *)
+ let nested' = QueryParameter.raw nested in
+
+ Format.fprintf formatter "(%a)"
+ (print_expression ~sep:" " nested')
+ expressions
+
+ let rec funct :
+ string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun name expressions type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ match name with
+ | "expr" ->
+ (* Raw expressions are passed through directly *)
+ exprs expressions formatter ~nested
+ | "if" ->
+ (* The if is renamed into IIF *)
+ funct "IIF" expressions type_of formatter ~nested
+ | _ ->
+ (* Default case *)
+ Format.fprintf formatter "%s(%a)" name (print_expression nested')
+ expressions
+
+ let function' :
+ T.funct -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun name expressions type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ match name with
+ | Upper | Trim ->
+ Format.fprintf formatter "%s(%a)" (T.name_of_function name)
+ (print_expression nested') expressions
+end)
+
+module M = Sym.M (Query)
+
+let query_of_expression :
+ type b.
+ b binded_query ->
+ Format.formatter ->
+ (Format.formatter -> 'a -> unit) ->
+ 'a T.t ->
+ b =
+ fun parameter formatter printer expr ->
+ let repr = M.eval ~path_repr:printer expr in
+ match parameter with
+ | BindParam ->
+ let p = Queue.create () in
+ let parameter = QueryParameter.Queue p in
+ Query.observe repr formatter ~nested:parameter;
+ p
+ | NoParam ->
+ Query.observe repr formatter ~nested:Literal;
+ ()
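+
+(* Illustrative usage sketch (assumption): [pp_path] and [expression] stand
+   for a path printer and an expression built elsewhere; they are not defined
+   in this module. With [BindParam], the literals are replaced by question-mark
+   placeholders and returned in a queue, in binding order:
+
+   let buffer = Buffer.create 64 in
+   let formatter = Format.formatter_of_buffer buffer in
+   let params = query_of_expression BindParam formatter pp_path expression in
+   (* [Buffer.contents buffer] holds the SQL fragment,
+      [params] holds the values to bind. *)
+*)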
diff --git a/lib/expression/query.mli b/lib/expression/query.mli
new file mode 100644
index 0000000..fa789a9
--- /dev/null
+++ b/lib/expression/query.mli
@@ -0,0 +1,27 @@
+module QueryParameter : sig
+ (** Internally, we need to keep a different type for the Literal chunks
+ (which require quoting) and the Raw ones (which should be given as is to
+ the SQL engine).
+
+ A Raw value can be generated from both BindParam and NoParam queries. *)
+ type t =
+ | Literal
+ | Queue of ImportCSV.DataType.t Queue.t
+ | Raw of t
+end
+
+type _ binded_query =
+ | BindParam : ImportCSV.DataType.t Queue.t binded_query
+ | NoParam : unit binded_query
+
+val query_of_expression :
+ 'b binded_query ->
+ Format.formatter ->
+ (Format.formatter -> 'a -> unit) ->
+ 'a T.t ->
+ 'b
+
+module Query :
+ Sym.SYM_EXPR
+ with type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit
+ and type 'a path_repr = Format.formatter -> 'a -> unit
diff --git a/lib/expression/repr.ml b/lib/expression/repr.ml
new file mode 100644
index 0000000..4990236
--- /dev/null
+++ b/lib/expression/repr.ml
@@ -0,0 +1,127 @@
+open StdLabels
+
+let escape_dquote = Re.Str.regexp "'"
+let escape content = Re.Str.global_replace escape_dquote "\\'" content
+
+module E :
+ Sym.SYM_CHUNK
+ with type 'a obs = top:bool -> string
+ and type 'a path_repr = 'a -> string = struct
+ type 'a repr = top:bool -> string
+ type 'a obs = top:bool -> string
+ type 'a path_repr = 'a -> string
+
+ let observe x = x
+
+ let group : 'a repr list -> 'a repr =
+ fun args ~top ->
+ let args_repr = List.map ~f:(fun v -> v ~top) args in
+ let args = String.concat ~sep:", " args_repr in
+ "[" ^ args ^ "]"
+
+ let arguments : 'a repr list -> 'a repr =
+ fun args ~top ->
+ let args_repr = List.map ~f:(fun v -> v ~top) args in
+ let args = String.concat ~sep:", " args_repr in
+ "(" ^ args ^ ")"
+
+ let empty : unit -> 'a repr =
+ fun () ~top ->
+ match top with
+ | false -> "''"
+ | true -> ""
+
+ let literal : string -> 'a repr =
+ fun l ~top ->
+ if String.equal String.empty l then (empty ()) ~top
+ else
+ match int_of_string_opt l with
+ | Some _ -> l
+ | None -> "'" ^ escape l ^ "'"
+
+ let integer : string -> 'a repr =
+ fun l ~top -> if String.equal String.empty l then (empty ()) ~top else l
+
+ let expr : 'a repr -> 'a repr =
+ fun expr ~top ->
+ ignore top;
+ String.concat ~sep:"" [ "("; expr ~top:false; ")" ]
+
+ let path : 'b path_repr -> 'b -> 'a repr =
+ fun path_repr p ~top ->
+ ignore top;
+ path_repr p
+
+ let concat : 'a repr list -> 'a repr =
+ fun elems ~top ->
+ ignore top;
+ let top = false in
+ let strs = List.map elems ~f:(fun v -> v ~top) in
+ String.concat ~sep:" ^ " strs
+
+ let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr =
+ fun name g1 sort ~top ->
+ ignore top;
+
+ let args1 = group ~top:false g1
+ and args2 = group ~top:false sort
+ and f_name = T.name_of_window name in
+ let args = [ args1; args2 ] in
+ let args =
+ match name with
+ | T.Counter -> args
+ | T.Min prefix_arg
+ | T.Max prefix_arg
+ | T.Previous prefix_arg
+ | T.Sum prefix_arg -> prefix_arg ~top:false :: args
+ in
+
+ f_name ^ "(" ^ String.concat ~sep:", " args ^ ")"
+
+ let nvl : 'a repr list -> 'a repr =
+ fun elems ~top ->
+ ignore top;
+ let args = arguments ~top:false elems in
+ "nvl" ^ args
+
+ let join : string -> 'a repr list -> 'a repr =
+ fun sep elems ~top ->
+ ignore top;
+ let header = literal sep in
+ let args = arguments ~top:false (header :: elems) in
+ "join" ^ args
+
+ let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr =
+ fun op arg1 arg2 ~top ->
+ ignore top;
+ let top = false in
+ let sep = T.name_of_operator op in
+ String.concat ~sep [ arg1 ~top; arg2 ~top ]
+
+ let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr =
+ fun op arg1 arg2 ~top ->
+ ignore top;
+ let top = false in
+ let sep = T.name_of_operator op in
+ let args = group ~top:false arg2 in
+ String.concat ~sep [ arg1 ~top; args ]
+
+ let funct : string -> 'a repr list -> 'a repr =
+ fun f args ~top ->
+ ignore top;
+ let args = arguments ~top:false args in
+ f ^ args
+
+ let function' : T.funct -> 'a repr list -> 'a repr =
+ fun f args ~top ->
+ ignore top;
+ let args = arguments ~top:false args in
+ T.name_of_function f ^ args
+end
+
+module M = Sym.M (E)
+
+let repr : ?top:bool -> ('a -> string) -> 'a T.t -> string =
+ fun ?(top = false) printer expr ->
+ let repr = M.eval ~path_repr:printer expr in
+ E.observe repr ~top
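+
+(* Example (illustrative), assuming the path printer from
+   [ImportDataTypes.Path]:
+
+   let p = ImportDataTypes.Path.{ alias = None; column = 1 } in
+   let s = repr ImportDataTypes.Path.repr T.(BOperator (Add, Path p, Integer "1")) in
+   (* s = ":A+1" *)
+   ignore s
+*)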
diff --git a/lib/expression/repr.mli b/lib/expression/repr.mli
new file mode 100644
index 0000000..4431655
--- /dev/null
+++ b/lib/expression/repr.mli
@@ -0,0 +1,6 @@
+val repr : ?top:bool -> ('a -> string) -> 'a T.t -> string
+
+module E :
+ Sym.SYM_EXPR
+ with type 'a obs = top:bool -> string
+ and type 'a path_repr = 'a -> string
diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml
new file mode 100644
index 0000000..0360e8e
--- /dev/null
+++ b/lib/expression/sym.ml
@@ -0,0 +1,71 @@
+(** The signature for an expression analyzer.
+
+ Every element is mapped to a function, using the tagless final pattern.
+
+ *)
+module type SYM_EXPR = sig
+ type 'a repr
+ type 'a obs
+ type 'a path_repr
+
+ val empty : unit -> 'a repr
+ val expr : 'a repr -> 'a repr
+ val literal : string -> 'a repr
+ val integer : string -> 'a repr
+ val path : 'a path_repr -> 'a -> 'a repr
+ val concat : 'a repr list -> 'a repr
+ val window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr
+ val nvl : 'a repr list -> 'a repr
+ val join : string -> 'a repr list -> 'a repr
+ val boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr
+ val gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr
+ val funct : string -> 'a repr list -> 'a repr
+ val function' : T.funct -> 'a repr list -> 'a repr
+ val observe : 'a repr -> 'a obs
+end
+
+module type SYM_CHUNK = sig
+ include SYM_EXPR
+
+ val group : 'a repr list -> 'a repr
+ val arguments : 'a repr list -> 'a repr
+end
+
+open StdLabels
+
+module M (Expr : SYM_EXPR) = struct
+ let rec eval : path_repr:'a Expr.path_repr -> 'a T.t -> 'c Expr.repr =
+ fun ~path_repr t ->
+ match t with
+ | T.Expr expr -> Expr.expr (eval ~path_repr expr)
+ | T.Empty -> Expr.empty ()
+ | T.Literal s -> Expr.literal s
+ | T.Integer i -> Expr.integer i
+ | T.Concat elems -> Expr.concat (List.map elems ~f:(eval ~path_repr))
+ | T.Function (name, args) ->
+ Expr.funct name (List.map args ~f:(eval ~path_repr))
+ | T.Function' (name, args) ->
+ Expr.function' name (List.map args ~f:(eval ~path_repr))
+ | T.Nvl elems -> Expr.nvl (List.map elems ~f:(eval ~path_repr))
+ | T.Join (sep, args) -> Expr.join sep (List.map args ~f:(eval ~path_repr))
+ | T.Window (name, group, sort) ->
+ Expr.window
+ (eval_window ~path_repr name)
+ (List.map group ~f:(eval ~path_repr))
+ (List.map sort ~f:(eval ~path_repr))
+ | T.BOperator (op, arg1, arg2) ->
+ Expr.boperator op (eval ~path_repr arg1) (eval ~path_repr arg2)
+ | T.GEquality (op, arg1, arg2) ->
+ Expr.gequality op (eval ~path_repr arg1)
+ (List.map arg2 ~f:(eval ~path_repr))
+ | T.Path p -> Expr.path path_repr p
+
+ and eval_window :
+ path_repr:'a Expr.path_repr -> 'a T.t T.window -> 'a Expr.repr T.window =
+ fun ~path_repr -> function
+ | Min a -> Min (eval ~path_repr a)
+ | Max a -> Max (eval ~path_repr a)
+ | Counter -> Counter
+ | Previous a -> Previous (eval ~path_repr a)
+ | Sum a -> Sum (eval ~path_repr a)
+end
diff --git a/lib/expression/t.ml b/lib/expression/t.ml
new file mode 100644
index 0000000..7e61317
--- /dev/null
+++ b/lib/expression/t.ml
@@ -0,0 +1,153 @@
+open StdLabels
+
+type 'a window =
+ | Min of 'a
+ | Max of 'a
+ | Counter
+ | Previous of 'a
+ | Sum of 'a
+
+type 'a t =
+ | Empty
+ | Expr of 'a t
+ | Literal of string
+ | Integer of string
+ | Path of 'a
+ | Concat of 'a t list
+ | Function of string * 'a t list
+ | Nvl of 'a t list
+ | Join of string * 'a t list
+ | Window of ('a t window * 'a t list * 'a t list)
+ | BOperator of binary_operator * 'a t * 'a t
+ | GEquality of binary_operator * 'a t * 'a t list
+ | Function' of funct * 'a t list
+
+and binary_operator =
+ | Equal
+ | Different
+ | Add
+ | Minus
+ | Division
+ | LT
+ | GT
+ | And
+ | Or
+
+and funct =
+ | Upper
+ | Trim
+
+let name_of_function = function
+ | Upper -> "UPPER"
+ | Trim -> "TRIM"
+
+let name_of_operator = function
+ | Equal -> "="
+ | Different -> "<>"
+ | Add -> "+"
+ | Minus -> "-"
+ | Division -> "/"
+ | LT -> "<"
+ | GT -> ">"
+ | And -> " and "
+ | Or -> " or "
+
+let name_of_window = function
+ | Min _ -> "min"
+ | Max _ -> "max"
+ | Counter -> "counter"
+ | Previous _ -> "previous"
+ | Sum _ -> "sum"
+
+let map_window : f:('a -> 'b) -> 'a window -> 'b window =
+ fun ~f -> function
+ | Min t -> Min (f t)
+ | Max t -> Max (f t)
+ | Counter -> Counter
+ | Previous t -> Previous (f t)
+ | Sum t -> Sum (f t)
+
+(** Extract the kind of the window function from the given name. *)
+let window_of_name name opt =
+ match (name, opt) with
+ | "min", Some p -> Min p
+ | "max", Some p -> Max p
+ | "counter", None -> Counter
+ | "previous", Some p -> Previous p
+ | "sum", Some p -> Sum p
+ | _other -> raise Not_found
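+
+(* Example (illustrative): the optional argument carries the expression the
+   window function operates on; any other combination raises [Not_found]:
+
+   let () =
+     assert (window_of_name "counter" None = Counter);
+     assert (window_of_name "sum" (Some 1) = Sum 1)
+*)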
+
+let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int =
+ fun f e1 e2 ->
+ match (e1, e2) with
+ | Empty, Empty -> 0
+ | Literal l1, Literal l2 -> String.compare l1 l2
+ | Integer l1, Integer l2 -> String.compare l1 l2
+ | Path p1, Path p2 -> f p1 p2
+ | Concat elems1, Concat elems2 | Nvl elems1, Nvl elems2 ->
+ List.compare ~cmp:(cmp f) elems1 elems2
+ | Function (n1, elems1), Function (n2, elems2) ->
+ let name_cmp = String.compare n1 n2 in
+ if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp
+ | Window (s1, l11, l12), Window (s2, l21, l22) -> (
+ match compare s1 s2 with
+ | 0 ->
+ let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in
+ if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp
+ | other -> other)
+ | BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin
+ match compare n1 n2 with
+ | 0 -> begin
+ match cmp f arg11 arg21 with
+ | 0 -> cmp f arg12 arg22
+ | other -> other
+ end
+ | other -> other
+ end
+ (* Any other case *)
+ | other1, other2 -> Stdlib.compare other1 other2
+
+let fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b =
+ fun ~f ~init expression ->
+ let rec _f acc = function
+ | Empty | Literal _ | Integer _ -> acc
+ | Expr e -> _f acc e
+ | Path p -> f acc p
+ | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
+ -> List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc pp
+ | Window (window_f, pp1, pp2) ->
+ (* Each window function can have a distinct parameter first. *)
+ let acc' =
+ match window_f with
+ | Counter -> acc
+ | Min key | Max key | Previous key | Sum key -> _f acc key
+ in
+ let eval1 = List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc' pp1 in
+ List.fold_left ~f:(fun acc a -> _f acc a) ~init:eval1 pp2
+ | BOperator (_, arg1, arg2) -> _f (_f acc arg1) arg2
+ | GEquality (_, arg1, arg2) ->
+ let eval1 = List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc arg2 in
+ _f eval1 arg1
+ in
+ _f init expression
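+
+(* Example (illustrative): collect every path referenced by an expression;
+   the traversal order is not significant:
+
+   let paths_of expr = fold_values ~f:(fun acc p -> p :: acc) ~init:[] expr
+*)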
+
+let map : type a b. f:(a -> b) -> a t -> b t =
+ fun ~f expression ->
+ let rec map = function
+ | Expr e -> Expr (map e)
+ | Empty -> Empty
+ | Literal s -> Literal s
+ | Integer i -> Integer i
+ | Path p -> Path (f p)
+ | Concat pp -> Concat (List.map ~f:map pp)
+ | Function' (name, pp) -> Function' (name, List.map ~f:map pp)
+ | Function (name, pp) -> Function (name, List.map ~f:map pp)
+ | Nvl pp -> Nvl (List.map ~f:map pp)
+ | Join (sep, pp) -> Join (sep, List.map ~f:map pp)
+ | Window (window_f, pp1, pp2) ->
+ let w = map_window ~f:map window_f in
+ Window (w, List.map ~f:map pp1, List.map ~f:map pp2)
+ | BOperator (n, arg1, arg2) -> BOperator (n, map arg1, map arg2)
+ | GEquality (n, arg1, args) -> GEquality (n, map arg1, List.map ~f:map args)
+ in
+ map expression
diff --git a/lib/expression/t.mli b/lib/expression/t.mli
new file mode 100644
index 0000000..840805d
--- /dev/null
+++ b/lib/expression/t.mli
@@ -0,0 +1,54 @@
+type 'a window =
+ | Min of 'a
+ | Max of 'a
+ | Counter
+ | Previous of 'a
+ | Sum of 'a
+
+type 'a t =
+ | Empty
+ | Expr of 'a t
+ | Literal of string
+ | Integer of string
+ | Path of 'a
+ | Concat of 'a t list
+ | Function of string * 'a t list
+ | Nvl of 'a t list
+ | Join of string * 'a t list
+ | Window of ('a t window * 'a t list * 'a t list)
+ | BOperator of binary_operator * 'a t * 'a t
+ | GEquality of binary_operator * 'a t * 'a t list
+ | Function' of funct * 'a t list
+
+and binary_operator =
+ | Equal
+ | Different
+ | Add
+ | Minus
+ | Division
+ | LT
+ | GT
+ | And
+ | Or
+
+and funct =
+ | Upper
+ | Trim
+
+val cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int
+(** Compare two expressions *)
+
+val fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b
+(** Fold over all the paths present inside the expression. Used for example to
+ identify all the columns to extract from the file.
+
+ The order is not guaranteed to follow the order of the expression *)
+
+val map : f:('a -> 'b) -> 'a t -> 'b t
+(** The map function. Mainly used in the configuration migration. *)
+
+val name_of_operator : binary_operator -> string
+val name_of_window : 'a window -> string
+val map_window : f:('a -> 'b) -> 'a window -> 'b window
+val window_of_name : string -> 'a option -> 'a window
+val name_of_function : funct -> string
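A minimal usage sketch for this interface (assuming the paths are plain strings; the expression and the names below are illustrative only):

    let expr = ImportExpression.T.(Concat [ Path "a"; Literal "-"; Path "b" ])

    (* Collect every path referenced in the expression (order not guaranteed) *)
    let paths =
      ImportExpression.T.fold_values ~init:[] ~f:(fun acc p -> p :: acc) expr

    (* Rewrite every path, as done during a configuration migration *)
    let renamed = ImportExpression.T.map ~f:String.uppercase_ascii expr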
diff --git a/lib/expression/type_of.ml b/lib/expression/type_of.ml
new file mode 100644
index 0000000..ce1a17e
--- /dev/null
+++ b/lib/expression/type_of.ml
@@ -0,0 +1,150 @@
+(**
+ This module evaluates the type of an expression.
+
+ The type is derived from an analysis of all the components involved inside
+ the expression. It is used inside the [query] module in order to check if a
+ type needs a conversion before being used.
+ *)
+
+open StdLabels
+
+module Lazy_Repr =
+ Compose.Expression
+ (Lazier.Make
+ (Repr.E))
+ (struct
+ let v _ = ""
+ end)
+
+type t = ImportDataTypes.Types.t
+
+(** Fold over the list of parameters and ensure all the elements are typed in
+    the same way *)
+let group' : t list -> t =
+ fun elements ->
+ List.fold_left elements ~init:None
+ ~f:(fun (acc : ImportDataTypes.Types.t option) v ->
+ match acc with
+ | None -> Some v
+ | Some t when t = v -> acc
+ | _ -> Some Extern)
+ |> Option.value ~default:ImportDataTypes.Types.None
+
+include Lazy_Repr.Make (struct
+ type nonrec t = t
+ type 'a repr = t
+ type 'a obs = ImportDataTypes.Types.t
+ type 'a path_repr = 'a -> unit
+
+ let observe : 'a Repr.E.obs Lazy.t * 'a repr -> 'a obs = snd
+
+ let empty : 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ -> ImportDataTypes.Types.None
+
+ let expr : 'a Repr.E.obs Lazy.t * 'a repr -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun e _ -> snd e
+
+ let literal : string -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.String
+
+ let integer : string -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.Number
+
+ let path : 'b path_repr -> 'b -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ _ -> ImportDataTypes.Types.Extern
+
+ let concat :
+ ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun _ _ -> ImportDataTypes.Types.String
+
+ let window :
+ ('a Repr.E.obs Lazy.t * 'a repr) T.window ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions order _ ->
+ ignore order;
+ ignore expressions;
+ match name with
+ | T.Counter | T.Max _ | T.Min _ | T.Sum _ -> Number
+ | T.Previous expr -> snd expr
+
+ let nvl :
+ ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr =
+ fun v _ -> group' (List.map ~f:snd v)
+
+ let join :
+ string ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun _ _ _ -> ImportDataTypes.Types.String
+
+ let boperator :
+ T.binary_operator ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name _ _ _ ->
+ match name with
+ | T.Equal | T.Different | T.LT | T.GT -> Bool
+ | T.Add | T.Minus -> Number
+ | T.Division -> Float
+ | T.And | T.Or -> Bool
+
+ let gequality :
+ T.binary_operator ->
+ 'a Repr.E.obs Lazy.t * 'a repr ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name _ _ _ ->
+ match name with
+ | T.Equal | T.Different -> Bool
+ | _ -> None
+
+ let function' :
+ T.funct ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions _ ->
+ ignore expressions;
+ match name with
+ | Upper | Trim -> String
+
+ let check : expected:t -> actual:t -> string -> 'a Repr.E.obs Lazy.t -> t =
+ fun ~expected ~actual subset expr ->
+ if actual = expected then actual
+ else
+ let expression = (Lazy.force expr) ~top:false in
+ raise (ImportErrors.TypeError { expression; subset; expected; actual })
+
+ let funct :
+ string ->
+ ('a Repr.E.obs Lazy.t * 'a repr) list ->
+ 'a Repr.E.obs Lazy.t ->
+ 'a repr =
+ fun name expressions repr ->
+ match name with
+ | "if" -> begin
+ match expressions with
+ | [] -> Extern
+ | (_, hd) :: arg1 :: _ when hd = Bool -> snd arg1
+ | (_, hd) :: _ ->
+ let expected = ImportDataTypes.Types.Bool and actual = hd in
+ check ~expected ~actual "the predicate" repr
+ end
+ | _ -> Extern
+end)
+
+let group :
+ ('a Lazier.Make(Repr.E).repr * t) list -> 'a Lazier.Make(Repr.E).repr * t =
+ fun v ->
+ let v' = group' (List.map v ~f:snd) in
+ let l = lazy (Repr.E.empty ()) in
+ (l, v')
+
+let arguments = group
diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli
new file mode 100644
index 0000000..7a11582
--- /dev/null
+++ b/lib/expression/type_of.mli
@@ -0,0 +1,10 @@
+(** This module tries to identify the type of an expression.
+
+    The references to data coming from the spreadsheet cannot be evaluated and
+    are marked as [Extern]. *)
+
+type t = ImportDataTypes.Types.t
+
+include Sym.SYM_CHUNK with type 'a obs = t and type 'a path_repr = 'a -> unit
+
+val group' : t list -> t
diff --git a/lib/file_handler/csv2sql.ml b/lib/file_handler/csv2sql.ml
new file mode 100644
index 0000000..42d84eb
--- /dev/null
+++ b/lib/file_handler/csv2sql.ml
@@ -0,0 +1,135 @@
+open StdLabels
+module A = ImportAnalyser.Dependency
+module CSV = ImportCSV
+module C = ImportContainers
+module Syntax = ImportConf.Syntax
+module Db = ImportSQL.Db
+
+type state = CSV.DataType.t array State.t
+
+let default_mapper :
+ (ImportCSV.DataType.t, ImportCSV.DataType.t array) State.mapper =
+ { get_row = Fun.id; get_value = Fun.id; default = ImportCSV.DataType.Null }
+
+let extract_values : string -> CSV.DataType.t =
+ fun value ->
+ (* Test first if the content is empty *)
+ if String.equal String.empty value then CSV.DataType.Null
+ else
+    (* else, try different conversions in order to see which one works *)
+ match int_of_string_opt value with
+ | Some i -> CSV.DataType.Integer i
+ | None -> (
+ match float_of_string_opt value with
+ | Some f -> CSV.DataType.Float f
+ | None ->
+          (* And finally keep the value as raw text *)
+ CSV.DataType.Content value)
+
+(** Initialize the state from the first row: count the columns and create the
+    table in the database *)
+let first_row : A.t -> _ Db.t -> state -> CSV.DataType.t list -> state =
+ fun mapping db acc row ->
+ (if acc.transaction then
+ match Db.commit db with
+ | Ok () -> ()
+ | Error e -> print_endline (ImportErrors.repr_error e));
+
+ ignore @@ Db.create_table db mapping;
+ let row = Array.of_list row in
+ match Db.prepare_insert db mapping with
+ | Ok stmt ->
+ {
+ acc with
+ header = Some row;
+ transaction = false;
+ insert_stmt = Some stmt;
+ row_number = acc.row_number + 1;
+ }
+ | _ -> { acc with header = Some row; transaction = false; insert_stmt = None }
+
+let read_csv_line :
+ log_error:ImportErrors.t -> A.t -> 'a Db.t -> state -> string list -> state
+ =
+ fun ~log_error mapping db acc row ->
+ let processed_row =
+ List.to_seq row |> Seq.map extract_values |> Array.of_seq
+ in
+ if acc.State.transaction then
+ State.run_row ~log_error ~mapper:default_mapper mapping db processed_row acc
+ else
+ match Db.begin_transaction db with
+ | Error e ->
+ print_endline (ImportErrors.repr_error e);
+ acc
+ | Ok () ->
+ let acc = { acc with transaction = true } in
+ State.run_row ~log_error ~mapper:default_mapper mapping db processed_row
+ acc
+
+let importInDatable :
+ log_error:ImportErrors.t ->
+ conf:Syntax.t ->
+ dirname:string ->
+ A.t ->
+ 'a Db.t ->
+ CSV.DataType.t array option Lwt.t =
+ fun ~log_error ~conf ~dirname mapping db ->
+ let file = Filename.concat dirname (A.table mapping).file in
+
+ let channel = Stdlib.open_in_bin file in
+
+ let csv_channel = Csv.of_channel ~separator:';' ~excel_tricks:true channel in
+
+  (* In the headers, each value is kept as a string.
+
+     This line could raise an error if the headers are not correctly defined.
+  *)
+ let header =
+ List.map ~f:(fun v -> CSV.DataType.Content v) (Csv.next csv_channel)
+ in
+
+ let state =
+ State.
+ {
+ transaction = false;
+ header = None;
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = 1;
+ sheet_number = 1;
+ delayed = [];
+ }
+ in
+ let state = first_row mapping db state header in
+
+ let state =
+ try
+ Csv.fold_left csv_channel ~init:state
+ ~f:(read_csv_line ~log_error mapping db)
+ with
+ | Csv.Failure (line, row, cause) as e ->
+ Printf.eprintf "Error %s on line %d — field : %s\n" cause line
+ (ImportCSV.Csv.column_to_string row);
+ raise e
+ in
+ ignore @@ State.clear ~log_error db mapping conf;
+ ignore @@ Db.commit db;
+
+ (* Finalize the statements created during the import *)
+ let () =
+ Option.iter (fun v -> ignore @@ Db.finalize v) state.insert_stmt;
+ Option.iter (fun v -> ignore @@ Db.finalize v) state.check_key_stmt
+ in
+
+ (* Insert all the headers *)
+ let _ =
+ Option.iter
+ (fun headers ->
+ let values = Array.mapi headers ~f:(fun i value -> (i, value)) in
+
+ ignore
+ @@ Db.insert_header db (ImportAnalyser.Dependency.table mapping) values)
+ state.header
+ in
+ Lwt.return state.header
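For reference, a short sketch of the conversions performed by [extract_values] above (illustrative inputs, reusing the [CSV] alias defined in this file):

    let () =
      assert (extract_values "" = CSV.DataType.Null);
      assert (extract_values "42" = CSV.DataType.Integer 42);
      assert (extract_values "3.14" = CSV.DataType.Float 3.14);
      assert (extract_values "text" = CSV.DataType.Content "text")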
diff --git a/lib/file_handler/csv2sql.mli b/lib/file_handler/csv2sql.mli
new file mode 100644
index 0000000..e09737b
--- /dev/null
+++ b/lib/file_handler/csv2sql.mli
@@ -0,0 +1,10 @@
+val importInDatable :
+ log_error:ImportErrors.t ->
+ conf:ImportConf.Syntax.t ->
+ dirname:string ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ ImportCSV.DataType.t array option Lwt.t
+(** Load a CSV file into an SQLite database.
+
+    Return the header if at least one row was present *)
diff --git a/lib/file_handler/dune b/lib/file_handler/dune
new file mode 100755
index 0000000..6b247db
--- /dev/null
+++ b/lib/file_handler/dune
@@ -0,0 +1,21 @@
+(library
+ (name importFileHandler)
+ (libraries
+ csv
+ SZXX
+ sqlite3
+ base
+ core
+ lwt
+ lwt.unix
+ helpers
+ importAnalyser
+ importConf
+ importContainers
+ importCSV
+ importDataTypes
+ importErrors
+ importExpression
+ importSQL
+ )
+)
diff --git a/lib/file_handler/state.ml b/lib/file_handler/state.ml
new file mode 100644
index 0000000..5b43aff
--- /dev/null
+++ b/lib/file_handler/state.ml
@@ -0,0 +1,178 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+
+type 'a t = {
+ header : 'a option;
+ transaction : bool;
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+ row_number : int;
+ sheet_number : int;
+ delayed : 'a list;
+}
+
+type insert_result = {
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+}
+
+type ('a, 'b) mapper = {
+ get_row : 'b -> 'a Array.t;
+ get_value : 'a -> ImportCSV.DataType.t;
+ default : 'a;
+}
+
+module A = ImportAnalyser.Dependency
+
+let insert_row :
+ mapper:(_, 'row) mapper ->
+ A.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ _ t ->
+ (insert_result, ImportErrors.xlsError) result =
+ fun ~mapper mapping db row state ->
+ (* Extract all columns referenced in the keys or the columns to extract *)
+ let keys_id =
+ List.fold_left (A.keys mapping) ~init:ImportContainers.IntSet.empty
+ ~f:(fun acc (keys : A.key) ->
+ let columns = keys.A.columns in
+ ImportContainers.IntSet.union acc (Lazy.force columns))
+ and columns_id = A.columns mapping in
+ let ids = ImportContainers.IntSet.(union keys_id columns_id |> elements) in
+
+ (* Filter only the required columns in the row *)
+ let values =
+ List.map ids ~f:(fun i ->
+ let index = i - 1 in
+ let value =
+ try Array.get (mapper.get_row row) index with
+ | Stdlib.Invalid_argument _ ->
+              (* If we have more headers than data, assume the values are NULL.
+                 This can happen when the whole tail of the line is empty: Excel
+                 can give us a truncated line instead of a series of NULLs *)
+ mapper.default
+ in
+ (index, mapper.get_value value))
+ in
+ let keys = A.keys mapping in
+
+ let execution =
+ let ( let* ) = Result.bind in
+ let* check_key_stmt, result =
+ ImportSQL.Db.eval_key db state.check_key_stmt keys values
+ in
+ let no_null =
+      (* We check that at least one key is not null; when every key is null,
+         the line is ignored.
+
+         If multiple keys are present, we only require at least one of them to
+         be non null here.
+      *)
+ match result with
+ | [] -> true
+ | _ ->
+ List.exists result ~f:(function
+ | Sqlite3.Data.FLOAT _ | Sqlite3.Data.INT _ -> true
+ | Sqlite3.Data.BLOB t | Sqlite3.Data.TEXT t ->
+ not (String.equal "" t)
+ | Sqlite3.Data.NONE | Sqlite3.Data.NULL -> false)
+ in
+ let* _ =
+ match no_null with
+ | true -> Ok ()
+ | false -> Error (Failure "The key is null")
+ in
+
+ let* statement =
+ match state.insert_stmt with
+ | None -> ImportSQL.Db.prepare_insert db mapping
+ | Some v -> Ok v
+ in
+ let* _ = ImportSQL.Db.insert db statement ~id:state.row_number values in
+ let* _ = ImportSQL.Db.reset statement in
+
+ Helpers.Console.update_cursor ();
+ Ok { insert_stmt = Some statement; check_key_stmt }
+ in
+
+ (* In case of error, wrap the exception to get the line *)
+ Result.map_error
+ (fun e ->
+ ImportErrors.
+ {
+ source = ImportAnalyser.Dependency.table mapping;
+ sheet = state.sheet_number;
+ row = state.row_number;
+ target = None;
+ value = CSV.DataType.Content (String.concat ~sep:", " []);
+ exn = e;
+ })
+ execution
+
+(** Load the row with all the information associated with this sheet.
+
+    If an error has already been raised during the sheet, only this row is
+    ignored. *)
+let run_row :
+ log_error:ImportErrors.t ->
+ mapper:(_, 'row) mapper ->
+ A.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ 'a t ->
+ 'a t =
+ fun ~log_error ~mapper mapping db row state ->
+ match insert_row ~mapper mapping db row state with
+ | Ok { insert_stmt; check_key_stmt } ->
+ {
+ state with
+ insert_stmt;
+ check_key_stmt;
+ row_number = state.row_number + 1;
+ }
+ | Error e ->
+ Option.iter (fun v -> ignore @@ ImportSQL.Db.finalize v) state.insert_stmt;
+ Option.iter
+ (fun v -> ignore @@ ImportSQL.Db.finalize v)
+ state.check_key_stmt;
+ ImportErrors.output_error log_error e;
+ {
+ state with
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = state.row_number + 1;
+ }
+
+let clear :
+ log_error:ImportErrors.t ->
+ 'a ImportSQL.Db.t ->
+ A.t ->
+ ImportConf.Syntax.t ->
+ unit ImportSQL.Db.result =
+ fun ~log_error db mapping conf ->
+ ImportSQL.Db.clear_duplicates db (A.table mapping) (A.keys mapping)
+ ~f:(fun values ->
+ let line =
+ match snd @@ Array.get values 0 with
+ | ImportCSV.DataType.Integer i -> i
+ | _ -> -1
+ and value = snd @@ Array.get values 1
+ and target =
+ match snd @@ Array.get values 2 with
+ | ImportCSV.DataType.Content s ->
+ Some (ImportConf.get_table_for_name conf (Some s))
+ | _ -> None
+ in
+ let error =
+ ImportErrors.
+ {
+ source = A.table mapping;
+ sheet = (A.table mapping).tab;
+ row = line;
+ target;
+ value;
+ exn = Failure "Duplicated key";
+ }
+ in
+
+ ImportErrors.output_error log_error error)
diff --git a/lib/file_handler/state.mli b/lib/file_handler/state.mli
new file mode 100644
index 0000000..f744c33
--- /dev/null
+++ b/lib/file_handler/state.mli
@@ -0,0 +1,46 @@
+type 'a t = {
+ header : 'a option;
+ transaction : bool;
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+ row_number : int;
+ sheet_number : int;
+ delayed : 'a list;
+}
+
+type insert_result = {
+ insert_stmt : Sqlite3.stmt option;
+ check_key_stmt : Sqlite3.stmt option;
+}
+
+type ('a, 'b) mapper = {
+ get_row : 'b -> 'a Array.t;
+ get_value : 'a -> ImportCSV.DataType.t;
+ default : 'a;
+}
+
+val insert_row :
+ mapper:(_, 'row) mapper ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ _ t ->
+ (insert_result, ImportErrors.xlsError) result
+(** Low level row insertion *)
+
+val run_row :
+ log_error:ImportErrors.t ->
+ mapper:(_, 'row) mapper ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ 'row ->
+ 'a t ->
+ 'a t
+
+val clear :
+ log_error:ImportErrors.t ->
+ 'a ImportSQL.Db.t ->
+ ImportAnalyser.Dependency.t ->
+ ImportConf.Syntax.t ->
+ unit ImportSQL.Db.result
+(** Clean up the table after the insertion: check for duplicated keys and external references *)
diff --git a/lib/file_handler/xlsx2sql.ml b/lib/file_handler/xlsx2sql.ml
new file mode 100644
index 0000000..f2d8f12
--- /dev/null
+++ b/lib/file_handler/xlsx2sql.ml
@@ -0,0 +1,205 @@
+open StdLabels
+module A = ImportAnalyser.Dependency
+module C = ImportContainers
+module CSV = ImportCSV
+module Syntax = ImportConf.Syntax
+module Db = ImportSQL.Db
+
+let flags = Unix.[ O_RDONLY; O_NONBLOCK ]
+
+let extractors =
+ SZXX.Xlsx.
+ {
+ string = (fun _location s -> CSV.DataType.Content s);
+ error =
+ (fun _location s -> CSV.DataType.Error (Printf.sprintf "#ERROR# %s" s));
+ boolean =
+ (fun _location s ->
+ let value = String.(equal s "1") in
+ CSV.DataType.Content (string_of_bool value));
+ number =
+ (fun _location s ->
+ let f = Float.of_string s in
+ if Float.is_integer f then CSV.DataType.Integer (Float.to_int f)
+ else CSV.DataType.Float f);
+ date = (fun _location s -> CSV.DataType.Content s);
+ null = CSV.DataType.Null;
+ formula =
+ (fun _location ~formula s ->
+ ignore formula;
+ CSV.DataType.Content s);
+ }
+
+let feed_bigstring ic =
+ let open Lwt.Infix in
+ let len = Lwt_io.buffer_size ic in
+ let buf = Lwt_bytes.create len in
+ SZXX.Zip.Bigstring
+ (fun () ->
+ Lwt_io.read_into_bigstring ic buf 0 len >|= function
+ | 0 -> None
+ | len -> Some SZXX.Zip.{ buf; pos = 0; len })
+
+(* Evaluate if the row can be processed right now (does not contain
+ any delayed value) *)
+let is_delayed row =
+ Array.exists row.SZXX.Xlsx.data ~f:(function
+ | SZXX.Xlsx.Delayed _ -> true
+ | _ -> false)
+
+let default_mapper :
+ (ImportCSV.DataType.t, ImportCSV.DataType.t SZXX.Xlsx.row) State.mapper =
+ {
+ get_value =
+ (function
+ | ImportCSV.DataType.Content s ->
+ ImportCSV.DataType.Content (SZXX.Xml.unescape s)
+ | any -> any);
+ default = ImportCSV.DataType.Null;
+ get_row = (fun v -> v.SZXX.Xlsx.data);
+ }
+
+type state = CSV.DataType.t SZXX.Xlsx.status SZXX.Xlsx.row State.t
+
+let delayed_mapper =
+ State.
+ {
+ get_value =
+ (function
+ | SZXX.Xlsx.Available (CSV.DataType.Content s) ->
+ CSV.DataType.Content (SZXX.Xml.unescape s)
+ | SZXX.Xlsx.Available value -> value
+ | _ -> CSV.DataType.Null);
+ default = SZXX.Xlsx.Available CSV.DataType.Null;
+ get_row = (fun v -> v.SZXX.Xlsx.data);
+ }
+
+(** Initialize the state from the first row: count the columns and create the
+    table in the database *)
+let first_row : A.t -> _ Db.t -> state -> 'a SZXX.Xlsx.row -> state =
+ fun mapping db acc row ->
+ (if acc.transaction then
+ match Db.commit db with
+ | Ok () -> ()
+ | Error e -> print_endline (ImportErrors.repr_error e));
+
+ ignore @@ Db.create_table db mapping;
+ match Db.prepare_insert db mapping with
+ | Ok stmt ->
+ {
+ acc with
+ header = Some row;
+ transaction = false;
+ insert_stmt = Some stmt;
+ }
+ | _ -> { acc with header = Some row; transaction = false; insert_stmt = None }
+
+let importInDatable :
+ log_error:Csv.out_channel Lazy.t ->
+ conf:Syntax.t ->
+ dirname:string ->
+ A.t ->
+ 'a Db.t ->
+ CSV.DataType.t array option Lwt.t =
+ fun ~log_error ~conf ~dirname mapping db ->
+ let file = Filename.concat dirname (A.table mapping).file in
+
+ Lwt_io.with_file ~flags ~mode:Input file (fun ic ->
+ let open Lwt.Syntax in
+ let stream, sst_p, success =
+ SZXX.Xlsx.stream_rows ~only_sheet:(A.table mapping).tab
+ ~feed:(feed_bigstring ic) extractors
+ in
+ let* processed =
+ Lwt_stream.fold
+ (fun row acc ->
+ (* Create the table on the first line *)
+ if Int.equal 1 row.SZXX.Xlsx.row_number then
+ first_row mapping db acc row
+ else
+ match is_delayed row with
+ | true -> { acc with delayed = row :: acc.delayed }
+ | false -> (
+ let row_number = row.SZXX.Xlsx.row_number in
+ if acc.transaction then
+ State.run_row ~log_error ~mapper:delayed_mapper mapping db
+ row { acc with row_number }
+ else
+ match Db.begin_transaction db with
+ | Error e ->
+ print_endline (ImportErrors.repr_error e);
+ acc
+ | Ok () ->
+ let acc = { acc with transaction = true; row_number } in
+ State.run_row ~log_error ~mapper:delayed_mapper mapping
+ db row acc))
+ stream
+ {
+ transaction = false;
+ header = None;
+ delayed = [];
+ insert_stmt = None;
+ check_key_stmt = None;
+ row_number = 1;
+ sheet_number = (A.table mapping).tab;
+ }
+ in
+ (* Wait to reach the sst *)
+ let* sst = sst_p in
+
+ if processed.transaction then ignore (Db.commit db);
+
+ (* Insert the missing elements *)
+ ignore @@ Db.begin_transaction db;
+ List.iter processed.delayed ~f:(fun row ->
+ let fully_available_row =
+ SZXX.Xlsx.unwrap_status extractors sst row
+ in
+
+ let row_number = row.SZXX.Xlsx.row_number in
+
+ match
+ State.insert_row ~mapper:default_mapper mapping db
+ fully_available_row
+ { processed with row_number }
+ with
+ | Ok _ -> ()
+ | Error e ->
+ ImportErrors.output_error log_error e;
+ ());
+
+ ignore @@ State.clear ~log_error db mapping conf;
+ ignore @@ Db.commit db;
+
+ (* Finalize the statements created during the import *)
+ let () =
+ Option.iter (fun v -> ignore @@ Db.finalize v) processed.insert_stmt;
+ Option.iter (fun v -> ignore @@ Db.finalize v) processed.check_key_stmt
+ in
+
+ let _ =
+ Option.iter
+ (fun headers ->
+ let res = SZXX.Xlsx.unwrap_status extractors sst headers in
+
+ let values = Array.mapi res.data ~f:(fun i value -> (i, value)) in
+
+ ignore
+ @@ Db.insert_header db
+ (ImportAnalyser.Dependency.table mapping)
+ values)
+ processed.header
+ in
+
+ let header =
+ Option.map
+ (fun header ->
+ let res = SZXX.Xlsx.unwrap_status extractors sst header in
+ res.data)
+ processed.header
+ in
+
+ (* Finalize the process *)
+ let* () = success in
+
+ Lwt.return header)
diff --git a/lib/file_handler/xlsx2sql.mli b/lib/file_handler/xlsx2sql.mli
new file mode 100644
index 0000000..e09737b
--- /dev/null
+++ b/lib/file_handler/xlsx2sql.mli
@@ -0,0 +1,10 @@
+val importInDatable :
+ log_error:ImportErrors.t ->
+ conf:ImportConf.Syntax.t ->
+ dirname:string ->
+ ImportAnalyser.Dependency.t ->
+ _ ImportSQL.Db.t ->
+ ImportCSV.DataType.t array option Lwt.t
+(** Load an Excel spreadsheet into an SQLite database.
+
+    Return the header if at least one row was present *)
diff --git a/lib/helpers/console.ml b/lib/helpers/console.ml
new file mode 100644
index 0000000..838b25a
--- /dev/null
+++ b/lib/helpers/console.ml
@@ -0,0 +1,16 @@
+let cursors = [| '|'; '/'; '-'; '\\' |]
+let pos = ref 0
+
+let update_cursor () =
+ if Unix.(isatty stdout) then (
+ Printf.printf "%c[?25l%c[1D%c[0K%c" (char_of_int 27) (char_of_int 27)
+ (char_of_int 27) (Array.get cursors !pos);
+ pos := (!pos + 1) mod Array.length cursors)
+
+let close_cursor () =
+ if Unix.(isatty stdout) then
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) (char_of_int 27)
+ (char_of_int 27)
+
+let clear_line () =
+ if Unix.(isatty stdout) then Printf.printf "%c[2K\r%!" (char_of_int 27)
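A minimal usage sketch for the spinner above, assuming it is driven from a processing loop:

    let () =
      for _ = 1 to 1_000 do
        (* one unit of work here *)
        Helpers.Console.update_cursor ()
      done;
      Helpers.Console.close_cursor ()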
diff --git a/lib/helpers/console.mli b/lib/helpers/console.mli
new file mode 100644
index 0000000..289d55c
--- /dev/null
+++ b/lib/helpers/console.mli
@@ -0,0 +1,5 @@
+val update_cursor : unit -> unit
+val close_cursor : unit -> unit
+
+val clear_line : unit -> unit
+(** Clear the entire line *)
diff --git a/lib/helpers/dune b/lib/helpers/dune
new file mode 100755
index 0000000..8e30d2b
--- /dev/null
+++ b/lib/helpers/dune
@@ -0,0 +1,8 @@
+(library
+ (name helpers)
+ (libraries
+ calendar
+ decoders
+ otoml
+ )
+)
diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml
new file mode 100755
index 0000000..9d6fcb8
--- /dev/null
+++ b/lib/helpers/helpers.ml
@@ -0,0 +1,45 @@
+module Toml = Toml
+module Console = Console
+
+let date_from_csv : string -> CalendarLib.Date.t option =
+ fun value ->
+ let open CalendarLib.Date in
+ try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
+ | _ -> (
+ (* If the date is a number, try from julian day *)
+ match int_of_string_opt value with
+ | None -> None
+ | Some v -> Some (add (make 1899 12 30) (Period.day v)))
+
+let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
+ fun f acc b ->
+ match f acc b with
+ | None -> acc
+ | Some v -> v
+
+let try_opt exp =
+ try Some (exp ()) with
+ | _ -> None
+
+let repr_date formatter date =
+ Format.fprintf formatter "%02d/%02d/%d"
+ (CalendarLib.Date.day_of_month date)
+ CalendarLib.Date.(int_of_month @@ month date)
+ (CalendarLib.Date.year date)
+
+let s_repr_date date =
+ Format.sprintf "%02d/%02d/%d"
+ (CalendarLib.Date.day_of_month date)
+ CalendarLib.Date.(int_of_month @@ month date)
+ (CalendarLib.Date.year date)
+
+let repr_opt f channel = function
+ | None -> ()
+ | Some v -> f channel v
+
+let str_format f =
+ let buffer = Buffer.create 16 in
+ let formatter = Format.formatter_of_buffer buffer in
+ f formatter;
+ Format.pp_print_flush formatter ();
+ Buffer.contents buffer
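As an illustration of [date_from_csv] above, both input forms are accepted (values are examples):

    (* Day/month/year notation *)
    let d1 = Helpers.date_from_csv "31/12/2020"

    (* A number is read as a day count from 1899-12-30, here 2020-12-31 as well *)
    let d2 = Helpers.date_from_csv "44196"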
diff --git a/lib/helpers/toml.ml b/lib/helpers/toml.ml
new file mode 100644
index 0000000..1b7fb15
--- /dev/null
+++ b/lib/helpers/toml.ml
@@ -0,0 +1,31 @@
+module Decode = struct
+ module S = struct
+ type value = Otoml.t
+
+ let pp : Format.formatter -> value -> unit =
+ fun format t -> Format.pp_print_string format (Otoml.Printer.to_string t)
+
+ let of_string : string -> (value, string) result =
+ Otoml.Parser.from_string_result
+
+ let of_file : string -> (value, string) result =
+ Otoml.Parser.from_file_result
+
+ let get_string : value -> string option = Otoml.get_opt Otoml.get_string
+ let get_int : value -> int option = Otoml.get_opt Otoml.get_integer
+ let get_float : value -> float option = Otoml.get_opt Otoml.get_float
+ let get_bool : value -> bool option = Otoml.get_opt Otoml.get_boolean
+ let get_null : value -> unit option = fun _ -> None
+
+ let get_list : value -> value list option =
+ Otoml.get_opt @@ Otoml.get_array Fun.id
+
+ let get_key_value_pairs : value -> (value * value) list option =
+ Otoml.get_opt (fun key ->
+ Otoml.get_table key |> List.map (fun (k, v) -> (Otoml.string k, v)))
+
+ let to_list : value list -> value = Otoml.array
+ end
+
+ include Decoders.Decode.Make (S)
+end
diff --git a/lib/helpers/toml.mli b/lib/helpers/toml.mli
new file mode 100644
index 0000000..08d30b8
--- /dev/null
+++ b/lib/helpers/toml.mli
@@ -0,0 +1 @@
+module Decode : Decoders.Decode.S with type value = Otoml.t
diff --git a/lib/sql/date.ml b/lib/sql/date.ml
new file mode 100644
index 0000000..e8933c7
--- /dev/null
+++ b/lib/sql/date.ml
@@ -0,0 +1,24 @@
+(** Parse a text value into a date, stored as a number of days since 1899-12-30 *)
+
+let first_day = CalendarLib.Date.make 1899 12 30
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun str data ->
+ match (str, data) with
+ | Sqlite3.Data.TEXT format_, Sqlite3.Data.TEXT content -> (
+ try
+ let date = CalendarLib.Printer.Date.from_fstring format_ content in
+ let period =
+ CalendarLib.Date.sub date first_day
+ |> CalendarLib.Date.Period.nb_days |> Int64.of_int
+ in
+ Sqlite3.Data.INT period
+ with
+ | Invalid_argument e ->
+ prerr_endline e;
+ Sqlite3.Data.NULL)
+ | _ ->
+ (* If the data is already a date, it should be preserved as is *)
+ data
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun2 db "date" f
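A hedged usage sketch, assuming [db] is an opened handle and [col_1] exists in the table (names are illustrative); the format string follows CalendarLib conventions:

    let () =
      Date.register db;
      (* The date() function converts a textual date into a day count *)
      ignore @@ Sqlite3.exec db "SELECT date('%d/%m/%Y', col_1) FROM 'source'"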
diff --git a/lib/sql/db.ml b/lib/sql/db.ml
new file mode 100644
index 0000000..89431b1
--- /dev/null
+++ b/lib/sql/db.ml
@@ -0,0 +1,383 @@
+open StdLabels
+module CSV = ImportCSV
+module Syntax = ImportConf.Syntax
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+type 'a t = 'a T.t
+
+let ( let* ) res cont = Result.bind (T.to_result res) cont
+let ( let** ) res cont = Result.bind res cont
+let begin_transaction = T.begin_transaction
+let rollback = T.rollback
+let commit = T.commit
+let finalize = T.finalize
+let reset = T.reset
+let insert_header = Header.insert_header
+let query_headers = Header.query_headers
+
+let with_db : string -> (Sqlite3.db -> unit T.result) -> unit T.result =
+ fun filename f ->
+ let db = Sqlite3.db_open filename in
+
+ Match.register db;
+ Date.register db;
+ Year.register db;
+ Join.register db;
+ Math.register db;
+ Trim.register db;
+
+ (*let* _ = Sqlite3.exec db "PRAGMA foreign_keys = ON" |> to_result in*)
+ Sqlite3.( let& ) db f
+
+let create_table : 'a t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let name = Table.name source in
+
+ let* _ =
+ Sqlite3.exec db
+ (String.concat ~sep:"" [ "DROP TABLE IF EXISTS '"; name; "'" ])
+ in
+
+ let query = ImportAnalyser.Query.create_table table in
+ let* _ = Sqlite3.exec db query in
+ match Header.create_table db with
+ | Ok () -> Hashs.create_table db
+ | e -> e
+
+let update_hash : 'a t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db mapping ->
+ match Hashs.insert db mapping with
+ | Ok () -> Ok ()
+ | Error _ ->
+ let _ = Hashs.create_table db in
+ Hashs.insert db mapping
+
+let check_table_schema : 'a t -> ImportAnalyser.Dependency.t -> bool T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let name = Table.name source in
+ let query =
+ String.concat ~sep:""
+ [ "SELECT sql FROM sqlite_schema WHERE name = '"; name; "'" ]
+ in
+ let stmt = Sqlite3.prepare db query in
+
+ let rc, result =
+ Sqlite3.fold stmt ~init:false ~f:(fun value row ->
+ if Array.length row <> 1 then value
+ else
+ match Sqlite3.Data.to_string (Array.get row 0) with
+ | Some s ->
+ let query = ImportAnalyser.Query.create_table table in
+ String.equal s query
+ | None -> value)
+ in
+ let* _ = rc in
+
+ (* The schema is the same, now check the hash in case the indexes changed *)
+ let rc_hash = Hashs.query db source in
+ match rc_hash with
+ | Ok (Some i) ->
+ let hash = Hashs.evaluate table in
+ begin
+ if i == hash then Ok result
+ else
+ let _ = update_hash db table in
+ Ok false
+ end
+ | _ ->
+ let _ = update_hash db table in
+ Ok result
+
+let prepare_insert :
+ Sqlite3.db -> ImportAnalyser.Dependency.t -> Sqlite3.stmt T.result =
+ fun db mapping ->
+ (* Get the list of columns from the table configuration *)
+ let columns =
+ ImportAnalyser.Dependency.columns mapping
+ |> ImportContainers.IntSet.elements
+ in
+ let table_name = Table.name (ImportAnalyser.Dependency.table mapping) in
+
+ let open Buffer in
+ let buff = create 20 and value_buff = create 10 and col_buff = create 10 in
+
+ add_string col_buff "'id',";
+
+ (* Add the key name if present *)
+ List.iter (ImportAnalyser.Dependency.keys mapping)
+ ~f:(fun { ImportAnalyser.Dependency.name; _ } ->
+ add_string col_buff "'key_";
+
+ add_string col_buff name;
+ add_string col_buff "',");
+
+ add_string value_buff ":id,";
+
+ (* Add the key settings if presents *)
+ List.iter (ImportAnalyser.Dependency.keys mapping) ~f:(fun key ->
+ ImportAnalyser.Query.build_key_insert value_buff key;
+ add_string value_buff ",");
+
+ List.iter columns ~f:(fun id ->
+ add_string col_buff "'col_";
+ add_string col_buff (string_of_int id);
+ add_string col_buff "',";
+
+ let col_name = ":col_" ^ string_of_int id in
+
+ add_string value_buff col_name;
+ add_string value_buff ",");
+
+ truncate col_buff (length col_buff - 1);
+ truncate value_buff (length value_buff - 1);
+ add_string buff "INSERT INTO '";
+ add_string buff table_name;
+ add_string buff "' (";
+ add_buffer buff col_buff;
+ add_string buff " ) VALUES (";
+ add_buffer buff value_buff;
+ add_string buff " )";
+
+ let query = contents buff in
+
+ try Ok (Sqlite3.prepare db query) with
+ | e ->
+ print_endline "Error during this query :";
+ print_endline query;
+ Error e
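+
+  (* As an illustration (table and column names are hypothetical), the
+     generated statement has the following shape:
+
+       INSERT INTO 'source' ('id','key_name','col_2','col_5')
+       VALUES (:id, <key expression>, :col_2, :col_5)
+  *)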
+
+let eval_key :
+ 'a t ->
+ Sqlite3.stmt option ->
+ ImportAnalyser.Dependency.key list ->
+ (int * CSV.DataType.t) list ->
+ (Sqlite3.stmt option * Sqlite3.Data.t list) T.result =
+ fun db stmt keys values ->
+ match keys with
+ | [] -> Ok (None, [])
+ | _ ->
+ let buffer = Buffer.create 16 in
+ Buffer.add_string buffer "SELECT ";
+ List.iter keys ~f:(fun key ->
+ ImportAnalyser.Query.build_key_insert buffer key;
+ Buffer.add_string buffer ",");
+
+ Buffer.truncate buffer (Buffer.length buffer - 1);
+ let query = Buffer.contents buffer in
+
+ let statement = Sqlite3.prepare_or_reset db (ref stmt) query in
+
+      (* Extract all the column ids used in the keys. *)
+ let keys_id =
+ List.fold_left keys ~init:ImportContainers.IntSet.empty
+ ~f:(fun acc (keys : ImportAnalyser.Dependency.key) ->
+ let columns = Lazy.force keys.ImportAnalyser.Dependency.columns in
+ ImportContainers.IntSet.union acc columns)
+ in
+
+ let** _ =
+ List.fold_left values ~init:(Ok 1) ~f:(fun idx (id, value) ->
+ let** idx = idx in
+
+ (* Ensure the column is required in the keys *)
+ match ImportContainers.IntSet.mem (1 + id) keys_id with
+ | false -> Ok (idx + 1)
+ | true ->
+ let sql_data = T.of_datatype value in
+
+ let col_name = ":col_" ^ string_of_int (1 + id) in
+ let* _ = Sqlite3.bind_name statement col_name sql_data in
+ Ok (idx + 1))
+ in
+
+ let result, evaluated_keys =
+ Sqlite3.fold statement ~init:[] ~f:(fun _ v -> Array.to_list v)
+ in
+ let* _ = result in
+ Ok (Some statement, evaluated_keys)
+
+let insert :
+ Sqlite3.db ->
+ Sqlite3.stmt ->
+ id:int ->
+ (int * CSV.DataType.t) list ->
+ unit T.result =
+ fun db statement ~id values ->
+ let** _ = T.savepoint db "PREVIOUS" in
+ let* _ =
+ Sqlite3.bind_name statement ":id" (Sqlite3.Data.INT (Int64.of_int id))
+ in
+ let** _ =
+ List.fold_left values ~init:(Ok 1) ~f:(fun idx (id, value) ->
+ let** idx = idx in
+ let sql_data = T.of_datatype value in
+
+ let col_name = ":col_" ^ string_of_int (1 + id) in
+ let* _ = Sqlite3.bind_name statement col_name sql_data in
+
+ Ok (idx + 1))
+ in
+
+ match T.to_result (Sqlite3.step statement) with
+ | Ok () -> T.release db "PREVIOUS"
+ | Error e ->
+    (* I intentionally ignore any error here, as we are already in an
+       error case *)
+ ignore (Sqlite3.exec db "ROLLBACK TRANSACTION TO SAVEPOINT PREVIOUS");
+ Error e
+
+(** This simple function converts a query generated by the application into a
+    statement executed by SQLite.
+
+    The function expects a perfect match between the query and the parameters. *)
+let execute_query :
+ Sqlite3.db -> ImportAnalyser.Query.query -> Sqlite3.stmt T.result =
+ fun db query ->
+ let statement =
+ try Sqlite3.prepare db query.q with
+ | e ->
+ print_endline "Error during this query :";
+ print_endline query.q;
+ raise e
+ in
+
+ (* Add the binded parameters *)
+ let values =
+ Seq.map (fun v -> T.of_datatype v) query.parameters |> List.of_seq
+ in
+
+ let* _ = Sqlite3.bind_values statement values in
+
+ Ok statement
+
+let query :
+ f:((Path.t ImportExpression.T.t * CSV.DataType.t) array -> unit) ->
+ Sqlite3.db ->
+ Syntax.t ->
+ unit T.result =
+ fun ~f db output ->
+ (* Extract the query from the configuration. *)
+ let** query_analysis =
+ match ImportAnalyser.Query.select output with
+ | exception e -> Error e
+ | other -> Ok other
+ in
+
+ let query, columns = query_analysis in
+ let** statement = execute_query db query in
+
+ let* _ =
+ Sqlite3.iter statement ~f:(fun data ->
+ let values =
+ Array.map2 data columns ~f:(fun value column ->
+ (column, T.to_datatype value))
+ in
+ f values)
+ in
+ Ok ()
+
+let create_view : Sqlite3.db -> Syntax.t -> unit T.result =
+ fun db output ->
+ ignore output;
+ let* drop = Sqlite3.exec db "DROP VIEW IF EXISTS 'result'" in
+ ignore drop;
+
+ Ok ()
+
+(*
+ let query, _ = ImportAnalyser.Query.select output in
+
+ let query =
+ { query with q = Printf.sprintf "CREATE VIEW result AS %s" query.q }
+ in
+ let** statement = execute_query db query in
+
+ Sqlite3.step statement |> T.to_result
+ *)
+
+let check_foreign :
+ f:((string * CSV.DataType.t) array -> unit) ->
+ Sqlite3.db ->
+ Syntax.t ->
+ Syntax.extern ->
+ unit T.result =
+ fun ~f db conf external_ ->
+ let query = ImportAnalyser.Query.check_external conf external_ in
+
+ let** statement = execute_query db query in
+ Sqlite3.iter statement ~f:(fun data ->
+ let values =
+ Array.mapi data ~f:(fun i value ->
+ (Sqlite3.column_name statement i, T.to_datatype value))
+ in
+ f values)
+ |> T.to_result
+
+let clear_duplicates :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ ImportAnalyser.Dependency.key list ->
+ unit T.result =
+ fun ~f db table keys ->
+ let table_name = Table.name table in
+
+ List.fold_left keys ~init:(Ok ())
+ ~f:(fun state { ImportAnalyser.Dependency.name; _ } ->
+ let** _ = state in
+
+ let select_query =
+ String.concat ~sep:""
+ [
+ "SELECT '";
+ table_name;
+ "'.id, '";
+ table_name;
+ "'.'key_";
+ name;
+ "', '";
+ name;
+ "' FROM '";
+ table_name;
+ "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY '";
+ table_name;
+ "'.'key_";
+ name;
+ "' ORDER BY (id)) AS row_num from '";
+ table_name;
+ "') other_table WHERE other_table.row_num <> 1 and '";
+ table_name;
+ "'.id = other_table.id";
+ ]
+ in
+ let stmt = Sqlite3.prepare db select_query in
+
+ ignore
+ @@ Sqlite3.iter stmt ~f:(fun data ->
+ let values =
+ Array.mapi data ~f:(fun i value ->
+ (Sqlite3.column_name stmt i, T.to_datatype value))
+ in
+ f values);
+
+ let delete_query =
+ Printf.sprintf
+ {|UPDATE '%s'
+SET key_%s = NULL
+FROM
+(
+ SELECT id, row_number() OVER(PARTITION BY key_%s ORDER BY (id)) AS row_num
+ from '%s'
+) other_table
+WHERE other_table.row_num <> 1
+and '%s'.id = other_table.id|}
+ table_name name name table_name table_name
+ in
+
+ Sqlite3.exec db delete_query |> T.to_result)
+
+type 'a result = ('a, exn) Result.t
diff --git a/lib/sql/db.mli b/lib/sql/db.mli
new file mode 100644
index 0000000..465b159
--- /dev/null
+++ b/lib/sql/db.mli
@@ -0,0 +1,106 @@
+module Syntax = ImportConf.Syntax
+
+type 'a t
+type 'a result = ('a, exn) Result.t
+
+val with_db : string -> ('a t -> unit result) -> unit result
+
+val check_table_schema : 'a t -> ImportAnalyser.Dependency.t -> bool result
+(** Check if a table with the same structure already exists in the database.
+
+    This query allows reusing the same data without reloading the file if
+    nothing changed. *)
+
+val create_table : 'a t -> ImportAnalyser.Dependency.t -> unit result
+(** [create_table db name] will create a new table in the database with the
+    given name and the columns derived from the configuration (see
+    [ImportAnalyser.Query.create_table]).
+
+    Any previous table with the same name will be deleted. *)
+
+val prepare_insert : 'a t -> ImportAnalyser.Dependency.t -> Sqlite3.stmt result
+(** Create a statement to use in an insert.
+    [prepare_insert db table] will prepare a statement inserting the required
+    columns, each one identified by its index. *)
+
+val finalize : Sqlite3.stmt -> unit result
+(** Finalize the statement. The function shall be called once all the inserts
+    are done, or after an error in an insert. *)
+
+val reset : Sqlite3.stmt -> unit result
+
+val eval_key :
+ 'a t ->
+ Sqlite3.stmt option ->
+ ImportAnalyser.Dependency.key list ->
+ (int * ImportCSV.DataType.t) list ->
+ (Sqlite3.stmt option * Sqlite3.Data.t list) result
+(** Evaluate the keys in SQLite and get the results.
+
+    The function is intended to check whether the values are null before
+    inserting them in a batch. *)
+
+val insert :
+ 'a t ->
+ Sqlite3.stmt ->
+ id:int ->
+ (int * ImportCSV.DataType.t) list ->
+ unit result
+(** Insert a new row in the database.
+
+    [insert db statement ~id values] will add a new row in the given table with
+    [id]. The columns are identified by their index number (which is offset by
+    one from the column number).
+
+    Thanks to SQLite Flexible Typing (https://www.sqlite.org/flextypegood.html)
+    each column can contain values of different types, which is also how a
+    spreadsheet works.
+
+    This function is expected to be run inside a transaction. *)
+
+val begin_transaction : 'a t -> unit result
+val commit : 'a t -> unit result
+val rollback : 'a t -> unit result
+
+val query :
+ f:
+ ((ImportDataTypes.Path.t ImportExpression.T.t * ImportCSV.DataType.t) array ->
+ unit) ->
+ 'a t ->
+ Syntax.t ->
+ unit result
+(** This is the most important function of the application. It transforms the
+    configuration into an SQL query and fetches the result from the SQLite
+    engine.
+
+    The function [f] given as argument will be called for each line. *)
+
+val create_view : 'a t -> Syntax.t -> unit result
+(** Create a view which represents the result *)
+
+val check_foreign :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ Syntax.t ->
+ Syntax.extern ->
+ unit result
+
+val clear_duplicates :
+ f:((string * ImportCSV.DataType.t) array -> unit) ->
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ ImportAnalyser.Dependency.key list ->
+ unit result
+(** Remove all duplicated keys in the table by setting them to NULL. *)
+
+val insert_header :
+ 'a t ->
+ ImportDataTypes.Table.t ->
+ (int * ImportCSV.DataType.t) array ->
+ unit T.result
+
+val query_headers :
+ 'a t -> ImportDataTypes.Table.t -> ImportCSV.DataType.t array T.result
+(** Get all the headers from the database (used or not) *)
diff --git a/lib/sql/dune b/lib/sql/dune
new file mode 100644
index 0000000..9f9f205
--- /dev/null
+++ b/lib/sql/dune
@@ -0,0 +1,15 @@
+(library
+ (name importSQL)
+ (libraries
+ re
+ sqlite3
+ calendar
+ importAnalyser
+ importCSV
+ importConf
+ importContainers
+ importDataTypes
+ importErrors
+ importExpression
+ )
+)
diff --git a/lib/sql/hashs.ml b/lib/sql/hashs.ml
new file mode 100644
index 0000000..af1f092
--- /dev/null
+++ b/lib/sql/hashs.ml
@@ -0,0 +1,79 @@
+(**
+ This module stores the hash of the indexes ([extern_key]) for each table, in
+ order to reload the file when the configuration changes.
+
+ The hashes are stored in a table named [hashes] and are evaluated just
+ before inserting the values.
+*)
+
+open StdLabels
+module Table = ImportDataTypes.Table
+
+let ( let* ) = Result.bind
+
+let create_table : 'a T.t -> unit T.result =
+ fun db ->
+ Sqlite3.exec db
+ "CREATE TABLE IF NOT EXISTS 'hashes' ('table' TEXT, 'hash' INTEGER, \
+ PRIMARY KEY ('table'))"
+ |> T.to_result
+
+let evaluate : ImportAnalyser.Dependency.t -> int =
+ fun table ->
+ (* Extract all the references to this table *)
+ let keys =
+ List.map (ImportAnalyser.Dependency.keys table)
+ ~f:(fun ImportAnalyser.Dependency.{ name; columns; expression } ->
+ ignore columns;
+ (name, expression))
+ in
+ Hashtbl.hash keys
+
+let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result =
+ fun db table ->
+ let source = ImportAnalyser.Dependency.table table in
+ let table_name = Table.name source in
+
+ let hash = evaluate table in
+
+ let query =
+ String.concat ~sep:""
+ [
+ "INSERT INTO 'hashes' ('table', 'hash') VALUES ('";
+ table_name;
+ "', :hash) ON CONFLICT(hashes.'table') DO UPDATE SET 'hash' = :hash";
+ ]
+ in
+ let* statement =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+
+ let* _ = T.begin_transaction db in
+ let sql_data = Sqlite3.Data.INT (Int64.of_int hash) in
+
+ let* _ = Sqlite3.bind_name statement ":hash" sql_data |> T.to_result in
+ let* _ = T.to_result (Sqlite3.step statement) in
+ T.commit db
+
+let query : 'a T.t -> ImportDataTypes.Table.t -> int option T.result =
+ fun db table ->
+ let table_name = Table.name table in
+ let query =
+ String.concat ~sep:""
+ [ "SELECT hash FROM 'hashes' WHERE hashes.'table' = '"; table_name; "'" ]
+ in
+
+ let* stmt =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+ let state, res =
+ Sqlite3.fold stmt ~init:None ~f:(fun _ d ->
+ Some (T.to_datatype (Array.get d 0)))
+ in
+
+ let* _ = T.to_result state in
+ match res with
+ | Some (ImportCSV.DataType.Integer i) -> Ok (Some i)
+ | _ -> Ok None
diff --git a/lib/sql/header.ml b/lib/sql/header.ml
new file mode 100644
index 0000000..3cac5fb
--- /dev/null
+++ b/lib/sql/header.ml
@@ -0,0 +1,74 @@
+open StdLabels
+module Table = ImportDataTypes.Table
+
+let ( let* ) = Result.bind
+
+let create_table : 'a T.t -> unit T.result =
+ fun db ->
+ Sqlite3.exec db
+ "CREATE TABLE IF NOT EXISTS 'header' ('table' TEXT, 'column' INTEGER, \
+ 'label', PRIMARY KEY ('table', 'column'))"
+ |> T.to_result
+
+let insert_header :
+ 'a T.t ->
+ ImportDataTypes.Table.t ->
+ (int * ImportCSV.DataType.t) array ->
+ unit T.result =
+ fun db table values ->
+ let table_name = Table.name table in
+
+ let query =
+ String.concat ~sep:""
+ [
+ "INSERT INTO 'header' ('table', 'column', 'label') VALUES ('";
+ table_name;
+ "', :column, :label) ON CONFLICT(header.'table', header.'column') DO \
+ UPDATE SET 'label' = :label";
+ ]
+ in
+
+ let statement = Sqlite3.prepare db query in
+
+ let* _ = T.begin_transaction db in
+ let* _ =
+ Array.fold_left values ~init:(Ok ()) ~f:(fun acc (column, value) ->
+ let* _ = acc in
+ let sql_data = T.of_datatype value in
+ let* _ = Sqlite3.bind_name statement ":label" sql_data |> T.to_result in
+
+ let* _ =
+ Sqlite3.bind_name statement ":column"
+ (Sqlite3.Data.INT (Int64.of_int column))
+ |> T.to_result
+ in
+ let* _ = T.to_result (Sqlite3.step statement) in
+ T.reset statement)
+ in
+ T.commit db
+
+let query_headers :
+ 'a T.t -> ImportDataTypes.Table.t -> ImportCSV.DataType.t array T.result =
+ fun db table ->
+ let table_name = Table.name table in
+ let query =
+ String.concat ~sep:""
+ [
+ "SELECT label FROM 'header' WHERE header.'table' = '";
+ table_name;
+ "' ORDER BY column DESC";
+ ]
+ in
+
+ let* stmt =
+ try Ok (Sqlite3.prepare db query) with
+ | e -> Error e
+ in
+ let state, res =
+ Sqlite3.fold stmt ~init:[] ~f:(fun acc d ->
+ let value = T.to_datatype (Array.get d 0) in
+ value :: acc)
+ in
+
+ let* _ = T.to_result state in
+ Ok (Array.of_list res)
diff --git a/lib/sql/join.ml b/lib/sql/join.ml
new file mode 100644
index 0000000..3f82b92
--- /dev/null
+++ b/lib/sql/join.ml
@@ -0,0 +1,30 @@
+module D = Sqlite3.Data
+
+let f : Sqlite3.Data.t array -> Sqlite3.Data.t =
+ fun arguments ->
+ if Array.length arguments < 2 then Sqlite3.Data.NULL
+ else
+ let sep = Array.get arguments 0 in
+
+    (* Shift all the elements, except the separator, into a list *)
+ let contents =
+ Array.to_seqi arguments
+ |> Seq.filter_map (fun (i, value) ->
+ if i = 0 then None
+ else
+ match value with
+ | D.INT i -> Some (Int64.to_string i)
+ | D.FLOAT f -> Some (Float.to_string f)
+ | D.NONE -> None
+ | D.NULL -> None
+ | D.TEXT s | D.BLOB s ->
+ if String.length s = 0 then None else Some (String.trim s))
+ |> List.of_seq
+ in
+
+ D.TEXT (String.concat (D.to_string_coerce sep) contents)
+
+let register : Sqlite3.db -> unit =
+ fun db ->
+ Sqlite3.create_funN db "join" f;
+ Sqlite3.create_funN db "concat" f
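A hedged usage sketch, assuming [db] is an opened handle (column names are illustrative); NULL and empty values are skipped by the function above:

    let () =
      ignore @@ Sqlite3.exec db "SELECT join(', ', col_1, col_2, col_3) FROM 'source'"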
diff --git a/lib/sql/match.ml b/lib/sql/match.ml
new file mode 100644
index 0000000..82fc1da
--- /dev/null
+++ b/lib/sql/match.ml
@@ -0,0 +1,22 @@
+let f : Sqlite3.Data.t -> Sqlite3.Data.t -> Sqlite3.Data.t =
+ let memo = Hashtbl.create 16 in
+ fun str data_1 ->
+ match (str, data_1) with
+ | Sqlite3.Data.TEXT s, Sqlite3.Data.TEXT content -> (
+ let regex =
+ match Hashtbl.find_opt memo s with
+ | None ->
+ let regex = Re.Posix.compile_pat s in
+ Hashtbl.add memo s regex;
+ regex
+ | Some v -> v
+ in
+
+ match Re.exec_opt regex content with
+ | None -> Sqlite3.Data.NULL
+ | Some g ->
+ let matched = Re.Group.get g 1 in
+ Sqlite3.Data.TEXT matched)
+ | _, _ -> data_1
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun2 db "match" f
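A hedged usage sketch, assuming [db] is an opened handle; the POSIX pattern must contain a capturing group, since the function returns group 1:

    let () =
      ignore @@ Sqlite3.exec db "SELECT match('^([0-9]+)', col_1) FROM 'source'"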
diff --git a/lib/sql/math.ml b/lib/sql/math.ml
new file mode 100644
index 0000000..576d9f6
--- /dev/null
+++ b/lib/sql/math.ml
@@ -0,0 +1,20 @@
+(** Math functions *)
+
+let int : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ (* If the data is already an int, do not change it *)
+ | Sqlite3.Data.INT _ -> data
+ | Sqlite3.Data.FLOAT content -> Sqlite3.Data.INT (Int64.of_float content)
+ | Sqlite3.Data.BLOB content | Sqlite3.Data.TEXT content -> begin
+ match Int64.of_string_opt content with
+ | Some i -> Sqlite3.Data.INT i
+ | None -> begin
+ match Float.of_string_opt content with
+ | Some f -> Sqlite3.Data.INT (Int64.of_float f)
+ | None -> Sqlite3.Data.NULL
+ end
+ end
+ | _ -> Sqlite3.Data.NULL
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "int" int
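A short sketch of the coercions performed by [int] above (illustrative values):

    let () =
      assert (int (Sqlite3.Data.TEXT "12.5") = Sqlite3.Data.INT 12L);
      assert (int (Sqlite3.Data.FLOAT 3.9) = Sqlite3.Data.INT 3L);
      assert (int (Sqlite3.Data.TEXT "abc") = Sqlite3.Data.NULL)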
diff --git a/lib/sql/t.ml b/lib/sql/t.ml
new file mode 100644
index 0000000..202c535
--- /dev/null
+++ b/lib/sql/t.ml
@@ -0,0 +1,52 @@
+exception SqlError of Sqlite3.Rc.t
+
+type 'a t = Sqlite3.db
+type 'a result = ('a, exn) Result.t
+
+let to_result : Sqlite3.Rc.t -> unit result = function
+ | Sqlite3.Rc.OK | Sqlite3.Rc.DONE -> Ok ()
+ | res -> Error (SqlError res)
+
+let begin_transaction : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "BEGIN" in
+ Sqlite3.exec db query |> to_result
+
+let commit : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "COMMIT" in
+ Sqlite3.exec db query |> to_result
+
+let rollback : Sqlite3.db -> unit result =
+ fun db ->
+ let query = "ROLLBACK" in
+ Sqlite3.exec db query |> to_result
+
+let savepoint : Sqlite3.db -> string -> unit result =
+ fun db name ->
+ let query = "SAVEPOINT " ^ name in
+ Sqlite3.exec db query |> to_result
+
+let release : Sqlite3.db -> string -> unit result =
+ fun db name ->
+ let query = "RELEASE SAVEPOINT " ^ name in
+ Sqlite3.exec db query |> to_result
+
+let finalize : Sqlite3.stmt -> unit result =
+ fun statement -> to_result (Sqlite3.finalize statement)
+
+let reset : Sqlite3.stmt -> unit result =
+ fun statement -> to_result (Sqlite3.reset statement)
+
+let of_datatype = function
+ | ImportCSV.DataType.Float f -> Sqlite3.Data.FLOAT f
+ | ImportCSV.DataType.Integer i -> Sqlite3.Data.INT (Int64.of_int i)
+ | ImportCSV.DataType.Null -> Sqlite3.Data.NULL
+ | ImportCSV.DataType.Error _ -> Sqlite3.Data.NULL
+ | ImportCSV.DataType.Content s -> Sqlite3.Data.TEXT s
+
+let to_datatype : Sqlite3.Data.t -> ImportCSV.DataType.t = function
+ | Sqlite3.Data.NONE | Sqlite3.Data.NULL -> ImportCSV.DataType.Null
+ | Sqlite3.Data.INT i -> ImportCSV.DataType.Integer (Int64.to_int i)
+ | Sqlite3.Data.FLOAT f -> ImportCSV.DataType.Float f
+ | Sqlite3.Data.TEXT t | Sqlite3.Data.BLOB t -> ImportCSV.DataType.Content t
diff --git a/lib/sql/trim.ml b/lib/sql/trim.ml
new file mode 100644
index 0000000..4e4bcf4
--- /dev/null
+++ b/lib/sql/trim.ml
@@ -0,0 +1,9 @@
+(* Override the trim function with one which also removes the carriage return character *)
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ | Sqlite3.Data.TEXT content -> Sqlite3.Data.TEXT (String.trim content)
+ | _ -> data
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "trim" f
diff --git a/lib/sql/year.ml b/lib/sql/year.ml
new file mode 100644
index 0000000..1e9c599
--- /dev/null
+++ b/lib/sql/year.ml
@@ -0,0 +1,19 @@
+(** Extract the year from a date stored as a number of days since 1899-12-30 *)
+
+open CalendarLib
+
+let first_day = CalendarLib.Date.make 1899 12 30
+
+let f : Sqlite3.Data.t -> Sqlite3.Data.t =
+ fun data ->
+ match data with
+ | Sqlite3.Data.INT content ->
+ let nb = Int64.to_int content in
+ let date = Date.add first_day (Date.Period.day nb) in
+ let year = CalendarLib.Date.year date in
+
+ Sqlite3.Data.INT (Int64.of_int year)
+ | _ -> data
+
+
+let register : Sqlite3.db -> unit = fun db -> Sqlite3.create_fun1 db "year" f
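A small sketch, consistent with the day-count convention used above (44196 days after 1899-12-30 falls in 2020); non-integer values are returned unchanged:

    let () =
      assert (f (Sqlite3.Data.INT 44196L) = Sqlite3.Data.INT 2020L);
      assert (f (Sqlite3.Data.TEXT "n/a") = Sqlite3.Data.TEXT "n/a")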
diff --git a/lib/tools/dune b/lib/tools/dune
new file mode 100644
index 0000000..29ee744
--- /dev/null
+++ b/lib/tools/dune
@@ -0,0 +1,10 @@
+(library
+ (name tools))
+
+(rule
+ (target git_hash.ml)
+ (deps git_head.sh (universe))
+ (action
+ (with-stdout-to
+ %{target}
+ (bash "./git_head.sh"))))
diff --git a/lib/tools/git_head.sh b/lib/tools/git_head.sh
new file mode 100755
index 0000000..bfa738a
--- /dev/null
+++ b/lib/tools/git_head.sh
@@ -0,0 +1,11 @@
+#! /bin/sh
+# Include the git hash in an OCaml file.
+
+git diff-index --quiet HEAD --
+if [ $? -ne 0 ]; then
+ revision=": untracked"
+else
+ revision=$(git rev-parse --short HEAD)
+fi
+compile_date=$(date +%Y/%m/%d)
+echo "let revision = \"${revision} - compiled on ${compile_date}\""