Diffstat (limited to 'lib')
-rw-r--r--  lib/analysers/chunk.ml                    |  87
-rw-r--r--  lib/analysers/chunk.mli                   |  11
-rw-r--r--  lib/analysers/dependency.ml               |  66
-rw-r--r--  lib/analysers/dependency.mli              |   5
-rw-r--r--  lib/analysers/filters.ml                  |  19
-rw-r--r--  lib/analysers/query.ml                    |  83
-rw-r--r--  lib/configuration/expression_parser.mly   |  33
-rw-r--r--  lib/configuration/importConf.ml           |  14
-rw-r--r--  lib/configuration/importConf.mli          |  10
-rw-r--r--  lib/configuration/read_conf.ml            | 187
-rwxr-xr-x  lib/file_handler/dune                     |   1
-rw-r--r--  lib/file_handler/state.ml                 |   4
-rw-r--r--  lib/helpers/toml.ml                       | 113
-rw-r--r--  lib/sql/db.ml                             | 155
-rw-r--r--  lib/sql/db.mli                            |   6
-rw-r--r--  lib/sql/hashs.ml                          |  14
-rw-r--r--  lib/syntax/importerSyntax.ml              |  27
-rw-r--r--  lib/syntax/importerSyntax.mli             |   2
18 files changed, 586 insertions, 251 deletions
diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml
index cefa6d8..b09f311 100644
--- a/lib/analysers/chunk.ml
+++ b/lib/analysers/chunk.ml
@@ -43,9 +43,24 @@ let add_parameters : t -> ImportDataTypes.Value.t Seq.t -> unit =
fun t p -> Queue.add_seq t.parameters p
module Table = ImportDataTypes.Table
-module Q = ImportExpression.Query
open StdLabels
+let add_expression :
+ repr:(Format.formatter -> 'a -> unit) ->
+ t ->
+ 'a ImportExpression.T.t ->
+ unit =
+ fun ~repr group expression ->
+ let formatter = Format.formatter_of_buffer group.b in
+ Format.pp_print_char formatter '(';
+ let queue =
+ ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam
+ formatter repr expression
+ in
+ Format.pp_print_char formatter ')';
+ Format.pp_print_flush formatter ();
+ add_parameters group (Queue.to_seq queue)
+
(** Extract the information from the dependencies. We get two pieces of
    information here:
@@ -57,20 +72,34 @@ let join_external :
fun ~conf ~join_buffer external_ ->
let extern_table = Table.name external_.target in
- let formatter = Format.formatter_of_buffer join_buffer.b in
- Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table
- external_.target.name
- (Printers.prepare_key ~f:(fun f ->
- let q =
- Q.query_of_expression Q.BindParam f (Printers.path ~conf)
- external_.intern_key
- in
-
- add_parameters join_buffer (Queue.to_seq q)))
- (Table.print_column external_.ImporterSyntax.Extern.target
- ("key_" ^ external_.ImporterSyntax.Extern.target.name));
-
- Format.pp_print_flush formatter ()
+ add_string join_buffer "\nLEFT JOIN '";
+ add_string join_buffer extern_table;
+ add_string join_buffer "' AS '";
+ add_string join_buffer external_.target.name;
+ add_string join_buffer "' ON ";
+ add_string join_buffer
+ (Format.asprintf "%t = %s"
+ (Printers.prepare_key ~f:(fun f ->
+ let q =
+ ImportExpression.Query.query_of_expression
+ ImportExpression.Query.BindParam f (Printers.path ~conf)
+ external_.intern_key
+ in
+
+ add_parameters join_buffer (Queue.to_seq q)))
+ (Table.print_column external_.ImporterSyntax.Extern.target
+ ("key_" ^ external_.ImporterSyntax.Extern.target.name)));
+
+ (* Add the filters given for this external in the query *)
+ let table = external_.ImporterSyntax.Extern.target
+ and filters = external_.ImporterSyntax.Extern.filters in
+ List.iter filters ~f:(fun f ->
+ add_string join_buffer " AND ";
+ add_expression
+ ~repr:(fun formatter column ->
+ Format.fprintf formatter "%s"
+ (Table.print_column table ("col_" ^ string_of_int column)))
+ join_buffer f)
(** Create the FROM part of the query, adding all the externals (even those
    not strictly required)
@@ -90,16 +119,26 @@ let create_from_statement_of_chunck :
(* Add the externals in the query *)
List.iter externals ~f:(join_external ~conf ~join_buffer:c)
-let add_expression :
- conf:ImporterSyntax.t ->
+(** Add a list of expressions into the group *)
+let add_expressions :
+ repr:(Format.formatter -> 'a -> unit) ->
+ sep:string ->
t ->
- ImportDataTypes.Path.t ImportExpression.T.t ->
+ 'a ImportExpression.T.t list ->
unit =
- fun ~conf group expression ->
+ fun ~repr ~sep group expressions ->
let formatter = Format.formatter_of_buffer group.b in
- let queue =
- ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam
- formatter (Printers.path ~conf) expression
+ let () =
+ Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.pp_print_string f sep)
+ (fun formatter column ->
+ Format.pp_print_char formatter '(';
+ let seq =
+ ImportExpression.Query.query_of_expression
+ ImportExpression.Query.BindParam formatter repr column
+ in
+ Format.pp_print_char formatter ')';
+ Queue.transfer seq group.parameters)
+ formatter expressions
in
- Format.pp_print_flush formatter ();
- add_parameters group (Queue.to_seq queue)
+ Format.pp_print_flush formatter ()
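The generalised interface above decouples the chunk from the configuration: the caller now picks the printer used for the expression leaves. A minimal usage sketch (assuming a [conf : ImporterSyntax.t], an [expression] over paths and a [filters] list of column expressions are in scope):

    let () =
      let chunk = Chunk.create () in
      Chunk.add_string chunk "SELECT ";
      (* Path-based expressions keep the configuration-aware printer… *)
      Chunk.add_expression ~repr:(Printers.path ~conf) chunk expression;
      (* …while column-only expressions, such as the external filters, are
         printed as bare column names. *)
      Chunk.add_expressions ~sep:" AND "
        ~repr:(fun formatter column ->
          Format.fprintf formatter "col_%d" column)
        chunk filters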
diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli
index 13a748a..ad9ca00 100644
--- a/lib/analysers/chunk.mli
+++ b/lib/analysers/chunk.mli
@@ -27,8 +27,13 @@ val create_from_statement_of_chunck :
the select clause. *)
val add_expression :
- conf:ImporterSyntax.t ->
+ repr:(Format.formatter -> 'a -> unit) -> t -> 'a ImportExpression.T.t -> unit
+(** Add an expression into an existing chunk *)
+
+val add_expressions :
+ repr:(Format.formatter -> 'a -> unit) ->
+ sep:string ->
t ->
- ImportDataTypes.Path.t ImportExpression.T.t ->
+ 'a ImportExpression.T.t list ->
unit
-(** Add an expression into an existing chunck *)
+(** Add a list of expressions into an existing chunk *)
diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml
index 38bc23c..8c969fe 100644
--- a/lib/analysers/dependency.ml
+++ b/lib/analysers/dependency.ml
@@ -1,7 +1,5 @@
open StdLabels
module IntSet = ImportContainers.IntSet
-module Table = ImportDataTypes.Table
-module Path = ImportDataTypes.Path
module Expression = ImportExpression.T
(*
@@ -19,13 +17,14 @@ type deps = (ImportContainers.Source.t * ImportContainers.Source.t list) list
type key = {
name : string;
- expression : Path.column Expression.t;
+ expression : ImportDataTypes.Path.column Expression.t;
columns : ImportContainers.IntSet.t Lazy.t;
+ filters : ImportDataTypes.Path.column ImportExpression.T.t list;
}
[@@deriving show, eq]
type t = {
- table : Table.t;
+ table : ImportDataTypes.Table.t;
columns : IntSet.t;
keys : key list;
}
@@ -47,8 +46,8 @@ type build_map = t ImportContainers.Externals.t
- [of_path] is needed to extract the qualified source from any kind of path.
*)
type 'a expression_extractor = {
- to_mapping : t -> Path.column -> t;
- of_path : 'a -> string option * Path.column;
+ to_mapping : t -> ImportDataTypes.Path.column -> t;
+ of_path : 'a -> string option * ImportDataTypes.Path.column;
}
(** [add_path_in_map f parent path ] Extract the column from element [path] and
@@ -112,9 +111,15 @@ let add_columns_in_map :
This function is called for each path declared inside the expression. *)
let add_dependancies :
- conf:ImporterSyntax.t -> ImporterSyntax.Extern.t -> deps -> Path.t -> deps =
+ conf:ImporterSyntax.t ->
+ ImporterSyntax.Extern.t ->
+ deps ->
+ ImportDataTypes.Path.t ->
+ deps =
fun ~conf extern graph path ->
- let source_table = ImporterSyntax.get_table_for_name conf path.Path.alias in
+ let source_table =
+ ImporterSyntax.get_table_for_name conf path.ImportDataTypes.Path.alias
+ in
let source = ImportContainers.Source.from_table source_table in
let target = ImportContainers.Source.from_table extern.target in
@@ -134,7 +139,10 @@ let add_external_in_map :
let _ =
Expression.fold_values extern.intern_key ~init:() ~f:(fun () path ->
try
- let _ = ImporterSyntax.get_table_for_name conf path.Path.alias in
+ let _ =
+ ImporterSyntax.get_table_for_name conf
+ path.ImportDataTypes.Path.alias
+ in
()
with
| Not_found -> (
@@ -147,16 +155,19 @@ let add_external_in_map :
raise (ImportErrors.Unknown_source root.name)))
in
+ let columns () =
+ let f = fun acc k -> ImportContainers.IntSet.add k acc in
+ Expression.fold_values extern.extern_key ~f
+ ~init:ImportContainers.IntSet.empty
+ in
+
(* Create the new key with all the expression and all the columns inside it *)
let new_key =
{
- name = extern.target.Table.name;
+ name = extern.target.ImportDataTypes.Table.name;
expression = extern.extern_key;
- columns =
- lazy
- (Expression.fold_values extern.extern_key
- ~f:(fun acc k -> ImportContainers.IntSet.add k acc)
- ~init:ImportContainers.IntSet.empty);
+ columns = Lazy.from_fun columns;
+ filters = extern.filters;
}
in
let build_map =
@@ -182,7 +193,7 @@ let add_external_in_map :
~f:
{
of_path =
- (fun Path.{ alias; column } ->
+ (fun ImportDataTypes.Path.{ alias; column } ->
let table = ImporterSyntax.get_table_for_name conf alias in
(Some table.name, column));
to_mapping =
@@ -197,7 +208,8 @@ let mapper =
to_mapping =
(fun mapping column ->
{ mapping with columns = IntSet.add column mapping.columns });
- of_path = (fun ({ alias; column } : Path.t) -> (alias, column));
+ of_path =
+ (fun ({ alias; column } : ImportDataTypes.Path.t) -> (alias, column));
}
let get_mapping : ImporterSyntax.t -> build_map * deps =
@@ -221,7 +233,24 @@ let get_mapping : ImporterSyntax.t -> build_map * deps =
in
let map, graph =
List.fold_left conf.externals ~init ~f:(fun map extern ->
- add_external_in_map ~conf extern map)
+ let map, graph = add_external_in_map ~conf extern map in
+
+ (* Also add the filters declared in the externals. Their columns are not
+ defined as a full path (table and column) but only as a bare column. We
+ transform them so they are represented the same way as the others *)
+ let table =
+ ImporterSyntax.get_table_for_name conf (Some extern.target.name)
+ in
+ let path_filters =
+ List.map extern.filters ~f:(fun expression ->
+ Expression.map
+ ~f:(fun column ->
+ ImportDataTypes.Path.{ alias = Some table.name; column })
+ expression)
+ in
+ let map = add_columns_in_map ~conf ~f:mapper path_filters map in
+
+ (map, graph))
in
(* Now we don’t bother anymore with the graph and its dependencies, we just
@@ -233,6 +262,7 @@ let get_mapping : ImporterSyntax.t -> build_map * deps =
|> add_columns_in_map ~conf ~f:mapper conf.filters
|> add_columns_in_map ~conf ~f:mapper conf.uniq
in
+
(map, graph)
let get_process_order : ImporterSyntax.t -> t list =
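The filter columns collected above only carry a column number, so they are re-rooted onto the external's own table before going through the usual column mapping. A hedged sketch of that re-rooting in isolation (the table name argument is illustrative):

    let reroot_filter ~table_name filter =
      (* Each ImportDataTypes.Path.column leaf becomes a full path pointing at
         the external's own table *)
      ImportExpression.T.map filter
        ~f:(fun column ->
          ImportDataTypes.Path.{ alias = Some table_name; column })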
diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli
index 522436c..1eb55c5 100644
--- a/lib/analysers/dependency.mli
+++ b/lib/analysers/dependency.mli
@@ -29,7 +29,10 @@ type key = {
(** The list of columns used in the key. All the columns are referenced in
the expression. We can have many columns used inside a single key when
a function is used (for example for joining multiple columns into a
- single key) *)
+ single key).
+
+ The columns used in the filters are declared here as well. *)
+ filters : ImportDataTypes.Path.column ImportExpression.T.t list;
}
[@@deriving show, eq]
(** This type describes the join key in a table. The name is the referring table
diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml
index 7044798..15e8cda 100644
--- a/lib/analysers/filters.ml
+++ b/lib/analysers/filters.ml
@@ -4,17 +4,6 @@ module Path = ImportDataTypes.Path
module Expression = ImportExpression
open StdLabels
-(** Add a list of expressions into the group *)
-let rec add_filters :
- conf:ImporterSyntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit =
- fun ~conf group -> function
- | [] -> ()
- | hd :: [] -> Chunk.add_expression ~conf group hd
- | hd :: tl ->
- Chunk.add_expression ~conf group hd;
- Chunk.add_string group "\nAND ";
- add_filters ~conf group tl
-
type 'a cte_acc = {
n : int;
has_previous : bool;
@@ -57,7 +46,7 @@ let print :
Chunk.add_string query "SELECT ";
Chunk.add_string query conf.source.name;
Chunk.add_string query ".id, ";
- Chunk.add_expression ~conf query expression;
+ Chunk.add_expression ~repr:(Printers.path ~conf) query expression;
Chunk.add_string query " AS group_function";
Chunk.create_from_statement_of_chunck conf query;
@@ -75,7 +64,8 @@ let print :
| [] -> ()
| _ ->
Chunk.add_string query " WHERE ";
- add_filters ~conf query cte.ImporterSyntax.CTE.filters
+ Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf)
+ query cte.ImporterSyntax.CTE.filters
end;
Chunk.add_string query ")\n";
Some acc.n
@@ -83,7 +73,8 @@ let print :
(* Do not add the filters in the CTE (we don’t have any) but in the main
query *)
Chunk.add_string predicates "WHERE ";
- add_filters ~conf predicates cte.ImporterSyntax.CTE.filters;
+ Chunk.add_expressions ~sep:"\nAND " ~repr:(Printers.path ~conf)
+ predicates cte.ImporterSyntax.CTE.filters;
acc.cte_index
in
{
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index f89f5f0..dac4d89 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -135,41 +135,24 @@ let select : ImporterSyntax.t -> query * Path.t ImportExpression.T.t array =
let () = Chunk.create_from_statement_of_chunck conf request_header in
Chunk.append ~head:request_header ~tail:filters;
- let formatter = Format.formatter_of_buffer b in
(match conf.ImporterSyntax.uniq with
| [] -> ()
| uniq ->
- Format.fprintf formatter "\nGROUP BY %a"
- (Format.pp_print_list
- ~pp_sep:(fun f () -> Format.fprintf f ", ")
- (fun formatter column ->
- let seq =
- Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)
- column
- in
- Queue.transfer seq parameters))
+ Chunk.add_string request_header "\nGROUP BY ";
+ Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header
uniq);
(match conf.ImporterSyntax.sort with
| [] -> ()
| sort ->
- Format.fprintf formatter "\nORDER BY %a"
- (Format.pp_print_list
- ~pp_sep:(fun f () -> Format.fprintf f ", ")
- (fun formatter column ->
- let seq =
- Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)
- column
- in
- Queue.transfer seq parameters))
+ Chunk.add_string request_header "\nORDER BY ";
+ Chunk.add_expressions ~repr:(Printers.path ~conf) ~sep:", " request_header
sort);
- Format.pp_print_flush formatter ();
-
({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers)
let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query =
fun conf external_ ->
let internal_chunk = Chunk.create () in
- Chunk.add_expression ~conf internal_chunk
+ Chunk.add_expression ~repr:(Printers.path ~conf) internal_chunk
external_.ImporterSyntax.Extern.intern_key;
let external_key_buffer = Buffer.create 16 in
@@ -180,6 +163,7 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query =
let pointed_tables = pointed_tables conf external_.intern_key in
let parameters = Queue.create () in
+
(* We do a copy before the transfer because the Queue is reused later in the
query *)
Queue.transfer (Queue.copy internal_chunk.parameters) parameters;
@@ -195,44 +179,47 @@ let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query =
ImporterSyntax.Extern.t list ->
ImporterSyntax.Extern.t list =
fun table init ->
- let res =
- (* Do not add the same external if the value is already present *)
- let init =
- match List.find_opt init ~f:(fun ext -> table == ext) with
- | None -> table :: init
- | Some _ -> init
- in
-
- Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key
- ~f:(fun acc expr ->
- match expr.Path.alias with
- | None -> acc
- | Some _ as path -> (
- let table = ImporterSyntax.get_table_for_name conf path in
- (* Look for this table in the externals *)
- let external_opt =
- List.find_opt conf.ImporterSyntax.externals ~f:(fun t ->
- t.ImporterSyntax.Extern.target == table)
- in
- match external_opt with
- | None -> acc
- | Some ext -> collect_links ext acc))
+ (* Do not add the same external if the value is already present *)
+ let init =
+ match
+ List.find_opt init ~f:(fun ext -> ImporterSyntax.Extern.equal table ext)
+ with
+ | None -> table :: init
+ | Some _ -> init
in
- res
+
+ Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key
+ ~f:(fun acc expr ->
+ match expr.Path.alias with
+ | None -> acc
+ | Some _ as path -> (
+ let table = ImporterSyntax.get_table_for_name conf path in
+ (* Look for this table in the externals *)
+ let external_opt =
+ List.find_opt conf.ImporterSyntax.externals ~f:(fun t ->
+ t.ImporterSyntax.Extern.target == table)
+ in
+ match external_opt with
+ | None -> acc
+ | Some ext -> collect_links ext acc))
in
let dependencies = collect_links external_ [] in
let join_content = Buffer.contents external_key_buffer in
let request = Chunk.create () in
+
Chunk.add_string request "SELECT ";
+
+ (* Check if we can identify the line number in the file. It’s only possible
+ if we have a single source used as a key *)
let () =
match pointed_tables with
- | [] ->
- (* Otherwise, just return -1 *)
- Chunk.add_string request "-1"
| (table, _name) :: _ ->
(* If we have a single source, extract the row number. *)
Chunk.add_string request (Table.print_column table "id")
+ | [] ->
+ (* Otherwise, just return -1 *)
+ Chunk.add_string request "-1"
in
Chunk.add_string request ", ";
Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk);
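Note that the duplicate check in [collect_links] now relies on structural equality: two [Extern.t] values decoded from the configuration can be equal field by field while being distinct allocations, so physical equality ([==]) would let duplicates through. A small sketch of the check (the helper name is ours, and StdLabels is assumed to be in scope, as in this file):

    let already_collected ext acc =
      List.exists acc ~f:(fun other -> ImporterSyntax.Extern.equal ext other)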
diff --git a/lib/configuration/expression_parser.mly b/lib/configuration/expression_parser.mly
index 1761cce..9b97637 100644
--- a/lib/configuration/expression_parser.mly
+++ b/lib/configuration/expression_parser.mly
@@ -34,20 +34,25 @@ column_expr:
path_:
| COLUMN
- column = IDENT
- { ImportExpression.T.Path
- ImportDataTypes.Path.{ alias = None
- ; column = ImportDataTypes.Path.column_of_string column
- }
- }
-
- | COLUMN
- table = IDENT
- DOT
- column = IDENT
- { ImportExpression.T.Path
- ImportDataTypes.Path.{ alias = Some table
- ; column = ImportDataTypes.Path.column_of_string column}
+ (* The dot character is required as a separator between the table and the
+ column, like in [:table.XX], but the table name can also contain [.]
+ (this is allowed in the toml configuration syntax, as long as the
+ identifier is quoted).
+
+ So we have to handle cases like [:foo.bar.XX]
+ *)
+ path = separated_nonempty_list(DOT, IDENT)
+ { let reversed_path = List.rev path in
+ (* reversed_path is nonempty, and we can take head and tail safely *)
+ let tail = List.tl reversed_path in
+ let alias = match tail with
+ | [] -> None
+ | tl -> Some (String.concat "." (List.rev tl))
+ in
+
+ ImportExpression.T.Path
+ ImportDataTypes.Path.{ alias
+ ; column = ImportDataTypes.Path.column_of_string (List.hd reversed_path)}
}
column_:
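The alias/column splitting performed by the rule above can be illustrated outside the parser (a sketch only; the last segment is handed to [column_of_string] exactly as in the semantic action):

    (* ["foo"; "bar"; "XX"] -> (Some "foo.bar", "XX")
       ["XX"]               -> (None, "XX") *)
    let split_path segments =
      match List.rev segments with
      | [] -> assert false (* separated_nonempty_list never returns [] *)
      | column :: rev_alias ->
          let alias =
            match rev_alias with
            | [] -> None
            | _ -> Some (String.concat "." (List.rev rev_alias))
          in
          (alias, column)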
diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml
index 2df24bd..4b49686 100644
--- a/lib/configuration/importConf.ml
+++ b/lib/configuration/importConf.ml
@@ -1,14 +1,22 @@
module TomlReader = Read_conf.Make (Helpers.Toml.Decode)
-let t_of_toml : Otoml.t -> (ImporterSyntax.t, string) result =
- fun toml ->
+type loader_context = TomlReader.loader_context = {
+ checkFile : string -> bool;
+ loadFile : string -> Otoml.t;
+}
+
+let t_of_toml :
+ context:loader_context -> Otoml.t -> (ImporterSyntax.t, string) result =
+ fun ~context toml ->
let version =
Otoml.find_or ~default:ImporterSyntax.latest_version toml
(Otoml.get_integer ~strict:false)
[ "version" ]
in
match version with
- | n when n = ImporterSyntax.latest_version -> TomlReader.read toml
+ | n when n = ImporterSyntax.latest_version -> begin
+ TomlReader.read context toml
+ end
| _ ->
Printf.eprintf "Unsuported version : %d\n" version;
exit 1
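With the new signature the caller provides the effects used to validate and load the files referenced by the configuration. A hedged wiring sketch (the effect implementations are placeholders and [toml] is assumed to be in scope; the real binary may plug different effects):

    let context =
      ImportConf.
        { checkFile = Sys.file_exists; loadFile = Otoml.Parser.from_file }
    in
    ImportConf.t_of_toml ~context toml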
diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli
index d2f65f2..7234499 100644
--- a/lib/configuration/importConf.mli
+++ b/lib/configuration/importConf.mli
@@ -1,4 +1,12 @@
-val t_of_toml : Otoml.t -> (ImporterSyntax.t, string) result
+type loader_context = {
+ checkFile : string -> bool;
+ loadFile : string -> Otoml.t;
+}
+
+val t_of_toml :
+ context:loader_context -> Otoml.t -> (ImporterSyntax.t, string) result
+(** [checkFile] is called when a file is declared in the configuration. An
+ error will be raised if it returns false *)
val expression_from_string :
string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
index d406b0e..0771565 100644
--- a/lib/configuration/read_conf.ml
+++ b/lib/configuration/read_conf.ml
@@ -178,14 +178,66 @@ module Make (S : Decoders.Decode.S) = struct
let ( >>= ) = S.( >>= )
let ( >|= ) = S.( >|= )
- class loader =
+ type loader_context = {
+ checkFile : string -> bool;
+ loadFile : string -> S.value;
+ }
+
+ type dataSet = S.value * (string * (S.value * string)) list
+
+ class loader (context : loader_context) =
object (self)
+ method path_checker : (S.value * string) S.decoder -> string S.decoder =
+ fun check ->
+ Decoders.Decoder.bind
+ (fun (value, path) ->
+ if context.checkFile path then Decoders.Decoder.pure path
+ else
+ let message = "Expected a path to an existing file" in
+ let err = Decoders.Error.make ~context:value message in
+ S.fail_with err)
+ check
+ (** Check if a file given in the configuration exists: an error is raised
+ if the function [checkFile] returns false.
+
+ In the unit tests, the effect is mapped to a function that always
+ returns true. *)
+
+ method keep : type a. a S.decoder -> (S.value * a) S.decoder =
+ fun decoder value -> S.map (fun v -> (value, v)) decoder value
+ (** [keep decoder] transforms a decoder so that it keeps the initial value
+ alongside the decoded value.
+
+ This helps to build custom error messages, if we want to report an
+ error from a different place. *)
+
+ method load_resources : dataSet option S.decoder =
+ (* Do not accept dashes in the keys, as the character is used to alias
+ the same file in different mappings *)
+ let no_dash_decoder =
+ let* s = S.string in
+ match String.exists s ~f:(Char.equal '-') with
+ | true -> S.fail "Expected a key without '-'"
+ | false -> S.succeed s
+ in
+ let list_files_decoders =
+ self#keep (S.key_value_pairs' no_dash_decoder (self#keep S.string))
+ in
+ let get_field = S.field "files" list_files_decoders in
+
+ let* result =
+ S.map context.loadFile (self#path_checker (self#keep S.string))
+ in
+ let files = get_field result in
+ let dataSetResult = Result.map (fun v -> Some v) files in
+ S.from_result dataSetResult
+ (** Load an external file containing the list of files to include.*)
+
method parse_expression : type a.
?groups:a ImportExpression.T.t list ->
eq:(a -> a -> bool) ->
a ExpressionParser.path_builder ->
- S.value ->
- (a ImportExpression.T.t, S.value Decoders.Error.t) result =
+ a ImportExpression.T.t S.decoder =
fun ?(groups = []) ~eq path ->
S.string >>= fun v ->
match ExpressionParser.of_string path v with
@@ -202,41 +254,87 @@ module Make (S : Decoders.Decode.S) = struct
S.fail
"A group function cannot contains another group function")
- method source =
- let* file = S.field "file" S.string
- and* name = S.field "name" S.string
- and* tab = S.field_opt_or ~default:1 "tab" S.int in
- S.succeed { Table.file; name; tab }
-
- method external_ name =
- let* intern_key =
- S.field "intern_key"
- (self#parse_expression ~eq:Path.equal ExpressionParser.path)
- and* extern_key =
- S.field "extern_key"
- (self#parse_expression ~eq:Int.equal ExpressionParser.column)
- and* file = S.field "file" S.string
- and* tab = S.field_opt_or ~default:1 "tab" S.int
- and* allow_missing =
- S.field_opt_or ~default:false "allow_missing" S.bool
- in
+ method look_file :
+ dataset:dataSet option -> name:string -> string S.decoder =
+ fun ~dataset ~name ->
+ self#path_checker
+ (S.one_of
+ [
+ (* If the file is declared, use it *)
+ ("file", self#keep (S.field "file" S.string));
+ ( "dataset",
+ (* Otherwise search in the data set *)
+
+ (* We work inside an option monad :
+ - Do we have a dataset
+ - Do we have a valid root name to look for
+ - Do we have a match in the dataset
+ *)
+ let ( let* ) = Option.bind in
+ let element =
+ let* _, resources = dataset in
+ let* root =
+ match String.split_on_char ~sep:'-' name with
+ | hd :: _ -> Some hd
+ | _ -> None
+ in
+ let* elem = List.assoc_opt root resources in
+ Some elem
+ in
+ match (element, dataset) with
+ | Some value, _ -> S.succeed value
+ | None, Some (t, _) ->
+ let message = "Looking for \"" ^ name ^ "\"" in
+ S.fail_with (Decoders.Error.make ~context:t message)
+ | None, None -> S.fail "No dataset declared" );
+ ])
- S.succeed
- ImporterSyntax.Extern.
- {
- intern_key;
- extern_key;
- target = { name; file; tab };
- allow_missing;
- match_rule = None;
- }
+ method source : dataSet option -> Table.t S.decoder =
+ fun dataset ->
+ (* The file shall either be present in the external, or be declared
+ in the dataset *)
+ let* name = S.field "name" S.string in
+ let* file = self#look_file ~dataset ~name
+ and* tab = S.field_opt_or ~default:1 "tab" S.int in
+ S.succeed { Table.file; Table.name; Table.tab }
+
+ method external' :
+ dataSet option -> string -> ImporterSyntax.Extern.t S.decoder =
+ fun dataset name ->
+ let* intern_key =
+ S.field "intern_key"
+ (self#parse_expression ~eq:Path.equal ExpressionParser.path)
+ and* extern_key =
+ S.field "extern_key"
+ (self#parse_expression ~eq:Int.equal ExpressionParser.column)
+ and* file = self#look_file ~dataset ~name
+ and* tab = S.field_opt_or ~default:1 "tab" S.int
+ and* allow_missing =
+ S.field_opt_or ~default:false "allow_missing" S.bool
+ and* filters =
+ S.field_opt_or ~default:[] "filters"
+ (S.list
+ (self#parse_expression ~eq:Int.equal ExpressionParser.column))
+ in
+
+ S.succeed
+ ImporterSyntax.Extern.
+ {
+ intern_key;
+ extern_key;
+ target = { name; file; tab };
+ allow_missing;
+ filters;
+ }
+ (** Load the configuration for an external file to link *)
method sheet =
(* Check the uniq property first, because the group functions need
to include the same expression (at least) *)
let* uniq =
S.field_opt_or ~default:[] "uniq"
- @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path)
+ @@ S.list
+ @@ self#parse_expression ~eq:Path.equal ExpressionParser.path
in
let* columns =
@@ -247,23 +345,27 @@ module Make (S : Decoders.Decode.S) = struct
and* filters =
S.field_opt_or ~default:[] "filters"
@@ S.list
- (self#parse_expression ~eq:Path.equal ~groups:uniq
- ExpressionParser.path)
+ @@ self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path
and* sort =
S.field_opt_or ~default:[] "sort"
@@ S.list
- (self#parse_expression ~eq:Path.equal ~groups:uniq
- ExpressionParser.path)
+ @@ self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path
in
S.succeed @@ fun version source externals locale ->
ImporterSyntax.
{ version; source; externals; columns; filters; sort; uniq; locale }
method conf =
- let* source = S.field "source" self#source
+ let* dataset : dataSet option =
+ S.field_opt_or ~default:None "dataset" self#load_resources
+ in
+
+ let* source = S.field "source" (self#source dataset)
and* externals =
S.field_opt_or ~default:[] "externals"
- (S.key_value_pairs_seq self#external_)
+ @@ S.key_value_pairs_seq (self#external' dataset)
and* locale = S.field_opt "locale" S.string in
let* sheet =
S.field "sheet" self#sheet >|= fun v -> v 1 source externals locale
@@ -272,15 +374,8 @@ module Make (S : Decoders.Decode.S) = struct
S.succeed sheet
end
- let read_file file =
- S.decode_file (new loader)#conf file
- |> Result.map_error (fun v ->
- let formatter = Format.str_formatter in
- Format.fprintf formatter "%a@." S.pp_error v;
- Format.flush_str_formatter ())
-
- let read toml =
- S.decode_value (new loader)#conf toml
+ let read context toml =
+ S.decode_value (new loader context)#conf toml
|> Result.map_error (fun v ->
let formatter = Format.str_formatter in
Format.fprintf formatter "%a@." S.pp_error v;
diff --git a/lib/file_handler/dune b/lib/file_handler/dune
index 80b2ff6..6d5def0 100755
--- a/lib/file_handler/dune
+++ b/lib/file_handler/dune
@@ -9,6 +9,7 @@
core
lwt
lwt.unix
+ ppx_deriving.runtime
helpers
importDataTypes
importContainers
diff --git a/lib/file_handler/state.ml b/lib/file_handler/state.ml
index 7cf57da..e073fd3 100644
--- a/lib/file_handler/state.ml
+++ b/lib/file_handler/state.ml
@@ -151,7 +151,9 @@ let clear :
ImporterSyntax.t ->
unit ImportSQL.Db.result =
fun ~log_error db mapping conf ->
- ImportSQL.Db.clear_duplicates db (A.table mapping) (A.keys mapping)
+ let table = A.table mapping in
+ let is_root = ImportDataTypes.Table.equal table conf.source in
+ ImportSQL.Db.clear_duplicates ~is_root db table (A.keys mapping)
~f:(fun values ->
let line =
match snd @@ Array.get values 0 with
diff --git a/lib/helpers/toml.ml b/lib/helpers/toml.ml
index 1b7fb15..5f441dc 100644
--- a/lib/helpers/toml.ml
+++ b/lib/helpers/toml.ml
@@ -1,9 +1,118 @@
+open StdLabels
+
+let rec pp :
+ ?topLevel:bool -> ?path:string list -> Format.formatter -> Otoml.t -> unit =
+ fun ?(topLevel = true) ?(path = []) format -> function
+ | Otoml.TomlString v -> begin
+ match String.contains v '\n' with
+ | false ->
+ Format.pp_print_string format "\"";
+ Format.pp_print_string format v;
+ Format.pp_print_string format "\""
+ | true ->
+ Format.pp_print_string format {|"""|};
+ Format.pp_print_text format v;
+ Format.pp_print_string format {|"""|}
+ end
+ | Otoml.TomlInteger i -> Format.pp_print_int format i
+ | Otoml.TomlFloat f -> Format.pp_print_float format f
+ | Otoml.TomlBoolean b -> Format.pp_print_bool format b
+ | Otoml.TomlArray l -> begin
+ match (topLevel, l) with
+ | _, [] -> Format.pp_print_string format "[]"
+ | false, _ -> Format.pp_print_string format "..."
+ | true, l ->
+ Format.pp_print_string format "[";
+ Format.pp_print_break format 0 4;
+ Format.pp_open_vbox format 0;
+ Format.pp_print_list
+ ~pp_sep:(fun f () ->
+ Format.pp_print_string f ",";
+ Format.pp_print_cut f ())
+ pp format l;
+ Format.pp_close_box format ();
+ Format.pp_print_cut format ();
+ Format.pp_print_string format "]"
+ end
+ | Otoml.TomlTable elements | Otoml.TomlInlineTable elements ->
+ pp_table ~path format elements
+ | Otoml.TomlTableArray t -> Format.pp_print_list pp format t
+ | Otoml.TomlOffsetDateTime _
+ | Otoml.TomlLocalDate _
+ | Otoml.TomlLocalDateTime _
+ | Otoml.TomlLocalTime _ -> ()
+
+and pp_key_values : Format.formatter -> string * Otoml.t -> unit =
+ fun format (name, value) ->
+ Format.fprintf format "%s = %a" name (pp ~topLevel:false ~path:[]) value
+
+and pp_table :
+ ?path:string list -> Format.formatter -> (string * Otoml.t) list -> unit =
+ fun ?(path = []) format elements ->
+ (* Create two lists, one for the subtables, and one for the values.
+
+ As a table header remains in effect until the next one, we print the
+ properties first, then the subtables contained in the element.
+ *)
+ let subtables, properties =
+ List.partition elements ~f:(fun (_, v) ->
+ match v with
+ | Otoml.TomlTable _ -> true
+ | _ -> false)
+ in
+
+ let () =
+ match properties with
+ | [] -> ()
+ | _ ->
+ let isTopLevel =
+ match path with
+ | [] -> true
+ | _ -> false
+ in
+ if not isTopLevel then begin
+ let path = List.rev path in
+ Format.pp_print_cut format ();
+ Format.fprintf format "[%a]"
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.pp_print_string f ".")
+ Format.pp_print_string)
+ path;
+ Format.pp_print_break format 0 4;
+ Format.pp_open_vbox format 0
+ end;
+ if isTopLevel then begin
+ Format.pp_print_list ~pp_sep:Format.pp_print_cut
+ (fun format v ->
+ match v with
+ | key, Otoml.TomlTable elements ->
+ pp_table ~path:(key :: path) format elements
+ | other -> pp_key_values format other)
+ format properties
+ end
+ else begin
+ Format.pp_print_string format "..."
+ end;
+
+ if not isTopLevel then begin
+ Format.pp_close_box format ()
+ end;
+ Format.pp_print_cut format ()
+ in
+ (* Then go deeper inside each subtable *)
+ List.iter subtables ~f:(function
+ | name, Otoml.TomlTable v -> pp_table ~path:(name :: path) format v
+ | _ -> (* Because of the partition, this should not happen *) ())
+
module Decode = struct
module S = struct
type value = Otoml.t
let pp : Format.formatter -> value -> unit =
- fun format t -> Format.pp_print_string format (Otoml.Printer.to_string t)
+ fun formatter v ->
+ Format.pp_open_vbox formatter 0;
+ pp formatter v;
+ Format.pp_close_box formatter ()
let of_string : string -> (value, string) result =
Otoml.Parser.from_string_result
@@ -22,7 +131,7 @@ module Decode = struct
let get_key_value_pairs : value -> (value * value) list option =
Otoml.get_opt (fun key ->
- Otoml.get_table key |> List.map (fun (k, v) -> (Otoml.string k, v)))
+ Otoml.get_table key |> List.map ~f:(fun (k, v) -> (Otoml.string k, v)))
let to_list : value list -> value = Otoml.array
end
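The new printer is only reached through [S.pp] when decoder errors are rendered, and it prints a readable TOML-like view of the offending value. A quick sketch (assuming the top-level [pp] stays exposed by Helpers.Toml):

    let () =
      let value =
        Otoml.table
          [ ("version", Otoml.integer 1)
          ; ("source", Otoml.table [ ("name", Otoml.string "example") ]) ]
      in
      (* Prints roughly:
           version = 1
           [source]
               ...
         Nested values are elided on purpose: the printer only has to situate
         a decoding error, not dump the whole document. *)
      Format.printf "%a@." (Helpers.Toml.pp ~topLevel:true ~path:[]) value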
diff --git a/lib/sql/db.ml b/lib/sql/db.ml
index f2a2653..1d92a2e 100644
--- a/lib/sql/db.ml
+++ b/lib/sql/db.ml
@@ -286,17 +286,6 @@ let create_view : Sqlite3.db -> ImporterSyntax.t -> unit T.result =
Ok ()
-(*
- let query, _ = ImportAnalyser.Query.select output in
-
- let query =
- { query with q = Printf.sprintf "CREATE VIEW result AS %s" query.q }
- in
- let** statement = execute_query db query in
-
- Sqlite3.step statement |> T.to_result
- *)
-
let check_foreign :
f:((string * ImportDataTypes.Value.t) array -> unit) ->
Sqlite3.db ->
@@ -316,66 +305,118 @@ let check_foreign :
|> T.to_result
let clear_duplicates :
+ is_root:bool ->
f:((string * ImportDataTypes.Value.t) array -> unit) ->
'a t ->
ImportDataTypes.Table.t ->
ImportAnalyser.Dependency.key list ->
unit T.result =
- fun ~f db table keys ->
+ fun ~is_root ~f db table keys ->
let table_name = Table.name table in
- List.fold_left keys ~init:(Ok ())
- ~f:(fun state { ImportAnalyser.Dependency.name; _ } ->
- let** _ = state in
-
- let select_query =
- String.concat ~sep:""
- [
- "SELECT '";
- table_name;
- "'.id, '";
- table_name;
- "'.'key_";
- name;
- "', '";
- name;
- "' FROM '";
- table_name;
- "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY '";
- table_name;
- "'.'key_";
- name;
- "' ORDER BY (id)) AS row_num from '";
- table_name;
- "') other_table WHERE other_table.row_num <> 1 and '";
- table_name;
- "'.id = other_table.id";
- ]
- in
- let stmt = Sqlite3.prepare db select_query in
+ let keys_name =
+ List.map keys ~f:(fun ImportAnalyser.Dependency.{ name; filters; _ } ->
+ ( name,
+ filters,
+ String.concat ~sep:"" [ "'"; table_name; "'.'key_"; name; "'" ] ))
+ in
- ignore
- @@ Sqlite3.iter stmt ~f:(fun data ->
- let values =
- Array.mapi data ~f:(fun i value ->
- (Sqlite3.column_name stmt i, T.to_datatype value))
- in
- f values);
+ let** _ =
+ List.fold_left keys_name ~init:(Ok ())
+ ~f:(fun state (name, filters, key_name) ->
+ let table_external =
+ ImportDataTypes.Table.{ file = ""; tab = 0; name = table_name }
+ in
- let delete_query =
- Printf.sprintf
- {|UPDATE '%s'
-SET key_%s = NULL
+ (* If there are filters to apply in the external, prepare the predicates now *)
+ let join_buffer = ImportAnalyser.Chunk.create () in
+ let () =
+ match filters with
+ | [] -> ()
+ | _ ->
+ ImportAnalyser.Chunk.add_string join_buffer " WHERE ";
+ ImportAnalyser.Chunk.add_expressions ~sep:"\nAND "
+ ~repr:(fun formatter column ->
+ Format.fprintf formatter "%s"
+ (Table.print_column table_external
+ ("col_" ^ string_of_int column)))
+ join_buffer filters
+ in
+ let values =
+ Queue.to_seq join_buffer.parameters
+ |> Seq.map (fun v -> T.of_datatype v)
+ |> List.of_seq
+ in
+
+ let** _ = state in
+
+ let select_query =
+ String.concat ~sep:""
+ [
+ "SELECT '";
+ table_name;
+ "'.id, ";
+ key_name;
+ ", '";
+ name;
+ "' FROM '";
+ table_name;
+ "' INNER JOIN (SELECT id, row_number() OVER(PARTITION BY ";
+ key_name;
+ " ORDER BY (id)) AS row_num from '";
+ table_name;
+ "'";
+ Buffer.contents join_buffer.b;
+ ") other_table WHERE other_table.row_num <> 1 AND \
+ 'other_table'.id = ";
+ Table.print_column table_external "id";
+ ]
+ in
+ let stmt = Sqlite3.prepare db select_query in
+ let* _ = Sqlite3.bind_values stmt values in
+
+ ignore
+ @@ Sqlite3.iter stmt ~f:(fun data ->
+ let values =
+ Array.mapi data ~f:(fun i value ->
+ (Sqlite3.column_name stmt i, T.to_datatype value))
+ in
+ f values);
+
+ let clear_query =
+ Printf.sprintf
+ {|UPDATE '%s'
+SET 'key_%s' = NULL
FROM
(
- SELECT id, row_number() OVER(PARTITION BY key_%s ORDER BY (id)) AS row_num
+ SELECT id, row_number() OVER(PARTITION BY %s ORDER BY (id)) AS row_num, *
from '%s'
-) other_table
+%s) other_table
WHERE other_table.row_num <> 1
and '%s'.id = other_table.id|}
- table_name name name table_name table_name
- in
+ table_name name key_name table_name
+ (Buffer.contents join_buffer.b)
+ table_name
+ in
- Sqlite3.exec db delete_query |> T.to_result)
+ let stmt = Sqlite3.prepare db clear_query in
+ let* _ = Sqlite3.bind_values stmt values in
+ let* _ = Sqlite3.step stmt in
+ Result.ok ())
+ in
+
+ (* Now remove from the database all the lines having ALL their keys set to NULL *)
+ match (is_root, keys_name) with
+ | true, _ | _, [] -> Result.ok ()
+ | _ ->
+ let predicates =
+ List.map keys_name ~f:(fun (_, _, key) -> key ^ " IS NULL")
+ |> String.concat ~sep:" AND "
+ in
+ let delete_query =
+ Printf.sprintf {|DELETE FROM '%s' WHERE %s|} table_name predicates
+ in
+ let** _ = Sqlite3.exec db delete_query |> T.to_result in
+ Result.ok ()
type 'a result = ('a, exn) Result.t
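The final cleanup step only runs for non-root tables with at least one key, and deletes the rows whose every key ended up NULL. A sketch of the predicate it assembles, for two keys named "person" and "project" on a table "tickets" (names illustrative, with StdLabels opened as in this file):

    let predicates =
      List.map [ "person"; "project" ] ~f:(fun name ->
          Printf.sprintf "'%s'.'key_%s' IS NULL" "tickets" name)
      |> String.concat ~sep:" AND "
    (* -> "'tickets'.'key_person' IS NULL AND 'tickets'.'key_project' IS NULL" *)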
diff --git a/lib/sql/db.mli b/lib/sql/db.mli
index 213fb27..b73479b 100644
--- a/lib/sql/db.mli
+++ b/lib/sql/db.mli
@@ -84,12 +84,16 @@ val check_foreign :
unit result
val clear_duplicates :
+ is_root:bool ->
f:((string * ImportDataTypes.Value.t) array -> unit) ->
'a t ->
ImportDataTypes.Table.t ->
ImportAnalyser.Dependency.key list ->
unit result
-(** Remove all duplicated keys in the table by setting them to NULL. *)
+(** Remove all duplicated keys in the table by setting them to NULL.
+
+ This function checks each key referenced in the table, and processes
+ each key separately. *)
val insert_header :
'a t ->
diff --git a/lib/sql/hashs.ml b/lib/sql/hashs.ml
index eced2b4..4df8ca1 100644
--- a/lib/sql/hashs.ml
+++ b/lib/sql/hashs.ml
@@ -5,7 +5,6 @@
before inserting the values. *)
open StdLabels
-module Table = ImportDataTypes.Table
let ( let* ) = Result.bind
@@ -21,16 +20,21 @@ let evaluate : ImportAnalyser.Dependency.t -> int =
(* Extract all the references to this table *)
let keys =
List.map (ImportAnalyser.Dependency.keys table)
- ~f:(fun ImportAnalyser.Dependency.{ name; columns; expression } ->
+ ~f:(fun
+ ImportAnalyser.Dependency.{ name; columns; expression; filters } ->
+ (* It’s better to explicitly ignore the fields we want to exclude than to
+ use the wildcard pattern _ in the function definition.
+
+ This way, adding a new field in the type will raise a compilation error. *)
ignore columns;
- (name, expression))
+ (name, expression, filters))
in
Hashtbl.hash keys
let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result =
fun db table ->
let source = ImportAnalyser.Dependency.table table in
- let table_name = Table.name source in
+ let table_name = ImportDataTypes.Table.name source in
let hash = evaluate table in
@@ -56,7 +60,7 @@ let insert : 'a T.t -> ImportAnalyser.Dependency.t -> unit T.result =
let query : 'a T.t -> ImportDataTypes.Table.t -> int option T.result =
fun db table ->
- let table_name = Table.name table in
+ let table_name = ImportDataTypes.Table.name table in
let query =
String.concat ~sep:""
[ "SELECT hash FROM 'hashes' WHERE hashes.'table' = '"; table_name; "'" ]
diff --git a/lib/syntax/importerSyntax.ml b/lib/syntax/importerSyntax.ml
index 7788613..d91db09 100644
--- a/lib/syntax/importerSyntax.ml
+++ b/lib/syntax/importerSyntax.ml
@@ -14,13 +14,18 @@ let toml_of_table Table.{ file; tab; name } =
Otoml.table values
+let repr_expression_list ~repr l =
+ Otoml.array
+ (List.map l ~f:(fun v ->
+ Otoml.string (ImportExpression.Repr.repr ~top:true repr v)))
+
module Extern = struct
type t = {
intern_key : Path.t E.t;
target : Table.t;
extern_key : Path.column E.t;
allow_missing : bool;
- match_rule : string option;
+ filters : ImportDataTypes.Path.column ImportExpression.T.t list;
}
[@@deriving show, eq]
(** Describes a relation between two tables *)
@@ -38,6 +43,10 @@ module Extern = struct
extern.extern_key );
("file", Otoml.string extern.target.file);
("allow_missing", Otoml.boolean extern.allow_missing);
+ ( "filters",
+ repr_expression_list
+ ~repr:(fun s -> ":" ^ Path.column_to_string s)
+ extern.filters );
]
in
@@ -66,19 +75,13 @@ type t = {
}
let repr t =
- let repr_expression_list l =
- Otoml.array
- (List.map l ~f:(fun v ->
- Otoml.string (ImportExpression.Repr.repr ~top:true Path.show v)))
- in
-
let sheet =
Otoml.table
[
- ("columns", repr_expression_list t.columns);
- ("filters", repr_expression_list t.filters);
- ("sort", repr_expression_list t.sort);
- ("uniq", repr_expression_list t.uniq);
+ ("columns", repr_expression_list ~repr:Path.show t.columns);
+ ("filters", repr_expression_list ~repr:Path.show t.filters);
+ ("sort", repr_expression_list ~repr:Path.show t.sort);
+ ("uniq", repr_expression_list ~repr:Path.show t.uniq);
]
in
@@ -128,7 +131,7 @@ let dummy_conf =
{
source = { file = ""; tab = 0; name = "" };
version = latest_version;
- locale = Some "C";
+ locale = None;
externals = [];
columns = [];
filters = [];
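For reference, the serialised form mirrors the new field: each external gains a [filters] array, rendered through [repr_expression_list] with the column printer added above. A sketch of that call in isolation (the exact textual output depends on ImportExpression.Repr.repr):

    let filters_toml (extern : Extern.t) =
      repr_expression_list
        ~repr:(fun column -> ":" ^ Path.column_to_string column)
        extern.Extern.filters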
diff --git a/lib/syntax/importerSyntax.mli b/lib/syntax/importerSyntax.mli
index 49b7364..47dabe6 100644
--- a/lib/syntax/importerSyntax.mli
+++ b/lib/syntax/importerSyntax.mli
@@ -4,7 +4,7 @@ module Extern : sig
target : ImportDataTypes.Table.t;
extern_key : ImportDataTypes.Path.column ImportExpression.T.t;
allow_missing : bool;
- match_rule : string option;
+ filters : ImportDataTypes.Path.column ImportExpression.T.t list;
}
[@@deriving show, eq]
end