aboutsummaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
Diffstat (limited to 'bin')
-rwxr-xr-xbin/dune38
-rw-r--r--bin/importer.ml316
2 files changed, 354 insertions, 0 deletions
diff --git a/bin/dune b/bin/dune
new file mode 100755
index 0000000..fad168d
--- /dev/null
+++ b/bin/dune
@@ -0,0 +1,38 @@
+(env
+ (dev
+ (flags (:standard -warn-error -A))
+ )
+ (release
+ (ocamlopt_flags (-O3)))
+)
+
+(executable
+ (name importer)
+ (libraries
+ csv
+ lwt
+ lwt.unix
+ otoml
+ yojson
+ tools
+ helpers
+ importConf
+ importAnalyser
+ importContainers
+ importDataTypes
+ importCSV
+ importErrors
+ importExpression
+ importFileHandler
+ importSQL
+ )
+ (link_flags (:standard))
+(preprocess (pps ppx_yojson_conv))
+)
+
+(install
+ (files importer.exe)
+ (section bin)
+ (package importer))
+
+(dirs :standard \ examples)
diff --git a/bin/importer.ml b/bin/importer.ml
new file mode 100644
index 0000000..f737a46
--- /dev/null
+++ b/bin/importer.ml
@@ -0,0 +1,316 @@
+open StdLabels
+module Analyse = ImportAnalyser.Dependency
+module Headers = ImportAnalyser.Headers
+module Db = ImportSQL.Db
+module Table = ImportDataTypes.Table
+module Path = ImportDataTypes.Path
+
+let ( let* ) = Result.bind
+
+let exists source =
+ match Sys.file_exists source with
+ | true -> source
+ | false ->
+ prerr_endline
+ @@ String.concat ~sep:" " [ "The file"; source; "does not exists" ];
+ exit 1
+
+let creation_date file =
+ try
+ let stat = Unix.stat file in
+ Some stat.Unix.st_mtime
+ with
+ | Unix.Unix_error _ -> None
+
+module Args = struct
+ type arguments = {
+ configuration : ImportConf.Syntax.t;
+ conf_name : string;
+ bom : bool;
+ print_conf : bool;
+ mapping_date : float option;
+ }
+
+ let load_conf : string -> ImportConf.Syntax.t =
+ fun file ->
+ match Filename.extension file with
+ | ".json" -> (
+ let configuration_file = Yojson.Safe.from_file (exists file) in
+ try ImportConf.t_of_yojson configuration_file with
+ | e ->
+ print_endline @@ ImportErrors.repr_error e;
+ exit 1)
+ | _ -> (
+ let (conf : (Db.Syntax.t, string) result) =
+ let* configuration_file =
+ try Ok (Otoml.Parser.from_file (exists file)) with
+ | Otoml.Parse_error (position, message) ->
+ let format_position () = function
+ | None -> ""
+ | Some (line, col) ->
+ Printf.sprintf "At line %d, col %d: " line col
+ in
+
+ let error_msg =
+ Printf.sprintf "%aError %s" format_position position message
+ in
+
+ Error error_msg
+ in
+ ImportConf.t_of_toml configuration_file
+ in
+ match conf with
+ | Error e ->
+ prerr_endline e;
+ exit 1
+ | Ok e -> e)
+
+ let load () =
+ let conf = ref ("", ImportConf.dummy_conf)
+ and bom = ref true
+ and usage = "importer [--conf configuration.toml]"
+ and print_conf = ref false in
+
+ let annon_fun _filename =
+ print_endline usage;
+ exit 1
+ and set_conf file = conf := (file, load_conf file) in
+ let speclist =
+ [
+ ( "--version",
+ Arg.Unit
+ (fun () ->
+ Printf.printf "Version %s\n" Tools.Git_hash.revision;
+ exit 0),
+ "\tDisplay the version of the application and exit" );
+ ("--conf", Arg.String set_conf, "Configuration file");
+ ("-c", Arg.String set_conf, "Configuration file");
+ ("--no-bom", Arg.Clear bom, "Do not insert a BOM in the CSV");
+ ( "--print-conf",
+ Arg.Set print_conf,
+ "Reformat the configuration file and exit" );
+ ]
+ in
+ let () = Arg.parse speclist annon_fun usage in
+ {
+ configuration = snd !conf;
+ bom = !bom;
+ conf_name = fst !conf;
+ print_conf = !print_conf;
+ mapping_date = None;
+ }
+end
+
+(** Print the result from the query.
+
+ Each value is given with the associated expression in the configuration,
+ the function is expected to convert the result into string in order to
+ include the content in the output CSV.
+ *)
+let printer : Path.t ImportExpression.T.t * ImportCSV.DataType.t -> string =
+ fun (column, value) ->
+ ignore column;
+ ImportCSV.DataType.to_string value
+
+let bom = "\xEF\xBB\xBF"
+
+let process_table :
+ 'a Db.t ->
+ string ->
+ Csv.out_channel Lazy.t ->
+ Args.arguments ->
+ string array Headers.SheeetMap.t ->
+ Analyse.t ->
+ string array Headers.SheeetMap.t =
+ fun db dirname log_error conf map mapping ->
+ let source = Analyse.table mapping in
+ (* Load all the element in the database *)
+ let file = Filename.concat dirname source.file in
+
+ let file_date = creation_date file in
+ let recent =
+ match (conf.mapping_date, file_date) with
+ | Some r, Some t -> r > t
+ | _, _ -> false
+ in
+
+ match (recent, ImportSQL.Db.check_table_schema db mapping) with
+ | true, Ok true -> (
+ (* If the data extraction is more recent than the sheet, and if the
+ configuration did not changed for this table, we do not reload the
+ table. *)
+ match ImportSQL.Db.query_headers db source with
+ | Ok v ->
+ let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in
+ Headers.SheeetMap.add source text_headers map
+ | Error _ -> map)
+ | _ ->
+ Printf.printf "Loading document %s %!" source.name;
+ let headers_opt =
+ let extension = String.lowercase_ascii (Filename.extension file) in
+ match extension with
+ | ".xlsx" ->
+ Lwt_main.run
+ @@ ImportFileHandler.Xlsx2sql.importInDatable ~dirname
+ ~conf:conf.configuration ~log_error mapping db
+ | ".csv" ->
+ Lwt_main.run
+ @@ ImportFileHandler.Csv2sql.importInDatable ~dirname
+ ~conf:conf.configuration ~log_error mapping db
+ | _ -> raise (ImportErrors.Unknown_source extension)
+ in
+ Helpers.Console.close_cursor ();
+
+ (* Get the headers *)
+ let headers =
+ match headers_opt with
+ | None -> map
+ | Some v ->
+ let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in
+ Headers.SheeetMap.add source text_headers map
+ in
+
+ (* For each external check if the values are loaded *)
+ let dependancies =
+ ImportConf.get_dependancies_for_table conf.configuration source
+ in
+ List.iter dependancies ~f:(fun ext ->
+ match ext.ImportConf.Syntax.allow_missing with
+ | true -> ()
+ | false -> (
+ Printf.printf "Checking dependancies for %s %!"
+ ext.ImportConf.Syntax.target.ImportDataTypes.Table.name;
+ try
+ ignore
+ @@ Db.check_foreign db conf.configuration ext ~f:(fun values ->
+ Helpers.Console.update_cursor ();
+
+ let row =
+ match snd (Array.get values 0) with
+ | ImportCSV.DataType.Integer i -> i
+ | _ -> -1
+ and value = snd (Array.get values 1) in
+ let error =
+ ImportErrors.
+ {
+ source;
+ sheet = source.Table.tab;
+ row;
+ value;
+ target = Some ext.ImportConf.Syntax.target;
+ exn =
+ Failure
+ (Printf.sprintf "Key '%s' not found"
+ (CSV.DataType.to_string value));
+ }
+ in
+
+ ImportErrors.output_error log_error error);
+ Helpers.Console.close_cursor ()
+ with
+ | Sqlite3.Error _ ->
+ (* We can have errors here if we have cycles in the
+ dependencies, but it’s OK at this step.*)
+ ()));
+ headers
+
+let () =
+ let conf = Args.load () in
+
+ (* Global configuration variables *)
+ let dirname = Filename.dirname conf.conf_name in
+ let basename = Filename.basename conf.conf_name in
+ let prefix = Filename.remove_extension basename in
+
+ (* Analyse the configuration *)
+ let process_order =
+ try Analyse.get_process_order conf.configuration with
+ | e ->
+ prerr_endline @@ ImportErrors.repr_error e;
+ exit 1
+ in
+
+ (* With the printconf option, we do not need to open any file *)
+ if conf.print_conf then (
+ let toml = ImportConf.Syntax.repr conf.configuration in
+ Otoml.Printer.to_channel ~collapse_tables:true stdout toml;
+ exit 0);
+
+ let sqlfile = Filename.concat dirname (prefix ^ ".sqlite") in
+ let conf = { conf with mapping_date = creation_date sqlfile } in
+
+ (* Ensure that all the files exists *)
+ List.iter process_order ~f:(fun (mapping : Analyse.t) ->
+ let source = Analyse.table mapping in
+ (* First, check *)
+ if not (Sys.file_exists source.Table.file) then begin
+ ignore @@ exists @@ Filename.concat dirname source.Table.file
+ end);
+
+ (* The configuration is loaded and valid, we create the errors log file *)
+ let log_error = ImportErrors.log ~with_bom:conf.bom prefix dirname in
+
+ (* Now check if we can open the output file. Opening the file "truncate" it.
+ I do not want to delete the file content before I am sure that I can
+ process it. This is why the file is opened after reading the configuration
+ file although we do not need the configuration file for it.
+ *)
+ let out_channel =
+ try
+ Out_channel.open_bin (Filename.concat dirname (String.cat prefix ".csv"))
+ with
+ | Sys_error e ->
+ prerr_endline e;
+ exit 1
+ in
+
+ Fun.protect
+ ~finally:(fun () ->
+ Out_channel.flush out_channel;
+ Out_channel.close_noerr out_channel)
+ (fun () ->
+ (* Add the BOM to the CSV *)
+ if conf.bom then output_string out_channel bom;
+
+ let out_csv =
+ Csv.to_channel ~separator:';' ~excel_tricks:false out_channel
+ in
+
+ (* Create the database *)
+ ignore
+ @@ Db.with_db sqlfile (fun db ->
+ let headers =
+ List.fold_left process_order ~init:Headers.SheeetMap.empty
+ ~f:(process_table db dirname log_error conf)
+ in
+
+ let first_line = Headers.columns conf.configuration headers in
+ Csv.output_record out_csv first_line;
+
+ (* Run the query *)
+ ignore @@ Db.create_view db conf.configuration;
+ Printf.printf "Extracting results %!";
+ match
+ Db.query
+ ~f:(fun v ->
+ let arr = Array.to_seq v |> Seq.map printer |> List.of_seq in
+
+ Helpers.Console.update_cursor ();
+ Csv.output_record out_csv arr)
+ db conf.configuration
+ with
+ | Ok () ->
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27)
+ (char_of_int 27) (char_of_int 27);
+ Ok ()
+ | Error e ->
+ Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27)
+ (char_of_int 27) (char_of_int 27);
+ print_endline @@ ImportErrors.repr_error e;
+
+ Ok ()));
+
+ (* Close the error file *)
+ match Lazy.is_val log_error with
+ | false -> ()
+ | true -> Csv.close_out (Lazy.force log_error)