From 6b377719c10d5ab3343fd5221f99a4a21008e25a Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 14 Mar 2024 08:26:58 +0100 Subject: Initial commit --- bin/dune | 38 +++++++ bin/importer.ml | 316 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100755 bin/dune create mode 100644 bin/importer.ml (limited to 'bin') diff --git a/bin/dune b/bin/dune new file mode 100755 index 0000000..fad168d --- /dev/null +++ b/bin/dune @@ -0,0 +1,38 @@ +(env + (dev + (flags (:standard -warn-error -A)) + ) + (release + (ocamlopt_flags (-O3))) +) + +(executable + (name importer) + (libraries + csv + lwt + lwt.unix + otoml + yojson + tools + helpers + importConf + importAnalyser + importContainers + importDataTypes + importCSV + importErrors + importExpression + importFileHandler + importSQL + ) + (link_flags (:standard)) +(preprocess (pps ppx_yojson_conv)) +) + +(install + (files importer.exe) + (section bin) + (package importer)) + +(dirs :standard \ examples) diff --git a/bin/importer.ml b/bin/importer.ml new file mode 100644 index 0000000..f737a46 --- /dev/null +++ b/bin/importer.ml @@ -0,0 +1,316 @@ +open StdLabels +module Analyse = ImportAnalyser.Dependency +module Headers = ImportAnalyser.Headers +module Db = ImportSQL.Db +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +let ( let* ) = Result.bind + +let exists source = + match Sys.file_exists source with + | true -> source + | false -> + prerr_endline + @@ String.concat ~sep:" " [ "The file"; source; "does not exists" ]; + exit 1 + +let creation_date file = + try + let stat = Unix.stat file in + Some stat.Unix.st_mtime + with + | Unix.Unix_error _ -> None + +module Args = struct + type arguments = { + configuration : ImportConf.Syntax.t; + conf_name : string; + bom : bool; + print_conf : bool; + mapping_date : float option; + } + + let load_conf : string -> ImportConf.Syntax.t = + fun file -> + match Filename.extension file with + | ".json" -> ( + let configuration_file = Yojson.Safe.from_file (exists file) in + try ImportConf.t_of_yojson configuration_file with + | e -> + print_endline @@ ImportErrors.repr_error e; + exit 1) + | _ -> ( + let (conf : (Db.Syntax.t, string) result) = + let* configuration_file = + try Ok (Otoml.Parser.from_file (exists file)) with + | Otoml.Parse_error (position, message) -> + let format_position () = function + | None -> "" + | Some (line, col) -> + Printf.sprintf "At line %d, col %d: " line col + in + + let error_msg = + Printf.sprintf "%aError %s" format_position position message + in + + Error error_msg + in + ImportConf.t_of_toml configuration_file + in + match conf with + | Error e -> + prerr_endline e; + exit 1 + | Ok e -> e) + + let load () = + let conf = ref ("", ImportConf.dummy_conf) + and bom = ref true + and usage = "importer [--conf configuration.toml]" + and print_conf = ref false in + + let annon_fun _filename = + print_endline usage; + exit 1 + and set_conf file = conf := (file, load_conf file) in + let speclist = + [ + ( "--version", + Arg.Unit + (fun () -> + Printf.printf "Version %s\n" Tools.Git_hash.revision; + exit 0), + "\tDisplay the version of the application and exit" ); + ("--conf", Arg.String set_conf, "Configuration file"); + ("-c", Arg.String set_conf, "Configuration file"); + ("--no-bom", Arg.Clear bom, "Do not insert a BOM in the CSV"); + ( "--print-conf", + Arg.Set print_conf, + "Reformat the configuration file and exit" ); + ] + in + let () = Arg.parse speclist annon_fun usage in + { + configuration = snd !conf; + bom = !bom; + conf_name = fst !conf; + print_conf = !print_conf; + mapping_date = None; + } +end + +(** Print the result from the query. + + Each value is given with the associated expression in the configuration, + the function is expected to convert the result into string in order to + include the content in the output CSV. + *) +let printer : Path.t ImportExpression.T.t * ImportCSV.DataType.t -> string = + fun (column, value) -> + ignore column; + ImportCSV.DataType.to_string value + +let bom = "\xEF\xBB\xBF" + +let process_table : + 'a Db.t -> + string -> + Csv.out_channel Lazy.t -> + Args.arguments -> + string array Headers.SheeetMap.t -> + Analyse.t -> + string array Headers.SheeetMap.t = + fun db dirname log_error conf map mapping -> + let source = Analyse.table mapping in + (* Load all the element in the database *) + let file = Filename.concat dirname source.file in + + let file_date = creation_date file in + let recent = + match (conf.mapping_date, file_date) with + | Some r, Some t -> r > t + | _, _ -> false + in + + match (recent, ImportSQL.Db.check_table_schema db mapping) with + | true, Ok true -> ( + (* If the data extraction is more recent than the sheet, and if the + configuration did not changed for this table, we do not reload the + table. *) + match ImportSQL.Db.query_headers db source with + | Ok v -> + let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in + Headers.SheeetMap.add source text_headers map + | Error _ -> map) + | _ -> + Printf.printf "Loading document %s %!" source.name; + let headers_opt = + let extension = String.lowercase_ascii (Filename.extension file) in + match extension with + | ".xlsx" -> + Lwt_main.run + @@ ImportFileHandler.Xlsx2sql.importInDatable ~dirname + ~conf:conf.configuration ~log_error mapping db + | ".csv" -> + Lwt_main.run + @@ ImportFileHandler.Csv2sql.importInDatable ~dirname + ~conf:conf.configuration ~log_error mapping db + | _ -> raise (ImportErrors.Unknown_source extension) + in + Helpers.Console.close_cursor (); + + (* Get the headers *) + let headers = + match headers_opt with + | None -> map + | Some v -> + let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in + Headers.SheeetMap.add source text_headers map + in + + (* For each external check if the values are loaded *) + let dependancies = + ImportConf.get_dependancies_for_table conf.configuration source + in + List.iter dependancies ~f:(fun ext -> + match ext.ImportConf.Syntax.allow_missing with + | true -> () + | false -> ( + Printf.printf "Checking dependancies for %s %!" + ext.ImportConf.Syntax.target.ImportDataTypes.Table.name; + try + ignore + @@ Db.check_foreign db conf.configuration ext ~f:(fun values -> + Helpers.Console.update_cursor (); + + let row = + match snd (Array.get values 0) with + | ImportCSV.DataType.Integer i -> i + | _ -> -1 + and value = snd (Array.get values 1) in + let error = + ImportErrors. + { + source; + sheet = source.Table.tab; + row; + value; + target = Some ext.ImportConf.Syntax.target; + exn = + Failure + (Printf.sprintf "Key '%s' not found" + (CSV.DataType.to_string value)); + } + in + + ImportErrors.output_error log_error error); + Helpers.Console.close_cursor () + with + | Sqlite3.Error _ -> + (* We can have errors here if we have cycles in the + dependencies, but it’s OK at this step.*) + ())); + headers + +let () = + let conf = Args.load () in + + (* Global configuration variables *) + let dirname = Filename.dirname conf.conf_name in + let basename = Filename.basename conf.conf_name in + let prefix = Filename.remove_extension basename in + + (* Analyse the configuration *) + let process_order = + try Analyse.get_process_order conf.configuration with + | e -> + prerr_endline @@ ImportErrors.repr_error e; + exit 1 + in + + (* With the printconf option, we do not need to open any file *) + if conf.print_conf then ( + let toml = ImportConf.Syntax.repr conf.configuration in + Otoml.Printer.to_channel ~collapse_tables:true stdout toml; + exit 0); + + let sqlfile = Filename.concat dirname (prefix ^ ".sqlite") in + let conf = { conf with mapping_date = creation_date sqlfile } in + + (* Ensure that all the files exists *) + List.iter process_order ~f:(fun (mapping : Analyse.t) -> + let source = Analyse.table mapping in + (* First, check *) + if not (Sys.file_exists source.Table.file) then begin + ignore @@ exists @@ Filename.concat dirname source.Table.file + end); + + (* The configuration is loaded and valid, we create the errors log file *) + let log_error = ImportErrors.log ~with_bom:conf.bom prefix dirname in + + (* Now check if we can open the output file. Opening the file "truncate" it. + I do not want to delete the file content before I am sure that I can + process it. This is why the file is opened after reading the configuration + file although we do not need the configuration file for it. + *) + let out_channel = + try + Out_channel.open_bin (Filename.concat dirname (String.cat prefix ".csv")) + with + | Sys_error e -> + prerr_endline e; + exit 1 + in + + Fun.protect + ~finally:(fun () -> + Out_channel.flush out_channel; + Out_channel.close_noerr out_channel) + (fun () -> + (* Add the BOM to the CSV *) + if conf.bom then output_string out_channel bom; + + let out_csv = + Csv.to_channel ~separator:';' ~excel_tricks:false out_channel + in + + (* Create the database *) + ignore + @@ Db.with_db sqlfile (fun db -> + let headers = + List.fold_left process_order ~init:Headers.SheeetMap.empty + ~f:(process_table db dirname log_error conf) + in + + let first_line = Headers.columns conf.configuration headers in + Csv.output_record out_csv first_line; + + (* Run the query *) + ignore @@ Db.create_view db conf.configuration; + Printf.printf "Extracting results %!"; + match + Db.query + ~f:(fun v -> + let arr = Array.to_seq v |> Seq.map printer |> List.of_seq in + + Helpers.Console.update_cursor (); + Csv.output_record out_csv arr) + db conf.configuration + with + | Ok () -> + Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) + (char_of_int 27) (char_of_int 27); + Ok () + | Error e -> + Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) + (char_of_int 27) (char_of_int 27); + print_endline @@ ImportErrors.repr_error e; + + Ok ())); + + (* Close the error file *) + match Lazy.is_val log_error with + | false -> () + | true -> Csv.close_out (Lazy.force log_error) -- cgit v1.2.3