open StdLabels module Analyse = ImportAnalyser.Dependency module Headers = ImportAnalyser.Headers module Db = ImportSQL.Db module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path let ( let* ) = Result.bind let exists source = match Sys.file_exists source with | true -> source | false -> prerr_endline @@ String.concat ~sep:" " [ "The file"; source; "does not exists" ]; exit 1 let creation_date file = try let stat = Unix.stat file in Some stat.Unix.st_mtime with | Unix.Unix_error _ -> None module Args = struct type arguments = { configuration : ImportConf.Syntax.t; conf_name : string; bom : bool; print_conf : bool; mapping_date : float option; } let load_conf : string -> ImportConf.Syntax.t = fun file -> match Filename.extension file with | ".json" -> ( let configuration_file = Yojson.Safe.from_file (exists file) in try ImportConf.t_of_yojson configuration_file with | e -> print_endline @@ ImportErrors.repr_error e; exit 1) | _ -> ( let (conf : (Db.Syntax.t, string) result) = let* configuration_file = try Ok (Otoml.Parser.from_file (exists file)) with | Otoml.Parse_error (position, message) -> let format_position () = function | None -> "" | Some (line, col) -> Printf.sprintf "At line %d, col %d: " line col in let error_msg = Printf.sprintf "%aError %s" format_position position message in Error error_msg in ImportConf.t_of_toml configuration_file in match conf with | Error e -> prerr_endline e; exit 1 | Ok e -> e) let load () = let conf = ref ("", ImportConf.dummy_conf) and bom = ref true and usage = "importer [--conf configuration.toml]" and print_conf = ref false in let annon_fun _filename = print_endline usage; exit 1 and set_conf file = conf := (file, load_conf file) in let speclist = [ ( "--version", Arg.Unit (fun () -> Printf.printf "Version %s\n" Tools.Git_hash.revision; exit 0), "\tDisplay the version of the application and exit" ); ("--conf", Arg.String set_conf, "Configuration file"); ("-c", Arg.String set_conf, "Configuration file"); ("--no-bom", Arg.Clear bom, "Do not insert a BOM in the CSV"); ( "--print-conf", Arg.Set print_conf, "Reformat the configuration file and exit" ); ] in let () = Arg.parse speclist annon_fun usage in { configuration = snd !conf; bom = !bom; conf_name = fst !conf; print_conf = !print_conf; mapping_date = None; } end (** Print the result from the query. Each value is given with the associated expression in the configuration, the function is expected to convert the result into string in order to include the content in the output CSV. *) let printer : Path.t ImportExpression.T.t * ImportCSV.DataType.t -> string = fun (column, value) -> ignore column; ImportCSV.DataType.to_string value let bom = "\xEF\xBB\xBF" let process_table : 'a Db.t -> string -> Csv.out_channel Lazy.t -> Args.arguments -> string array Headers.SheeetMap.t -> Analyse.t -> string array Headers.SheeetMap.t = fun db dirname log_error conf map mapping -> let source = Analyse.table mapping in (* Load all the element in the database *) let file = Filename.concat dirname source.file in let file_date = creation_date file in let recent = match (conf.mapping_date, file_date) with | Some r, Some t -> r > t | _, _ -> false in match (recent, ImportSQL.Db.check_table_schema db mapping) with | true, Ok true -> ( (* If the data extraction is more recent than the sheet, and if the configuration did not changed for this table, we do not reload the table. *) match ImportSQL.Db.query_headers db source with | Ok v -> let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in Headers.SheeetMap.add source text_headers map | Error _ -> map) | _ -> Printf.printf "Loading document %s %!" source.name; let headers_opt = let extension = String.lowercase_ascii (Filename.extension file) in match extension with | ".xlsx" -> Lwt_main.run @@ ImportFileHandler.Xlsx2sql.importInDatable ~dirname ~conf:conf.configuration ~log_error mapping db | ".csv" -> Lwt_main.run @@ ImportFileHandler.Csv2sql.importInDatable ~dirname ~conf:conf.configuration ~log_error mapping db | _ -> raise (ImportErrors.Unknown_source extension) in Helpers.Console.close_cursor (); (* Get the headers *) let headers = match headers_opt with | None -> map | Some v -> let text_headers = Array.map v ~f:ImportCSV.DataType.to_string in Headers.SheeetMap.add source text_headers map in (* For each external check if the values are loaded *) let dependancies = ImportConf.get_dependancies_for_table conf.configuration source in List.iter dependancies ~f:(fun ext -> match ext.ImportConf.Syntax.allow_missing with | true -> () | false -> ( Printf.printf "Checking dependancies for %s %!" ext.ImportConf.Syntax.target.ImportDataTypes.Table.name; try ignore @@ Db.check_foreign db conf.configuration ext ~f:(fun values -> Helpers.Console.update_cursor (); let row = match snd (Array.get values 0) with | ImportCSV.DataType.Integer i -> i | _ -> -1 and value = snd (Array.get values 1) in let error = ImportErrors. { source; sheet = source.Table.tab; row; value; target = Some ext.ImportConf.Syntax.target; exn = Failure (Printf.sprintf "Key '%s' not found" (CSV.DataType.to_string value)); } in ImportErrors.output_error log_error error); Helpers.Console.close_cursor () with | Sqlite3.Error _ -> (* We can have errors here if we have cycles in the dependencies, but it’s OK at this step.*) ())); headers let () = let conf = Args.load () in (* Global configuration variables *) let dirname = Filename.dirname conf.conf_name in let basename = Filename.basename conf.conf_name in let prefix = Filename.remove_extension basename in (* Analyse the configuration *) let process_order = try Analyse.get_process_order conf.configuration with | e -> prerr_endline @@ ImportErrors.repr_error e; exit 1 in (* With the printconf option, we do not need to open any file *) if conf.print_conf then ( let toml = ImportConf.Syntax.repr conf.configuration in Otoml.Printer.to_channel ~collapse_tables:true stdout toml; exit 0); let sqlfile = Filename.concat dirname (prefix ^ ".sqlite") in let conf = { conf with mapping_date = creation_date sqlfile } in (* Ensure that all the files exists *) List.iter process_order ~f:(fun (mapping : Analyse.t) -> let source = Analyse.table mapping in (* First, check *) if not (Sys.file_exists source.Table.file) then begin ignore @@ exists @@ Filename.concat dirname source.Table.file end); (* The configuration is loaded and valid, we create the errors log file *) let log_error = ImportErrors.log ~with_bom:conf.bom prefix dirname in (* Now check if we can open the output file. Opening the file "truncate" it. I do not want to delete the file content before I am sure that I can process it. This is why the file is opened after reading the configuration file although we do not need the configuration file for it. *) let out_channel = try Out_channel.open_bin (Filename.concat dirname (String.cat prefix ".csv")) with | Sys_error e -> prerr_endline e; exit 1 in Fun.protect ~finally:(fun () -> Out_channel.flush out_channel; Out_channel.close_noerr out_channel) (fun () -> (* Add the BOM to the CSV *) if conf.bom then output_string out_channel bom; let out_csv = Csv.to_channel ~separator:';' ~excel_tricks:false out_channel in (* Create the database *) ignore @@ Db.with_db sqlfile (fun db -> let headers = List.fold_left process_order ~init:Headers.SheeetMap.empty ~f:(process_table db dirname log_error conf) in let first_line = Headers.columns conf.configuration headers in Csv.output_record out_csv first_line; (* Run the query *) ignore @@ Db.create_view db conf.configuration; Printf.printf "Extracting results %!"; match Db.query ~f:(fun v -> let arr = Array.to_seq v |> Seq.map printer |> List.of_seq in Helpers.Console.update_cursor (); Csv.output_record out_csv arr) db conf.configuration with | Ok () -> Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) (char_of_int 27) (char_of_int 27); Ok () | Error e -> Printf.printf "%c[?25h%c[1D%c[0K\n%!" (char_of_int 27) (char_of_int 27) (char_of_int 27); print_endline @@ ImportErrors.repr_error e; Ok ())); (* Close the error file *) match Lazy.is_val log_error with | false -> () | true -> Csv.close_out (Lazy.force log_error)