(* lib/file_handler/csv2sql.ml

   Source recovered from the patch "Initial commit"
   (6b377719c10d5ab3343fd5221f99a4a21008e25a, Sébastien Dailly, 2024-03-14).
   The patch envelope and the leading [+] diff markers have been removed so
   that the file is plain OCaml. *)

open StdLabels
module A = ImportAnalyser.Dependency
module CSV = ImportCSV
module C = ImportContainers
module Syntax = ImportConf.Syntax
module Db = ImportSQL.Db

(** Import state carried from one CSV row to the next. The rows are stored as
    arrays of {!CSV.DataType.t}. *)
type state = CSV.DataType.t array State.t

(** Mapper given to [State.run_row] for CSV input: a row already is an array of
    values, so both accessors are the identity, and the default value is
    [Null]. *)
let default_mapper : (CSV.DataType.t, CSV.DataType.t array) State.mapper =
  { get_row = Fun.id; get_value = Fun.id; default = CSV.DataType.Null }

(** [extract_values value] guesses the type of a raw CSV cell.

    - the empty string becomes [Null];
    - a string accepted by [int_of_string_opt] becomes [Integer];
    - otherwise a string accepted by [float_of_string_opt] becomes [Float];
    - anything else is kept unchanged as [Content].

    The numeric conversions are the ones from the standard library, so they
    accept the OCaml literal syntax (for instance ["0x1F"] or ["1_000"]). *)
let extract_values : string -> CSV.DataType.t =
 fun value ->
  (* Test first if the content is empty *)
  if String.equal String.empty value then CSV.DataType.Null
  else
    (* Otherwise try the conversions in order and keep the first that works *)
    match int_of_string_opt value with
    | Some i -> CSV.DataType.Integer i
    | None -> (
        match float_of_string_opt value with
        | Some f -> CSV.DataType.Float f
        | None ->
            (* Not a number: keep the raw text as it is *)
            CSV.DataType.Content value)

(** [first_row mapping db acc row] initializes the state from the header row.

    If a transaction is marked as open in [acc] it is committed first (a commit
    error is printed on the standard output). Then the table is created, the
    insert statement is prepared and the header is recorded in the state.

    The returned state always has [transaction = false] and a [row_number]
    advanced by one, because the header line has been consumed whether the
    statement could be prepared or not. [insert_stmt] is [None] when the
    preparation failed. *)
let first_row : A.t -> _ Db.t -> state -> CSV.DataType.t list -> state =
 fun mapping db acc row ->
  (if acc.transaction then
     match Db.commit db with
     | Ok () -> ()
     | Error e -> print_endline (ImportErrors.repr_error e));

  (* NOTE(review): the results of [create_table] and of a failed
     [prepare_insert] are dropped here; their error type is not visible from
     this file, so they are not reported. *)
  ignore @@ Db.create_table db mapping;
  let header = Some (Array.of_list row) in
  let insert_stmt =
    match Db.prepare_insert db mapping with
    | Ok stmt -> Some stmt
    | Error _ -> None
  in
  {
    acc with
    header;
    transaction = false;
    insert_stmt;
    row_number = acc.row_number + 1;
  }

(** [read_csv_line ~log_error mapping db acc row] converts every cell of [row]
    with {!extract_values} and hands the resulting array to [State.run_row].

    A transaction is started first when [acc] does not have one. If it cannot
    be started, the error is printed on the standard output and [acc] is
    returned unchanged: the row is not processed. *)
let read_csv_line :
    log_error:ImportErrors.t -> A.t -> 'a Db.t -> state -> string list -> state
    =
 fun ~log_error mapping db acc row ->
  let processed_row =
    List.to_seq row |> Seq.map extract_values |> Array.of_seq
  in
  let run acc =
    State.run_row ~log_error ~mapper:default_mapper mapping db processed_row acc
  in
  if acc.State.transaction then run acc
  else
    match Db.begin_transaction db with
    | Error e ->
        print_endline (ImportErrors.repr_error e);
        acc
    | Ok () ->
        let acc = { acc with transaction = true } in
        run acc

(** [importInDatable ~log_error ~conf ~dirname mapping db] loads the CSV file
    attached to the table of [mapping] into [db].

    The file name is resolved relatively to [dirname] and the file is read with
    [';'] as separator. The first record is used as header, every following
    record goes through {!read_csv_line}. Once the file is read, [State.clear]
    is called, the transaction is committed, the statements kept in the state
    are finalized and the header is registered with [Db.insert_header].

    The input channel is closed in every case, including when an exception
    escapes.

    @return the header of the file, as recorded in the final state.
    @raise Sys_error if the file cannot be opened.
    @raise End_of_file
      if the file does not hold any record (raised by [Csv.next] while reading
      the header).
    @raise Csv.Failure
      if the file is not a well-formed CSV. When the error occurs after the
      header, a message is printed on the standard error before the exception
      is raised again. *)
let importInDatable :
    log_error:ImportErrors.t ->
    conf:Syntax.t ->
    dirname:string ->
    A.t ->
    'a Db.t ->
    CSV.DataType.t array option Lwt.t =
 fun ~log_error ~conf ~dirname mapping db ->
  let file = Filename.concat dirname (A.table mapping).file in
  let channel = Stdlib.open_in_bin file in

  (* Everything which reads from [channel] lives in this function, so that the
     channel can be released as soon as it returns or raises. *)
  let read_all () =
    let csv_channel =
      Csv.of_channel ~separator:';' ~excel_tricks:true channel
    in

    (* In the headers, we only keep the string.

       This line raises an exception if the file does not start with a valid
       record. *)
    let header =
      List.map ~f:(fun v -> CSV.DataType.Content v) (Csv.next csv_channel)
    in

    let initial =
      State.
        {
          transaction = false;
          header = None;
          insert_stmt = None;
          check_key_stmt = None;
          row_number = 1;
          sheet_number = 1;
          delayed = [];
        }
    in
    let after_header = first_row mapping db initial header in

    try
      Csv.fold_left csv_channel ~init:after_header
        ~f:(read_csv_line ~log_error mapping db)
    with Csv.Failure (line, row, cause) as e ->
      Printf.eprintf "Error %s on line %d — field : %s\n" cause line
        (ImportCSV.Csv.column_to_string row);
      raise e
  in
  let state : state =
    Fun.protect ~finally:(fun () -> Stdlib.close_in_noerr channel) read_all
  in

  ignore @@ State.clear ~log_error db mapping conf;

  (* Report a failed commit the same way [first_row] does, instead of dropping
     the error. *)
  (match Db.commit db with
  | Ok () -> ()
  | Error e -> print_endline (ImportErrors.repr_error e));

  (* Finalize the statements created during the import *)
  Option.iter (fun v -> ignore @@ Db.finalize v) state.insert_stmt;
  Option.iter (fun v -> ignore @@ Db.finalize v) state.check_key_stmt;

  (* Insert all the headers, each one paired with its column index *)
  Option.iter
    (fun headers ->
      let values = Array.mapi headers ~f:(fun i value -> (i, value)) in
      ignore @@ Db.insert_header db (A.table mapping) values)
    state.header;

  Lwt.return state.header