aboutsummaryrefslogtreecommitdiff
path: root/lib/file_handler/csv2sql.ml
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
committerSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
commit6b377719c10d5ab3343fd5221f99a4a21008e25a (patch)
treea7c1e9a820d339a2f161af3e09cf9e3161286796 /lib/file_handler/csv2sql.ml
Initial commitmain
Diffstat (limited to 'lib/file_handler/csv2sql.ml')
-rw-r--r--lib/file_handler/csv2sql.ml135
1 files changed, 135 insertions, 0 deletions
diff --git a/lib/file_handler/csv2sql.ml b/lib/file_handler/csv2sql.ml
new file mode 100644
index 0000000..42d84eb
--- /dev/null
+++ b/lib/file_handler/csv2sql.ml
@@ -0,0 +1,135 @@
+open StdLabels
+module A = ImportAnalyser.Dependency
+module CSV = ImportCSV
+module C = ImportContainers
+module Syntax = ImportConf.Syntax
+module Db = ImportSQL.Db
+
+(** State threaded through the import of one CSV file. The rows it carries are
+    arrays of typed CSV cells. *)
+type state = CSV.DataType.t array State.t
+
+(** Mapper given to [State.run_row] when the data comes from a CSV file: rows
+    and values are passed through unchanged ([Fun.id]) and the default value
+    is [Null]. *)
+let default_mapper : (CSV.DataType.t, CSV.DataType.t array) State.mapper =
+  {
+    State.get_row = Fun.id;
+    get_value = Fun.id;
+    default = CSV.DataType.Null;
+  }
+
+(** Guess the type of a raw CSV cell.
+
+    - the empty string gives [Null];
+    - a text accepted by [int_of_string_opt] gives [Integer];
+    - otherwise, a text accepted by [float_of_string_opt] gives [Float];
+    - anything else is kept verbatim as [Content].
+
+    The integer conversion is always attempted before the float one. *)
+let extract_values : string -> CSV.DataType.t = function
+  | "" -> CSV.DataType.Null
+  | text -> (
+      match int_of_string_opt text with
+      | Some n -> CSV.DataType.Integer n
+      | None ->
+          (* Not an integer: try a float, and fall back on the raw text when
+             no numeric conversion applies. *)
+          float_of_string_opt text
+          |> Option.fold
+               ~none:(CSV.DataType.Content text)
+               ~some:(fun x -> CSV.DataType.Float x))
+
+(** Handle the header row of a CSV file.
+
+    Commits the transaction flagged as open in [acc] (if any), asks the
+    database to create the table described by [mapping], then prepares the
+    insert statement used for the rows that follow.
+
+    Database errors are not fatal: each of them is printed on the standard
+    output through [ImportErrors.repr_error] and the function carries on. When
+    the insert statement cannot be prepared, [insert_stmt] is set to [None]
+    and [row_number] is left untouched.
+
+    @return
+      [acc] with [header] set to [row] (converted to an array) and
+      [transaction] reset to [false]; on success [insert_stmt] holds the
+      prepared statement and [row_number] is incremented. *)
+let first_row : A.t -> _ Db.t -> state -> CSV.DataType.t list -> state =
+ fun mapping db acc row ->
+  (* Every database failure in this function is reported the same way. *)
+  let report_failure = function
+    | Ok _ -> ()
+    | Error e -> print_endline (ImportErrors.repr_error e)
+  in
+  if acc.transaction then report_failure (Db.commit db);
+
+  (* The result used to be discarded; a failure here is now made visible. *)
+  report_failure (Db.create_table db mapping);
+
+  let header = Some (Array.of_list row) in
+  match Db.prepare_insert db mapping with
+  | Ok stmt ->
+      {
+        acc with
+        header;
+        transaction = false;
+        insert_stmt = Some stmt;
+        row_number = acc.row_number + 1;
+      }
+  | Error e ->
+      print_endline (ImportErrors.repr_error e);
+      { acc with header; transaction = false; insert_stmt = None }
+
+(** Fold step applied to each data line of the CSV file.
+
+    The cells of [row] are typed with [extract_values] and the resulting array
+    is handed to [State.run_row]. A transaction is opened beforehand when
+    [acc] does not flag one as open; if opening it fails, the error is printed
+    on the standard output and [acc] is returned as is, the row being left
+    unprocessed. *)
+let read_csv_line :
+    log_error:ImportErrors.t -> A.t -> 'a Db.t -> state -> string list -> state
+    =
+ fun ~log_error mapping db acc row ->
+  let cells = row |> List.map ~f:extract_values |> Array.of_list in
+  (* [Some state] when a transaction is available for the row, [None] when it
+     could not be opened. *)
+  let ready =
+    if acc.State.transaction then Some acc
+    else
+      match Db.begin_transaction db with
+      | Ok () -> Some { acc with State.transaction = true }
+      | Error e ->
+          print_endline (ImportErrors.repr_error e);
+          None
+  in
+  match ready with
+  | None -> acc
+  | Some acc ->
+      State.run_row ~log_error ~mapper:default_mapper mapping db cells acc
+
+(** Import the CSV file attached to [mapping] into the database.
+
+    The file is looked up in [dirname] under the name [(A.table mapping).file]
+    and parsed with [';'] as separator. Its first line is taken as the header
+    and goes through [first_row]; every following line goes through
+    [read_csv_line]. Once the file is read, [State.clear] is called, the
+    transaction is committed, the statements kept in the state are finalized
+    and the header is recorded with [Db.insert_header]. The results of these
+    final database calls are ignored.
+
+    The input channel is closed as soon as the reading is over, whether it
+    ended normally or with an exception.
+
+    @return the header stored in the state, if any.
+    @raise Sys_error when the file cannot be opened.
+    @raise Csv.Failure
+      when a line cannot be parsed; the error is printed on the standard error
+      output before being raised again. *)
+let importInDatable :
+    log_error:ImportErrors.t ->
+    conf:Syntax.t ->
+    dirname:string ->
+    A.t ->
+    'a Db.t ->
+    CSV.DataType.t array option Lwt.t =
+ fun ~log_error ~conf ~dirname mapping db ->
+  let file = Filename.concat dirname (A.table mapping).file in
+  let channel = Stdlib.open_in_bin file in
+
+  let state : state =
+    (* The channel is only needed while the file is being read: release it
+       right after, including when the header or a line raises. *)
+    Fun.protect
+      ~finally:(fun () -> Stdlib.close_in_noerr channel)
+      (fun () ->
+        let csv_channel =
+          Csv.of_channel ~separator:';' ~excel_tricks:true channel
+        in
+
+        (* In the headers, we only keep the string.
+
+           NOTE(review): [Csv.next] presumably raises when the file holds no
+           line at all; this case is not handled here. *)
+        let header =
+          List.map ~f:(fun v -> CSV.DataType.Content v) (Csv.next csv_channel)
+        in
+
+        let initial =
+          State.
+            {
+              transaction = false;
+              header = None;
+              insert_stmt = None;
+              check_key_stmt = None;
+              row_number = 1;
+              sheet_number = 1;
+              delayed = [];
+            }
+        in
+        let after_header = first_row mapping db initial header in
+
+        try
+          Csv.fold_left csv_channel ~init:after_header
+            ~f:(read_csv_line ~log_error mapping db)
+        with
+        | Csv.Failure (line, row, cause) as e ->
+            Printf.eprintf "Error %s on line %d — field : %s\n" cause line
+              (ImportCSV.Csv.column_to_string row);
+            raise e)
+  in
+  ignore @@ State.clear ~log_error db mapping conf;
+  ignore @@ Db.commit db;
+
+  (* Finalize the statements created during the import *)
+  Option.iter (fun v -> ignore @@ Db.finalize v) state.State.insert_stmt;
+  Option.iter (fun v -> ignore @@ Db.finalize v) state.State.check_key_stmt;
+
+  (* Insert all the headers, each one paired with its column index *)
+  Option.iter
+    (fun headers ->
+      let values = Array.mapi headers ~f:(fun i value -> (i, value)) in
+      ignore
+      @@ Db.insert_header db (ImportAnalyser.Dependency.table mapping) values)
+    state.State.header;
+
+  Lwt.return state.State.header