diff options
author | Sébastien Dailly <sebastien@dailly.me> | 2025-03-17 09:11:25 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2025-03-17 18:59:32 +0100 |
commit | 8b8b730d3ba98d6c9e4e6274844641043b5fefbb (patch) | |
tree | 4cb60dafa05b479d0ca287d501a51db88cecaaa4 /lib/analysers | |
parent | 7bfbb67d83011f3e1845dcb9e44c3b6a5e93a9da (diff) |
Moved the syntax module in its own library
Diffstat (limited to 'lib/analysers')
-rw-r--r-- | lib/analysers/chunk.ml | 21 | ||||
-rw-r--r-- | lib/analysers/chunk.mli | 8 | ||||
-rw-r--r-- | lib/analysers/dependency.ml | 36 | ||||
-rw-r--r-- | lib/analysers/dependency.mli | 2 | ||||
-rwxr-xr-x | lib/analysers/dune | 7 | ||||
-rw-r--r-- | lib/analysers/filters.ml | 23 | ||||
-rw-r--r-- | lib/analysers/filters.mli | 2 | ||||
-rw-r--r-- | lib/analysers/headers.ml | 18 | ||||
-rw-r--r-- | lib/analysers/headers.mli | 5 | ||||
-rw-r--r-- | lib/analysers/printers.ml | 5 | ||||
-rw-r--r-- | lib/analysers/printers.mli | 2 | ||||
-rw-r--r-- | lib/analysers/query.ml | 37 | ||||
-rw-r--r-- | lib/analysers/query.mli | 7 |
13 files changed, 89 insertions, 84 deletions
diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml index 2fa4808..cefa6d8 100644 --- a/lib/analysers/chunk.ml +++ b/lib/analysers/chunk.ml @@ -8,13 +8,13 @@ type t = { b : Buffer.t; - parameters : ImportCSV.DataType.t Queue.t; + parameters : ImportDataTypes.Value.t Queue.t; } let create : unit -> t = fun () -> { b = Buffer.create 16; parameters = Queue.create () } -let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = +let create' : Buffer.t -> ImportDataTypes.Value.t Queue.t -> t = fun b parameters -> { b; parameters } (* Append the element from [tail] at the end of [head] @@ -39,10 +39,9 @@ let copy : t -> t = Buffer.add_buffer b t.b; { b; parameters } -let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = +let add_parameters : t -> ImportDataTypes.Value.t Seq.t -> unit = fun t p -> Queue.add_seq t.parameters p -module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Q = ImportExpression.Query open StdLabels @@ -53,7 +52,8 @@ open StdLabels - the join query in order to load the data from the external column - the column corresponding to the key in order to identify the missing links later. *) -let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = +let join_external : + conf:ImporterSyntax.t -> join_buffer:t -> ImporterSyntax.Extern.t -> unit = fun ~conf ~join_buffer external_ -> let extern_table = Table.name external_.target in @@ -67,8 +67,8 @@ let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = in add_parameters join_buffer (Queue.to_seq q))) - (Table.print_column external_.Syntax.Extern.target - ("key_" ^ external_.Syntax.Extern.target.name)); + (Table.print_column external_.ImporterSyntax.Extern.target + ("key_" ^ external_.ImporterSyntax.Extern.target.name)); Format.pp_print_flush formatter () @@ -78,7 +78,7 @@ let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = SQLite is able to optimize the query and do not load the table not used in the select clause. *) let create_from_statement_of_chunck : - ?externals:Syntax.Extern.t list -> Syntax.t -> t -> unit = + ?externals:ImporterSyntax.Extern.t list -> ImporterSyntax.t -> t -> unit = fun ?externals conf c -> let externals = Option.value externals ~default:conf.externals in add_string c "\nFROM '"; @@ -91,7 +91,10 @@ let create_from_statement_of_chunck : List.iter externals ~f:(join_external ~conf ~join_buffer:c) let add_expression : - conf:Syntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit = + conf:ImporterSyntax.t -> + t -> + ImportDataTypes.Path.t ImportExpression.T.t -> + unit = fun ~conf group expression -> let formatter = Format.formatter_of_buffer group.b in let queue = diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli index d4f69e7..13a748a 100644 --- a/lib/analysers/chunk.mli +++ b/lib/analysers/chunk.mli @@ -2,11 +2,11 @@ type t = { b : Buffer.t; - parameters : ImportCSV.DataType.t Queue.t; + parameters : ImportDataTypes.Value.t Queue.t; } val create : unit -> t -val create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t +val create' : Buffer.t -> ImportDataTypes.Value.t Queue.t -> t val append : head:t -> tail:t -> unit (** Append the element from [tail] at the end of [head] @@ -19,7 +19,7 @@ val add_string : t -> string -> unit val copy : t -> t val create_from_statement_of_chunck : - ?externals:ImportConf.Syntax.Extern.t list -> ImportConf.Syntax.t -> t -> unit + ?externals:ImporterSyntax.Extern.t list -> ImporterSyntax.t -> t -> unit (** Create the from part of the query, adding all the declared externals (even when not required) @@ -27,7 +27,7 @@ val create_from_statement_of_chunck : the select clause. *) val add_expression : - conf:ImportConf.Syntax.t -> + conf:ImporterSyntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit diff --git a/lib/analysers/dependency.ml b/lib/analysers/dependency.ml index d0ea8b3..38bc23c 100644 --- a/lib/analysers/dependency.ml +++ b/lib/analysers/dependency.ml @@ -1,6 +1,5 @@ open StdLabels module IntSet = ImportContainers.IntSet -module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path module Expression = ImportExpression.T @@ -61,11 +60,15 @@ type 'a expression_extractor = { The function may raise [Unknown_source] if the the path describe an unknown table. *) let add_path_in_map : - f:'a expression_extractor -> conf:Syntax.t -> 'a -> build_map -> build_map = + f:'a expression_extractor -> + conf:ImporterSyntax.t -> + 'a -> + build_map -> + build_map = fun ~f ~conf path map -> let table_source, column = f.of_path path in let table = - try ImportConf.get_table_for_name conf table_source with + try ImporterSyntax.get_table_for_name conf table_source with | Not_found -> raise (ImportErrors.Unknown_source (Option.get table_source)) in @@ -81,7 +84,7 @@ let add_path_in_map : let add_expression_in_map : f:'a expression_extractor -> - conf:Syntax.t -> + conf:ImporterSyntax.t -> 'a Expression.t -> build_map -> build_map = @@ -91,7 +94,7 @@ let add_expression_in_map : let add_columns_in_map : f:'a expression_extractor -> - conf:Syntax.t -> + conf:ImporterSyntax.t -> 'a Expression.t list -> build_map -> build_map = @@ -109,9 +112,9 @@ let add_columns_in_map : This function is called for each path declared inside the expression. *) let add_dependancies : - conf:Syntax.t -> Syntax.Extern.t -> deps -> Path.t -> deps = + conf:ImporterSyntax.t -> ImporterSyntax.Extern.t -> deps -> Path.t -> deps = fun ~conf extern graph path -> - let source_table = ImportConf.get_table_for_name conf path.Path.alias in + let source_table = ImporterSyntax.get_table_for_name conf path.Path.alias in let source = ImportContainers.Source.from_table source_table in let target = ImportContainers.Source.from_table extern.target in @@ -121,14 +124,17 @@ let add_dependancies : | _ -> (target, [ source ]) :: graph let add_external_in_map : - conf:Syntax.t -> Syntax.Extern.t -> build_map * deps -> build_map * deps = + conf:ImporterSyntax.t -> + ImporterSyntax.Extern.t -> + build_map * deps -> + build_map * deps = fun ~conf extern (map, graph) -> let dest = ImportContainers.KeyName.from_table extern.target in (* Pre-check that every source is already declared in the configuration. *) let _ = Expression.fold_values extern.intern_key ~init:() ~f:(fun () path -> try - let _ = ImportConf.get_table_for_name conf path.Path.alias in + let _ = ImporterSyntax.get_table_for_name conf path.Path.alias in () with | Not_found -> ( @@ -177,7 +183,7 @@ let add_external_in_map : { of_path = (fun Path.{ alias; column } -> - let table = ImportConf.get_table_for_name conf alias in + let table = ImporterSyntax.get_table_for_name conf alias in (Some table.name, column)); to_mapping = (fun mapping column -> @@ -194,11 +200,11 @@ let mapper = of_path = (fun ({ alias; column } : Path.t) -> (alias, column)); } -let get_mapping : Syntax.t -> build_map * deps = +let get_mapping : ImporterSyntax.t -> build_map * deps = fun conf -> - let root = ImportContainers.Source.from_table (ImportConf.root_table conf) + let root = ImportContainers.Source.from_table (ImporterSyntax.root_table conf) and root' = - ImportContainers.KeyName.from_table (ImportConf.root_table conf) + ImportContainers.KeyName.from_table (ImporterSyntax.root_table conf) in let graph = [ (root, []) ] in @@ -207,7 +213,7 @@ let get_mapping : Syntax.t -> build_map * deps = let init = ( ImportContainers.Externals.singleton root' { - table = ImportConf.root_table conf; + table = ImporterSyntax.root_table conf; columns = IntSet.empty; keys = []; }, @@ -229,7 +235,7 @@ let get_mapping : Syntax.t -> build_map * deps = in (map, graph) -let get_process_order : Syntax.t -> t list = +let get_process_order : ImporterSyntax.t -> t list = fun map -> let map, graph = get_mapping map in diff --git a/lib/analysers/dependency.mli b/lib/analysers/dependency.mli index bc761ae..522436c 100644 --- a/lib/analysers/dependency.mli +++ b/lib/analysers/dependency.mli @@ -1,6 +1,6 @@ type t -val get_process_order : ImportConf.Syntax.t -> t list +val get_process_order : ImporterSyntax.t -> t list (** Extract the file list to process, following the identified dependancies. Try to load first the document which does not required another spreadsheet, and keep going in the topological order diff --git a/lib/analysers/dune b/lib/analysers/dune index 382dd6b..3ba018c 100755 --- a/lib/analysers/dune +++ b/lib/analysers/dune @@ -1,12 +1,11 @@ (library
(name importAnalyser)
(libraries
- importConf
- importContainers
- importCSV
importDataTypes
- importExpression
importErrors
+ importExpression
+ importerSyntax
+ importContainers
tsort
)
(preprocess (pps
diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml index 4e8b175..7044798 100644 --- a/lib/analysers/filters.ml +++ b/lib/analysers/filters.ml @@ -1,14 +1,12 @@ (** Build a fragment of the query match a filter *) -module Syntax = ImportConf.Syntax module Path = ImportDataTypes.Path module Expression = ImportExpression -module CTE = ImportConf.CTE open StdLabels (** Add a list of expressions into the group *) let rec add_filters : - conf:Syntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = + conf:ImporterSyntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = fun ~conf group -> function | [] -> () | hd :: [] -> Chunk.add_expression ~conf group hd @@ -25,7 +23,7 @@ type 'a cte_acc = { latest_expression : Path.t Expression.T.t list; } -let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit = +let add_inner : conf:ImporterSyntax.t -> int -> Buffer.t -> unit = fun ~conf n b -> let name = "filter" ^ string_of_int n in (* We use an INNER JOIN here because we want to be sure to get all the rows @@ -39,15 +37,15 @@ let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit = Buffer.add_string b ".id\n" let print : - conf:Syntax.t -> + conf:ImporterSyntax.t -> (Chunk.t * Chunk.t) cte_acc -> - CTE.t -> + ImporterSyntax.CTE.t -> (Chunk.t * Chunk.t) cte_acc = fun ~conf acc cte -> let predicates, query = acc.acc in let n = acc.n in let cte_index = - match cte.CTE.group with + match cte.ImporterSyntax.CTE.group with | Some expression -> begin if acc.has_previous then Chunk.add_string query ", " @@ -73,11 +71,11 @@ let print : end; begin - match cte.CTE.filters with + match cte.ImporterSyntax.CTE.filters with | [] -> () | _ -> Chunk.add_string query " WHERE "; - add_filters ~conf query cte.CTE.filters + add_filters ~conf query cte.ImporterSyntax.CTE.filters end; Chunk.add_string query ")\n"; Some acc.n @@ -85,7 +83,7 @@ let print : (* Do not add the filters in the CTE (we don’t have any) but in the main query *) Chunk.add_string predicates "WHERE "; - add_filters ~conf predicates cte.CTE.filters; + add_filters ~conf predicates cte.ImporterSyntax.CTE.filters; acc.cte_index in { @@ -93,10 +91,11 @@ let print : has_previous = true; n = acc.n + 1; cte_index; - latest_expression = cte.CTE.filters; + latest_expression = cte.ImporterSyntax.CTE.filters; } -let generate_sql : conf:Syntax.t -> CTE.t list -> Chunk.t -> Chunk.t = +let generate_sql : + conf:ImporterSyntax.t -> ImporterSyntax.CTE.t list -> Chunk.t -> Chunk.t = fun ~conf filters links' -> let predicates = Chunk.create () and links = Chunk.create () in let eval = diff --git a/lib/analysers/filters.mli b/lib/analysers/filters.mli index 7783799..3a81202 100644 --- a/lib/analysers/filters.mli +++ b/lib/analysers/filters.mli @@ -1,2 +1,2 @@ val generate_sql : - conf:ImportConf.Syntax.t -> ImportConf.CTE.t list -> Chunk.t -> Chunk.t + conf:ImporterSyntax.t -> ImporterSyntax.CTE.t list -> Chunk.t -> Chunk.t diff --git a/lib/analysers/headers.ml b/lib/analysers/headers.ml index 916dfee..cbeddfb 100644 --- a/lib/analysers/headers.ml +++ b/lib/analysers/headers.ml @@ -1,7 +1,5 @@ open StdLabels -module I = ImportConf module E = ImportExpression.T -module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path @@ -15,7 +13,7 @@ end) type content = string array type t = content SheeetMap.t -(** The map associate a line of headers for each table. +(** The map associate a line of headers for each table. The header are always in string. *) @@ -23,9 +21,8 @@ type t = content SheeetMap.t and will reformat the first line with the values from the cell. The functions will not be evaluated (instead they will be displayed "as is". - When there is no value for this path, return empty string. - *) -let columns : Syntax.t -> t -> string list = + When there is no value for this path, return empty string. *) +let columns : ImporterSyntax.t -> t -> string list = fun conf t -> (* We build here a custom printer which search in the array for the column name. @@ -33,7 +30,7 @@ let columns : Syntax.t -> t -> string list = This function will be given as argument in the expression printer. *) let f : Path.t -> Buffer.t -> unit = fun path b -> - let source = I.get_table_for_name conf path.alias in + let source = ImporterSyntax.get_table_for_name conf path.alias in match SheeetMap.find_opt source t with | None -> () @@ -42,12 +39,13 @@ let columns : Syntax.t -> t -> string list = | _ -> prerr_endline @@ Printf.sprintf "No header found for :%s.%s" - (Option.value ~default:(I.root_table conf).Table.name + (Option.value + ~default:(ImporterSyntax.root_table conf).Table.name path.alias) - (ImportCSV.Csv.column_to_string path.column)) + (ImportDataTypes.Path.column_to_string path.column)) in - List.map conf.Syntax.columns ~f:(fun column -> + List.map conf.ImporterSyntax.columns ~f:(fun column -> let b = Buffer.create 4 in ImportExpression.Headers.headers_of_expression b f column; diff --git a/lib/analysers/headers.mli b/lib/analysers/headers.mli index 03e384b..b9149a9 100644 --- a/lib/analysers/headers.mli +++ b/lib/analysers/headers.mli @@ -2,10 +2,9 @@ module SheeetMap : Map.S with type key = ImportDataTypes.Table.t type t = string array SheeetMap.t -val columns : ImportConf.Syntax.t -> t -> string list +val columns : ImporterSyntax.t -> t -> string list (** Get the headers. The function has to be called after reading each document, and will reformat the first line with the values from the cell. The functions will not be evaluated (instead they will be displayed "as is". - When there is no value for this path, return empty string. - *) + When there is no value for this path, return empty string. *) diff --git a/lib/analysers/printers.ml b/lib/analysers/printers.ml index 1c73c13..3dc2f02 100644 --- a/lib/analysers/printers.ml +++ b/lib/analysers/printers.ml @@ -1,10 +1,9 @@ -module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path -let path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = +let path : conf:ImporterSyntax.t -> Format.formatter -> Path.t -> unit = fun ~conf buffer { alias; column } -> - let table = ImportConf.get_table_for_name conf alias in + let table = ImporterSyntax.get_table_for_name conf alias in Format.fprintf buffer "%s" (Table.print_column table ("col_" ^ string_of_int column)) diff --git a/lib/analysers/printers.mli b/lib/analysers/printers.mli index 102bb91..3916e1c 100644 --- a/lib/analysers/printers.mli +++ b/lib/analysers/printers.mli @@ -1,5 +1,5 @@ val path : - conf:ImportConf.Syntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit + conf:ImporterSyntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit (** Represent a path in a SQL query. This function is given in the Expression.Query module. *) diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index e24da78..f89f5f0 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -1,15 +1,15 @@ open StdLabels module Expression = ImportExpression module Q = Expression.Query -module Syntax = ImportConf.Syntax module Table = ImportDataTypes.Table module Path = ImportDataTypes.Path (* Collect all the tables pointed by the expression. *) -let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = +let pointed_tables : + ImporterSyntax.t -> 'a Expression.T.t -> (Table.t * string) list = fun conf expression -> Expression.T.fold_values expression ~init:[] ~f:(fun acc path -> - let table = ImportConf.get_table_for_name conf path.Path.alias in + let table = ImporterSyntax.get_table_for_name conf path.Path.alias in let table_name = Table.name table in (table, table_name) :: acc) |> List.sort_uniq ~cmp:Stdlib.compare @@ -37,7 +37,7 @@ let create_table : Dependency.t -> string = type query = { q : string; - parameters : ImportCSV.DataType.t Seq.t; + parameters : ImportDataTypes.Value.t Seq.t; } let rec take_elements : @@ -88,9 +88,9 @@ let clean_window : The select query will name each column with an alias, and the map allow to find which source is pointed by this alias. *) -let select : Syntax.t -> query * Path.t ImportExpression.T.t array = +let select : ImporterSyntax.t -> query * Path.t ImportExpression.T.t array = fun conf -> - let filter = ImportConf.CTE.of_filters conf.filters in + let filter = ImporterSyntax.CTE.of_filters conf.filters in (* For each column in the configuration file, add the corresponding element in the query. @@ -136,7 +136,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = Chunk.append ~head:request_header ~tail:filters; let formatter = Format.formatter_of_buffer b in - (match conf.Syntax.uniq with + (match conf.ImporterSyntax.uniq with | [] -> () | uniq -> Format.fprintf formatter "\nGROUP BY %a" @@ -149,7 +149,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = in Queue.transfer seq parameters)) uniq); - (match conf.Syntax.sort with + (match conf.ImporterSyntax.sort with | [] -> () | sort -> Format.fprintf formatter "\nORDER BY %a" @@ -166,15 +166,16 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = ({ q = Buffer.contents b; parameters = Queue.to_seq parameters }, headers) -let check_external : Syntax.t -> Syntax.Extern.t -> query = +let check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query = fun conf external_ -> let internal_chunk = Chunk.create () in - Chunk.add_expression ~conf internal_chunk external_.Syntax.Extern.intern_key; + Chunk.add_expression ~conf internal_chunk + external_.ImporterSyntax.Extern.intern_key; let external_key_buffer = Buffer.create 16 in Buffer.add_string external_key_buffer - (Table.print_column external_.Syntax.Extern.target - ("key_" ^ external_.Syntax.Extern.target.name)); + (Table.print_column external_.ImporterSyntax.Extern.target + ("key_" ^ external_.ImporterSyntax.Extern.target.name)); let pointed_tables = pointed_tables conf external_.intern_key in @@ -190,7 +191,9 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query = This not the usual way to proceed (we start from the source and link the externals) *) let rec collect_links : - Syntax.Extern.t -> Syntax.Extern.t list -> Syntax.Extern.t list = + ImporterSyntax.Extern.t -> + ImporterSyntax.Extern.t list -> + ImporterSyntax.Extern.t list = fun table init -> let res = (* Do not add the same external if the value is already present *) @@ -200,16 +203,16 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query = | Some _ -> init in - Expression.T.fold_values ~init table.Syntax.Extern.intern_key + Expression.T.fold_values ~init table.ImporterSyntax.Extern.intern_key ~f:(fun acc expr -> match expr.Path.alias with | None -> acc | Some _ as path -> ( - let table = ImportConf.get_table_for_name conf path in + let table = ImporterSyntax.get_table_for_name conf path in (* Look for this table in the externals *) let external_opt = - List.find_opt conf.Syntax.externals ~f:(fun t -> - t.Syntax.Extern.target == table) + List.find_opt conf.ImporterSyntax.externals ~f:(fun t -> + t.ImporterSyntax.Extern.target == table) in match external_opt with | None -> acc diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli index d158867..491c891 100644 --- a/lib/analysers/query.mli +++ b/lib/analysers/query.mli @@ -2,16 +2,15 @@ val create_table : Dependency.t -> string type query = { q : string; (** The query to execute *) - parameters : ImportCSV.DataType.t Seq.t; + parameters : ImportDataTypes.Value.t Seq.t; } (** This type represent a query to execute. [q] is the template to run, and shall be run with all the binded parameters. *) val select : - ImportConf.Syntax.t -> - query * ImportDataTypes.Path.t ImportExpression.T.t array + ImporterSyntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array -val check_external : ImportConf.Syntax.t -> ImportConf.Syntax.Extern.t -> query +val check_external : ImporterSyntax.t -> ImporterSyntax.Extern.t -> query (** Create a query which select all the missing key in an external *) val build_key_insert : Buffer.t -> Dependency.key -> unit |