diff options
| author | Sébastien Dailly <sebastien@dailly.me> | 2025-03-01 08:39:02 +0100 | 
|---|---|---|
| committer | Sébastien Dailly <sebastien@dailly.me> | 2025-03-06 20:57:10 +0100 | 
| commit | 81db1bfd580791910646525e30bc45af34533987 (patch) | |
| tree | c610f53c284d3707a3d6fe49486b5c09e66dc41f | |
| parent | 67320d8f04e1f302306b9aafdaaf4bafcf443839 (diff) | |
Rewrite the way to handle filters
| -rw-r--r-- | examples/checksum | 3 | ||||
| -rw-r--r-- | lib/analysers/chunk.ml | 102 | ||||
| -rw-r--r-- | lib/analysers/chunk.mli | 34 | ||||
| -rw-r--r-- | lib/analysers/filters.ml | 133 | ||||
| -rw-r--r-- | lib/analysers/filters.mli | 2 | ||||
| -rw-r--r-- | lib/analysers/printers.ml | 12 | ||||
| -rw-r--r-- | lib/analysers/printers.mli | 14 | ||||
| -rw-r--r-- | lib/analysers/query.ml | 310 | ||||
| -rw-r--r-- | lib/analysers/query.mli | 7 | ||||
| -rw-r--r-- | lib/configuration/cte.ml | 53 | ||||
| -rw-r--r-- | lib/configuration/cte.mli | 20 | ||||
| -rw-r--r-- | lib/configuration/importConf.ml | 1 | ||||
| -rw-r--r-- | lib/configuration/importConf.mli | 12 | ||||
| -rw-r--r-- | lib/configuration/read_conf.ml | 69 | ||||
| -rw-r--r-- | lib/data_types/table.ml | 5 | ||||
| -rw-r--r-- | lib/expression/_readme.rst | 39 | ||||
| -rw-r--r-- | lib/expression/filters.ml | 193 | ||||
| -rw-r--r-- | lib/expression/filters.mli | 9 | ||||
| -rwxr-xr-x | lib/helpers/helpers.ml | 43 | ||||
| -rw-r--r-- | readme.rst | 17 | ||||
| -rw-r--r-- | tests/analyser_filters.ml | 187 | ||||
| -rw-r--r-- | tests/analyser_query_test.ml | 64 | ||||
| -rw-r--r-- | tests/confLoader.ml | 6 | ||||
| -rw-r--r-- | tests/configuration_toml.ml | 24 | ||||
| -rw-r--r-- | tests/importer_test.ml | 1 | ||||
| -rw-r--r-- | tests/sql_db.ml | 119 | ||||
| -rw-r--r-- | tests/test_migration.ml | 27 | 
27 files changed, 882 insertions, 624 deletions
| diff --git a/examples/checksum b/examples/checksum new file mode 100644 index 0000000..b223bc4 --- /dev/null +++ b/examples/checksum @@ -0,0 +1,3 @@ +e158edc600b314e5451cbfbb42fa0a6c  importer.csv +e85080e97cd3f1615069232c22e9b9cc  example_csv.csv +d646adc7d22212908d7c12a88a19ea4b  importer_groupe.csv diff --git a/lib/analysers/chunk.ml b/lib/analysers/chunk.ml new file mode 100644 index 0000000..2fa4808 --- /dev/null +++ b/lib/analysers/chunk.ml @@ -0,0 +1,102 @@ +(** The module allow to create fragment in the query which keep together the +    binderd parameters and the text of the query.contents. + +    This is used a lot in order to create the CTE, where you need the create +    fragment used both in the main request and partially in the CTE itself. + +    The content is mutable and all the functions are returning [unit]. *) + +type t = { +  b : Buffer.t; +  parameters : ImportCSV.DataType.t Queue.t; +} + +let create : unit -> t = + fun () -> { b = Buffer.create 16; parameters = Queue.create () } + +let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = + fun b parameters -> { b; parameters } + +(* Append the element from [tail] at the end of [head] + +     Tail is destroyed during the operation. +  *) +let append : head:t -> tail:t -> unit = + fun ~head ~tail -> +  match Buffer.length tail.b with +  | 0 -> () +  | _ -> +      Buffer.add_buffer head.b tail.b; +      Queue.transfer tail.parameters head.parameters; +      () + +(** Add a litteral string in the sequence *) +let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v + +let copy : t -> t = + fun t -> +  let b = Buffer.create 16 and parameters = Queue.copy t.parameters in +  Buffer.add_buffer b t.b; +  { b; parameters } + +let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = + fun t p -> Queue.add_seq t.parameters p + +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Q = ImportExpression.Query +open StdLabels + +(** Extract the informations from the dependancies. We get two informations here +    : + +    - the join query in order to load the data from the external column +    - the column corresponding to the key in order to identify the missing links +      later. *) +let join_external : conf:Syntax.t -> join_buffer:t -> Syntax.Extern.t -> unit = + fun ~conf ~join_buffer external_ -> +  let extern_table = Table.name external_.target in + +  let formatter = Format.formatter_of_buffer join_buffer.b in +  Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table +    external_.target.name +    (Printers.prepare_key ~f:(fun f -> +         let q = +           Q.query_of_expression Q.BindParam f (Printers.path ~conf) +             external_.intern_key +         in + +         add_parameters join_buffer (Queue.to_seq q))) +    (Table.print_column external_.Syntax.Extern.target +       ("key_" ^ external_.Syntax.Extern.target.name)); + +  Format.pp_print_flush formatter () + +(** Create the from part of the query, adding all the required externals (even +    when not required) + +    SQLite is able to optimize the query and do not load the table not used in +    the select clause. *) +let create_from_statement_of_chunck : +    ?externals:Syntax.Extern.t list -> Syntax.t -> t -> unit = + fun ?externals conf c -> +  let externals = Option.value externals ~default:conf.externals in +  add_string c "\nFROM '"; +  add_string c (Table.name conf.source); +  add_string c "' AS '"; +  add_string c conf.source.name; +  add_string c "'"; + +  (* Add the externals in the query *) +  List.iter externals ~f:(join_external ~conf ~join_buffer:c) + +let add_expression : +    conf:Syntax.t -> t -> ImportDataTypes.Path.t ImportExpression.T.t -> unit = + fun ~conf group expression -> +  let formatter = Format.formatter_of_buffer group.b in +  let queue = +    ImportExpression.Query.query_of_expression ImportExpression.Query.BindParam +      formatter (Printers.path ~conf) expression +  in +  Format.pp_print_flush formatter (); +  add_parameters group (Queue.to_seq queue) diff --git a/lib/analysers/chunk.mli b/lib/analysers/chunk.mli new file mode 100644 index 0000000..d4f69e7 --- /dev/null +++ b/lib/analysers/chunk.mli @@ -0,0 +1,34 @@ +(** This module helps to create a query with prepared values. *) + +type t = { +  b : Buffer.t; +  parameters : ImportCSV.DataType.t Queue.t; +} + +val create : unit -> t +val create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t + +val append : head:t -> tail:t -> unit +(** Append the element from [tail] at the end of [head] + +    Tail is destroyed during the operation. *) + +val add_string : t -> string -> unit +(** Add a litteral string in the sequence *) + +val copy : t -> t + +val create_from_statement_of_chunck : +  ?externals:ImportConf.Syntax.Extern.t list -> ImportConf.Syntax.t -> t -> unit +(** Create the from part of the query, adding all the declared externals (even +    when not required) + +    SQLite is able to optimize the query and do not load the table not used in +    the select clause. *) + +val add_expression : +  conf:ImportConf.Syntax.t -> +  t -> +  ImportDataTypes.Path.t ImportExpression.T.t -> +  unit +(** Add an expression into an existing chunck *) diff --git a/lib/analysers/filters.ml b/lib/analysers/filters.ml new file mode 100644 index 0000000..4e8b175 --- /dev/null +++ b/lib/analysers/filters.ml @@ -0,0 +1,133 @@ +(** Build a fragment of the query match a filter *) + +module Syntax = ImportConf.Syntax +module Path = ImportDataTypes.Path +module Expression = ImportExpression +module CTE = ImportConf.CTE +open StdLabels + +(** Add a list of expressions into the group *) +let rec add_filters : +    conf:Syntax.t -> Chunk.t -> Path.t Expression.T.t list -> unit = + fun ~conf group -> function +  | [] -> () +  | hd :: [] -> Chunk.add_expression ~conf group hd +  | hd :: tl -> +      Chunk.add_expression ~conf group hd; +      Chunk.add_string group "\nAND "; +      add_filters ~conf group tl + +type 'a cte_acc = { +  n : int; +  has_previous : bool; +  acc : 'a; +  cte_index : int option; +  latest_expression : Path.t Expression.T.t list; +} + +let add_inner : conf:Syntax.t -> int -> Buffer.t -> unit = + fun ~conf n b -> +  let name = "filter" ^ string_of_int n in +  (* We use an INNER JOIN here because we want to be sure to get all the rows +     fetched by the CTE *) +  Buffer.add_string b "\nINNER JOIN '"; +  Buffer.add_string b name; +  Buffer.add_string b "' ON "; +  Buffer.add_string b name; +  Buffer.add_string b ".id = "; +  Buffer.add_string b conf.source.name; +  Buffer.add_string b ".id\n" + +let print : +    conf:Syntax.t -> +    (Chunk.t * Chunk.t) cte_acc -> +    CTE.t -> +    (Chunk.t * Chunk.t) cte_acc = + fun ~conf acc cte -> +  let predicates, query = acc.acc in +  let n = acc.n in +  let cte_index = +    match cte.CTE.group with +    | Some expression -> +        begin +          if acc.has_previous then Chunk.add_string query ", " +          else Chunk.add_string query "WITH " +        end; +        Chunk.add_string query "filter"; +        Chunk.add_string query (string_of_int n); +        Chunk.add_string query " AS ("; +        Chunk.add_string query "SELECT "; +        Chunk.add_string query conf.source.name; +        Chunk.add_string query ".id, "; +        Chunk.add_expression ~conf query expression; +        Chunk.add_string query " AS group_function"; +        Chunk.create_from_statement_of_chunck conf query; + +        if acc.has_previous then begin +          let previous_name = "filter" ^ string_of_int (n - 1) in +          add_inner ~conf (n - 1) query.Chunk.b; + +          Chunk.add_string query "WHERE "; +          Chunk.add_string query previous_name; +          Chunk.add_string query ".group_function" +        end; + +        begin +          match cte.CTE.filters with +          | [] -> () +          | _ -> +              Chunk.add_string query " WHERE "; +              add_filters ~conf query cte.CTE.filters +        end; +        Chunk.add_string query ")\n"; +        Some acc.n +    | None -> +        (* Do not add the filters in the CTE (we don’t have any) but in the main +         query *) +        Chunk.add_string predicates "WHERE "; +        add_filters ~conf predicates cte.CTE.filters; +        acc.cte_index +  in +  { +    acc with +    has_previous = true; +    n = acc.n + 1; +    cte_index; +    latest_expression = cte.CTE.filters; +  } + +let generate_sql : conf:Syntax.t -> CTE.t list -> Chunk.t -> Chunk.t = + fun ~conf filters links' -> +  let predicates = Chunk.create () and links = Chunk.create () in +  let eval = +    List.fold_left filters +      ~init: +        { +          n = 0; +          has_previous = false; +          acc = (links, predicates); +          cte_index = None; +          latest_expression = []; +        } +      ~f:(print ~conf) +  in +  match (eval.cte_index, eval.latest_expression) with +  | None, [] -> predicates +  | None, _ -> +      Chunk.add_string links' " "; +      Chunk.append ~head:links' ~tail:links; +      predicates +  | Some n, [] -> +      add_inner ~conf n links'.b; +      Chunk.append ~head:links' ~tail:links; +      Chunk.add_string links' "filter"; +      Chunk.add_string links' (string_of_int n); +      Chunk.add_string links' ".group_function"; +      predicates +  | Some n, _ -> +      add_inner ~conf n links'.b; +      Chunk.append ~head:links' ~tail:links; +      Chunk.add_string links' " AND filter"; +      Chunk.add_string links' (string_of_int n); +      Chunk.add_string links' ".group_function"; +      predicates diff --git a/lib/analysers/filters.mli b/lib/analysers/filters.mli new file mode 100644 index 0000000..7783799 --- /dev/null +++ b/lib/analysers/filters.mli @@ -0,0 +1,2 @@ +val generate_sql : +  conf:ImportConf.Syntax.t -> ImportConf.CTE.t list -> Chunk.t -> Chunk.t diff --git a/lib/analysers/printers.ml b/lib/analysers/printers.ml new file mode 100644 index 0000000..1c73c13 --- /dev/null +++ b/lib/analysers/printers.ml @@ -0,0 +1,12 @@ +module Syntax = ImportConf.Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +let path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = + fun ~conf buffer { alias; column } -> +  let table = ImportConf.get_table_for_name conf alias in +  Format.fprintf buffer "%s" +    (Table.print_column table ("col_" ^ string_of_int column)) + +let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = + fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f diff --git a/lib/analysers/printers.mli b/lib/analysers/printers.mli new file mode 100644 index 0000000..102bb91 --- /dev/null +++ b/lib/analysers/printers.mli @@ -0,0 +1,14 @@ +val path : +  conf:ImportConf.Syntax.t -> Format.formatter -> ImportDataTypes.Path.t -> unit +(** Represent a path in a SQL query. + +    This function is given in the Expression.Query module. *) + +val prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit +(** Wrap an expression for beiing used as a key. What’s given to the formater +    will be pre-processed with sql function to trim and uppercase the value. + +    This function is used at two location in the code : + +    - during the index creation +    - when matching a value against this index *) diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index bcf4a72..e24da78 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -2,58 +2,8 @@ open StdLabels  module Expression = ImportExpression  module Q = Expression.Query  module Syntax = ImportConf.Syntax -module Table = ImportConf.Table -module Path = ImportConf.Path - -let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n) - -(** The module allow to create fragment in the query which keep together the -    binderd parameters and the text of the query.contents. - -    This is used a lot in order to create the CTE, where you need the create -    fragment used both in the main request and partially in the CTE itself. - -    The content is mutable and all the functions are returning [unit]. *) -module Chunk = struct -  type t = { -    b : Buffer.t; -    parameters : ImportCSV.DataType.t Queue.t; -  } - -  let create : unit -> t = -   fun () -> { b = Buffer.create 16; parameters = Queue.create () } - -  let create' : Buffer.t -> ImportCSV.DataType.t Queue.t -> t = -   fun b parameters -> { b; parameters } - -  (* Append the element from [tail] at the end of [head] - -     Tail is destroyed during the operation. -  *) -  let append : head:t -> tail:t -> unit = -   fun ~head ~tail -> -    match Buffer.length tail.b with -    | 0 -> () -    | _ -> -        Buffer.add_buffer head.b tail.b; -        Queue.transfer tail.parameters head.parameters; -        () - -  (** Add a litteral string in the sequence *) -  let add_string : t -> string -> unit = fun t v -> Buffer.add_string t.b v - -  let copy : t -> t = -   fun t -> -    let b = Buffer.create 16 and parameters = Queue.copy t.parameters in -    Buffer.add_buffer b t.b; -    { b; parameters } - -  let add_parameters : t -> ImportCSV.DataType.t Seq.t -> unit = -   fun t p -> Queue.add_seq t.parameters p -end - -let prepare_key : f:(Format.formatter -> unit) -> Format.formatter -> unit = - fun ~f formatter -> Format.fprintf formatter "rtrim(upper(%t))" f +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path  (* Collect all the tables pointed by the expression. *)  let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list = @@ -64,11 +14,6 @@ let pointed_tables : Syntax.t -> 'a Expression.T.t -> (Table.t * string) list =        (table, table_name) :: acc)    |> List.sort_uniq ~cmp:Stdlib.compare -(** Represent a column in a safe way in a query *) -let print_column : Table.t -> string -> string = - fun table column -> -  String.concat ~sep:"" [ "'"; table.Table.name; "'.'"; column; "'" ] -  let create_table : Dependency.t -> string =   fun mapping ->    let b = Buffer.create 64 in @@ -90,184 +35,6 @@ let create_table : Dependency.t -> string =    Buffer.contents b -let show_path : conf:Syntax.t -> Format.formatter -> Path.t -> unit = - fun ~conf buffer { alias; column } -> -  let table = ImportConf.get_table_for_name conf alias in -  let table_name = table.Table.name in -  Format.fprintf buffer "'%s'.col_%d" table_name column - -(** Extract the informations from the dependancies. We get two informations here -    : - -    - the join query in order to load the data from the external column -    - the column corresponding to the key in order to identify the missing links -      later. *) -let query_of_external : -    conf:Syntax.t -> join_buffer:Chunk.t -> Syntax.Extern.t -> unit = - fun ~conf ~join_buffer external_ -> -  let extern_table = Table.name external_.target in - -  let formatter = Format.formatter_of_buffer join_buffer.b in -  Format.fprintf formatter "\nLEFT JOIN '%s' AS '%s' ON %t = %s" extern_table -    external_.target.name -    (prepare_key ~f:(fun f -> -         let q = -           Q.query_of_expression Q.BindParam f (show_path ~conf) -             external_.intern_key -         in - -         Chunk.add_parameters join_buffer (Queue.to_seq q))) -    (print_column external_.Syntax.Extern.target -       ("key_" ^ external_.Syntax.Extern.target.name)); - -  Format.pp_print_flush formatter () - -(** Create the from part of the query, adding all the reuired externals. - -    SQLite is able to optimize the query and do not load the table not used in -    the select clause. *) -let create_from_chunck : Syntax.t -> Chunk.t -> unit = - fun conf c -> -  Chunk.add_string c "\nFROM '"; -  Chunk.add_string c (Table.name conf.source); -  Chunk.add_string c "' AS '"; -  Chunk.add_string c conf.source.name; -  Chunk.add_string c "'"; - -  (* Add the externals in the query *) -  List.iter conf.externals ~f:(query_of_external ~conf ~join_buffer:c) - -(** Build a CTE query in order to use any group function inside the query. -    Return the binded parameters used in the expression. The buffer given in -    argument is also modified during the construction. - -    If filters is not None, the clauses are added to the CTE. *) -let build_cte : -    Syntax.t -> -    expression:'a Expression.T.t -> -    filters:Chunk.t option -> -    Chunk.t = - fun conf ~expression ~filters -> -  (* The binded parameters queue will be used later in the full query *) -  let cte_chunk = Chunk.create () in - -  Chunk.add_string cte_chunk "WITH cte AS (SELECT "; -  Chunk.add_string cte_chunk conf.source.name; -  Chunk.add_string cte_chunk ".id, "; - -  let formatter = Format.formatter_of_buffer cte_chunk.b in - -  let p = -    Q.query_of_expression Q.BindParam formatter (show_path ~conf) expression -  in -  Format.pp_print_flush formatter (); -  Chunk.add_parameters cte_chunk (Queue.to_seq p); -  (* The name is hardcoded here, and used in [Expression.Filters.window] *) -  Chunk.add_string cte_chunk " AS group0"; - -  let () = create_from_chunck conf cte_chunk in -  let () = -    match filters with -    | None -> () -    | Some filters_chunk -> -        Chunk.append ~head:cte_chunk ~tail:(Chunk.copy filters_chunk) -  in - -  Chunk.add_string cte_chunk ")\n"; -  cte_chunk - -type filter_evaluation = { -  content : Buffer.t; -  parameters : ImportCSV.DataType.t Seq.t; -  cte : (string * Chunk.t) option; -} -(** Build the filters to apply in the query. We make the difference here between -    the predicates to apply directly in the query, and the filters associated -    with a group, which are required to be transformed into a CTE in SQL, and -    are evaluated before. *) - -(** Evaluate the filters on the query *) -let eval_filters : Syntax.t -> filter_evaluation = - fun conf -> -  match conf.filters with -  | [] -> -      let empty_buffer = Buffer.create 0 in -      { content = empty_buffer; parameters = Seq.empty; cte = None } -  | filters -> ( -      (* Create a new queue in order to accumulate all the parameters to bind. -         This filter will be given to both the CTE if any, or reused in the -         main query when there is no CTE. -      *) -      let chunk_filters = Chunk.create () in -      Chunk.add_string chunk_filters "\nWHERE "; - -      let group = Chunk.create () in - -      let with_cte, with_exr = -        List.fold_left filters ~init:(None, false) -          ~f:(fun (with_cte, with_exr) column -> -            (* The function will return an option in second position which is -               None when no Group function where found, and Some Expression -               otherwise *) -            let b = Buffer.create 16 in - -            let formatter = Format.formatter_of_buffer b in -            let queue, group_found = -              Expression.Filters.query_of_expression Q.BindParam formatter -                (show_path ~conf) column -            in -            Format.pp_print_flush formatter (); -            let clause = Chunk.create' b queue in - -            match (group_found, with_cte) with -            | None, _ -> -                Chunk.append ~head:chunk_filters ~tail:clause; -                Chunk.add_string chunk_filters "\nAND "; -                (with_cte, true) -            | (Some _ as group'), None -> -                (* We have a group here, we do not add it into the -                   filter_buffer right now. - -                   This can occur only once, the second one will raise -                   an error. *) -                Chunk.append ~head:group ~tail:clause; -                (group', with_exr) -            | Some _, Some _ -> raise ImportErrors.MisplacedWindow) -      in - -      match with_cte with -      | None -> -          let content = chunk_filters.b in -          truncate content 5; -          { -            (* There is no group clause in the query *) -            content; -            parameters = Queue.to_seq chunk_filters.parameters; -            cte = None; -          } -      | Some expression -> -          let filters = -            if with_exr then ( -              (* If we have additionnals filters from the group clause, we -                 have to report them in the CTE instead of the main query. *) -              let c' = Chunk.copy chunk_filters in -              truncate c'.b 5; -              Some c') -            else None -          in - -          (* Create the common expression table *) -          let cte_parameters = build_cte conf ~expression ~filters in -          Chunk.append ~head:chunk_filters ~tail:group; - -          { -            content = chunk_filters.b; -            parameters = Queue.to_seq chunk_filters.parameters; -            (* The name is hardcoded here, and used in -               [Expression.Filters.window] *) -            cte = Some ("cte", cte_parameters); -          }) -  type query = {    q : string;    parameters : ImportCSV.DataType.t Seq.t; @@ -323,23 +90,12 @@ let clean_window :      find which source is pointed by this alias. *)  let select : Syntax.t -> query * Path.t ImportExpression.T.t array =   fun conf -> -  (* If the filters contains a group expression, we need to transform this into -     a CTE, which have to be evaluated before the main query. That’s why we are -     evaluating the filters right now.*) -  let filters = eval_filters conf in -  let b = Buffer.create 256 in -  let parameters = Queue.create () in - -  Option.iter -    (fun (_, (cte : Chunk.t)) -> -      Buffer.add_buffer b cte.b; -      Queue.add_seq parameters (Queue.to_seq cte.parameters)) -    filters.cte; +  let filter = ImportConf.CTE.of_filters conf.filters in    (* For each column in the configuration file, add the corresponding element       in the query. -     The Sqlite driver return the elements in an array, we create  an array to +     The Sqlite driver return the elements in an array, we create an array too       in order to manage the elements together.    *)    let headers = Array.make (List.length conf.columns) (Obj.magic None) in @@ -355,6 +111,10 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =             let expression = c in             (i, clean_window ~prefix:conf.uniq expression))    in +  let filters = Chunk.create () in +  let request_header = Filters.generate_sql ~conf filter filters in +  let b = request_header.Chunk.b +  and parameters = request_header.Chunk.parameters in    let formatter = Format.formatter_of_buffer b in    let () =      Format.fprintf formatter "SELECT %a" @@ -363,7 +123,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =           (fun formatter (i, column) ->             Array.set headers i column;             let p = -             Q.query_of_expression Q.BindParam formatter (show_path ~conf) +             Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)                 column             in             Queue.transfer p parameters; @@ -372,26 +132,8 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =    in    Format.pp_print_flush formatter (); -  let () = create_from_chunck conf (Chunk.create' b parameters) in - -  (* If the query has a CTE, link it as well. We use an INNER JOIN here because -     we want to be sure to get all the rows fetched by the CTE -  *) -  let () = -    match filters.cte with -    | None -> () -    | Some (name, _) -> -        Buffer.add_string b "\nINNER JOIN '"; -        Buffer.add_string b name; -        Buffer.add_string b "' ON "; -        Buffer.add_string b name; -        Buffer.add_string b ".id = "; -        Buffer.add_string b conf.source.name; -        Buffer.add_string b ".id" -  in - -  Buffer.add_buffer b filters.content; -  Queue.add_seq parameters filters.parameters; +  let () = Chunk.create_from_statement_of_chunck conf request_header in +  Chunk.append ~head:request_header ~tail:filters;    let formatter = Format.formatter_of_buffer b in    (match conf.Syntax.uniq with @@ -402,7 +144,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =             ~pp_sep:(fun f () -> Format.fprintf f ", ")             (fun formatter column ->               let seq = -               Q.query_of_expression Q.BindParam formatter (show_path ~conf) +               Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)                   column               in               Queue.transfer seq parameters)) @@ -415,7 +157,7 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =             ~pp_sep:(fun f () -> Format.fprintf f ", ")             (fun formatter column ->               let seq = -               Q.query_of_expression Q.BindParam formatter (show_path ~conf) +               Q.query_of_expression Q.BindParam formatter (Printers.path ~conf)                   column               in               Queue.transfer seq parameters)) @@ -426,20 +168,12 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =  let check_external : Syntax.t -> Syntax.Extern.t -> query =   fun conf external_ -> -  let internal_chunk = -    let internal_key_buffer = Buffer.create 16 in -    let formatter = Format.formatter_of_buffer internal_key_buffer in -    let internal_key_seq = -      Q.query_of_expression Q.BindParam formatter (show_path ~conf) -        external_.Syntax.Extern.intern_key -    in -    Format.pp_print_flush formatter (); -    Chunk.create' internal_key_buffer (Queue.copy internal_key_seq) -  in +  let internal_chunk = Chunk.create () in +  Chunk.add_expression ~conf internal_chunk external_.Syntax.Extern.intern_key;    let external_key_buffer = Buffer.create 16 in    Buffer.add_string external_key_buffer -    (print_column external_.Syntax.Extern.target +    (Table.print_column external_.Syntax.Extern.target         ("key_" ^ external_.Syntax.Extern.target.name));    let pointed_tables = pointed_tables conf external_.intern_key in @@ -495,18 +229,12 @@ let check_external : Syntax.t -> Syntax.Extern.t -> query =          Chunk.add_string request "-1"      | (table, _name) :: _ ->          (* If we have a single source, extract the row number. *) -        Chunk.add_string request (print_column table "id") +        Chunk.add_string request (Table.print_column table "id")    in    Chunk.add_string request ", ";    Chunk.append ~head:request ~tail:(Chunk.copy internal_chunk); -  Chunk.add_string request " FROM\n'"; -  Chunk.add_string request (Table.name conf.source); -  Chunk.add_string request "' AS '"; -  Chunk.add_string request conf.source.name; -  Chunk.add_string request "'"; -  (* Add the externals in the query *) -  List.iter dependencies ~f:(query_of_external ~conf ~join_buffer:request); +  Chunk.create_from_statement_of_chunck ~externals:dependencies conf request;    Chunk.add_string request " WHERE ";    Chunk.add_string request join_content;    Chunk.add_string request " IS NULL AND "; @@ -526,7 +254,7 @@ let build_key_insert : Buffer.t -> Dependency.key -> unit =    let formatter = Format.formatter_of_buffer buffer in    let () = -    prepare_key formatter ~f:(fun formatter -> +    Printers.prepare_key formatter ~f:(fun formatter ->          Q.query_of_expression Q.NoParam formatter show_column expression)    in diff --git a/lib/analysers/query.mli b/lib/analysers/query.mli index 520718a..d158867 100644 --- a/lib/analysers/query.mli +++ b/lib/analysers/query.mli @@ -1,5 +1,3 @@ -module Syntax = ImportConf.Syntax -  val create_table : Dependency.t -> string  type query = { @@ -10,9 +8,10 @@ type query = {      shall be run with all the binded parameters. *)  val select : -  Syntax.t -> query * ImportDataTypes.Path.t ImportExpression.T.t array +  ImportConf.Syntax.t -> +  query * ImportDataTypes.Path.t ImportExpression.T.t array -val check_external : Syntax.t -> Syntax.Extern.t -> query +val check_external : ImportConf.Syntax.t -> ImportConf.Syntax.Extern.t -> query  (** Create a query which select all the missing key in an external *)  val build_key_insert : Buffer.t -> Dependency.key -> unit diff --git a/lib/configuration/cte.ml b/lib/configuration/cte.ml new file mode 100644 index 0000000..ff43d6d --- /dev/null +++ b/lib/configuration/cte.ml @@ -0,0 +1,53 @@ +open StdLabels +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { +  filters : Path.t Expression.t list; +  group : Path.t Expression.t option; +} + +(** Ensure the group criteria in window functions match the global group by +    criteria. + +    Traverse the configuration tree until finding a group window. *) + +(** Check if the expression contains a group function *) +let matchWindowGroup : 'a ImportExpression.T.t -> bool = + fun expression -> +  let exception Found in +  let open ImportExpression.T in +  let rec f = function +    | Empty | Literal _ | Integer _ | Path _ -> () +    | Expr e -> f e +    | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) +      -> List.iter ~f pp +    | Window (_, _, _) -> raise Found +    | BOperator (_, arg1, arg2) -> +        f arg1; +        f arg2 +    | GEquality (_, arg1, args) -> +        f arg1; +        List.iter ~f args +  in +  try +    f expression; +    false +  with +  | Found -> true + +(** Transform a list of expression into a list of CTE to evaluate. *) +let of_filters : Path.t Expression.t list -> t list = + fun filters -> +  let last_group, prev = +    List.fold_left filters +      ~init:({ filters = []; group = None }, []) +      ~f:(fun (cte, acc) expr -> +        begin +          if matchWindowGroup expr then +            ( { filters = []; group = None }, +              { cte with group = Some expr } :: acc ) +          else ({ cte with filters = expr :: cte.filters }, acc) +        end) +  in +  List.rev (last_group :: prev) diff --git a/lib/configuration/cte.mli b/lib/configuration/cte.mli new file mode 100644 index 0000000..0f2b3e3 --- /dev/null +++ b/lib/configuration/cte.mli @@ -0,0 +1,20 @@ +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +type t = { +  filters : Path.t Expression.t list; +  group : Path.t Expression.t option; +} +(** Represent a filter to apply in the querry + +    The CTE can have filters applied on the previous CTE (or directly in the +    sources if there is any yet) and can hold a group (an only one). + +    If there is a group, it must be applied after the others filters. + +    The order in which the filters are presented in the configuration can change +    the results ; it does not matter when we only have classicals filters, +    because all cf them can be evaluated at the same time, but as soon we have a +    group function, the result become dependant of the previous ones. *) + +val of_filters : Path.t Expression.t list -> t list diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml index ebbcb7c..8516008 100644 --- a/lib/configuration/importConf.ml +++ b/lib/configuration/importConf.ml @@ -1,5 +1,6 @@  open StdLabels  module Syntax = Syntax +module CTE = Cte  module Table = ImportDataTypes.Table  module Path = ImportDataTypes.Path  module T = Read_conf diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli index 9ddc40c..40b985b 100644 --- a/lib/configuration/importConf.mli +++ b/lib/configuration/importConf.mli @@ -1,18 +1,18 @@  module Syntax = Syntax -module Table = ImportDataTypes.Table -module Path = ImportDataTypes.Path +module CTE = Cte  val dummy_conf : Syntax.t -val root_table : Syntax.t -> Table.t +val root_table : Syntax.t -> ImportDataTypes.Table.t  (** Get the root table, this table is the main table to load and each line in      this table will be processed *)  val t_of_toml : Otoml.t -> (Syntax.t, string) result -val get_table_for_name : Syntax.t -> string option -> Table.t +val get_table_for_name : Syntax.t -> string option -> ImportDataTypes.Table.t -val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.Extern.t list +val get_dependancies_for_table : +  Syntax.t -> ImportDataTypes.Table.t -> Syntax.Extern.t list  (** Get all the externals refered by the source *)  val expression_from_string : -  string -> (Path.t ImportExpression.T.t, string) result +  string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index 69240c1..11f6726 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -126,44 +126,51 @@ end = struct    let column = Expression_parser.Incremental.column_expr  end +exception Divergent  (** Ensure the group criteria in window functions match the global group by -    criteria. +    criteria. *) -    Traverse the configuration tree until finding a group window. *) +exception NestedGroup +(** Raised when a group contains another one *) + +(** Traverse the configuration tree until finding a group window. *)  let matchWindowGroup :      eq:('a -> 'a -> bool) ->      subset:'a ImportExpression.T.t list ->      'a ImportExpression.T.t -> -    bool = +    unit =   fun ~eq ~subset expression -> -  let exception Divergent in    let open ImportExpression.T in -  let rec f = function +  let rec f isIngroup = function      | Empty | Literal _ | Integer _ | Path _ -> () -    | Expr e -> f e +    | Expr e -> f isIngroup e      | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) -      -> List.iter ~f pp -    | Window (_, pp1, _) -> -        if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () -        else raise_notrace Divergent +      -> List.iter ~f:(f isIngroup) pp +    | Window (expr, pp1, pp2) -> +        let () = +          if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () +          else +            match subset with +            | [] -> () +            | _ -> raise_notrace Divergent +        in +        let () = +          match isIngroup with +          | true -> raise NestedGroup +          | false -> () +        in + +        ignore @@ ImportExpression.T.map_window ~f:(f true) expr; +        List.iter ~f:(f true) pp1; +        List.iter ~f:(f true) pp2      | BOperator (_, arg1, arg2) -> -        f arg1; -        f arg2 +        f isIngroup arg1; +        f isIngroup arg2      | GEquality (_, arg1, args) -> -        f arg1; -        List.iter ~f args +        f isIngroup arg1; +        List.iter ~f:(f isIngroup) args    in -  match subset with -  | [] -> -      (* Do not bother traversing the tree if there is no group by, just -         return Ok *) -      true -  | _ -> ( -      try -        f expression; -        true -      with -      | Divergent -> false) +  f false expression  module Make (S : Decoders.Decode.S) = struct    let ( let* ) = S.( let* ) @@ -185,13 +192,15 @@ module Make (S : Decoders.Decode.S) = struct            | Error e -> S.fail_with Decoders.Error.(make e)            | Ok expr -> (                (* Now check that every window function include at least the uniq list *) -              let valid_subset = matchWindowGroup ~eq ~subset:groups expr in -              match valid_subset with -              | true -> S.succeed expr -              | false -> +              match matchWindowGroup ~eq ~subset:groups expr with +              | () -> S.succeed expr +              | exception Divergent ->                    S.fail                      "The group function shall match the same arguments as the \ -                     \"uniq\" parameter") +                     \"uniq\" parameter" +              | exception NestedGroup -> +                  S.fail +                    "A group function cannot contains another group function")        method source =          let* file = S.field "file" S.string diff --git a/lib/data_types/table.ml b/lib/data_types/table.ml index 2dd956b..4831ac8 100644 --- a/lib/data_types/table.ml +++ b/lib/data_types/table.ml @@ -23,3 +23,8 @@ let name : t -> string =    match source.tab with    | 1 -> file_name    | _ -> String.concat ~sep:"_" [ file_name; string_of_int source.tab ] + +(** Represent a column in a safe way in a query *) +let print_column : t -> string -> string = + fun table column -> +  String.concat ~sep:"" [ "'"; table.name; "'.'"; column; "'" ] diff --git a/lib/expression/_readme.rst b/lib/expression/_readme.rst new file mode 100644 index 0000000..729a950 --- /dev/null +++ b/lib/expression/_readme.rst @@ -0,0 +1,39 @@ +All thoses modules transforms an expression. + +Simple transformations +---------------------- + +:Ast: + +    Regenerate the Ast from the expression + +:Repr: + +    Generate a representable string + +:Type_of: + +    Infer the type of an expression + +:Compose: + +    Allow to use a module into another one. + +:Headers: + +    Extract the headers to display in the result file + +Composed transformations +------------------------ + +:Filter: + +    Generate the filters in the query. This module identify if one the +    expression is actually a group window and handle a special case for this. + +    This module relies on Ast + +:Query: + +    Build an sql query. This module relies on Type_of + diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml deleted file mode 100644 index 42c794b..0000000 --- a/lib/expression/filters.ml +++ /dev/null @@ -1,193 +0,0 @@ -(** This module evaluate the sql query to use in order to filter an expression - -    The result is built over [Query] except for the group function, which are -    translated into a CTE in sql - *) - -open StdLabels -module Q = Query - -type 'a result = { -  repr : Format.formatter -> nested:Query.QueryParameter.t -> unit; -  group : 'a T.t option; -} - -module Filter = struct -  type 'a repr = { -    repr : 'a Q.Query.repr; -    with_group : 'a T.t option; -  } - -  type 'a obs = 'a result -  type 'a path_repr = 'a Q.Query.path_repr - -  let observe : 'a Ast.obs * 'a repr -> 'a obs = -   fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group } - -  let empty : 'a Ast.obs -> 'a repr = -   fun _ -> { repr = Q.Query.empty (); with_group = None } - -  let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr = -   fun (_, expr) _ -> -    { repr = Q.Query.expr expr.repr; with_group = expr.with_group } - -  let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr = -   fun repr p _ -> { repr = Q.Query.path repr p; with_group = None } - -  let literal : string -> 'a Ast.obs -> 'a repr = -   fun l _ -> { repr = Q.Query.literal l; with_group = None } - -  let integer : string -> 'a Ast.obs -> 'a repr = -   fun l _ -> { repr = Q.Query.integer l; with_group = None } - -  let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = -   fun expression _ -> -    let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in -    let with_group = -      List.find_map ~f:(fun v -> (snd v).with_group) expression -    in -    match with_group with -    | None -> { repr = Q.Query.nvl expr_repr; with_group } -    | Some _ -> raise ImportErrors.MisplacedWindow - -  let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = -   fun expression _ -> -    let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in -    let with_group = -      List.find_map ~f:(fun v -> (snd v).with_group) expression -    in -    match with_group with -    | None -> { repr = Q.Query.concat expr_repr; with_group } -    | Some _ -> raise ImportErrors.MisplacedWindow - -  let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = -   fun sep expression _ -> -    let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression -    and with_group = -      List.find_map ~f:(fun v -> (snd v).with_group) expression -    in -    match with_group with -    | None -> { repr = Q.Query.join sep expr_repr; with_group } -    | Some _ -> raise ImportErrors.MisplacedWindow - -  let boperator : -      T.binary_operator -> -      'a Ast.obs * 'a repr -> -      'a Ast.obs * 'a repr -> -      'a Ast.obs -> -      'a repr = -   fun name (_, e1) (_, e2) _ -> -    let with_group = -      match (e1.with_group, e2.with_group) with -      | Some e, None -> Some e -      | None, Some e -> Some e -      | None, None -> None -      | _ -> raise ImportErrors.MisplacedWindow -    in -    { repr = Q.Query.boperator name e1.repr e2.repr; with_group } - -  let gequality : -      T.binary_operator -> -      'a Ast.obs * 'a repr -> -      ('a Ast.obs * 'a repr) list -> -      'a Ast.obs -> -      'a repr = -   fun name (_, e1) group _ -> -    let group_repr = List.map ~f:(fun v -> (snd v).repr) group -    and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in - -    match with_group with -    | None -> -        { -          repr = Q.Query.gequality name e1.repr group_repr; -          with_group = e1.with_group; -        } -    | _ -> raise ImportErrors.MisplacedWindow - -  let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = -   fun name expressions _ -> -    let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in -    let with_group = -      List.find_map ~f:(fun v -> (snd v).with_group) expressions -    in -    match with_group with -    | None -> { repr = Q.Query.funct name expr_repr; with_group } -    | Some _ -> raise ImportErrors.MisplacedWindow - -  let function' : -      T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = -   fun name expressions _ -> -    let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in -    let with_group = -      List.find_map ~f:(fun v -> (snd v).with_group) expressions -    in -    match with_group with -    | None -> -        { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group } -    | Some _ -> raise ImportErrors.MisplacedWindow - -  (** Window functions are not handled in the filters, we save them as an AST -  in order to process them in a separated handler.  - -  It is not allowed to build nested window functions. *) -  let window : -      ('a Ast.obs * 'a repr) T.window -> -      ('a Ast.obs * 'a repr) list -> -      ('a Ast.obs * 'a repr) list -> -      'a Ast.obs -> -      'a repr = -   fun name expressions order ast -> -    ignore name; -    let with_group_expr = -      List.find_map ~f:(fun v -> (snd v).with_group) expressions -    and with_group_order = -      List.find_map ~f:(fun v -> (snd v).with_group) order -    in -    match (with_group_expr, with_group_order) with -    | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow -    | None, None -> -        (* The column name used with the cte. The name is fixed here, and used -           as is in [Analysers.Query.build_cte] and -           [Analysers.Query.eval_filters] *) -        let q = "cte.group0" in -        { -          with_group = Some ast; -          repr = Q.Query.funct "expr" [ Q.Query.literal q ]; -        } -end - -module ASTBuilder = -  Compose.Expression -    (Ast) -    (struct -      let v = () -    end) - -module F : -  Sym.SYM_EXPR -    with type 'a obs = 'a result -     and type 'a path_repr = Format.formatter -> 'a -> unit = -  ASTBuilder.Make (Filter) - -module M = Sym.M (F) - -let query_of_expression : -    type b. -    b Q.binded_query -> -    Format.formatter -> -    (Format.formatter -> 'a -> unit) -> -    'a T.t -> -    b * 'a T.t option = - fun parameter formatter printer expr -> -  let repr = M.eval ~path_repr:printer expr in -  match parameter with -  | BindParam -> -      let p = Queue.create () in -      let parameter = Q.QueryParameter.Queue p in -      let value = F.observe repr in -      value.repr ~nested:parameter formatter; -      (p, value.group) -  | NoParam -> -      let value = F.observe repr in -      value.repr ~nested:Literal formatter; -      ((), value.group) diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli deleted file mode 100644 index d462b5f..0000000 --- a/lib/expression/filters.mli +++ /dev/null @@ -1,9 +0,0 @@ -module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit -(** Query used inside the filter clauses *) - -val query_of_expression : -  'b Query.binded_query -> -  Format.formatter -> -  (Format.formatter -> 'a -> unit) -> -  'a T.t -> -  'b * 'a T.t option diff --git a/lib/helpers/helpers.ml b/lib/helpers/helpers.ml index 9d6fcb8..7e0f2aa 100755 --- a/lib/helpers/helpers.ml +++ b/lib/helpers/helpers.ml @@ -1,45 +1,2 @@  module Toml = Toml
  module Console = Console
 -
 -let date_from_csv : string -> CalendarLib.Date.t option =
 - fun value ->
 -  let open CalendarLib.Date in
 -  try Some (Scanf.sscanf value "%d/%d/%d" (fun d m y -> make y m d)) with
 -  | _ -> (
 -      (* If the date is a number, try from julian day *)
 -      match int_of_string_opt value with
 -      | None -> None
 -      | Some v -> Some (add (make 1899 12 30) (Period.day v)))
 -
 -let fold_opt : ('a -> 'b -> 'a option) -> 'a -> 'b -> 'a =
 - fun f acc b ->
 -  match f acc b with
 -  | None -> acc
 -  | Some v -> v
 -
 -let try_opt exp =
 -  try Some (exp ()) with
 -  | _ -> None
 -
 -let repr_date formatter date =
 -  Format.fprintf formatter "%02d/%02d/%d"
 -    (CalendarLib.Date.day_of_month date)
 -    CalendarLib.Date.(int_of_month @@ month date)
 -    (CalendarLib.Date.year date)
 -
 -let s_repr_date date =
 -  Format.sprintf "%02d/%02d/%d"
 -    (CalendarLib.Date.day_of_month date)
 -    CalendarLib.Date.(int_of_month @@ month date)
 -    (CalendarLib.Date.year date)
 -
 -let repr_opt f channel = function
 -  | None -> ()
 -  | Some v -> f channel v
 -
 -let str_format f =
 -  let buffer = Buffer.create 16 in
 -  let formatter = Format.formatter_of_buffer buffer in
 -  f formatter;
 -  Format.pp_print_flush formatter ();
 -  Buffer.contents buffer
 @@ -324,19 +324,12 @@ Filtrer les données  Un filtre peut être mis en place pour ne conserver que les lignes souhaitées.  Les conditions doivent être toutes validées pour que la ligne soit retenue. -.. note:: - -    Il est possible de mettre une fonction de groupe dans les filtres, pour -    n’extraire par exemple que la première ligne d’une série. Toutefois, cette -    fonctionnalité est encore expérimentale. L’application peut lever une -    erreur si la fonction est trop complexe pour etre interprétée. - -    Si une fonction de groupe est présente, l’application ordonnera les -    évaluations selon l’ordre suivant : +Les filtres sont appliqués dans leur ordre d’apparition. Cela peut changer le +résultat selon que l’on filtre : -    1. En premier lieu tous les filtres *simples* pour ne conserver que les -       lignes correspondantes. -    2. Ensuite seulement la fonction de groupe présente dans la section `filter` +1. d’abord sur la plus grande valeur de la colonne A, puis les lignes avec la +   colonne B ≠ 0 +2. d’abord sur la colonne B ≠ 0, puis la plus grande valeur de la colonne A.  Exemple complet  =============== diff --git a/tests/analyser_filters.ml b/tests/analyser_filters.ml new file mode 100644 index 0000000..c329af8 --- /dev/null +++ b/tests/analyser_filters.ml @@ -0,0 +1,187 @@ +module CTE = ImportConf.CTE +module Filters = ImportAnalyser.Filters +module Chunk = ImportAnalyser.Chunk + +(* Remove the externals links in the coonfiguration because the queries are +   already complicated enough without *) +let conf = { ConfLoader.conf with externals = [] } + +let path' = +  Expression_builder.path ImportDataTypes.Path.{ alias = None; column = 1 } + +(** Build a simple expression expressing a group. This expression will trigger +    the creation of a CTE in the query. *) +let group_expression = +  Expression_builder.(equal (max path' [ path' ] [ path' ]) integer_one) + +(** Basic test ensuring we create nothing when we have nothing to create :) *) +let empty_predicates () = +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf [] chunk_links in + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "No predicates" +    (Chunk.create (), Chunk.create ()) +    (chunk_links, chunk_predicates) + +(** Simple numeric filter + +    The path is identified as pointing to a numeric column, and the associated +    query will replace null with 0. *) +let simple_filter () = +  let filter = CTE.of_filters Expression_builder.[ equal integer_one path' ] in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  Chunk.add_string expected_predicates " WHERE 1=COALESCE('source'.'col_1',0)"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "Simple predicate" +    (expected_predicates, Chunk.create ()) +    (chunk_links, chunk_predicates) + +(** Combined filter + +    The path is identified as pointing to a literal column, andn the associated +    query will replace null with empty string. *) +let multiple_filters () = +  let filter = +    CTE.of_filters Expression_builder.[ integer_one; equal path' literal_zero ] +  in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  (* The predicates can be executed in reverse order, but it’s not an issue +     because they all are applied at the same time in the projection *) +  Chunk.add_string expected_predicates +    " WHERE COALESCE('source'.'col_1','')=?\nAND 1"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "Combined predicate" +    (expected_predicates, Chunk.create ()) +    (chunk_links, chunk_predicates) + +(** Create a simple configuration with a group expression and no other filters +*) +let group_filter () = +  let filter = CTE.of_filters [ group_expression ] in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  Chunk.add_string expected_predicates +    "\n\ +     INNER JOIN 'filter0' ON filter0.id = source.id\n\ +     WHERE filter0.group_function"; + +  let expected_links = Chunk.create () in +  Chunk.add_string expected_links +    "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ +     (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ +     UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ +     FROM 'source' AS 'source')\n"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "Predicate with a group function" +    (expected_predicates, expected_links) +    (chunk_links, chunk_predicates) + +let expression_with_group () = +  let filter = +    CTE.of_filters +      Expression_builder.[ equal integer_one path'; group_expression ] +  in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  Chunk.add_string expected_predicates +    "\n\ +     INNER JOIN 'filter0' ON filter0.id = source.id\n\ +     WHERE filter0.group_function"; + +  let expected_links = Chunk.create () in +  Chunk.add_string expected_links +    "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ +     (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ +     UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ +     FROM 'source' AS 'source' WHERE 1=COALESCE('source'.'col_1',0))\n"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "The predicate expression is inside of the CTE" +    (expected_predicates, expected_links) +    (chunk_links, chunk_predicates) + +(** The last filter is given after the group. The predicate shall not be +    included inside the CTE but in the main query. *) +let group_with_expression () = +  let filter = +    CTE.of_filters +      Expression_builder.[ group_expression; equal integer_one path' ] +  in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  Chunk.add_string expected_predicates +    "\n\ +     INNER JOIN 'filter0' ON filter0.id = source.id\n\ +     WHERE 1=COALESCE('source'.'col_1',0) AND filter0.group_function"; + +  let expected_links = Chunk.create () in +  Chunk.add_string expected_links +    "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ +     (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ +     UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ +     FROM 'source' AS 'source')\n"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "The predicate expression is out of the CTE" +    (expected_predicates, expected_links) +    (chunk_links, chunk_predicates) + +(** Test the configuration with two group, each one generating it’s own CTE in +    the query *) +let group_with_group () = +  let filter = CTE.of_filters [ group_expression; group_expression ] in +  let chunk_links = Chunk.create () in +  let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + +  let expected_predicates = Chunk.create () in +  Chunk.add_string expected_predicates +    "\n\ +     INNER JOIN 'filter1' ON filter1.id = source.id\n\ +     WHERE filter1.group_function"; + +  let expected_links = Chunk.create () in +  Chunk.add_string expected_links +    "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ +     (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ +     UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ +     FROM 'source' AS 'source')\n\ +     , filter1 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ +     (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ +     UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ +     FROM 'source' AS 'source'\n\ +     INNER JOIN 'filter0' ON filter0.id = source.id\n\ +     WHERE filter0.group_function)\n"; + +  Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) +    "The two CTE shall be generated" +    (expected_predicates, expected_links) +    (chunk_links, chunk_predicates) + +let tests = +  Alcotest. +    ( "filters", +      [ +        test_case "No filters" `Quick empty_predicates; +        test_case "Simple filter" `Quick simple_filter; +        test_case "Combined filter" `Quick multiple_filters; +        test_case "Group filter" `Quick group_filter; +        test_case "Expression and Group filter" `Quick expression_with_group; +        test_case "Group filter and Expression" `Quick group_with_expression; +        test_case "Group and Group" `Quick group_with_group; +      ] ) diff --git a/tests/analyser_query_test.ml b/tests/analyser_query_test.ml index 0e23f11..c4ab6d8 100644 --- a/tests/analyser_query_test.ml +++ b/tests/analyser_query_test.ml @@ -32,12 +32,12 @@ let select =    "Select" >:: fun _ ->    let query, _ = Q.select conf in    let expected_query = -    {|SELECT COALESCE('source'.col_1,'') || ? || '' AS result_0, -'source'.col_2 AS result_1, -'last_file'.col_5 AS result_2 +    {|SELECT COALESCE('source'.'col_1','') || ? || '' AS result_0, +'source'.'col_2' AS result_1, +'last_file'.'col_5' AS result_2  FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file'|}    in    Alcotest.check Alcotest.string "" expected_query query.q @@ -47,11 +47,11 @@ let check_externals =    let query = Q.check_external conf (List.hd conf.externals) in    let expected_query = -    "SELECT 'source'.'id', 'source'.col_1 FROM\n\ -     'source' AS 'source'\n\ -     LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = \ -     'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND 'source'.col_1 \ -     IS NOT NULL AND 'source'.col_1 <> ''" +    "SELECT 'source'.'id', 'source'.'col_1'\n\ +     FROM 'source' AS 'source'\n\ +     LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = \ +     'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND \ +     'source'.'col_1' IS NOT NULL AND 'source'.'col_1' <> ''"    in    Alcotest.check Alcotest.string "" expected_query query.q @@ -81,8 +81,8 @@ let previous =    let query, _ = ImportAnalyser.Query.select conf in    let expected_query = -    "SELECT LAG('previous'.col_5) OVER (PARTITION BY 'previous'.col_1 ORDER BY \ -     'previous'.col_3) AS result_0\n\ +    "SELECT LAG('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1' \ +     ORDER BY 'previous'.'col_3') AS result_0\n\       FROM 'source' AS 'previous'"    in    Alcotest.check Alcotest.string "" expected_query query.q @@ -112,7 +112,7 @@ let sum =    let query, _ = ImportAnalyser.Query.select conf in    let expected_query = -    "SELECT SUM('previous'.col_5) OVER (PARTITION BY 'previous'.col_1) AS \ +    "SELECT SUM('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1') AS \       result_0\n\       FROM 'source' AS 'previous'"    in @@ -137,7 +137,7 @@ let sum_total =    let query, _ = ImportAnalyser.Query.select conf in    let expected_query = -    "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" +    "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'"    in    Alcotest.check Alcotest.string "" expected_query query.q @@ -166,7 +166,7 @@ let sum_unfiltered =    let query, _ = ImportAnalyser.Query.select conf in    let expected_query = -    "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" +    "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'"    in    Alcotest.check Alcotest.string "" expected_query query.q @@ -206,16 +206,16 @@ let filter_group =    let contents, _ = ImportAnalyser.Query.select conf in    let expected = -    {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 +    {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function  FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file') -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0  FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE filter0.group_function|}    in    Alcotest.check Alcotest.string "" expected contents.q @@ -238,18 +238,16 @@ let filter_group2 =    let contents, _ = ImportAnalyser.Query.select conf in    let expected = -    {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 +    {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function  FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -WHERE COALESCE('source'.col_3,0)=0) -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0  FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE COALESCE('source'.col_3,0)=0 -AND (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE COALESCE('source'.'col_3',0)=0 AND filter0.group_function|}    in    Alcotest.check Alcotest.string "" expected contents.q diff --git a/tests/confLoader.ml b/tests/confLoader.ml index 4adedd6..692cda9 100644 --- a/tests/confLoader.ml +++ b/tests/confLoader.ml @@ -1,7 +1,9 @@ +let load' : string -> (ImportConf.Syntax.t, string) Result.t = + fun content -> Otoml.Parser.from_string content |> ImportConf.t_of_toml +  (** Read the configuration in toml and return the internal representation *)  let load : string -> ImportConf.Syntax.t = - fun content -> -  Otoml.Parser.from_string content |> ImportConf.t_of_toml |> Result.get_ok + fun content -> Result.get_ok (load' content)  let conf =    load diff --git a/tests/configuration_toml.ml b/tests/configuration_toml.ml index dc1f769..e51c727 100644 --- a/tests/configuration_toml.ml +++ b/tests/configuration_toml.ml @@ -2,6 +2,29 @@ module Expression = ImportExpression.T  module Path = ImportDataTypes.Path  open Test_migration +let nested_group () = +  let expected = +    Error +      "in field \"sheet\":\n\ +      \  in field \"columns\":\n\ +      \    while decoding a list:\n\ +      \      element 0:\n\ +      \        A group function cannot contains another group function, but got\n\ +      \        \"max(:A, [counter([:A], [:A])], [])\" \n" +  and result = +    ConfLoader.load' +      {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ +    "max(:A, [counter([:A], [:A])], [])",  +]|} +  in +  Alcotest.(check (result Test_migration.syntax string)) +    "duplicate" expected result +  let test_suit =    [      ( "parse_extern" >:: fun _ -> @@ -63,6 +86,7 @@ let test_suit =        let toml = Otoml.Parser.from_file "configuration/example_csv.toml" in        let toml = ImportConf.t_of_toml toml in        ignore toml ); +    ("nested group", `Quick, nested_group);    ]  let tests = "configuration_toml" >::: test_suit diff --git a/tests/importer_test.ml b/tests/importer_test.ml index a7b7f0d..78b7f59 100644 --- a/tests/importer_test.ml +++ b/tests/importer_test.ml @@ -11,6 +11,7 @@ let _ =        Expression_type_of.tests;        Expression_query.tests;        Analyser_dependency.tests; +      Analyser_filters.tests;        Analyser_query_test.tests;        Sql_db.tests;      ] diff --git a/tests/sql_db.ml b/tests/sql_db.ml index 75b8293..65a93ce 100644 --- a/tests/sql_db.ml +++ b/tests/sql_db.ml @@ -200,6 +200,121 @@ columns = [           ImportCSV.DataType.             [ [| Integer 1; Integer 200 |]; [| Integer 2; Integer 100 |] ]) +let filter_group = +  run_test "filter_group" +    ~configuration: +      {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ +    ":A",  +] + +filters = [ +    "max(:B, [:C], [:B]) = :B", +] + +|} +    ~input: +      ImportCSV.DataType. +        [ +          [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; +          [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; +          [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; +        ] +    ~expected:(Ok ImportCSV.DataType.[ [| Integer 3 |] ]) + +(** The first filter will prevent the max value to pop, and only the second one +    will be reported *) +let filter_expression_and_group = +  run_test "filter expression then group" +    ~configuration: +      {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ +    ":A",  +] + +filters = [ +    ":B <> 200", +    "max(:B, [:C], [:B]) = :B", +] + +|} +    ~input: +      ImportCSV.DataType. +        [ +          [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; +          [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; +          [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; +        ] +    ~expected:(Ok ImportCSV.DataType.[ [| Integer 2 |] ]) + +(** In this case, we first filter the line and keep only the max value, but the +    second filter will match the result and will produce an empty list *) +let filter_group_and_expression = +  run_test "filter group then expression" +    ~configuration: +      {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ +    ":A",  +] + +filters = [ +    "max(:B, [:C], [:B]) = :B", +    ":B <> 200", +] + +|} +    ~input: +      ImportCSV.DataType. +        [ +          [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; +          [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; +          [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; +        ] +    ~expected:(Ok []) + +(** In this case, each filter remove the line with the higher value in B. + +    After the application of the two filters, the only remaining result is the +    first line. *) +let filter_group_and_group = +  run_test "filter group then group" +    ~configuration: +      {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ +    ":A",  +] + +filters = [ +    "max(:B, [:C], [:B]) <> :B", +    "max(:B, [:C], [:B]) <> :B", +] + +|} +    ~input: +      ImportCSV.DataType. +        [ +          [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; +          [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; +          [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; +        ] +    ~expected:(Ok [ [| ImportCSV.DataType.Integer 1 |] ]) +  let test_suit =    [      simple_extraction; @@ -208,6 +323,10 @@ let test_suit =      sum_unfiltered;      sum_group;      sum_group_uniq; +    filter_group; +    filter_expression_and_group; +    filter_group_and_expression; +    filter_group_and_group;    ]  let tests = "sql_db" >::: test_suit diff --git a/tests/test_migration.ml b/tests/test_migration.ml index 35480d1..e26f354 100644 --- a/tests/test_migration.ml +++ b/tests/test_migration.ml @@ -45,7 +45,7 @@ let int_container_testable = make_test (module ImportContainers.IntSet)  let expression_testable =    make_test      (module struct -      type t = ImportConf.Path.t ImportExpression.T.t [@@deriving show, eq] +      type t = ImportDataTypes.Path.t ImportExpression.T.t [@@deriving show, eq]      end)  let dep_key_testable = @@ -53,3 +53,28 @@ let dep_key_testable =      (module struct        type t = ImportAnalyser.Dependency.key [@@deriving show, eq]      end) + +let chunk = +  make_test +    (module struct +      type t = ImportAnalyser.Chunk.t + +      let pp formater t = +        Format.fprintf formater "%s" (Buffer.contents t.ImportAnalyser.Chunk.b) + +      let equal t1 t2 = +        let to_string t = Buffer.contents t.ImportAnalyser.Chunk.b in +        String.equal (to_string t1) (to_string t2) +    end) + +let syntax = +  make_test +    (module struct +      type t = ImportConf.Syntax.t + +      let pp format t = +        Format.fprintf format "%s" +          (Otoml.Printer.to_string (ImportConf.Syntax.repr t)) + +      let equal t1 t2 = t1 = t2 +    end) | 
