diff options
author | Sébastien Dailly <sebastien@dailly.me> | 2025-02-04 22:30:34 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2025-02-13 18:30:43 +0100 |
commit | d3e0821b9c1551177afb34220d951b087acdea22 (patch) | |
tree | ba23b2916c7cb0336cb42793e4bee931053cacf5 | |
parent | 37556ab070abcbf87a1a822c95aeccf19dade687 (diff) |
Fixed a bug when a group function did not partition in the same way as the uniq parameter
-rw-r--r-- | lib/analysers/query.ml | 55 | ||||
-rw-r--r-- | lib/configuration/read_conf.ml | 81 | ||||
-rw-r--r-- | lib/configuration/syntax.ml | 4 | ||||
-rwxr-xr-x | lib/expression/dune | 6 | ||||
-rw-r--r-- | lib/expression/sym.ml | 4 | ||||
-rw-r--r-- | lib/expression/t.ml | 9 | ||||
-rw-r--r-- | lib/expression/t.mli | 2 | ||||
-rw-r--r-- | lib/expression/type_of.mli | 6 | ||||
-rw-r--r-- | readme.rst | 9 | ||||
-rw-r--r-- | tests/sql_db.ml | 153 |
10 files changed, 240 insertions, 89 deletions
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index dff3b9d..bcf4a72 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -273,6 +273,49 @@ type query = { parameters : ImportCSV.DataType.t Seq.t; } +let rec take_elements : + prefix:'a list -> eq:('a -> 'a -> bool) -> 'a list -> 'a list = + fun ~prefix ~eq group -> + match (prefix, group) with + | [], any -> any + | _, [] -> [] + | hd1 :: tl1, hd2 :: tl2 when eq hd1 hd2 -> take_elements ~eq ~prefix:tl1 tl2 + | _, _ -> raise Not_found + +(** The window functions shall be grouped in the same way as the uniq property. + (We cannot make a partition over a group not kept in the final result). + + But the SQL query need to remove from the window function the elements + already defined in the group by statement, and we need to filter them from + the configuration before building the query. *) +let clean_window : + prefix:Path.t ImportExpression.T.t list -> + Path.t ImportExpression.T.t -> + Path.t ImportExpression.T.t = + fun ~prefix expression -> + let open ImportExpression.T in + let rec f = function + | Expr e -> Expr (f e) + | Empty -> Empty + | Literal s -> Literal s + | Integer i -> Integer i + | Path p -> Path p + | Concat pp -> Concat (List.map ~f pp) + | Function' (name, pp) -> Function' (name, List.map ~f pp) + | Function (name, pp) -> Function (name, List.map ~f pp) + | Nvl pp -> Nvl (List.map ~f pp) + | Join (sep, pp) -> Join (sep, List.map ~f pp) + | Window (window_f, group, order) -> + let w = map_window ~f window_f in + let group = + take_elements ~eq:(ImportExpression.T.equal Path.equal) ~prefix group + in + Window (w, List.map ~f group, List.map ~f order) + | BOperator (n, arg1, arg2) -> BOperator (n, f arg1, f arg2) + | GEquality (n, arg1, args) -> GEquality (n, f arg1, List.map ~f args) + in + f expression + (** Build the query and return also the mapping in order to identify each external links between files. @@ -301,7 +344,17 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array = *) let headers = Array.make (List.length conf.columns) (Obj.magic None) in - let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in + (* Transform the columns to extract from the query. + - Associate each of them with a number + - Clean the window functions and remove the part already defined in the + [uniq] parameter. + *) + let columns = + List.to_seq conf.columns + |> Seq.mapi (fun i c -> + let expression = c in + (i, clean_window ~prefix:conf.uniq expression)) + in let formatter = Format.formatter_of_buffer b in let () = Format.fprintf formatter "SELECT %a" diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index 52ccb62..69240c1 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -126,6 +126,45 @@ end = struct let column = Expression_parser.Incremental.column_expr end +(** Ensure the group criteria in window functions match the global group by + criteria. + + Traverse the configuration tree until finding a group window. *) +let matchWindowGroup : + eq:('a -> 'a -> bool) -> + subset:'a ImportExpression.T.t list -> + 'a ImportExpression.T.t -> + bool = + fun ~eq ~subset expression -> + let exception Divergent in + let open ImportExpression.T in + let rec f = function + | Empty | Literal _ | Integer _ | Path _ -> () + | Expr e -> f e + | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) + -> List.iter ~f pp + | Window (_, pp1, _) -> + if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () + else raise_notrace Divergent + | BOperator (_, arg1, arg2) -> + f arg1; + f arg2 + | GEquality (_, arg1, args) -> + f arg1; + List.iter ~f args + in + match subset with + | [] -> + (* Do not bother traversing the tree if there is no group by, just + return Ok *) + true + | _ -> ( + try + f expression; + true + with + | Divergent -> false) + module Make (S : Decoders.Decode.S) = struct let ( let* ) = S.( let* ) let ( and* ) = S.( and* ) @@ -135,14 +174,24 @@ module Make (S : Decoders.Decode.S) = struct class loader = object (self) method parse_expression : type a. + ?groups:a ImportExpression.T.t list -> + eq:(a -> a -> bool) -> a ExpressionParser.path_builder -> S.value -> (a ImportExpression.T.t, S.value Decoders.Error.t) result = - fun path -> + fun ?(groups = []) ~eq path -> S.string >>= fun v -> match ExpressionParser.of_string path v with - | Ok expr -> S.succeed expr | Error e -> S.fail_with Decoders.Error.(make e) + | Ok expr -> ( + (* Now check that every window function include at least the uniq list *) + let valid_subset = matchWindowGroup ~eq ~subset:groups expr in + match valid_subset with + | true -> S.succeed expr + | false -> + S.fail + "The group function shall match the same arguments as the \ + \"uniq\" parameter") method source = let* file = S.field "file" S.string @@ -152,9 +201,11 @@ module Make (S : Decoders.Decode.S) = struct method external_ name = let* intern_key = - S.field "intern_key" (self#parse_expression ExpressionParser.path) + S.field "intern_key" + (self#parse_expression ~eq:Path.equal ExpressionParser.path) and* extern_key = - S.field "extern_key" (self#parse_expression ExpressionParser.column) + S.field "extern_key" + (self#parse_expression ~eq:Int.equal ExpressionParser.column) and* file = S.field "file" S.string and* tab = S.field_opt_or ~default:1 "tab" S.int and* allow_missing = @@ -172,18 +223,28 @@ module Make (S : Decoders.Decode.S) = struct } method sheet = + (* Check the uniq property first, beecause the group functions need + to include the same expression (at least) *) + let* uniq = + S.field_opt_or ~default:[] "uniq" + @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path) + in + let* columns = S.field "columns" - @@ S.list (self#parse_expression ExpressionParser.path) + @@ S.list + (self#parse_expression ~eq:Path.equal ~groups:uniq + ExpressionParser.path) and* filters = S.field_opt_or ~default:[] "filters" - @@ S.list (self#parse_expression ExpressionParser.path) + @@ S.list + (self#parse_expression ~eq:Path.equal ~groups:uniq + ExpressionParser.path) and* sort = S.field_opt_or ~default:[] "sort" - @@ S.list (self#parse_expression ExpressionParser.path) - and* uniq = - S.field_opt_or ~default:[] "uniq" - @@ S.list (self#parse_expression ExpressionParser.path) + @@ S.list + (self#parse_expression ~eq:Path.equal ~groups:uniq + ExpressionParser.path) in S.succeed @@ fun version source externals locale -> Syntax. diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml index ee47277..9a0a706 100644 --- a/lib/configuration/syntax.ml +++ b/lib/configuration/syntax.ml @@ -53,6 +53,10 @@ module Extern = struct |> Otoml.table end +module ExpressionSet = Set.Make (struct + type t = Path.t ImportExpression.T.t [@@deriving show, eq, ord] +end) + type t = { version : int; locale : string option; diff --git a/lib/expression/dune b/lib/expression/dune index d141b7b..8bf6e62 100755 --- a/lib/expression/dune +++ b/lib/expression/dune @@ -7,6 +7,8 @@ importErrors
)
(preprocess (pps
- ppx_deriving.show
- ppx_deriving.eq ))
+ ppx_deriving.ord
+ ppx_deriving.show
+ ppx_deriving.eq
+ ))
)
diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml index 0360e8e..49e923d 100644 --- a/lib/expression/sym.ml +++ b/lib/expression/sym.ml @@ -1,8 +1,6 @@ (** The signature for an expression analyzer. - Every element is mapped to a function, using the tagless final pattern. - - *) + Every element is mapped to a function, using the tagless final pattern. *) module type SYM_EXPR = sig type 'a repr type 'a obs diff --git a/lib/expression/t.ml b/lib/expression/t.ml index fec8fd7..324d515 100644 --- a/lib/expression/t.ml +++ b/lib/expression/t.ml @@ -6,7 +6,7 @@ type 'a window = | Counter | Previous of 'a | Sum of 'a -[@@deriving show, eq] +[@@deriving show, eq, ord] type 'a t = | Empty @@ -22,7 +22,7 @@ type 'a t = | BOperator of binary_operator * 'a t * 'a t | GEquality of binary_operator * 'a t * 'a t list | Function' of funct * 'a t list -[@@deriving show, eq] +[@@deriving show, eq, ord] and binary_operator = | Equal @@ -34,6 +34,7 @@ and binary_operator = | GT | And | Or +[@@deriving ord] and funct = | Cmp @@ -119,13 +120,13 @@ let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int = let name_cmp = String.compare n1 n2 in if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp | Window (s1, l11, l12), Window (s2, l21, l22) -> ( - match compare s1 s2 with + match compare_window (cmp f) s1 s2 with | 0 -> let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp | other -> other) | BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin - match compare n1 n2 with + match compare_binary_operator n1 n2 with | 0 -> begin match cmp f arg11 arg21 with | 0 -> cmp f arg12 arg22 diff --git a/lib/expression/t.mli b/lib/expression/t.mli index 4e1af55..4c9e0a7 100644 --- a/lib/expression/t.mli +++ b/lib/expression/t.mli @@ -19,7 +19,7 @@ type 'a t = | BOperator of binary_operator * 'a t * 'a t | GEquality of binary_operator * 'a t * 'a t list | Function' of funct * 'a t list -[@@deriving show, eq] +[@@deriving show, eq, ord] and binary_operator = | Equal diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli index 7a11582..a839c09 100644 --- a/lib/expression/type_of.mli +++ b/lib/expression/type_of.mli @@ -1,7 +1,7 @@ -(** This module tries to identify the type of an expression. +(** This module tries to identify the type of an expression. -The references to data comming from the spreaedsheet cannot be evaluated and -marked as [Extern]. *) + The references to data comming from the spreaedsheet cannot be evaluated and + marked as [Extern]. *) type t = ImportDataTypes.Types.t @@ -169,7 +169,7 @@ Le chargement des dépendances La recherche de valeurs dans d’autres fichiers Excel nécessite de déclarer les fichiers à charger. La liste `externals` décrit tous les fichiers qui doivent -être chargés, et comment les données doivent être associée avec le fichier +être chargés, et comment les données doivent être associées avec le fichier source. .. code:: toml @@ -608,6 +608,11 @@ Elles prennent se basent sur deux paramètres supplémentaires : Le tri est ignoré si le regroupement est omi. +.. note:: + + Quand une fonction de groupe est utilisée en combinaison du paramètre + `uniq`, le critère de regroupement doit correspondre à la règle d’unicité. + `counter` Crée un compteur qui s’incrémente tant que les lignes sont identiques. @@ -677,7 +682,7 @@ Elles prennent se basent sur deux paramètres supplémentaires : `min` La fonction `min` permet d’aller chercher le premier élément du groupe - (c’est à dire le plus petit selon l’ordre de tri) + (c’est-à-dire le plus petit selon l’ordre de tri) `min(expression, [regroupement, …], [tri, …])` diff --git a/tests/sql_db.ml b/tests/sql_db.ml index 28666b2..75b8293 100644 --- a/tests/sql_db.ml +++ b/tests/sql_db.ml @@ -57,9 +57,7 @@ let run_test ~configuration ~input ~expected name = let simple_extraction = run_test "simple_extraction" ~configuration: - {|version = 1 - -[source] + {|[source] name = "source_name" file = "source_file" @@ -69,31 +67,17 @@ columns = [ ":B", ":E"]|} ~input: - [ - [ - (0, ImportCSV.DataType.Integer 123); - (1, ImportCSV.DataType.Integer 2); - (4, ImportCSV.DataType.Integer 5); - ]; - ] + ImportCSV.DataType. + [ [ (0, Integer 123); (1, Integer 2); (4, Integer 5) ] ] ~expected: - (Ok - [ - [| - ImportCSV.DataType.Content "123_"; - ImportCSV.DataType.Integer 2; - ImportCSV.DataType.Integer 5; - |]; - ]) + (Ok ImportCSV.DataType.[ [| Content "123_"; Integer 2; Integer 5 |] ]) (** Ensure the behavior of the sum function when a filter is given. It is expected to accumulate the values over each line *) let sum_sort = run_test "sum_sort" ~configuration: - {|version = 1 - -[source] + {|[source] name = "source_name" file = "source_file" @@ -103,37 +87,25 @@ columns = [ "sum(:C, [:B], [:A])", ]|} ~input: - [ - [ - (0, ImportCSV.DataType.Integer 1); - (1, ImportCSV.DataType.Content "A"); - (2, ImportCSV.DataType.Integer 100); - ]; + ImportCSV.DataType. [ - (0, ImportCSV.DataType.Integer 2); - (1, ImportCSV.DataType.Content "A"); - (2, ImportCSV.DataType.Integer 100); - ]; - [ - (0, ImportCSV.DataType.Integer 3); - (1, ImportCSV.DataType.Content "A"); - (2, ImportCSV.DataType.Integer 100); - ]; - ] + [ (0, Integer 1); (1, Content "A"); (2, Integer 100) ]; + [ (0, Integer 2); (1, Content "A"); (2, Integer 100) ]; + [ (0, Integer 3); (1, Content "A"); (2, Integer 100) ]; + ] ~expected: (Ok - [ - [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 100 |]; - [| ImportCSV.DataType.Integer 2; ImportCSV.DataType.Integer 200 |]; - [| ImportCSV.DataType.Integer 3; ImportCSV.DataType.Integer 300 |]; - ]) + ImportCSV.DataType. + [ + [| Integer 1; Integer 100 |]; + [| Integer 2; Integer 200 |]; + [| Integer 3; Integer 300 |]; + ]) let sum_total = run_test "sum_total" ~configuration: - {|version = 1 - -[source] + {|[source] name = "source_name" file = "source_file" @@ -143,14 +115,11 @@ columns = [ "sum(:C, [], [])", ]|} ~input: - [ + ImportCSV.DataType. [ - (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100); - ]; - [ - (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100); - ]; - ] + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 2); (2, Integer 100) ]; + ] ~expected: (Ok [ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |] ]) @@ -159,9 +128,7 @@ columns = [ let sum_unfiltered = run_test "sum_unfiltered" ~configuration: - {|version = 1 - -[source] + {|[source] name = "source_name" file = "source_file" @@ -171,16 +138,76 @@ columns = [ "sum(:C, [], [:A])", ]|} ~input: - [ + ImportCSV.DataType. [ - (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100); - ]; - [ - (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100); - ]; - ] + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 2); (2, Integer 100) ]; + ] ~expected: (Ok [ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |] ]) -let test_suit = [ simple_extraction; sum_sort; sum_total; sum_unfiltered ] +let sum_group = + run_test "sum_group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", + "sum(:C, [:A], [])", +]|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 2); (2, Integer 100) ]; + ] + ~expected: + (Ok + ImportCSV.DataType. + [ + [| Integer 1; Integer 200 |]; + [| Integer 1; Integer 200 |]; + [| Integer 2; Integer 100 |]; + ]) + +(** Adding a uniq filter on column A does not change the sum of the values *) +let sum_group_uniq = + run_test "sum_group_uniq" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +uniq = [":A"] +columns = [ + ":A", + "sum(:C, [:A], [])", +]|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 1); (2, Integer 100) ]; + [ (0, Integer 2); (2, Integer 100) ]; + ] + ~expected: + (Ok + ImportCSV.DataType. + [ [| Integer 1; Integer 200 |]; [| Integer 2; Integer 100 |] ]) + +let test_suit = + [ + simple_extraction; + sum_sort; + sum_total; + sum_unfiltered; + sum_group; + sum_group_uniq; + ] + let tests = "sql_db" >::: test_suit |