aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2025-02-04 22:30:34 +0100
committerSébastien Dailly <sebastien@dailly.me>2025-02-13 18:30:43 +0100
commitd3e0821b9c1551177afb34220d951b087acdea22 (patch)
treeba23b2916c7cb0336cb42793e4bee931053cacf5
parent37556ab070abcbf87a1a822c95aeccf19dade687 (diff)
Fixed a bug when a group function did not partition in the same way as the uniq parameter
-rw-r--r--lib/analysers/query.ml55
-rw-r--r--lib/configuration/read_conf.ml81
-rw-r--r--lib/configuration/syntax.ml4
-rwxr-xr-xlib/expression/dune6
-rw-r--r--lib/expression/sym.ml4
-rw-r--r--lib/expression/t.ml9
-rw-r--r--lib/expression/t.mli2
-rw-r--r--lib/expression/type_of.mli6
-rw-r--r--readme.rst9
-rw-r--r--tests/sql_db.ml153
10 files changed, 240 insertions, 89 deletions
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index dff3b9d..bcf4a72 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -273,6 +273,49 @@ type query = {
parameters : ImportCSV.DataType.t Seq.t;
}
+let rec take_elements :
+ prefix:'a list -> eq:('a -> 'a -> bool) -> 'a list -> 'a list =
+ fun ~prefix ~eq group ->
+ match (prefix, group) with
+ | [], any -> any
+ | _, [] -> []
+ | hd1 :: tl1, hd2 :: tl2 when eq hd1 hd2 -> take_elements ~eq ~prefix:tl1 tl2
+ | _, _ -> raise Not_found
+
+(** The window functions shall be grouped in the same way as the uniq property.
+ (We cannot make a partition over a group not kept in the final result).
+
+ But the SQL query need to remove from the window function the elements
+ already defined in the group by statement, and we need to filter them from
+ the configuration before building the query. *)
+let clean_window :
+ prefix:Path.t ImportExpression.T.t list ->
+ Path.t ImportExpression.T.t ->
+ Path.t ImportExpression.T.t =
+ fun ~prefix expression ->
+ let open ImportExpression.T in
+ let rec f = function
+ | Expr e -> Expr (f e)
+ | Empty -> Empty
+ | Literal s -> Literal s
+ | Integer i -> Integer i
+ | Path p -> Path p
+ | Concat pp -> Concat (List.map ~f pp)
+ | Function' (name, pp) -> Function' (name, List.map ~f pp)
+ | Function (name, pp) -> Function (name, List.map ~f pp)
+ | Nvl pp -> Nvl (List.map ~f pp)
+ | Join (sep, pp) -> Join (sep, List.map ~f pp)
+ | Window (window_f, group, order) ->
+ let w = map_window ~f window_f in
+ let group =
+ take_elements ~eq:(ImportExpression.T.equal Path.equal) ~prefix group
+ in
+ Window (w, List.map ~f group, List.map ~f order)
+ | BOperator (n, arg1, arg2) -> BOperator (n, f arg1, f arg2)
+ | GEquality (n, arg1, args) -> GEquality (n, f arg1, List.map ~f args)
+ in
+ f expression
+
(** Build the query and return also the mapping in order to identify each
external links between files.
@@ -301,7 +344,17 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
*)
let headers = Array.make (List.length conf.columns) (Obj.magic None) in
- let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in
+ (* Transform the columns to extract from the query.
+ - Associate each of them with a number
+ - Clean the window functions and remove the part already defined in the
+ [uniq] parameter.
+ *)
+ let columns =
+ List.to_seq conf.columns
+ |> Seq.mapi (fun i c ->
+ let expression = c in
+ (i, clean_window ~prefix:conf.uniq expression))
+ in
let formatter = Format.formatter_of_buffer b in
let () =
Format.fprintf formatter "SELECT %a"
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
index 52ccb62..69240c1 100644
--- a/lib/configuration/read_conf.ml
+++ b/lib/configuration/read_conf.ml
@@ -126,6 +126,45 @@ end = struct
let column = Expression_parser.Incremental.column_expr
end
+(** Ensure the group criteria in window functions match the global group by
+ criteria.
+
+ Traverse the configuration tree until finding a group window. *)
+let matchWindowGroup :
+ eq:('a -> 'a -> bool) ->
+ subset:'a ImportExpression.T.t list ->
+ 'a ImportExpression.T.t ->
+ bool =
+ fun ~eq ~subset expression ->
+ let exception Divergent in
+ let open ImportExpression.T in
+ let rec f = function
+ | Empty | Literal _ | Integer _ | Path _ -> ()
+ | Expr e -> f e
+ | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
+ -> List.iter ~f pp
+ | Window (_, pp1, _) ->
+ if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
+ else raise_notrace Divergent
+ | BOperator (_, arg1, arg2) ->
+ f arg1;
+ f arg2
+ | GEquality (_, arg1, args) ->
+ f arg1;
+ List.iter ~f args
+ in
+ match subset with
+ | [] ->
+ (* Do not bother traversing the tree if there is no group by, just
+ return Ok *)
+ true
+ | _ -> (
+ try
+ f expression;
+ true
+ with
+ | Divergent -> false)
+
module Make (S : Decoders.Decode.S) = struct
let ( let* ) = S.( let* )
let ( and* ) = S.( and* )
@@ -135,14 +174,24 @@ module Make (S : Decoders.Decode.S) = struct
class loader =
object (self)
method parse_expression : type a.
+ ?groups:a ImportExpression.T.t list ->
+ eq:(a -> a -> bool) ->
a ExpressionParser.path_builder ->
S.value ->
(a ImportExpression.T.t, S.value Decoders.Error.t) result =
- fun path ->
+ fun ?(groups = []) ~eq path ->
S.string >>= fun v ->
match ExpressionParser.of_string path v with
- | Ok expr -> S.succeed expr
| Error e -> S.fail_with Decoders.Error.(make e)
+ | Ok expr -> (
+ (* Now check that every window function include at least the uniq list *)
+ let valid_subset = matchWindowGroup ~eq ~subset:groups expr in
+ match valid_subset with
+ | true -> S.succeed expr
+ | false ->
+ S.fail
+ "The group function shall match the same arguments as the \
+ \"uniq\" parameter")
method source =
let* file = S.field "file" S.string
@@ -152,9 +201,11 @@ module Make (S : Decoders.Decode.S) = struct
method external_ name =
let* intern_key =
- S.field "intern_key" (self#parse_expression ExpressionParser.path)
+ S.field "intern_key"
+ (self#parse_expression ~eq:Path.equal ExpressionParser.path)
and* extern_key =
- S.field "extern_key" (self#parse_expression ExpressionParser.column)
+ S.field "extern_key"
+ (self#parse_expression ~eq:Int.equal ExpressionParser.column)
and* file = S.field "file" S.string
and* tab = S.field_opt_or ~default:1 "tab" S.int
and* allow_missing =
@@ -172,18 +223,28 @@ module Make (S : Decoders.Decode.S) = struct
}
method sheet =
+ (* Check the uniq property first, beecause the group functions need
+ to include the same expression (at least) *)
+ let* uniq =
+ S.field_opt_or ~default:[] "uniq"
+ @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path)
+ in
+
let* columns =
S.field "columns"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
and* filters =
S.field_opt_or ~default:[] "filters"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
and* sort =
S.field_opt_or ~default:[] "sort"
- @@ S.list (self#parse_expression ExpressionParser.path)
- and* uniq =
- S.field_opt_or ~default:[] "uniq"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
in
S.succeed @@ fun version source externals locale ->
Syntax.
diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml
index ee47277..9a0a706 100644
--- a/lib/configuration/syntax.ml
+++ b/lib/configuration/syntax.ml
@@ -53,6 +53,10 @@ module Extern = struct
|> Otoml.table
end
+module ExpressionSet = Set.Make (struct
+ type t = Path.t ImportExpression.T.t [@@deriving show, eq, ord]
+end)
+
type t = {
version : int;
locale : string option;
diff --git a/lib/expression/dune b/lib/expression/dune
index d141b7b..8bf6e62 100755
--- a/lib/expression/dune
+++ b/lib/expression/dune
@@ -7,6 +7,8 @@
importErrors
)
(preprocess (pps
- ppx_deriving.show
- ppx_deriving.eq ))
+ ppx_deriving.ord
+ ppx_deriving.show
+ ppx_deriving.eq
+ ))
)
diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml
index 0360e8e..49e923d 100644
--- a/lib/expression/sym.ml
+++ b/lib/expression/sym.ml
@@ -1,8 +1,6 @@
(** The signature for an expression analyzer.
- Every element is mapped to a function, using the tagless final pattern.
-
- *)
+ Every element is mapped to a function, using the tagless final pattern. *)
module type SYM_EXPR = sig
type 'a repr
type 'a obs
diff --git a/lib/expression/t.ml b/lib/expression/t.ml
index fec8fd7..324d515 100644
--- a/lib/expression/t.ml
+++ b/lib/expression/t.ml
@@ -6,7 +6,7 @@ type 'a window =
| Counter
| Previous of 'a
| Sum of 'a
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
type 'a t =
| Empty
@@ -22,7 +22,7 @@ type 'a t =
| BOperator of binary_operator * 'a t * 'a t
| GEquality of binary_operator * 'a t * 'a t list
| Function' of funct * 'a t list
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
and binary_operator =
| Equal
@@ -34,6 +34,7 @@ and binary_operator =
| GT
| And
| Or
+[@@deriving ord]
and funct =
| Cmp
@@ -119,13 +120,13 @@ let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int =
let name_cmp = String.compare n1 n2 in
if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp
| Window (s1, l11, l12), Window (s2, l21, l22) -> (
- match compare s1 s2 with
+ match compare_window (cmp f) s1 s2 with
| 0 ->
let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in
if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp
| other -> other)
| BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin
- match compare n1 n2 with
+ match compare_binary_operator n1 n2 with
| 0 -> begin
match cmp f arg11 arg21 with
| 0 -> cmp f arg12 arg22
diff --git a/lib/expression/t.mli b/lib/expression/t.mli
index 4e1af55..4c9e0a7 100644
--- a/lib/expression/t.mli
+++ b/lib/expression/t.mli
@@ -19,7 +19,7 @@ type 'a t =
| BOperator of binary_operator * 'a t * 'a t
| GEquality of binary_operator * 'a t * 'a t list
| Function' of funct * 'a t list
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
and binary_operator =
| Equal
diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli
index 7a11582..a839c09 100644
--- a/lib/expression/type_of.mli
+++ b/lib/expression/type_of.mli
@@ -1,7 +1,7 @@
-(** This module tries to identify the type of an expression.
+(** This module tries to identify the type of an expression.
-The references to data comming from the spreaedsheet cannot be evaluated and
-marked as [Extern]. *)
+ The references to data comming from the spreaedsheet cannot be evaluated and
+ marked as [Extern]. *)
type t = ImportDataTypes.Types.t
diff --git a/readme.rst b/readme.rst
index 3c7c1a9..bad0d53 100644
--- a/readme.rst
+++ b/readme.rst
@@ -169,7 +169,7 @@ Le chargement des dépendances
La recherche de valeurs dans d’autres fichiers Excel nécessite de déclarer les
fichiers à charger. La liste `externals` décrit tous les fichiers qui doivent
-être chargés, et comment les données doivent être associée avec le fichier
+être chargés, et comment les données doivent être associées avec le fichier
source.
.. code:: toml
@@ -608,6 +608,11 @@ Elles prennent se basent sur deux paramètres supplémentaires :
Le tri est ignoré si le regroupement est omi.
+.. note::
+
+ Quand une fonction de groupe est utilisée en combinaison du paramètre
+ `uniq`, le critère de regroupement doit correspondre à la règle d’unicité.
+
`counter`
Crée un compteur qui s’incrémente tant que les lignes sont identiques.
@@ -677,7 +682,7 @@ Elles prennent se basent sur deux paramètres supplémentaires :
`min`
La fonction `min` permet d’aller chercher le premier élément du groupe
- (c’est à dire le plus petit selon l’ordre de tri)
+ (c’est-à-dire le plus petit selon l’ordre de tri)
`min(expression, [regroupement, …], [tri, …])`
diff --git a/tests/sql_db.ml b/tests/sql_db.ml
index 28666b2..75b8293 100644
--- a/tests/sql_db.ml
+++ b/tests/sql_db.ml
@@ -57,9 +57,7 @@ let run_test ~configuration ~input ~expected name =
let simple_extraction =
run_test "simple_extraction"
~configuration:
- {|version = 1
-
-[source]
+ {|[source]
name = "source_name"
file = "source_file"
@@ -69,31 +67,17 @@ columns = [
":B",
":E"]|}
~input:
- [
- [
- (0, ImportCSV.DataType.Integer 123);
- (1, ImportCSV.DataType.Integer 2);
- (4, ImportCSV.DataType.Integer 5);
- ];
- ]
+ ImportCSV.DataType.
+ [ [ (0, Integer 123); (1, Integer 2); (4, Integer 5) ] ]
~expected:
- (Ok
- [
- [|
- ImportCSV.DataType.Content "123_";
- ImportCSV.DataType.Integer 2;
- ImportCSV.DataType.Integer 5;
- |];
- ])
+ (Ok ImportCSV.DataType.[ [| Content "123_"; Integer 2; Integer 5 |] ])
(** Ensure the behavior of the sum function when a filter is given. It is
expected to accumulate the values over each line *)
let sum_sort =
run_test "sum_sort"
~configuration:
- {|version = 1
-
-[source]
+ {|[source]
name = "source_name"
file = "source_file"
@@ -103,37 +87,25 @@ columns = [
"sum(:C, [:B], [:A])",
]|}
~input:
- [
- [
- (0, ImportCSV.DataType.Integer 1);
- (1, ImportCSV.DataType.Content "A");
- (2, ImportCSV.DataType.Integer 100);
- ];
+ ImportCSV.DataType.
[
- (0, ImportCSV.DataType.Integer 2);
- (1, ImportCSV.DataType.Content "A");
- (2, ImportCSV.DataType.Integer 100);
- ];
- [
- (0, ImportCSV.DataType.Integer 3);
- (1, ImportCSV.DataType.Content "A");
- (2, ImportCSV.DataType.Integer 100);
- ];
- ]
+ [ (0, Integer 1); (1, Content "A"); (2, Integer 100) ];
+ [ (0, Integer 2); (1, Content "A"); (2, Integer 100) ];
+ [ (0, Integer 3); (1, Content "A"); (2, Integer 100) ];
+ ]
~expected:
(Ok
- [
- [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 100 |];
- [| ImportCSV.DataType.Integer 2; ImportCSV.DataType.Integer 200 |];
- [| ImportCSV.DataType.Integer 3; ImportCSV.DataType.Integer 300 |];
- ])
+ ImportCSV.DataType.
+ [
+ [| Integer 1; Integer 100 |];
+ [| Integer 2; Integer 200 |];
+ [| Integer 3; Integer 300 |];
+ ])
let sum_total =
run_test "sum_total"
~configuration:
- {|version = 1
-
-[source]
+ {|[source]
name = "source_name"
file = "source_file"
@@ -143,14 +115,11 @@ columns = [
"sum(:C, [], [])",
]|}
~input:
- [
+ ImportCSV.DataType.
[
- (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100);
- ];
- [
- (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100);
- ];
- ]
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 2); (2, Integer 100) ];
+ ]
~expected:
(Ok [ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |] ])
@@ -159,9 +128,7 @@ columns = [
let sum_unfiltered =
run_test "sum_unfiltered"
~configuration:
- {|version = 1
-
-[source]
+ {|[source]
name = "source_name"
file = "source_file"
@@ -171,16 +138,76 @@ columns = [
"sum(:C, [], [:A])",
]|}
~input:
- [
+ ImportCSV.DataType.
[
- (0, ImportCSV.DataType.Integer 1); (2, ImportCSV.DataType.Integer 100);
- ];
- [
- (0, ImportCSV.DataType.Integer 2); (2, ImportCSV.DataType.Integer 100);
- ];
- ]
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 2); (2, Integer 100) ];
+ ]
~expected:
(Ok [ [| ImportCSV.DataType.Integer 1; ImportCSV.DataType.Integer 200 |] ])
-let test_suit = [ simple_extraction; sum_sort; sum_total; sum_unfiltered ]
+let sum_group =
+ run_test "sum_group"
+ ~configuration:
+ {|[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+columns = [
+ ":A",
+ "sum(:C, [:A], [])",
+]|}
+ ~input:
+ ImportCSV.DataType.
+ [
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 2); (2, Integer 100) ];
+ ]
+ ~expected:
+ (Ok
+ ImportCSV.DataType.
+ [
+ [| Integer 1; Integer 200 |];
+ [| Integer 1; Integer 200 |];
+ [| Integer 2; Integer 100 |];
+ ])
+
+(** Adding a uniq filter on column A does not change the sum of the values *)
+let sum_group_uniq =
+ run_test "sum_group_uniq"
+ ~configuration:
+ {|[source]
+name = "source_name"
+file = "source_file"
+
+[sheet]
+uniq = [":A"]
+columns = [
+ ":A",
+ "sum(:C, [:A], [])",
+]|}
+ ~input:
+ ImportCSV.DataType.
+ [
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 1); (2, Integer 100) ];
+ [ (0, Integer 2); (2, Integer 100) ];
+ ]
+ ~expected:
+ (Ok
+ ImportCSV.DataType.
+ [ [| Integer 1; Integer 200 |]; [| Integer 2; Integer 100 |] ])
+
+let test_suit =
+ [
+ simple_extraction;
+ sum_sort;
+ sum_total;
+ sum_unfiltered;
+ sum_group;
+ sum_group_uniq;
+ ]
+
let tests = "sql_db" >::: test_suit