aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2025-02-04 22:30:34 +0100
committerSébastien Dailly <sebastien@dailly.me>2025-02-13 18:30:43 +0100
commitd3e0821b9c1551177afb34220d951b087acdea22 (patch)
treeba23b2916c7cb0336cb42793e4bee931053cacf5 /lib
parent37556ab070abcbf87a1a822c95aeccf19dade687 (diff)
Fixed a bug when a group function did not partition in the same way as the uniq parameter
Diffstat (limited to 'lib')
-rw-r--r--lib/analysers/query.ml55
-rw-r--r--lib/configuration/read_conf.ml81
-rw-r--r--lib/configuration/syntax.ml4
-rwxr-xr-xlib/expression/dune6
-rw-r--r--lib/expression/sym.ml4
-rw-r--r--lib/expression/t.ml9
-rw-r--r--lib/expression/t.mli2
-rw-r--r--lib/expression/type_of.mli6
8 files changed, 143 insertions, 24 deletions
diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml
index dff3b9d..bcf4a72 100644
--- a/lib/analysers/query.ml
+++ b/lib/analysers/query.ml
@@ -273,6 +273,49 @@ type query = {
parameters : ImportCSV.DataType.t Seq.t;
}
+let rec take_elements :
+ prefix:'a list -> eq:('a -> 'a -> bool) -> 'a list -> 'a list =
+ fun ~prefix ~eq group ->
+ match (prefix, group) with
+ | [], any -> any
+ | _, [] -> []
+ | hd1 :: tl1, hd2 :: tl2 when eq hd1 hd2 -> take_elements ~eq ~prefix:tl1 tl2
+ | _, _ -> raise Not_found
+
+(** The window functions shall be grouped in the same way as the uniq property.
+ (We cannot make a partition over a group not kept in the final result).
+
+ But the SQL query need to remove from the window function the elements
+ already defined in the group by statement, and we need to filter them from
+ the configuration before building the query. *)
+let clean_window :
+ prefix:Path.t ImportExpression.T.t list ->
+ Path.t ImportExpression.T.t ->
+ Path.t ImportExpression.T.t =
+ fun ~prefix expression ->
+ let open ImportExpression.T in
+ let rec f = function
+ | Expr e -> Expr (f e)
+ | Empty -> Empty
+ | Literal s -> Literal s
+ | Integer i -> Integer i
+ | Path p -> Path p
+ | Concat pp -> Concat (List.map ~f pp)
+ | Function' (name, pp) -> Function' (name, List.map ~f pp)
+ | Function (name, pp) -> Function (name, List.map ~f pp)
+ | Nvl pp -> Nvl (List.map ~f pp)
+ | Join (sep, pp) -> Join (sep, List.map ~f pp)
+ | Window (window_f, group, order) ->
+ let w = map_window ~f window_f in
+ let group =
+ take_elements ~eq:(ImportExpression.T.equal Path.equal) ~prefix group
+ in
+ Window (w, List.map ~f group, List.map ~f order)
+ | BOperator (n, arg1, arg2) -> BOperator (n, f arg1, f arg2)
+ | GEquality (n, arg1, args) -> GEquality (n, f arg1, List.map ~f args)
+ in
+ f expression
+
(** Build the query and return also the mapping in order to identify each
external links between files.
@@ -301,7 +344,17 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =
*)
let headers = Array.make (List.length conf.columns) (Obj.magic None) in
- let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in
+ (* Transform the columns to extract from the query.
+ - Associate each of them with a number
+ - Clean the window functions and remove the part already defined in the
+ [uniq] parameter.
+ *)
+ let columns =
+ List.to_seq conf.columns
+ |> Seq.mapi (fun i c ->
+ let expression = c in
+ (i, clean_window ~prefix:conf.uniq expression))
+ in
let formatter = Format.formatter_of_buffer b in
let () =
Format.fprintf formatter "SELECT %a"
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
index 52ccb62..69240c1 100644
--- a/lib/configuration/read_conf.ml
+++ b/lib/configuration/read_conf.ml
@@ -126,6 +126,45 @@ end = struct
let column = Expression_parser.Incremental.column_expr
end
+(** Ensure the group criteria in window functions match the global group by
+ criteria.
+
+ Traverse the configuration tree until finding a group window. *)
+let matchWindowGroup :
+ eq:('a -> 'a -> bool) ->
+ subset:'a ImportExpression.T.t list ->
+ 'a ImportExpression.T.t ->
+ bool =
+ fun ~eq ~subset expression ->
+ let exception Divergent in
+ let open ImportExpression.T in
+ let rec f = function
+ | Empty | Literal _ | Integer _ | Path _ -> ()
+ | Expr e -> f e
+ | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
+ -> List.iter ~f pp
+ | Window (_, pp1, _) ->
+ if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
+ else raise_notrace Divergent
+ | BOperator (_, arg1, arg2) ->
+ f arg1;
+ f arg2
+ | GEquality (_, arg1, args) ->
+ f arg1;
+ List.iter ~f args
+ in
+ match subset with
+ | [] ->
+ (* Do not bother traversing the tree if there is no group by, just
+ return Ok *)
+ true
+ | _ -> (
+ try
+ f expression;
+ true
+ with
+ | Divergent -> false)
+
module Make (S : Decoders.Decode.S) = struct
let ( let* ) = S.( let* )
let ( and* ) = S.( and* )
@@ -135,14 +174,24 @@ module Make (S : Decoders.Decode.S) = struct
class loader =
object (self)
method parse_expression : type a.
+ ?groups:a ImportExpression.T.t list ->
+ eq:(a -> a -> bool) ->
a ExpressionParser.path_builder ->
S.value ->
(a ImportExpression.T.t, S.value Decoders.Error.t) result =
- fun path ->
+ fun ?(groups = []) ~eq path ->
S.string >>= fun v ->
match ExpressionParser.of_string path v with
- | Ok expr -> S.succeed expr
| Error e -> S.fail_with Decoders.Error.(make e)
+ | Ok expr -> (
+ (* Now check that every window function include at least the uniq list *)
+ let valid_subset = matchWindowGroup ~eq ~subset:groups expr in
+ match valid_subset with
+ | true -> S.succeed expr
+ | false ->
+ S.fail
+ "The group function shall match the same arguments as the \
+ \"uniq\" parameter")
method source =
let* file = S.field "file" S.string
@@ -152,9 +201,11 @@ module Make (S : Decoders.Decode.S) = struct
method external_ name =
let* intern_key =
- S.field "intern_key" (self#parse_expression ExpressionParser.path)
+ S.field "intern_key"
+ (self#parse_expression ~eq:Path.equal ExpressionParser.path)
and* extern_key =
- S.field "extern_key" (self#parse_expression ExpressionParser.column)
+ S.field "extern_key"
+ (self#parse_expression ~eq:Int.equal ExpressionParser.column)
and* file = S.field "file" S.string
and* tab = S.field_opt_or ~default:1 "tab" S.int
and* allow_missing =
@@ -172,18 +223,28 @@ module Make (S : Decoders.Decode.S) = struct
}
method sheet =
+ (* Check the uniq property first, beecause the group functions need
+ to include the same expression (at least) *)
+ let* uniq =
+ S.field_opt_or ~default:[] "uniq"
+ @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path)
+ in
+
let* columns =
S.field "columns"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
and* filters =
S.field_opt_or ~default:[] "filters"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
and* sort =
S.field_opt_or ~default:[] "sort"
- @@ S.list (self#parse_expression ExpressionParser.path)
- and* uniq =
- S.field_opt_or ~default:[] "uniq"
- @@ S.list (self#parse_expression ExpressionParser.path)
+ @@ S.list
+ (self#parse_expression ~eq:Path.equal ~groups:uniq
+ ExpressionParser.path)
in
S.succeed @@ fun version source externals locale ->
Syntax.
diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml
index ee47277..9a0a706 100644
--- a/lib/configuration/syntax.ml
+++ b/lib/configuration/syntax.ml
@@ -53,6 +53,10 @@ module Extern = struct
|> Otoml.table
end
+module ExpressionSet = Set.Make (struct
+ type t = Path.t ImportExpression.T.t [@@deriving show, eq, ord]
+end)
+
type t = {
version : int;
locale : string option;
diff --git a/lib/expression/dune b/lib/expression/dune
index d141b7b..8bf6e62 100755
--- a/lib/expression/dune
+++ b/lib/expression/dune
@@ -7,6 +7,8 @@
importErrors
)
(preprocess (pps
- ppx_deriving.show
- ppx_deriving.eq ))
+ ppx_deriving.ord
+ ppx_deriving.show
+ ppx_deriving.eq
+ ))
)
diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml
index 0360e8e..49e923d 100644
--- a/lib/expression/sym.ml
+++ b/lib/expression/sym.ml
@@ -1,8 +1,6 @@
(** The signature for an expression analyzer.
- Every element is mapped to a function, using the tagless final pattern.
-
- *)
+ Every element is mapped to a function, using the tagless final pattern. *)
module type SYM_EXPR = sig
type 'a repr
type 'a obs
diff --git a/lib/expression/t.ml b/lib/expression/t.ml
index fec8fd7..324d515 100644
--- a/lib/expression/t.ml
+++ b/lib/expression/t.ml
@@ -6,7 +6,7 @@ type 'a window =
| Counter
| Previous of 'a
| Sum of 'a
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
type 'a t =
| Empty
@@ -22,7 +22,7 @@ type 'a t =
| BOperator of binary_operator * 'a t * 'a t
| GEquality of binary_operator * 'a t * 'a t list
| Function' of funct * 'a t list
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
and binary_operator =
| Equal
@@ -34,6 +34,7 @@ and binary_operator =
| GT
| And
| Or
+[@@deriving ord]
and funct =
| Cmp
@@ -119,13 +120,13 @@ let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int =
let name_cmp = String.compare n1 n2 in
if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp
| Window (s1, l11, l12), Window (s2, l21, l22) -> (
- match compare s1 s2 with
+ match compare_window (cmp f) s1 s2 with
| 0 ->
let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in
if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp
| other -> other)
| BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin
- match compare n1 n2 with
+ match compare_binary_operator n1 n2 with
| 0 -> begin
match cmp f arg11 arg21 with
| 0 -> cmp f arg12 arg22
diff --git a/lib/expression/t.mli b/lib/expression/t.mli
index 4e1af55..4c9e0a7 100644
--- a/lib/expression/t.mli
+++ b/lib/expression/t.mli
@@ -19,7 +19,7 @@ type 'a t =
| BOperator of binary_operator * 'a t * 'a t
| GEquality of binary_operator * 'a t * 'a t list
| Function' of funct * 'a t list
-[@@deriving show, eq]
+[@@deriving show, eq, ord]
and binary_operator =
| Equal
diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli
index 7a11582..a839c09 100644
--- a/lib/expression/type_of.mli
+++ b/lib/expression/type_of.mli
@@ -1,7 +1,7 @@
-(** This module tries to identify the type of an expression.
+(** This module tries to identify the type of an expression.
-The references to data comming from the spreaedsheet cannot be evaluated and
-marked as [Extern]. *)
+ The references to data comming from the spreaedsheet cannot be evaluated and
+ marked as [Extern]. *)
type t = ImportDataTypes.Types.t