From 81db1bfd580791910646525e30bc45af34533987 Mon Sep 17 00:00:00 2001
From: Sébastien Dailly <sebastien@dailly.me>
Date: Sat, 1 Mar 2025 08:39:02 +0100
Subject: Rewrite the way to handle filters

---
 lib/configuration/cte.ml         | 53 ++++++++++++++++++++++++++++++
 lib/configuration/cte.mli        | 20 ++++++++++++
 lib/configuration/importConf.ml  |  1 +
 lib/configuration/importConf.mli | 12 +++----
 lib/configuration/read_conf.ml   | 69 +++++++++++++++++++++++-----------------
 5 files changed, 119 insertions(+), 36 deletions(-)
 create mode 100644 lib/configuration/cte.ml
 create mode 100644 lib/configuration/cte.mli

(limited to 'lib/configuration')

diff --git a/lib/configuration/cte.ml b/lib/configuration/cte.ml
new file mode 100644
index 0000000..ff43d6d
--- /dev/null
+++ b/lib/configuration/cte.ml
@@ -0,0 +1,53 @@
+open StdLabels
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+type t = {
+  filters : Path.t Expression.t list;
+  group : Path.t Expression.t option;
+}
+
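+(* The check ensuring the group criteria in window functions match the global
+   group by criteria is done in [Read_conf.matchWindowGroup]; this module only
+   needs to detect the presence of a group window, which is what the function
+   below does by traversing the expression tree. *)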
+
+(** Tell whether an expression contains a group (window) function.
+
+    The expression tree is searched at any depth; the answer is [true] as soon
+    as a [Window] node is met, and [false] when the tree holds none. *)
+let matchWindowGroup : 'a ImportExpression.T.t -> bool =
+ fun expression ->
+  let open ImportExpression.T in
+  let rec has_window = function
+    (* Leaves cannot hold a window *)
+    | Empty | Literal _ | Integer _ | Path _ -> false
+    | Expr inner -> has_window inner
+    | Concat items
+    | Function' (_, items)
+    | Function (_, items)
+    | Nvl items
+    | Join (_, items) -> List.exists ~f:has_window items
+    (* Found one: there is no need to look inside its arguments *)
+    | Window (_, _, _) -> true
+    | BOperator (_, left, right) -> has_window left || has_window right
+    | GEquality (_, head, rest) ->
+        has_window head || List.exists ~f:has_window rest
+  in
+  has_window expression
+
+(** Split a list of filters into the successive CTEs required to evaluate it.
+
+    Filters are stacked (latest first) in the current CTE until a group function
+    is met: the group closes that CTE and a fresh one is started. *)
+let of_filters : Path.t Expression.t list -> t list =
+ fun filters ->
+  let empty = { filters = []; group = None } in
+  let step (current, closed) expr =
+    match matchWindowGroup expr with
+    | true -> (empty, { current with group = Some expr } :: closed)
+    | false -> ({ current with filters = expr :: current.filters }, closed)
+  in
+  let last, closed = List.fold_left filters ~init:(empty, []) ~f:step in
+  (* The CTEs were stacked in reverse order while folding *)
+  List.rev (last :: closed)
diff --git a/lib/configuration/cte.mli b/lib/configuration/cte.mli
new file mode 100644
index 0000000..0f2b3e3
--- /dev/null
+++ b/lib/configuration/cte.mli
@@ -0,0 +1,20 @@
+module Path = ImportDataTypes.Path
+module Expression = ImportExpression.T
+
+type t = {
+  filters : Path.t Expression.t list;
+  group : Path.t Expression.t option;
+}
+(** Represent a filter to apply in the query.
+
+    The CTE can have filters applied on the previous CTE (or directly on the
+    sources if there is none yet) and can hold a group (and only one).
+
+    If there is a group, it must be applied after the other filters.
+
+    The order in which the filters are presented in the configuration can change
+    the results; it does not matter when we only have classical filters,
+    because all of them can be evaluated at the same time, but as soon as we
+    have a group function, the result becomes dependent on the previous ones. *)
+
+val of_filters : Path.t Expression.t list -> t list
diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml
index ebbcb7c..8516008 100644
--- a/lib/configuration/importConf.ml
+++ b/lib/configuration/importConf.ml
@@ -1,5 +1,6 @@
 open StdLabels
 module Syntax = Syntax
+module CTE = Cte
 module Table = ImportDataTypes.Table
 module Path = ImportDataTypes.Path
 module T = Read_conf
diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli
index 9ddc40c..40b985b 100644
--- a/lib/configuration/importConf.mli
+++ b/lib/configuration/importConf.mli
@@ -1,18 +1,18 @@
 module Syntax = Syntax
-module Table = ImportDataTypes.Table
-module Path = ImportDataTypes.Path
+module CTE = Cte
 
 val dummy_conf : Syntax.t
 
-val root_table : Syntax.t -> Table.t
+val root_table : Syntax.t -> ImportDataTypes.Table.t
 (** Get the root table, this table is the main table to load and each line in
     this table will be processed *)
 
 val t_of_toml : Otoml.t -> (Syntax.t, string) result
-val get_table_for_name : Syntax.t -> string option -> Table.t
+val get_table_for_name : Syntax.t -> string option -> ImportDataTypes.Table.t
 
-val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.Extern.t list
+val get_dependancies_for_table :
+  Syntax.t -> ImportDataTypes.Table.t -> Syntax.Extern.t list
 (** Get all the externals refered by the source *)
 
 val expression_from_string :
-  string -> (Path.t ImportExpression.T.t, string) result
+  string -> (ImportDataTypes.Path.t ImportExpression.T.t, string) result
diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml
index 69240c1..11f6726 100644
--- a/lib/configuration/read_conf.ml
+++ b/lib/configuration/read_conf.ml
@@ -126,44 +126,51 @@ end = struct
   let column = Expression_parser.Incremental.column_expr
 end
 
+exception Divergent
 (** Ensure the group criteria in window functions match the global group by
-    criteria.
+    criteria. *)
 
-    Traverse the configuration tree until finding a group window. *)
+exception NestedGroup
+(** Raised when a group contains another one *)
+
+(** Traverse the tree; raise [Divergent] or [NestedGroup] on a bad window. *)
 let matchWindowGroup :
     eq:('a -> 'a -> bool) ->
     subset:'a ImportExpression.T.t list ->
     'a ImportExpression.T.t ->
-    bool =
+    unit =
  fun ~eq ~subset expression ->
-  let exception Divergent in
   let open ImportExpression.T in
-  let rec f = function
+  let rec f isIngroup = function
     | Empty | Literal _ | Integer _ | Path _ -> ()
-    | Expr e -> f e
+    | Expr e -> f isIngroup e
     | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp)
-      -> List.iter ~f pp
-    | Window (_, pp1, _) ->
-        if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
-        else raise_notrace Divergent
+      -> List.iter ~f:(f isIngroup) pp
+    | Window (expr, pp1, pp2) ->
+        let () =
+          if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then ()
+          else
+            match subset with
+            | [] -> ()
+            | _ -> raise_notrace Divergent
+        in
+        let () =
+          match isIngroup with
+          | true -> raise NestedGroup
+          | false -> ()
+        in
+
+        ignore @@ ImportExpression.T.map_window ~f:(f true) expr;
+        List.iter ~f:(f true) pp1;
+        List.iter ~f:(f true) pp2
     | BOperator (_, arg1, arg2) ->
-        f arg1;
-        f arg2
+        f isIngroup arg1;
+        f isIngroup arg2
     | GEquality (_, arg1, args) ->
-        f arg1;
-        List.iter ~f args
+        f isIngroup arg1;
+        List.iter ~f:(f isIngroup) args
   in
-  match subset with
-  | [] ->
-      (* Do not bother traversing the tree if there is no group by, just
-         return Ok *)
-      true
-  | _ -> (
-      try
-        f expression;
-        true
-      with
-      | Divergent -> false)
+  f false expression
 
 module Make (S : Decoders.Decode.S) = struct
   let ( let* ) = S.( let* )
@@ -185,13 +192,15 @@ module Make (S : Decoders.Decode.S) = struct
           | Error e -> S.fail_with Decoders.Error.(make e)
           | Ok expr -> (
               (* Now check that every window function include at least the uniq list *)
-              let valid_subset = matchWindowGroup ~eq ~subset:groups expr in
-              match valid_subset with
-              | true -> S.succeed expr
-              | false ->
+              match matchWindowGroup ~eq ~subset:groups expr with
+              | () -> S.succeed expr
+              | exception Divergent ->
                   S.fail
                     "The group function shall match the same arguments as the \
-                     \"uniq\" parameter")
+                     \"uniq\" parameter"
+              | exception NestedGroup ->
+                  S.fail
+                    "A group function cannot contains another group function")
 
       method source =
         let* file = S.field "file" S.string
-- 
cgit v1.2.3