diff options
| author | Sébastien Dailly <sebastien@dailly.me> | 2025-02-04 22:30:34 +0100 | 
|---|---|---|
| committer | Sébastien Dailly <sebastien@dailly.me> | 2025-02-13 18:30:43 +0100 | 
| commit | d3e0821b9c1551177afb34220d951b087acdea22 (patch) | |
| tree | ba23b2916c7cb0336cb42793e4bee931053cacf5 /lib | |
| parent | 37556ab070abcbf87a1a822c95aeccf19dade687 (diff) | |
Fixed a bug when a group function did not partition in the same way as the uniq parameter
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/analysers/query.ml | 55 | ||||
| -rw-r--r-- | lib/configuration/read_conf.ml | 81 | ||||
| -rw-r--r-- | lib/configuration/syntax.ml | 4 | ||||
| -rwxr-xr-x | lib/expression/dune | 6 | ||||
| -rw-r--r-- | lib/expression/sym.ml | 4 | ||||
| -rw-r--r-- | lib/expression/t.ml | 9 | ||||
| -rw-r--r-- | lib/expression/t.mli | 2 | ||||
| -rw-r--r-- | lib/expression/type_of.mli | 6 | 
8 files changed, 143 insertions, 24 deletions
| diff --git a/lib/analysers/query.ml b/lib/analysers/query.ml index dff3b9d..bcf4a72 100644 --- a/lib/analysers/query.ml +++ b/lib/analysers/query.ml @@ -273,6 +273,49 @@ type query = {    parameters : ImportCSV.DataType.t Seq.t;  } +let rec take_elements : +    prefix:'a list -> eq:('a -> 'a -> bool) -> 'a list -> 'a list = + fun ~prefix ~eq group -> +  match (prefix, group) with +  | [], any -> any +  | _, [] -> [] +  | hd1 :: tl1, hd2 :: tl2 when eq hd1 hd2 -> take_elements ~eq ~prefix:tl1 tl2 +  | _, _ -> raise Not_found + +(** The window functions shall be grouped in the same way as the uniq property. +    (We cannot make a partition over a group not kept in the final result). + +    But the SQL query need to remove from the window function the elements +    already defined in the group by statement, and we need to filter them from +    the configuration before building the query. *) +let clean_window : +    prefix:Path.t ImportExpression.T.t list -> +    Path.t ImportExpression.T.t -> +    Path.t ImportExpression.T.t = + fun ~prefix expression -> +  let open ImportExpression.T in +  let rec f = function +    | Expr e -> Expr (f e) +    | Empty -> Empty +    | Literal s -> Literal s +    | Integer i -> Integer i +    | Path p -> Path p +    | Concat pp -> Concat (List.map ~f pp) +    | Function' (name, pp) -> Function' (name, List.map ~f pp) +    | Function (name, pp) -> Function (name, List.map ~f pp) +    | Nvl pp -> Nvl (List.map ~f pp) +    | Join (sep, pp) -> Join (sep, List.map ~f pp) +    | Window (window_f, group, order) -> +        let w = map_window ~f window_f in +        let group = +          take_elements ~eq:(ImportExpression.T.equal Path.equal) ~prefix group +        in +        Window (w, List.map ~f group, List.map ~f order) +    | BOperator (n, arg1, arg2) -> BOperator (n, f arg1, f arg2) +    | GEquality (n, arg1, args) -> GEquality (n, f arg1, List.map ~f args) +  in +  f expression +  (** Build the query and return also the mapping in order to identify each      external links between files. @@ -301,7 +344,17 @@ let select : Syntax.t -> query * Path.t ImportExpression.T.t array =    *)    let headers = Array.make (List.length conf.columns) (Obj.magic None) in -  let columns = List.to_seq conf.columns |> Seq.mapi (fun i c -> (i, c)) in +  (* Transform the columns to extract from the query.  +      - Associate each of them with a number +      - Clean the window functions and remove the part already defined in the +        [uniq] parameter. +   *) +  let columns = +    List.to_seq conf.columns +    |> Seq.mapi (fun i c -> +           let expression = c in +           (i, clean_window ~prefix:conf.uniq expression)) +  in    let formatter = Format.formatter_of_buffer b in    let () =      Format.fprintf formatter "SELECT %a" diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml index 52ccb62..69240c1 100644 --- a/lib/configuration/read_conf.ml +++ b/lib/configuration/read_conf.ml @@ -126,6 +126,45 @@ end = struct    let column = Expression_parser.Incremental.column_expr  end +(** Ensure the group criteria in window functions match the global group by +    criteria. + +    Traverse the configuration tree until finding a group window. *) +let matchWindowGroup : +    eq:('a -> 'a -> bool) -> +    subset:'a ImportExpression.T.t list -> +    'a ImportExpression.T.t -> +    bool = + fun ~eq ~subset expression -> +  let exception Divergent in +  let open ImportExpression.T in +  let rec f = function +    | Empty | Literal _ | Integer _ | Path _ -> () +    | Expr e -> f e +    | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) +      -> List.iter ~f pp +    | Window (_, pp1, _) -> +        if List.equal ~eq:(ImportExpression.T.equal eq) subset pp1 then () +        else raise_notrace Divergent +    | BOperator (_, arg1, arg2) -> +        f arg1; +        f arg2 +    | GEquality (_, arg1, args) -> +        f arg1; +        List.iter ~f args +  in +  match subset with +  | [] -> +      (* Do not bother traversing the tree if there is no group by, just +         return Ok *) +      true +  | _ -> ( +      try +        f expression; +        true +      with +      | Divergent -> false) +  module Make (S : Decoders.Decode.S) = struct    let ( let* ) = S.( let* )    let ( and* ) = S.( and* ) @@ -135,14 +174,24 @@ module Make (S : Decoders.Decode.S) = struct    class loader =      object (self)        method parse_expression : type a. +          ?groups:a ImportExpression.T.t list -> +          eq:(a -> a -> bool) ->            a ExpressionParser.path_builder ->            S.value ->            (a ImportExpression.T.t, S.value Decoders.Error.t) result = -        fun path -> +        fun ?(groups = []) ~eq path ->            S.string >>= fun v ->            match ExpressionParser.of_string path v with -          | Ok expr -> S.succeed expr            | Error e -> S.fail_with Decoders.Error.(make e) +          | Ok expr -> ( +              (* Now check that every window function include at least the uniq list *) +              let valid_subset = matchWindowGroup ~eq ~subset:groups expr in +              match valid_subset with +              | true -> S.succeed expr +              | false -> +                  S.fail +                    "The group function shall match the same arguments as the \ +                     \"uniq\" parameter")        method source =          let* file = S.field "file" S.string @@ -152,9 +201,11 @@ module Make (S : Decoders.Decode.S) = struct        method external_ name =          let* intern_key = -          S.field "intern_key" (self#parse_expression ExpressionParser.path) +          S.field "intern_key" +            (self#parse_expression ~eq:Path.equal ExpressionParser.path)          and* extern_key = -          S.field "extern_key" (self#parse_expression ExpressionParser.column) +          S.field "extern_key" +            (self#parse_expression ~eq:Int.equal ExpressionParser.column)          and* file = S.field "file" S.string          and* tab = S.field_opt_or ~default:1 "tab" S.int          and* allow_missing = @@ -172,18 +223,28 @@ module Make (S : Decoders.Decode.S) = struct              }        method sheet = +        (* Check the uniq property first, beecause the group functions need +             to include the same expression (at least) *) +        let* uniq = +          S.field_opt_or ~default:[] "uniq" +          @@ S.list (self#parse_expression ~eq:Path.equal ExpressionParser.path) +        in +          let* columns =            S.field "columns" -          @@ S.list (self#parse_expression ExpressionParser.path) +          @@ S.list +               (self#parse_expression ~eq:Path.equal ~groups:uniq +                  ExpressionParser.path)          and* filters =            S.field_opt_or ~default:[] "filters" -          @@ S.list (self#parse_expression ExpressionParser.path) +          @@ S.list +               (self#parse_expression ~eq:Path.equal ~groups:uniq +                  ExpressionParser.path)          and* sort =            S.field_opt_or ~default:[] "sort" -          @@ S.list (self#parse_expression ExpressionParser.path) -        and* uniq = -          S.field_opt_or ~default:[] "uniq" -          @@ S.list (self#parse_expression ExpressionParser.path) +          @@ S.list +               (self#parse_expression ~eq:Path.equal ~groups:uniq +                  ExpressionParser.path)          in          S.succeed @@ fun version source externals locale ->          Syntax. diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml index ee47277..9a0a706 100644 --- a/lib/configuration/syntax.ml +++ b/lib/configuration/syntax.ml @@ -53,6 +53,10 @@ module Extern = struct      |> Otoml.table  end +module ExpressionSet = Set.Make (struct +  type t = Path.t ImportExpression.T.t [@@deriving show, eq, ord] +end) +  type t = {    version : int;    locale : string option; diff --git a/lib/expression/dune b/lib/expression/dune index d141b7b..8bf6e62 100755 --- a/lib/expression/dune +++ b/lib/expression/dune @@ -7,6 +7,8 @@     importErrors
   )
   (preprocess (pps 
 -    ppx_deriving.show 
 -    ppx_deriving.eq ))
 +   ppx_deriving.ord
 +   ppx_deriving.show 
 +   ppx_deriving.eq 
 +   ))
  )
 diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml index 0360e8e..49e923d 100644 --- a/lib/expression/sym.ml +++ b/lib/expression/sym.ml @@ -1,8 +1,6 @@  (** The signature for an expression analyzer. -    Every element is mapped to a function, using the tagless final pattern. - - *) +    Every element is mapped to a function, using the tagless final pattern. *)  module type SYM_EXPR = sig    type 'a repr    type 'a obs diff --git a/lib/expression/t.ml b/lib/expression/t.ml index fec8fd7..324d515 100644 --- a/lib/expression/t.ml +++ b/lib/expression/t.ml @@ -6,7 +6,7 @@ type 'a window =    | Counter    | Previous of 'a    | Sum of 'a -[@@deriving show, eq] +[@@deriving show, eq, ord]  type 'a t =    | Empty @@ -22,7 +22,7 @@ type 'a t =    | BOperator of binary_operator * 'a t * 'a t    | GEquality of binary_operator * 'a t * 'a t list    | Function' of funct * 'a t list -[@@deriving show, eq] +[@@deriving show, eq, ord]  and binary_operator =    | Equal @@ -34,6 +34,7 @@ and binary_operator =    | GT    | And    | Or +[@@deriving ord]  and funct =    | Cmp @@ -119,13 +120,13 @@ let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int =        let name_cmp = String.compare n1 n2 in        if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp    | Window (s1, l11, l12), Window (s2, l21, l22) -> ( -      match compare s1 s2 with +      match compare_window (cmp f) s1 s2 with        | 0 ->            let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in            if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp        | other -> other)    | BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin -      match compare n1 n2 with +      match compare_binary_operator n1 n2 with        | 0 -> begin            match cmp f arg11 arg21 with            | 0 -> cmp f arg12 arg22 diff --git a/lib/expression/t.mli b/lib/expression/t.mli index 4e1af55..4c9e0a7 100644 --- a/lib/expression/t.mli +++ b/lib/expression/t.mli @@ -19,7 +19,7 @@ type 'a t =    | BOperator of binary_operator * 'a t * 'a t    | GEquality of binary_operator * 'a t * 'a t list    | Function' of funct * 'a t list -[@@deriving show, eq] +[@@deriving show, eq, ord]  and binary_operator =    | Equal diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli index 7a11582..a839c09 100644 --- a/lib/expression/type_of.mli +++ b/lib/expression/type_of.mli @@ -1,7 +1,7 @@ -(** This module tries to identify the type of an expression.  +(** This module tries to identify the type of an expression. -The references to data comming from the spreaedsheet cannot be evaluated and -marked as [Extern]. *) +    The references to data comming from the spreaedsheet cannot be evaluated and +    marked as [Extern]. *)  type t = ImportDataTypes.Types.t | 
