From 6b377719c10d5ab3343fd5221f99a4a21008e25a Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 14 Mar 2024 08:26:58 +0100 Subject: Initial commit --- lib/expression/query.ml | 335 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 lib/expression/query.ml (limited to 'lib/expression/query.ml') diff --git a/lib/expression/query.ml b/lib/expression/query.ml new file mode 100644 index 0000000..5bd914a --- /dev/null +++ b/lib/expression/query.ml @@ -0,0 +1,335 @@ +(** + This module create an sql query from an expression. + *) + +open StdLabels + +(** This type is used in the query builder (see [query_of_expression] just + below in order to tell if we need to bind the parameters in the query, or + if we can use plain literal as is (with some risk at the execution time. *) +type _ binded_query = + | BindParam : ImportCSV.DataType.t Queue.t binded_query + | NoParam : unit binded_query + +module QueryParameter = struct + (** Internaly, we need to keep a different type for the Literal chunks + (which requires to be quoted), and raw (which should be given as is to the + sql engine) + + The Raw can be generated from both BindParam or NoParam queries. *) + type t = + | Literal + | Queue of ImportCSV.DataType.t Queue.t + | Raw of t + + (** Wrap the given parameter mode into the raw mode *) + let raw : t -> t = function + | Raw t -> Raw t + | Literal -> Raw Literal + | Queue q -> Raw (Queue q) + + (** Nest the parameter in order to use it inside another function call. + + The rule is to get out of the Raw mode as soon as we dive into another + one function. *) + let nest : t -> t = function + | Raw t -> t + | other -> other +end + +module TypeBuilder = + Compose.Expression + (Type_of) + (struct + let v = ignore + end) + +module Query = TypeBuilder.Make (struct + type 'a repr = Format.formatter -> nested:QueryParameter.t -> unit + type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit + type 'a path_repr = Format.formatter -> 'a -> unit + + let observe : 'a Type_of.obs * 'a repr -> 'a obs = + fun (_, x) formatter ~nested -> + let () = x formatter ~nested in + Format.pp_print_flush formatter () + + (** Unify an external reference with a given type, using the COALESCE + function *) + let unify : + with_:Type_of.t -> + nested:QueryParameter.t -> + Format.formatter -> + 'a Type_of.obs * 'a repr -> + unit = + fun ~with_ ~nested format (type_of, expr) -> + match (type_of, with_) with + | ImportDataTypes.Types.Extern, Number + | ImportDataTypes.Types.Extern, Extern -> + Format.fprintf format "COALESCE(%a,0)" + (fun f expr -> expr f ~nested) + expr + | ImportDataTypes.Types.Extern, String -> + Format.fprintf format "COALESCE(%a,'')" + (fun f expr -> expr f ~nested) + expr + | _, Float -> + Format.fprintf format "CAST(%a AS REAL)" + (fun f expr -> expr f ~nested) + expr + | _, _ -> expr ~nested format + + let empty : 'a Type_of.obs -> 'a repr = + fun type_of formatter ~nested -> + ignore type_of; + ignore nested; + Format.fprintf formatter "''" + + let expr : 'a Type_of.obs * 'a repr -> 'a Type_of.obs -> 'a repr = + fun expr type_of formatter ~nested -> + ignore type_of; + Format.fprintf formatter "("; + (snd expr) ~nested formatter; + Format.fprintf formatter ")" + + let literal : string -> 'a Type_of.obs -> 'a repr = + fun l type_of formatter ~nested -> + ignore type_of; + match nested with + | QueryParameter.Literal -> + (* If the text is a true literal, we insert it directly. This is + only called from the [query_of_expression] function *) + Format.fprintf formatter "'%s'" l + | QueryParameter.Queue queue -> + Format.fprintf formatter "?"; + Queue.add (ImportCSV.DataType.Content l) queue + | QueryParameter.Raw _ -> Format.fprintf formatter "%s" l + + let integer : string -> 'a Type_of.obs -> 'a repr = + fun l type_of formatter ~nested -> + ignore type_of; + ignore nested; + Format.fprintf formatter "%s" l + + let path : 'b path_repr -> 'b -> 'a Type_of.obs -> 'a repr = + fun repr p type_of formatter ~nested -> + ignore nested; + ignore type_of; + repr formatter p + + let concat : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f " || ") + (unify ~with_:ImportDataTypes.Types.String ~nested:nested') + formatter expression + + let print_expression : + ?sep:string -> + QueryParameter.t -> + Format.formatter -> + ('a Type_of.obs * 'a repr) list -> + unit = + fun ?(sep = ", ") nested formatter expression -> + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f "%s" sep) + (fun f v -> (snd v) f ~nested)) + formatter expression + + (** Format the partition expression. This function is used internally and + only form the expression inside the clause. *) + let group_windows : + QueryParameter.t -> + Format.formatter -> + ('a Type_of.obs * 'a repr) list + * ('a Type_of.obs * 'a repr) list + * string option -> + unit = + fun nested formatter (expressions, order, range) -> + match (expressions, order) with + | [], _ -> () + | _, [] -> + Format.fprintf formatter " OVER (PARTITION BY %a%a)" + (print_expression nested) expressions + (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v)) + range + | _, _ -> + Format.fprintf formatter " OVER (PARTITION BY %a ORDER BY %a%a)" + (print_expression nested) expressions (print_expression nested) order + (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v)) + range + + let window : + ('a Type_of.obs * 'a repr) T.window -> + ('a Type_of.obs * 'a repr) list -> + ('a Type_of.obs * 'a repr) list -> + 'a Type_of.obs -> + 'a repr = + fun name expressions order type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + (* By default, the range is defined like this + + [RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS] + + this only build a range until the current row, but in some cases (min, + last), we want to scan the whole group in order to evaluate the value to + keep. + *) + let range = " RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in + match name with + | T.Min expr -> + Format.fprintf formatter "FIRST_VALUE(%a)%a " + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') + (expressions, order, Some range) + | T.Max expr -> + Format.fprintf formatter "LAST_VALUE(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') + (expressions, order, Some range) + | T.Counter -> + (* If no order is given, return the number of elements in the + whole group *) + let operator = + match order with + | [] -> "COUNT" + | _ -> "ROW_NUMBER" + in + Format.fprintf formatter "%s()%a" operator (group_windows nested') + (expressions, order, None) + | T.Previous expr -> + Format.fprintf formatter "LAG(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') (expressions, order, None) + | T.Sum expr -> + Format.fprintf formatter "SUM(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') (expressions, order, None) + + let nvl : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + Format.fprintf formatter "COALESCE(%a)" (print_expression nested') + expression + + let join : + string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun sep expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + (* Directly call the literal function for the first argument *) + Format.fprintf formatter "CONCAT(%a, %a)" + (fun f v -> (literal v ImportDataTypes.Types.String) f ~nested:nested') + sep (print_expression nested') expression + + let boperator : + T.binary_operator -> + 'a Type_of.obs * 'a repr -> + 'a Type_of.obs * 'a repr -> + 'a Type_of.obs -> + 'a repr = + fun name e1 e2 type_of formatter ~nested -> + ignore type_of; + (* When dividing, we need to be sure that the type is a float, + otherwise SQL will truncate the result *) + let with_ = + match name with + | T.Division -> ImportDataTypes.Types.Float + | _ -> fst e2 + in + + let nested' = QueryParameter.nest nested in + Format.fprintf formatter "%a%s%a" + (unify ~with_ ~nested:nested') + e1 + (* The operator *) + (T.name_of_operator name) + (unify ~with_:(fst e1) ~nested:nested') + e2 + + let gequality : + T.binary_operator -> + 'a Type_of.obs * 'a repr -> + ('a Type_of.obs * 'a repr) list -> + 'a Type_of.obs -> + 'a repr = + fun name e1 group type_of -> + ignore type_of; + let group_type = List.map ~f:fst group in + fun formatter ~nested -> + let nested' = QueryParameter.nest nested in + let op_name = + match name with + | T.Equal -> " IN(" + | T.Different -> " NOT IN(" + | _ -> "" + in + + Format.fprintf formatter "%a%s%a)" + (unify ~with_:(Type_of.group' group_type) ~nested:nested') + e1 op_name (print_expression nested') group + + let exprs expressions formatter ~nested = + (* Literal expression, starting from now, all the quoted string are + directly given to the sql engine *) + let nested' = QueryParameter.raw nested in + + Format.fprintf formatter "(%a)" + (print_expression ~sep:" " nested') + expressions + + let rec funct : + string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun name expressions type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + match name with + | "expr" -> + (* Raw expression are parsed directly *) + exprs expressions formatter ~nested + | "if" -> + (* The if is renamed into IIF *) + funct "IIF" expressions type_of formatter ~nested + | _ -> + (* Default case *) + Format.fprintf formatter "%s(%a)" name (print_expression nested') + expressions + + let function' : + T.funct -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun name expressions type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + match name with + | Upper | Trim -> + Format.fprintf formatter "%s(%a)" (T.name_of_function name) + (print_expression nested') expressions +end) + +module M = Sym.M (Query) + +let query_of_expression : + type b. + b binded_query -> + Format.formatter -> + (Format.formatter -> 'a -> unit) -> + 'a T.t -> + b = + fun parameter formatter printer expr -> + let repr = M.eval ~path_repr:printer expr in + match parameter with + | BindParam -> + let p = Queue.create () in + let parameter = QueryParameter.Queue p in + Query.observe repr formatter ~nested:parameter; + p + | NoParam -> + Query.observe repr formatter ~nested:Literal; + () -- cgit v1.2.3