aboutsummaryrefslogtreecommitdiff
path: root/lib/expression/query.ml
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
committerSébastien Dailly <sebastien@dailly.me>2024-03-14 08:26:58 +0100
commit6b377719c10d5ab3343fd5221f99a4a21008e25a (patch)
treea7c1e9a820d339a2f161af3e09cf9e3161286796 /lib/expression/query.ml
Initial commitmain
Diffstat (limited to 'lib/expression/query.ml')
-rw-r--r--lib/expression/query.ml335
1 files changed, 335 insertions, 0 deletions
diff --git a/lib/expression/query.ml b/lib/expression/query.ml
new file mode 100644
index 0000000..5bd914a
--- /dev/null
+++ b/lib/expression/query.ml
@@ -0,0 +1,335 @@
+(**
+ This module create an sql query from an expression.
+ *)
+
+open StdLabels
+
+(** This type is used in the query builder (see [query_of_expression] just
+ below in order to tell if we need to bind the parameters in the query, or
+ if we can use plain literal as is (with some risk at the execution time. *)
+type _ binded_query =
+ | BindParam : ImportCSV.DataType.t Queue.t binded_query
+ | NoParam : unit binded_query
+
+module QueryParameter = struct
+ (** Internaly, we need to keep a different type for the Literal chunks
+ (which requires to be quoted), and raw (which should be given as is to the
+ sql engine)
+
+ The Raw can be generated from both BindParam or NoParam queries. *)
+ type t =
+ | Literal
+ | Queue of ImportCSV.DataType.t Queue.t
+ | Raw of t
+
+ (** Wrap the given parameter mode into the raw mode *)
+ let raw : t -> t = function
+ | Raw t -> Raw t
+ | Literal -> Raw Literal
+ | Queue q -> Raw (Queue q)
+
+ (** Nest the parameter in order to use it inside another function call.
+
+ The rule is to get out of the Raw mode as soon as we dive into another
+ one function. *)
+ let nest : t -> t = function
+ | Raw t -> t
+ | other -> other
+end
+
+module TypeBuilder =
+ Compose.Expression
+ (Type_of)
+ (struct
+ let v = ignore
+ end)
+
+module Query = TypeBuilder.Make (struct
+ type 'a repr = Format.formatter -> nested:QueryParameter.t -> unit
+ type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit
+ type 'a path_repr = Format.formatter -> 'a -> unit
+
+ let observe : 'a Type_of.obs * 'a repr -> 'a obs =
+ fun (_, x) formatter ~nested ->
+ let () = x formatter ~nested in
+ Format.pp_print_flush formatter ()
+
+ (** Unify an external reference with a given type, using the COALESCE
+ function *)
+ let unify :
+ with_:Type_of.t ->
+ nested:QueryParameter.t ->
+ Format.formatter ->
+ 'a Type_of.obs * 'a repr ->
+ unit =
+ fun ~with_ ~nested format (type_of, expr) ->
+ match (type_of, with_) with
+ | ImportDataTypes.Types.Extern, Number
+ | ImportDataTypes.Types.Extern, Extern ->
+ Format.fprintf format "COALESCE(%a,0)"
+ (fun f expr -> expr f ~nested)
+ expr
+ | ImportDataTypes.Types.Extern, String ->
+ Format.fprintf format "COALESCE(%a,'')"
+ (fun f expr -> expr f ~nested)
+ expr
+ | _, Float ->
+ Format.fprintf format "CAST(%a AS REAL)"
+ (fun f expr -> expr f ~nested)
+ expr
+ | _, _ -> expr ~nested format
+
+ let empty : 'a Type_of.obs -> 'a repr =
+ fun type_of formatter ~nested ->
+ ignore type_of;
+ ignore nested;
+ Format.fprintf formatter "''"
+
+ let expr : 'a Type_of.obs * 'a repr -> 'a Type_of.obs -> 'a repr =
+ fun expr type_of formatter ~nested ->
+ ignore type_of;
+ Format.fprintf formatter "(";
+ (snd expr) ~nested formatter;
+ Format.fprintf formatter ")"
+
+ let literal : string -> 'a Type_of.obs -> 'a repr =
+ fun l type_of formatter ~nested ->
+ ignore type_of;
+ match nested with
+ | QueryParameter.Literal ->
+ (* If the text is a true literal, we insert it directly. This is
+ only called from the [query_of_expression] function *)
+ Format.fprintf formatter "'%s'" l
+ | QueryParameter.Queue queue ->
+ Format.fprintf formatter "?";
+ Queue.add (ImportCSV.DataType.Content l) queue
+ | QueryParameter.Raw _ -> Format.fprintf formatter "%s" l
+
+ let integer : string -> 'a Type_of.obs -> 'a repr =
+ fun l type_of formatter ~nested ->
+ ignore type_of;
+ ignore nested;
+ Format.fprintf formatter "%s" l
+
+ let path : 'b path_repr -> 'b -> 'a Type_of.obs -> 'a repr =
+ fun repr p type_of formatter ~nested ->
+ ignore nested;
+ ignore type_of;
+ repr formatter p
+
+ let concat : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f " || ")
+ (unify ~with_:ImportDataTypes.Types.String ~nested:nested')
+ formatter expression
+
+ let print_expression :
+ ?sep:string ->
+ QueryParameter.t ->
+ Format.formatter ->
+ ('a Type_of.obs * 'a repr) list ->
+ unit =
+ fun ?(sep = ", ") nested formatter expression ->
+ (Format.pp_print_list
+ ~pp_sep:(fun f () -> Format.fprintf f "%s" sep)
+ (fun f v -> (snd v) f ~nested))
+ formatter expression
+
+ (** Format the partition expression. This function is used internally and
+ only form the expression inside the clause. *)
+ let group_windows :
+ QueryParameter.t ->
+ Format.formatter ->
+ ('a Type_of.obs * 'a repr) list
+ * ('a Type_of.obs * 'a repr) list
+ * string option ->
+ unit =
+ fun nested formatter (expressions, order, range) ->
+ match (expressions, order) with
+ | [], _ -> ()
+ | _, [] ->
+ Format.fprintf formatter " OVER (PARTITION BY %a%a)"
+ (print_expression nested) expressions
+ (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+ range
+ | _, _ ->
+ Format.fprintf formatter " OVER (PARTITION BY %a ORDER BY %a%a)"
+ (print_expression nested) expressions (print_expression nested) order
+ (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v))
+ range
+
+ let window :
+ ('a Type_of.obs * 'a repr) T.window ->
+ ('a Type_of.obs * 'a repr) list ->
+ ('a Type_of.obs * 'a repr) list ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name expressions order type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ (* By default, the range is defined like this
+
+ [RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS]
+
+ this only build a range until the current row, but in some cases (min,
+ last), we want to scan the whole group in order to evaluate the value to
+ keep.
+ *)
+ let range = " RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in
+ match name with
+ | T.Min expr ->
+ Format.fprintf formatter "FIRST_VALUE(%a)%a "
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested')
+ (expressions, order, Some range)
+ | T.Max expr ->
+ Format.fprintf formatter "LAST_VALUE(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested')
+ (expressions, order, Some range)
+ | T.Counter ->
+ (* If no order is given, return the number of elements in the
+ whole group *)
+ let operator =
+ match order with
+ | [] -> "COUNT"
+ | _ -> "ROW_NUMBER"
+ in
+ Format.fprintf formatter "%s()%a" operator (group_windows nested')
+ (expressions, order, None)
+ | T.Previous expr ->
+ Format.fprintf formatter "LAG(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested') (expressions, order, None)
+ | T.Sum expr ->
+ Format.fprintf formatter "SUM(%a)%a"
+ (fun f v -> (snd v) f ~nested:nested')
+ expr (group_windows nested') (expressions, order, None)
+
+ let nvl : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ Format.fprintf formatter "COALESCE(%a)" (print_expression nested')
+ expression
+
+ let join :
+ string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun sep expression type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+
+ (* Directly call the literal function for the first argument *)
+ Format.fprintf formatter "CONCAT(%a, %a)"
+ (fun f v -> (literal v ImportDataTypes.Types.String) f ~nested:nested')
+ sep (print_expression nested') expression
+
+ let boperator :
+ T.binary_operator ->
+ 'a Type_of.obs * 'a repr ->
+ 'a Type_of.obs * 'a repr ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name e1 e2 type_of formatter ~nested ->
+ ignore type_of;
+ (* When dividing, we need to be sure that the type is a float,
+ otherwise SQL will truncate the result *)
+ let with_ =
+ match name with
+ | T.Division -> ImportDataTypes.Types.Float
+ | _ -> fst e2
+ in
+
+ let nested' = QueryParameter.nest nested in
+ Format.fprintf formatter "%a%s%a"
+ (unify ~with_ ~nested:nested')
+ e1
+ (* The operator *)
+ (T.name_of_operator name)
+ (unify ~with_:(fst e1) ~nested:nested')
+ e2
+
+ let gequality :
+ T.binary_operator ->
+ 'a Type_of.obs * 'a repr ->
+ ('a Type_of.obs * 'a repr) list ->
+ 'a Type_of.obs ->
+ 'a repr =
+ fun name e1 group type_of ->
+ ignore type_of;
+ let group_type = List.map ~f:fst group in
+ fun formatter ~nested ->
+ let nested' = QueryParameter.nest nested in
+ let op_name =
+ match name with
+ | T.Equal -> " IN("
+ | T.Different -> " NOT IN("
+ | _ -> ""
+ in
+
+ Format.fprintf formatter "%a%s%a)"
+ (unify ~with_:(Type_of.group' group_type) ~nested:nested')
+ e1 op_name (print_expression nested') group
+
+ let exprs expressions formatter ~nested =
+ (* Literal expression, starting from now, all the quoted string are
+ directly given to the sql engine *)
+ let nested' = QueryParameter.raw nested in
+
+ Format.fprintf formatter "(%a)"
+ (print_expression ~sep:" " nested')
+ expressions
+
+ let rec funct :
+ string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun name expressions type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ match name with
+ | "expr" ->
+ (* Raw expression are parsed directly *)
+ exprs expressions formatter ~nested
+ | "if" ->
+ (* The if is renamed into IIF *)
+ funct "IIF" expressions type_of formatter ~nested
+ | _ ->
+ (* Default case *)
+ Format.fprintf formatter "%s(%a)" name (print_expression nested')
+ expressions
+
+ let function' :
+ T.funct -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr =
+ fun name expressions type_of formatter ~nested ->
+ ignore type_of;
+ let nested' = QueryParameter.nest nested in
+ match name with
+ | Upper | Trim ->
+ Format.fprintf formatter "%s(%a)" (T.name_of_function name)
+ (print_expression nested') expressions
+end)
+
+module M = Sym.M (Query)
+
+let query_of_expression :
+ type b.
+ b binded_query ->
+ Format.formatter ->
+ (Format.formatter -> 'a -> unit) ->
+ 'a T.t ->
+ b =
+ fun parameter formatter printer expr ->
+ let repr = M.eval ~path_repr:printer expr in
+ match parameter with
+ | BindParam ->
+ let p = Queue.create () in
+ let parameter = QueryParameter.Queue p in
+ Query.observe repr formatter ~nested:parameter;
+ p
+ | NoParam ->
+ Query.observe repr formatter ~nested:Literal;
+ ()