From 6b377719c10d5ab3343fd5221f99a4a21008e25a Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 14 Mar 2024 08:26:58 +0100 Subject: Initial commit --- lib/expression/ast.ml | 31 +++++ lib/expression/compose.ml | 150 ++++++++++++++++++++ lib/expression/compose.mli | 59 ++++++++ lib/expression/dune | 9 ++ lib/expression/filters.ml | 193 ++++++++++++++++++++++++++ lib/expression/filters.mli | 9 ++ lib/expression/headers.ml | 89 ++++++++++++ lib/expression/headers.mli | 7 + lib/expression/lazier.ml | 71 ++++++++++ lib/expression/query.ml | 335 +++++++++++++++++++++++++++++++++++++++++++++ lib/expression/query.mli | 27 ++++ lib/expression/repr.ml | 127 +++++++++++++++++ lib/expression/repr.mli | 6 + lib/expression/sym.ml | 71 ++++++++++ lib/expression/t.ml | 153 +++++++++++++++++++++ lib/expression/t.mli | 54 ++++++++ lib/expression/type_of.ml | 150 ++++++++++++++++++++ lib/expression/type_of.mli | 10 ++ 18 files changed, 1551 insertions(+) create mode 100644 lib/expression/ast.ml create mode 100644 lib/expression/compose.ml create mode 100644 lib/expression/compose.mli create mode 100755 lib/expression/dune create mode 100644 lib/expression/filters.ml create mode 100644 lib/expression/filters.mli create mode 100644 lib/expression/headers.ml create mode 100644 lib/expression/headers.mli create mode 100644 lib/expression/lazier.ml create mode 100644 lib/expression/query.ml create mode 100644 lib/expression/query.mli create mode 100644 lib/expression/repr.ml create mode 100644 lib/expression/repr.mli create mode 100644 lib/expression/sym.ml create mode 100644 lib/expression/t.ml create mode 100644 lib/expression/t.mli create mode 100644 lib/expression/type_of.ml create mode 100644 lib/expression/type_of.mli (limited to 'lib/expression') diff --git a/lib/expression/ast.ml b/lib/expression/ast.ml new file mode 100644 index 0000000..ef083e9 --- /dev/null +++ b/lib/expression/ast.ml @@ -0,0 +1,31 @@ +(** This module rebuilds an AST from an evaluation *) + +type 'a repr = 'a T.t +type 'a obs = 'a T.t +type 'a path_repr = unit + +let observe : 'a repr -> 'a obs = Fun.id +let empty : unit -> 'a repr = fun () -> T.Empty +let expr : 'a repr -> 'a repr = fun t -> T.Expr t +let literal : string -> 'a repr = fun s -> T.Literal s +let integer : string -> 'a repr = fun i -> T.Integer i +let path : 'a path_repr -> 'a -> 'a repr = fun _repr p -> T.Path p +let concat : 'a repr list -> 'a repr = fun ll -> T.Concat ll + +let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr = + fun w groups order -> T.Window (w, groups, order) + +let nvl : 'a repr list -> 'a repr = fun ll -> T.Nvl ll +let join : string -> 'a repr list -> 'a repr = fun s ll -> T.Join (s, ll) + +let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr = + fun op e1 e2 -> T.BOperator (op, e1, e2) + +let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr = + fun op e1 ll -> T.GEquality (op, e1, ll) + +let funct : string -> 'a repr list -> 'a repr = + fun name args -> T.Function (name, args) + +let function' : T.funct -> 'a repr list -> 'a repr = + fun f args -> T.Function' (f, args) diff --git a/lib/expression/compose.ml b/lib/expression/compose.ml new file mode 100644 index 0000000..028602b --- /dev/null +++ b/lib/expression/compose.ml @@ -0,0 +1,150 @@ +open StdLabels + +(** Build an expression module with the result from another expression. The + signature of the fuctions is a bit different, as they all receive the + result from the previous evaluated element in argument. *) +module Expression + (E : Sym.SYM_EXPR) + (R : sig + val v : 'a E.path_repr + end) = +struct + module type SIG = sig + type 'a repr + type 'a obs + type 'a path_repr + + val empty : 'a E.obs -> 'a repr + val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr + val literal : string -> 'a E.obs -> 'a repr + val integer : string -> 'a E.obs -> 'a repr + val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr + val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + + val window : + ('a E.obs * 'a repr) T.window -> + ('a E.obs * 'a repr) list -> + ('a E.obs * 'a repr) list -> + 'a E.obs -> + 'a repr + + val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + + val boperator : + T.binary_operator -> + 'a E.obs * 'a repr -> + 'a E.obs * 'a repr -> + 'a E.obs -> + 'a repr + + val gequality : + T.binary_operator -> + 'a E.obs * 'a repr -> + ('a E.obs * 'a repr) list -> + 'a E.obs -> + 'a repr + + val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val observe : 'a E.obs * 'a repr -> 'a obs + end + + module Make (M : SIG) = struct + type 'a repr = 'a E.repr * 'a M.repr + type 'a obs = 'a M.obs + type 'a path_repr = 'a M.path_repr + + let map' : 'a repr list -> 'a E.repr list * ('a E.obs * 'a M.repr) list = + fun ll -> + let e = List.map ~f:fst ll in + (e, List.map ll ~f:(fun (e, m) -> (E.observe e, m))) + + let observe : 'a repr -> 'a obs = fun (t, v) -> M.observe (E.observe t, v) + + let empty : unit -> 'a repr = + fun () -> + let e = E.empty () in + (e, M.empty (E.observe e)) + + let expr : 'a repr -> 'a repr = + fun (e, m) -> + let e' = E.expr e in + (e', M.expr (E.observe e, m) (E.observe e')) + + let literal : string -> 'a repr = + fun litt -> + let e = E.literal litt in + (e, M.literal litt (E.observe e)) + + let integer : string -> 'a repr = + fun i -> + let e' = E.integer i in + (e', M.integer i (E.observe e')) + + let path : 'b path_repr -> 'b -> 'a repr = + fun path_repr path -> + let e = E.path R.v path in + let m = M.path path_repr path (E.observe e) in + (e, m) + + let concat : 'a repr list -> 'a repr = + fun reprs -> + let e, m = map' reprs in + let e' = E.concat e in + (e', M.concat m (E.observe e')) + + let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr = + fun window expressions order -> + let e_expressions, m_expressions = map' expressions + and e_order, m_order = map' order + and e_window = T.map_window window ~f:fst + and m_window = T.map_window window ~f:(fun (e, m) -> (E.observe e, m)) in + + let e = E.window e_window e_expressions e_order in + (e, M.window m_window m_expressions m_order (E.observe e)) + + let nvl : 'a repr list -> 'a repr = + fun reprs -> + let e, m = List.split reprs in + + let e' = E.nvl e in + let e = List.map ~f:E.observe e in + (e', M.nvl (List.combine e m) (E.observe e')) + + let join : string -> 'a repr list -> 'a repr = + fun sep reprs -> + let e_reprs, m = map' reprs in + + let e = E.join sep e_reprs in + (e, M.join sep m (E.observe e)) + + let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr = + fun op (e1, m1) (e2, m2) -> + let e1' = E.observe e1 + and e2' = E.observe e2 + and e = E.boperator op e1 e2 in + let m' = M.boperator op (e1', m1) (e2', m2) (E.observe e) in + (e, m') + + let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr = + fun op (e1, m1) exprs -> + let e_reprs, m_reprs = map' exprs in + let e' = E.gequality op e1 e_reprs in + let m' = M.gequality op (E.observe e1, m1) m_reprs (E.observe e') in + (e', m') + + let funct : string -> 'a repr list -> 'a repr = + fun sep reprs -> + let e_reprs, m = map' reprs in + + let e = E.funct sep e_reprs in + (e, M.funct sep m (E.observe e)) + + let function' : T.funct -> 'a repr list -> 'a repr = + fun f reprs -> + let e_reprs, m = map' reprs in + let e = E.function' f e_reprs in + (e, M.function' f m (E.observe e)) + end +end diff --git a/lib/expression/compose.mli b/lib/expression/compose.mli new file mode 100644 index 0000000..4cced8c --- /dev/null +++ b/lib/expression/compose.mli @@ -0,0 +1,59 @@ +(** Build an expression module with the result from another expression. The + signature of the fuctions is a bit different, as they all receive the + result from the previous evaluated element in argument. *) +module Expression + (E : Sym.SYM_EXPR) + (_ : sig + val v : 'a E.path_repr + end) : sig + (** The signature for the module the adapt is a bit different for the + SYM_EXPR: every function takes an extra argument which is the Expression + we are wrapping, and every expression becomes a tuple with the same + expression represented in the composed type. *) + module type SIG = sig + type 'a repr + type 'a obs + type 'a path_repr + + val empty : 'a E.obs -> 'a repr + val expr : 'a E.obs * 'a repr -> 'a E.obs -> 'a repr + val literal : string -> 'a E.obs -> 'a repr + val integer : string -> 'a E.obs -> 'a repr + val path : 'a path_repr -> 'a -> 'a E.obs -> 'a repr + val concat : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + + val window : + ('a E.obs * 'a repr) T.window -> + ('a E.obs * 'a repr) list -> + ('a E.obs * 'a repr) list -> + 'a E.obs -> + 'a repr + + val nvl : ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val join : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + + val boperator : + T.binary_operator -> + 'a E.obs * 'a repr -> + 'a E.obs * 'a repr -> + 'a E.obs -> + 'a repr + + val gequality : + T.binary_operator -> + 'a E.obs * 'a repr -> + ('a E.obs * 'a repr) list -> + 'a E.obs -> + 'a repr + + val funct : string -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val function' : T.funct -> ('a E.obs * 'a repr) list -> 'a E.obs -> 'a repr + val observe : 'a E.obs * 'a repr -> 'a obs + end + + module Make (M : SIG) : + Sym.SYM_EXPR + with type 'a obs = 'a M.obs + and type 'a repr = 'a E.repr * 'a M.repr + and type 'a path_repr = 'a M.path_repr +end diff --git a/lib/expression/dune b/lib/expression/dune new file mode 100755 index 0000000..96e386e --- /dev/null +++ b/lib/expression/dune @@ -0,0 +1,9 @@ +(library + (name importExpression) + (libraries + re + importCSV + importDataTypes + importErrors + ) +) diff --git a/lib/expression/filters.ml b/lib/expression/filters.ml new file mode 100644 index 0000000..42c794b --- /dev/null +++ b/lib/expression/filters.ml @@ -0,0 +1,193 @@ +(** This module evaluate the sql query to use in order to filter an expression + + The result is built over [Query] except for the group function, which are + translated into a CTE in sql + *) + +open StdLabels +module Q = Query + +type 'a result = { + repr : Format.formatter -> nested:Query.QueryParameter.t -> unit; + group : 'a T.t option; +} + +module Filter = struct + type 'a repr = { + repr : 'a Q.Query.repr; + with_group : 'a T.t option; + } + + type 'a obs = 'a result + type 'a path_repr = 'a Q.Query.path_repr + + let observe : 'a Ast.obs * 'a repr -> 'a obs = + fun (_, v) -> { repr = Q.Query.observe v.repr; group = v.with_group } + + let empty : 'a Ast.obs -> 'a repr = + fun _ -> { repr = Q.Query.empty (); with_group = None } + + let expr : 'a Ast.obs * 'a repr -> 'a Ast.obs -> 'a repr = + fun (_, expr) _ -> + { repr = Q.Query.expr expr.repr; with_group = expr.with_group } + + let path : 'a path_repr -> 'a -> 'a Ast.obs -> 'a repr = + fun repr p _ -> { repr = Q.Query.path repr p; with_group = None } + + let literal : string -> 'a Ast.obs -> 'a repr = + fun l _ -> { repr = Q.Query.literal l; with_group = None } + + let integer : string -> 'a Ast.obs -> 'a repr = + fun l _ -> { repr = Q.Query.integer l; with_group = None } + + let nvl : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = + fun expression _ -> + let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in + let with_group = + List.find_map ~f:(fun v -> (snd v).with_group) expression + in + match with_group with + | None -> { repr = Q.Query.nvl expr_repr; with_group } + | Some _ -> raise ImportErrors.MisplacedWindow + + let concat : ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = + fun expression _ -> + let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression in + let with_group = + List.find_map ~f:(fun v -> (snd v).with_group) expression + in + match with_group with + | None -> { repr = Q.Query.concat expr_repr; with_group } + | Some _ -> raise ImportErrors.MisplacedWindow + + let join : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = + fun sep expression _ -> + let expr_repr = List.map ~f:(fun v -> (snd v).repr) expression + and with_group = + List.find_map ~f:(fun v -> (snd v).with_group) expression + in + match with_group with + | None -> { repr = Q.Query.join sep expr_repr; with_group } + | Some _ -> raise ImportErrors.MisplacedWindow + + let boperator : + T.binary_operator -> + 'a Ast.obs * 'a repr -> + 'a Ast.obs * 'a repr -> + 'a Ast.obs -> + 'a repr = + fun name (_, e1) (_, e2) _ -> + let with_group = + match (e1.with_group, e2.with_group) with + | Some e, None -> Some e + | None, Some e -> Some e + | None, None -> None + | _ -> raise ImportErrors.MisplacedWindow + in + { repr = Q.Query.boperator name e1.repr e2.repr; with_group } + + let gequality : + T.binary_operator -> + 'a Ast.obs * 'a repr -> + ('a Ast.obs * 'a repr) list -> + 'a Ast.obs -> + 'a repr = + fun name (_, e1) group _ -> + let group_repr = List.map ~f:(fun v -> (snd v).repr) group + and with_group = List.find_map ~f:(fun v -> (snd v).with_group) group in + + match with_group with + | None -> + { + repr = Q.Query.gequality name e1.repr group_repr; + with_group = e1.with_group; + } + | _ -> raise ImportErrors.MisplacedWindow + + let funct : string -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = + fun name expressions _ -> + let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in + let with_group = + List.find_map ~f:(fun v -> (snd v).with_group) expressions + in + match with_group with + | None -> { repr = Q.Query.funct name expr_repr; with_group } + | Some _ -> raise ImportErrors.MisplacedWindow + + let function' : + T.funct -> ('a Ast.obs * 'a repr) list -> 'a Ast.obs -> 'a repr = + fun name expressions _ -> + let expr_repr = List.map ~f:(fun v -> (snd v).repr) expressions in + let with_group = + List.find_map ~f:(fun v -> (snd v).with_group) expressions + in + match with_group with + | None -> + { repr = Q.Query.funct (T.name_of_function name) expr_repr; with_group } + | Some _ -> raise ImportErrors.MisplacedWindow + + (** Window functions are not handled in the filters, we save them as an AST + in order to process them in a separated handler. + + It is not allowed to build nested window functions. *) + let window : + ('a Ast.obs * 'a repr) T.window -> + ('a Ast.obs * 'a repr) list -> + ('a Ast.obs * 'a repr) list -> + 'a Ast.obs -> + 'a repr = + fun name expressions order ast -> + ignore name; + let with_group_expr = + List.find_map ~f:(fun v -> (snd v).with_group) expressions + and with_group_order = + List.find_map ~f:(fun v -> (snd v).with_group) order + in + match (with_group_expr, with_group_order) with + | Some _, _ | _, Some _ -> raise ImportErrors.MisplacedWindow + | None, None -> + (* The column name used with the cte. The name is fixed here, and used + as is in [Analysers.Query.build_cte] and + [Analysers.Query.eval_filters] *) + let q = "cte.group0" in + { + with_group = Some ast; + repr = Q.Query.funct "expr" [ Q.Query.literal q ]; + } +end + +module ASTBuilder = + Compose.Expression + (Ast) + (struct + let v = () + end) + +module F : + Sym.SYM_EXPR + with type 'a obs = 'a result + and type 'a path_repr = Format.formatter -> 'a -> unit = + ASTBuilder.Make (Filter) + +module M = Sym.M (F) + +let query_of_expression : + type b. + b Q.binded_query -> + Format.formatter -> + (Format.formatter -> 'a -> unit) -> + 'a T.t -> + b * 'a T.t option = + fun parameter formatter printer expr -> + let repr = M.eval ~path_repr:printer expr in + match parameter with + | BindParam -> + let p = Queue.create () in + let parameter = Q.QueryParameter.Queue p in + let value = F.observe repr in + value.repr ~nested:parameter formatter; + (p, value.group) + | NoParam -> + let value = F.observe repr in + value.repr ~nested:Literal formatter; + ((), value.group) diff --git a/lib/expression/filters.mli b/lib/expression/filters.mli new file mode 100644 index 0000000..d462b5f --- /dev/null +++ b/lib/expression/filters.mli @@ -0,0 +1,9 @@ +module F : Sym.SYM_EXPR with type 'a path_repr = Format.formatter -> 'a -> unit +(** Query used inside the filter clauses *) + +val query_of_expression : + 'b Query.binded_query -> + Format.formatter -> + (Format.formatter -> 'a -> unit) -> + 'a T.t -> + 'b * 'a T.t option diff --git a/lib/expression/headers.ml b/lib/expression/headers.ml new file mode 100644 index 0000000..6371e4f --- /dev/null +++ b/lib/expression/headers.ml @@ -0,0 +1,89 @@ +open StdLabels + +let truncate buffer n = Buffer.truncate buffer (Buffer.length buffer - n) + +module E : + Sym.SYM_CHUNK + with type 'a obs = buffer:Buffer.t -> unit + and type 'a path_repr = 'a -> Buffer.t -> unit = struct + type 'a repr = buffer:Buffer.t -> unit + type 'a obs = buffer:Buffer.t -> unit + type 'a path_repr = 'a -> Buffer.t -> unit + + let group : 'a repr list -> 'a repr = + fun args ~buffer -> + Buffer.add_string buffer "["; + List.iter args ~f:(fun v -> + v ~buffer; + Buffer.add_string buffer ", "); + + truncate buffer 2; + Buffer.add_string buffer "]" + + let arguments : 'a repr list -> 'a repr = + fun expressions ~buffer -> + Buffer.add_string buffer "("; + List.iter expressions ~f:(fun v -> + v ~buffer; + Buffer.add_string buffer ", "); + + truncate buffer 2; + Buffer.add_string buffer ")" + + let observe x ~buffer = x ~buffer + let empty : unit -> 'a repr = fun _ ~buffer -> Buffer.add_string buffer "''" + let path printer p ~buffer = printer p buffer + let literal l ~buffer = Buffer.add_string buffer l + let integer l ~buffer = Buffer.add_string buffer l + + let expr expr ~buffer = + Buffer.add_char buffer '('; + expr ~buffer; + Buffer.add_char buffer ')' + + let nvl expression ~buffer = + Buffer.add_string buffer "nvl"; + arguments ~buffer expression + + let concat expression ~buffer = List.iter expression ~f:(fun v -> v ~buffer) + + let join sep expression ~buffer = + List.iter expression ~f:(fun v -> + v ~buffer; + Buffer.add_string buffer sep); + truncate buffer (String.length sep) + + let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr = + fun name expressions order ~buffer -> + ignore order; + let name = T.name_of_window name in + + Buffer.add_string buffer name; + arguments ~buffer expressions + + let boperator name e1 e2 ~buffer = + e1 ~buffer; + Buffer.add_string buffer (T.name_of_operator name); + e2 ~buffer + + let gequality name e1 e2 ~buffer = + e1 ~buffer; + Buffer.add_string buffer (T.name_of_operator name); + group ~buffer e2 + + let funct name expressions ~buffer = + Buffer.add_string buffer name; + arguments ~buffer expressions + + let function' name expressions ~buffer = + Buffer.add_string buffer (T.name_of_function name); + arguments ~buffer expressions +end + +module M = Sym.M (E) + +let headers_of_expression : + Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit = + fun buffer printer expr -> + let repr = M.eval expr ~path_repr:printer in + E.observe repr ~buffer diff --git a/lib/expression/headers.mli b/lib/expression/headers.mli new file mode 100644 index 0000000..1fafad0 --- /dev/null +++ b/lib/expression/headers.mli @@ -0,0 +1,7 @@ +val headers_of_expression : + Buffer.t -> ('a -> Buffer.t -> unit) -> 'a T.t -> unit + +module E : + Sym.SYM_EXPR + with type 'a obs = buffer:Buffer.t -> unit + and type 'a path_repr = 'a -> Buffer.t -> unit diff --git a/lib/expression/lazier.ml b/lib/expression/lazier.ml new file mode 100644 index 0000000..d8b12d9 --- /dev/null +++ b/lib/expression/lazier.ml @@ -0,0 +1,71 @@ +open StdLabels + +(** Make a module lazy *) +module Make (S : Sym.SYM_EXPR) = struct + type 'a repr = 'a S.repr Lazy.t + type 'a obs = 'a S.obs Lazy.t + type 'a path_repr = 'a S.path_repr + + let empty : unit -> 'a repr = fun () -> lazy (S.empty ()) + + let expr : 'a repr -> 'a repr = + fun expr -> Lazy.map (fun expr -> S.expr expr) expr + + let literal : string -> 'a repr = fun l -> lazy (S.literal l) + let integer : string -> 'a repr = fun i -> lazy (S.integer i) + + let path : 'b path_repr -> 'b -> 'a repr = + fun repr path -> lazy (S.path repr path) + + let concat : 'a repr list -> 'a repr = + fun exprs -> + lazy + (let exprs' = List.map ~f:Lazy.force exprs in + S.concat exprs') + + let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr = + fun w group sort -> + lazy + (let w' = T.map_window ~f:Lazy.force w + and group' = List.map ~f:Lazy.force group + and sort' = List.map ~f:Lazy.force sort in + S.window w' group' sort') + + let nvl : 'a repr list -> 'a repr = + fun exprs -> + lazy + (let exprs' = List.map ~f:Lazy.force exprs in + S.nvl exprs') + + let join : string -> 'a repr list -> 'a repr = + fun sep exprs -> + lazy + (let exprs' = List.map ~f:Lazy.force exprs in + S.join sep exprs') + + let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr = + fun op e1 e2 -> + lazy + (let e1' = Lazy.force e1 and e2' = Lazy.force e2 in + S.boperator op e1' e2') + + let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr = + fun op e exprs -> + lazy + (let e' = Lazy.force e and exprs' = List.map ~f:Lazy.force exprs in + S.gequality op e' exprs') + + let funct : string -> 'a repr list -> 'a repr = + fun name exprs -> + lazy + (let exprs' = List.map ~f:Lazy.force exprs in + S.funct name exprs') + + let function' : T.funct -> 'a repr list -> 'a repr = + fun f exprs -> + lazy + (let exprs' = List.map ~f:Lazy.force exprs in + S.function' f exprs') + + let observe : 'a repr -> 'a obs = fun v -> Lazy.map S.observe v +end diff --git a/lib/expression/query.ml b/lib/expression/query.ml new file mode 100644 index 0000000..5bd914a --- /dev/null +++ b/lib/expression/query.ml @@ -0,0 +1,335 @@ +(** + This module create an sql query from an expression. + *) + +open StdLabels + +(** This type is used in the query builder (see [query_of_expression] just + below in order to tell if we need to bind the parameters in the query, or + if we can use plain literal as is (with some risk at the execution time. *) +type _ binded_query = + | BindParam : ImportCSV.DataType.t Queue.t binded_query + | NoParam : unit binded_query + +module QueryParameter = struct + (** Internaly, we need to keep a different type for the Literal chunks + (which requires to be quoted), and raw (which should be given as is to the + sql engine) + + The Raw can be generated from both BindParam or NoParam queries. *) + type t = + | Literal + | Queue of ImportCSV.DataType.t Queue.t + | Raw of t + + (** Wrap the given parameter mode into the raw mode *) + let raw : t -> t = function + | Raw t -> Raw t + | Literal -> Raw Literal + | Queue q -> Raw (Queue q) + + (** Nest the parameter in order to use it inside another function call. + + The rule is to get out of the Raw mode as soon as we dive into another + one function. *) + let nest : t -> t = function + | Raw t -> t + | other -> other +end + +module TypeBuilder = + Compose.Expression + (Type_of) + (struct + let v = ignore + end) + +module Query = TypeBuilder.Make (struct + type 'a repr = Format.formatter -> nested:QueryParameter.t -> unit + type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit + type 'a path_repr = Format.formatter -> 'a -> unit + + let observe : 'a Type_of.obs * 'a repr -> 'a obs = + fun (_, x) formatter ~nested -> + let () = x formatter ~nested in + Format.pp_print_flush formatter () + + (** Unify an external reference with a given type, using the COALESCE + function *) + let unify : + with_:Type_of.t -> + nested:QueryParameter.t -> + Format.formatter -> + 'a Type_of.obs * 'a repr -> + unit = + fun ~with_ ~nested format (type_of, expr) -> + match (type_of, with_) with + | ImportDataTypes.Types.Extern, Number + | ImportDataTypes.Types.Extern, Extern -> + Format.fprintf format "COALESCE(%a,0)" + (fun f expr -> expr f ~nested) + expr + | ImportDataTypes.Types.Extern, String -> + Format.fprintf format "COALESCE(%a,'')" + (fun f expr -> expr f ~nested) + expr + | _, Float -> + Format.fprintf format "CAST(%a AS REAL)" + (fun f expr -> expr f ~nested) + expr + | _, _ -> expr ~nested format + + let empty : 'a Type_of.obs -> 'a repr = + fun type_of formatter ~nested -> + ignore type_of; + ignore nested; + Format.fprintf formatter "''" + + let expr : 'a Type_of.obs * 'a repr -> 'a Type_of.obs -> 'a repr = + fun expr type_of formatter ~nested -> + ignore type_of; + Format.fprintf formatter "("; + (snd expr) ~nested formatter; + Format.fprintf formatter ")" + + let literal : string -> 'a Type_of.obs -> 'a repr = + fun l type_of formatter ~nested -> + ignore type_of; + match nested with + | QueryParameter.Literal -> + (* If the text is a true literal, we insert it directly. This is + only called from the [query_of_expression] function *) + Format.fprintf formatter "'%s'" l + | QueryParameter.Queue queue -> + Format.fprintf formatter "?"; + Queue.add (ImportCSV.DataType.Content l) queue + | QueryParameter.Raw _ -> Format.fprintf formatter "%s" l + + let integer : string -> 'a Type_of.obs -> 'a repr = + fun l type_of formatter ~nested -> + ignore type_of; + ignore nested; + Format.fprintf formatter "%s" l + + let path : 'b path_repr -> 'b -> 'a Type_of.obs -> 'a repr = + fun repr p type_of formatter ~nested -> + ignore nested; + ignore type_of; + repr formatter p + + let concat : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f " || ") + (unify ~with_:ImportDataTypes.Types.String ~nested:nested') + formatter expression + + let print_expression : + ?sep:string -> + QueryParameter.t -> + Format.formatter -> + ('a Type_of.obs * 'a repr) list -> + unit = + fun ?(sep = ", ") nested formatter expression -> + (Format.pp_print_list + ~pp_sep:(fun f () -> Format.fprintf f "%s" sep) + (fun f v -> (snd v) f ~nested)) + formatter expression + + (** Format the partition expression. This function is used internally and + only form the expression inside the clause. *) + let group_windows : + QueryParameter.t -> + Format.formatter -> + ('a Type_of.obs * 'a repr) list + * ('a Type_of.obs * 'a repr) list + * string option -> + unit = + fun nested formatter (expressions, order, range) -> + match (expressions, order) with + | [], _ -> () + | _, [] -> + Format.fprintf formatter " OVER (PARTITION BY %a%a)" + (print_expression nested) expressions + (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v)) + range + | _, _ -> + Format.fprintf formatter " OVER (PARTITION BY %a ORDER BY %a%a)" + (print_expression nested) expressions (print_expression nested) order + (Format.pp_print_option (fun f v -> Format.fprintf f "%s" v)) + range + + let window : + ('a Type_of.obs * 'a repr) T.window -> + ('a Type_of.obs * 'a repr) list -> + ('a Type_of.obs * 'a repr) list -> + 'a Type_of.obs -> + 'a repr = + fun name expressions order type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + (* By default, the range is defined like this + + [RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS] + + this only build a range until the current row, but in some cases (min, + last), we want to scan the whole group in order to evaluate the value to + keep. + *) + let range = " RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in + match name with + | T.Min expr -> + Format.fprintf formatter "FIRST_VALUE(%a)%a " + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') + (expressions, order, Some range) + | T.Max expr -> + Format.fprintf formatter "LAST_VALUE(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') + (expressions, order, Some range) + | T.Counter -> + (* If no order is given, return the number of elements in the + whole group *) + let operator = + match order with + | [] -> "COUNT" + | _ -> "ROW_NUMBER" + in + Format.fprintf formatter "%s()%a" operator (group_windows nested') + (expressions, order, None) + | T.Previous expr -> + Format.fprintf formatter "LAG(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') (expressions, order, None) + | T.Sum expr -> + Format.fprintf formatter "SUM(%a)%a" + (fun f v -> (snd v) f ~nested:nested') + expr (group_windows nested') (expressions, order, None) + + let nvl : ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + Format.fprintf formatter "COALESCE(%a)" (print_expression nested') + expression + + let join : + string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun sep expression type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + + (* Directly call the literal function for the first argument *) + Format.fprintf formatter "CONCAT(%a, %a)" + (fun f v -> (literal v ImportDataTypes.Types.String) f ~nested:nested') + sep (print_expression nested') expression + + let boperator : + T.binary_operator -> + 'a Type_of.obs * 'a repr -> + 'a Type_of.obs * 'a repr -> + 'a Type_of.obs -> + 'a repr = + fun name e1 e2 type_of formatter ~nested -> + ignore type_of; + (* When dividing, we need to be sure that the type is a float, + otherwise SQL will truncate the result *) + let with_ = + match name with + | T.Division -> ImportDataTypes.Types.Float + | _ -> fst e2 + in + + let nested' = QueryParameter.nest nested in + Format.fprintf formatter "%a%s%a" + (unify ~with_ ~nested:nested') + e1 + (* The operator *) + (T.name_of_operator name) + (unify ~with_:(fst e1) ~nested:nested') + e2 + + let gequality : + T.binary_operator -> + 'a Type_of.obs * 'a repr -> + ('a Type_of.obs * 'a repr) list -> + 'a Type_of.obs -> + 'a repr = + fun name e1 group type_of -> + ignore type_of; + let group_type = List.map ~f:fst group in + fun formatter ~nested -> + let nested' = QueryParameter.nest nested in + let op_name = + match name with + | T.Equal -> " IN(" + | T.Different -> " NOT IN(" + | _ -> "" + in + + Format.fprintf formatter "%a%s%a)" + (unify ~with_:(Type_of.group' group_type) ~nested:nested') + e1 op_name (print_expression nested') group + + let exprs expressions formatter ~nested = + (* Literal expression, starting from now, all the quoted string are + directly given to the sql engine *) + let nested' = QueryParameter.raw nested in + + Format.fprintf formatter "(%a)" + (print_expression ~sep:" " nested') + expressions + + let rec funct : + string -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun name expressions type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + match name with + | "expr" -> + (* Raw expression are parsed directly *) + exprs expressions formatter ~nested + | "if" -> + (* The if is renamed into IIF *) + funct "IIF" expressions type_of formatter ~nested + | _ -> + (* Default case *) + Format.fprintf formatter "%s(%a)" name (print_expression nested') + expressions + + let function' : + T.funct -> ('a Type_of.obs * 'a repr) list -> 'a Type_of.obs -> 'a repr = + fun name expressions type_of formatter ~nested -> + ignore type_of; + let nested' = QueryParameter.nest nested in + match name with + | Upper | Trim -> + Format.fprintf formatter "%s(%a)" (T.name_of_function name) + (print_expression nested') expressions +end) + +module M = Sym.M (Query) + +let query_of_expression : + type b. + b binded_query -> + Format.formatter -> + (Format.formatter -> 'a -> unit) -> + 'a T.t -> + b = + fun parameter formatter printer expr -> + let repr = M.eval ~path_repr:printer expr in + match parameter with + | BindParam -> + let p = Queue.create () in + let parameter = QueryParameter.Queue p in + Query.observe repr formatter ~nested:parameter; + p + | NoParam -> + Query.observe repr formatter ~nested:Literal; + () diff --git a/lib/expression/query.mli b/lib/expression/query.mli new file mode 100644 index 0000000..fa789a9 --- /dev/null +++ b/lib/expression/query.mli @@ -0,0 +1,27 @@ +module QueryParameter : sig + (** Internaly, we need to keep a different type for the Literal chunks + (which requires to be quoted), and raw (which should be given as is to the + sql engine) + + The Raw can be generated from both BindParam or NoParam queries. *) + type t = + | Literal + | Queue of ImportCSV.DataType.t Queue.t + | Raw of t +end + +type _ binded_query = + | BindParam : ImportCSV.DataType.t Queue.t binded_query + | NoParam : unit binded_query + +val query_of_expression : + 'b binded_query -> + Format.formatter -> + (Format.formatter -> 'a -> unit) -> + 'a T.t -> + 'b + +module Query : + Sym.SYM_EXPR + with type 'a obs = Format.formatter -> nested:QueryParameter.t -> unit + and type 'a path_repr = Format.formatter -> 'a -> unit diff --git a/lib/expression/repr.ml b/lib/expression/repr.ml new file mode 100644 index 0000000..4990236 --- /dev/null +++ b/lib/expression/repr.ml @@ -0,0 +1,127 @@ +open StdLabels + +let escape_dquote = Re.Str.regexp "'" +let escape content = Re.Str.global_replace escape_dquote "\\'" content + +module E : + Sym.SYM_CHUNK + with type 'a obs = top:bool -> string + and type 'a path_repr = 'a -> string = struct + type 'a repr = top:bool -> string + type 'a obs = top:bool -> string + type 'a path_repr = 'a -> string + + let observe x = x + + let group : 'a repr list -> 'a repr = + fun args ~top -> + let args_repr = List.map ~f:(fun v -> v ~top) args in + let args = String.concat ~sep:", " args_repr in + "[" ^ args ^ "]" + + let arguments : 'a repr list -> 'a repr = + fun args ~top -> + let args_repr = List.map ~f:(fun v -> v ~top) args in + let args = String.concat ~sep:", " args_repr in + "(" ^ args ^ ")" + + let empty : unit -> 'a repr = + fun () ~top -> + match top with + | false -> "''" + | true -> "" + + let literal : string -> 'a repr = + fun l ~top -> + if String.equal String.empty l then (empty ()) ~top + else + match int_of_string_opt l with + | Some _ -> l + | None -> "'" ^ escape l ^ "'" + + let integer : string -> 'a repr = + fun l ~top -> if String.equal String.empty l then (empty ()) ~top else l + + let expr : 'a repr -> 'a repr = + fun expr ~top -> + ignore top; + String.concat ~sep:"" [ "("; expr ~top:false; ")" ] + + let path : 'b path_repr -> 'b -> 'a repr = + fun path_repr p ~top -> + ignore top; + path_repr p + + let concat : 'a repr list -> 'a repr = + fun elems ~top -> + ignore top; + let top = false in + let strs = List.map elems ~f:(fun v -> v ~top) in + String.concat ~sep:" ^ " strs + + let window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr = + fun name g1 sort ~top -> + ignore top; + + let args1 = group ~top:false g1 + and args2 = group ~top:false sort + and f_name = T.name_of_window name in + let args = [ args1; args2 ] in + let args = + match name with + | T.Counter -> args + | T.Min prefix_arg + | T.Max prefix_arg + | T.Previous prefix_arg + | T.Sum prefix_arg -> prefix_arg ~top:false :: args + in + + f_name ^ "(" ^ String.concat ~sep:", " args ^ ")" + + let nvl : 'a repr list -> 'a repr = + fun elems ~top -> + ignore top; + let args = arguments ~top:false elems in + "nvl" ^ args + + let join : string -> 'a repr list -> 'a repr = + fun sep elems ~top -> + ignore top; + let header = literal sep in + let args = arguments ~top:false (header :: elems) in + "join" ^ args + + let boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr = + fun op arg1 arg2 ~top -> + ignore top; + let top = false in + let sep = T.name_of_operator op in + String.concat ~sep [ arg1 ~top; arg2 ~top ] + + let gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr = + fun op arg1 arg2 ~top -> + ignore top; + let top = false in + let sep = T.name_of_operator op in + let args = group ~top:false arg2 in + String.concat ~sep [ arg1 ~top; args ] + + let funct : string -> 'a repr list -> 'a repr = + fun f args ~top -> + ignore top; + let args = arguments ~top:false args in + f ^ args + + let function' : T.funct -> 'a repr list -> 'a repr = + fun f args ~top -> + ignore top; + let args = arguments ~top:false args in + T.name_of_function f ^ args +end + +module M = Sym.M (E) + +let repr : ?top:bool -> ('a -> string) -> 'a T.t -> string = + fun ?(top = false) printer expr -> + let repr = M.eval ~path_repr:printer expr in + E.observe repr ~top diff --git a/lib/expression/repr.mli b/lib/expression/repr.mli new file mode 100644 index 0000000..4431655 --- /dev/null +++ b/lib/expression/repr.mli @@ -0,0 +1,6 @@ +val repr : ?top:bool -> ('a -> string) -> 'a T.t -> string + +module E : + Sym.SYM_EXPR + with type 'a obs = top:bool -> string + and type 'a path_repr = 'a -> string diff --git a/lib/expression/sym.ml b/lib/expression/sym.ml new file mode 100644 index 0000000..0360e8e --- /dev/null +++ b/lib/expression/sym.ml @@ -0,0 +1,71 @@ +(** The signature for an expression analyzer. + + Every element is mapped to a function, using the tagless final pattern. + + *) +module type SYM_EXPR = sig + type 'a repr + type 'a obs + type 'a path_repr + + val empty : unit -> 'a repr + val expr : 'a repr -> 'a repr + val literal : string -> 'a repr + val integer : string -> 'a repr + val path : 'a path_repr -> 'a -> 'a repr + val concat : 'a repr list -> 'a repr + val window : 'a repr T.window -> 'a repr list -> 'a repr list -> 'a repr + val nvl : 'a repr list -> 'a repr + val join : string -> 'a repr list -> 'a repr + val boperator : T.binary_operator -> 'a repr -> 'a repr -> 'a repr + val gequality : T.binary_operator -> 'a repr -> 'a repr list -> 'a repr + val funct : string -> 'a repr list -> 'a repr + val function' : T.funct -> 'a repr list -> 'a repr + val observe : 'a repr -> 'a obs +end + +module type SYM_CHUNK = sig + include SYM_EXPR + + val group : 'a repr list -> 'a repr + val arguments : 'a repr list -> 'a repr +end + +open StdLabels + +module M (Expr : SYM_EXPR) = struct + let rec eval : path_repr:'a Expr.path_repr -> 'a T.t -> 'c Expr.repr = + fun ~path_repr t -> + match t with + | T.Expr expr -> Expr.expr (eval ~path_repr expr) + | T.Empty -> Expr.empty () + | T.Literal s -> Expr.literal s + | T.Integer i -> Expr.integer i + | T.Concat elems -> Expr.concat (List.map elems ~f:(eval ~path_repr)) + | T.Function (name, args) -> + Expr.funct name (List.map args ~f:(eval ~path_repr)) + | T.Function' (name, args) -> + Expr.function' name (List.map args ~f:(eval ~path_repr)) + | T.Nvl elems -> Expr.nvl (List.map elems ~f:(eval ~path_repr)) + | T.Join (sep, args) -> Expr.join sep (List.map args ~f:(eval ~path_repr)) + | T.Window (name, group, sort) -> + Expr.window + (eval_window ~path_repr name) + (List.map group ~f:(eval ~path_repr)) + (List.map sort ~f:(eval ~path_repr)) + | T.BOperator (op, arg1, arg2) -> + Expr.boperator op (eval ~path_repr arg1) (eval ~path_repr arg2) + | T.GEquality (op, arg1, arg2) -> + Expr.gequality op (eval ~path_repr arg1) + (List.map arg2 ~f:(eval ~path_repr)) + | T.Path p -> Expr.path path_repr p + + and eval_window : + path_repr:'a Expr.path_repr -> 'a T.t T.window -> 'a Expr.repr T.window = + fun ~path_repr -> function + | Min a -> Min (eval ~path_repr a) + | Max a -> Max (eval ~path_repr a) + | Counter -> Counter + | Previous a -> Previous (eval ~path_repr a) + | Sum a -> Sum (eval ~path_repr a) +end diff --git a/lib/expression/t.ml b/lib/expression/t.ml new file mode 100644 index 0000000..7e61317 --- /dev/null +++ b/lib/expression/t.ml @@ -0,0 +1,153 @@ +open StdLabels + +type 'a window = + | Min of 'a + | Max of 'a + | Counter + | Previous of 'a + | Sum of 'a + +type 'a t = + | Empty + | Expr of 'a t + | Literal of string + | Integer of string + | Path of 'a + | Concat of 'a t list + | Function of string * 'a t list + | Nvl of 'a t list + | Join of string * 'a t list + | Window of ('a t window * 'a t list * 'a t list) + | BOperator of binary_operator * 'a t * 'a t + | GEquality of binary_operator * 'a t * 'a t list + | Function' of funct * 'a t list + +and binary_operator = + | Equal + | Different + | Add + | Minus + | Division + | LT + | GT + | And + | Or + +and funct = + | Upper + | Trim + +let name_of_function = function + | Upper -> "UPPER" + | Trim -> "TRIM" + +let name_of_operator = function + | Equal -> "=" + | Different -> "<>" + | Add -> "+" + | Minus -> "-" + | Division -> "/" + | LT -> "<" + | GT -> ">" + | And -> " and " + | Or -> " or " + +let name_of_window = function + | Min _ -> "min" + | Max _ -> "max" + | Counter -> "counter" + | Previous _ -> "previous" + | Sum _ -> "sum" + +let map_window : f:('a -> 'b) -> 'a window -> 'b window = + fun ~f -> function + | Min t -> Min (f t) + | Max t -> Max (f t) + | Counter -> Counter + | Previous t -> Previous (f t) + | Sum t -> Sum (f t) + +(** Extract the kind of the window function from the given name. *) +let window_of_name name opt = + match (name, opt) with + | "min", Some p -> Min p + | "max", Some p -> Max p + | "counter", None -> Counter + | "previous", Some p -> Previous p + | "sum", Some p -> Sum p + | _other -> raise Not_found + +let rec cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int = + fun f e1 e2 -> + match (e1, e2) with + | Empty, Empty -> 0 + | Literal l1, Literal l2 -> String.compare l1 l2 + | Integer l1, Integer l2 -> String.compare l1 l2 + | Path p1, Path p2 -> f p1 p2 + | Concat elems1, Concat elems2 | Nvl elems1, Nvl elems2 -> + List.compare ~cmp:(cmp f) elems1 elems2 + | Function (n1, elems1), Function (n2, elems2) -> + let name_cmp = String.compare n1 n2 in + if name_cmp = 0 then List.compare ~cmp:(cmp f) elems1 elems2 else name_cmp + | Window (s1, l11, l12), Window (s2, l21, l22) -> ( + match compare s1 s2 with + | 0 -> + let l1_cmp = List.compare ~cmp:(cmp f) l11 l21 in + if l1_cmp = 0 then List.compare ~cmp:(cmp f) l12 l22 else l1_cmp + | other -> other) + | BOperator (n1, arg11, arg12), BOperator (n2, arg21, arg22) -> begin + match compare n1 n2 with + | 0 -> begin + match cmp f arg11 arg21 with + | 0 -> cmp f arg12 arg22 + | other -> other + end + | other -> other + end + (* Any other case *) + | other1, other2 -> Stdlib.compare other1 other2 + +let fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b = + fun ~f ~init expression -> + let rec _f acc = function + | Empty | Literal _ | Integer _ -> acc + | Expr e -> _f acc e + | Path p -> f acc p + | Concat pp | Function' (_, pp) | Function (_, pp) | Nvl pp | Join (_, pp) + -> List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc pp + | Window (window_f, pp1, pp2) -> + (* Each window function can have a distinct parameter first. *) + let acc' = + match window_f with + | Counter -> acc + | Min key | Max key | Previous key | Sum key -> _f acc key + in + let eval1 = List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc' pp1 in + List.fold_left ~f:(fun acc a -> _f acc a) ~init:eval1 pp2 + | BOperator (_, arg1, arg2) -> _f (_f acc arg1) arg2 + | GEquality (_, arg1, arg2) -> + let eval1 = List.fold_left ~f:(fun acc a -> _f acc a) ~init:acc arg2 in + _f eval1 arg1 + in + _f init expression + +let map : type a b. f:(a -> b) -> a t -> b t = + fun ~f expression -> + let rec map = function + | Expr e -> Expr (map e) + | Empty -> Empty + | Literal s -> Literal s + | Integer i -> Integer i + | Path p -> Path (f p) + | Concat pp -> Concat (List.map ~f:map pp) + | Function' (name, pp) -> Function' (name, List.map ~f:map pp) + | Function (name, pp) -> Function (name, List.map ~f:map pp) + | Nvl pp -> Nvl (List.map ~f:map pp) + | Join (sep, pp) -> Join (sep, List.map ~f:map pp) + | Window (window_f, pp1, pp2) -> + let w = map_window ~f:map window_f in + Window (w, List.map ~f:map pp1, List.map ~f:map pp2) + | BOperator (n, arg1, arg2) -> BOperator (n, map arg1, map arg2) + | GEquality (n, arg1, args) -> GEquality (n, map arg1, List.map ~f:map args) + in + map expression diff --git a/lib/expression/t.mli b/lib/expression/t.mli new file mode 100644 index 0000000..840805d --- /dev/null +++ b/lib/expression/t.mli @@ -0,0 +1,54 @@ +type 'a window = + | Min of 'a + | Max of 'a + | Counter + | Previous of 'a + | Sum of 'a + +type 'a t = + | Empty + | Expr of 'a t + | Literal of string + | Integer of string + | Path of 'a + | Concat of 'a t list + | Function of string * 'a t list + | Nvl of 'a t list + | Join of string * 'a t list + | Window of ('a t window * 'a t list * 'a t list) + | BOperator of binary_operator * 'a t * 'a t + | GEquality of binary_operator * 'a t * 'a t list + | Function' of funct * 'a t list + +and binary_operator = + | Equal + | Different + | Add + | Minus + | Division + | LT + | GT + | And + | Or + +and funct = + | Upper + | Trim + +val cmp : ('a -> 'a -> int) -> 'a t -> 'a t -> int +(** Compare two expressions *) + +val fold_values : f:('b -> 'a -> 'b) -> init:'b -> 'a t -> 'b +(** Fold over all the path presents inside the expression. Used for example to + identify all the columns to extract from the file. + + The order is not guarantee to follow the order from the expression *) + +val map : f:('a -> 'b) -> 'a t -> 'b t +(** The map function. Mainly used in the configuration migration. *) + +val name_of_operator : binary_operator -> string +val name_of_window : 'a window -> string +val map_window : f:('a -> 'b) -> 'a window -> 'b window +val window_of_name : string -> 'a option -> 'a window +val name_of_function : funct -> string diff --git a/lib/expression/type_of.ml b/lib/expression/type_of.ml new file mode 100644 index 0000000..ce1a17e --- /dev/null +++ b/lib/expression/type_of.ml @@ -0,0 +1,150 @@ +(** + This module evaluate the type of an expression. + + The type is given with an analysis from all the component involved inside + the exrpssion. It is used inside the [query] module in order to check if one + type need conversion before being used. + *) + +open StdLabels + +module Lazy_Repr = + Compose.Expression + (Lazier.Make + (Repr.E)) + (struct + let v _ = "" + end) + +type t = ImportDataTypes.Types.t + +(** Fold over the list of parameters and ensure all the elements are typed in +the same way *) +let group' : t list -> t = + fun elements -> + List.fold_left elements ~init:None + ~f:(fun (acc : ImportDataTypes.Types.t option) v -> + match acc with + | None -> Some v + | Some t when t = v -> acc + | _ -> Some Extern) + |> Option.value ~default:ImportDataTypes.Types.None + +include Lazy_Repr.Make (struct + type nonrec t = t + type 'a repr = t + type 'a obs = ImportDataTypes.Types.t + type 'a path_repr = 'a -> unit + + let observe : 'a Repr.E.obs Lazy.t * 'a repr -> 'a obs = snd + + let empty : 'a Repr.E.obs Lazy.t -> 'a repr = + fun _ -> ImportDataTypes.Types.None + + let expr : 'a Repr.E.obs Lazy.t * 'a repr -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun e _ -> snd e + + let literal : string -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun _ _ -> ImportDataTypes.Types.String + + let integer : string -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun _ _ -> ImportDataTypes.Types.Number + + let path : 'b path_repr -> 'b -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun _ _ _ -> ImportDataTypes.Types.Extern + + let concat : + ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun _ _ -> ImportDataTypes.Types.String + + let window : + ('a Repr.E.obs Lazy.t * 'a repr) T.window -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun name expressions order _ -> + ignore order; + ignore expressions; + match name with + | T.Counter | T.Max _ | T.Min _ | T.Sum _ -> Number + | T.Previous expr -> snd expr + + let nvl : + ('a Repr.E.obs Lazy.t * 'a repr) list -> 'a Repr.E.obs Lazy.t -> 'a repr = + fun v _ -> group' (List.map ~f:snd v) + + let join : + string -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun _ _ _ -> ImportDataTypes.Types.String + + let boperator : + T.binary_operator -> + 'a Repr.E.obs Lazy.t * 'a repr -> + 'a Repr.E.obs Lazy.t * 'a repr -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun name _ _ _ -> + match name with + | T.Equal | T.Different | T.LT | T.GT -> Bool + | T.Add | T.Minus -> Number + | T.Division -> Float + | T.And | T.Or -> Bool + + let gequality : + T.binary_operator -> + 'a Repr.E.obs Lazy.t * 'a repr -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun name _ _ _ -> + match name with + | T.Equal | T.Different -> Bool + | _ -> None + + let function' : + T.funct -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun name expressions _ -> + ignore expressions; + match name with + | Upper | Trim -> String + + let check : expected:t -> actual:t -> string -> 'a Repr.E.obs Lazy.t -> t = + fun ~expected ~actual subset expr -> + if actual = expected then actual + else + let expression = (Lazy.force expr) ~top:false in + raise (ImportErrors.TypeError { expression; subset; expected; actual }) + + let funct : + string -> + ('a Repr.E.obs Lazy.t * 'a repr) list -> + 'a Repr.E.obs Lazy.t -> + 'a repr = + fun name expressions repr -> + match name with + | "if" -> begin + match expressions with + | [] -> Extern + | (_, hd) :: arg1 :: _ when hd = Bool -> snd arg1 + | (_, hd) :: _ -> + let expected = ImportDataTypes.Types.Bool and actual = hd in + check ~expected ~actual "the predicate" repr + end + | _ -> Extern +end) + +let group : + ('a Lazier.Make(Repr.E).repr * t) list -> 'a Lazier.Make(Repr.E).repr * t = + fun v -> + let v' = group' (List.map v ~f:snd) in + let l = lazy (Repr.E.empty ()) in + (l, v') + +let arguments = group diff --git a/lib/expression/type_of.mli b/lib/expression/type_of.mli new file mode 100644 index 0000000..7a11582 --- /dev/null +++ b/lib/expression/type_of.mli @@ -0,0 +1,10 @@ +(** This module tries to identify the type of an expression. + +The references to data comming from the spreaedsheet cannot be evaluated and +marked as [Extern]. *) + +type t = ImportDataTypes.Types.t + +include Sym.SYM_CHUNK with type 'a obs = t and type 'a path_repr = 'a -> unit + +val group' : t list -> t -- cgit v1.2.3