From 6b377719c10d5ab3343fd5221f99a4a21008e25a Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 14 Mar 2024 08:26:58 +0100 Subject: Initial commit --- lib/configuration/dune | 29 ++++ lib/configuration/expression_lexer.mll | 91 +++++++++++ lib/configuration/expression_parser.messages | 123 +++++++++++++++ lib/configuration/expression_parser.mly | 185 +++++++++++++++++++++++ lib/configuration/importConf.ml | 90 +++++++++++ lib/configuration/importConf.mli | 23 +++ lib/configuration/of_json.ml | 134 +++++++++++++++++ lib/configuration/read_conf.ml | 216 +++++++++++++++++++++++++++ lib/configuration/syntax.ml | 88 +++++++++++ 9 files changed, 979 insertions(+) create mode 100755 lib/configuration/dune create mode 100644 lib/configuration/expression_lexer.mll create mode 100644 lib/configuration/expression_parser.messages create mode 100644 lib/configuration/expression_parser.mly create mode 100644 lib/configuration/importConf.ml create mode 100644 lib/configuration/importConf.mli create mode 100644 lib/configuration/of_json.ml create mode 100644 lib/configuration/read_conf.ml create mode 100644 lib/configuration/syntax.ml (limited to 'lib/configuration') diff --git a/lib/configuration/dune b/lib/configuration/dune new file mode 100755 index 0000000..27d31a6 --- /dev/null +++ b/lib/configuration/dune @@ -0,0 +1,29 @@ +(library + (name importConf) + (libraries + decoders + otoml + menhirLib + importCSV + yojson + re + helpers + importDataTypes + importExpression + importErrors + ) + +(preprocess (pps ppx_yojson_conv ppx_deriving.ord)) +) + +(rule + (targets expression_parser_messages.ml) + (deps expression_parser.messages expression_parser.mly) + (action (with-stdout-to %{targets} (run menhir --compile-errors %{deps})))) + +(menhir + (modules expression_parser) + (flags --table) +) + +(ocamllex expression_lexer) diff --git a/lib/configuration/expression_lexer.mll b/lib/configuration/expression_lexer.mll new file mode 100644 index 0000000..cbfc8dc --- /dev/null +++ b/lib/configuration/expression_lexer.mll @@ -0,0 +1,91 @@ +{ + open Expression_parser + module Expression = ImportExpression.T + + exception UnclosedQuote of { content: string ; line : int} +} + + +let spaces = [ ' ' '\t' ] +let letters = [^ '"' '\'' '(' ')' '[' ']' ':' '.' ',' '^' ' ' '\t' '\n' '\r' ] +let digit = [ '0'-'9' ] +let eol = [ '\r' '\n' ] + +let escaped = [ '\'' '\\'] + +rule token = parse +| eol { Lexing.new_line lexbuf; token lexbuf } +| spaces { token lexbuf } +| '\'' { + try read_quoted_string (Buffer.create 17) lexbuf + with Failure _ -> + let line = lexbuf.Lexing.lex_curr_p.pos_lnum + and content = Bytes.to_string lexbuf.Lexing.lex_buffer in + raise (UnclosedQuote {line; content}) +} +| '"' { read_dquoted_string (Buffer.create 17) lexbuf } +| '#' { skip_comment lexbuf } +| '(' { L_PAREN } +| ')' { R_PAREN } +| '[' { L_BRACKET } +| ']' { R_BRACKET } +| ':' { COLUMN } +| '.' { DOT } +| ',' { COMA } +| '^' { CONCAT_OPERATOR } +| '+' { BINARY_OPERATOR (Expression.Add) } +| '-' { BINARY_OPERATOR (Expression.Minus) } +| '/' { BINARY_OPERATOR (Expression.Division) } +| "and" { BOOL_OPERATOR (Expression.And) } +| "or" { BOOL_OPERATOR (Expression.Or) } +| '<' { INEQUALITY_OPERATOR (Expression.LT) } +| '>' { INEQUALITY_OPERATOR (Expression.GT) } +| "<>" { EQUALITY_OPERATOR (Expression.Different) } +| '=' { EQUALITY_OPERATOR (Expression.Equal) } +| digit+ as l { INTEGER l} +| '-' digit+ as l { INTEGER l} +| letters+ as l { IDENT l} +| eof { EOF } + +and skip_comment = parse + | [^ '\r' '\n' ] + { skip_comment lexbuf } + | eol + { token lexbuf } + +(* Read the content until we got another one quote *) +and read_quoted_string buf = parse + | [^ '\'' '\\' ]+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + read_quoted_string buf lexbuf + } + | "\\\'" + { Buffer.add_char buf '\''; + read_quoted_string buf lexbuf + } + | '\\' + { Buffer.add_char buf '\\'; + read_quoted_string buf lexbuf + } + | '\'' + { LITERAL (Buffer.contents buf) + } + +(* Read the content until we got another one quote *) +and read_dquoted_string buf = parse + | [^ '"' '\\' ]+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + read_dquoted_string buf lexbuf + } + | "\\\"" + { Buffer.add_char buf '"'; + read_dquoted_string buf lexbuf + } + | '\\' + { Buffer.add_char buf '\\'; + read_dquoted_string buf lexbuf + } + | '"' + { + LITERAL (Buffer.contents buf) + } diff --git a/lib/configuration/expression_parser.messages b/lib/configuration/expression_parser.messages new file mode 100644 index 0000000..ff7e757 --- /dev/null +++ b/lib/configuration/expression_parser.messages @@ -0,0 +1,123 @@ +column_expr: R_PAREN +## + +Invalid expression + +path_expr: IDENT R_PAREN +column_expr: IDENT R_PAREN +column_expr: IDENT L_PAREN IDENT R_PAREN +path_expr: IDENT L_PAREN IDENT R_PAREN +## + +Misplaced function. Did you forgot to quote the text ? + +column_expr: IDENT L_PAREN EOF +path_expr: IDENT L_PAREN EOF +## + +Uncomplete expression + +column_expr: COLUMN R_PAREN +path_expr: COLUMN R_PAREN +## + +The path is missing. + +column_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN +path_expr: LITERAL CONCAT_OPERATOR LITERAL L_PAREN +path_expr: LITERAL L_PAREN +column_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN +path_expr: LITERAL CONCAT_OPERATOR LITERAL BINARY_OPERATOR LITERAL L_PAREN +column_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN +path_expr: LITERAL CONCAT_OPERATOR LITERAL CONCAT_OPERATOR LITERAL L_PAREN +column_expr: IDENT L_PAREN L_PAREN LITERAL L_PAREN +path_expr: IDENT L_PAREN LITERAL L_PAREN +## + +A text is given where it was expected a function. + +column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN +column_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN +path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN +path_expr: LITERAL BINARY_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA R_PAREN +## +## Ends in an error in state: 61. +## +## separated_nonempty_list(COMA,expr_(path_,COMA)) -> expr_(path_,COMA) COMA . separated_nonempty_list(COMA,expr_(path_,COMA)) [ R_PAREN ] +## +## The known suffix of the stack is as follows: +## expr_(path_,COMA) COMA +## + +Uncomplete expression + +column_expr: IDENT L_PAREN LITERAL COMA R_PAREN +path_expr: IDENT L_PAREN LITERAL COMA R_PAREN + +Misplaced coma + +column_expr: IDENT L_PAREN LITERAL EOF +column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF +path_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN EOF +path_expr: IDENT L_PAREN L_PAREN LITERAL EOF +## + +Uncomplete expression. Did you forgot a ')' ? + + +column_expr: LITERAL R_PAREN +path_expr: LITERAL BINARY_OPERATOR LITERAL R_PAREN +## + +Invalid expression + +path_expr: COLUMN IDENT L_PAREN +## +# Also apply to : +# path_expr: COLUMN IDENT COLUMN + +Misplaced path + +path_expr: COLUMN IDENT DOT R_PAREN +## + +Incomplete path: the table is missing + +column_expr: INTEGER BINARY_OPERATOR INTEGER R_PAREN +## + +Unbalanced parens. Did you wanted to write ')' instead of '(' ? + + +path_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN + +Unbalanced brackets. Did you wanted to write ']' instead of ')' ? + +column_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN +path_expr: IDENT L_PAREN LITERAL CONCAT_OPERATOR R_PAREN +column_expr: LITERAL CONCAT_OPERATOR R_PAREN +path_expr: LITERAL CONCAT_OPERATOR R_PAREN +column_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN +path_expr: IDENT L_PAREN LITERAL BINARY_OPERATOR R_PAREN +column_expr: LITERAL BINARY_OPERATOR R_PAREN +path_expr: LITERAL BINARY_OPERATOR R_PAREN +column_expr: INTEGER EQUALITY_OPERATOR R_PAREN +path_expr: INTEGER EQUALITY_OPERATOR R_PAREN +column_expr: INTEGER INEQUALITY_OPERATOR R_PAREN +path_expr: INTEGER INEQUALITY_OPERATOR R_PAREN +column_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN +path_expr: INTEGER EQUALITY_OPERATOR INTEGER INEQUALITY_OPERATOR R_PAREN +column_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN +path_expr: INTEGER EQUALITY_OPERATOR INTEGER EQUALITY_OPERATOR R_PAREN + +The operator expect two arguments. Only one is given + +column_expr: IDENT L_PAREN L_BRACKET R_PAREN +path_expr: IDENT L_PAREN L_BRACKET R_PAREN +column_expr: IDENT L_PAREN L_BRACKET LITERAL R_PAREN +column_expr: LITERAL CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET +path_expr: INTEGER CONCAT_OPERATOR IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET +path_expr: IDENT L_PAREN L_BRACKET R_BRACKET COMA L_BRACKET R_BRACKET R_BRACKET + +Mix between brackets and parens. + diff --git a/lib/configuration/expression_parser.mly b/lib/configuration/expression_parser.mly new file mode 100644 index 0000000..1304c4d --- /dev/null +++ b/lib/configuration/expression_parser.mly @@ -0,0 +1,185 @@ +%token IDENT +%token L_PAREN +%token R_PAREN +%token L_BRACKET R_BRACKET +%token COLUMN +%token DOT +%token LITERAL +%token INTEGER +%token COMA +%token EOF +%token CONCAT_OPERATOR + +%token BINARY_OPERATOR +%token INEQUALITY_OPERATOR +%token EQUALITY_OPERATOR +%token BOOL_OPERATOR + +%start path_expr +%start column_expr + +%right BOOL_OPERATOR +%right INEQUALITY_OPERATOR EQUALITY_OPERATOR +%right CONCAT_OPERATOR BINARY_OPERATOR + +%{ + + let function_of_name param f = + match (String.lowercase_ascii f, param) with + | "nvl", _ -> + ImportExpression.T.Nvl param + | "join", (ImportExpression.T.Literal sep:: tl) -> + ImportExpression.T.Join (sep, tl) + | "join", (ImportExpression.T.Empty:: tl) -> + ImportExpression.T.Join ("", tl) + | "upper", _ -> + ImportExpression.T.Function' (ImportExpression.T.Upper, param) + | "trim", _ -> + ImportExpression.T.Function' (ImportExpression.T.Trim, param) + | other, _ -> + ImportExpression.T.Function (other, param) + +%} + +%% + +path_expr: + | expr_(path_, EOF) EOF { $1 } + | EOF { ImportExpression.T.Empty } +column_expr: + | expr_(column_, EOF) EOF { $1 } + | EOF { ImportExpression.T.Empty } + + +path_: + | COLUMN + column = IDENT + { ImportExpression.T.Path + Syntax.Path.{ alias = None + ; column = ImportCSV.Csv.column_of_string column + } + } + + | COLUMN + table = IDENT + DOT + column = IDENT + { ImportExpression.T.Path + Syntax.Path.{ alias = Some table + ; column = ImportCSV.Csv.column_of_string column} + } + +column_: + | COLUMN + column = IDENT + { try ImportExpression.T.Path (ImportCSV.Csv.column_of_string column) + with _ -> ImportExpression.T.Literal column } + +arguments(PATH): + | L_PAREN + expr = separated_list(COMA, expr_(PATH, COMA)) + R_PAREN + { expr } + +group(PATH): + | L_BRACKET + expr = separated_list(COMA, expr_(PATH, COMA)) + R_BRACKET + { expr } + +fixed(PATH): + | d = INTEGER + { ImportExpression.T.Integer d } + | l = LITERAL + { + if String.equal String.empty l then + ImportExpression.T.Empty + else + ImportExpression.T.Literal l + } + +%inline boperator: + | e = BINARY_OPERATOR { e } + | e = INEQUALITY_OPERATOR { e } + | e = EQUALITY_OPERATOR { e } + | e = BOOL_OPERATOR { e } + +(* The expression evaluation receveive in parameters : + 1. the way to buidl a path, as we have two distinct ways to build them in + the case of externals (the external_key does not allow a table name) + 2. a phantom type telling wich kind of element will end the expression. + This can be EOF for the root expression, or COMA when inside a function. + This prevent merlin to optimize thoses two path, and allow more precise + error messages. *) +expr_(PATH, ENDING_PHANTOM): + | L_PAREN + e = expr_(PATH, R_PAREN) + R_PAREN + { ImportExpression.T.Expr e + } + | + p1 = expr_(PATH, ENDING_PHANTOM) + CONCAT_OPERATOR + p2 = expr_(PATH, COMA) + { match p2 with + | ImportExpression.T.Concat args -> ImportExpression.T.Concat (p1::args) + | _ -> ImportExpression.T.Concat (p1::p2::[]) + } + | p1 = expr_(PATH, ENDING_PHANTOM) + + op = boperator + p2 = expr_(PATH, COMA) + { ImportExpression.T.BOperator (op, p1, p2) } + + | p1 = expr_(PATH, ENDING_PHANTOM) + op = EQUALITY_OPERATOR + p2 = group(PATH) + { ImportExpression.T.GEquality(op, p1, p2) } + + + + | p = PATH + { p } + | f = fixed(PATH) + { f } + | s = IDENT + args = arguments(PATH) + { function_of_name args s } + | + s = IDENT + L_PAREN + opt_arg = opt_arg(PATH, COMA)? + args1 = group(PATH) + COMA + args2 = group(PATH) + R_PAREN + { let window_name = ImportExpression.T.window_of_name s opt_arg in + ImportExpression.T.Window (window_name, args1, args2) } +(* + | (* This case is here to describe a window function which has 2 arguments + level. + I’m not completely satisfied with it, as it prevent the ability to + create a exprpression block with parens arround. *) + s = IDENT + L_PAREN + opt_arg = opt_arg(PATH, COMA)? + args1 = arguments(PATH) + COMA + args2 = arguments(PATH) + R_PAREN + { let window_name = ImportExpression.T.window_of_name s opt_arg in + let expr = ImportExpression.T.Window (window_name, args1, args2) in + + let expr_repr = ImportExpression.Repr.repr ~top:true (fun _ -> "") + expr in + Printf.printf "Deprecated syntax in \"%s\" use [] instead of ()\n" expr_repr; + + + expr + } +*) + +opt_arg(PATH, SEP): + | expr = expr_(PATH, COMA) + SEP + { expr } diff --git a/lib/configuration/importConf.ml b/lib/configuration/importConf.ml new file mode 100644 index 0000000..586be3c --- /dev/null +++ b/lib/configuration/importConf.ml @@ -0,0 +1,90 @@ +open StdLabels +module Syntax = Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path +module T = Read_conf +module Expression = ImportExpression.T + +let current_syntax = 1 + +let t_of_yojson : Yojson.Safe.t -> Syntax.t = + fun json -> + let keys = Yojson.Safe.Util.keys json in + let version = + match List.find_opt keys ~f:(String.equal "version") with + | None -> + Printf.printf + "No version given. Your setup may break in the future.\n\ + Please add « \"version\":%d » in your configuration.\n\n" + current_syntax; + `Int 1 + | Some _ -> Yojson.Safe.Util.member "version" json + in + + match version with + | `Int 1 -> Of_json.t_of_yojson json + | other -> + Printf.eprintf "Unsuported version : %s\n" (Yojson.Safe.to_string other); + exit 1 + +module TomlReader = Read_conf.Make (Helpers.Toml.Decode) + +let t_of_toml : Otoml.t -> (Syntax.t, string) result = + fun toml -> + let version = + Otoml.find toml (Otoml.get_integer ~strict:false) [ "version" ] + in + match version with + | 1 -> TomlReader.read toml + | _ -> + Printf.eprintf "Unsuported version : %d\n" version; + exit 1 + +let dummy_conf = + Syntax. + { + source = { file = ""; tab = 0; name = "" }; + version = 1; + externals = []; + columns = []; + filters = []; + sort = []; + uniq = []; + } + +let get_table_for_name : Syntax.t -> string option -> Table.t = + fun conf name -> + match name with + | None -> conf.source + | Some name -> + if String.equal name conf.source.name then conf.source + else + let ext = + List.find conf.externals ~f:(fun (ext : Syntax.extern) -> + String.equal name ext.target.name) + in + ext.target + +let root_table : Syntax.t -> Table.t = fun conf -> conf.source + +let get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list = + fun conf source -> + let is_root = source = conf.source in + + List.filter conf.externals ~f:(fun (ext : Syntax.extern) -> + (* Enumerate the intern_key and check the source pointed by each column *) + Expression.fold_values ext.intern_key ~init:false ~f:(fun acc expr -> + if acc then acc + else + match expr.Syntax.Path.alias with + | Some v -> String.equal v source.name + | None -> is_root)) + +let print_path_expression t = ImportExpression.Repr.repr Path.repr t + +let print_extern t = + let toml = Syntax.toml_of_extern t in + Otoml.Printer.to_string toml + +let expression_from_string s = + Read_conf.ExpressionParser.of_string Read_conf.ExpressionParser.path s diff --git a/lib/configuration/importConf.mli b/lib/configuration/importConf.mli new file mode 100644 index 0000000..3a8ae75 --- /dev/null +++ b/lib/configuration/importConf.mli @@ -0,0 +1,23 @@ +module Syntax = Syntax +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +val dummy_conf : Syntax.t + +val root_table : Syntax.t -> Table.t +(** Get the root table, this table is the main table to load and each line in + this table will be processed *) + +val t_of_yojson : Yojson.Safe.t -> Syntax.t +val t_of_toml : Otoml.t -> (Syntax.t, string) result +val get_table_for_name : Syntax.t -> string option -> Table.t + +val get_dependancies_for_table : Syntax.t -> Table.t -> Syntax.extern list +(** Get all the externals refered by the source *) + +val print_path_expression : Path.t ImportExpression.T.t -> string + +val expression_from_string : + string -> (Path.t ImportExpression.T.t, string) result + +val print_extern : Syntax.extern -> string diff --git a/lib/configuration/of_json.ml b/lib/configuration/of_json.ml new file mode 100644 index 0000000..f9171b9 --- /dev/null +++ b/lib/configuration/of_json.ml @@ -0,0 +1,134 @@ +open StdLabels +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path +module Expression = ImportExpression.T + +open Ppx_yojson_conv_lib.Yojson_conv.Primitives + +let current_syntax = 1 + +let rec expression_of_yojson : + (Yojson.Safe.t -> 'a) -> Yojson.Safe.t -> 'a Expression.t = + fun f expr -> + match expr with + | `Null -> Empty + | `List l -> Concat (List.map ~f:(expression_of_yojson f) l) + | `String s as json -> ( + try Path (f json) with + | _ -> Literal s) + | `Assoc [ (fn, `List [ `List l1; `List l2 ]) ] + when String.equal "counter" (String.lowercase_ascii fn) -> + Window + ( Expression.Counter, + List.map ~f:(expression_of_yojson f) l1, + List.map ~f:(expression_of_yojson f) l2 ) + | `Assoc [ (fn, `List [ expr1; `List l2; `List l3 ]) ] + when String.equal "previous" (String.lowercase_ascii fn) -> + Window + ( Expression.Previous (expression_of_yojson f expr1), + List.map ~f:(expression_of_yojson f) l2, + List.map ~f:(expression_of_yojson f) l3 ) + | `Assoc [ (fn, `List l) ] when String.equal "nvl" (String.lowercase_ascii fn) + -> Nvl (List.map ~f:(expression_of_yojson f) l) + | `Assoc [ (fn, `List l) ] -> + Function + (String.lowercase_ascii fn, List.map ~f:(expression_of_yojson f) l) + | json -> ( + try Path (f json) with + | _ -> + let str_json = Yojson.Safe.pretty_to_string json in + raise + (ImportErrors.JsonError { json = str_json; element = "Expression" }) + ) + +type 'a expression = 'a Expression.t +type column = Path.column + +let column_of_yojson : Yojson.Safe.t -> int = function + | `Int i -> i + | `String s -> ImportCSV.Csv.column_of_string s + | _ -> raise (Invalid_argument "column") + +let yojson_of_column i = `String (ImportCSV.Csv.column_to_string i) + +type path = Syntax.Path.t = { + alias : string option; [@default None] [@yojson_drop_default ( = )] + (* External file to load, when the information is missing, load in + the current file *) + column : column; +} +[@@deriving of_yojson] + +let path_of_yojson : Yojson.Safe.t -> path = function + | `String s -> + Scanf.sscanf s ":%s@.%s" (fun table column -> + if String.equal column "" then + { alias = None; column = ImportCSV.Csv.column_of_string table } + else + { + alias = Some table; + column = ImportCSV.Csv.column_of_string column; + }) + | other -> path_of_yojson other + +let yojson_of_path : path -> Yojson.Safe.t = + fun { alias; column } -> + let table = + match alias with + | None -> "" + | Some table -> String.cat table "." + in + + `String + (String.concat ~sep:"" + [ ":"; table; ImportCSV.Csv.column_to_string column ]) + +type table = Table.t = { + file : string; + tab : int; [@default 1] [@yojson_drop_default ( = )] + name : string; +} +[@@deriving of_yojson] + +type extern = { + source : string option; [@default None] [@yojson_drop_default ( = )] + intern_key : column expression; + target : table; + extern_key : column expression; + allow_missing : bool; [@default false] [@yojson_drop_default ( = )] + match_rule : string option; [@default None] [@yojson_drop_default ( = )] +} +[@@deriving of_yojson] + +type syntax_v1_extern = Syntax.extern + +let syntax_v1_extern_of_yojson yojson = + let e = extern_of_yojson yojson in + let intern_key : path Expression.t = + Expression.map e.intern_key ~f:(fun column -> + Syntax.Path.{ column; alias = e.source }) + in + Syntax. + { + extern_key = e.extern_key; + intern_key; + target = e.target; + allow_missing = e.allow_missing; + match_rule = e.match_rule; + } + +type predicate = unit + +let predicate_of_yojson _ = () +let yojson_of_predicate () = `Null + +type t = Syntax.t = { + version : int; [@default current_syntax] + source : table; + externals : syntax_v1_extern list; [@default []] + columns : path expression list; + filters : path expression list; [@default []] [@yojson_drop_default ( = )] + sort : path expression list; [@default []] [@yojson_drop_default ( = )] + uniq : path expression list; [@default []] [@yojson_drop_default ( = )] +} +[@@deriving of_yojson] diff --git a/lib/configuration/read_conf.ml b/lib/configuration/read_conf.ml new file mode 100644 index 0000000..8d467a5 --- /dev/null +++ b/lib/configuration/read_conf.ml @@ -0,0 +1,216 @@ +open StdLabels +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +module ExpressionParser : sig + type 'a path_builder + + val path : Path.t path_builder + val column : Path.column path_builder + + val of_string : + 'a path_builder -> string -> ('a ImportExpression.T.t, string) result +end = struct + module MI = Expression_parser.MenhirInterpreter + module E = MenhirLib.ErrorReports + module L = MenhirLib.LexerUtil + + type error = { + message : string; + start_line : int; + start_pos : int; + end_pos : int; + } + + let range_message start_pos end_pos message = + let start_c = start_pos.Lexing.pos_cnum - start_pos.Lexing.pos_bol + and end_c = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol in + { + message; + start_line = start_pos.Lexing.pos_bol; + start_pos = start_c; + end_pos = end_c; + } + + (** Extract the line in error from the whole expression, and print some + characters just under the faulty part *) + let get_line_error : error -> string -> string = + fun error content -> + let sub_text = + try + let end_pos = String.index_from content error.start_line '\n' in + + String.sub content ~pos:error.start_line + ~len:(end_pos - error.start_line) + with + | Not_found -> + (* There is no new line, extract the ending part *) + let len = String.length content - error.start_line in + String.sub content ~pos:error.start_line ~len + in + (* I’m not sure how to produce it, but the error may be over two lines. + This line is here to prevent the underline to overflow. *) + let stop_pos = min error.end_pos (String.length sub_text) in + let error_length = stop_pos - error.start_pos in + String.concat ~sep:"" + [ + sub_text; + "\n"; + String.make error.start_pos ' '; + String.make error_length '^'; + ] + + let get_parse_error default_position env : error = + match MI.stack env with + | (lazy Nil) -> + range_message default_position.Lexing.lex_start_p + default_position.Lexing.lex_curr_p "Invalid syntax\n" + | (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) -> + let message = + try Expression_parser_messages.message (MI.number state) with + | Not_found -> "Invalid syntax (no specific message for this eror)\n" + in + + range_message start_pos end_pos message + + type 'a path_builder = + Lexing.position -> 'a ImportExpression.T.t MI.checkpoint + + let rec _parse lexbuf (checkpoint : 'a ImportExpression.T.t MI.checkpoint) = + match checkpoint with + | MI.InputNeeded _env -> + let token = Expression_lexer.token lexbuf in + let startp = lexbuf.lex_start_p and endp = lexbuf.lex_curr_p in + let checkpoint = MI.offer checkpoint (token, startp, endp) in + _parse lexbuf checkpoint + | MI.Shifting _ | MI.AboutToReduce _ -> + let checkpoint = MI.resume checkpoint in + _parse lexbuf checkpoint + | MI.HandlingError _env -> + let err = get_parse_error lexbuf _env in + Error err + | MI.Accepted v -> Ok v + | MI.Rejected -> + let err = + range_message lexbuf.lex_start_p lexbuf.lex_curr_p + "invalid syntax (parser rejected the input)" + in + Error err + + let of_string : + 'a path_builder -> string -> ('a ImportExpression.T.t, string) result = + fun f str_expression -> + try + let lexbuf = Lexing.from_string str_expression in + let init = f lexbuf.lex_curr_p in + match _parse lexbuf init with + | Ok res -> Ok res + | Error e -> + let message = + String.concat ~sep:"\n" + [ e.message; get_line_error e str_expression ] + in + Error message + with + | Expression_lexer.UnclosedQuote { line; content } -> + let message = + Printf.sprintf "Unclosed quote at line %d : \"%s\"" line content + in + Error message + | e -> + let message = Printexc.to_string e in + Error message + + let path = Expression_parser.Incremental.path_expr + let column = Expression_parser.Incremental.column_expr +end + +module Make (S : Decoders.Decode.S) = struct + let ( let* ) = S.( let* ) + let ( and* ) = S.( and* ) + let ( >>= ) = S.( >>= ) + let ( >|= ) = S.( >|= ) + + class loader = + object (self) + method parse_expression : type a. + a ExpressionParser.path_builder -> + S.value -> + (a ImportExpression.T.t, S.value Decoders.Error.t) result = + fun path -> + S.string >>= fun v -> + match ExpressionParser.of_string path v with + | Ok expr -> S.succeed expr + | Error e -> S.fail_with Decoders.Error.(make e) + + method source = + let* file = S.field "file" S.string + and* name = S.field "name" S.string + and* tab = S.field_opt_or ~default:1 "tab" S.int in + S.succeed { Table.file; name; tab } + + method external_ name = + let* intern_key = + S.field "intern_key" (self#parse_expression ExpressionParser.path) + and* extern_key = + S.field "extern_key" (self#parse_expression ExpressionParser.column) + and* file = S.field "file" S.string + and* tab = S.field_opt_or ~default:1 "tab" S.int + and* allow_missing = + S.field_opt_or ~default:false "allow_missing" S.bool + in + + S.succeed + Syntax. + { + intern_key; + extern_key; + target = { name; file; tab }; + allow_missing; + match_rule = None; + } + + method sheet = + let* columns = + S.field "columns" + @@ S.list (self#parse_expression ExpressionParser.path) + and* filters = + S.field_opt_or ~default:[] "filters" + @@ S.list (self#parse_expression ExpressionParser.path) + and* sort = + S.field_opt_or ~default:[] "sort" + @@ S.list (self#parse_expression ExpressionParser.path) + and* uniq = + S.field_opt_or ~default:[] "uniq" + @@ S.list (self#parse_expression ExpressionParser.path) + in + S.succeed @@ fun version source externals -> + Syntax.{ version; source; externals; columns; filters; sort; uniq } + + method conf = + let* source = S.field "source" self#source + and* externals = + S.field_opt_or ~default:[] "externals" + (S.key_value_pairs_seq self#external_) + in + let* sheet = + S.field "sheet" self#sheet >|= fun v -> v 1 source externals + in + + S.succeed sheet + end + + let read_file file = + S.decode_file (new loader)#conf file + |> Result.map_error (fun v -> + let formatter = Format.str_formatter in + Format.fprintf formatter "%a@." S.pp_error v; + Format.flush_str_formatter ()) + + let read toml = + S.decode_value (new loader)#conf toml + |> Result.map_error (fun v -> + let formatter = Format.str_formatter in + Format.fprintf formatter "%a@." S.pp_error v; + Format.flush_str_formatter ()) +end diff --git a/lib/configuration/syntax.ml b/lib/configuration/syntax.ml new file mode 100644 index 0000000..8efdc59 --- /dev/null +++ b/lib/configuration/syntax.ml @@ -0,0 +1,88 @@ +open StdLabels +module E = ImportExpression.T +module Table = ImportDataTypes.Table +module Path = ImportDataTypes.Path + +let toml_of_table Table.{ file; tab; name } = + let values = [ ("file", Otoml.string file); ("name", Otoml.string name) ] in + let values = + match tab with + | 1 -> values + | tab -> ("tab", Otoml.integer tab) :: values + in + + Otoml.table values + +type extern = { + intern_key : Path.t E.t; + target : Table.t; + extern_key : Path.column E.t; + allow_missing : bool; + match_rule : string option; +} + +let toml_of_extern extern = + let values = + [ + ( "intern_key", + Otoml.string + @@ ImportExpression.Repr.repr ~top:true Path.repr extern.intern_key ); + ( "extern_key", + Otoml.string + @@ ImportExpression.Repr.repr ~top:true + (fun v -> ":" ^ ImportCSV.Csv.column_to_string v) + extern.extern_key ); + ("file", Otoml.string extern.target.file); + ("allow_missing", Otoml.boolean extern.allow_missing); + ] + in + + let values = + match extern.target.tab with + | 1 -> values + | tab -> ("tab", Otoml.integer tab) :: values + in + + Otoml.table values + +let toml_of_externs externs = + List.map externs ~f:(fun e -> (e.target.name, toml_of_extern e)) + |> Otoml.table + +type t = { + version : int; + source : Table.t; + externals : extern list; + columns : Path.t E.t list; + filters : Path.t E.t list; + sort : Path.t E.t list; + uniq : Path.t E.t list; +} + +let repr t = + let repr_expression_list l = + Otoml.array + (List.map l ~f:(fun v -> + Otoml.string (ImportExpression.Repr.repr ~top:true Path.repr v))) + in + + let sheet = + Otoml.table + [ + ("columns", repr_expression_list t.columns); + ("filters", repr_expression_list t.filters); + ("sort", repr_expression_list t.sort); + ("uniq", repr_expression_list t.uniq); + ] + in + + let values = + [ + ("version", Otoml.integer t.version); + ("source", toml_of_table t.source); + ("externals", toml_of_externs t.externals); + ("sheet", sheet); + ] + in + + Otoml.table values -- cgit v1.2.3