From ef312564ca84a2b49fc291434d8fb2f8501bb618 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Tue, 15 Nov 2016 13:00:01 +0100 Subject: Initial commit --- odf/odf.ml | 311 +++++++++++++++++++++++++++++++++++++++++++ odf/odf_ExpressionLexer.mll | 88 ++++++++++++ odf/odf_ExpressionParser.mly | 92 +++++++++++++ odf/odf_ns.ml | 95 +++++++++++++ 4 files changed, 586 insertions(+) create mode 100755 odf/odf.ml create mode 100755 odf/odf_ExpressionLexer.mll create mode 100755 odf/odf_ExpressionParser.mly create mode 100755 odf/odf_ns.ml (limited to 'odf') diff --git a/odf/odf.ml b/odf/odf.ml new file mode 100755 index 0000000..cfbd964 --- /dev/null +++ b/odf/odf.ml @@ -0,0 +1,311 @@ +module Xml = Ezxmlm +module T = Tools +module NS = Odf_ns + +let u = UTF8.from_utf8string + +type t + +let load_attrs attrs = + + let _load t = function + | (("urn:oasis:names:tc:opendocument:xmlns:table:1.0", "formula"), x) -> + T.Tuple3.replace1 (Some x) t + | (("urn:oasis:names:tc:opendocument:xmlns:office:1.0", "value"), x) -> + T.Tuple3.replace2 (Some x) t + | (("urn:oasis:names:tc:opendocument:xmlns:office:1.0", "date-value"), x) -> + T.Tuple3.replace2 (Some x) t + | (("urn:oasis:names:tc:opendocument:xmlns:table:1.0", "number-columns-repeated"), x) -> + T.Tuple3.replace3 (Some x) t + | _ -> t + in List.fold_left _load (None, None, None) attrs + +let load_formula formula = + let lineBuffer = Lexing.from_string formula in + Expression.Formula ( + Expression.Expression ( + Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer)) + +let load_content content = begin function + | "float" -> Expression.Basic ( + ScTypes.Num ( + (Tools.Num.of_float_string content), Some (u @@ Tools.String.filter_float content))) + | "date" -> Expression.Basic ( + ScTypes.Date ( + Tools.Date.from_string content)) + | _ -> Expression.Basic ( + ScTypes.Str ( + UTF8.from_utf8string content)) +end + +let load_cell sheet cell_num row_num changed (attrs, cell) = begin + + let attributes = load_attrs attrs in + let repetition = match T.Tuple3.thd attributes with + | None -> 1 + | Some x -> int_of_string x in + + let vtype = + try List.assoc NS.ovalue_type_attr attrs + with Not_found -> "" in + let expression, new_change = begin match attributes with + | Some x, _, _ -> load_formula x, true + | _, Some x, _ -> + (load_content x vtype) , true + | _ -> + begin try + Xml.member "p" cell + |> Xml.data_to_string + |> fun x -> (load_content x vtype, true) + with Xml.Tag_not_found _ -> Expression.Basic ScTypes.Undefined, false + end + end in + + if new_change then ( + for i = 1 to repetition do + incr cell_num; + sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) (Expression.load_expr expression) !sheet + done + ) else ( + cell_num := !cell_num + repetition + ); + changed || new_change +end + +let load_row sheet row_num (attrs, row) = begin + + let repetition = + try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attrs + with Not_found -> 1 in + + let cells = Xml.members_with_attr "table-cell" row in + + try + for i = 1 to repetition do + incr row_num; + let cell_num = ref 0 in + if not (List.fold_left (load_cell sheet cell_num row_num) false cells) then + (* No changes on the whole row. Do not repeat, and break the loop *) + raise Not_found + done + with Not_found -> row_num := !row_num + repetition - 1 +end + +let load_xml input = begin + + let sheet = ref Sheet.Raw.create in + let row_num = ref 0 in + + let xml = + Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input) + |> Xml.from_input + |> snd in + let rows = Xml.member "document-content" (xml::[]) + |> Xml.member "body" + |> Xml.member "spreadsheet" + |> Xml.member "table" + |> Xml.members_with_attr "table-row" in + List.iter (fun x -> (load_row sheet row_num) x) rows; + !sheet +end + + +let load file = + let tmp_file = Filename.temp_file "content" ".xml" in + Unix.unlink tmp_file; + + let zip = Zip.open_in file in + let content = Zip.find_entry zip "content.xml" in + Zip.copy_entry_to_file zip content tmp_file; + + let input = open_in_bin tmp_file in + Tools.try_finally (fun () -> + load_xml input + )( fun () -> + close_in input; + Unix.unlink tmp_file; + Zip.close_in zip + ) + + +let write_type ovalue_type cvalue_type attrs output value = begin + let attrs = + (NS.ovalue_type_attr, ovalue_type):: + (NS.cvalue_type_attr, cvalue_type):: + attrs in + + Xmlm.output output (`El_start (NS.table_cell_node, attrs)); + Xmlm.output output (`El_start (NS.text_node, [])); + Xmlm.output output (`Data value); + Xmlm.output output `El_end; + Xmlm.output output `El_end; +end + +(* Writers for differents types *) +let write_num = write_type "float" "float" +let write_str = write_type "string" "string" +let write_bool = write_type "bool" "bool" +let write_error = write_type "string" "error" +let write_date = write_type "date" "date" + +let write_basic attrs output = begin function + | ScTypes.Num (n,_) -> + let value = (string_of_float @@ Num.float_of_num n) in + write_num ((NS.value_attr, value)::attrs) output value + | ScTypes.Str s -> write_str attrs output (UTF8.to_utf8string s) + | ScTypes.Bool b -> write_bool attrs output (string_of_bool b) + | ScTypes.List l -> write_error attrs output "" + | ScTypes.Date d -> + let value = Tools.Date.to_string d in + write_date ((NS.date_value_attr, value)::attrs) output value + | _ -> () +end + +let write_formula output attrs f = begin function + | ScTypes.Result x -> write_basic attrs output x + | ScTypes.Error exn -> write_str attrs output "#NAME?" +end + +let print_ref buffer c = + UTF8.Buffer.add_string buffer @@ u"[."; + begin match c with + | ScTypes.Cell c -> UTF8.Buffer.add_string buffer @@ Cell.to_string c; + | ScTypes.Range (c1, c2) -> + UTF8.Buffer.add_string buffer @@ Cell.to_string c1; + UTF8.Buffer.add_string buffer @@ u":."; + UTF8.Buffer.add_string buffer @@ Cell.to_string c2; + end; + UTF8.Buffer.add_string buffer @@ u"]" + +let rec print_expr buffer = begin function + | ScTypes.Value (ScTypes.Num (n, _)) -> + UTF8.Buffer.add_string buffer @@ u(string_of_float @@ Num.float_of_num n) + | ScTypes.Value (ScTypes.Str s) -> + UTF8.Buffer.add_string buffer @@ u"\""; + UTF8.Buffer.add_string buffer s; + UTF8.Buffer.add_string buffer @@ u"\"" + | ScTypes.Value (ScTypes.Bool b) -> UTF8.Buffer.add_string buffer @@ u(string_of_bool b) + | ScTypes.Value x -> () + | ScTypes.Ref r -> print_ref buffer r + | ScTypes.Expression x -> + UTF8.Buffer.add_char buffer '('; + print_expr buffer x; + UTF8.Buffer.add_char buffer ')'; + | ScTypes.Call (ident, params) -> + begin match (UTF8.to_utf8string ident) with + | "+" | "*" | "-" | "/" | "^" | "=" + | "<>" | "<=" | ">=" | "<" | ">" -> begin match params with + | v1::v2::[] -> + UTF8.Printf.bprintf buffer "%a%s%a" + print_expr v1 + (UTF8.to_utf8string ident) + print_expr v2 + | _ -> + UTF8.Buffer.add_string buffer ident; + Tools.List.printb ~sep:(u";") print_expr buffer params + end + | _ -> + UTF8.Buffer.add_string buffer ident; + Tools.List.printb ~sep:(u";") print_expr buffer params + end +end + +let write_cell output value = begin function + | Expression.Basic b -> write_basic [] output b + | Expression.Formula (Expression.Expression f) -> + let buffer = UTF8.Buffer.create 10 in + print_expr buffer f; + let formula = UTF8.Buffer.contents buffer + |> UTF8.to_utf8string in + write_formula output [(NS.formula_attr, ("of:=" ^formula))] f value + | Expression.Formula (Expression.Error (i, s)) -> + write_error [(NS.formula_attr, ("of:" ^ (UTF8.to_utf8string s)))] output (UTF8.to_utf8string s) +end + +(** Jump to the wanted position *) +let goto output (from_x, from_y) (to_x, to_y) = begin + + let insert_rows count = begin + (* Close the previous openend rows *) + Xmlm.output output `El_end; + + if (count > 1) then ( + Xmlm.output output ( + `El_start ( + NS.table_row_node, [(NS.table_row_repeat_attr, string_of_int (count-1))])); + Xmlm.output output (`El_start (NS.table_cell_node, [])); + Xmlm.output output `El_end; + Xmlm.output output `El_end; + ); + Xmlm.output output (`El_start (NS.table_row_node, [])); + 1 + end + + and insert_cells count = begin + Xmlm.output output ( + `El_start ( + NS.table_cell_node, [(NS.number_columns_repeat_attr, string_of_int count)])); + Xmlm.output output `El_end; + end in + + (* Insert empty rows or columns until the desired position *) + let jump_row = to_y - from_y in + let from_x' = + if jump_row > 0 then + insert_rows jump_row + else + from_x in + let jump_cell = to_x - from_x' in + if jump_cell > 0 then insert_cells jump_cell; + + +end + +let f output cursor position (expr, value) = begin + + goto output cursor position; + + (* Write the value *) + write_cell output value expr; + + (* Return the update position *) + Tools.Tuple2.map1 ((+) 1) position +end + +let save sheet file = + let tmp_file = Filename.temp_file "content" ".xml" in + Unix.unlink tmp_file; + let out_channel = open_out_bin tmp_file in + let zip = Zip.open_out file in + + let manifest = "\ +\ +\ + +" in + + Tools.try_finally (fun () -> + + let output = Xmlm.make_output (`Channel out_channel) in + Xmlm.output output (`Dtd None); + Xmlm.output output (`El_start (NS.document_content_node, NS.name_spaces())); + Xmlm.output output (`El_start (NS.body_node, [])); + Xmlm.output output (`El_start (NS.spreadsheet_node, [])); + Xmlm.output output (`El_start (NS.table_node, [])); + + Xmlm.output output (`El_start (NS.table_row_node, [])); + ignore (Sheet.Raw.fold (f output) (1,1) sheet); + Xmlm.output output `El_end; + + Xmlm.output output `El_end; + Xmlm.output output `El_end; + Xmlm.output output `El_end; + Xmlm.output output `El_end; + + close_out out_channel; + Zip.copy_file_to_entry tmp_file zip "content.xml"; + Zip.add_entry manifest zip "META-INF/manifest.xml" + ) (fun () -> + Zip.close_out zip; + Unix.unlink tmp_file + ) diff --git a/odf/odf_ExpressionLexer.mll b/odf/odf_ExpressionLexer.mll new file mode 100755 index 0000000..28fce22 --- /dev/null +++ b/odf/odf_ExpressionLexer.mll @@ -0,0 +1,88 @@ +{ + open Odf_ExpressionParser + open Lexing + + exception SyntaxError of string +} + +let digit = ['0'-'9'] +let real = digit+ | digit* '.' digit+ | digit+ '.' digit* + + +let newline = "\r\n" | '\n' | '\r' +let space = ['\t' ' '] | newline + +let letters = ['A'-'Z' 'a'-'z'] +let identifier = letters (letters | digit | ['-' '_' '.'])* (letters | digit)+ + +let text = letters | digit + +let cell = letters+ digit+ + +rule read = parse + | space+ { read lexbuf } + + | digit+ as _1 { NUM (_1, Num.num_of_string _1)} + | real as _1 { REAL (Tools.String.filter_float _1, Tools.Num.of_float_string _1)} + | '$' { DOLLAR } + + | '=' { EQ } + | "<>" { NEQ } + | '<' { LT } + | "<=" { LE } + | '>' { GT } + | ">=" { GE } + | '*' { TIMES } + | '+' { PLUS } + | '-' { MINUS } + | '/' { DIVIDE } + | '"' { read_string (Buffer.create 17) lexbuf } + | ';' { SEMICOLON } + | ':' { COLON } + | '[' { L_SQ_BRACKET } + | ']' { R_SQ_BRACKET } + | '(' { LPAREN } + | ')' { RPAREN } + | '^' { POW } + | '.' { DOT } + + | letters+ as _1 { LETTERS _1} + + | '\000' { EOF } + | eof { EOF } + +and read_string buf = parse + | '"' { STR (Buffer.contents buf) } + | '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf } + | '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf } + | '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf } + | '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf } + | '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf } + | '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf } + | '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf } + | '\\' '"' { Buffer.add_char buf '"'; read_string buf lexbuf } + | [^ '"' '\\' '\000']+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + read_string buf lexbuf + } + | '\000' { STR (Buffer.contents buf) } + | _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) } + | eof { STR ( Buffer.contents buf) } + +and quoteless_string buf = parse + | '\\' '/' { Buffer.add_char buf '/'; quoteless_string buf lexbuf } + | '\\' '\\' { Buffer.add_char buf '\\'; quoteless_string buf lexbuf } + | '\\' 'b' { Buffer.add_char buf '\b'; quoteless_string buf lexbuf } + | '\\' 'f' { Buffer.add_char buf '\012'; quoteless_string buf lexbuf } + | '\\' 'n' { Buffer.add_char buf '\n'; quoteless_string buf lexbuf } + | '\\' 'r' { Buffer.add_char buf '\r'; quoteless_string buf lexbuf } + | '\\' 't' { Buffer.add_char buf '\t'; quoteless_string buf lexbuf } + | '\\' '"' { Buffer.add_char buf '"'; quoteless_string buf lexbuf } + | [^ '\\' '\000']+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + quoteless_string buf lexbuf + } + | '\000' { STR (Buffer.contents buf) } + | _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) } + | eof { STR (Buffer.contents buf) } + diff --git a/odf/odf_ExpressionParser.mly b/odf/odf_ExpressionParser.mly new file mode 100755 index 0000000..6c34c1d --- /dev/null +++ b/odf/odf_ExpressionParser.mly @@ -0,0 +1,92 @@ +%{ + open ScTypes + module F = Functions + + let u = UTF8.from_utf8string + + let extractColumnNameFromNum (fixed, (str, value)) = (fixed, value) + +%} + +%token REAL +%token NUM +%token STR + +%token LETTERS + +%token DOLLAR +%token DOT + +%token LPAREN RPAREN +%token L_SQ_BRACKET R_SQ_BRACKET +%token PLUS +%token TIMES +%token DIVIDE +%token MINUS +%token EQ NEQ +%token LT LE GT GE +%token EOF +%token COLON SEMICOLON +%token POW + +%nonassoc EQ NEQ LT LE GT GE +%left PLUS MINUS +%left TIMES DIVIDE +%left POW + +%start value + +%% + +value: + | LETTERS COLON EQ expr EOF {$4} + +expr: + | num {Value (Num ((snd $1), Some (u(fst $1))))} + | MINUS num {Value (Num (Num.minus_num (snd $2), Some (u("-" ^(fst $2)) )))} + + + | L_SQ_BRACKET ref R_SQ_BRACKET {$2} + + | LPAREN expr RPAREN {Expression $2} + | STR {Value (Str (u $1))} + + (* Mathematical operators *) + | expr MINUS expr {Call (F.sub, [$1; $3])} + | expr DIVIDE expr {Call (F.div, [$1; $3])} + | expr TIMES expr {Call (F.mul, [$1; $3])} + | expr PLUS expr {Call (F.add, [$1; $3])} + | expr POW expr {Call (F.pow, [$1; $3])} + + (* Comparaison *) + | expr EQ expr {Call (F.eq, [$1; $3])} + | expr NEQ expr {Call (F.neq, [$1; $3])} + | expr LT expr {Call (F.lt, [$1; $3])} + | expr GT expr {Call (F.gt, [$1; $3])} + | expr LE expr {Call (F.le, [$1; $3])} + | expr GE expr {Call (F.ge, [$1; $3])} + + | ident LPAREN separated_list(SEMICOLON, expr) RPAREN { Call (u $1, $3) } + + +ref: + | cell {Ref (Cell $1)} + | cell COLON cell {Ref (Range ($1, $3))} + +cell: + | DOT fixed(LETTERS) fixed(NUM){Cell.from_string $2 (extractColumnNameFromNum $3)} + +fixed(X): + | DOLLAR X {true, $2} + | X {false, $1} + +num: + | REAL {$1} + | NUM {$1} + +ident: + | text+ { String.concat "" $1 } + +text: + | LETTERS { $1 } + | NUM { fst $1 } diff --git a/odf/odf_ns.ml b/odf/odf_ns.ml new file mode 100755 index 0000000..c22ae7e --- /dev/null +++ b/odf/odf_ns.ml @@ -0,0 +1,95 @@ +let ooo = "http://openoffice.org/2004/office" +let ooow = "http://openoffice.org/2004/writer" +let oooc = "http://openoffice.org/2004/calc" +let rpt = "http://openoffice.org/2005/report" +let tableooo = "http://openoffice.org/2009/table" +let drawooo = "http://openoffice.org/2010/draw" + +let dc = "http://purl.org/dc/elements/1.1/" + +let xhtml = "http://www.w3.org/1999/xhtml" +let grddl = "http://www.w3.org/2003/g/data-view#" +let css3t = "http://www.w3.org/TR/css3-text/" +let xlink = "http://www.w3.org/1999/xlink" +let dom = "http://www.w3.org/2001/xml-events" +let xforms = "http://www.w3.org/2002/xforms" +let xsd = "http://www.w3.org/2001/XMLSchema" +let xsi = "http://www.w3.org/2001/XMLSchema-instance" +let math = "http://www.w3.org/1998/Math/MathML" + +let office = "urn:oasis:names:tc:opendocument:xmlns:office:1.0" +let style = "urn:oasis:names:tc:opendocument:xmlns:style:1.0" +let text = "urn:oasis:names:tc:opendocument:xmlns:text:1.0" +let table = "urn:oasis:names:tc:opendocument:xmlns:table:1.0" +let draw = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" +let fo = "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" +let meta = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" +let number = "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" +let presentation= "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" +let svg = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" +let chart = "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" +let dr3d = "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" +let form = "urn:oasis:names:tc:opendocument:xmlns:form:1.0" +let script = "urn:oasis:names:tc:opendocument:xmlns:script:1.0" +let oof = "urn:oasis:names:tc:opendocument:xmlns:of:1.2" + +let calcext = "urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" +let loext = "urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" + +let field = "urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" +let formx = "urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" + +let document_content_node = (office, "document-content") +let body_node = (office, "body") +let spreadsheet_node = (office, "spreadsheet") +let table_node = (table, "table") +let table_row_node = (table, "table-row") + let table_row_repeat_attr = (table, "number-rows-repeated") + +let table_cell_node = (table, "table-cell") + let number_columns_repeat_attr = (table, "number-columns-repeated") + let cvalue_type_attr = (calcext, "value-type") + let ovalue_type_attr = (office, "value-type") + let value_attr = (office, "value") + let formula_attr = (table, "formula") + let date_value_attr = (office, "date-value") + +let text_node = (text, "p") + +let name_spaces () = [ + (Xmlm.ns_xmlns, "office"), office; + (Xmlm.ns_xmlns, "style"), style; + (Xmlm.ns_xmlns, "text"), text; + (Xmlm.ns_xmlns, "table"), table; + (Xmlm.ns_xmlns, "draw"), draw; + (Xmlm.ns_xmlns, "fo"), fo; + (Xmlm.ns_xmlns, "xlink"), xlink; + (Xmlm.ns_xmlns, "dc"), dc; + (Xmlm.ns_xmlns, "meta"), meta; + (Xmlm.ns_xmlns, "number"), number; + (Xmlm.ns_xmlns, "presentation"),presentation; + (Xmlm.ns_xmlns, "svg"), svg; + (Xmlm.ns_xmlns, "chart"), chart; + (Xmlm.ns_xmlns, "dr3d"), dr3d; + (Xmlm.ns_xmlns, "math"), math; + (Xmlm.ns_xmlns, "form"), form; + (Xmlm.ns_xmlns, "script"), script; + (Xmlm.ns_xmlns, "ooo"), ooo; + (Xmlm.ns_xmlns, "ooow"), ooow; + (Xmlm.ns_xmlns, "oooc"), oooc; + (Xmlm.ns_xmlns, "dom"), dom; + (Xmlm.ns_xmlns, "xforms"), xforms; + (Xmlm.ns_xmlns, "xsd"), xsd; + (Xmlm.ns_xmlns, "xsi"), xsi; + (Xmlm.ns_xmlns, "rpt"), rpt; + (Xmlm.ns_xmlns, "of"), oof; + (Xmlm.ns_xmlns, "xhtml"), xhtml; + (Xmlm.ns_xmlns, "grddl"), grddl; + (Xmlm.ns_xmlns, "tableooo"), tableooo; + (Xmlm.ns_xmlns, "drawooo"), drawooo; + (Xmlm.ns_xmlns, "calcext"), calcext; + (Xmlm.ns_xmlns, "loext"), loext; + (Xmlm.ns_xmlns, "field"), field; + (Xmlm.ns_xmlns, "formx"), formx; + (Xmlm.ns_xmlns, "css3t"), css3t; +] -- cgit v1.2.3