From a6b5a6bdd138a5ccc6827bcc73580df1e9218820 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Fri, 24 Nov 2017 09:22:24 +0100 Subject: Moved all the code to src directory --- src/odf/odf.ml | 346 +++++++++++++++++++++++++++++++++++++++ src/odf/odf_ExpressionLexer.mll | 93 +++++++++++ src/odf/odf_ExpressionParser.mly | 95 +++++++++++ src/odf/odf_ns.ml | 96 +++++++++++ 4 files changed, 630 insertions(+) create mode 100755 src/odf/odf.ml create mode 100755 src/odf/odf_ExpressionLexer.mll create mode 100755 src/odf/odf_ExpressionParser.mly create mode 100755 src/odf/odf_ns.ml (limited to 'src/odf') diff --git a/src/odf/odf.ml b/src/odf/odf.ml new file mode 100755 index 0000000..ae120d9 --- /dev/null +++ b/src/odf/odf.ml @@ -0,0 +1,346 @@ +module Xml = Ezxmlm +module T = Tools +module NS = Odf_ns + +let u = UTF8.from_utf8string + +type t + +(** Map for storing all the attributes *) +module AttributesMap = Map.Make (struct + type t = string * string + let compare = Pervasives.compare +end) + +let get_attr map key = begin + try Some (AttributesMap.find key map) with + Not_found -> None +end + +let load_formula formula = + let lineBuffer = Lexing.from_string formula in + try + Expression.Formula ( + Expression.Expression ( + Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer)) + with e -> + print_endline formula; + raise e + + +let load_content content = begin function + | "float" -> Expression.Basic ( + ScTypes.number ( + DataType.Num.of_float (float_of_string content) + )) + | "date" -> Expression.Basic ( + ScTypes.date ( + DataType.Num.of_float (float_of_string content) + )) + | _ -> Expression.Basic ( + ScTypes.string ( + UTF8.from_utf8string content)) +end + +(** Load the content from a cell *) +let load_cell sheet cell_num row_num changed (attrs, cell) = begin + + (* Load all the attributes from the xml element *) + let add_attr map (key, value) = AttributesMap.add key value map in + let attributes = List.fold_left add_attr AttributesMap.empty attrs in + + (* Check if the content is repeated *) + let repetition = match get_attr attributes NS.number_columns_repeat_attr with + | None -> 1 + | Some x -> int_of_string x + + (* cell width *) + and cell_width = match get_attr attributes NS.number_columns_spanned_attr with + | None -> 1 + | Some x -> int_of_string x in + + let vtype = + try List.assoc NS.ovalue_type_attr attrs + with Not_found -> "" in + + let formula = get_attr attributes NS.formula_attr + and value = get_attr attributes NS.value_attr in + + let expression, update = begin match formula, value with + | Some x, _ -> load_formula x, true + | _, Some x -> + (load_content x vtype) , true + | _ -> + begin try + Xml.member "p" cell + |> Xml.data_to_string + |> fun x -> (load_content x vtype, true) + with Xml.Tag_not_found _ -> Expression.Undefined, false + end + end in + + if update then ( + for i = 1 to repetition do + cell_num := !cell_num + cell_width; + sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) (Expression.load_expr expression) !sheet + done + ) else ( + cell_num := !cell_num + (repetition * cell_width ) + ); + changed || update +end + +let load_row sheet row_num (attrs, row) = begin + + let repetition = + try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attrs + with Not_found -> 1 in + + let cells = Xml.members_with_attr "table-cell" row in + + try + for i = 1 to repetition do + incr row_num; + let cell_num = ref 0 in + if not (List.fold_left (load_cell sheet cell_num row_num) false cells) then + (* No changes on the whole row. Do not repeat, and break the loop *) + raise Not_found + done + with Not_found -> row_num := !row_num + repetition - 1 +end + +let load_xml input = begin + + let sheet = ref Sheet.Raw.empty in + let row_num = ref 0 in + + let xml = + Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input) + |> Xml.from_input + |> snd in + let rows = Xml.member "document-content" (xml::[]) + |> Xml.member "body" + |> Xml.member "spreadsheet" + |> Xml.member "table" + |> Xml.members_with_attr "table-row" in + List.iter (fun x -> (load_row sheet row_num) x) rows; + !sheet +end + + +let load file = + let tmp_file = Filename.temp_file "content" ".xml" in + Unix.unlink tmp_file; + + let zip = Zip.open_in file in + let content = Zip.find_entry zip "content.xml" in + Zip.copy_entry_to_file zip content tmp_file; + + let input = open_in_bin tmp_file in + Tools.try_finally + (fun () -> load_xml input) + (fun () -> + close_in input; + Unix.unlink tmp_file; + Zip.close_in zip + ) + + +let write_type ovalue_type cvalue_type attrs output value = begin + let attrs = + (NS.ovalue_type_attr, ovalue_type):: + (NS.cvalue_type_attr, cvalue_type):: + attrs in + + Xmlm.output output (`El_start (NS.table_cell_node, attrs)); + Xmlm.output output (`El_start (NS.text_node, [])); + Xmlm.output output (`Data value); + Xmlm.output output `El_end; + Xmlm.output output `El_end; +end + +(* Writers for differents types *) +let write_num = write_type "float" "float" +let write_str = write_type "string" "string" +let write_bool = write_type "bool" "bool" +let write_error = write_type "string" "error" +let write_date = write_type "date" "date" + +let write_basic: type a. 'b list -> Xmlm.output -> a ScTypes.types -> unit = fun attrs output types -> begin match types with + | ScTypes.Str s -> write_str attrs output (UTF8.to_utf8string s) + | ScTypes.Bool b -> write_bool attrs output (string_of_bool b) + | ScTypes.Num (data_type, d) -> + begin match ScTypes.get_numeric_type data_type with + | ScTypes.Number -> + let f = DataType.Num.to_float d in + let value = string_of_float f in + write_num ((NS.value_attr, value)::attrs) output value + | ScTypes.Date -> + let value = DataType.Date.to_string d in + write_date ((NS.date_value_attr, value)::attrs) output value + end +end + +let write_formula output attrs f = begin function + | ScTypes.Result x -> write_basic attrs output x + | ScTypes.Error exn -> write_str attrs output "#NAME?" +end + +let print_ref buffer c = + UTF8.Buffer.add_string buffer @@ u"[."; + begin match c with + | ScTypes.Cell c -> UTF8.Buffer.add_string buffer @@ Cell.to_string c; + | ScTypes.Range (c1, c2) -> + UTF8.Buffer.add_string buffer @@ Cell.to_string c1; + UTF8.Buffer.add_string buffer @@ u":."; + UTF8.Buffer.add_string buffer @@ Cell.to_string c2; + end; + UTF8.Buffer.add_string buffer @@ u"]" + +let rec print_expr : UTF8.Buffer.buffer -> ScTypes.expression -> unit = fun buffer -> begin function + | ScTypes.Value (ScTypes.Str s) -> + UTF8.Printf.bprintf buffer "\"%s\"" (UTF8.to_utf8string s) + | ScTypes.Value (ScTypes.Bool b) -> + u(string_of_bool b) + |> UTF8.Buffer.add_string buffer + | ScTypes.Value (ScTypes.Num (data_type, d)) -> + begin match ScTypes.get_numeric_type data_type with + | ScTypes.Number -> + let f = DataType.Num.to_float d in + UTF8.Buffer.add_string buffer @@ u(string_of_float f) + | ScTypes.Date -> + DataType.Date.to_string d + |> u + |> UTF8.Buffer.add_string buffer + end + | ScTypes.Ref r -> print_ref buffer r + | ScTypes.Expression x -> + UTF8.Buffer.add_char buffer '('; + print_expr buffer x; + UTF8.Buffer.add_char buffer ')'; + | ScTypes.Call (ident, params) -> + begin match (UTF8.to_utf8string ident) with + | "+" | "*" | "-" | "/" | "^" | "=" + | "<>" | "<=" | ">=" | "<" | ">" -> begin match params with + | v1::[] -> + UTF8.Printf.bprintf buffer "%s%a" + (UTF8.to_utf8string ident) + print_expr v1 + | v1::v2::[] -> + UTF8.Printf.bprintf buffer "%a%s%a" + print_expr v1 + (UTF8.to_utf8string ident) + print_expr v2 + | _ -> + UTF8.Buffer.add_string buffer ident; + Tools.List.printb ~sep:(u";") print_expr buffer params + end + | _ -> + UTF8.Buffer.add_string buffer ident; + Tools.List.printb ~sep:(u";") print_expr buffer params + end +end + +let write_cell output value = begin function + | Expression.Undefined -> () + | Expression.Basic b -> write_basic [] output b + | Expression.Formula (Expression.Expression f) -> + let buffer = UTF8.Buffer.create 10 in + print_expr buffer f; + let formula = UTF8.Buffer.contents buffer + |> UTF8.to_utf8string in + write_formula output [(NS.formula_attr, ("of:=" ^formula))] f value + | Expression.Formula (Expression.Error (i, s)) -> + write_error [(NS.formula_attr, ("of:" ^ (UTF8.to_utf8string s)))] output (UTF8.to_utf8string s) +end + +(** Jump to the wanted position *) +let goto output (from_x, from_y) (to_x, to_y) :unit = begin + + (** Insert as many rows than required *) + let insert_rows count = begin + (* Close the previous openend rows *) + Xmlm.output output `El_end; + + if (count > 1) then ( + Xmlm.output output ( + `El_start ( + NS.table_row_node, [(NS.table_row_repeat_attr, string_of_int (count-1))])); + Xmlm.output output (`El_start (NS.table_cell_node, [])); + Xmlm.output output `El_end; + Xmlm.output output `El_end; + ); + Xmlm.output output (`El_start (NS.table_row_node, [])); + 1 + end + + (** Insert as many cells as required *) + and insert_cells count = begin + Xmlm.output output ( + `El_start ( + NS.table_cell_node, [(NS.number_columns_repeat_attr, string_of_int count)])); + Xmlm.output output `El_end; + end in + + (* Insert empty rows or columns until the desired position *) + let jump_row = to_y - from_y in + let from_x' = + if jump_row > 0 then + insert_rows jump_row + else + from_x in + let jump_cell = to_x - from_x' in + if jump_cell > 0 then + insert_cells jump_cell +end + +(** Write the cell content and return the updated position *) +let f output cursor position (expr, value) = begin + + goto output cursor position; + + (* Write the value *) + write_cell output value expr; + + (* Return the update position *) + Tools.Tuple2.map1 ((+) 1) position +end + +let save sheet file = begin + let tmp_file = Filename.temp_file "content" ".xml" in + Unix.unlink tmp_file; + let out_channel = open_out_bin tmp_file in + let zip = Zip.open_out file in + + let manifest = "\ +\ +\ + +" in + + Tools.try_finally (fun () -> + + let output = Xmlm.make_output (`Channel out_channel) in + Xmlm.output output (`Dtd None); + Xmlm.output output (`El_start (NS.document_content_node, NS.name_spaces())); + Xmlm.output output (`El_start (NS.body_node, [])); + Xmlm.output output (`El_start (NS.spreadsheet_node, [])); + Xmlm.output output (`El_start (NS.table_node, [])); + + Xmlm.output output (`El_start (NS.table_row_node, [])); + ignore (Sheet.Raw.fold (f output) (1,1) sheet); + Xmlm.output output `El_end; + + Xmlm.output output `El_end; + Xmlm.output output `El_end; + Xmlm.output output `El_end; + Xmlm.output output `El_end; + + close_out out_channel; + Zip.copy_file_to_entry tmp_file zip "content.xml"; + Zip.add_entry manifest zip "META-INF/manifest.xml" + ) (fun () -> + Zip.close_out zip; + Unix.unlink tmp_file + ) +end diff --git a/src/odf/odf_ExpressionLexer.mll b/src/odf/odf_ExpressionLexer.mll new file mode 100755 index 0000000..7f6a55b --- /dev/null +++ b/src/odf/odf_ExpressionLexer.mll @@ -0,0 +1,93 @@ +{ + open Odf_ExpressionParser + open Lexing + + exception SyntaxError of string +} + +let digit = ['0'-'9'] +let real = digit+ | digit* '.' digit+ | digit+ '.' digit* + + +let newline = "\r\n" | '\n' | '\r' +let space = ['\t' ' '] | newline + +let letters = ['A'-'Z' 'a'-'z'] + +(* Function identifier. + Valid identifiers are : + ORG.OPENOFFICE.DAYSINMONTH + it cannot end with a digit. + *) +let identifier = letters (letters | digit | ['-' '_' '.'])* letters+ + +let cell = letters+ digit+ + +rule read = parse + | space+ { read lexbuf } + + | digit+ as _1 { NUM _1} + | real as _1 { REAL (Tools.String.filter_float _1)} + | '$' { DOLLAR } + + | '=' { EQ } + | "<>" { NEQ } + | '<' { LT } + | "<=" { LE } + | '>' { GT } + | ">=" { GE } + | '*' { TIMES } + | '+' { PLUS } + | '-' { MINUS } + | '/' { DIVIDE } + | '"' { read_string (Buffer.create 16) lexbuf } + | ';' { SEMICOLON } + | ':' { COLON } + | '[' { L_SQ_BRACKET } + | ']' { R_SQ_BRACKET } + | '(' { LPAREN } + | ')' { RPAREN } + | '^' { POW } + | '.' { DOT } + + | letters+ as _1 { LETTERS _1} + | identifier as _1 { IDENT _1} + + | '\000' { EOF } + | eof { EOF } + +and read_string buf = parse + | '"' { STR (Buffer.contents buf) } + | '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf } + | '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf } + | '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf } + | '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf } + | '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf } + | '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf } + | '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf } + | '\\' '"' { Buffer.add_char buf '"'; read_string buf lexbuf } + | [^ '"' '\\' '\000']+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + read_string buf lexbuf + } + | '\000' { STR (Buffer.contents buf) } + | _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) } + | eof { STR ( Buffer.contents buf) } + +and quoteless_string buf = parse + | '\\' '/' { Buffer.add_char buf '/'; quoteless_string buf lexbuf } + | '\\' '\\' { Buffer.add_char buf '\\'; quoteless_string buf lexbuf } + | '\\' 'b' { Buffer.add_char buf '\b'; quoteless_string buf lexbuf } + | '\\' 'f' { Buffer.add_char buf '\012'; quoteless_string buf lexbuf } + | '\\' 'n' { Buffer.add_char buf '\n'; quoteless_string buf lexbuf } + | '\\' 'r' { Buffer.add_char buf '\r'; quoteless_string buf lexbuf } + | '\\' 't' { Buffer.add_char buf '\t'; quoteless_string buf lexbuf } + | '\\' '"' { Buffer.add_char buf '"'; quoteless_string buf lexbuf } + | [^ '\\' '\000']+ + { Buffer.add_string buf (Lexing.lexeme lexbuf); + quoteless_string buf lexbuf + } + | '\000' { STR (Buffer.contents buf) } + | _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) } + | eof { STR (Buffer.contents buf) } + diff --git a/src/odf/odf_ExpressionParser.mly b/src/odf/odf_ExpressionParser.mly new file mode 100755 index 0000000..6b571a9 --- /dev/null +++ b/src/odf/odf_ExpressionParser.mly @@ -0,0 +1,95 @@ +%{ + open ScTypes + module F = Functions + + let u = UTF8.from_utf8string + + let extractColumnNameFromNum (fixed, str) = (fixed, int_of_string str) + +%} + +%token REAL +%token NUM +%token STR + +%token LETTERS +%token IDENT + +%token DOLLAR +%token DOT + +%token LPAREN RPAREN +%token L_SQ_BRACKET R_SQ_BRACKET +%token PLUS +%token TIMES +%token DIVIDE +%token MINUS +%token EQ NEQ +%token LT LE GT GE +%token EOF +%token COLON SEMICOLON +%token POW + +%nonassoc EQ NEQ LT LE GT GE +%left PLUS MINUS +%left TIMES DIVIDE +%left POW + +%start value + +%% + +value: + | LETTERS COLON EQ expr EOF {$4} + +expr: + | num {Value (number ($1))} + | MINUS expr {Call (F.sub, [$2])} + | PLUS expr {Call (F.add, [$2])} + + + | L_SQ_BRACKET ref R_SQ_BRACKET {$2} + + | LPAREN expr RPAREN {Expression $2} + | STR {Value (string (u $1))} + + (* Mathematical operators *) + | expr MINUS expr {Call (F.sub, [$1; $3])} + | expr DIVIDE expr {Call (F.div, [$1; $3])} + | expr TIMES expr {Call (F.mul, [$1; $3])} + | expr PLUS expr {Call (F.add, [$1; $3])} + | expr POW expr {Call (F.pow, [$1; $3])} + + (* Comparaison *) + | expr EQ expr {Call (F.eq, [$1; $3])} + | expr NEQ expr {Call (F.neq, [$1; $3])} + | expr LT expr {Call (F.lt, [$1; $3])} + | expr GT expr {Call (F.gt, [$1; $3])} + | expr LE expr {Call (F.le, [$1; $3])} + | expr GE expr {Call (F.ge, [$1; $3])} + + | ident LPAREN separated_list(SEMICOLON, expr) RPAREN { Call (u $1, $3) } + + +ref: + | cell {Ref (Cell $1)} + | cell COLON cell {Ref (Range ($1, $3))} + +cell: + | DOT fixed(LETTERS) fixed(NUM){Cell.from_string $2 (extractColumnNameFromNum $3)} + +fixed(X): + | DOLLAR X {true, $2} + | X {false, $1} + +num: + | REAL {DataType.Num.of_float @@ float_of_string $1} + | NUM {DataType.Num.of_int @@ int_of_string $1} + +ident: + | IDENT { $1 } + | text+ { String.concat "" $1 } + +text: + | LETTERS { $1 } + | NUM { $1 } diff --git a/src/odf/odf_ns.ml b/src/odf/odf_ns.ml new file mode 100755 index 0000000..5a501da --- /dev/null +++ b/src/odf/odf_ns.ml @@ -0,0 +1,96 @@ +let ooo = "http://openoffice.org/2004/office" +let ooow = "http://openoffice.org/2004/writer" +let oooc = "http://openoffice.org/2004/calc" +let rpt = "http://openoffice.org/2005/report" +let tableooo = "http://openoffice.org/2009/table" +let drawooo = "http://openoffice.org/2010/draw" + +let dc = "http://purl.org/dc/elements/1.1/" + +let xhtml = "http://www.w3.org/1999/xhtml" +let grddl = "http://www.w3.org/2003/g/data-view#" +let css3t = "http://www.w3.org/TR/css3-text/" +let xlink = "http://www.w3.org/1999/xlink" +let dom = "http://www.w3.org/2001/xml-events" +let xforms = "http://www.w3.org/2002/xforms" +let xsd = "http://www.w3.org/2001/XMLSchema" +let xsi = "http://www.w3.org/2001/XMLSchema-instance" +let math = "http://www.w3.org/1998/Math/MathML" + +let office = "urn:oasis:names:tc:opendocument:xmlns:office:1.0" +let style = "urn:oasis:names:tc:opendocument:xmlns:style:1.0" +let text = "urn:oasis:names:tc:opendocument:xmlns:text:1.0" +let table = "urn:oasis:names:tc:opendocument:xmlns:table:1.0" +let draw = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" +let fo = "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" +let meta = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" +let number = "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" +let presentation= "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" +let svg = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" +let chart = "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" +let dr3d = "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" +let form = "urn:oasis:names:tc:opendocument:xmlns:form:1.0" +let script = "urn:oasis:names:tc:opendocument:xmlns:script:1.0" +let oof = "urn:oasis:names:tc:opendocument:xmlns:of:1.2" + +let calcext = "urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" +let loext = "urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" + +let field = "urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" +let formx = "urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" + +let document_content_node = (office, "document-content") +let body_node = (office, "body") +let spreadsheet_node = (office, "spreadsheet") +let table_node = (table, "table") +let table_row_node = (table, "table-row") + let table_row_repeat_attr = (table, "number-rows-repeated") + +let table_cell_node = (table, "table-cell") + let number_columns_repeat_attr = (table, "number-columns-repeated") + let cvalue_type_attr = (calcext, "value-type") + let ovalue_type_attr = (office, "value-type") + let value_attr = (office, "value") + let formula_attr = (table, "formula") + let date_value_attr = (office, "date-value") + let number_columns_spanned_attr = (table, "number-columns-spanned") + +let text_node = (text, "p") + +let name_spaces () = [ + (Xmlm.ns_xmlns, "office"), office; + (Xmlm.ns_xmlns, "style"), style; + (Xmlm.ns_xmlns, "text"), text; + (Xmlm.ns_xmlns, "table"), table; + (Xmlm.ns_xmlns, "draw"), draw; + (Xmlm.ns_xmlns, "fo"), fo; + (Xmlm.ns_xmlns, "xlink"), xlink; + (Xmlm.ns_xmlns, "dc"), dc; + (Xmlm.ns_xmlns, "meta"), meta; + (Xmlm.ns_xmlns, "number"), number; + (Xmlm.ns_xmlns, "presentation"),presentation; + (Xmlm.ns_xmlns, "svg"), svg; + (Xmlm.ns_xmlns, "chart"), chart; + (Xmlm.ns_xmlns, "dr3d"), dr3d; + (Xmlm.ns_xmlns, "math"), math; + (Xmlm.ns_xmlns, "form"), form; + (Xmlm.ns_xmlns, "script"), script; + (Xmlm.ns_xmlns, "ooo"), ooo; + (Xmlm.ns_xmlns, "ooow"), ooow; + (Xmlm.ns_xmlns, "oooc"), oooc; + (Xmlm.ns_xmlns, "dom"), dom; + (Xmlm.ns_xmlns, "xforms"), xforms; + (Xmlm.ns_xmlns, "xsd"), xsd; + (Xmlm.ns_xmlns, "xsi"), xsi; + (Xmlm.ns_xmlns, "rpt"), rpt; + (Xmlm.ns_xmlns, "of"), oof; + (Xmlm.ns_xmlns, "xhtml"), xhtml; + (Xmlm.ns_xmlns, "grddl"), grddl; + (Xmlm.ns_xmlns, "tableooo"), tableooo; + (Xmlm.ns_xmlns, "drawooo"), drawooo; + (Xmlm.ns_xmlns, "calcext"), calcext; + (Xmlm.ns_xmlns, "loext"), loext; + (Xmlm.ns_xmlns, "field"), field; + (Xmlm.ns_xmlns, "formx"), formx; + (Xmlm.ns_xmlns, "css3t"), css3t; +] -- cgit v1.2.3