From 112ab4b1c396fc2117191297227d8e411f9b9bb3 Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Fri, 19 Jan 2018 11:24:29 +0100 Subject: Better memory management --- src/odf/odf.ml | 122 ++--------------------------------------------- src/odf/odfLoader.ml | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 119 deletions(-) create mode 100755 src/odf/odfLoader.ml (limited to 'src/odf') diff --git a/src/odf/odf.ml b/src/odf/odf.ml index ae120d9..048be2e 100755 --- a/src/odf/odf.ml +++ b/src/odf/odf.ml @@ -1,4 +1,3 @@ -module Xml = Ezxmlm module T = Tools module NS = Odf_ns @@ -6,125 +5,10 @@ let u = UTF8.from_utf8string type t -(** Map for storing all the attributes *) -module AttributesMap = Map.Make (struct - type t = string * string - let compare = Pervasives.compare -end) - -let get_attr map key = begin - try Some (AttributesMap.find key map) with - Not_found -> None -end - -let load_formula formula = - let lineBuffer = Lexing.from_string formula in - try - Expression.Formula ( - Expression.Expression ( - Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer)) - with e -> - print_endline formula; - raise e - - -let load_content content = begin function - | "float" -> Expression.Basic ( - ScTypes.number ( - DataType.Num.of_float (float_of_string content) - )) - | "date" -> Expression.Basic ( - ScTypes.date ( - DataType.Num.of_float (float_of_string content) - )) - | _ -> Expression.Basic ( - ScTypes.string ( - UTF8.from_utf8string content)) -end - -(** Load the content from a cell *) -let load_cell sheet cell_num row_num changed (attrs, cell) = begin - - (* Load all the attributes from the xml element *) - let add_attr map (key, value) = AttributesMap.add key value map in - let attributes = List.fold_left add_attr AttributesMap.empty attrs in - - (* Check if the content is repeated *) - let repetition = match get_attr attributes NS.number_columns_repeat_attr with - | None -> 1 - | Some x -> int_of_string x - - (* cell width *) - and cell_width = match get_attr attributes NS.number_columns_spanned_attr with - | None -> 1 - | Some x -> int_of_string x in - - let vtype = - try List.assoc NS.ovalue_type_attr attrs - with Not_found -> "" in - - let formula = get_attr attributes NS.formula_attr - and value = get_attr attributes NS.value_attr in - - let expression, update = begin match formula, value with - | Some x, _ -> load_formula x, true - | _, Some x -> - (load_content x vtype) , true - | _ -> - begin try - Xml.member "p" cell - |> Xml.data_to_string - |> fun x -> (load_content x vtype, true) - with Xml.Tag_not_found _ -> Expression.Undefined, false - end - end in - - if update then ( - for i = 1 to repetition do - cell_num := !cell_num + cell_width; - sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) (Expression.load_expr expression) !sheet - done - ) else ( - cell_num := !cell_num + (repetition * cell_width ) - ); - changed || update -end - -let load_row sheet row_num (attrs, row) = begin - - let repetition = - try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attrs - with Not_found -> 1 in - - let cells = Xml.members_with_attr "table-cell" row in - - try - for i = 1 to repetition do - incr row_num; - let cell_num = ref 0 in - if not (List.fold_left (load_cell sheet cell_num row_num) false cells) then - (* No changes on the whole row. Do not repeat, and break the loop *) - raise Not_found - done - with Not_found -> row_num := !row_num + repetition - 1 -end - let load_xml input = begin - - let sheet = ref Sheet.Raw.empty in - let row_num = ref 0 in - - let xml = - Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input) - |> Xml.from_input - |> snd in - let rows = Xml.member "document-content" (xml::[]) - |> Xml.member "body" - |> Xml.member "spreadsheet" - |> Xml.member "table" - |> Xml.members_with_attr "table-row" in - List.iter (fun x -> (load_row sheet row_num) x) rows; - !sheet + let source = Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input) in + let sheet = OdfLoader.load source in + sheet end diff --git a/src/odf/odfLoader.ml b/src/odf/odfLoader.ml new file mode 100755 index 0000000..9420fdd --- /dev/null +++ b/src/odf/odfLoader.ml @@ -0,0 +1,130 @@ +module NS = Odf_ns + +type tree = + | Data of string + | Cell of {repetition: int; cell_width: int; expression: Expression.t} + | Unit + +let memoization cache key f = begin + try + Hashtbl.find cache key + with Not_found -> + let value = f key in + Hashtbl.add cache key value; + value +end + +let load_content cache content = begin function + | "float" -> Expression.Basic ( + ScTypes.number ( + DataType.Num.of_float (float_of_string content) + )) + | "date" -> Expression.Basic ( + ScTypes.date ( + DataType.Num.of_float (float_of_string content) + )) + | _ -> + (* If the same text is present many times, use the same string instead of creating a new one *) + memoization cache content (fun content -> + Expression.Basic ( + ScTypes.string ( + UTF8.from_utf8string content))) +end + +let load_formula formula = + let lineBuffer = Lexing.from_string formula in + try + Expression.Formula ( + Expression.Expression ( + Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer)) + with e -> + print_endline formula; + raise e + + +let build_cell cache (attributes:Xmlm.attribute list) (childs:tree list) = begin + + (* Check if the content is repeated *) + let repetition = + try int_of_string @@ List.assoc NS.number_columns_repeat_attr attributes + with Not_found -> 1 + + (* cell width *) + and cell_width = + try int_of_string @@ List.assoc NS.number_columns_spanned_attr attributes + with Not_found -> 1 + + and expression = + try + load_formula @@ List.assoc NS.formula_attr attributes + with Not_found -> ( + let vtype = + try List.assoc NS.ovalue_type_attr attributes + with Not_found -> "" in + + try + load_content cache (List.assoc NS.value_attr attributes) vtype + with Not_found -> ( + (* This is not a formula, neither a value ? *) + try + let value = Tools.List.find_map (function | Data x -> Some x | _ -> None) childs in + load_content cache value vtype + with Not_found -> Expression.Undefined + ) + ) in + + Cell {repetition; cell_width; expression} + +end + +let build_p (attributes:Xmlm.attribute list) = begin function + | Data x::_ -> Data x + | _ -> Data "" +end + + +let build_row (sheet:Sheet.Raw.t ref) (row_num:int ref) (attributes:Xmlm.attribute list) (childs:tree list) = begin + + let repetition = + try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attributes + with Not_found -> 1 in + + for i = 1 to repetition do + let cell_num = ref 1 in + List.iter (function + | Cell cell -> + for i = 1 to cell.repetition do + sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) cell.expression !sheet; + cell_num := !cell_num + cell.cell_width + done; + | _ -> () + ) childs; + incr row_num + done; + Unit +end + +let data str = Data str + +let load source = begin + + (* Mutable datas *) + let sheet = ref Sheet.Raw.empty in + let cache = Hashtbl.create 10 in + + let table = Base.String_dict.of_alist_exn [ + ((NS.text ^ "p"), build_p); + ((NS.table ^ "table-cell"), build_cell cache); + ((NS.table ^ "table-row"), build_row sheet (ref 1)) + ] in + + let el (((ns, name), attributes):Xmlm.tag) childs = begin + match Base.String_dict.find table (ns ^ name) with + | Some f -> f attributes childs + | None -> Unit + end in + + match Xmlm.input_doc_tree ~el ~data source with + | _, Unit -> !sheet + | _ -> raise Not_found +end -- cgit v1.2.3