aboutsummaryrefslogtreecommitdiff
path: root/src/odf
diff options
context:
space:
mode:
authorSébastien Dailly <sebastien@chimrod.com>2018-01-19 11:24:29 +0100
committerSébastien Dailly <sebastien@chimrod.com>2018-01-25 17:17:15 +0100
commit112ab4b1c396fc2117191297227d8e411f9b9bb3 (patch)
treef6d06ef60c696b43d48e2cd8e2f7f426a03b3706 /src/odf
parent098ac444e731d7674d8910264ae58fb876618a5a (diff)
Better memory management
Diffstat (limited to 'src/odf')
-rwxr-xr-xsrc/odf/odf.ml122
-rwxr-xr-xsrc/odf/odfLoader.ml130
2 files changed, 133 insertions, 119 deletions
diff --git a/src/odf/odf.ml b/src/odf/odf.ml
index ae120d9..048be2e 100755
--- a/src/odf/odf.ml
+++ b/src/odf/odf.ml
@@ -1,4 +1,3 @@
-module Xml = Ezxmlm
module T = Tools
module NS = Odf_ns
@@ -6,125 +5,10 @@ let u = UTF8.from_utf8string
type t
-(** Map for storing all the attributes *)
-module AttributesMap = Map.Make (struct
- type t = string * string
- let compare = Pervasives.compare
-end)
-
-let get_attr map key = begin
- try Some (AttributesMap.find key map) with
- Not_found -> None
-end
-
-let load_formula formula =
- let lineBuffer = Lexing.from_string formula in
- try
- Expression.Formula (
- Expression.Expression (
- Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer))
- with e ->
- print_endline formula;
- raise e
-
-
-let load_content content = begin function
- | "float" -> Expression.Basic (
- ScTypes.number (
- DataType.Num.of_float (float_of_string content)
- ))
- | "date" -> Expression.Basic (
- ScTypes.date (
- DataType.Num.of_float (float_of_string content)
- ))
- | _ -> Expression.Basic (
- ScTypes.string (
- UTF8.from_utf8string content))
-end
-
-(** Load the content from a cell *)
-let load_cell sheet cell_num row_num changed (attrs, cell) = begin
-
- (* Load all the attributes from the xml element *)
- let add_attr map (key, value) = AttributesMap.add key value map in
- let attributes = List.fold_left add_attr AttributesMap.empty attrs in
-
- (* Check if the content is repeated *)
- let repetition = match get_attr attributes NS.number_columns_repeat_attr with
- | None -> 1
- | Some x -> int_of_string x
-
- (* cell width *)
- and cell_width = match get_attr attributes NS.number_columns_spanned_attr with
- | None -> 1
- | Some x -> int_of_string x in
-
- let vtype =
- try List.assoc NS.ovalue_type_attr attrs
- with Not_found -> "" in
-
- let formula = get_attr attributes NS.formula_attr
- and value = get_attr attributes NS.value_attr in
-
- let expression, update = begin match formula, value with
- | Some x, _ -> load_formula x, true
- | _, Some x ->
- (load_content x vtype) , true
- | _ ->
- begin try
- Xml.member "p" cell
- |> Xml.data_to_string
- |> fun x -> (load_content x vtype, true)
- with Xml.Tag_not_found _ -> Expression.Undefined, false
- end
- end in
-
- if update then (
- for i = 1 to repetition do
- cell_num := !cell_num + cell_width;
- sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) (Expression.load_expr expression) !sheet
- done
- ) else (
- cell_num := !cell_num + (repetition * cell_width )
- );
- changed || update
-end
-
-let load_row sheet row_num (attrs, row) = begin
-
- let repetition =
- try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attrs
- with Not_found -> 1 in
-
- let cells = Xml.members_with_attr "table-cell" row in
-
- try
- for i = 1 to repetition do
- incr row_num;
- let cell_num = ref 0 in
- if not (List.fold_left (load_cell sheet cell_num row_num) false cells) then
- (* No changes on the whole row. Do not repeat, and break the loop *)
- raise Not_found
- done
- with Not_found -> row_num := !row_num + repetition - 1
-end
-
let load_xml input = begin
-
- let sheet = ref Sheet.Raw.empty in
- let row_num = ref 0 in
-
- let xml =
- Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input)
- |> Xml.from_input
- |> snd in
- let rows = Xml.member "document-content" (xml::[])
- |> Xml.member "body"
- |> Xml.member "spreadsheet"
- |> Xml.member "table"
- |> Xml.members_with_attr "table-row" in
- List.iter (fun x -> (load_row sheet row_num) x) rows;
- !sheet
+ let source = Xmlm.make_input ~enc:(Some `UTF_8) (`Channel input) in
+ let sheet = OdfLoader.load source in
+ sheet
end
diff --git a/src/odf/odfLoader.ml b/src/odf/odfLoader.ml
new file mode 100755
index 0000000..9420fdd
--- /dev/null
+++ b/src/odf/odfLoader.ml
@@ -0,0 +1,130 @@
+module NS = Odf_ns
+
+type tree =
+ | Data of string
+ | Cell of {repetition: int; cell_width: int; expression: Expression.t}
+ | Unit
+
+let memoization cache key f = begin
+ try
+ Hashtbl.find cache key
+ with Not_found ->
+ let value = f key in
+ Hashtbl.add cache key value;
+ value
+end
+
+let load_content cache content = begin function
+ | "float" -> Expression.Basic (
+ ScTypes.number (
+ DataType.Num.of_float (float_of_string content)
+ ))
+ | "date" -> Expression.Basic (
+ ScTypes.date (
+ DataType.Num.of_float (float_of_string content)
+ ))
+ | _ ->
+ (* If the same text is present many times, use the same string instead of creating a new one *)
+ memoization cache content (fun content ->
+ Expression.Basic (
+ ScTypes.string (
+ UTF8.from_utf8string content)))
+end
+
+let load_formula formula =
+ let lineBuffer = Lexing.from_string formula in
+ try
+ Expression.Formula (
+ Expression.Expression (
+ Odf_ExpressionParser.value Odf_ExpressionLexer.read lineBuffer))
+ with e ->
+ print_endline formula;
+ raise e
+
+
+let build_cell cache (attributes:Xmlm.attribute list) (childs:tree list) = begin
+
+ (* Check if the content is repeated *)
+ let repetition =
+ try int_of_string @@ List.assoc NS.number_columns_repeat_attr attributes
+ with Not_found -> 1
+
+ (* cell width *)
+ and cell_width =
+ try int_of_string @@ List.assoc NS.number_columns_spanned_attr attributes
+ with Not_found -> 1
+
+ and expression =
+ try
+ load_formula @@ List.assoc NS.formula_attr attributes
+ with Not_found -> (
+ let vtype =
+ try List.assoc NS.ovalue_type_attr attributes
+ with Not_found -> "" in
+
+ try
+ load_content cache (List.assoc NS.value_attr attributes) vtype
+ with Not_found -> (
+ (* This is not a formula, neither a value ? *)
+ try
+ let value = Tools.List.find_map (function | Data x -> Some x | _ -> None) childs in
+ load_content cache value vtype
+ with Not_found -> Expression.Undefined
+ )
+ ) in
+
+ Cell {repetition; cell_width; expression}
+
+end
+
+let build_p (attributes:Xmlm.attribute list) = begin function
+ | Data x::_ -> Data x
+ | _ -> Data ""
+end
+
+
+let build_row (sheet:Sheet.Raw.t ref) (row_num:int ref) (attributes:Xmlm.attribute list) (childs:tree list) = begin
+
+ let repetition =
+ try int_of_string @@ List.assoc (NS.table, "number-rows-repeated") attributes
+ with Not_found -> 1 in
+
+ for i = 1 to repetition do
+ let cell_num = ref 1 in
+ List.iter (function
+ | Cell cell ->
+ for i = 1 to cell.repetition do
+ sheet := snd @@ Sheet.Raw.add (!cell_num, !row_num) cell.expression !sheet;
+ cell_num := !cell_num + cell.cell_width
+ done;
+ | _ -> ()
+ ) childs;
+ incr row_num
+ done;
+ Unit
+end
+
+let data str = Data str
+
+let load source = begin
+
+ (* Mutable datas *)
+ let sheet = ref Sheet.Raw.empty in
+ let cache = Hashtbl.create 10 in
+
+ let table = Base.String_dict.of_alist_exn [
+ ((NS.text ^ "p"), build_p);
+ ((NS.table ^ "table-cell"), build_cell cache);
+ ((NS.table ^ "table-row"), build_row sheet (ref 1))
+ ] in
+
+ let el (((ns, name), attributes):Xmlm.tag) childs = begin
+ match Base.String_dict.find table (ns ^ name) with
+ | Some f -> f attributes childs
+ | None -> Unit
+ end in
+
+ match Xmlm.input_doc_tree ~el ~data source with
+ | _, Unit -> !sheet
+ | _ -> raise Not_found
+end