aboutsummaryrefslogtreecommitdiff
path: root/unicode.ml
diff options
context:
space:
mode:
Diffstat (limited to 'unicode.ml')
-rwxr-xr-x unicode.ml 51
1 files changed, 51 insertions, 0 deletions
diff --git a/unicode.ml b/unicode.ml
new file mode 100755
index 0000000..eb0d60d
--- /dev/null
+++ b/unicode.ml
@@ -0,0 +1,51 @@
+(** A decoded Unicode string, stored as an array with one [Uutf.uchar] per
+    decoded character.
+
+    The accessors in this file convert elements with [Uchar.of_int] and
+    [Uchar.to_int], i.e. the elements are handled as integer code points. *)
+type t = Uutf.uchar array
+
+(** Alias for [Uutf.decoder_encoding]. Presumably re-exported so callers can
+    name the value given as [?encoding] to [decode], which forwards it to
+    [Uutf.decoder]. *)
+type decoder_encoding = Uutf.decoder_encoding
+
+(** [array_from_rev_list l] returns a fresh array holding the elements of [l]
+    in reverse order: the last element of [l] ends up at index 0 and the head
+    of [l] at the last index. The empty list yields the empty array.
+
+    The array is built with [List.rev] and [Array.of_list] instead of being
+    pre-filled with an [Obj.magic] placeholder. The placeholder was not
+    type-safe: used at type [float] it produced an array with the wrong
+    runtime representation. *)
+let array_from_rev_list l = Array.of_list (List.rev l)
+
+
+(** [decode ?encoding str] decodes the string [str] with a [Uutf] decoder and
+    returns the decoded characters as an array, in input order.
+
+    [?encoding] is forwarded unchanged to [Uutf.decoder]. Every malformed
+    sequence reported by the decoder is replaced by [Uutf.u_rep]. *)
+let decode ?encoding str =
+  let dec = Uutf.decoder ?encoding (`String str) in
+  (* Characters are consed onto [acc], so [acc] is in reverse input order;
+     [array_from_rev_list] restores the input order at the end. *)
+  let rec collect acc =
+    match Uutf.decode dec with
+    | `End -> array_from_rev_list acc
+    | `Uchar c -> collect (c :: acc)
+    | `Malformed _ -> collect (Uutf.u_rep :: acc)
+    (* Not expected here: the decoder reads from a [`String] source, and Uutf
+       documents [`Await] for [`Manual] sources. *)
+    | `Await -> assert false
+  in
+  collect []
+
+(** [to_utf8 t] encodes every element of [t], in order, as UTF-8 using
+    [Uutf.Buffer.add_utf_8] and returns the resulting string. *)
+let to_utf8 (t:t) =
+  (* 512 is only the buffer's initial size; it grows as needed. *)
+  let out = Buffer.create 512 in
+  Array.iter (fun u -> Uutf.Buffer.add_utf_8 out u) t;
+  Buffer.contents out
+
+(** [length t] is the number of elements in [t]. *)
+let length t = Array.length t
+
+(** [get t i] returns the element at index [i] of [t] as a [Uchar.t].
+    @raise Invalid_argument if [i] is out of bounds, or if the stored integer
+    is rejected by [Uchar.of_int]. *)
+let get t i =
+  let code = Array.get t i in
+  Uchar.of_int code
+
+(** [make i v] returns a fresh array of length [i] in which every element is
+    the integer code of [v]. *)
+let make i v = Array.make i (Uchar.to_int v)
+
+(** [init s f] returns a fresh array of length [s] whose element at index [x]
+    is the integer code of [f x]. *)
+let init s f =
+  let code_at idx = Uchar.to_int (f idx) in
+  Array.init s code_at
+
+(** [sub t pos len] returns a fresh array with the [len] elements of [t]
+    starting at index [pos]. Same contract as [Array.sub]. *)
+let sub t pos len = Array.sub t pos len
+
+(** [blit src src_pos dst dst_pos len] copies [len] elements of [src],
+    starting at [src_pos], into [dst] starting at [dst_pos]. Same contract as
+    [Array.blit]. *)
+let blit src src_pos dst dst_pos len = Array.blit src src_pos dst dst_pos len
+
+(** [concat arrays] returns a fresh array made of the given arrays joined in
+    list order. Same contract as [Array.concat]. *)
+let concat arrays = Array.concat arrays
+
+(** [iter f t] applies [f] to every element of [t] in index order, passing
+    each element converted to a [Uchar.t] with [Uchar.of_int]. *)
+let iter f t =
+  let visit code = f (Uchar.of_int code) in
+  Array.iter visit t
+
+
+(** [to_list t] returns the elements of [t], in index order, as a list of
+    [Uchar.t] values obtained with [Uchar.of_int]. *)
+let to_list t =
+  let uchars = Array.map Uchar.of_int t in
+  Array.to_list uchars