(* unicode.ml -- introduced by commit ef312564 ("Initial commit",
   Sébastien Dailly, 2016-11-15). *)

(** Unicode text stored as an array of code points.

    Elements have type [Uutf.uchar]. The accessors below convert them with
    [Uchar.of_int] and [Uchar.to_int], so this module only type-checks when
    [Uutf.uchar] is [int].
    NOTE(review): that appears to be the case for Uutf releases before 1.0;
    confirm against the version this project pins. *)
type t = Uutf.uchar array

(** Input encodings accepted by {!decode} (alias of the Uutf type). *)
type decoder_encoding = Uutf.decoder_encoding

(** [array_from_rev_list l] builds an array holding the elements of [l] in
    reverse order: the head of [l] becomes the last cell. The empty list
    gives the empty array.

    The array is seeded with the head of the list instead of an
    [Obj.magic] placeholder, so the function stays type-safe for every
    element type (including [float]). *)
let array_from_rev_list = function
  | [] -> [||]
  | first :: _ as l ->
    let last = List.length l - 1 in
    let arr = Array.make (last + 1) first in
    (* Element number [i] of the list lands at index [last - i]. *)
    List.iteri (fun i elem -> arr.(last - i) <- elem) l;
    arr

(** [decode ?encoding str] decodes the whole of [str] with a Uutf decoder
    and returns the decoded characters in input order.

    [encoding] is forwarded unchanged to [Uutf.decoder]. Every malformed
    sequence reported by the decoder is replaced by [Uutf.u_rep]. *)
let decode ?encoding str =
  let decoder = Uutf.decoder ?encoding (`String str) in
  (* Characters are consed onto [acc], hence accumulated in reverse. *)
  let rec loop acc =
    match Uutf.decode decoder with
    | `Uchar u -> loop (u :: acc)
    | `Malformed _ -> loop (Uutf.u_rep :: acc)
    | `Await ->
      (* The source is a complete [`String], so the decoder is not
         expected to ask for more input. *)
      assert false
    | `End -> array_from_rev_list acc
  in
  loop []

(** [to_utf8 t] encodes every element of [t] as UTF-8 and returns the
    resulting string. *)
let to_utf8 (t : t) =
  (* Each element needs at least one byte, so the element count is a
     sensible initial capacity; the buffer grows on demand. *)
  let buf = Buffer.create (Array.length t) in
  Array.iter (Uutf.Buffer.add_utf_8 buf) t;
  Buffer.contents buf

(** Number of elements; this is [Array.length]. *)
let length = Array.length

(** [get t i] is the element of [t] at index [i], as a [Uchar.t].
    @raise Invalid_argument if [i] is out of bounds or if the stored
    integer is not accepted by [Uchar.of_int]. *)
let get t i = Uchar.of_int @@ Array.get t i

(** [make i v] is an array of [i] cells, each holding the integer value
    of [v]. *)
let make i v = Array.make i @@ Uchar.to_int v

(** [init s f] is an array of [s] cells where cell [x] holds the integer
    value of [f x]. *)
let init s f = Array.init s (fun x -> Uchar.to_int @@ f x)

(** Sub-array extraction; this is [Array.sub]. *)
let sub = Array.sub

(** In-place copy between arrays; this is [Array.blit]. *)
let blit = Array.blit

(** Concatenation of a list of arrays; this is [Array.concat]. *)
let concat = Array.concat

(** [iter f t] applies [f] to every element of [t], in order, after
    converting it with [Uchar.of_int]. *)
let iter f t = Array.iter (fun x -> f @@ Uchar.of_int x) t

(** [to_list t] is the list of the elements of [t], in order, each
    converted with [Uchar.of_int]. *)
let to_list t =
  Array.map Uchar.of_int t
  |> Array.to_list