diff options
Diffstat (limited to 'unicode.ml')
| -rwxr-xr-x | unicode.ml | 51 | 
1 files changed, 51 insertions, 0 deletions
(** Unicode strings represented as arrays of code points.

    Elements are stored as [Uutf.uchar]. The accessors below convert to and
    from [Uchar.t] with [Uchar.of_int] / [Uchar.to_int], so this module
    presumably targets a Uutf release in which [Uutf.uchar] is an alias for
    [int]. NOTE(review): confirm against the Uutf version actually in use. *)

(** A decoded string: one array cell per decoded character. *)
type t = Uutf.uchar array

(** Encodings accepted by {!decode}; re-exported from Uutf. *)
type decoder_encoding = Uutf.decoder_encoding

(** [array_from_rev_list l] returns an array containing the elements of [l]
    in reverse order, i.e. the last element of [l] ends up at index [0].
    The empty list yields the empty array. *)
let array_from_rev_list l =
  (* Built from stdlib primitives instead of pre-filling an array with
     [Obj.magic 0]: a forged placeholder is unsafe for element types that
     have a special array representation (e.g. [float]). *)
  Array.of_list (List.rev l)

(** [decode ?encoding str] decodes [str] with a Uutf decoder and returns the
    decoded characters in input order. [encoding] is forwarded unchanged to
    [Uutf.decoder]. Every malformed sequence reported by the decoder is
    replaced by one [Uutf.u_rep]. *)
let decode ?encoding str =
  let decoder = Uutf.decoder ?encoding (`String str) in
  (* [acc] holds the characters decoded so far, most recent first. *)
  let rec loop acc =
    match Uutf.decode decoder with
    | `Uchar u -> loop (u :: acc)
    | `Malformed _ -> loop (Uutf.u_rep :: acc)
    | `Await ->
      (* Per the Uutf documentation [`Await] is only returned for [`Manual]
         sources; the source here is a [`String]. *)
      assert false
    | `End -> array_from_rev_list acc
  in
  loop []

(** [to_utf8 t] returns the UTF-8 encoding of [t] as a string. *)
let to_utf8 (t : t) =
  let buf = Buffer.create 512 in
  Array.iter (Uutf.Buffer.add_utf_8 buf) t;
  Buffer.contents buf

(** Number of elements in the array; this is [Array.length]. *)
let length = Array.length

(** [get t i] returns element [i] of [t] as a [Uchar.t].
    @raise Invalid_argument if [i] is out of bounds, or if the stored value
    is rejected by [Uchar.of_int]. *)
let get t i = Uchar.of_int (Array.get t i)

(** [make i v] returns a fresh array of length [i] whose cells all hold the
    integer value of [v].
    @raise Invalid_argument if [i] is rejected by [Array.make]. *)
let make i v = Array.make i (Uchar.to_int v)

(** [init s f] returns a fresh array of length [s] whose cell [x] holds the
    integer value of [f x]. *)
let init s f = Array.init s (fun x -> Uchar.to_int (f x))

(** Sub-array extraction; this is [Array.sub]. *)
let sub = Array.sub

(** In-place copy between arrays; this is [Array.blit]. *)
let blit = Array.blit

(** Concatenation of a list of arrays; this is [Array.concat]. *)
let concat = Array.concat

(** [iter f t] applies [f] to every element of [t], in index order, after
    converting it to a [Uchar.t]. *)
let iter f t = Array.iter (fun x -> f (Uchar.of_int x)) t

(** [to_list t] returns the elements of [t], in order, as a list of
    [Uchar.t] values. *)
let to_list t = t |> Array.map Uchar.of_int |> Array.to_list
