aboutsummaryrefslogtreecommitdiff
path: root/src/unicode.ml
blob: cc8c087396e6d6959a979fd8306cfe0604ab5496 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
type t = Uchar.t array

type decoder_encoding = Uutf.decoder_encoding

let array_from_rev_list l = begin
  let length = (List.length l) - 1 in
  let arr = Array.make (length + 1) (Obj.magic 0) in
  List.iteri (fun i elem -> Array.set arr (length - i) elem)  l;
  arr
end


let decode ?encoding str = begin
  let decoder = Uutf.decoder ?encoding (`String str) in
  let rec loop buf = begin match Uutf.decode decoder with
  | `Uchar u -> loop (u::buf)
  | `Malformed _ -> loop (Uutf.u_rep::buf)
  | `Await -> assert false
  | `End -> (
    array_from_rev_list buf
  )
  end in
  loop []
end

let to_utf8 (t:t) = begin
  let buf = Buffer.create 512 in
  Array.iter (Uutf.Buffer.add_utf_8 buf) t;
  Buffer.contents buf
end

let length = Array.length

let get t i = Uchar.of_int @@ Array.get t i

let make i v = Array.make i @@ Uchar.to_int v

let init s f = Array.init s (fun x -> Uchar.to_int @@ f x)

let sub = Array.sub

let blit = Array.blit

let concat = Array.concat

let iter f t = Array.iter (fun x -> f @@ Uchar.of_int x) t


let to_list t =
    Array.map Uchar.of_int t
 |> Array.to_list