1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# 1 "Camomile/public/uTF16.ml"
(** UTF-16 encoded string. *)
open Bigarray
(* Raised by [Buf.set] when a character cannot be stored: its code is
   negative, lies in 0xd800-0xdfff, is 0xfffe or 0xffff, or exceeds
   0x10ffff. *)
exception Out_of_range
(* A UTF-16 string: a one-dimensional, C-layout bigarray whose elements are
   unsigned 16-bit units read and written as OCaml ints. *)
type t = (int, int16_unsigned_elt, c_layout) Array1.t
(* Position of a 16-bit unit inside a [t]; the navigation functions below
   ([next], [prev], [move], ...) take and return such positions. *)
type index = int
(* Raised by [validate] when the sequence of units is not well formed. *)
exception Malformed_code
(* [validate_aux a i] checks the units of [a] from index [i] to the end.
   Accepted forms are:
   - a single unit below 0xd800, or in 0xe000-0xfffd;
   - a unit in 0xd800-0xdbff immediately followed by one in 0xdc00-0xdfff.
   Anything else (0xfffe, 0xffff, a unit in 0xdc00-0xdfff that does not
   follow a 0xd800-0xdbff unit, or a 0xd800-0xdbff unit that is last or is
   followed by something else) raises [Malformed_code].
   Returns unit when the end of the array is reached. *)
let rec validate_aux (a:t) i =
  let len = Array1.dim a in
  if i < len then begin
    let w = a.{i} in
    let single = w < 0xd800 || (0xe000 <= w && w < 0xfffe) in
    let high = 0xd800 <= w && w < 0xdc00 in
    if single then validate_aux a (i + 1)
    else if not high || i + 1 >= len then raise Malformed_code
    else begin
      let w' = a.{i + 1} in
      if 0xdc00 <= w' && w' < 0xe000 then validate_aux a (i + 2)
      else raise Malformed_code
    end
  end

(* [validate a] checks the whole of [a], starting at index 0.
   Raises [Malformed_code] as described for [validate_aux]. *)
let validate (a:t) = validate_aux a 0
(* [look a i] decodes the character whose first unit is at index [i].

   - A unit outside 0xd800-0xdfff is returned as is.
   - A unit in 0xd800-0xdbff (high surrogate) is combined with the unit at
     [i + 1], which must lie in 0xdc00-0xdfff (low surrogate).

   Raises [Invalid_argument] with the message UTF16.look when [i] points at
   a low surrogate, when a high surrogate is the last unit of [a], or when
   the unit following a high surrogate is not a low surrogate.  The last
   two cases are checked explicitly so that an unpaired high surrogate is
   never turned into an unrelated code point and so that the check does
   not depend on bigarray bounds checking being enabled.
   An out-of-bounds [i] itself is still reported by the bigarray access. *)
let look (a:t) i : UChar.t =
  let n0 = a.{i} in
  if n0 < 0xd800 || n0 >= 0xe000 then UChar.chr_of_uint n0
  else if n0 >= 0xdc00 || i + 1 >= Array1.dim a then invalid_arg "UTF16.look"
  else
    let n1 = a.{i + 1} in
    (* The trailing unit must really be a low surrogate. *)
    if n1 < 0xdc00 || n1 >= 0xe000 then invalid_arg "UTF16.look"
    else
      UChar.chr_of_uint
        (((n0 - 0xd800) lsl 10) + (n1 - 0xdc00) + 0x10000)
(* [length_aux a c i] adds to [c] the number of characters counted from
   index [i] to the end of [a].  A unit in 0xd800-0xdfff is counted as the
   start of a two-unit character; every other unit counts as one
   character of one unit. *)
let rec length_aux (a:t) c i =
  if i < Array1.dim a then
    let w = a.{i} in
    let width = if 0xd800 <= w && w < 0xe000 then 2 else 1 in
    length_aux a (c + 1) (i + width)
  else c

(* [length a] is the number of characters in [a], counted from index 0. *)
let length (a:t) = length_aux a 0 0
(* [next a i] is the index just after the character starting at [i]:
   [i + 2] when the unit at [i] lies in 0xd800-0xdbff, [i + 1] otherwise. *)
let next (a:t) i =
  let w = a.{i} in
  let starts_pair = 0xd800 <= w && w < 0xdc00 in
  i + (if starts_pair then 2 else 1)
(* [prev a i] is the index of the character that ends just before [i].
   The unit at [i - 1] is inspected: when it lies in 0xdc00-0xdfff the
   result is [i - 2], otherwise [i - 1]. *)
let prev (a:t) i =
  let j = i - 1 in
  let w = a.{j} in
  if 0xdc00 <= w && w < 0xe000 then j - 1 else j
(* [move_forward a i c] applies [next] to [i] exactly [c] times and returns
   the resulting index; [i] is returned unchanged when [c <= 0]. *)
let rec move_forward (a:t) i c =
  if c <= 0 then i
  else move_forward a (next a i) (c - 1)
(* [move_backward a i c] takes a negative count [c] and applies [prev] to
   [i] exactly [-c] times; [i] is returned unchanged when [c >= 0]. *)
let rec move_backward (a:t) i c =
  if c >= 0 then i
  else move_backward a (prev a i) (c + 1)
(* [move a i c] moves index [i] by [c] characters: forward when [c] is
   positive, backward when it is negative, and not at all when it is 0. *)
let move (a:t) i c =
  if c = 0 then i
  else if c > 0 then move_forward a i c
  else move_backward a i c
(* [first a] is the index of the first character, always 0. *)
let first _ = 0

(* [last a] is the index of the last character, obtained by stepping back
   once from the index one past the end of [a]. *)
let last (a:t) =
  let past_end = Array1.dim a in
  prev a past_end

(* [out_of_range a i] is true when [i] is not a valid unit index of [a]. *)
let out_of_range (a:t) i = not (0 <= i && i < Array1.dim a)

(* [compare_index a i j] is negative, zero or positive when [i] is before,
   equal to or after [j]; the string argument is ignored. *)
let compare_index _ i j = i - j
(* [nth a c] is the index reached by stepping forward [c] characters from
   index 0; a non-positive [c] yields 0. *)
let nth (a:t) c = move_forward a 0 c

(* [get a c] decodes the character found at the index [nth a c]. *)
let get (a:t) c =
  let i = nth a c in
  look a i
(* [iter_aux proc a i] calls [proc] on each character of [a] in order,
   starting with the one at index [i] and advancing with [next] until the
   index reaches the end of the array. *)
let rec iter_aux proc (a:t) i =
  if i < Array1.dim a then begin
    proc (look a i);
    iter_aux proc a (next a i)
  end

(* [iter proc a] calls [proc] on every character of [a], from index 0. *)
let iter proc (a:t) = iter_aux proc a 0
(* Growable buffer of UTF-16 units. *)
module Buf = struct
  (* [set a i u] stores the encoding of [u] into [a] starting at index [i]
     and returns the number of units written: 1 for codes below 0xd800 or
     in 0xe000-0xfffd, 2 (a surrogate pair) for codes in
     0x10000-0x10ffff.  Raises [Out_of_range] for any other code. *)
  let set (a:t) i u =
    let code = UChar.uint_code u in
    if code < 0 then raise Out_of_range
    else if code < 0xd800 || (0xe000 <= code && code <= 0xfffd) then begin
      a.{i} <- code;
      1
    end
    else if 0x10000 <= code && code <= 0x10ffff then begin
      (* Split the 20-bit offset into its high and low 10 bits. *)
      let offset = code - 0x10000 in
      a.{i} <- 0xd800 + (offset lsr 10);
      a.{i + 1} <- 0xdc00 + (offset land 0x3ff);
      2
    end
    else raise Out_of_range

  (* [init_size] is the capacity requested at creation (reused by [reset]),
     [pos] the number of units currently in use, [contents] the storage. *)
  type buf = {init_size : int; mutable pos : index; mutable contents : t}

  (* [create n] is an empty buffer with storage for [n] units. *)
  let create n =
    {init_size = n; pos = 0; contents = Array1.create int16_unsigned c_layout n}

  (* [clear buf] empties the buffer while keeping its current storage. *)
  let clear buf = buf.pos <- 0

  (* [reset buf] empties the buffer and replaces its storage with a fresh
     array of the initial size. *)
  let reset buf =
    let fresh = Array1.create int16_unsigned c_layout buf.init_size in
    buf.contents <- fresh;
    buf.pos <- 0

  (* [contents buf] is a fresh copy of the units in use. *)
  let contents buf =
    let used = buf.pos in
    let copy = Array1.create int16_unsigned c_layout used in
    Array1.blit (Array1.sub buf.contents 0 used) copy;
    copy

  (* [resize buf n] makes sure the storage holds at least [n] units.  When
     it is too small, a new array of [2 * n] units is allocated and the
     whole old storage is copied to its beginning. *)
  let resize buf n =
    let capacity = Array1.dim buf.contents in
    if capacity < n then begin
      let grown = Array1.create int16_unsigned c_layout (2 * n) in
      Array1.blit buf.contents (Array1.sub grown 0 capacity);
      buf.contents <- grown
    end

  (* [add_char buf u] appends the encoding of [u].  Room for two units is
     reserved first since a character needs at most two.  Raises
     [Out_of_range] as [set] does. *)
  let add_char buf u =
    resize buf (buf.pos + 2);
    let written = set buf.contents buf.pos u in
    buf.pos <- buf.pos + written

  (* [add_string buf a] appends all the units of [a]. *)
  let add_string buf (a:t) =
    let n = Array1.dim a in
    let new_pos = buf.pos + n in
    resize buf new_pos;
    Array1.blit a (Array1.sub buf.contents buf.pos n);
    buf.pos <- new_pos

  (* [add_buffer buf1 buf2] appends the units in use of [buf2] to [buf1]. *)
  let add_buffer buf1 buf2 =
    let n = buf2.pos in
    let new_pos = buf1.pos + n in
    resize buf1 new_pos;
    let src = Array1.sub buf2.contents 0 n in
    let dst = Array1.sub buf1.contents buf1.pos n in
    Array1.blit src dst;
    buf1.pos <- new_pos
end
(* [init len f] builds the string whose characters are [f 0], [f 1], ...,
   [f (len - 1)], computed and appended in that order to a buffer created
   with room for [len + 1] units. *)
let init len f =
  let buf = Buf.create (len + 1) in
  let rec fill i =
    if i < len then begin
      Buf.add_char buf (f i);
      fill (i + 1)
    end
  in
  fill 0;
  Buf.contents buf
(* [compare_aux a b i] compares [a] and [b] unit by unit from index [i] up
   to the end of [a].  It returns 0 when no unit differs.  At the first
   differing position the result is the difference of the two units, after
   units in 0xd800-0xdbff have had bit 0x10000 set so that they rank above
   every other unit (presumably to order surrogate pairs after all
   single-unit characters). *)
let rec compare_aux (a:t) b i =
  if i < Array1.dim a then begin
    let x = a.{i} in
    let y = b.{i} in
    if x <> y then
      let rank w = if 0xd800 <= w && w < 0xdc00 then 0x10000 lor w else w in
      rank x - rank y
    else compare_aux a b (i + 1)
  end
  else 0

(* [compare a b] orders first by number of units (the difference of the
   two dimensions) and, for arrays of equal size, by [compare_aux]. *)
let compare (a:t) b =
  match Array1.dim a - Array1.dim b with
  | 0 -> compare_aux a b 0
  | diff -> diff