1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
open CamomileLibraryDefault.Camomile
open Result
type t= Zed_utf8.t
type char_prop=
| Printable of int
| Other
| Null
let to_raw= Zed_utf8.explode
let to_array t= Array.of_list (Zed_utf8.explode t)
let zero= String.make 1 (Char.chr 0)
let core t= Zed_utf8.unsafe_extract t 0
let combined t= List.tl (Zed_utf8.explode t)
let prop_uChar uChar=
match CharInfo_width.width uChar with
| -1 -> Other
| 0->
if UChar.code uChar = 0
then Null
else Printable 0
| w-> Printable w
let prop t= prop_uChar (Zed_utf8.unsafe_extract t 0)
let is_printable uChar=
match prop_uChar uChar with
| Printable _ -> true
| _-> false
let is_printable_core uChar=
match prop_uChar uChar with
| Printable w when w > 0 -> true
| _-> false
let is_combining_mark uChar=
match prop_uChar uChar with
| Printable w when w = 0 -> true
| _-> false
let length= Zed_utf8.length
let size= length
let width t= CharInfo_width.width (Zed_utf8.unsafe_extract t 0)
let out_of_range t i= i < 0 || i >= size t
let get= Zed_utf8.get
let get_opt t i=
try Some (get t i)
with _-> None
let append ch mark=
match prop_uChar mark with
| Printable 0-> ch ^ (Zed_utf8.singleton mark)
| _-> failwith "combining mark expected"
let compare_core t1 t2=
let core1= Zed_utf8.unsafe_extract t1 0
and core2= Zed_utf8.unsafe_extract t2 0 in
UChar.compare core1 core2
let compare_raw= Zed_utf8.compare
let compare= compare_raw
let mix_uChar zChar uChar=
match prop_uChar uChar with
| Printable 0->
Ok (zChar ^ (Zed_utf8.singleton uChar))
| _->
Error (Zed_utf8.singleton uChar)
let first_core ?(trim=false) uChars=
let rec aux uChars=
match uChars with
| []-> None, []
| uChar::tl->
let prop= prop_uChar uChar in
match prop with
| Printable w->
if w > 0
then Some (prop, uChar), tl
else aux tl
| Other-> Some (prop, uChar), tl
| Null-> Some (prop, uChar), tl
in
match uChars with
| []-> None, []
| uChar::_->
if not trim && is_combining_mark uChar then
None, uChars
else
aux uChars
let rec subsequent uChars=
match uChars with
| []-> [], []
| uChar::tl->
let prop= prop_uChar uChar in
match prop with
| Printable w->
if w > 0 then
[], uChars
else
let seq, remain= subsequent tl in
uChar :: seq, remain
| _-> [], uChars
let of_uChars ?(trim=false) ?(indv_combining=true) uChars=
match uChars with
| []-> None, []
| uChar::tl->
match first_core ~trim uChars with
| None, _->
if indv_combining then
Some (Zed_utf8.singleton uChar), tl
else
None, uChars
| Some (Printable _w, uChar), tl->
let combined, tl= subsequent tl in
Some (Zed_utf8.implode (uChar::combined)), tl
| Some (Null, uChar), tl->
Some (Zed_utf8.singleton uChar) ,tl
| Some (Other, uChar), tl->
Some (Zed_utf8.singleton uChar) ,tl
let zChars_of_uChars ?(trim=false) ?(indv_combining=true) uChars=
let rec aux zChars uChars=
match of_uChars ~trim ~indv_combining uChars with
| None, tl-> List.rev zChars, tl
| Some zChar, tl-> aux (zChar::zChars) tl
in
aux [] uChars
external id : 'a -> 'a = "%identity"
let unsafe_of_utf8 : string -> t=
fun str-> if String.length str > 0
then str
else failwith "malformed Zed_char sequence"
let of_utf8 ?(indv_combining=true) str=
match of_uChars ~indv_combining (Zed_utf8.explode str) with
| Some zChar, []-> zChar
| _-> failwith "malformed Zed_char sequence"
let to_utf8 : t -> string= id
let unsafe_of_char c=
Zed_utf8.singleton (UChar.of_char c)
let unsafe_of_uChar uChar= Zed_utf8.singleton uChar
let for_all= Zed_utf8.for_all
let iter= Zed_utf8.iter
module US(US:UnicodeString.Type) = struct
module Convert = Zed_utils.Convert(US)
let of_t t= Zed_utf8.explode t |> Convert.of_list
let to_t us=
let len= US.length us in
let rec create i=
if i < len
then US.get us i :: create (i+1)
else []
in
let uChars= create 0 in
of_uChars uChars
let to_t_exn us=
match to_t us with
| Some t, _-> t
| _-> failwith "to_t_exn"
end