Source file OASISString.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
(** Various string utilities.
Mostly inspired by extlib and batteries ExtString and BatString libraries.
@author Sylvain Le Gall
*)
(** [nsplitf str f] cuts [str] at every character for which [f] returns
    [true]. The separator characters are dropped and empty fields are kept,
    so a string with [n] separators yields [n + 1] fields. The empty string
    yields the empty list. [f] is applied once to each character, from left
    to right.
*)
let nsplitf str f =
  let len = String.length str in
  if len = 0 then
    []
  else
    (* [field_start] is the index where the field being scanned begins;
       fields are accumulated in reverse and flipped once at the end. *)
    let rec scan field_start i acc =
      if i = len then
        List.rev (String.sub str field_start (i - field_start) :: acc)
      else if f str.[i] then
        scan (i + 1) (i + 1)
          (String.sub str field_start (i - field_start) :: acc)
      else
        scan field_start (i + 1) acc
    in
    scan 0 0 []
(** [nsplit str c] splits the string [str] at every occurrence of the
    character [c]. The separator is not included in the returned fields.
    This delegates to [nsplitf] with an equality test against [c].
*)
let nsplit str c =
  nsplitf str (fun ch -> c = ch)
(** [find ~what ?offset str] returns the index of the first occurrence of
    [what] in [str] that starts at or after [offset] (default [0]).

    An empty [what] matches immediately and yields [offset].

    @raise Not_found if [what] does not occur in [str] from [offset] on.
*)
let find ~what ?(offset=0) str =
  let what_len = String.length what in
  let str_len = String.length str in
  (* [matches_at pos k] holds when what.[k..] equals str.[pos + k..]. *)
  let rec matches_at pos k =
    k >= what_len || (str.[pos + k] = what.[k] && matches_at pos (k + 1))
  in
  (* Every candidate start position is tried in turn. Restarting the
     comparison from scratch at each position is what lets us find matches
     that overlap a failed partial match (e.g. "ab" inside "aab"). *)
  let rec search pos =
    if pos + what_len > str_len then
      raise Not_found
    else if matches_at pos 0 then
      pos
    else
      search (pos + 1)
  in
  if what_len = 0 then
    offset
  else
    search offset
(** [sub_start str len] returns [str] without its first [len] characters.
    The result is the empty string when [len] is greater than or equal to
    the length of [str].
*)
let sub_start str len =
  let total = String.length str in
  if total > len then
    String.sub str len (total - len)
  else
    ""
(** [sub_end ?offset str len] returns [str] without its last [len]
    characters. The result is the empty string when [len] is greater than or
    equal to the length of [str].

    The optional [offset] argument is accepted but has no effect on the
    result.
*)
let sub_end ?offset:(_unused_offset = 0) str len =
  let total = String.length str in
  if total > len then
    String.sub str 0 (total - len)
  else
    ""
(** [starts_with ~what ?offset str] tells whether [what] occurs in [str]
    exactly at position [offset] (default [0]). An empty [what] always
    matches. The answer is [false] when [str] is too short to hold [what]
    at that position.
*)
let starts_with ~what ?(offset=0) str =
  let what_len = String.length what in
  let str_len = String.length str in
  (* [k] counts how many leading characters of [what] have matched so far. *)
  let rec matches k =
    if k = what_len then
      true
    else if offset + k >= str_len then
      false
    else
      str.[offset + k] = what.[k] && matches (k + 1)
  in
  matches 0
(** [strip_starts_with ~what str] removes the prefix [what] from [str].

    @raise Not_found if [str] does not start with [what].
*)
let strip_starts_with ~what str =
  match starts_with ~what str with
  | true -> sub_start str (String.length what)
  | false -> raise Not_found
(** [ends_with ~what ?offset str] tells whether [str] ends with [what].
    Characters of [str] located before index [offset] (default [0]) are
    never considered, so the match must fit entirely at or after [offset].
    An empty [what] always matches.
*)
let ends_with ~what ?(offset=0) str =
  (* Compare both strings from their last character backwards. *)
  let rec matches what_pos str_pos =
    if what_pos < 0 then
      true
    else if str_pos < offset then
      false
    else
      str.[str_pos] = what.[what_pos]
      && matches (what_pos - 1) (str_pos - 1)
  in
  matches (String.length what - 1) (String.length str - 1)
(** [strip_ends_with ~what str] removes the suffix [what] from [str].

    @raise Not_found if [str] does not end with [what].
*)
let strip_ends_with ~what str =
  match ends_with ~what str with
  | true -> sub_end str (String.length what)
  | false -> raise Not_found
(** [replace_chars f s] builds a new string of the same length as [s] in
    which each character [c] is replaced by [f c]. [f] is applied to the
    characters from left to right.
*)
let replace_chars f s =
  let len = String.length s in
  let out = Buffer.create len in
  for i = 0 to len - 1 do
    Buffer.add_char out (f s.[i])
  done;
  Buffer.contents out
(** [lowercase_ascii s] returns a copy of [s] in which the characters
    ['A'] to ['Z'] are shifted to ['a'] to ['z'] (character code plus 32).
    All other characters are left unchanged.
*)
let lowercase_ascii =
  let lower = function
    | 'A' .. 'Z' as c -> Char.chr (Char.code c + 32)
    | c -> c
  in
  replace_chars lower
(** [uncapitalize_ascii s] returns [s] with its first character passed
    through [lowercase_ascii]; the rest of the string is kept as is.
    The empty string is returned unchanged.
*)
let uncapitalize_ascii s =
  match String.length s with
  | 0 -> s
  | len ->
    let head = String.sub s 0 1 in
    let tail = String.sub s 1 (len - 1) in
    lowercase_ascii head ^ tail
(** [uppercase_ascii s] returns a copy of [s] in which the characters
    ['a'] to ['z'] are shifted to ['A'] to ['Z'] (character code minus 32).
    All other characters are left unchanged.
*)
let uppercase_ascii =
  let upper = function
    | 'a' .. 'z' as c -> Char.chr (Char.code c - 32)
    | c -> c
  in
  replace_chars upper
(** [capitalize_ascii s] returns [s] with its first character passed
    through [uppercase_ascii]; the rest of the string is kept as is.
    The empty string is returned unchanged.
*)
let capitalize_ascii s =
  match String.length s with
  | 0 -> s
  | len ->
    let head = String.sub s 0 1 in
    let tail = String.sub s 1 (len - 1) in
    uppercase_ascii head ^ tail
(** [is_whitespace c] is [true] exactly when [c] is a space, a carriage
    return, a line feed or a horizontal tab.
*)
let is_whitespace c =
  c = ' ' || c = '\r' || c = '\n' || c = '\t'
(** [tokenize ?is_whitespace ?tokens str] cuts [str] into a list of words.

    - Characters for which [is_whitespace] holds (by default the
      module-level [is_whitespace]) end the current word and are dropped.
    - Whenever one of the strings in [tokens] (default: none) occurs at the
      current position, the current word is ended and that token is emitted
      as a word of its own. Tokens are tried in list order and the first one
      that matches wins.
    - Any other character is appended to the current word.

    Empty words are never produced. Empty strings in [tokens] are ignored:
    they would match at every position without consuming any input.
*)
let tokenize ?(is_whitespace=is_whitespace) ?(tokens=[]) str =
  let lst = ref [] in
  let buf = Buffer.create 13 in
  let idx = ref 0 in
  (* Flush the word being accumulated, if any, onto the result list. *)
  let push () =
    if Buffer.length buf > 0 then
      begin
        lst := Buffer.contents buf :: !lst;
        Buffer.clear buf
      end
  in
  (* Try to recognise a token at the current position; on success the token
     is emitted and [idx] is moved past it. *)
  let match_token () =
    List.exists
      (fun tok ->
         (* The [tok <> ""] guard guarantees progress: an empty token
            matches everywhere and would leave [idx] unchanged forever. *)
         if tok <> "" && starts_with ~what:tok ~offset:!idx str then
           begin
             push ();
             lst := tok :: !lst;
             idx := !idx + (String.length tok);
             true
           end
         else
           false)
      tokens
  in
  while !idx < String.length str do
    let c = str.[!idx] in
    if is_whitespace c then
      begin
        push ();
        incr idx
      end
    else if match_token () then
      begin
        (* [match_token] already emitted the token and advanced [idx]. *)
        ()
      end
    else
      begin
        Buffer.add_char buf c;
        incr idx
      end
  done;
  push ();
  List.rev !lst
(** [tokenize_genlex ?tokens str] runs the lexer built by
    [Genlex.make_lexer tokens] (default: no keywords) over the characters of
    [str] and returns every token it produces, in order.
*)
let tokenize_genlex ?(tokens=[]) str =
  let lexer = Genlex.make_lexer tokens in
  let token_stream = lexer (Stream.of_string str) in
  (* Drain the stream one token at a time, accumulating in reverse. *)
  let rec collect acc =
    match Stream.peek token_stream with
    | None ->
      List.rev acc
    | Some tok ->
      Stream.junk token_stream;
      collect (tok :: acc)
  in
  collect []
(** [split str c] cuts [str] at the first occurrence of [c] and returns the
    part before it and the part after it. The separator itself belongs to
    neither part.

    @raise Not_found if [c] does not occur in [str].
*)
let split str c =
  let sep_pos = String.index str c in
  let before = String.sub str 0 sep_pos in
  let after_start = sep_pos + 1 in
  let after = String.sub str after_start (String.length str - after_start) in
  (before, after)
(** [trim str] returns [str] without its leading and trailing whitespace,
    as defined by [is_whitespace]. A string made only of whitespace yields
    the empty string.
*)
let trim str =
  let len = String.length str in
  (* Index of the first non-blank character, or [len] if there is none. *)
  let rec first_non_blank i =
    if i < len && is_whitespace str.[i] then
      first_non_blank (i + 1)
    else
      i
  in
  let start = first_non_blank 0 in
  (* Index of the last non-blank character; never scans below [start]. *)
  let rec last_non_blank j =
    if start <= j && is_whitespace str.[j] then
      last_non_blank (j - 1)
    else
      j
  in
  let stop = last_non_blank (len - 1) in
  String.sub str start (stop - start + 1)
(** [fold_left f acc str] folds [f] over the characters of [str] from left
    to right, starting with [acc]: it computes
    [f (... (f (f acc str.[0]) str.[1]) ...) str.[n - 1]].
    For the empty string the result is [acc].
*)
let fold_left f acc str =
  let len = String.length str in
  let rec go i current =
    if i >= len then
      current
    else
      go (i + 1) (f current str.[i])
  in
  go 0 acc
(** [contains ~what str] tells whether [what] occurs somewhere in [str] as
    a contiguous substring. An empty [what] is contained in every string.
*)
let contains ~what str =
  let len_what = String.length what in
  let len_str = String.length str in
  (* [matches_at pos k] holds when what.[k..] equals str.[pos + k..]. *)
  let rec matches_at pos k =
    k >= len_what || (str.[pos + k] = what.[k] && matches_at pos (k + 1))
  in
  (* Every start position is tried in turn; restarting the comparison at
     each one is required to find occurrences that overlap a failed partial
     match (e.g. "ab" inside "aab"). *)
  let rec search pos =
    pos + len_what <= len_str && (matches_at pos 0 || search (pos + 1))
  in
  search 0
(** [split_comma str] splits [str] at each ',' and trims every resulting
    field. {b Not exported} *)
let split_comma str =
  let fields = nsplit str ',' in
  List.map trim fields
(** [split_newline ?do_trim str] splits [str] at each '\n'. Every resulting
    line is trimmed unless [do_trim] is [false] (it defaults to [true]).
    {b Not exported} *)
let split_newline ?(do_trim=true) str =
  let lines = nsplit str '\n' in
  match do_trim with
  | true -> List.map trim lines
  | false -> lines
(** [split_optional_parentheses str] splits a string that optionally
    carries a parenthesised part.

    The trimmed input is cut at its first '('. If that succeeds and the
    remainder ends with ")", the result is the trimmed text before the
    parenthesis together with [Some] of the trimmed text between the
    parentheses. Otherwise the result is the trimmed input and [None].
    {b Not exported} *)
let split_optional_parentheses str =
  let trimmed = trim str in
  try
    let before, after = split trimmed '(' in
    let inside = strip_ends_with ~what:")" after in
    (trim before, Some (trim inside))
  with Not_found ->
    (trimmed, None)