1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
(** Offset or count expressed in encoded characters rather than bytes
    (presumably what the [~char] labelled arguments in this file carry;
    confirm against the interface).

    NOTE: from this point on the unqualified type name [char] denotes [int],
    shadowing the predefined character type. Character literals and the
    [Char] / [String] functions are unaffected. *)
type char = int
(** Integer position into a string; presumably a byte offset, as opposed to a
    character offset. TODO confirm against the interface. *)
type index = int
(** [search_head s i] scans [s] forward from byte position [i] and returns the
    first position whose byte is either below [0x80] or at least [0xc2].
    Bytes in the range [0x80..0xc1] are stepped over. If the scan reaches the
    end of [s] (or [i] is already at or past it), the position reached is
    returned unchanged.

    The byte is read without a bounds check, so [i] must not be negative. *)
let rec search_head s i =
  if i < String.length s then
    match String.unsafe_get s i with
    | '\x80' .. '\xc1' ->
      (* Not a byte that may start a character: keep looking. *)
      search_head s (i + 1)
    | _ -> i
  else i
(** [next s i] returns the byte position that follows the character located at
    byte position [i] of [s]. The step is decided solely by the byte at [i]:

    - below [0x80]: one byte;
    - [0x80..0xbf]: the result of [search_head s (i + 1)];
    - [0xc0..0xdf], [0xe0..0xef], [0xf0..0xf7], [0xf8..0xfb], [0xfc..0xfd]:
      two, three, four, five and six bytes respectively.

    The following bytes are not inspected, so the result may lie past the end
    of [s] when the last sequence is truncated.

    @raise Invalid_argument if [i] is out of bounds, or (with message
    ["UTF8.next"]) if the byte at [i] is [0xfe] or [0xff]. *)
let next s i =
  match s.[i] with
  | '\x00' .. '\x7f' -> i + 1
  | '\x80' .. '\xbf' -> search_head s (i + 1)
  | '\xc0' .. '\xdf' -> i + 2
  | '\xe0' .. '\xef' -> i + 3
  | '\xf0' .. '\xf7' -> i + 4
  | '\xf8' .. '\xfb' -> i + 5
  | '\xfc' .. '\xfd' -> i + 6
  | '\xfe' .. '\xff' -> invalid_arg "UTF8.next"
(** [length_aux s c i] adds to [c] the number of characters found in [s] from
    byte position [i] onwards and returns the total. Each step reads the byte
    at the current position, derives the width of the sequence it starts
    (one to six bytes) and jumps over it; the bytes inside a sequence are not
    inspected.

    The byte is read without a bounds check, so [i] must not be negative.

    @raise Invalid_argument with message ["UTF8.length"] when the byte at a
    position where a character should start is in [0x80..0xbf] or is [0xfe]
    or [0xff]. *)
let rec length_aux s c i =
  if i >= String.length s then c
  else
    let width =
      match String.unsafe_get s i with
      | '\x00' .. '\x7f' -> 1
      | '\xc0' .. '\xdf' -> 2
      | '\xe0' .. '\xef' -> 3
      | '\xf0' .. '\xf7' -> 4
      | '\xf8' .. '\xfb' -> 5
      | '\xfc' .. '\xfd' -> 6
      | '\x80' .. '\xbf' | '\xfe' .. '\xff' -> invalid_arg "UTF8.length"
    in
    length_aux s (c + 1) (i + width)
(** [length str] is the number of characters in [str], obtained by running
    [length_aux] from byte position 0 with a count of 0.

    @raise Invalid_argument whenever [length_aux] rejects a byte. *)
let length str = length_aux str 0 0
(** [nth_aux s i n] applies [next] [n] times starting from byte position [i]
    and returns the byte position reached. Recursion stops only when the
    counter is exactly 0, so a negative [n] keeps advancing until [next]
    raises. *)
let rec nth_aux s i n =
  match n with
  | 0 -> i
  | _ -> nth_aux s (next s i) (n - 1)
(** [nth str count] is the byte position reached after skipping [count]
    characters from the start of [str] (delegates to [nth_aux] starting at
    byte 0). *)
let nth str count = nth_aux str 0 count
(** [byte_of_char ~line ~char] converts the character offset [char] into a
    byte offset in [line].

    - If [char] is smaller than the number of characters in [line], the
      result is the byte position at which that character starts.
    - Otherwise the offset is clamped: [0] for an empty line, else the byte
      position at which the {e last} character of [line] starts.

    The clamped result is always a character boundary, so it never points
    into the middle of a multi-byte sequence.

    @raise Invalid_argument if [length] rejects [line], or if [char] is
    negative (the walk in [nth] then runs off the end of [line]). *)
let byte_of_char ~line ~char =
  let n_chars = length line in
  if char < n_chars then nth line char
  else if n_chars = 0 then 0
  else
    (* The parentheses matter: [nth line n_chars - 1] parses as
       [(nth line n_chars) - 1], i.e. the last *byte* of the line, which
       lands inside the final character whenever it is multi-byte. *)
    nth line (n_chars - 1)
(** [find_char line byte] converts the byte offset [byte] into a character
    offset in [line].

    When [byte] is at or past [String.length line] the result is
    [length line]. Otherwise characters are stepped over with [next] from the
    start of [line], and the result is the number of characters whose
    following position is at or before [byte], which is the index of the
    character that contains [byte]. *)
let find_char line byte =
  if byte >= String.length line then length line
  else
    let rec count_until pos seen =
      let after = next line pos in
      if after <= byte then count_until after (seen + 1) else seen
    in
    count_until 0 0
(** [char_of_byte ~line ~byte] returns [find_char line byte], the character
    offset in [line] that corresponds to the byte offset [byte].

    When [Debug.unicode] is set, the inputs and the result are each reported
    through [Io.Log.trace]; the messages are only built in that case. *)
let char_of_byte ~line ~byte =
  if Debug.unicode then begin
    let msg = Format.asprintf "str: '%s' | byte: %d" line byte in
    Io.Log.trace "get_last_text" msg
  end;
  let char_offset = find_char line byte in
  if Debug.unicode then begin
    let msg = Format.asprintf "char: %d" char_offset in
    Io.Log.trace "get_last_text" msg
  end;
  char_offset