1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
open Let_syntax.Result
(* Bitwise helpers used by the UTF-8 encoder below.  Their precedence follows
   the usual OCaml rule (decided by the operator's leading character): [%]
   binds like [*], [<<] and [>>] bind like comparison operators, and [&] binds
   like [&&].  Hence [a % b << n] reads as [(a % b) << n] and [x >> n & m]
   reads as [(x >> n) & m]. *)
let ( % ) = Int.logor
let ( << ) = Int.shift_left
let ( >> ) = Int.shift_right
let ( & ) = Int.logand

(** [utf_8_string_of_unicode i] encodes the code point [i] as a UTF-8 string.

    - [0 .. 0x7F] gives one byte, [0x80 .. 0x7FF] two bytes,
      [0x800 .. 0xFFFF] three bytes and [0x10000 .. 0x10FFFF] four bytes.
    - Surrogate values ([0xD800 .. 0xDFFF]) are not rejected; they are encoded
      with the regular three-byte layout.

    @return [Ok s] with the encoded bytes, or [Error msg] when [i] is negative
    or greater than [0x10FFFF]. *)
let utf_8_string_of_unicode i =
  (* Continuation byte: marker bits 10 followed by the six payload bits of [i]
     that start at bit [shift]. *)
  let continuation shift = 0b10000000 % (i >> shift & 0b111111) in
  (* Each element of [bytes] is already in [0 .. 255]; they are written one at
     a time so the buffer is exactly as long as the encoding. *)
  let encode bytes =
    let b = Bytes.create (List.length bytes) in
    List.iteri (fun pos byte -> Bytes.set_uint8 b pos byte) bytes;
    Ok (Bytes.to_string b)
  in
  if i < 0 then Error (Format.sprintf "invalid code point %d" i)
  else if i <= 0x007F then encode [ i ]
  else if i <= 0x07FF then encode [ 0b11000000 % (i >> 6); continuation 0 ]
  else if i <= 0xFFFF then
    encode [ 0b11100000 % (i >> 12); continuation 6; continuation 0 ]
  else if i <= 0x10FFFF then
    encode
      [
        0b11110000 % (i >> 18);
        continuation 12;
        continuation 6;
        continuation 0;
      ]
  else Error (Format.sprintf "invalid code point %X" i)
(** [unescape str] decodes one backslash escape sequence.

    [str] is the complete sequence, leading backslash included; the character
    at index 1 selects the kind of escape.

    - [u] followed by four hex digits, [x] followed by two hex digits, and [0]
      followed by exactly two octal digits are converted to a code point and
      encoded with [utf_8_string_of_unicode].  For [u] and [x], characters
      after the expected digits are ignored.
    - [0] on its own yields the NUL character.
    - A backslash yields a single backslash.
    - Double quote, single quote, [b], [f], [n], [r], [t] and [v] return [str]
      unchanged.

    @return [Ok s] on success, or [Error msg] when [str] is too short, the
    digits are malformed, or the escape character is not recognised. *)
let unescape str =
  let len = String.length str in
  (* Reads [count] digits starting at index 2 in the base selected by [prefix]
     and encodes the resulting code point. *)
  let decode_digits ~prefix ~count =
    (* Guard the slice so a truncated sequence is reported as an [Error]
       rather than escaping as an exception from [String.sub]. *)
    if len < 2 + count then
      Error (Format.sprintf "too small escape sequence %s" str)
    else
      let digits = String.sub str 2 count in
      let* code_point =
        match
          (* [int_of_string_opt] tolerates underscore separators between
             digits; those are not legal inside an escape sequence. *)
          if String.contains digits '_' then None
          else int_of_string_opt (prefix ^ digits)
        with
        | Some n -> Ok n
        | None -> Error (Format.sprintf "bad escape sequence %s" digits)
      in
      utf_8_string_of_unicode code_point
  in
  if len < 2 then Error (Format.sprintf "too small escape sequence %s" str)
  else
    match str.[1] with
    | 'u' -> decode_digits ~prefix:"0x" ~count:4
    | 'x' -> decode_digits ~prefix:"0x" ~count:2
    (* NOTE(review): these sequences come back still in escaped form, unlike
       the backslash case just below - confirm that callers expect this. *)
    | '"' | '\'' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' -> Ok str
    | '\\' -> Ok {|\|}
    | '0' -> (
        match len with
        | 2 -> Ok "\x00"
        | 4 -> decode_digits ~prefix:"0o" ~count:2
        | _ -> Error (Format.sprintf "invalid octal sequence %s" str))
    | _ -> Error (Format.sprintf "invalid escape sequence %c" str.[1])