1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
open! Import
(* Short local name for [Printf.failwithf]. The [_exn] functions in this file call it
   as [failwithf fmt args ()]. *)
let failwithf = Printf.failwithf
(** Core definitions for this module: everything from [Uchar0] plus hashing, string
    rendering and sexp conversion. It is a named module so that it can be handed to the
    functors applied right after it. *)
module T = struct
  include Uchar0

  let module_name = "Base.Uchar"

  (** Folds the integer value of [t] (as given by [to_int]) into the hash state. *)
  let hash_fold_t state t = Hash.fold_int state (to_int t)

  let hash t = Hash.run hash_fold_t t

  (** Renders [t] as ["U+"] followed by [to_int t] in upper-case hexadecimal, zero-padded
      to at least four digits. *)
  let to_string t = Printf.sprintf "U+%04X" (to_int t)

  let sexp_of_t t = Sexp.Atom (to_string t)

  (** Parses an atom of the form produced by [sexp_of_t]. Lists, atoms that do not scan
      as ["U+"] plus a hexadecimal number, and numbers rejected by [Uchar0.of_int] are all
      reported through [of_sexp_error]. *)
  let t_of_sexp sexp =
    match sexp with
    | Sexp.List _ -> of_sexp_error "Uchar.t_of_sexp: atom needed" sexp
    | Sexp.Atom s ->
      (* Only parse failures are turned into a sexp conversion error. [Scan_failure],
         [Failure] and [End_of_file] are the exceptions [Scanf.sscanf] documents for bad
         input; [Invalid_argument] is presumably what [Uchar0.of_int] raises for an
         out-of-range integer (the rest of this file handles [Uchar0.succ]/[pred] the
         same way). Anything else, such as [Stack_overflow] or [Out_of_memory], is no
         longer swallowed. *)
      (try Stdlib.Scanf.sscanf s "U+%X" (fun i -> Uchar0.of_int i) with
       | Stdlib.Scanf.Scan_failure _
       | Stdlib.Failure _
       | Stdlib.End_of_file
       | Invalid_argument _ ->
         of_sexp_error "Uchar.t_of_sexp: atom of the form U+XXXX needed" sexp)
  ;;

  (* The grammar is the string grammar retyped to [t]; it does not describe the U+XXXX
     shape that [t_of_sexp] actually requires. *)
  let t_sexp_grammar : t Sexplib0.Sexp_grammar.t =
    Sexplib0.Sexp_grammar.coerce String.t_sexp_grammar
  ;;
end
(* Re-export the contents of [T] at the top level of this module. *)
include T
(* NOTE(review): presumably registers a toplevel pretty-printer for [T.t] based on
   [T.module_name] and [T.to_string] - confirm against [Pretty_printer.Register]. *)
include Pretty_printer.Register (T)
(* Adds whatever [Comparable.Make] derives from [T]. *)
include Comparable.Make (T)
(* NOTE(review): presumably shadows the comparison operators with versions specialized
   to [t] for the rest of this file, which would explain why integer comparisons further
   down are written as [Int.( < )] - confirm. *)
open! Uchar_replace_polymorphic_compare
(** [invariant t] does nothing: no check is performed on [t]. *)
let invariant : t -> unit = fun _ -> ()
(** [int_is_scalar i] is [is_valid i]; this is just another name for that predicate. *)
let int_is_scalar i = is_valid i
(** [succ_exn c] is [Uchar0.succ c]. If that call raises [Invalid_argument msg], the
    failure is reported through [failwithf] instead, with [msg] prefixed by the name of
    this function. *)
let succ_exn c =
  match Uchar0.succ c with
  | next -> next
  | exception Invalid_argument reason -> failwithf "Uchar.succ_exn: %s" reason ()
;;
(** [succ c] is [Some (Uchar0.succ c)], or [None] when [Uchar0.succ] raises
    [Invalid_argument]. *)
let succ c =
  match Uchar0.succ c with
  | next -> Some next
  | exception Invalid_argument _ -> None
;;
(** [pred_exn c] is [Uchar0.pred c]. If that call raises [Invalid_argument msg], the
    failure is reported through [failwithf] instead, with [msg] prefixed by the name of
    this function. *)
let pred_exn c =
  match Uchar0.pred c with
  | previous -> previous
  | exception Invalid_argument reason -> failwithf "Uchar.pred_exn: %s" reason ()
;;
(** [pred c] is [Some (Uchar0.pred c)], or [None] when [Uchar0.pred] raises
    [Invalid_argument]. *)
let pred c =
  match Uchar0.pred c with
  | previous -> Some previous
  | exception Invalid_argument _ -> None
;;
(** [of_scalar i] is [None] unless [int_is_scalar i] holds, in which case [i] is
    converted with [unsafe_of_int] and wrapped in [Some]. *)
let of_scalar i =
  match int_is_scalar i with
  | false -> None
  | true -> Some (unsafe_of_int i)
(** [of_scalar_exn i] converts [i] with [unsafe_of_int] when [int_is_scalar i] holds.
    Otherwise it fails through [failwithf] with a message that shows [i] in hexadecimal. *)
let of_scalar_exn i =
  if int_is_scalar i
  then unsafe_of_int i
  else
    (* The message names this function (it used to say [of_int_exn]) so that a failure
       can be traced back to the call that produced it. *)
    failwithf "Uchar.of_scalar_exn got an invalid Unicode scalar value: %04X" i ()
;;
(** [to_scalar] is [Uchar0.to_int] under another name: it returns the integer value of
    a character. *)
let to_scalar = Uchar0.to_int
(** [to_char c] is [Some (unsafe_to_char c)] when [is_char c] holds and [None]
    otherwise. *)
let to_char c =
  match is_char c with
  | true -> Some (unsafe_to_char c)
  | false -> None
(** [to_char_exn c] is [unsafe_to_char c] when [is_char c] holds. Otherwise it fails
    through [failwithf], reporting [to_int c] in [U+XXXX] notation. *)
let to_char_exn c =
  match is_char c with
  | true -> unsafe_to_char c
  | false ->
    failwithf "Uchar.to_char_exn got a non latin-1 character: U+%04X" (to_int c) ()
;;
(** [utf8_byte_length uchar] picks a byte count from the scalar value of [uchar]:
    1 below 0x80, 2 below 0x800, 3 below 0x10000 and 4 for everything else. *)
let utf8_byte_length uchar =
  (* The comparisons go through [Int.( < )] explicitly, as in the rest of this file. *)
  match to_scalar uchar with
  | scalar when Int.( < ) scalar 0x80 -> 1
  | scalar when Int.( < ) scalar 0x800 -> 2
  | scalar when Int.( < ) scalar 0x10000 -> 3
  | _ -> 4
;;
(* NOTE(review): presumably included last so that its comparison functions take
   precedence over the ones brought in earlier by [Comparable.Make (T)] - confirm. *)
include Uchar_replace_polymorphic_compare