Source file stdcompat__uchar.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

(* Re-export every item of the [Uchar] module in scope, so that this
   module exposes the same types and values as [Uchar] itself.

   NOTE(review): the commented-out sections that follow look like
   hand-written fallback definitions (scalar-value bounds, [succ]/[pred],
   UTF decode helpers, [bom]/[rep]) for compilers whose [Uchar] lacks
   them; presumably a build step enables them per compiler version.
   Confirm against the build configuration before editing them. *)
include Uchar

(*
(*
include Uchar
*)

type t = int

let min = 0

let max = 0x10FFFF

let lo_bound = 0xD7FF

let hi_bound = 0xE000

let succ u =
  if u = lo_bound then
    hi_bound
  else if u = max then
    invalid_arg "Uchar.succ"
  else
    succ u

let pred u =
  if u = hi_bound then
    lo_bound
  else if u = min then
    invalid_arg "Uchar.pred"
  else
    pred u

let is_valid i =
  min <= i && i <= lo_bound || hi_bound <= i && i <= max

let of_int i =
  if is_valid i then
    i
  else
    invalid_arg "Uchar.of_int"

let unsafe_of_int i =
  i

let to_int i =
  i

let is_char u =
  u < 0x100

let of_char c =
  Char.code c

let to_char u =
  if is_char u then
    Char.unsafe_chr u
  else
    invalid_arg "Uchar.to_char"

let unsafe_to_char u =
  Char.unsafe_chr u

let equal : t -> t -> bool = ( = )

let compare : t -> t -> int = compare

let hash = to_int

*)

(*
let rep' = 0xFFFD

type utf_decode = int

let valid_bit = 27
let decode_bits = 24

let utf_decode_is_valid d = (d lsr valid_bit) = 1
let utf_decode_length d = (d lsr decode_bits) land 0b111
let utf_decode_uchar d = unsafe_of_int (d land 0xFFFFFF)
let utf_decode n u = ((8 lor n) lsl decode_bits) lor (to_int u)
let utf_decode_invalid n = (n lsl decode_bits) lor rep'

let utf_8_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0x007F -> 1
| u when u <= 0x07FF -> 2
| u when u <= 0xFFFF -> 3
| u when u <= 0x10FFFF -> 4
| _ -> assert false

let utf_16_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0xFFFF -> 2
| u when u <= 0x10FFFF -> 4
| _ -> assert false
*)

(*
let bom = unsafe_of_int 0xFEFF

let rep = unsafe_of_int rep'
*)