Source file `libunicode.ml`

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
(** Low-level bindings to QuickJS's libunicode - Unicode Character Utilities

    Unicode character classification and case conversion. These are raw C
    bindings; for a higher-level API, use [Quickjs.Unicode]. *)

(** {2 Character Classification} *)

(** [is_cased cp] reports whether code point [cp] has the Unicode [Cased]
    property, i.e. has uppercase/lowercase variants. Forwards to the raw
    [lre_is_cased] binding. *)
let is_cased cp = cp |> Bindings.C.Functions.lre_is_cased

(** [is_case_ignorable cp] reports whether code point [cp] is skipped during
    case mapping (Unicode [Case_Ignorable] property). Forwards to the raw
    [lre_is_case_ignorable] binding. *)
let is_case_ignorable cp = cp |> Bindings.C.Functions.lre_is_case_ignorable

(** [is_id_start cp] reports whether code point [cp] may begin an identifier
    (Unicode [ID_Start] property). Forwards to the raw [lre_is_id_start]
    binding. *)
let is_id_start cp = cp |> Bindings.C.Functions.lre_is_id_start

(** [is_id_continue cp] reports whether code point [cp] may appear after the
    first character of an identifier (Unicode [ID_Continue] property).
    Forwards to the raw [lre_is_id_continue] binding. *)
let is_id_continue cp = cp |> Bindings.C.Functions.lre_is_id_continue

(** [is_space_non_ascii cp] reports whether the non-ASCII code point [cp] is
    whitespace. Intended for code points >= 256. Forwards to the raw
    [lre_is_space_non_ascii] binding. *)
let is_space_non_ascii cp = cp |> Bindings.C.Functions.lre_is_space_non_ascii

(** {2 Case Conversion} *)

(** [case_conv res cp conv_type] converts the case of code point [cp] through
    the raw [lre_case_conv] binding.

    [conv_type] selects the mapping: 0 = uppercase, 1 = lowercase, 2 = case
    folding. Returns the number of output codepoints (1-3).

    NOTE(review): [res] is presumably the output buffer that receives the
    converted codepoints; confirm against the binding declaration. *)
let case_conv res cp conv_type =
  let module F = Bindings.C.Functions in
  F.lre_case_conv res cp conv_type

(** [canonicalize cp is_unicode] canonicalizes code point [cp] for
    case-insensitive regex matching through the raw [lre_canonicalize]
    binding.

    [is_unicode] selects the folding mode: 1 = full Unicode folding, 0 = ASCII
    only. *)
let canonicalize cp is_unicode =
  let fold = Bindings.C.Functions.lre_canonicalize in
  fold cp is_unicode

(** {2 Normalization} *)

(** [normalize src len n_type dst] normalizes a Unicode string through the raw
    [unicode_normalize_shim] binding.

    [n_type] selects the normalization form: 0 = NFC, 1 = NFD, 2 = NFKC,
    3 = NFKD. Returns the length of the output, or -1 on error.

    NOTE(review): [src]/[len] are presumably the input and its length, and
    [dst] the location receiving the allocated output buffer (to be released
    with [normalize_free]); confirm against the binding declaration. *)
let normalize src len n_type dst =
  let module F = Bindings.C.Functions in
  F.unicode_normalize_shim src len n_type dst

(** [normalize_free ptr] releases the buffer [ptr] that was allocated by
    [normalize]. Forwards to the raw [unicode_normalize_free] binding. *)
let normalize_free ptr = ptr |> Bindings.C.Functions.unicode_normalize_free