(** Selector for a normalization form.

    The constructors carry the names of the four Unicode normalization
    forms (NFC, NFD, NFKC, NFKD). NOTE(review): no function visible in this
    part of the interface takes a [normalization]; confirm where it is
    consumed. *)
type normalization =
  | NFC
  | NFD
  | NFKC
  | NFKD
(** Coarse classification of a character into one of seven categories.

    NOTE(review): the mapping from actual characters (or Unicode general
    categories) to these constructors is not visible in this part of the
    interface; confirm against the implementation. *)
type char_category =
  | Letter
  | Number
  | Punctuation
  | Symbol
  | Whitespace
  | Control
  | Other
(** [case_fold s] maps the string [s] to a string.

    NOTE(review): presumably performs case folding for caseless
    comparison; the implementation is not visible here — confirm. *)
val case_fold : string -> string

(** [strip_accents s] maps the string [s] to a string.

    NOTE(review): presumably removes accent / diacritic marks from [s];
    confirm the exact rule against the implementation. *)
val strip_accents : string -> string

(** [clean_text ?remove_control ?normalize_whitespace s] maps the string
    [s] to a string, with two optional boolean switches.

    NOTE(review): the defaults of [remove_control] and
    [normalize_whitespace], and precisely what each switch does, are not
    visible in this interface — confirm against the implementation.

    @param remove_control optional flag; presumably enables removal of
      control characters (to be confirmed).
    @param normalize_whitespace optional flag; presumably enables
      whitespace normalization (to be confirmed). *)
val clean_text :
  ?remove_control:bool -> ?normalize_whitespace:bool -> string -> string

(** [split_words s] maps the string [s] to a list of strings.

    NOTE(review): presumably the words of [s]; the word-boundary rule is
    not visible here — confirm. *)
val split_words : string -> string list

(** [grapheme_count s] maps the string [s] to an integer.

    NOTE(review): presumably the number of grapheme clusters in [s];
    behaviour on malformed input is not visible here — confirm. *)
val grapheme_count : string -> int

(** [is_valid_utf8 s] maps the string [s] to a boolean.

    NOTE(review): presumably [true] exactly when [s] is well-formed
    UTF-8 — confirm. *)
val is_valid_utf8 : string -> bool

(** [remove_emoji s] maps the string [s] to a string.

    NOTE(review): presumably [s] with emoji removed; which code points
    count as emoji is not visible here — confirm. *)
val remove_emoji : string -> string