UCharInfo.Make
Character Information
module Config : ConfigInt.Typetype general_category_type = [ | `LuLetter, Uppercase
*)| `LlLetter, Lowercase
*)| `LtLetter, Titlecase
*)| `MnMark, Non-Spacing
*)| `McMark, Spacing Combining
*)| `MeMark, Enclosing
*)| `NdNumber, Decimal Digit
*)| `NlNumber, Letter
*)| `NoNumber, Other
*)| `ZsSeparator, Space
*)| `ZlSeparator, Line
*)| `ZpSeparator, Paragraph
*)| `CcOther, Control
*)| `CfOther, Format
*)| `CsOther, Surrogate
*)| `CoOther, Private Use
*)| `CnOther, Not Assigned
*)| `LmLetter, Modifier
*)| `LoLetter, Other
*)| `PcPunctuation, Connector
*)| `PdPunctuation, Dash
*)| `PsPunctuation, Open
*)| `PePunctuation, Close
*)| `PiPunctuation, Initial quote
*)| `PfPunctuation, Final quote
*)| `PoPunctuation, Other
*)| `SmSymbol, Math
*)| `ScSymbol, Currency
*)| `SkSymbol, Modifier
*)| `SoSymbol, Other
*) ]Type of Unicode general character categories. Each variant specifies
`Lu : Letter, Uppercase`Ll : Letter, Lowercase`Lt : Letter, Titlecase`Mn : Mark, Non-Spacing`Mc : Mark, Spacing Combining`Me : Mark, Enclosing`Nd : Number, Decimal Digit`Nl : Number, Letter`No : Number, Other`Zs : Separator, Space`Zl : Separator, Line`Zp : Separator, Paragraph`Cc : Other, Control`Cf : Other, Format`Cs : Other, Surrogate`Co : Other, Private Use`Cn : Other, Not Assigned`Lm : Letter, Modifier`Lo : Letter, Other`Pc : Punctuation, Connector`Pd : Punctuation, Dash`Ps : Punctuation, Open`Pe : Punctuation, Close`Pi : Punctuation, Initial`Pf : Punctuation, Final`Po : Punctuation, Other`Sm : Symbol, Math`Sc : Symbol, Currency`Sk : Symbol, Modifier`So : Symbol, Otherval general_category : UChar.t -> general_category_typeval load_general_category_map : unit -> general_category_type UMap.ttype character_property_type = [ | `MathDerived Core Properties
*)| `Alphabetic| `Lowercase| `Uppercase| `ID_Start| `ID_Continue| `XID_Start| `XID_Continue| `Default_Ignorable_Code_Point| `Grapheme_Extend| `Grapheme_Base| `Bidi_ControlExtended Properties
*)| `White_Space| `Hyphen| `Quotation_Mark| `Terminal_Punctuation| `Other_Math| `Hex_Digit| `Ascii_Hex_Digit| `Other_Alphabetic| `Ideographic| `Diacritic| `Extender| `Other_Lowercase| `Other_Uppercase| `Noncharacter_Code_Point| `Other_Grapheme_Extend| `Grapheme_Link| `IDS_Binary_Operator| `IDS_Trinary_Operator| `Radical| `Unified_Ideograph| `Other_default_Ignorable_Code_Point| `Deprecated| `Soft_Dotted| `Logical_Order_Exception ]Type of character properties
val load_property_tbl : character_property_type -> UCharTbl.Bool.tLoad the table for the given character type.
val load_property_tbl_by_name : string -> UCharTbl.Bool.t
Load the table for the given name of the character type. The name is obtained by removing the leading ` from the corresponding polymorphic variant tag.
val load_property_set : character_property_type -> USet.tLoad the set of characters of the given character type.
val load_property_set_by_name : string -> USet.t
Load the set of characters of the given name of the character type. The name is obtained by removing the leading ` from the corresponding polymorphic variant tag.
type script_type = [ | `Common| `Inherited| `Latin| `Greek| `Cyrillic| `Armenian| `Hebrew| `Arabic| `Syriac| `Thaana| `Devanagari| `Bengali| `Gurmukhi| `Gujarati| `Oriya| `Tamil| `Telugu| `Kannada| `Malayalam| `Sinhala| `Thai| `Lao| `Tibetan| `Myanmar| `Georgian| `Hangul| `Ethiopic| `Cherokee| `Canadian_Aboriginal| `Ogham| `Runic| `Khmer| `Mongolian| `Hiragana| `Katakana| `Bopomofo| `Han| `Yi| `Old_Italic| `Gothic| `Deseret| `Tagalog| `Hanunoo| `Buhid| `Tagbanwa ]Type for script type
val script : UChar.t -> script_type
val load_script_map : unit -> script_type UMap.t
val age : UChar.t -> version_type
age c is the Unicode version in which c was introduced.
val older : version_type -> version_type -> bool
older v1 v2 is true if v1 is older than (or the same version as) v2. Everything is older than `Nc
casing
val load_to_lower1_tbl : unit -> UChar.t UCharTbl.tval load_to_upper1_tbl : unit -> UChar.t UCharTbl.tval load_to_title1_tbl : unit -> UChar.t UCharTbl.ttype casemap_condition = [ | `Locale of string| `FinalSigma| `AfterSoftDotted| `MoreAbove| `BeforeDot| `Not of casemap_condition ]type special_casing_property = {lower : UChar.t list;title : UChar.t list;upper : UChar.t list;condition : casemap_condition list;}val load_conditional_casing_tbl :
unit ->
special_casing_property list UCharTbl.t
val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t
val combined_class : UChar.t -> int
Combined class. A combined class is an integer in the range 0 -- 255, showing how this character interacts with other combined characters.
Decomposition
type decomposition_type = [ | `Canon| `Font| `NoBreak| `Initial| `Medial| `Final| `Isolated| `Circle| `Super| `Sub| `Vertical| `Wide| `Narrow| `Small| `Square| `Fraction| `Compat ]Types of decomposition.
type decomposition_info = [ | `Canonform (* Already in the canonical form *)
| `HangulSyllable (* Hangul is treated algorithmically. *)
| `Composite of decomposition_type * UChar.t list (* `Composite (dtype, text) means the given character is decomposed into text by dtype decomposition. *)
]val load_decomposition_tbl : unit -> decomposition_info UCharTbl.tCanonical Composition
val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t
The return value [(u_1, u'_1); ... (u_n, u'_n)] means that, for the given character u, the sequence u u_i forms the canonical composition u'_i. If u is a Hangul jamo, composition returns the empty list [].
val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.tWhether the given composed character is used in NFC or NFKC