# 1 "Camomile/public/uCharInfo.ml"
(* Copyright (C) 2002 Yamagata Yoriyuki.*)
(*               2010 Pierre Chambart *)

(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)

(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)

(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)

(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)

(* You can contact the authour by sending email to *)
(* yoriyuki.y@gmail.com *)

(* Interface exported by [Make]: per-character lookups (general category,
   script, age, combining class) plus loaders that return whole property
   tables, sets and maps. *)
module type Type = sig

  type general_category_type =
    [ `Lu | `Ll | `Lt
    | `Mn | `Mc | `Me
    | `Nd | `Nl | `No
    | `Zs | `Zl | `Zp
    | `Cc | `Cf | `Cs | `Co | `Cn
    | `Lm | `Lo
    | `Pc | `Pd | `Ps | `Pe | `Pi | `Pf | `Po
    | `Sm | `Sc | `Sk | `So ]

  val general_category : UChar.t -> general_category_type
  val load_general_category_map : unit -> general_category_type UMap.t

  type character_property_type =
    [ `Math
    | `Alphabetic
    | `Lowercase
    | `Uppercase
    | `ID_Start
    | `ID_Continue
    | `XID_Start
    | `XID_Continue
    | `Default_Ignorable_Code_Point
    | `Grapheme_Extend
    | `Grapheme_Base
    | `Bidi_Control
    | `White_Space
    | `Hyphen
    | `Quotation_Mark
    | `Terminal_Punctuation
    | `Other_Math
    | `Hex_Digit
    | `Ascii_Hex_Digit
    | `Other_Alphabetic
    | `Ideographic
    | `Diacritic
    | `Extender
    | `Other_Lowercase
    | `Other_Uppercase
    | `Noncharacter_Code_Point
    | `Other_Grapheme_Extend
    | `Grapheme_Link
    | `IDS_Binary_Operator
    | `IDS_Trinary_Operator
    | `Radical
    | `Unified_Ideograph
    | `Other_default_Ignorable_Code_Point
    | `Deprecated
    | `Soft_Dotted
    | `Logical_Order_Exception ]

  val load_property_tbl : character_property_type -> UCharTbl.Bool.t
  val load_property_tbl_by_name : string -> UCharTbl.Bool.t
  val load_property_set : character_property_type -> USet.t
  val load_property_set_by_name : string -> USet.t

  type script_type =
    [ `Common
    | `Inherited
    | `Latin
    | `Greek
    | `Cyrillic
    | `Armenian
    | `Hebrew
    | `Arabic
    | `Syriac
    | `Thaana
    | `Devanagari
    | `Bengali
    | `Gurmukhi
    | `Gujarati
    | `Oriya
    | `Tamil
    | `Telugu
    | `Kannada
    | `Malayalam
    | `Sinhala
    | `Thai
    | `Lao
    | `Tibetan
    | `Myanmar
    | `Georgian
    | `Hangul
    | `Ethiopic
    | `Cherokee
    | `Canadian_Aboriginal
    | `Ogham
    | `Runic
    | `Khmer
    | `Mongolian
    | `Hiragana
    | `Katakana
    | `Bopomofo
    | `Han
    | `Yi
    | `Old_Italic
    | `Gothic
    | `Deseret
    | `Tagalog
    | `Hanunoo
    | `Buhid
    | `Tagbanwa ]

  val script : UChar.t -> script_type
  val load_script_map : unit -> script_type UMap.t

  type version_type =
    [ `Nc
    | `v1_0
    | `v1_1
    | `v2_0
    | `v2_1
    | `v3_0
    | `v3_1
    | `v3_2 ]

  val age : UChar.t -> version_type
  val older : version_type -> version_type -> bool

  val load_to_lower1_tbl : unit -> UChar.t UCharTbl.t
  val load_to_upper1_tbl : unit -> UChar.t UCharTbl.t
  val load_to_title1_tbl : unit -> UChar.t UCharTbl.t

  type casemap_condition =
    [ `Locale of string
    | `FinalSigma
    | `AfterSoftDotted
    | `MoreAbove
    | `BeforeDot
    | `Not of casemap_condition ]

  type special_casing_property =
    { lower : UChar.t list;
      title : UChar.t list;
      upper : UChar.t list;
      condition : casemap_condition list; }

  val load_conditional_casing_tbl :
    unit -> special_casing_property list UCharTbl.t

  val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t

  val combined_class : UChar.t -> int

  type decomposition_type =
    [ `Canon | `Font | `NoBreak | `Initial | `Medial | `Final
    | `Isolated | `Circle | `Super | `Sub | `Vertical | `Wide | `Narrow
    | `Small | `Square | `Fraction | `Compat ]

  type decomposition_info =
    [ `Canonform
    | `HangulSyllable
    | `Composite of decomposition_type * UChar.t list ]

  val load_decomposition_tbl : unit -> decomposition_info UCharTbl.t

  val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t

  val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.t
end

(* [Make (Config)] implements [Type].  Names used below without a local
   definition ([read_data], [cat_of_num], [script_of_num], and the types
   [general_category_type], [script_type], [decomposition_type],
   [decomposition_info]) are presumably supplied by the included
   [Unidata.Make (Config)] -- confirm against unidata.ml. *)
module Make (Config : ConfigInt.Type) : Type = struct
  include Unidata.Make(Config)

  (* ------------------------------------------------------------------ *)
  (* Private caching helpers (not exported: hidden by [: Type]).         *)
  (* ------------------------------------------------------------------ *)

  (* [load_cached cache slot name] returns the value held in weak slot
     [slot] of [cache]; when the slot is empty it calls [read_data name],
     stores the result in the slot and returns it.  Because the slot is
     weak, the garbage collector may empty it again once no caller keeps
     the value alive, in which case the next call reloads. *)
  let load_cached cache slot name =
    match Weak.get cache slot with
    | Some tbl -> tbl
    | None ->
      let tbl = read_data name in
      Weak.set cache slot (Some tbl);
      tbl

  (* [load_memoized registry key load] is the keyed variant: [registry]
     maps a key to a one-slot weak box.  A live box is returned as is;
     a missing or emptied box is replaced by a fresh one holding the
     result of [load ()]. *)
  let load_memoized registry key load =
    let live =
      try Weak.get (Hashtbl.find registry key) 0
      with Not_found -> None
    in
    match live with
    | Some tbl -> tbl
    | None ->
      (* Drop the stale binding (no-op when there was none) before
         loading, so the registry never keeps more than one box per key. *)
      Hashtbl.remove registry key;
      let tbl = load () in
      let box = Weak.create 1 in
      Weak.set box 0 (Some tbl);
      Hashtbl.add registry key box;
      tbl

  (* ------------------------------------------------------------------ *)
  (* General category                                                     *)
  (* ------------------------------------------------------------------ *)

  (* Loaded once, when the functor is applied. *)
  let general_category_tbl : UCharTbl.Bits.t =
    read_data "general_category"

  (* [general_category u] looks [u] up in [general_category_tbl].  A table
     value of 0 is resolved here instead of through [cat_of_num]: code
     points inside one of the three ranges below yield [`Co], everything
     else yields [`Cn].
     NOTE(review): the first range is 0xF0000..0x100000 inclusive, which
     does not line up with the Unicode private-use planes
     (U+F0000..U+FFFFD and U+100000..U+10FFFD).  Whether the table already
     covers plane 16 is not visible from this file -- confirm before
     changing the bounds.
     NOTE(review): the literals 0x60000000 / 0x7f000000 need an [int] wider
     than 31 bits -- confirm 32-bit targets are out of scope. *)
  let general_category u =
    match UCharTbl.Bits.get general_category_tbl u with
    | 0 ->
      let n = UChar.uint_code u in
      if n >= 0x0f0000 && n <= 0x100000 then `Co
      else if n >= 0xe00000 && n <= 0xff0000 then `Co
      else if n >= 0x60000000 && n <= 0x7f000000 then `Co
      else `Cn
    | x -> cat_of_num x

  (* Reads the "general_category_map" data on every call (no caching). *)
  let load_general_category_map () = read_data "general_category_map"

  (* ------------------------------------------------------------------ *)
  (* Character properties                                                 *)
  (* ------------------------------------------------------------------ *)

  type character_property_type =
    [ `Math                     (* Derived Core Properties *)
    | `Alphabetic
    | `Lowercase
    | `Uppercase
    | `ID_Start
    | `ID_Continue
    | `XID_Start
    | `XID_Continue
    | `Default_Ignorable_Code_Point
    | `Grapheme_Extend
    | `Grapheme_Base
    | `Bidi_Control             (* Extended Properties *)
    | `White_Space
    | `Hyphen
    | `Quotation_Mark
    | `Terminal_Punctuation
    | `Other_Math
    | `Hex_Digit
    | `Ascii_Hex_Digit
    | `Other_Alphabetic
    | `Ideographic
    | `Diacritic
    | `Extender
    | `Other_Lowercase
    | `Other_Uppercase
    | `Noncharacter_Code_Point
    | `Other_Grapheme_Extend
    | `Grapheme_Link
    | `IDS_Binary_Operator
    | `IDS_Trinary_Operator
    | `Radical
    | `Unified_Ideograph
    | `Other_default_Ignorable_Code_Point
    | `Deprecated
    | `Soft_Dotted
    | `Logical_Order_Exception ]

  (* [name_of_property p] is the tag of [p] spelled as a string; the result
     is used as the data name handed to [read_data]. *)
  let name_of_property p =
    match p with
    | `Math -> "Math"
    | `Alphabetic -> "Alphabetic"
    | `Lowercase -> "Lowercase"
    | `Uppercase -> "Uppercase"
    | `ID_Start -> "ID_Start"
    | `ID_Continue -> "ID_Continue"
    | `XID_Start -> "XID_Start"
    | `XID_Continue -> "XID_Continue"
    | `Default_Ignorable_Code_Point -> "Default_Ignorable_Code_Point"
    | `Grapheme_Extend -> "Grapheme_Extend"
    | `Grapheme_Base -> "Grapheme_Base"
    | `Bidi_Control -> "Bidi_Control"
    | `White_Space -> "White_Space"
    | `Hyphen -> "Hyphen"
    | `Quotation_Mark -> "Quotation_Mark"
    | `Terminal_Punctuation -> "Terminal_Punctuation"
    | `Other_Math -> "Other_Math"
    | `Hex_Digit -> "Hex_Digit"
    | `Ascii_Hex_Digit -> "Ascii_Hex_Digit"
    | `Other_Alphabetic -> "Other_Alphabetic"
    | `Ideographic -> "Ideographic"
    | `Diacritic -> "Diacritic"
    | `Extender -> "Extender"
    | `Other_Lowercase -> "Other_Lowercase"
    | `Other_Uppercase -> "Other_Uppercase"
    | `Noncharacter_Code_Point -> "Noncharacter_Code_Point"
    | `Other_Grapheme_Extend -> "Other_Grapheme_Extend"
    | `Grapheme_Link -> "Grapheme_Link"
    | `IDS_Binary_Operator -> "IDS_Binary_Operator"
    | `IDS_Trinary_Operator -> "IDS_Trinary_Operator"
    | `Radical -> "Radical"
    | `Unified_Ideograph -> "Unified_Ideograph"
    | `Other_default_Ignorable_Code_Point ->
      "Other_default_Ignorable_Code_Point"
    | `Deprecated -> "Deprecated"
    | `Soft_Dotted -> "Soft_Dotted"
    | `Logical_Order_Exception -> "Logical_Order_Exception"

  (* Inverse of [name_of_property].  The match is case-sensitive and
     raises [Not_found] for any other string. *)
  let property_of_name : string -> character_property_type = function
    | "Math" -> `Math
    | "Alphabetic" -> `Alphabetic
    | "Lowercase" -> `Lowercase
    | "Uppercase" -> `Uppercase
    | "ID_Start" -> `ID_Start
    | "ID_Continue" -> `ID_Continue
    | "XID_Start" -> `XID_Start
    | "XID_Continue" -> `XID_Continue
    | "Default_Ignorable_Code_Point" -> `Default_Ignorable_Code_Point
    | "Grapheme_Extend" -> `Grapheme_Extend
    | "Grapheme_Base" -> `Grapheme_Base
    | "Bidi_Control" -> `Bidi_Control
    | "White_Space" -> `White_Space
    | "Hyphen" -> `Hyphen
    | "Quotation_Mark" -> `Quotation_Mark
    | "Terminal_Punctuation" -> `Terminal_Punctuation
    | "Other_Math" -> `Other_Math
    | "Hex_Digit" -> `Hex_Digit
    | "Ascii_Hex_Digit" -> `Ascii_Hex_Digit
    | "Other_Alphabetic" -> `Other_Alphabetic
    | "Ideographic" -> `Ideographic
    | "Diacritic" -> `Diacritic
    | "Extender" -> `Extender
    | "Other_Lowercase" -> `Other_Lowercase
    | "Other_Uppercase" -> `Other_Uppercase
    | "Noncharacter_Code_Point" -> `Noncharacter_Code_Point
    | "Other_Grapheme_Extend" -> `Other_Grapheme_Extend
    | "Grapheme_Link" -> `Grapheme_Link
    | "IDS_Binary_Operator" -> `IDS_Binary_Operator
    | "IDS_Trinary_Operator" -> `IDS_Trinary_Operator
    | "Radical" -> `Radical
    | "Unified_Ideograph" -> `Unified_Ideograph
    | "Other_default_Ignorable_Code_Point" ->
      `Other_default_Ignorable_Code_Point
    | "Deprecated" -> `Deprecated
    | "Soft_Dotted" -> `Soft_Dotted
    | "Logical_Order_Exception" -> `Logical_Order_Exception
    | _ -> raise Not_found

  (* Property -> weak box holding the boolean table loaded for it. *)
  let loaded_props = Hashtbl.create 0

  (* [load_property_tbl p] returns the table for [p], reading the data
     named [name_of_property p] unless a previously loaded copy is still
     alive. *)
  let load_property_tbl p =
    load_memoized loaded_props p
      (fun () -> read_data (name_of_property p))

  (* Same as [load_property_tbl], keyed by property name.  Raises
     [Not_found] when [s] is not a known property name. *)
  let load_property_tbl_by_name s =
    load_property_tbl (property_of_name s)

  (* Property -> weak box holding the set loaded for it. *)
  let loaded_prop_sets = Hashtbl.create 0

  (* [load_property_set p] is the set counterpart of [load_property_tbl];
     the data name is the property name with "_set" appended. *)
  let load_property_set p =
    load_memoized loaded_prop_sets p
      (fun () -> read_data ((name_of_property p) ^ "_set"))

  (* Same as [load_property_set], keyed by property name.  Raises
     [Not_found] when [s] is not a known property name. *)
  let load_property_set_by_name s =
    load_property_set (property_of_name s)

  (* ------------------------------------------------------------------ *)
  (* Scripts                                                              *)
  (* ------------------------------------------------------------------ *)

  (* Loaded once, when the functor is applied. *)
  let script_tbl : UCharTbl.Bits.t = read_data "scripts"

  (* [script u] converts the number stored for [u] in [script_tbl] with
     [script_of_num]. *)
  let script u = script_of_num (UCharTbl.Bits.get script_tbl u)

  (* Reads the "scripts_map" data on every call (no caching). *)
  let load_script_map () = read_data "scripts_map"

  (* ------------------------------------------------------------------ *)
  (* Age                                                                  *)
  (* ------------------------------------------------------------------ *)

  type version_type =
    [ `Nc
    | `v1_0
    | `v1_1
    | `v2_0
    | `v2_1
    | `v3_0
    | `v3_1
    | `v3_2 ]

  (* Decodes the one-byte version code stored in the age table: the high
     nibble is the major number and the low nibble the minor one, with
     ['\xfe'] standing for [`Nc].  Fails with [Failure] on any other
     byte. *)
  let version_of_char = function
    | '\x10' -> `v1_0
    | '\x11' -> `v1_1
    | '\x20' -> `v2_0
    | '\x21' -> `v2_1
    | '\x30' -> `v3_0
    | '\x31' -> `v3_1
    | '\x32' -> `v3_2
    | '\xfe' -> `Nc
    | i ->
      failwith
        (Printf.sprintf "version_of_char, unknown version v%x" (Char.code i))

  (* Inverse of [version_of_char]. *)
  let version_to_char = function
    | `v1_0 -> '\x10'
    | `v1_1 -> '\x11'
    | `v2_0 -> '\x20'
    | `v2_1 -> '\x21'
    | `v3_0 -> '\x30'
    | `v3_1 -> '\x31'
    | `v3_2 -> '\x32'
    | `Nc -> '\xfe'

  (* Loaded once, when the functor is applied. *)
  let age_tbl : UCharTbl.Char.t = read_data "age"

  (* [age u] decodes the byte stored for [u] in [age_tbl]. *)
  let age u = version_of_char (UCharTbl.Char.get age_tbl u)

  (* [older v1 v2] compares the byte encodings, so it is true when [v1]
     is the same as or earlier than [v2]; [`Nc] ('\xfe') sorts after every
     numbered version. *)
  let older v1 v2 = (version_to_char v1) <= (version_to_char v2)

  (* ------------------------------------------------------------------ *)
  (* Casing                                                               *)
  (* ------------------------------------------------------------------ *)

  (* Weak slots: 0 = to_lower1, 1 = to_upper1, 2 = to_title1. *)
  let casing1_cache = Weak.create 3

  let load_to_lower1_tbl () = load_cached casing1_cache 0 "to_lower1"

  let load_to_upper1_tbl () = load_cached casing1_cache 1 "to_upper1"

  let load_to_title1_tbl () = load_cached casing1_cache 2 "to_title1"

  type casemap_condition =
    [ `Locale of string
    | `FinalSigma
    | `AfterSoftDotted
    | `MoreAbove
    | `BeforeDot
    | `Not of casemap_condition ]

  type special_casing_property =
    { lower : UChar.t list;
      title : UChar.t list;
      upper : UChar.t list;
      condition : casemap_condition list; }

  let conditional_casing_cache = Weak.create 1

  (* The result annotation pins the type of the value that [read_data]
     returns for "special_casing". *)
  let load_conditional_casing_tbl ()
    : special_casing_property list UCharTbl.t =
    load_cached conditional_casing_cache 0 "special_casing"

  let casefolding_cache = Weak.create 1

  let load_casefolding_tbl () =
    load_cached casefolding_cache 0 "case_folding"

  (* ------------------------------------------------------------------ *)
  (* Combined class                                                       *)
  (* ------------------------------------------------------------------ *)

  (* Loaded once, when the functor is applied. *)
  let combined_class_tbl : UCharTbl.Char.t = read_data "combined_class"

  (* [combined_class u] is the byte stored for [u] in
     [combined_class_tbl], as an integer in 0..255. *)
  let combined_class u =
    Char.code (UCharTbl.Char.get combined_class_tbl u)

  (* ------------------------------------------------------------------ *)
  (* Decomposition                                                        *)
  (* ------------------------------------------------------------------ *)

  let decomposition_cache = Weak.create 1

  let load_decomposition_tbl () =
    load_cached decomposition_cache 0 "decomposition"

  (* ------------------------------------------------------------------ *)
  (* Composition                                                          *)
  (* ------------------------------------------------------------------ *)

  let composition_cache = Weak.create 1

  let load_composition_tbl () =
    load_cached composition_cache 0 "composition"

  let composition_exclusion_cache = Weak.create 1

  let load_composition_exclusion_tbl () =
    load_cached composition_exclusion_cache 0 "composition_exclusion"
end