123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291# 1 "Camomile/public/caseMap.ml"(* Copyright (C) 2002, 2003, 2004 Yamagata Yoriyuki *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yoriyuki.y@gmail.com *)moduletypeType=sigtypetextvallowercase:?locale:string->text->textvaluppercase:?locale:string->text->textvalcapitalize:?locale:string->text->textvaltitlecase:?locale:string->text->textvalcasefolding:text->textvalcompare_caseless:text->text->intendmoduleMake(Config:ConfigInt.Type)(Text:UnicodeString.Type)=structmoduleUnidata=Unidata.Make(Config)moduleUCharInfo=UCharInfo.Make(Config)letuppercase_tbl=UCharInfo.load_property_tbl`Uppercaseletis_uppercaseu=UCharTbl.Bool.getuppercase_tbluletlowercase_tbl=UCharInfo.load_property_tbl`Lowercaseletis_lowercaseu=UCharTbl.Bool.getlowercase_tbluletconditional_casing_tbl=UCharInfo.load_conditional_casing_tbl()letconditional_casingu=UCharTbl.getconditional_casing_tbluletcasefolding_tbl=UCharInfo.load_casefolding_tbl()letcasefolding_charu=matchUCharTbl.getcasefolding_tbluwith[]->[u](* default *)|us->usletis_nullu=UChar.uint_codeu=0letto_lower1_tbl=UCharInfo.load_to_lower1_tbl()letto_lower1u=letu'=UCharTbl.getto_lower1_tbluinifis_nullu'thenuelseu'letto_upper1_tbl=UCharInfo.load_to_upper1_tbl()letto_upper1u=letu'=UCharTbl.getto_upper1_tbluinifis_nullu'thenuelseu'letto_title1_tbl=UCharInfo.load_to_title1_tbl()letto_title1u=letu'=UCharTbl.getto_title1_tbluinifis_nullu'thenuelseu'letis_case_ignorableu=letn=UChar.uint_codeuinifn=0x0027||n=0x00ad||n=0x2016thentrueelsematch(UCharInfo.general_categoryu)with`Mn->true|`Me->true|`Cf->true|`Lm->true|`Sk->true|_->false(* Fix me: "normalization clause" of UTR#25 is ommited. *)letis_casedu=is_uppercaseu||is_lowercaseu||UCharInfo.general_categoryu=`Lttypetext=Text.tletis_final_sigmati=letrecsee_backwardti=ifText.out_of_rangetithenfalseelseletu=Text.looktiinifis_case_ignorableuthensee_backwardt(Text.prevti)elseis_caseduinletrecsee_forwardti=ifText.out_of_rangetithenfalseelseletu=Text.looktiinifis_case_ignorableuthensee_forwardt(Text.nextti)elseis_caseduinsee_backwardt(Text.prevti)&¬(see_forwardt(Text.nextti))letis_more_aboveti=letrecsearchi=ifText.out_of_rangetithenfalseelseletu=Text.looktiinletc=UCharInfo.combined_classuinifc=0thenfalseelseifc=230thentrueelsesearch(Text.nextti)insearch(Text.nextti)letsoft_dotted_tbl=UCharInfo.load_property_tbl`Soft_Dottedletis_soft_dottedu=UCharTbl.Bool.getsoft_dotted_tbluletis_after_soft_dottedti=letrecsearchi=ifText.out_of_rangetithenfalseelseletu=Text.looktiinletc=UCharInfo.combined_classuinifc=0thenis_soft_dotteduelseifc=230thenfalseelsesearch(Text.prevti)insearch(Text.prevti)letis_before_dotti=letrecsearchi=ifText.out_of_rangetithenfalseelseletu=Text.looktiinifUChar.int_ofu=0x0307thentrueelseletc=UCharInfo.combined_classuinifc=0||c=230thenfalseelsesearch(Text.nextti)insearch(Text.nextti)letrecmatch_condition?localeticondition=matchconditionwith`Localeloc->(matchlocalewithNone->false|Someloc'->Locale.containlocloc')|`FinalSigma->is_final_sigmati|`MoreAbove->is_more_aboveti|`AfterSoftDotted->is_after_soft_dottedti|`BeforeDot->is_before_dotti|`Notcond->not(match_condition?localeticond)letis_matched_casing_property?localetiprop=List.for_all(match_condition?localeti)prop.UCharInfo.conditionletlowercase?localet=letbuf=Text.Buf.create0inletrecloopi=ifText.out_of_rangetithenText.Buf.contentsbufelseletu=Text.looktiin(matchconditional_casinguwith[]->Text.Buf.add_charbuf(to_lower1u)|conds->tryletp=is_matched_casing_property?localetiinletc=List.findpcondsinList.iter(Text.Buf.add_charbuf)c.lowerwithNot_found->Text.Buf.add_charbuf(to_lower1u));loop(Text.nextti)inloop(Text.ntht0)letuppercase?localet=letbuf=Text.Buf.create0inletrecloopi=ifText.out_of_rangetithenText.Buf.contentsbufelseletu=Text.looktiin(matchconditional_casinguwith[]->Text.Buf.add_charbuf(to_upper1u)|conds->tryletp=is_matched_casing_property?localetiinletc=List.findpcondsinList.iter(Text.Buf.add_charbuf)c.upperwithNot_found->Text.Buf.add_charbuf(to_upper1u));loop(Text.nextti)inloop(Text.ntht0)letcapitalize?localet=letbuf=Text.Buf.create0inletreccopyi=ifText.out_of_rangetithenText.Buf.contentsbufelseletu=Text.looktiinText.Buf.add_charbufu;copy(Text.nextti)inleti=Text.ntht0inifText.out_of_rangetithenText.Buf.contentsbufelseletu=Text.looktiin(matchconditional_casinguwith[]->Text.Buf.add_charbuf(to_title1u)|conds->tryletp=is_matched_casing_property?localetiinletc=List.findpcondsinList.iter(Text.Buf.add_charbuf)c.titlewithNot_found->Text.Buf.add_charbuf(to_title1u));copy(Text.nextti)lettitlecase?localet=letbuf=Text.Buf.create0inletrecloopis_headi=ifText.out_of_rangetithenText.Buf.contentsbufelseletu=Text.looktiin(matchconditional_casinguwith[]->letu'=ifis_headthento_title1uelseto_lower1uinText.Buf.add_charbufu'|conds->tryletp=is_matched_casing_property?localetiinletc=List.findpcondsinletus=ifis_headthenc.titleelsec.lowerinList.iter(Text.Buf.add_charbuf)uswithNot_found->letu'=ifis_headthento_title1uelseto_lower1uinText.Buf.add_charbufu');letis_head=ifis_case_ignorableuthenis_headelsenot(is_casedu)inloopis_head(Text.nextti)inlooptrue(Text.ntht0)letcasefoldingt=letbuf=Text.Buf.create0inText.iter(funu->letus=casefolding_charuinList.iter(Text.Buf.add_charbuf)us)t;Text.Buf.contentsbufletcompare_caselesst1t2=letrecloopi1i2us1=function[]->ifText.out_of_ranget2i2thenifus1=[]&&Text.out_of_ranget1i1then0else1elseletu=Text.lookt2i2inloopi1(Text.nextt2i2)us1(casefolding_charu)|(u::r)asus2->matchus1with[]->ifText.out_of_ranget1i1then-1elseletu=Text.lookt1i1inloop(Text.nextt1i1)i2(casefolding_charu)us2|u'::r'->letsgn=UChar.compareu'uinifsgn=0thenloopi1i2r'relsesgninloop(Text.firstt1)(Text.firstt2)[][]end