123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152(**************************************************************************)(* Lablgtk *)(* *)(* This program is free software; you can redistribute it *)(* and/or modify it under the terms of the GNU Library General *)(* Public License as published by the Free Software Foundation *)(* version 2, with the exception described in file COPYING which *)(* comes with the library. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Library General Public License for more details. *)(* *)(* You should have received a copy of the GNU Library General *)(* Public License along with this program; if not, write to the *)(* Free Software Foundation, Inc., 59 Temple Place, Suite 330, *)(* Boston, MA 02111-1307 USA *)(* *)(* *)(**************************************************************************)openStdLabelstypeunichar=inttypeunistring=unichararraymoduleError=structtypeerror=|NO_CONVERSION|ILLEGAL_SEQUENCE|FAILED|PARTIAL_INPUT|BAD_URI|NOT_ABSOLUTE_PATHexceptionErroroferror*stringletraise_bad_utf8()=raise(Error(ILLEGAL_SEQUENCE,"Invalid byte sequence for UTF-8 string"))endopenErrorletreclog64n=ifn=0then0else1+log64(nlsr5)letutf8_storage_lenn=ifn<0x80then1elselog64(nlsr1)(* this function is not exported, so it's OK to do a few 'unsafe' things *)letwrite_unichars~pos(c:unichar)=letlen=utf8_storage_lencinletp=!posiniflen=1thenBytes.unsafe_setsp(Char.unsafe_chrc)elsebeginBytes.unsafe_setsp(Char.unsafe_chr(((1lsllen-1)lsl(8-len))lor(clsr((len-1)*6))));fori=1tolen-1doBytes.unsafe_sets(p+i)(Char.unsafe_chr(((clsr((len-1-i)*6))land0x3f)lor0x80))done;end;pos:=p+lenletsub_strings~pos~len=Bytes.sub_stringsposlenletfrom_unichar(n:unichar)=lets=Bytes.create6andpos=ref0inwrite_unichars~posn;sub_strings~pos:0~len:!posletfrom_unistring(s:unistring)=letlen=Array.lengthsinletr=Bytes.create(len*6)inletpos=ref0infori=0tolen-1dowrite_unicharr~poss.(i)done;sub_stringr~pos:0~len:!posletrechi_bitsn=ifnland0x80=0then0else1+hi_bits(nlsl1)letto_unichars~pos:unichar=letc=Char.codes.[!pos]inincrpos;letn=hi_bitscinifn=0thencelse(* if string is valid then 2 <= n <= 6 *)letu=ref(cland(1lsl(7-n)-1))infori=1ton-1doletc=Char.codes.[!pos]inu:=!ulsl6+cland0x3f;incrposdone;!uletfirst_chars=to_unichars~pos:(ref0)letvalidatec=c<0x110000&&(cland0x7FFFF800)<>0xD800&&(c<0xFDD0||c>0xFDEF)&&(cland0xFFFE)<>0xFFFEletto_unichar_validateds~pos:unichar=letc=Char.codes.[!pos]inincrpos;letn=hi_bitscinifn=0thencelsebeginifn=1||n>6thenraise_bad_utf8();if!pos+n>String.lengthsthenraise(Error(PARTIAL_INPUT,"partial UTF-8 character"));letu=ref(cland(1lsl(7-n)-1))infori=1ton-1doletc=Char.codes.[!pos]inifclsr6<>0b10thenraise_bad_utf8();u:=!ulsl6+cland0x3f;incrposdone;letv=!uin(* reject overlong sequences && invalid values *)ifutf8_storage_lenv<>n||not(validatev)thenraise_bad_utf8();vendletrecend_of_chars~pos=letc=Char.codes.[pos]inif(cland0xc0)=0x80thenend_of_chars~pos:(pos+1)elseposletnexts~pos=letc=Char.codes.[pos]inletn=hi_bitscinifn=0thenpos+1elseifn=1thenend_of_chars~pos:(pos+1)elsepos+nletlengths=letlen=String.lengthsinletrecloopcount~pos=ifpos>=lenthencountelseloop(count+1)~pos:(nexts~pos)inloop0~pos:0letto_unistrings:unistring=letlen=lengthsinletus=Array.makelen0inletpos=ref0infori=0tolen-1dous.(i)<-to_unichars~posdone;us