12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214(*---------------------------------------------------------------------------
Copyright (c) 2007 The xmlm programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
---------------------------------------------------------------------------*)moduleStd_string=StringmoduleStd_buffer=Buffertypestd_string=stringtypestd_buffer=Buffer.tmoduletypeString=sigtypetvalempty:tvallength:t->intvalappend:t->t->tvallowercase:t->tvaliter:(int->unit)->t->unitvalof_string:std_string->tvalto_utf_8:('a->std_string->'a)->'a->t->'avalcompare:t->t->intendmoduletypeBuffer=sigtypestringtypetexceptionFullvalcreate:int->tvaladd_uchar:t->int->unitvalclear:t->unitvalcontents:t->stringvallength:t->intendmoduletypeS=sigtypestringtypeencoding=[|`UTF_8|`UTF_16|`UTF_16BE|`UTF_16LE|`ISO_8859_1|`US_ASCII]typedtd=stringoptiontypename=string*stringtypeattribute=name*stringtypetag=name*attributelisttypesignal=[`Dtdofdtd|`El_startoftag|`El_end|`Dataofstring]valns_xml:stringvalns_xmlns:stringtypepos=int*inttypeerror=[|`Max_buffer_size|`Unexpected_eoi|`Malformed_char_stream|`Unknown_encodingofstring|`Unknown_entity_refofstring|`Unknown_ns_prefixofstring|`Illegal_char_refofstring|`Illegal_char_seqofstring|`Expected_char_seqsofstringlist*string|`Expected_root_element]exceptionErrorofpos*errorvalerror_message:error->stringtypesource=[|`Channelofin_channel|`Stringofint*std_string|`Funof(unit->int)]typeinputvalmake_input:?enc:encodingoption->?strip:bool->?ns:(string->stringoption)->?entity:(string->stringoption)->source->inputvalinput:input->signalvalinput_tree:el:(tag->'alist->'a)->data:(string->'a)->input->'avalinput_doc_tree:el:(tag->'alist->'a)->data:(string->'a)->input->(dtd*'a)valpeek:input->signalvaleoi:input->boolvalpos:input->postype'afrag=[`Eloftag*'alist|`Dataofstring]typedest=[|`Channelofout_channel|`Bufferofstd_buffer|`Funof(int->unit)]typeoutputvalmake_output:?decl:bool->?nl:bool->?indent:intoption->?ns_prefix:(string->stringoption)->dest->outputvaloutput_depth:output->intvaloutput:output->signal->unitvaloutput_tree:('a->'afrag)->output->'a->unitvaloutput_doc_tree:('a->'afrag)->output->(dtd*'a)->unitend(* Unicode character lexers *)exceptionMalformed(* for character stream, internal only. *)letutf8_len=[|(* Char byte length according to first UTF-8 byte. *)1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;3;3;3;3;3;3;3;3;3;3;4;4;4;4;4;0;0;0;0;0;0;0;0;0;0;0|]letuchar_utf8i=letb0=i()inbeginmatchutf8_len.(b0)with|0->raiseMalformed|1->b0|2->letb1=i()inifb1lsr6!=0b10thenraiseMalformedelse((b0land0x1F)lsl6)lor(b1land0x3F)|3->letb1=i()inletb2=i()inifb2lsr6!=0b10thenraiseMalformedelsebeginmatchb0with|0xE0->ifb1<0xA0||0xBF<b1thenraiseMalformedelse()|0xED->ifb1<0x80||0x9F<b1thenraiseMalformedelse()|_->ifb1lsr6!=0b10thenraiseMalformedelse()end;((b0land0x0F)lsl12)lor((b1land0x3F)lsl6)lor(b2land0x3F)|4->letb1=i()inletb2=i()inletb3=i()inifb3lsr6!=0b10||b2lsr6!=0b10thenraiseMalformedelsebeginmatchb0with|0xF0->ifb1<0x90||0xBF<b1thenraiseMalformedelse()|0xF4->ifb1<0x80||0x8F<b1thenraiseMalformedelse()|_->ifb1lsr6!=0b10thenraiseMalformedelse()end;((b0land0x07)lsl18)lor((b1land0x3F)lsl12)lor((b2land0x3F)lsl6)lor(b3land0x3F)|_->assertfalseendletint16_bei=letb0=i()inletb1=i()in(b0lsl8)lorb1letint16_lei=letb0=i()inletb1=i()in(b1lsl8)lorb0letuchar_utf16int16i=letc0=int16iinifc0<0xD800||c0>0xDFFFthenc0elseifc0>0xDBFFthenraiseMalformedelseletc1=int16iin(((c0land0x3FF)lsl10)lor(c1land0x3FF))+0x10000letuchar_utf16be=uchar_utf16int16_beletuchar_utf16le=uchar_utf16int16_leletuchar_bytei=i()letuchar_iso_8859_1i=i()letuchar_asciii=letb=i()inifb>127thenraiseMalformedelseb(* Functorized streaming XML IO *)moduleMake(String:String)(Buffer:Bufferwithtypestring=String.t)=structtypestring=String.tletstr=String.of_stringletstr_eqss'=(comparess')=0letstr_emptys=(comparesString.empty)=0letcat=String.appendletstr_of_charu=letb=Buffer.create4inBuffer.add_ucharbu;Buffer.contentsbmoduleHt=Hashtbl.Make(structtypet=stringletequal=str_eqlethash=Hashtbl.hashend)letu_nl=0x000A(* newline *)letu_cr=0x000D(* carriage return *)letu_space=0x0020(* space *)letu_quot=0x0022(* quote *)letu_sharp=0x0023(* # *)letu_amp=0x0026(* & *)letu_apos=0x0027(* ' *)letu_minus=0x002D(* - *)letu_slash=0x002F(* / *)letu_colon=0x003A(* : *)letu_scolon=0x003B(* ; *)letu_lt=0x003C(* < *)letu_eq=0x003D(* = *)letu_gt=0x003E(* > *)letu_qmark=0x003F(* ? *)letu_emark=0x0021(* ! *)letu_lbrack=0x005B(* [ *)letu_rbrack=0x005D(* ] *)letu_x=0x0078(* x *)letu_bom=0xFEFF(* BOM *)letu_9=0x0039(* 9 *)letu_F=0x0046(* F *)letu_D=0X0044(* D *)lets_cdata=str"CDATA["letns_xml=str"http://www.w3.org/XML/1998/namespace"letns_xmlns=str"http://www.w3.org/2000/xmlns/"letn_xml=str"xml"letn_xmlns=str"xmlns"letn_space=str"space"letn_version=str"version"letn_encoding=str"encoding"letn_standalone=str"standalone"letv_yes=str"yes"letv_no=str"no"letv_preserve=str"preserve"letv_default=str"default"letv_version_1_0=str"1.0"letv_version_1_1=str"1.1"letv_utf_8=str"utf-8"letv_utf_16=str"utf-16"letv_utf_16be=str"utf-16be"letv_utf_16le=str"utf-16le"letv_iso_8859_1=str"iso-8859-1"letv_us_ascii=str"us-ascii"letv_ascii=str"ascii"letname_str(p,l)=ifstr_emptypthenlelsecatp(cat(str":")l)(* Basic types and values *)typeencoding=[|`UTF_8|`UTF_16|`UTF_16BE|`UTF_16LE|`ISO_8859_1|`US_ASCII]typedtd=stringoptiontypename=string*stringtypeattribute=name*stringtypetag=name*attributelisttypesignal=[`Dtdofdtd|`El_startoftag|`El_end|`Dataofstring](* Input *)typepos=int*inttypeerror=[|`Max_buffer_size|`Unexpected_eoi|`Malformed_char_stream|`Unknown_encodingofstring|`Unknown_entity_refofstring|`Unknown_ns_prefixofstring|`Illegal_char_refofstring|`Illegal_char_seqofstring|`Expected_char_seqsofstringlist*string|`Expected_root_element]exceptionErrorofpos*errorleterror_messagee=letbracketlvr=cat(strl)(catv(strr))inmatchewith|`Expected_root_element->str"expected root element"|`Max_buffer_size->str"maximal buffer size exceeded"|`Unexpected_eoi->str"unexpected end of input"|`Malformed_char_stream->str"malformed character stream"|`Unknown_encodinge->bracket"unknown encoding ("e")"|`Unknown_entity_refe->bracket"unknown entity reference ("e")"|`Unknown_ns_prefixe->bracket"unknown namespace prefix ("e")"|`Illegal_char_refs->bracket"illegal character reference (#"s")"|`Illegal_char_seqs->bracket"character sequence illegal here (\""s"\")"|`Expected_char_seqs(exps,fnd)->letexps=letexpaccv=catacc(bracket"\""v"\", ")inList.fold_leftexpString.emptyexpsincat(str"expected one of these character sequence: ")(catexps(bracket"found \""fnd"\""))typelimit=(* XML is odd to parse. *)|Stagofname(* '<' qname *)|Etagofname(* '</' qname whitespace* *)|Piofname(* '<?' qname *)|Comment(* '<!--' *)|Cdata(* '<![CDATA[' *)|Dtd(* '<!' *)|Text(* other character *)|Eoi(* End of input *)typesource=[|`Channelofin_channel|`Stringofint*std_string|`Funof(unit->int)]typeinput={enc:encodingoption;(* Expected encoding. *)strip:bool;(* Whitespace stripping default behaviour. *)fun_ns:string->stringoption;(* Namespace callback. *)fun_entity:string->stringoption;(* Entity reference callback. *)i:unit->int;(* Byte level input. *)mutableuchar:(unit->int)->int;(* Unicode character lexer. *)mutablec:int;(* Character lookahead. *)mutablecr:bool;(* True if last u was '\r'. *)mutableline:int;(* Current line number. *)mutablecol:int;(* Current column number. *)mutablelimit:limit;(* Last parsed limit. *)mutablepeek:signal;(* Signal lookahead. *)mutablestripping:bool;(* True if stripping whitespace. *)mutablelast_white:bool;(* True if last char was white. *)mutablescopes:(name*stringlist*bool)list;(* Stack of qualified el. name, bound prefixes and strip behaviour. *)ns:stringHt.t;(* prefix -> uri bindings. *)ident:Buffer.t;(* Buffer for names and entity refs. *)data:Buffer.t;}(* Buffer for character and attribute data. *)leterr_input_tree="input signal not `El_start or `Data"leterr_input_doc_tree="input signal not `Dtd"leterrie=raise(Error((i.line,i.col),e))leterr_illegal_chariu=erri(`Illegal_char_seq(str_of_charu))leterr_expected_seqsiexpss=erri(`Expected_char_seqs(exps,s))leterr_expected_charsiexps=erri(`Expected_char_seqs(List.mapstr_of_charexps,str_of_chari.c))letu_eoi=max_intletu_start_doc=u_eoi-1letu_end_doc=u_start_doc-1letsignal_start_stream=`DataString.emptyletmake_input?(enc=None)?(strip=false)?(ns=fun_->None)?(entity=fun_->None)src=leti=matchsrcwith|`Funf->f|`Channelic->fun()->input_byteic|`String(pos,s)->letlen=Std_string.lengthsinletpos=ref(pos-1)infun()->incrpos;if!pos=lenthenraiseEnd_of_fileelseChar.code(Std_string.gets!pos)inletbindings=leth=Ht.create15inHt.addhString.emptyString.empty;Ht.addhn_xmlns_xml;Ht.addhn_xmlnsns_xmlns;hin{enc=enc;strip=strip;fun_ns=ns;fun_entity=entity;i=i;uchar=uchar_byte;c=u_start_doc;cr=false;line=1;col=0;limit=Text;peek=signal_start_stream;stripping=strip;last_white=true;scopes=[];ns=bindings;ident=Buffer.create64;data=Buffer.create1024;}(* Bracketed non-terminals in comments refer to XML 1.0 non terminals *)letr:int->int->int->bool=funuab->a<=u&&u<=bletis_white=function0x0020|0x0009|0x000D|0x000A->true|_->falseletis_char=function(* {Char} *)|uwhenru0x00200xD7FF->true|0x0009|0x000A|0x000D->true|uwhenru0xE0000xFFFD||ru0x100000x10FFFF->true|_->falseletis_digitu=ru0x00300x0039letis_hex_digitu=ru0x00300x0039||ru0x00410x0046||ru0x00610x0066letcomm_rangeu=(* common to functions below *)ru0x00C00x00D6||ru0x00D80x00F6||ru0x00F80x02FF||ru0x03700x037D||ru0x037F0x1FFF||ru0x200C0x200D||ru0x20700x218F||ru0x2C000x2FEF||ru0x30010xD7FF||ru0xF9000xFDCF||ru0xFDF00xFFFD||ru0x100000xEFFFFletis_name_start_char=function(* {NameStartChar} - ':' (XML 1.1) *)|uwhenru0x00610x007A||ru0x00410x005A->true(* [a-z] | [A-Z] *)|uwhenis_whiteu->false|0x005F->true(* '_' *)|uwhencomm_rangeu->true|_->falseletis_name_char=function(* {NameChar} - ':' (XML 1.1) *)|uwhenru0x00610x007A||ru0x00410x005A->true(* [a-z] | [A-Z] *)|uwhenis_whiteu->false|uwhenru0x00300x0039->true(* [0-9] *)|0x005F|0x002D|0x002E|0x00B7->true(* '_' '-' '.' *)|uwhencomm_rangeu||ru0x03000x036F||ru0x203F0x2040->true|_->falseletrecnextci=ifi.c=u_eoithenerri`Unexpected_eoi;ifi.c=u_nlthen(i.line<-i.line+1;i.col<-1)elsei.col<-i.col+1;i.c<-i.uchari.i;ifnot(is_chari.c)thenraiseMalformed;ifi.cr&&i.c=u_nltheni.c<-i.uchari.i;(* cr nl business *)ifi.c=u_crthen(i.cr<-true;i.c<-u_nl)elsei.cr<-falseletnextc_eofi=trynextciwithEnd_of_file->i.c<-u_eoiletskip_whitei=while(is_whitei.c)donextcidoneletskip_white_eofi=while(is_whitei.c)donextc_eofidoneletacceptic=ifi.c=cthennextcielseerr_expected_charsi[c]letclear_identi=Buffer.cleari.identletclear_datai=Buffer.cleari.dataletaddc_identic=Buffer.add_uchari.identcletaddc_dataic=Buffer.add_uchari.datacletaddc_data_stripic=ifis_whitectheni.last_white<-trueelsebeginifi.last_white&&Buffer.lengthi.data<>0thenaddc_dataiu_space;i.last_white<-false;addc_dataicendletexpand_namei(prefix,local)=letexternal_prefix=matchi.fun_nsprefixwith|None->erri(`Unknown_ns_prefixprefix)|Someuri->uriintryleturi=Ht.findi.nsprefixinifnot(str_emptyuri)then(uri,local)elseifstr_emptyprefixthenString.empty,localelse(external_prefix),local(* unbound with xmlns:prefix="" *)withNot_found->external_prefix,localletfind_encodingi=(* Encoding mess. *)letresetuchari=i.uchar<-uchar;i.col<-0;nextciinmatchi.encwith|None->(* User doesn't know encoding. *)beginmatchnextci;i.cwith|0xFE->(* UTF-16BE BOM. *)nextci;ifi.c<>0xFFthenerri`Malformed_char_stream;resetuchar_utf16bei;true|0xFF->(* UTF-16LE BOM. *)nextci;ifi.c<>0xFEthenerri`Malformed_char_stream;resetuchar_utf16lei;true|0xEF->(* UTF-8 BOM. *)nextci;ifi.c<>0xBBthenerri`Malformed_char_stream;nextci;ifi.c<>0xBFthenerri`Malformed_char_stream;resetuchar_utf8i;true|0x3C|_->(* UTF-8 or other, try declaration. *)i.uchar<-uchar_utf8;falseend|Somee->(* User knows encoding. *)beginmatchewith|`US_ASCII->resetuchar_asciii|`ISO_8859_1->resetuchar_iso_8859_1i|`UTF_8->(* Skip BOM if present. *)resetuchar_utf8i;ifi.c=u_bomthen(i.col<-0;nextci)|`UTF_16->(* Which UTF-16 ? look BOM. *)letb0=nextci;i.cinletb1=nextci;i.cinbeginmatchb0,b1with|0xFE,0xFF->resetuchar_utf16bei|0xFF,0xFE->resetuchar_utf16lei|_->erri`Malformed_char_stream;end|`UTF_16BE->(* Skip BOM if present. *)resetuchar_utf16bei;ifi.c=u_bomthen(i.col<-0;nextci)|`UTF_16LE->resetuchar_utf16lei;ifi.c=u_bomthen(i.col<-0;nextci)end;true(* Ignore xml declaration. *)letp_ncnamei=(* {NCName} (Namespace 1.1) *)clear_identi;ifnot(is_name_start_chari.c)thenerr_illegal_charii.celsebeginaddc_identii.c;nextci;whileis_name_chari.cdoaddc_identii.c;nextcidone;Buffer.contentsi.identendletp_qnamei=(* {QName} (Namespace 1.1) *)letn=p_ncnameiinifi.c<>u_colonthen(String.empty,n)else(nextci;(n,p_ncnamei))letp_charrefi=(* {CharRef}, '&' was eaten. *)letc=ref0inclear_identi;nextci;ifi.c=u_scolonthenerri(`Illegal_char_refString.empty)elsebegintryifi.c=u_xthenbeginaddc_identii.c;nextci;while(i.c<>u_scolon)doaddc_identii.c;ifnot(is_hex_digiti.c)thenraiseExitelsec:=!c*16+(ifi.c<=u_9theni.c-48elseifi.c<=u_Ftheni.c-55elsei.c-87);nextci;doneendelsewhile(i.c<>u_scolon)doaddc_identii.c;ifnot(is_digiti.c)thenraiseExitelsec:=!c*10+(i.c-48);nextcidonewithExit->c:=-1;whilei.c<>u_scolondoaddc_identii.c;nextcidoneend;nextci;ifis_char!cthen(clear_identi;addc_identi!c;Buffer.contentsi.ident)elseerri(`Illegal_char_ref(Buffer.contentsi.ident))letpredefined_entities=leth=Ht.create5inletekv=Ht.addh(strk)(strv)ine"lt""<";e"gt"">";e"amp""&";e"apos""'";e"quot""\"";hletp_entity_refi=(* {EntityRef}, '&' was eaten. *)letent=p_ncnameiinacceptiu_scolon;tryHt.findpredefined_entitiesentwithNot_found->matchi.fun_entityentwith|Somes->s|None->erri(`Unknown_entity_refent)letp_referencei=(* {Reference} *)nextci;ifi.c=u_sharpthenp_charrefielsep_entity_refiletp_attr_valuei=(* {S}? {AttValue} *)skip_whitei;letdelim=ifi.c=u_quot||i.c=u_apostheni.celseerr_expected_charsi[u_quot;u_apos]innextci;skip_whitei;clear_datai;i.last_white<-true;while(i.c<>delim)doifi.c=u_ltthenerr_illegal_chariu_ltelseifi.c=u_ampthenString.iter(addc_data_stripi)(p_referencei)else(addc_data_stripii.c;nextci)done;nextci;Buffer.contentsi.dataletp_attributesi=(* ({S} {Attribute})* {S}? *)letrecauxipre_accacc=ifnot(is_whitei.c)thenpre_acc,accelsebeginskip_whitei;ifi.c=u_slash||i.c=u_gtthenpre_acc,accelsebeginlet(prefix,local)asn=p_qnameiinletv=skip_whitei;acceptiu_eq;p_attr_valueiinletatt=n,vinifstr_emptyprefix&&str_eqlocaln_xmlnsthenbegin(* xmlns *)Ht.addi.nsString.emptyv;auxi(String.empty::pre_acc)(att::acc)endelseifstr_eqprefixn_xmlnsthenbegin(* xmlns:local *)Ht.addi.nslocalv;auxi(local::pre_acc)(att::acc)endelseifstr_eqprefixn_xml&&str_eqlocaln_spacethenbegin(* xml:space *)ifstr_eqvv_preservetheni.stripping<-falseelseifstr_eqvv_defaulttheni.stripping<-i.stripelse();auxipre_acc(att::acc)endelseauxipre_acc(att::acc)endendinauxi[][](* Returns a list of bound prefixes and attributes *)letp_limiti=(* Parses a markup limit *)i.limit<-ifi.c=u_eoithenEoielseifi.c<>u_ltthenTextelsebeginnextci;ifi.c=u_qmarkthen(nextci;Pi(p_qnamei))elseifi.c=u_slashthenbeginnextci;letn=p_qnameiinskip_whitei;Etagnendelseifi.c=u_emarkthenbeginnextci;ifi.c=u_minusthen(nextci;acceptiu_minus;Comment)elseifi.c=u_DthenDtdelseifi.c=u_lbrackthenbeginnextci;clear_identi;fork=1to6do(addc_identii.c;nextci)done;letcdata=Buffer.contentsi.identinifstr_eqcdatas_cdatathenCdataelseerr_expected_seqsi[s_cdata]cdataendelseerri(`Illegal_char_seq(cat(str"<!")(str_of_chari.c)))endelseStag(p_qnamei)endletrecskip_commenti=(* {Comment}, '<!--' was eaten *)while(i.c<>u_minus)donextcidone;nextci;ifi.c<>u_minusthenskip_commentielsebeginnextci;ifi.c<>u_gtthenerr_expected_charsi[u_gt];nextc_eofiendletrecskip_pii=(* {PI}, '<?' qname was eaten *)while(i.c<>u_qmark)donextcidone;nextci;ifi.c<>u_gtthenskip_piielsenextc_eofiletrecskip_misci~allow_xmlpi=matchi.limitwith(* {Misc}* *)|Pi(p,l)when(str_emptyp&&str_eqn_xml(String.lowercasel))->ifallow_xmlpithen()elseerri(`Illegal_char_seql)|Pi_->skip_pii;p_limiti;skip_misci~allow_xmlpi|Comment->skip_commenti;p_limiti;skip_misci~allow_xmlpi|Textwhenis_whitei.c->skip_white_eofi;p_limiti;skip_misci~allow_xmlpi|_->()letp_chardataaddci=(* {CharData}* ({Reference}{Chardata})* *)while(i.c<>u_lt)doifi.c=u_ampthenString.iter(addci)(p_referencei)elseifi.c=u_rbrackthenbeginaddcii.c;nextci;ifi.c=u_rbrackthenbeginaddcii.c;nextci;(* detects ']'*']]>' *)while(i.c=u_rbrack)doaddcii.c;nextcidone;ifi.c=u_gtthenerri(`Illegal_char_seq(str"]]>"));endendelse(addcii.c;nextci)doneletrecp_cdataaddci=(* {CData} {CDEnd} *)trywhile(true)doifi.c=u_rbrackthenbeginnextci;whilei.c=u_rbrackdonextci;ifi.c=u_gtthen(nextci;raiseExit);addciu_rbrackdone;addciu_rbrack;end;addcii.c;nextci;donewithExit->()letp_xml_decli~ignore_enc~ignore_utf16=(* {XMLDecl}? *)letyes_no=[v_yes;v_no]inletp_vali=skip_whitei;acceptiu_eq;skip_whitei;p_attr_valueiinletp_val_expiexp=letv=p_valiinifnot(List.exists(str_eqv)exp)thenerr_expected_seqsiexpvinmatchi.limitwith|Pi(p,l)when(str_emptyp&&str_eqln_xml)->letv=skip_whitei;p_ncnameiinifnot(str_eqvn_version)thenerr_expected_seqsi[n_version]v;p_val_expi[v_version_1_0;v_version_1_1];skip_whitei;ifi.c<>u_qmarkthenbeginletn=p_ncnameiinifstr_eqnn_encodingthenbeginletenc=String.lowercase(p_vali)inifnotignore_encthenbeginifstr_eqencv_utf_8theni.uchar<-uchar_utf8elseifstr_eqencv_utf_16betheni.uchar<-uchar_utf16beelseifstr_eqencv_utf_16letheni.uchar<-uchar_utf16leelseifstr_eqencv_iso_8859_1theni.uchar<-uchar_iso_8859_1elseifstr_eqencv_us_asciitheni.uchar<-uchar_asciielseifstr_eqencv_asciitheni.uchar<-uchar_asciielseifstr_eqencv_utf_16thenifignore_utf16then()else(erri`Malformed_char_stream)(* A BOM should have been found. *)elseerri(`Unknown_encodingenc)end;skip_whitei;ifi.c<>u_qmarkthenbeginletn=p_ncnameiinifstr_eqnn_standalonethenp_val_expiyes_noelseerr_expected_seqsi[n_standalone;str"?>"]nendendelseifstr_eqnn_standalonethenp_val_expiyes_noelseerr_expected_seqsi[n_encoding;n_standalone;str"?>"]nend;skip_whitei;acceptiu_qmark;acceptiu_gt;p_limiti|_->()letp_dtd_signali=(* {Misc}* {doctypedecl} {Misc}* *)skip_misci~allow_xmlpi:false;ifi.limit<>Dtdthen`DtdNoneelsebeginletbuf=addc_dataiinletnest=ref1inclear_datai;bufu_lt;bufu_emark;(* add eaten "<!" *)while(!nest>0)doifi.c=u_ltthenbeginnextci;ifi.c<>u_emarkthen(bufu_lt;incrnest)elsebeginnextci;ifi.c<>u_minusthen(* Carefull with comments ! *)(bufu_lt;bufu_emark;incrnest)elsebeginnextci;ifi.c<>u_minusthen(bufu_lt;bufu_emark;bufu_minus;incrnest)else(nextci;skip_commenti)endendendelseifi.c=u_quot||i.c=u_aposthenbeginletc=i.cinbufc;nextci;while(i.c<>c)do(bufi.c;nextci)done;bufc;nextciendelseifi.c=u_gtthen(bufu_gt;nextci;decrnest)else(bufi.c;nextci)done;letdtd=Buffer.contentsi.datainp_limiti;skip_misci~allow_xmlpi:false;`Dtd(Somedtd);endletp_datai=letrecbufferizeaddci=matchi.limitwith|Text->p_chardataaddci;p_limiti;bufferizeaddci|Cdata->p_cdataaddci;p_limiti;bufferizeaddci|(Stag_|Etag_)->()|Pi_->skip_pii;p_limiti;bufferizeaddci|Comment->skip_commenti;p_limiti;bufferizeaddci|Dtd->erri(`Illegal_char_seq(str"<!D"))|Eoi->erri`Unexpected_eoiinclear_datai;i.last_white<-true;bufferize(ifi.strippingthenaddc_data_stripelseaddc_data)i;letd=Buffer.contentsi.dataindletp_el_start_signalin=letexpand_att(((prefix,local)asn,v)asatt)=ifnot(str_eqprefixString.empty)thenexpand_namein,velseifstr_eqlocaln_xmlnsthen(ns_xmlns,n_xmlns),velseatt(* default namespaces do not influence attributes. *)inletstrip=i.strippingin(* save it here, p_attributes may change it. *)letprefixes,atts=p_attributesiini.scopes<-(n,prefixes,strip)::i.scopes;`El_start((expand_namein),List.rev_mapexpand_attatts)letp_el_end_signalin=matchi.scopeswith|(n',prefixes,strip)::scopes->ifi.c<>u_gtthenerr_expected_charsi[u_gt];ifnot(str_eqnn')thenerr_expected_seqsi[name_strn'](name_strn);i.scopes<-scopes;i.stripping<-strip;List.iter(Ht.removei.ns)prefixes;ifscopes=[]theni.c<-u_end_docelse(nextci;p_limiti);`El_end|_->assertfalseletp_signali=ifi.scopes=[]thenmatchi.limitwith|Stagn->p_el_start_signalin|_->erri`Expected_root_elementelseletrecfindi=matchi.limitwith|Stagn->p_el_start_signalin|Etagn->p_el_end_signalin|Text|Cdata->letd=p_dataiinifstr_emptydthenfindielse`Datad|Pi_->skip_pii;p_limiti;findi|Comment->skip_commenti;p_limiti;findi|Dtd->erri(`Illegal_char_seq(str"<!D"))|Eoi->erri`Unexpected_eoiinbeginmatchi.peekwith|`El_start(n,_)->(* finish to input start el. *)skip_whitei;ifi.c=u_gtthen(acceptiu_gt;p_limiti)elseifi.c=u_slashthenbeginlettag=matchi.scopeswith|(tag,_,_)::_->tag|_->assertfalsein(nextci;i.limit<-Etagtag)endelseerr_expected_charsi[u_slash;u_gt]|_->()end;findileteoii=tryifi.c=u_eoithentrueelseifi.c<>u_start_docthenfalseelse(* In a document. *)ifi.peek<>`El_endthen(* Start of document sequence. *)beginletignore_enc=find_encodingiinp_limiti;p_xml_decli~ignore_enc~ignore_utf16:false;i.peek<-p_dtd_signali;falseendelse(* Subsequent documents. *)beginnextc_eofi;p_limiti;ifi.c=u_eoithentrueelsebeginskip_misci~allow_xmlpi:true;ifi.c=u_eoithentrueelsebeginp_xml_decli~ignore_enc:false~ignore_utf16:true;i.peek<-p_dtd_signali;falseendendendwith|Buffer.Full->erri`Max_buffer_size|Malformed->erri`Malformed_char_stream|End_of_file->erri`Unexpected_eoiletpeeki=ifeoiithenerri`Unexpected_eoielsei.peekletinputi=tryifi.c=u_end_docthen(i.c<-u_start_doc;i.peek)elselets=peekiini.peek<-p_signali;swith|Buffer.Full->erri`Max_buffer_size|Malformed->erri`Malformed_char_stream|End_of_file->erri`Unexpected_eoiletinput_tree~el~datai=matchinputiwith|`Datad->datad|`El_starttag->letrecauxitagscontext=matchinputiwith|`El_starttag->auxi(tag::tags)([]::context)|`El_end->beginmatchtags,contextwith|tag::tags',childs::context'->letel=eltag(List.revchilds)inbeginmatchcontext'with|parent::context''->auxitags'((el::parent)::context'')|[]->elend|_->assertfalseend|`Datad->beginmatchcontextwith|childs::context'->auxitags(((datad)::childs)::context')|[]->assertfalseend|`Dtd_->assertfalseinauxi(tag::[])([]::[])|_->invalid_argerr_input_treeletinput_doc_tree~el~datai=matchinputiwith|`Dtdd->d,input_tree~el~datai|_->invalid_argerr_input_doc_treeletposi=i.line,i.col(* Output *)type'afrag=[`Eloftag*'alist|`Dataofstring]typedest=[|`Channelofout_channel|`Bufferofstd_buffer|`Funof(int->unit)]typeoutput={decl:bool;(* True if the XML declaration should be output. *)nl:bool;(* True if a newline is output at the end. *)indent:intoption;(* Optional indentation. *)fun_prefix:string->stringoption;(* Prefix callback. *)prefixes:stringHt.t;(* uri -> prefix bindings. *)outs:std_string->int->int->unit;(* String output. *)outc:char->unit;(* character output. *)mutablelast_el_start:bool;(* True if last signal was `El_start *)mutablescopes:(name*(stringlist))list;(* Qualified el. name and bound uris. *)mutabledepth:int;}(* Scope depth. *)leterr_prefixuri="unbound namespace ("^uri^")"leterr_dtd="dtd signal not allowed here"leterr_el_start="start signal not allowed here"leterr_el_end="end signal without matching start signal"leterr_data="data signal not allowed here"letmake_output?(decl=true)?(nl=false)?(indent=None)?(ns_prefix=fun_->None)d=letouts,outc=matchdwith|`Channelc->(output_substringc),(output_charc)|`Bufferb->(Std_buffer.add_substringb),(Std_buffer.add_charb)|`Funf->letosspl=fori=ptop+l-1dof(Char.code(Std_string.getsi))doneinletocc=f(Char.codec)inos,ocinletprefixes=leth=Ht.create10inHt.addhString.emptyString.empty;Ht.addhns_xmln_xml;Ht.addhns_xmlnsn_xmlns;hin{decl=decl;outs=outs;outc=outc;nl=nl;indent=indent;last_el_start=false;prefixes=prefixes;scopes=[];depth=-1;fun_prefix=ns_prefix;}letoutput_deptho=o.depthletoutsos=o.outss0(Std_string.lengths)letstr_utf_8s=String.to_utf_8(fun_s->s)""sletout_utf_8os=ignore(String.to_utf_8(funos->outsos;o)os)letprefix_nameo(ns,local)=tryifstr_eqnsns_xmlns&&str_eqlocaln_xmlnsthen(String.empty,n_xmlns)else(Ht.findo.prefixesns,local)withNot_found->matcho.fun_prefixnswith|None->invalid_arg(err_prefix(str_utf_8ns))|Someprefix->prefix,localletbind_prefixesoatts=letaddacc((ns,local),uri)=ifnot(str_eqnsns_xmlns)thenaccelsebeginletprefix=ifstr_eqlocaln_xmlnsthenString.emptyelselocalinHt.addo.prefixesuriprefix;uri::accendinList.fold_leftadd[]attsletout_dataos=letout()s=letlen=Std_string.lengthsinletstart=ref0inletlast=ref0inletescapee=o.outss!start(!last-!start);outsoe;incrlast;start:=!lastinwhile(!last<len)domatchStd_string.gets!lastwith|'<'->escape"<"(* Escape markup delimiters. *)|'>'->escape">"|'&'->escape"&"(* | '\'' -> escape "'" *)(* Not needed we use \x22 for attributes. *)|'\x22'->escape"""|'\n'|'\t'|'\r'->incrlast|cwhenc<' '->escape"\xEF\xBF\xBD"(* illegal, subst. by U+FFFD *)|_->incrlastdone;o.outss!start(!last-!start)inString.to_utf_8out()sletout_qnameo(p,l)=ifnot(str_emptyp)then(out_utf_8op;o.outc':');out_utf_8olletout_attributeo(n,v)=o.outc' ';out_qnameo(prefix_nameon);outso"=\x22";out_dataov;o.outc'\x22'letoutputos=letindento=matcho.indentwith|None->()|Somec->fori=1to(o.depth*c)doo.outc' 'doneinletunindento=matcho.indentwithNone->()|Some_->o.outc'\n'inifo.depth=-1thenbeginmatchswith|`Dtdd->ifo.declthenoutso"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";beginmatchdwith|Somedtd->out_utf_8odtd;o.outc'\n'|None->()end;o.depth<-0|`Data_->invalid_argerr_data|`El_start_->invalid_argerr_el_start|`El_end->invalid_argerr_el_endendelsebeginmatchswith|`El_start(n,atts)->ifo.last_el_startthen(outso">";unindento);indento;leturis=bind_prefixesoattsinletqn=prefix_nameonino.outc'<';out_qnameoqn;List.iter(out_attributeo)atts;o.scopes<-(qn,uris)::o.scopes;o.depth<-o.depth+1;o.last_el_start<-true|`El_end->beginmatcho.scopeswith|(n,uris)::scopes'->o.depth<-o.depth-1;ifo.last_el_startthenoutso"/>"elsebeginindento;outso"</";out_qnameon;o.outc'>';end;o.scopes<-scopes';List.iter(Ht.removeo.prefixes)uris;o.last_el_start<-false;ifo.depth=0then(ifo.nltheno.outc'\n';o.depth<--1;)elseunindento|[]->invalid_argerr_el_endend|`Datad->ifo.last_el_startthen(outso">";unindento);indento;out_dataod;unindento;o.last_el_start<-false|`Dtd_->failwitherr_dtdendletoutput_treefragov=letrecauxo=function|(v::rest)::context->beginmatchfragvwith|`El(tag,childs)->outputo(`El_starttag);auxo(childs::rest::context)|(`Datad)assignal->outputosignal;auxo(rest::context)end|[]::[]->()|[]::context->outputo`El_end;auxocontext|[]->assertfalseinauxo([v]::[])letoutput_doc_treefrago(dtd,v)=outputo(`Dtddtd);output_treefragovend(* Default streaming XML IO *)moduleString=structtypet=stringletempty=""letlength=String.lengthletappend=(^)letlowercase=String.lowercase_asciiletiterfs=letlen=Std_string.lengthsinletpos=ref~-1inleti()=incrpos;if!pos=lenthenraiseExitelseChar.code(Std_string.gets!pos)intrywhiletruedof(uchar_utf8i)donewithExit->()letof_strings=sletto_utf_8fvx=fvxletcompare=String.compareendmoduleBuffer=structtypestring=String.ttypet=Buffer.texceptionFullletcreate=Buffer.createletadd_ucharbu=try(* UTF-8 encodes an uchar in the buffer, assumes u is valid code point. *)letbufc=Buffer.add_charb(Char.chrc)inifu<=0x007Fthen(bufu)elseifu<=0x07FFthen(buf(0xC0lor(ulsr6));buf(0x80lor(uland0x3F)))elseifu<=0xFFFFthen(buf(0xE0lor(ulsr12));buf(0x80lor((ulsr6)land0x3F));buf(0x80lor(uland0x3F)))else(buf(0xF0lor(ulsr18));buf(0x80lor((ulsr12)land0x3F));buf(0x80lor((ulsr6)land0x3F));buf(0x80lor(uland0x3F)))withFailure_->raiseFullletclearb=Buffer.clearbletcontents=Buffer.contentsletlength=Buffer.lengthendincludeMake(String)(Buffer)(* Pretty printers *)letpp=Format.fprintfletrecpp_list?(pp_sep=Format.pp_print_cut)pp_vppf=function|[]->()|v::vs->pp_vppfv;ifvs<>[]then(pp_sepppf();pp_list~pp_seppp_vppfvs)letpp_nameppf(p,l)=ifp<>""thenppppf"%s:%s"plelseppppf"%s"lletpp_attributeppf(n,v)=ppppf"@[<1>(%a,@,%S)@]"pp_namenvletpp_tagppf(name,atts)=letpp_sepppf()=ppppf";@ "inppppf"@[<1>(%a,@,@[<1>[%a]@])@]"pp_namename(pp_list~pp_seppp_attribute)attsletpp_dtdppf=function|None->ppppf"None"|Somedtd->ppppf"@[<1>(Some@ %S)@]"dtdletpp_signalppf=function|`Datas->ppppf"@[`Data %S@]"s|`El_end->ppppf"`El_end"|`El_starttag->ppppf"@[`El_start %a@]"pp_tagtag|`Dtddtd->ppppf"@[`Dtd %a@]"pp_dtddtd(*----------------------------------------------------------------------------
Copyright (c) 2007 The xmlm programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
---------------------------------------------------------------------------*)