123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)openXml_light_typesopenXml_light_utilsopenPrintftypeparse_error_msg=Xml_light_errors.dtd_parse_error_msg=|InvalidDTDDecl|InvalidDTDElement|InvalidDTDAttribute|InvalidDTDTag|DTDItemExpectedtypecheck_error=Xml_light_errors.dtd_check_error=|ElementDefinedTwiceofstring|AttributeDefinedTwiceofstring*string|ElementEmptyContructorofstring|ElementReferencedofstring*string|ElementNotDeclaredofstring|WrongImplicitValueForIDofstring*stringtypeprove_error=Xml_light_errors.dtd_prove_error=|UnexpectedPCData|UnexpectedTagofstring|UnexpectedAttributeofstring|InvalidAttributeValueofstring|RequiredAttributeofstring|ChildExpectedofstring|EmptyExpected|DuplicateIDofstring|MissingIDofstringtypedtd_child=Xml_light_types.dtd_child=|DTDTagofstring|DTDPCData|DTDOptionalofdtd_child|DTDZeroOrMoreofdtd_child|DTDOneOrMoreofdtd_child|DTDChoiceofdtd_childlist|DTDChildrenofdtd_childlisttypedtd_element_type=Xml_light_types.dtd_element_type=|DTDEmpty|DTDAny|DTDChildofdtd_childtypedtd_attr_default=Xml_light_types.dtd_attr_default=|DTDDefaultofstring|DTDRequired|DTDImplied|DTDFixedofstringtypedtd_attr_type=Xml_light_types.dtd_attr_type=|DTDCData|DTDNMToken|DTDEnumofstringlist|DTDID|DTDIDReftypedtd_item=Xml_light_types.dtd_item=|DTDAttributeofstring*string*dtd_attr_type*dtd_attr_default|DTDElementofstring*dtd_element_typetypedtd_result=|DTDNext|DTDNotMatched|DTDMatched|DTDMatchedResultofdtd_childtypeerror_pos=Xml_light_errors.error_pos={eline:int;eline_start:int;emin:int;emax:int;}typeparse_error=parse_error_msg*error_posexceptionParse_error=Xml_light_errors.Dtd_parse_errorexceptionCheck_error=Xml_light_errors.Dtd_check_errorexceptionProve_error=Xml_light_errors.Dtd_prove_errortypedtd=dtd_itemlistmoduleStringMap=Map.Make(String)typechecked=Xml_light_dtd_check.checkedtypedtd_state={elements:dtd_element_typemut_map;attribs:(dtd_attr_type*dtd_attr_default)mut_mapmut_map;mutablecurrent:dtd_element_type;mutablecurtag:string;state:(string*dtd_element_type)Stack.t;}letconvert=function|Xml_lexer.EInvalidDTDDecl->InvalidDTDDecl|Xml_lexer.EInvalidDTDElement->InvalidDTDElement|Xml_lexer.EInvalidDTDTag->InvalidDTDTag|Xml_lexer.EDTDItemExpected->DTDItemExpected|Xml_lexer.EInvalidDTDAttribute->InvalidDTDAttributeletparsesource:dtd=tryXml_lexer.initsource;letdtd=Xml_lexer.dtdsourceinXml_lexer.closesource;dtdwithXml_lexer.DTDErrore->Xml_lexer.closesource;letpos=Xml_lexer.error_possourceinraise(Parse_error(converte,pos))letparse_strings=parse(Lexing.from_strings)letparse_inch=parse(Lexing.from_channelch)letparse_filefname=letch=tryopen_infnamewithSys_error_->raise(Xml_light_errors.File_not_foundfname)intryletx=parse(Lexing.from_channelch)inclose_inch;xwithe->close_inch;raiseeletcheck=Xml_light_dtd_check.checkletstart_provedtdroot=letd={elements=refdtd.Xml_light_dtd_check.c_elements;attribs=ref(StringMap.maprefdtd.Xml_light_dtd_check.c_attribs);state=Stack.create();current=DTDChild(DTDTagroot);curtag="_root";}intryignore(find_mapd.elements(String.uppercase_asciiroot));dwithNot_found->raise(Check_error(ElementNotDeclaredroot))(* - for debug only - *)letto_string_ref=ref(fun_->assertfalse)lettracedtdtag=letitem=DTDElement("current",dtd.current)inprintf"%s : %s\n"(matchtagwithNone->"#PCDATA"|Somet->t)(!to_string_refitem)exceptionTmpResultofdtd_resultletprove_childdtdtag=matchdtd.currentwith|DTDEmpty->raise(Prove_errorEmptyExpected)|DTDAny->()|DTDChildelt->(letrecupdate=function|DTDTags->(matchtagwith|None->DTDNotMatched|Sometwhent=String.uppercase_asciis->DTDMatched|Some_->DTDNotMatched)|DTDPCData->(matchtagwithNone->DTDMatched|Some_->DTDNotMatched)|DTDOptionalx->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNext|DTDMatched|DTDMatchedResult_->DTDMatched)|DTDZeroOrMorex->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNext|DTDMatched|DTDMatchedResult_->DTDMatchedResult(DTDZeroOrMorex))|DTDOneOrMorex->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNotMatched|DTDMatched|DTDMatchedResult_->DTDMatchedResult(DTDZeroOrMorex))|DTDChoicel->(trymatchList.exists(funx->matchupdatexwith|DTDMatched->true|DTDMatchedResult_asr->raise(TmpResultr)|DTDNext|DTDNotMatched->false)lwith|true->DTDMatched|false->DTDNotMatchedwithTmpResultr->r)|DTDChildren[]->assertfalse(* DTD is checked ! *)|DTDChildren(h::t)->(matchupdatehwith|DTDNext->(matchtwith|[]->DTDNotMatched|_->update(DTDChildrent))|DTDNotMatched->DTDNotMatched|DTDMatchedResultr->DTDMatchedResult(DTDChildren(r::t))|DTDMatched->(matchtwith|[]->DTDMatched|_->DTDMatchedResult(DTDChildrent)))inmatchupdateeltwith|DTDNext|DTDNotMatched->(matchtagwith|None->raise(Prove_errorUnexpectedPCData)|Somet->raise(Prove_error(UnexpectedTagt)))|DTDMatched->dtd.current<-DTDEmpty|DTDMatchedResultr->dtd.current<-DTDChildr)letis_nmtoken_char=function|'A'..'Z'|'a'..'z'|'0'..'9'|'.'|'-'|'_'|':'->true|_->falseletprove_attribdtdhidhidrefattraname(atype,adef)accu=letaval=trySome(List.assocanameattr)withNot_found->Nonein(match(atype,aval)with|DTDCData,_->()|DTDNMToken,None->()|DTDNMToken,Somev->fori=0toString.lengthv-1doifnot(is_nmtoken_charv.[i])thenraise(Prove_error(InvalidAttributeValueaname))done|DTDEnuml,None->()|DTDEnuml,Somev->ifnot(List.exists((=)v)l)thenraise(Prove_error(InvalidAttributeValueaname))|DTDID,None->()|DTDID,Someid->ifmem_maphididthenraise(Prove_error(DuplicateIDid));set_maphidid()|DTDIDRef,None->()|DTDIDRef,Someidref->set_maphidrefidref());match(adef,aval)with|DTDRequired,None->raise(Prove_error(RequiredAttributeaname))|DTDFixedv,Someavwhenv<>av->raise(Prove_error(InvalidAttributeValueaname))|DTDImplied,None->accu|DTDFixedv,None|DTDDefault_,Somev|DTDDefaultv,None|DTDRequired,Somev|DTDImplied,Somev|DTDFixed_,Somev->(aname,v)::acculetcheck_attribahash(aname,_)=tryignore(find_mapahashaname)withNot_found->raise(Prove_error(UnexpectedAttributeaname))letrecdo_provehidhidrefdtd=function|PCDatas->prove_childdtdNone;PCDatas|Element(tag,attr,childs)->letutag=String.uppercase_asciitaginletuattr=List.map(fun(aname,aval)->(String.uppercase_asciianame,aval))attrinprove_childdtd(Someutag);Stack.push(dtd.curtag,dtd.current)dtd.state;letelt=tryfind_mapdtd.elementsutagwithNot_found->raise(Prove_error(UnexpectedTagtag))inletahash=tryfind_mapdtd.attribsutagwithNot_found->empty_map()indtd.curtag<-tag;dtd.current<-elt;List.iter(check_attribahash)uattr;letattr=fold_map(prove_attribdtdhidhidrefuattr)ahash[]inletchilds=ref(List.map(do_provehidhidrefdtd)childs)in(matchdtd.currentwith|DTDAny|DTDEmpty->()|DTDChildelt->(letname=ref""inletreccheck=function|DTDTagt->name:=t;false|DTDPCDatawhen!childs=[]->childs:=[PCData""];true|DTDPCData->name:="#PCDATA";false|DTDOptional_->true|DTDZeroOrMore_->true|DTDOneOrMoree->ignore(checke);false|DTDChoicel->List.existscheckl|DTDChildrenl->List.for_allchecklinmatchcheckeltwith|true->()|false->raise(Prove_error(ChildExpected!name))));letctag,cur=Stack.popdtd.stateindtd.curtag<-tag;dtd.current<-cur;Element(tag,attr,!childs)letprovedtdrootxml=lethid=create_map()inlethidref=create_map()inletx=do_provehidhidref(start_provedtdroot)xmliniter_map(funid()->ifnot(mem_maphidid)thenraise(Prove_error(MissingIDid)))hidref;xletparse_error_msg=function|InvalidDTDDecl->"Invalid DOCTYPE declaration"|InvalidDTDElement->"Invalid DTD element declaration"|InvalidDTDAttribute->"Invalid DTD attribute declaration"|InvalidDTDTag->"Invalid DTD tag"|DTDItemExpected->"DTD item expected"letparse_error(msg,pos)=ifpos.emin=pos.emaxthensprintf"%s line %d character %d"(parse_error_msgmsg)pos.eline(pos.emin-pos.eline_start)elsesprintf"%s line %d characters %d-%d"(parse_error_msgmsg)pos.eline(pos.emin-pos.eline_start)(pos.emax-pos.eline_start)letcheck_error=function|ElementDefinedTwicetag->sprintf"Element '%s' defined twice"tag|AttributeDefinedTwice(tag,aname)->sprintf"Attribute '%s' of element '%s' defined twice"anametag|ElementEmptyContructortag->sprintf"Element '%s' has empty constructor"tag|ElementReferenced(tag,from)->sprintf"Element '%s' referenced by '%s' is not declared"tagfrom|ElementNotDeclaredtag->sprintf"Element '%s' needed but is not declared"tag|WrongImplicitValueForID(tag,idname)->sprintf"Attribute '%s' of type ID of element '%s' not defined with implicit \
value #REQUIRED or #IMPLIED"idnametagletprove_error=function|UnexpectedPCData->"Unexpected PCData"|UnexpectedTagtag->sprintf"Unexpected tag : '%s'"tag|UnexpectedAttributeatt->sprintf"Unexpected attribute : '%s'"att|InvalidAttributeValueatt->sprintf"Invalid attribute value for '%s'"att|RequiredAttributeatt->sprintf"Required attribute not found : '%s'"att|ChildExpectedcname->sprintf"Child expected : '%s'"cname|EmptyExpected->"No more children expected"|DuplicateIDid->sprintf"ID '%s' used several times"id|MissingIDidref->sprintf"missing ID value for IDREF '%s'"idrefletto_string=function|DTDAttribute(tag,aname,atype,adef)->letatype_to_string=function|DTDCData->"CDATA"|DTDNMToken->"NMTOKEN"|DTDEnuml->sprintf"(%s)"(String.concat"|"l)|DTDID->"ID"|DTDIDRef->"IDREF"inletadefault_to_string=function|DTDDefaults->sprintf"\"%s\""s|DTDRequired->"#REQUIRED"|DTDImplied->"#IMPLIED"|DTDFixeds->sprintf"#FIXED \"%s\""sinsprintf"<!ATTLIST %s %s %s %s>"taganame(atype_to_stringatype)(adefault_to_stringadef)|DTDElement(tag,etype)->letrecechild_to_string=function|DTDTags->s|DTDPCData->"#PCDATA"|DTDOptionalc->sprintf"%s?"(echild_to_stringc)|DTDZeroOrMorec->sprintf"%s*"(echild_to_stringc)|DTDOneOrMorec->sprintf"%s+"(echild_to_stringc)|DTDChoice[c]->echild_to_stringc|DTDChoicel->sprintf"(%s)"(String.concat"|"(List.mapechild_to_stringl))|DTDChildren[c]->echild_to_stringc|DTDChildrenl->sprintf"(%s)"(String.concat","(List.mapechild_to_stringl))inletetype_to_string=function|DTDEmpty->"EMPTY"|DTDAny->"ANY"|DTDChildx->(letrecop_to_string=function|DTDOptionalc->sprintf"%s?"(op_to_stringc)|DTDZeroOrMorec->sprintf"%s*"(op_to_stringc)|DTDOneOrMorec->sprintf"%s+"(op_to_stringc)|_->""inletrecroot=function|DTDOptionalc|DTDZeroOrMorec|DTDOneOrMorec->rootc|(DTDChoice[_]|DTDChildren[_])asx->(x,false)|(DTDChoice_|DTDChildren_)asx->(x,true)|x->(x,false)inmatchrootxwith|r,true->sprintf"%s%s"(echild_to_stringr)(op_to_stringx)|r,false->sprintf"(%s%s)"(echild_to_stringr)(op_to_stringx))insprintf"<!ELEMENT %s %s>"tag(etype_to_stringetype);;to_string_ref:=to_string