123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library has the special exception on linking described in file
* README.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*)openPrintfopenTypestypeparse_error_msg=|InvalidDTDDecl|InvalidDTDElement|InvalidDTDAttribute|InvalidDTDTag|DTDItemExpectedtypecheck_error=|ElementDefinedTwiceofstring|AttributeDefinedTwiceofstring*string|ElementEmptyContructorofstring|ElementReferencedofstring*string|ElementNotDeclaredofstring|WrongImplicitValueForIDofstring*stringtypeprove_error=|UnexpectedPCData|UnexpectedTagofstring|UnexpectedAttributeofstring|InvalidAttributeValueofstring|RequiredAttributeofstring|ChildExpectedofstring|EmptyExpected|DuplicateIDofstring|MissingIDofstringtypedtd_child=Types.dtd_child=|DTDTagofstring|DTDPCData|DTDOptionalofdtd_child|DTDZeroOrMoreofdtd_child|DTDOneOrMoreofdtd_child|DTDChoiceofdtd_childlist|DTDChildrenofdtd_childlisttypedtd_element_type=Types.dtd_element_type=|DTDEmpty|DTDAny|DTDChildofdtd_childtypedtd_attr_default=Types.dtd_attr_default=|DTDDefaultofstring|DTDRequired|DTDImplied|DTDFixedofstringtypedtd_attr_type=Types.dtd_attr_type=|DTDCData|DTDNMToken|DTDEnumofstringlist|DTDID|DTDIDReftypedtd_item=Types.dtd_item=|DTDAttributeofstring*string*dtd_attr_type*dtd_attr_default|DTDElementofstring*dtd_element_typetypedtd_result=|DTDNext|DTDNotMatched|DTDMatched|DTDMatchedResultofdtd_childtypeparse_error=parse_error_msg*error_posexceptionParse_errorofparse_errorexceptionCheck_errorofcheck_errorexceptionProve_errorofprove_errortypedtd=dtd_itemlisttype('a,'b)hash=('a,'b)Hashtbl.ttypechecked={c_elements:(string,dtd_element_type)hash;c_attribs:(string,(string,(dtd_attr_type*dtd_attr_default))hash)hash;}typedtd_state={elements:(string,dtd_element_type)hash;attribs:(string,(string,(dtd_attr_type*dtd_attr_default))hash)hash;mutablecurrent:dtd_element_type;mutablecurtag:string;state:(string*dtd_element_type)Stack.t;}letfile_not_found=ref(fun_->assertfalse)let_raisese=file_not_found:=eletempty_hash=Hashtbl.create0letpossource=letline,lstart,min,max=Xml_lexer.possourcein{eline=line;eline_start=lstart;emin=min;emax=max;}letconvert=function|Xml_lexer.EInvalidDTDDecl->InvalidDTDDecl|Xml_lexer.EInvalidDTDElement->InvalidDTDElement|Xml_lexer.EInvalidDTDTag->InvalidDTDTag|Xml_lexer.EDTDItemExpected->DTDItemExpected|Xml_lexer.EInvalidDTDAttribute->InvalidDTDAttributeletparsesource=tryXml_lexer.initsource;(* local cast Dtd.dtd -> dtd *)letdtd=Xml_lexer.dtdsourceinXml_lexer.closesource;dtdwith|Xml_lexer.DTDErrore->Xml_lexer.closesource;raise(Parse_error(converte,possource))letparse_strings=parse(Lexing.from_strings)letparse_inch=parse(Lexing.from_channelch)letparse_filefname=letch=(tryopen_infnamewithSys_error_->raise(!file_not_foundfname))intryletx=parse(Lexing.from_channelch)inclose_inch;xwithe->close_inch;raiseeletcheckdtd=letattribs=Hashtbl.create0inlethdone=Hashtbl.create0inlethtodo=Hashtbl.create0inletftodotagfrom=tryignore(Hashtbl.findhdonetag);withNot_found->trymatchHashtbl.findhtodotagwith|None->Hashtbl.replacehtodotagfrom|Some_->()withNot_found->Hashtbl.addhtodotagfrominletfdonetagedata=tryignore(Hashtbl.findhdonetag);raise(Check_error(ElementDefinedTwicetag));withNot_found->Hashtbl.removehtodotag;Hashtbl.addhdonetagedatainletfattribtaganameadata=(matchadatawith|DTDID,DTDImplied->()|DTDID,DTDRequired->()|DTDID,_->raise(Check_error(WrongImplicitValueForID(tag,aname)))|_->());leth=(tryHashtbl.findattribstagwithNot_found->leth=Hashtbl.create1inHashtbl.addattribstagh;h)intryignore(Hashtbl.findhaname);raise(Check_error(AttributeDefinedTwice(tag,aname)));withNot_found->Hashtbl.addhanameadatainletcheck_item=function|DTDAttribute(tag,aname,atype,adef)->letutag=String.uppercasetaginftodoutagNone;fattributag(String.uppercaseaname)(atype,adef)|DTDElement(tag,etype)->letutag=String.uppercasetaginfdoneutagetype;letcheck_type=function|DTDEmpty->()|DTDAny->()|DTDChildx->letreccheck_child=function|DTDTags->ftodo(String.uppercases)(Someutag)|DTDPCData->()|DTDOptionalc|DTDZeroOrMorec|DTDOneOrMorec->check_childc|DTDChoice[]|DTDChildren[]->raise(Check_error(ElementEmptyContructortag))|DTDChoicel|DTDChildrenl->List.itercheck_childlincheck_childxincheck_typeetypeinList.itercheck_itemdtd;Hashtbl.iter(funtfrom->matchfromwith|None->raise(Check_error(ElementNotDeclaredt))|Sometag->raise(Check_error(ElementReferenced(t,tag))))htodo;{c_elements=hdone;c_attribs=attribs;}letstart_provedtdroot=letd={elements=dtd.c_elements;attribs=dtd.c_attribs;state=Stack.create();current=DTDChild(DTDTagroot);curtag="_root";}intryignore(Hashtbl.findd.elements(String.uppercaseroot));dwithNot_found->raise(Check_error(ElementNotDeclaredroot))(* - for debug only - *)letto_string_ref=ref(fun_->assertfalse)(*let trace dtd tag =
let item = DTDElement ("current",dtd.current) in
printf "%s : %s\n"
(match tag with None -> "#PCDATA" | Some t -> t)
(!to_string_ref item)*)exceptionTmpResultofdtd_resultletprove_childdtdtag=matchdtd.currentwith|DTDEmpty->raise(Prove_errorEmptyExpected)|DTDAny->()|DTDChildelt->letrecupdate=function|DTDTags->(matchtagwith|None->DTDNotMatched|Sometwhent=String.uppercases->DTDMatched|Some_->DTDNotMatched)|DTDPCData->(matchtagwith|None->DTDMatched|Some_->DTDNotMatched)|DTDOptionalx->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNext|DTDMatched|DTDMatchedResult_->DTDMatched)|DTDZeroOrMorex->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNext|DTDMatched|DTDMatchedResult_->DTDMatchedResult(DTDZeroOrMorex))|DTDOneOrMorex->(matchupdatexwith|DTDNotMatched|DTDNext->DTDNotMatched|DTDMatched|DTDMatchedResult_->DTDMatchedResult(DTDZeroOrMorex))|DTDChoicel->(try(matchList.exists(funx->matchupdatexwith|DTDMatched->true|DTDMatchedResult_asr->raise(TmpResultr)|DTDNext|DTDNotMatched->false)lwith|true->DTDMatched|false->DTDNotMatched)withTmpResultr->r)|DTDChildren[]->assertfalse(* DTD is checked ! *)|DTDChildren(h::t)->(matchupdatehwith|DTDNext->(matchtwith|[]->DTDNotMatched|_->update(DTDChildrent))|DTDNotMatched->DTDNotMatched|DTDMatchedResultr->DTDMatchedResult(DTDChildren(r::t))|DTDMatched->matchtwith|[]->DTDMatched|_->DTDMatchedResult(DTDChildrent))inmatchupdateeltwith|DTDNext|DTDNotMatched->(matchtagwith|None->raise(Prove_errorUnexpectedPCData)|Somet->raise(Prove_error(UnexpectedTagt)))|DTDMatched->dtd.current<-DTDEmpty|DTDMatchedResultr->dtd.current<-DTDChildrletis_nmtoken_char=function|'A'..'Z'|'a'..'z'|'0'..'9'|'.'|'-'|'_'|':'->true|_->falseletprove_attrib_dtdhidhidrefattraname(atype,adef)accu=letaval=(trySome(List.assocanameattr)withNot_found->None)in(matchatype,avalwith|DTDCData,_->()|DTDNMToken,None->()|DTDNMToken,Somev->fori=0toString.lengthv-1doifnot(is_nmtoken_charv.[i])thenraise(Prove_error(InvalidAttributeValueaname));done|DTDEnum_l,None->()|DTDEnuml,Somev->ifnot(List.exists((=)v)l)thenraise(Prove_error(InvalidAttributeValueaname))|DTDID,None->()|DTDID,Someid->ifHashtbl.memhididthenraise(Prove_error(DuplicateIDid));Hashtbl.addhidid()|DTDIDRef,None->()|DTDIDRef,Someidref->Hashtbl.addhidrefidref());matchadef,avalwith|DTDRequired,None->raise(Prove_error(RequiredAttributeaname))|DTDFixedv,Someavwhenv<>av->raise(Prove_error(InvalidAttributeValueaname))|DTDImplied,None->accu|DTDFixedv,None|DTDDefault_,Somev|DTDDefaultv,None|DTDRequired,Somev|DTDImplied,Somev|DTDFixed_,Somev->(aname,v)::acculetcheck_attribahash(aname,_)=tryignore(Hashtbl.findahashaname);withNot_found->raise(Prove_error(UnexpectedAttributeaname))letrecdo_provehidhidrefdtd=function|PCDatas->prove_childdtdNone;PCDatas|Element(tag,attr,childs)->letutag=String.uppercasetaginletuattr=List.map(fun(aname,aval)->String.uppercaseaname,aval)attrinprove_childdtd(Someutag);Stack.push(dtd.curtag,dtd.current)dtd.state;letelt=(tryHashtbl.finddtd.elementsutagwithNot_found->raise(Prove_error(UnexpectedTagtag)))inletahash=(tryHashtbl.finddtd.attribsutagwithNot_found->empty_hash)indtd.curtag<-tag;dtd.current<-elt;List.iter(check_attribahash)uattr;letattr=Hashtbl.fold(prove_attribdtdhidhidrefuattr)ahash[]inletchilds=ref(List.map(do_provehidhidrefdtd)childs)in(matchdtd.currentwith|DTDAny|DTDEmpty->()|DTDChildelt->letname=ref""inletreccheck=function|DTDTagt->name:=t;false|DTDPCDatawhen!childs=[]->childs:=[PCData""];true|DTDPCData->name:="#PCDATA";false|DTDOptional_->true|DTDZeroOrMore_->true|DTDOneOrMoree->ignore(checke);false|DTDChoicel->List.existscheckl|DTDChildrenl->List.for_allchecklinmatchcheckeltwith|true->()|false->raise(Prove_error(ChildExpected!name)));let_ctag,cur=Stack.popdtd.stateindtd.curtag<-tag;dtd.current<-cur;Element(tag,attr,!childs)letprovedtdrootxml=lethid=Hashtbl.create0inlethidref=Hashtbl.create0inletx=do_provehidhidref(start_provedtdroot)xmlinHashtbl.iter(funid()->ifnot(Hashtbl.memhidid)thenraise(Prove_error(MissingIDid)))hidref;xletparse_error_msg=function|InvalidDTDDecl->"Invalid DOCTYPE declaration"|InvalidDTDElement->"Invalid DTD element declaration"|InvalidDTDAttribute->"Invalid DTD attribute declaration"|InvalidDTDTag->"Invalid DTD tag"|DTDItemExpected->"DTD item expected"letparse_error(msg,pos)=ifpos.emin=pos.emaxthensprintf"%s line %d character %d"(parse_error_msgmsg)pos.eline(pos.emin-pos.eline_start)elsesprintf"%s line %d characters %d-%d"(parse_error_msgmsg)pos.eline(pos.emin-pos.eline_start)(pos.emax-pos.eline_start)letcheck_error=function|ElementDefinedTwicetag->sprintf"Element '%s' defined twice"tag|AttributeDefinedTwice(tag,aname)->sprintf"Attribute '%s' of element '%s' defined twice"anametag|ElementEmptyContructortag->sprintf"Element '%s' has empty constructor"tag|ElementReferenced(tag,from)->sprintf"Element '%s' referenced by '%s' is not declared"tagfrom|ElementNotDeclaredtag->sprintf"Element '%s' needed but is not declared"tag|WrongImplicitValueForID(tag,idname)->sprintf"Attribute '%s' of type ID of element '%s' not defined with implicit value #REQUIRED or #IMPLIED"idnametagletprove_error=function|UnexpectedPCData->"Unexpected PCData"|UnexpectedTagtag->sprintf"Unexpected tag : '%s'"tag|UnexpectedAttributeatt->sprintf"Unexpected attribute : '%s'"att|InvalidAttributeValueatt->sprintf"Invalid attribute value for '%s'"att|RequiredAttributeatt->sprintf"Required attribute not found : '%s'"att|ChildExpectedcname->sprintf"Child expected : '%s'"cname|EmptyExpected->"No more children expected"|DuplicateIDid->sprintf"ID '%s' used several times"id|MissingIDidref->sprintf"missing ID value for IDREF '%s'"idrefletto_string=function|DTDAttribute(tag,aname,atype,adef)->letatype_to_string=function|DTDCData->"CDATA"|DTDNMToken->"NMTOKEN"|DTDEnuml->sprintf"(%s)"(String.concat"|"l)|DTDID->"ID"|DTDIDRef->"IDREF"inletadefault_to_string=function|DTDDefaults->sprintf"\"%s\""s|DTDRequired->"#REQUIRED"|DTDImplied->"#IMPLIED"|DTDFixeds->sprintf"#FIXED \"%s\""sinsprintf"<!ATTLIST %s %s %s %s>"taganame(atype_to_stringatype)(adefault_to_stringadef)|DTDElement(tag,etype)->letrecechild_to_string=function|DTDTags->s|DTDPCData->"#PCDATA"|DTDOptionalc->sprintf"%s?"(echild_to_stringc)|DTDZeroOrMorec->sprintf"%s*"(echild_to_stringc)|DTDOneOrMorec->sprintf"%s+"(echild_to_stringc)|DTDChoice[c]->echild_to_stringc|DTDChoicel->sprintf"(%s)"(String.concat"|"(List.mapechild_to_stringl))|DTDChildren[c]->echild_to_stringc|DTDChildrenl->sprintf"(%s)"(String.concat","(List.mapechild_to_stringl))inletetype_to_string=function|DTDEmpty->"EMPTY"|DTDAny->"ANY"|DTDChildx->letrecop_to_string=function|DTDOptionalc->sprintf"%s?"(op_to_stringc)|DTDZeroOrMorec->sprintf"%s*"(op_to_stringc)|DTDOneOrMorec->sprintf"%s+"(op_to_stringc)|_->""inletrecroot=function|DTDOptionalc|DTDZeroOrMorec|DTDOneOrMorec->rootc|DTDChoice[_]|DTDChildren[_]asx->x,false|DTDChoice_|DTDChildren_asx->x,true|x->x,falseinmatchrootxwith|r,true->sprintf"%s%s"(echild_to_stringr)(op_to_stringx)|r,false->sprintf"(%s%s)"(echild_to_stringr)(op_to_stringx)insprintf"<!ELEMENT %s %s>"tag(etype_to_stringetype);;to_string_ref:=to_string