123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
* Copyright (C) 2003 Jacques Garrigue
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)openPrintfopenXml_datatypetypexml=Xml_datatype.xmltypeerror_pos={eline:int;eline_start:int;emin:int;emax:int;}typeerror_msg=|UnterminatedComment|UnterminatedString|UnterminatedEntity|IdentExpected|CloseExpected|NodeExpected|AttributeNameExpected|AttributeValueExpected|EndOfTagExpectedofstring|EOFExpected|Emptytypeerror=error_msg*error_posexceptionErroroferrorexceptionFile_not_foundofstringtypet={mutablecheck_eof:bool;mutableconcat_pcdata:bool;source:Lexing.lexbuf;stack:Xml_lexer.tokenStack.t;}typesource=|SChannelofin_channel|SStringofstring|SLexbufofLexing.lexbufexceptionInternal_erroroferror_msgexceptionNoMoreDataletxml_error=ref(fun_->assertfalse)letfile_not_found=ref(fun_->assertfalse)letis_blanks=letlen=String.lengthsinletbreak=reftrueinleti=ref0inwhile!break&&!i<lendoletc=s.[!i]in(* no '\r' because we replaced them in the lexer *)ifc=' '||c='\n'||c='\t'thenincrielsebreak:=falsedone;!i=lenlet_raisesef=xml_error:=e;file_not_found:=fletmakesource=letsource=matchsourcewith|SChannelchan->Lexing.from_channelchan|SStrings->Lexing.from_strings|SLexbuflexbuf->lexbufinlet()=Xml_lexer.initsourcein{check_eof=false;concat_pcdata=true;source=source;stack=Stack.create();}letcheck_eofpv=p.check_eof<-vletpops=tryStack.pops.stackwithStack.Empty->Xml_lexer.tokens.sourceletpushts=Stack.pushts.stackletcanonicalizel=lethas_elt=List.exists(functionElement_->true|_->false)linifhas_eltthenList.filter(functionPCDatas->not(is_blanks)|_->true)lelselletrecread_xmldo_not_canonicalizes=letrecread_nodes=matchpopswith|Xml_lexer.PCDatas->PCDatas|Xml_lexer.Tag(tag,attr,true)->Element(tag,attr,[])|Xml_lexer.Tag(tag,attr,false)->letelements=read_elemstagsinletelements=ifdo_not_canonicalizethenelementselsecanonicalizeelementsinElement(tag,attr,elements)|t->pushts;raiseNoMoreDataandread_elemstags=letelems=ref[]in(trywhiletruedoletnode=read_nodesinmatchnode,!elemswith|PCDatac,(PCDatac2)::q->elems:=PCData(c2^c)::q|_,l->elems:=node::ldonewithNoMoreData->());matchpopswith|Xml_lexer.Endtagswhens=tag->List.rev!elems|t->raise(Internal_error(EndOfTagExpectedtag))inmatchread_nodeswith|(Element_)asnode->node|PCDatac->ifis_blankcthenread_xmldo_not_canonicalizeselseraise(Xml_lexer.ErrorXml_lexer.ENodeExpected)letconvert=function|Xml_lexer.EUnterminatedComment->UnterminatedComment|Xml_lexer.EUnterminatedString->UnterminatedString|Xml_lexer.EIdentExpected->IdentExpected|Xml_lexer.ECloseExpected->CloseExpected|Xml_lexer.ENodeExpected->NodeExpected|Xml_lexer.EAttributeNameExpected->AttributeNameExpected|Xml_lexer.EAttributeValueExpected->AttributeValueExpected|Xml_lexer.EUnterminatedEntity->UnterminatedEntityleterror_of_exnxparser=function|NoMoreDatawhenpopxparser=Xml_lexer.Eof->Empty|NoMoreData->NodeExpected|Internal_errore->e|Xml_lexer.Errore->converte|e->(*let e = Errors.push e in: We do not record backtrace here. *)raiseeletdo_parsedo_not_canonicalizexparser=tryXml_lexer.initxparser.source;letx=read_xmldo_not_canonicalizexparserinifxparser.check_eof&&popxparser<>Xml_lexer.Eofthenraise(Internal_errorEOFExpected);Xml_lexer.close();xwithany->Xml_lexer.close();raise(!xml_error(error_of_exnxparserany)xparser.source)letparse?(do_not_canonicalize=false)p=do_parsedo_not_canonicalizepleterror_msg=function|UnterminatedComment->"Unterminated comment"|UnterminatedString->"Unterminated string"|UnterminatedEntity->"Unterminated entity"|IdentExpected->"Ident expected"|CloseExpected->"Element close expected"|NodeExpected->"Xml node expected"|AttributeNameExpected->"Attribute name expected"|AttributeValueExpected->"Attribute value expected"|EndOfTagExpectedtag->sprintf"End of tag expected : '%s'"tag|EOFExpected->"End of file expected"|Empty->"Empty"leterror(msg,pos)=ifpos.emin=pos.emaxthensprintf"%s line %d character %d"(error_msgmsg)pos.eline(pos.emin-pos.eline_start)elsesprintf"%s line %d characters %d-%d"(error_msgmsg)pos.eline(pos.emin-pos.eline_start)(pos.emax-pos.eline_start)letlinee=e.elineletrangee=e.emin-e.eline_start,e.emax-e.eline_startletabs_rangee=e.emin,e.emaxletpossource=letline,lstart,min,max=Xml_lexer.possourcein{eline=line;eline_start=lstart;emin=min;emax=max;}let()=_raises(funxp->(* local cast : Xml.error_msg -> error_msg *)Error(x,posp))(funf->File_not_foundf)