123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899(*
* Copyright (c) 2011-2013 Thomas Gazagnaire <thomas@gazagnaire.org>
* Copyright (c) 2012 Anil Madhavapeddy <anil@recoil.org>
* Copyright (c) 2013-2014 David Sheets <sheets@alum.mit.edu>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)includeXmlmtypet=('afragas'a)fraglistletidx=xletto_string?(decl=false)=function|[]->""|h::t->letbuf=Buffer.create1024inletappenddeclelt=leto=make_output~decl(`Bufferbuf)inoutputo(`DtdNone);output_treeidoeltinappenddeclh;List.iter(appendfalse)t;Buffer.contentsbuf(* XXX: do a proper input_subtree integration *)(*** XHTML parsing (using Xml) ***)let_input_treeinput:t=letel(name,attrs)body:t=[`El((name,attrs),List.concatbody)]inletdatastr:t=[`Datastr]ininput_tree~el~datainputletof_string?entity?encstr=(* XXX: ugly hack to manually remove the DTD *)letremove_dtdstr=letxml_decl="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"inletlen=String.lengthstrinletdecl_len=String.lengthxml_decliniflen>=decl_len&&String.substr0decl_len=xml_declthenString.substrdecl_len(len-decl_len)elsestrin(* Here, we want to be able to deal with a forest of possible XML
trees. To do so correctly, we root the forest with a dummy
node. *)letrootstr=letstr=Printf.sprintf"<xxx>%s</xxx>"strinleti=make_input~enc?entity(`String(0,str))in(matchpeekiwith|`Dtd_->let_=inputiin()|_->());match_input_treeiwith|[`El(_,childs)]->childs|_->raiseParsing.Parse_errorin(* It is illegal to write <:html<<b>foo</b>>> so we use a small trick
and write <:html<<b>foo</b>&>> *)letremove_trailing_ampstr=letlen=String.lengthstriniflen=0||str.[len-1]<>'&'thenstrelseString.substr0(String.lengthstr-1)intryroot(remove_trailing_amp(remove_dtdstr))withError(pos,e)->Printf.eprintf"[XMLM:%d-%d] %s: %s\n"(fstpos)(sndpos)str(error_messagee);raiseParsing.Parse_errorletempty:t=[]letstrings:t=[`Datas]letinti=string@@string_of_intiletfloatf=string@@string_of_floatflet(++)=List.appendletlist=List.concatletsome=functionNone->empty|Somex->xleturix=string(Uri.to_stringx)lettagt?(attrs=[])body:t=letattrs=List.map(fun(k,v)->(("",k),v))attrsin[`El((("",t),attrs),body)]lettagot?attrs=functionNone->empty|Someb->tagt?attrsb