123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561(*********************************************************************************)(* OCaml-RDF *)(* *)(* Copyright (C) 2012-2021 Institut National de Recherche en Informatique *)(* et en Automatique. All rights reserved. *)(* *)(* This program is free software; you can redistribute it and/or modify *)(* it under the terms of the GNU Lesser General Public License version *)(* 3 as published by the Free Software Foundation. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU General Public License for more details. *)(* *)(* You should have received a copy of the GNU General Public License *)(* along with this program; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA *)(* 02111-1307 USA *)(* *)(* Contact: Maxence.Guesdon@inria.fr *)(* *)(*********************************************************************************)(** *)openGraph;;openTerm;;(** {2 Using trees for XML docs}
Code taken from Xmlm examples.
Thanks to Xmlm, namespaces are already handled by the parser :-)
*)typetree=EofXmlm.tag*treelist|Dofstringletin_treei=leteltagchilds=E(tag,childs)inletdatad=DdintryXmlm.input_doc_tree~el~dataiwithXmlm.Error((line,col),error)->letmsg=Printf.sprintf"Line %d, column %d: %s"linecol(Xmlm.error_messageerror)infailwithmsgletout_treeot=letfrag=function|E(tag,childs)->`El(tag,childs)|Dd->`DatadinXmlm.output_doc_treefragotletapply_namespaces=Dot.apply_namespaces;;letoutput_doc_treens?(decl=true)desttree=letmap(pref,s)=matchprefwith""->apply_namespacesnss|_->(pref,s)inlettree=matchtreewithD_->tree|E((tag,atts),subs)->letatts=List.fold_left(funacc(((pref,s),v)asatt)->ifpref=Xmlm.ns_xmlnsthenaccelseatt::acc)[]attsinletns_atts=List.map(fun(pref,iri)->((Xmlm.ns_xmlns,pref),iri))nsinE((tag,ns_atts@atts),subs)inletns_prefixs=Somesinletoutput=Xmlm.make_output~ns_prefix~decldestinletfrag=function|Dd->`Datad|E(((pref,s),atts),childs)->let(pref,s)=map(pref,s)inletatts=List.map(fun((pref,s),v)->(map(pref,s),v))attsin`El(((pref,s),atts),childs)inXmlm.output_doc_treefragoutput(None,tree);;;letstring_of_xmlsnamespacestrees=tryletb=Buffer.create256inList.iter(output_doc_treenamespaces~decl:false(`Bufferb))trees;Buffer.contentsbwithXmlm.Error((line,col),error)->letmsg=Printf.sprintf"Line %d, column %d: %s"linecol(Xmlm.error_messageerror)infailwithmsg;;letxml_of_stringstr=tryleti=Xmlm.make_input~strip:true(`String(0,str))inlet(_,tree)=in_treeiin(*prerr_endline "parse ok";*)treewithXmlm.Error((line,col),error)->letmsg=Printf.sprintf"Line %d, column %d: %s\n%s"linecol(Xmlm.error_messageerror)strinfailwithmsg;;letxmls_of_stringstr=(*prerr_endline "xmls_of_string";*)letstr="<foo__>"^str^"</foo__>"inmatchxml_of_stringstrwithE((("","foo__"),_),subs)->subs|_->assertfalse;;letget_first_childxmltag=matchxmlwithD_->None|E((_,_),subs)->trySome(List.find(functionE((t,_),_)->t=tag|_->false)subs)withNot_found->None;;letis_elementiri(pref,loc)=letiri2=Iri.of_string(pref^loc)inletb=Iri.compareiriiri2=0in(*prerr_endline (Printf.sprintf "is_element %s %s: %b"
(Iri.to_string iri) (Iri.to_string iri2) b);*)b;;(** {2 Input} *)moduleSMap=Types.SMap;;moduleIrimap=Iri.Maptypestate={subject:Term.termoption;predicate:Iri.toption;xml_base:Iri.t;xml_lang:stringoption;datatype:Iri.toption;namespaces:stringIrimap.t;}typeglobal_state={blanks:Term.blank_idSMap.t;gnamespaces:stringIrimap.t;}exceptionInvalid_rdfofstringleterrors=raise(Invalid_rdfs);;let()=Printexc.register_printer(function|Invalid_rdfstr->Some(Printf.sprintf"Invalid RDF: %s"str)|_->None)letget_attattl=trySome(List.assocattl)withNot_found->None;;letget_att_iri=letreciterpred=function[]->None|(x,v)::q->ifpredxthenSomevelseiterpredqinfuniril->iter(is_elementiri)l;;(*
let abs_iri state iri =
prerr_endline (Printf.sprintf "resolve base=%s iri=%s"
(Iri.to_string state.xml_base) (Iri.ref_to_string iri));
let iri = Iri.resolve ~base: state.xml_base iri in
prerr_endline (Printf.sprintf "=> %s" (Iri.to_string iri));
iri
*)letabs_iristateiri=(*prerr_endline (Printf.sprintf "resolve base=%s iri=%s"
(Iri.to_string state.xml_base) (Iri.to_string iri));*)letiri=matchIri.is_relativeiriwithfalse->iri|true->Iri.resolve~base:state.xml_baseiriin(*prerr_endline (Printf.sprintf "=> %s" (Iri.to_string iri));*)iriletset_xml_basestate=functionD_->state|E((_,atts),_)->matchget_att(Xmlm.ns_xml,"base")attswithNone->state|Somes->letr=Iri.of_stringsinletxml_base=abs_iristaterin{statewithxml_base};;letset_xml_langstate=functionD_->state|E((_,atts),_)->matchget_att(Xmlm.ns_xml,"lang")attswithNone->state|Somes->(*prerr_endline ("setting lang to "^s);*){statewithxml_lang=Somes};;letset_namespacesgstatestate=functionD_->(gstate,state)|E((_,atts),_)->letf(gstate,state)((pref,s),v)=ifpref=Xmlm.ns_xmlnsthenbeginletiri=Iri.of_stringvinletgstate={gstatewithgnamespaces=Irimap.addirisgstate.gnamespaces}inletstate={statewithnamespaces=Irimap.addirisstate.namespaces}in(gstate,state)endelse(gstate,state)inList.fold_leftf(gstate,state)atts;;letupdate_stategstatestatet=set_namespacesgstate(set_xml_lang(set_xml_basestatet)t)t;;letget_blank_nodeggstateid=try(Blank_(SMap.findidgstate.blanks),gstate)withNot_found->(*prerr_endline (Printf.sprintf "blank_id for %s not found, forging one" id);*)letbid=g.new_blank_id()inletgstate={gstatewithblanks=SMap.addidbidgstate.blanks}in(Blank_bid,gstate)letrecinput_nodegstategstatet=let(gstate,state)=update_stategstatestatetinmatchtwithDswhenstate.predicate=None->letmsg=Printf.sprintf"Found (Data %S) with no current predicate."sinerrormsg|Ds->letobj=Term.term_of_literal_string?typ:state.datatype?lang:state.xml_langsinletsub=matchstate.subjectwithNone->assertfalse|Somes->sinletpred=matchstate.predicatewithNone->assertfalse|Someu->uing.add_triple~sub~pred~obj;gstate|E(((pref,s),atts),children)->let(node,gstate)=matchget_att_iriRdf_.aboutattswithSomes->(Iri(abs_iristate(Iri.of_strings)),gstate)|None->matchget_att_iriRdf_.idattswithSomeid->(Iri(Iri.of_string((Iri.to_stringstate.xml_base)^"#"^id)),gstate)|None->matchget_att_iriRdf_.nodeIDattswithSomeid->get_blank_nodeggstateid|None->(Blank_(g.new_blank_id()),gstate)inbeginmatchstate.subject,state.predicatewithSomesub,Somepred->g.add_triple~sub~pred~obj:node|_->()end;letstate={statewithsubject=Somenode;predicate=None}in(* add a type arc if the node is not introduced with rdf:Description *)ifnot(is_elementRdf_.description(pref,s))thenbeginlettype_iri=Iri.of_string(pref^s)ing.add_triple~sub:node~pred:Rdf_.type_~obj:(Iritype_iri)end;(* all remaining attributes define triples with literal object values *)letf((pref,s),v)=ifpref<>Xmlm.ns_xml&&pref<>Xmlm.ns_xmlnsthenbeginletiri_prop=Iri.of_string(pref^s)inifnot(List.exists(Iri.equaliri_prop)[Rdf_.about;Rdf_.id;Rdf_.nodeID])thenbeginletobj=Term.term_of_literal_string?lang:state.xml_langving.add_triple~sub:node~pred:iri_prop~objendendinList.iterfatts;let(gstate,_)=List.fold_left(input_propgstate)(gstate,1)childreningstate(* FIXME: handle rdf:ID *)andinput_propgstate(gstate,li)t=let(gstate,state)=update_stategstatestatetinmatchtwithDs->letmsg=Printf.sprintf"Found (Data %S) but expected a property node."sinerrormsg|E(((pref,s),atts),children)->letsub=matchstate.subjectwithNone->assertfalse|Somesub->subinletprop_iri=Iri.of_string(pref^s)inlet(prop_iri,li)=ifIri.equalprop_iriRdf_.lithen(Rdf_.nli,li+1)else(prop_iri,li)inmatchget_att_iriRdf_.resourceattswithSomes->letiri=Iri.of_stringsinletobj=Iri(abs_iristateiri)ing.add_triple~sub~pred:prop_iri~obj;(gstate,li)|None->matchget_att_iriRdf_.nodeIDattswithSomeid->let(obj,gstate)=get_blank_nodeggstateiding.add_triple~sub~pred:prop_iri~obj;(gstate,li)|None->matchget_att_iriRdf_.parseTypeattswithSome"Literal"->letxml=string_of_xmls(Irimap.fold(funirisacc->(s,Iri.to_stringiri)::acc)state.namespaces[])childreninletobj=Term.term_of_literal_string~typ:Rdf_.dt_XMLLiteralxmling.add_triple~sub~pred:prop_iri~obj;(gstate,li)|Some"Resource"->beginletnode=Blank_(g.new_blank_id())ing.add_triple~sub~pred:prop_iri~obj:node;letstate={statewithsubject=Somenode;predicate=None}inList.fold_left(input_propgstate)(gstate,1)childrenend|Some"Collection"->beginletrecf(gstate,previous)=function[]->assertfalse|first::rest->letstate={statewithsubject=Someprevious;predicate=SomeRdf_.first}inletgstate=input_nodegstategstatefirstinmatchrestwith[]->g.add_triple~sub:previous~pred:Rdf_.rest~obj:(IriRdf_.nil);(gstate,previous)|_->letblank=Term.Blank_(g.new_blank_id())ing.add_triple~sub:previous~pred:Rdf_.rest~obj:blank;f(gstate,blank)restinletgstate=matchchildrenwith[]->gstate|_->letblank=Term.Blank_(g.new_blank_id())ing.add_triple~sub~pred:prop_iri~obj:blank;fst(f(gstate,blank)children)in(gstate,li)end|Somes->error(Printf.sprintf"Unknown parseType %S"s)|None->matchget_att_iriRdf_.datatypeatts,childrenwith|Somes,[Dlit]->lettyp=abs_iristate(Iri.of_strings)inletobj=Term.term_of_literal_string~typ?lang:state.xml_langliting.add_triple~sub~pred:prop_iri~obj;(gstate,li)|Somes,_->letmsg=Printf.sprintf"Property %S with datatype %S but no data"(Iri.to_stringprop_iri)sinerrormsg|None,_->(* if we have other attributes than the ones filtered above, they
are property relations, with ommited blank nodes *)letpred((pref,s),v)=pref<>Xmlm.ns_xml&&pref<>Xmlm.ns_xmlns&&(letiri=Iri.of_string(pref^s)innot(Iri.equaliriRdf_.id))inmatchList.filterpredattswith[]->letstate={statewithpredicate=Someprop_iri}inletgstate=List.fold_left(input_nodegstate)gstatechildrenin(gstate,li)|l->letnode=Blank_(g.new_blank_id())ing.add_triple~sub~pred:prop_iri~obj:node;letf((pref,s),lit)=letobj=Term.term_of_literal_string?lang:state.xml_langlitinletiri_prop=Iri.of_string(pref^s)ing.add_triple~sub:node~pred:iri_prop~objinList.iterfl;(gstate,li);;letinput_treeg?(base=g.Graph.name())t=letstate={subject=None;predicate=None;xml_base=base;xml_lang=None;datatype=None;namespaces=Irimap.empty;}inletgstate={gnamespaces=Irimap.empty;blanks=SMap.empty}inlet(gstate,state)=update_stategstatestatetinletgstate=matchtwithD_->assertfalse|E((e,_),children)whenis_elementRdf_._RDFe->List.fold_left(input_nodegstate)gstatechildren|t->input_nodegstategstatetin(* add namespaces *)letadd_nsiriprefix=g.add_namespaceiriprefixinIrimap.iteradd_nsgstate.gnamespaces;;letfrom_inputg?basei=let(_,tree)=in_treeiininput_treeg?basetree;;letfrom_xml=input_tree;;letfrom_stringg?bases=leti=Xmlm.make_input~strip:true(`String(0,s))infrom_inputg?basei;;letfrom_fileg?basefile=letic=open_infileinleti=Xmlm.make_input~strip:true(`Channelic)inlet(_,tree)=trylett=in_treeiinclose_inic;twithe->close_inic;raiseeininput_treeg?basetree;;(** {2 Output} *)letoutput?(compact=true)g=letxml_proppred_iriobj=let(atts,children)=matchobjwith|Iriiri->([("",Iri.to_stringRdf_.resource),Iri.to_stringiri],[])|Blank_id->([("",Iri.to_stringRdf_.nodeID),Term.string_of_blank_idid],[])|Blank->assertfalse|Literallit->let(atts,subs)=matchlit.lit_typewithNone->([],[Dlit.lit_value])|SomeiriwhenIri.equaliriRdf_.dt_XMLLiteral->letsubs=xmls_of_stringlit.lit_valuein([("",Iri.to_stringRdf_.parseType),"Literal"],subs)|Someiri->([("",Iri.to_stringRdf_.datatype),Iri.to_stringiri],[Dlit.lit_value])inletatts=atts@(matchlit.lit_languagewithNone->[]|Somelang->[(Xmlm.ns_xml,"lang"),lang])in(atts,subs)inE((("",Iri.to_stringpred_iri),atts),children)inletsubject_atts=function|Iriiri->[("",Iri.to_stringRdf_.about),Iri.to_stringiri]|Blank_id->[("",Iri.to_stringRdf_.nodeID),Term.string_of_blank_idid]|Blank->assertfalse|Literal_->assertfalseinletfold_propsmap=letfirisetacc=letfoobjacc=letn=xml_propiriobjinn::accinTerm.TSet.foldfosetaccinIri.Map.foldfmap[]inletxmls=matchg.folder()with|Somemapwhencompact->letfsubmapacc=letxml_props=fold_propsmapinletatts=subject_attssubinletnode=E((("",Iri.to_stringRdf_.description),atts),xml_props)innode::accinTerm.TMap.foldfmap[]|_->letf_tripleacc(sub,pred,obj)=letatts=subject_attssubinletxml_prop=xml_proppredobjinletnode=E((("",Iri.to_stringRdf_.description),atts),[xml_prop])innode::accinList.fold_leftf_triple[](g.find())inE((("",Iri.to_stringRdf_._RDF),[]),xmls)letto_?compact?namespacesgdest=letnamespaces=Dot.build_namespaces?namespacesgintrylettree=output?compactginoutput_doc_treenamespaces~decl:truedesttreewithXmlm.Error((line,col),error)->letmsg=Printf.sprintf"Line %d, column %d: %s"linecol(Xmlm.error_messageerror)infailwithmsg;;letto_string?compact?namespacesg=letbuf=Buffer.create256inletdest=`Bufferbufinto_?compact?namespacesgdest;Buffer.contentsbuf;;letto_file?compact?namespacesgfile=letoc=open_outfileintryto_?compact?namespacesg(`Channeloc);close_outocwithe->close_outoc;raisee;;