123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250(* This file is part of Markup.ml, released under the MIT license. See
LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *)openCommonopenKstreamopenToken_tagletis_whitespace_onlystrings=List.for_allis_whitespace_onlystringsletparsecontextnamespacereporttokens=letopen_elements=ref[]inletnamespaces=Namespace.Parsing.initnamespaceinletis_fragment=reffalseinletfragment_allowed=reftrueinletthrow=ref(fun_->())inletended=ref(fun_->())inletoutput=ref(fun_->())inletreccurrent_state=ref(fun()->matchcontextwith|None->initial_state[]|Some`Document->fragment_allowed:=false;document_state()|Some`Fragment->is_fragment:=false;content_state())andemitlsignalstate=current_state:=state;!output(l,signal)andpush_and_emitl{name=raw_name;attributes}state=Namespace.Parsing.push(fun()->reportl)namespacesraw_nameattributes!throw(fun(expanded_name,attributes)->letrecdeduplicateaccattributesk=matchattributeswith|[]->k(List.revacc)|((n,_)asattr)::more->ifacc|>List.exists(fun(n',_)->n'=n)thenreportl(`Bad_token(sndn,"tag","duplicate attribute"))!throw(fun()->deduplicateaccmorek)elsededuplicate(attr::acc)morekindeduplicate[]attributes(funattributes->open_elements:=(l,expanded_name,raw_name)::!open_elements;emitl(`Start_element(expanded_name,attributes))state))andpoplstate=match!open_elementswith|[]->state()|_::more->Namespace.Parsing.popnamespaces;open_elements:=more;emitl`End_elementstateandemit_end()=current_state:=(fun()->!ended());!ended()andinitial_stateleading=next_expectedtokens!throwbeginfunction|_,(`Xml_|`Doctype_|`Start_|`End_)asv->pushtokensv;push_listtokens(List.revleading);document_state()|_,`Charssasvwhenis_whitespace_onlys->initial_state(v::leading)|_,(`Comment_|`PI_)asv->initial_state(v::leading)|_,(`Chars_|`EOF)asv->is_fragment:=true;pushtokensv;push_listtokens(List.revleading);content_state()endanddocument_state()=next_expectedtokens!throwbeginfunction|l,`Xmldeclaration->fragment_allowed:=false;emitl(`Xmldeclaration)doctype_state|v->pushtokensv;doctype_state()endanddoctype_state()=next_expectedtokens!throwbeginfunction|l,`Doctyped->fragment_allowed:=false;emitl(`Doctyped)root_state|_,`Charsswhenis_whitespace_onlys->doctype_state()|l,`Comments->emitl(`Comments)doctype_state|l,`PIs->emitl(`PIs)doctype_state|l,`Xml_->reportl(`Bad_document"XML declaration must be first")!throwdoctype_state|l,`Chars_->reportl(`Bad_document"text at top level")!throwdoctype_state|v->pushtokensv;root_state()endandroot_state()=next_expectedtokens!throwbeginfunction|l,`Startt->ift.self_closingthenpush_and_emitlt(fun()->poplafter_root_state)elsepush_and_emitltcontent_state|_,`Charsswhenis_whitespace_onlys->root_state()|l,`Comments->emitl(`Comments)root_state|l,`PIs->emitl(`PIs)root_state|l,`Xml_->reportl(`Bad_document"XML declaration must be first")!throwroot_state|l,`EOF->reportl(`Unexpected_eoi"document before root element")!throwemit_end|l,_->reportl(`Bad_document"expected root element")!throwroot_stateendandafter_root_state()=next_expectedtokens!throwbeginfunction|_,`Charsswhenis_whitespace_onlys->after_root_state()|l,`Comments->emitl(`Comments)after_root_state|l,`PIs->emitl(`PIs)after_root_state|_,`EOF->emit_end()|_,(`Chars_|`Start_|`End_)asvwhen!fragment_allowed->is_fragment:=true;pushtokensv;content_state()|l,_asv->reportl(`Bad_document"not allowed after root element")!throw(fun()->is_fragment:=true;pushtokensv;content_state())endandcontent_state()=next_expectedtokens!throwbeginfunction|l,`Startt->ift.self_closingthenpush_and_emitlt(fun()->poplcontent_state)elsepush_and_emitltcontent_state|l,`End{name=raw_name}->Namespace.Parsing.expand_element(fun()->reportl)namespacesraw_name!throw(funexpanded_name->letis_on_stack=!open_elements|>List.exists(fun(_,name,_)->name=expanded_name)inifnotis_on_stackthenreportl(`Unmatched_end_tagraw_name)!throwcontent_stateelseletrecpop_until_match()=match!open_elementswith|(_,name,_)::_whenname=expanded_name->popl(fun()->match!open_elementswith|[]whennot!is_fragment->after_root_state()|_->content_state())|(l',_,name)::_->reportl'(`Unmatched_start_tagname)!throw(fun()->poplpop_until_match)|_->failwith"impossible"inpop_until_match())|l,`Charss->emitl(`Texts)content_state|l,`PIs->emitl(`PIs)content_state|l,`Comments->emitl(`Comments)content_state|l,`EOF->letrecpop_stack()=match!open_elementswith|[]->emit_end()|(l',_,raw_name)::_->reportl'(`Unmatched_start_tagraw_name)!throw(fun()->poplpop_stack)inpop_stack()|l,`Xml_->reportl(`Bad_document"XML declaration should be at top level")!throwcontent_state|l,`Doctype_->reportl(`Bad_document"doctype should be at top level")!throwcontent_stateendin(funthrow_ek->throw:=throw_;ended:=e;output:=k;!current_state())|>make