123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467(* This file is part of Markup.ml, released under the MIT license. See
LICENSE.md for details, or visit https://github.com/aantron/markup.ml. *)openCommonopenKstreamletcontents=letfiltersignal_k=matchsignalwith|`Start_element_|`End_element|`Text_assignal->k(Somesignal)|`Comment_|`PI_|`Doctype_|`Xml_->kNoneinfilter_mapfiltersletstrings_to_bytesstrings=letcurrent_string=ref""inletindex=ref0inletrecemitthrowek=if!index<String.length!current_stringthenbeginindex:=!index+1;k(!current_string.[!index-1])endelsenextstringsthrowe(funs->current_string:=s;index:=0;emitthrowek)inmakeemitletunwrap_listsls=letcurrent_list=ref[]inletrecemitthrowek=match!current_listwith|v::l->current_list:=l;kv|[]->nextlsthrowe(funl->current_list:=l;emitthrowek)inmakeemitlettrees?text?element?comment?pi?xml?doctypes=letrecmatch_nodethrowknone=nextsthrownonebeginfunction|`Start_element(name,attributes)->match_content[]throw(funchildren->matchelementwith|None->match_nodethrowknone|Someelement->k(elementnameattributeschildren))|`End_element->none()|`Textss->beginmatchtextwith|None->match_nodethrowknone|Sometext->k(textss)end|`Doctyped->beginmatchdoctypewith|None->match_nodethrowknone|Somedoctype->k(doctyped)end|`Xmlx->beginmatchxmlwith|None->match_nodethrowknone|Somexml->k(xmlx)end|`PI(t,s)->beginmatchpiwith|None->match_nodethrowknone|Somepi->k(pits)end|`Comments->beginmatchcommentwith|None->match_nodethrowknone|Somecomment->k(comments)endendandmatch_contentaccthrowk=match_nodethrow(funn->match_content(n::acc)throwk)(fun()->k(List.revacc))in(funthrowek->match_nodethrowke)|>makelettree?text?element?comment?pi?xml?doctypesthrowk=lets'=trees?text?element?comment?pi?xml?doctypesinnexts'throw(fun()->kNone)(funt->k(Somet))type'anode=[`Elementofname*(name*string)list*'alist|`Textofstring|`Doctypeofdoctype|`Xmlofxml_declaration|`PIofstring*string|`Commentofstring]letfrom_treefnode=letrectraverseaccnode=matchfnodewith|`Element(name,attributes,children)->children|>List.fold_lefttraverse((`Start_element(name,attributes))::acc)|>funacc->`End_element::acc|`Texts->(`Text[s])::acc|`Doctype_|`Xml_|`PI_|`Comment_asnode->node::accintraverse[]node|>List.rev|>of_listletelementsselects=letdepth=ref0inletstarted=ref0inletfinished=ref0inletrecscanthrowek=nextsthrowebeginfunsignal->matchsignalwith|`Start_element(name,attributes)when!started=!finished&&selectnameattributes->letindex=!started+1instarted:=index;depth:=0;letconstructor_k=pushssignal;(funthrowek->if!finished>=indexthene()elsenextsthrowebeginfunsignal->matchsignalwith|`Start_element_->depth:=!depth+1;ksignal|`End_element->depth:=!depth-1;if!depth=0thenfinished:=index;ksignal|`Text_|`Comment_|`PI_|`Doctype_|`Xml_->ksignalend)|>make|>kinconstructconstructor|>k|`Start_element_when!started>!finished->depth:=!depth+1;scanthrowek|`End_elementwhen!started>!finished->depth:=!depth-1;if!depth=0thenfinished:=!started;scanthrowek|`Text_|`Start_element_|`End_element|`Comment_|`PI_|`Doctype_|`Xml_->scanthrowekendinmakescanlettexts=letfilterv_k=matchvwith|`Textss->k(Somess)|`Start_element_|`End_element|`Comment_|`PI_|`Doctype_|`Xml_->kNoneinfilter_mapfilters|>unwrap_lists|>strings_to_bytesletnormalize_texts=letrecmatch_textaccthrowek=next_optionsthrowbeginfunction|Some(`Textss)->match_text(ss::acc)throwek|v->push_optionsv;letss=List.revacc|>List.flatten|>List.filter(funs->String.lengths>0)inmatchsswith|[]->match_otherthrowek|_->k(`Textss)endandmatch_otherthrowek=nextsthrowe(function|`Textss->match_text[ss]throwek|signal->ksignal)inmakematch_otherletis_phrasing_element(namespace,element_name)=ifnamespace<>html_nsthenfalseelsematchelement_namewith|"a"|"abbr"|"b"|"bdi"|"bdo"|"br"|"button"|"cite"|"code"|"data"|"dfn"|"em"|"i"|"img"|"input"|"kbd"|"label"|"mark"|"pre"|"q"|"rb"|"rt"|"ruby"|"s"|"samp"|"select"|"small"|"span"|"strong"|"sub"|"sup"|"textarea"|"time"|"u"|"var"|"wbr"->true|_->falseletrectrim_string_listtrim=function|[]->[]|s::more->matchtrimswith|""->trim_string_listtrimmore|s->s::morelettrimsignals=letsignals=normalize_textsignalsinletsignals_and_flow=Kstream.transformbeginfunphrasing_nesting_levelsignal_throwk->matchsignalwith|`Start_element(name,_)->ifphrasing_nesting_level>0thenk([signal,false],Some(phrasing_nesting_level+1))elseifis_phrasing_elementnamethenk([signal,false],Some1)elsek([signal,true],Some0)|`End_element->ifphrasing_nesting_level>0thenk([signal,false],Some(phrasing_nesting_level-1))elsek([signal,true],Some0)|_->k([signal,false],Somephrasing_nesting_level)end0signalsinletsignals=Kstream.transformbeginfunsaw_flow_tag(signal,is_flow_tag)throwk->matchsignalwith|`Textss->letss=ifsaw_flow_tagthentrim_string_listCommon.trim_string_leftsselsessinKstream.peek_optionsignals_and_flowthrow(funmaybe_signal->letss=matchmaybe_signalwith|Some(_,true)->ss|>List.rev|>trim_string_listCommon.trim_string_right|>List.rev|_->ssink([`Textss],Somefalse))|_->k([signal],Someis_flow_tag)endtruesignals_and_flowinnormalize_textsignalslettab_width=1letpretty_printsignals=letsignals=trimsignalsinletindentn=letn=ifn<0then0elseninString.make(n*tab_width)' 'inletreccurrent_state=ref(funthrowek->flow0throwek)andflowindentationthrowek=nextsignalsthrowebeginfunsignal->matchsignalwith|`Start_element(name,_)whennot@@is_phrasing_elementname->(* If the next signal is `End_element, don't insert a line break. This
is mainly for collapsing inherently empty tags like <meta> and
<br>. *)peek_expectedsignalsthrowbeginfunnext_signal->matchnext_signalwith|`End_element->next_expectedsignalsthrowbeginfun_->list[`Text[indentindentation];signal;next_signal;`Text["\n"]](flowindentation)throwekend|_->list[`Text[indentindentation];signal;`Text["\n"]](flow(indentation+1))throwekend|`End_element->list[`Text[indent(indentation-1)];signal;`Text["\n"]](flow(indentation-1))throwek|`Start_element_|`Text_->pushsignalssignal;list[`Text[indentindentation]](phrasingindentation0)throwek|`Doctype_->list[signal;`Text["\n"]](flowindentation)throwek|_->list[signal](flowindentation)throwekendandphrasingindentationphrasing_nesting_levelthrowek=nextsignalsthrowebeginfunsignal->matchsignalwith|`Start_element(name,_)whenis_phrasing_elementname->list[signal](phrasingindentation(phrasing_nesting_level+1))throwek|`End_elementwhenphrasing_nesting_level>0->list[signal](phrasingindentation(phrasing_nesting_level-1))throwek|`Text_->list[signal](phrasingindentationphrasing_nesting_level)throwek|_->pushsignalssignal;list[`Text["\n"]](flowindentation)throwekendandlistsignalsstatethrowek=matchsignalswith|[]->statethrowek|signal::more->current_state:=listmorestate;ksignalin(funthrowek->!current_statethrowek)|>make|>normalize_textlethtml5s=letremove_markupv_k=matchvwith|`Doctype_|`Xml_|`PI_->kNone|`Text_|`Start_element_|`End_element|`Comment_asv->k(Somev)ins|>filter_mapremove_markup|>funs->pushs(`Doctype{doctype_name=Some"html";public_identifier=None;system_identifier=None;raw_text=None;force_quirks=false});sletxhtml?dtds=letdoctype_text=matchdtdwith|Some`Strict_1_0->"html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" "^"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\""|Some`Transitional_1_0->"html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "^"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\""|Some`Frameset_1_0->"html PUBLIC \"-//W3C//DTD XHTML 1.0 Frameset//EN\" "^"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\""|Some`Strict_1_1|None->"html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" "^"\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\""inletremove_markupv_k=matchvwith|`Doctype_|`Xml_->kNone|`Text_|`Start_element_|`End_element|`Comment_|`PI_asv->k(Somev)ins|>filter_mapremove_markup|>funs->pushs(`Doctype{doctype_name=None;public_identifier=None;system_identifier=None;raw_text=Somedoctype_text;force_quirks=false});pushs(`Xml{version="1.0";encoding=Some"utf-8";standalone=None});sletxhtml_entityname=letreclookupindex=ifindex>=Array.lengthEntities.entitiesthenraiseExitelseiffstEntities.entities.(index)<>namethenlookup(index+1)elsesndEntities.entities.(index)intryletbuffer=Buffer.create8inmatchlookup0with|`Onec->add_utf_8bufferc;Some(Buffer.contentsbuffer)|`Two(c,c')->add_utf_8bufferc;add_utf_8bufferc';Some(Buffer.contentsbuffer)withExit->None