1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333(* This file is part of Lambda Soup, released under the MIT license. See
LICENSE.md for details, or visit https://github.com/aantron/lambdasoup. *)moduleString=structincludeStringlettrim=letwhitespace=" \t\n\r"infuns->letrecmeasure_prefixindex=ifindex=String.lengthsthenindexelseifString.containswhitespaces.[index]thenmeasure_prefix(index+1)elseindexinletprefix_length=measure_prefix0inlets=String.subsprefix_length(String.lengths-prefix_length)inletrecmeasure_suffixrindex=ifrindex=String.lengthsthenrindexelseifString.containswhitespaces.[String.lengths-rindex-1]thenmeasure_suffix(rindex+1)elserindexinletsuffix_length=measure_suffix0inString.subs0(String.lengths-suffix_length)endtypeelement=unittypegeneral=unittypesoup=unittypeelement_values={mutablename:string;mutableattributes:(string*string)list;mutablechildren:generalnodelist}anddocument_values={mutableroots:generalnodelist;doctype:Markup.doctypeoption}and'anode={mutableself:'b.'bnodeoption;mutableparent:generalnodeoption;values:[`Elementofelement_values|`Textofstring|`Documentofdocument_values]}letrequire_internalmessage=function|None->failwithmessage|Somev->vletrequire=function|None->failwith"require: argument is None"|Somev->vletforget_type:(_node)->(_node)=funn->require_internal"Soup._forget_type: internal error: node's self reference not set"n.selfletcoercenode=forget_typenodeletcreate_elementnameattributeschildren=letvalues={name;attributes;children}inletnode={self=None;parent=None;values=`Elementvalues}innode.self<-Somenode;children|>List.iter(funchild->child.parent<-Somenode);nodeletcreate_texttext=letnode={self=None;parent=None;values=`Texttext}innode.self<-Somenode;nodeletcreate_documentdoctyperoots=letnode={self=None;parent=None;values=`Document{roots;doctype}}innode.self<-Somenode;roots|>List.iter(funroot->root.parent<-Somenode);nodeletcreate_soup()=create_documentNone[]letfrom_signals'~map_attributessignals=letdoctype=refNoneinsignals|>Markup.map(funs->beginmatchswith|`Doctyped->doctype:=Somed|_->()end;s)|>(funs->Markup.trees~text:(funss->create_text(String.concat""ss))~element:(funnameattributeschildren->letattributes=attributes|>List.map(fun((ns,n),v)->matchnswith|""->(n,v)|_->(ns^":"^n,v))|>map_attributesnameincreate_element(sndname)attributeschildren)s)|>Markup.to_list|>create_document!doctypeletfrom_signals=from_signals'~map_attributes:(fun_na->a)letparsetext=letbody_attributes=ref[]inletreport_le=matchewith|`Misnested_tag("body",_,attributes)->body_attributes:=!body_attributes@attributes|_->()intext|>Markup.string|>(funs->Markup.parse_html~reports)|>Markup.signals|>from_signals'~map_attributes:(funnameattributes->matchnamewith|ns,"body"whenns=Markup.Ns.html->List.fold_left(funattributes(n,v)->matchList.mem_assocnattributeswith|true->attributes|false->(n,v)::attributes)attributes!body_attributes|_->attributes)letis_documentnode=matchnode.valueswith|`Element_->false|`Text_->false|`Document_->trueletis_elementnode=matchnode.valueswith|`Element_->true|`Text_->false|`Document_->falseletis_textnode=matchnode.valueswith|`Text_->true|`Element_->false|`Document_->falseletelementnode=ifis_elementnodethenSome(forget_typenode)elseNonetype'astop={throw:'b.'a->'b}exceptionStopofint64letgenerate_id=letnext=ref0Linfun()->letcurrent=!nextinnext:=Int64.succcurrent;currentletwith_stopf=letresult=refNoneinletid=generate_id()inletstop={throw=funv->result:=Somev;raise_notrace(Stopid)}intryfstopwithStopid'whenid'=id->match!resultwith|None->failwith"Soup.with_stop: internal error: !result = None"[@coverageoff]|Somev->vletname=function|{values=`Element{name;_};_}->String.lowercase_asciiname|_->failwith"Soup.name: internal error: not an element"[@coverageoff]letfold_attributesfinit=function|{values=`Element{attributes;_};_}->attributes|>List.fold_left(funv(name,value)->fvnamevalue)init|_->failwith"Soup.fold_attributes: internal error: not an element"[@coverageoff]letattributenamenode=with_stop(funstop->node|>fold_attributes(fun_name'value->ifname'=namethenstop.throw(Somevalue)elseNone)None)lethas_attributenamenode=matchattributenamenodewith|None->false|Some_->trueletsplit_attributes=letrecloopindexvs=ifindex=String.lengthsthenList.revvselseletmaybe_index'=trySome(String.index_fromsindex' ')withNot_found->Noneinmatchmaybe_index'with|None->(String.subsindex(String.lengths-index))::vs|>List.rev|Someindex'whenindex'=index->loop(index'+1)vs|Someindex'->(String.subsindex(index'-index))::vs|>loop(index'+1)inloop0[]letclassesnode=matchattribute"class"nodewith|None->[]|Someclasses->split_attributeclassesletid=attribute"id"type'anodes={eliminate:'b.('b->'anode->'b)->'b->'b}letempty={eliminate=fun_init->init}letfoldfinitsequence=sequence.eliminatefinitletfilter_mapfsequence={eliminate=funf'init->init|>sequence.eliminate(funvnode->matchfnodewith|None->v|Somenode'->f'vnode')}letfilterf=filter_map(funnode->iffnodethenSomenodeelseNone)letmapf=filter_map(funnode->Some(fnode))letflattenfsequence={eliminate=funf'init->init|>sequence.eliminate(funvnode->v|>(fnode).eliminatef')}letiterfsequence=fold(fun()node->fnode)()sequenceletnthindexsequence=with_stop(funstop->sequence|>fold(funindex'node->ifindex'=indexthenstop.throw(Somenode)elseindex'+1)1|>ignore;None)letfirstsequence=nth1sequenceletlastsequence=sequence|>fold(fun_node->Somenode)Noneletcountsequence=sequence|>fold(funcount_->count+1)0letto_listsequence=fold(funlnode->node::l)[]sequence|>List.revletof_listl={eliminate=funfinit->List.fold_leftfinitl}letrevsequence=sequence|>to_list|>List.rev|>of_listletelementssequence={eliminate=funfinit->init|>sequence.eliminate(funvnode->matchelementnodewith|None->v|Someelement->fvelement)}letchild_list=function|{values=`Element{children;_};_}->Somechildren|{values=`Document{roots;_};_}->Someroots|_->Noneletchildrennode=matchchild_listnodewith|Somechildren->{eliminate=funfinit->List.fold_leftfinitchildren}|_->emptyletrecdescendantsnode={eliminate=funfinit->init|>(childrennode).eliminate(funvchild->fvchild|>(descendants(forget_typechild)).eliminatef)}letchildnode=node|>children|>firstletchild_elementnode=node|>children|>elements|>firstletsimple_parentnode=node.parentletparentnode=matchnode.parentwith|None->None|Somenodewhenis_documentnode->None|Somenode->Somenodeletrecgeneral_ancestorsget_parentnode={eliminate=funfinit->matchget_parentnodewith|None->init|Someparent->finitparent|>(general_ancestorsget_parent(forget_typeparent)).eliminatef}letsimple_ancestors=general_ancestorssimple_parentletancestorsnode=general_ancestorsparentnodeletsiblingsnode=matchsimple_parentnodewith|None->empty|Someparent->childrenparent|>filter(funchild->child!=(forget_typenode))letsplit_at_identityfunction_namevl=letrecloopprefix=function|[]->failwith("Soup."^function_name^": internal error: child not in parent's child list")[@coverageoff]|u::suffix->ifu==vthenprefix,suffixelseloop(u::prefix)suffixinloop[]lletsibling_listsfunction_nameselectnode=matchsimple_parentnodewith|None->empty|Someparent->matchchild_listparentwith|None->failwith("Soup."^function_name^": internal error: parent has no children")[@coverageoff]|Somechildren->letlists=split_at_identityfunction_name(forget_typenode)childrenin{eliminate=funfinit->selectlists|>List.fold_leftfinit}letnext_siblingsnode=sibling_lists"next_siblings"sndnodeletprevious_siblingsnode=sibling_lists"previous_siblings"fstnodeletnext_siblingnode=next_siblingsnode|>firstletprevious_siblingnode=previous_siblingsnode|>firstletnext_elementnode=next_siblingsnode|>elements|>firstletprevious_elementnode=previous_siblingsnode|>elements|>firstletindex_ofnode=matchsimple_parentnodewith|None->1|Someparent->matchchild_listparentwith|None->failwith"Soup.index_of: internal error: parent has no children"[@coverageoff]|Somechildren->with_stop(funstop->children|>List.iteri(funindexchild->ifchild==(forget_typenode)thenstop.throw(index+1));failwith"Soup.index_of: internal error: child not in parent's child list")[@coverageoff]letindex_of_elementelement=matchsimple_parentelementwith|None->1|Someparent->with_stop(funstop->parent|>children|>elements|>fold(funindexelement'->ifelement'==elementthenstop.throwindexelseindex+1)1|>ignore[@coverageoff];(failwith("Soup.index_of_element: internal error: "^"element is not a child of its own parent"))[@coverageoff])letat_most_n_childrencountnode=matchnth(count+1)(childrennode)with|None->true|Some_->falseletno_childrennode=at_most_n_children0nodeletat_most_one_childnode=at_most_n_children1nodeletis_rootnode=matchnode.parentwith|None->not(is_documentnode)|Someparent->is_documentparentlettagsname'node=letname'=String.lowercase_asciiname'innode|>descendants|>elements|>filter(funelement->nameelement=name')lettagnamenode=tagsnamenode|>firstletnormalize_childrentrimchildren=letrecloopprefix=function|[]->List.revprefix|node::rest->matchnode.valueswith|`Texts->lets=trimsinifs=""thenloopprefixrestelse(matchprefixwith|{values=`Texts';_}::prefix'->loop((create_text(s'^s))::prefix')rest|_->loop((create_texts)::prefix)rest)|_->loop(node::prefix)restinloop[]childrenletrecleaf_textnode=lettrims=ifString.trims=""then""elsesinmatchnode.valueswith|`Texts->Somes|`Element_|`Document_->letchildren=child_listnode|>require_internal("Soup.leaf_text: internal error: node is not a text node, "^"but has no child list")|>normalize_childrentriminmatchchildrenwith|[]->Some""|[child]->leaf_text(forget_typechild)|_->Noneletrectextsnode=matchnode.valueswith|`Texts->[s]|`Element{children;_}->children|>List.mapforget_type|>List.maptexts|>List.fold_left(@)[]|`Document{roots;_}->roots|>List.mapforget_type|>List.maptexts|>List.fold_left(@)[]lettrimmed_textsnode=textsnode|>List.mapString.trim|>List.filter(funs->String.lengths>0)exceptionParse_errorofstringmoduleSelector:sigtypetvalparse:string->tvalselect:(_node)->t->elementnodesvalmatches_selector:t->soupnode->soupnode->boolend=structtypetype_=Nameofstring|Universaltypeattribute=|Presentofstring|Exactlyofstring*string|Memberofstring*string|HasDashSeparatedPrefixofstring*string|Prefixofstring*string|Suffixofstring*string|Substringofstring*stringtypepseudo_class=|Root|NthChildofint*int|NthLastChildofint*int|NthOfTypeofint*int|NthLastOfTypeofint*int|OnlyChild|OnlyOfType|Empty|Contentofstring|Hasofsimple_selector|Notofsimple_selectorandsimple_selector=|Typeoftype_|Attributeofattribute|Pseudo_classofpseudo_classtypecombinator=|Descendant|Child|AdjacentSibling|IndirectSiblingtypet=(combinator*simple_selectorlist)listlethas_prefixprefixs=String.subs0(String.lengthprefix)=prefixlethas_suffixsuffixs=letsuffix_length=String.lengthsuffixinString.subs(String.lengths-suffix_length)suffix_length=suffixlethas_substrings's=letsubstring_length=String.lengths'inletrecloopindex=ifString.subsindexsubstring_length=s'thentrueelseloop(index+1)intryloop0withInvalid_argument_->falseletmatches_attribute_selectornodeselector=letcaptured="Soup.matches_attribute_selector: internal error: "^"this exception should have been caught"intrymatchselectorwith|Presentname->has_attributenamenode|Exactly(name,value)->attributenamenode=Somevalue|Member(name,value)->attributenamenode|>require_internalcaptured|>split_attribute|>List.memvalue|HasDashSeparatedPrefix(name,value)->letvalue'=attributenamenode|>require_internalcapturedinvalue'=value||has_prefix(value^"-")value'|Prefix(name,value)->attributenamenode|>require_internalcaptured|>has_prefixvalue|Suffix(name,value)->attributenamenode|>require_internalcaptured|>has_suffixvalue|Substring(name,value)->attributenamenode|>require_internalcaptured|>has_substringvaluewith_->falseletelement_countnode=matchsimple_parentnodewith|None->1[@coverageoff]|Someparent->parent|>children|>elements|>countletelement_count_with_namename'node=matchsimple_parentnodewith|None->1[@coverageoff]|Someparent->parent|>children|>elements|>filter(funelement->nameelement=name')|>countletelement_index_with_namename'node=matchsimple_parentnodewith|None->1[@coverageoff]|Someparent->with_stop(funstop->flushstdout;parent|>children|>elements|>filter(funelement->nameelement=name')|>fold(funindexelement->ifelement==nodethenstop.throwindexelseindex+1)1|>ignore[@coverageoff];(failwith("Soup.Selector.element_index_with_name: internal error: "^"parent does not have given child"))[@coverageoff])letconditional_modna=ifa=0thennelsenmodaletrecmatches_pseudo_class_selectornodeselector=matchselectorwith|Root->parentnode=None|NthChild(a,b)->conditional_mod(index_of_elementnode)a=b|NthLastChild(a,b)->letelement_count=element_countnodeinconditional_mod(element_count-(index_of_elementnode)+1)a=b|NthOfType(a,b)->conditional_mod(element_index_with_name(namenode)node)a=b|NthLastOfType(a,b)->letname=namenodeinletelement_count=element_count_with_namenamenodeinconditional_mod(element_count-(element_index_with_namenamenode)+1)a=b|OnlyChild->element_countnode=1|OnlyOfType->element_count_with_name(namenode)node=1|Empty->no_childrennode|Contents->textsnode|>String.concat""|>has_substrings|Hasselector->descendantsnode|>filter(fundescendant->not(is_textdescendant))|>filter(fundescendant->matches_simple_selectordescendantselector)|>count|>funcount->count>0|Notselector->not(matches_simple_selectornodeselector)andmatches_simple_selectornode=function|TypeUniversal->true|Type(Namename')->namenode=(String.lowercase_asciiname')|Attributeattribute_selector->matches_attribute_selectornodeattribute_selector|Pseudo_classpseudo_class_selector->matches_pseudo_class_selectornodepseudo_class_selectorletmatches_simple_selectorsnodeselectors=List.for_all(matches_simple_selectornode)selectorsletup_tonodesequence={eliminate=funfinit->with_stop(funstop->sequence.eliminate(funvnode'->letv'=fvnode'inifnode'==nodethenstop.throwv'elsev')init)}letonesequence={eliminate=funfinit->with_stop(funstop->sequence.eliminate(funvnode->fvnode|>stop.throw)init)}letmatches_selectorselectorroot_nodeat_node=with_stop(funstop->letrecbackwards_traversalat_node=function|[]->ifat_node==root_nodethenstop.throwtrueelse()|(combinator,simple_selectors)::rest->ifnot(is_elementat_node)then()elseifnot(matches_simple_selectorsat_nodesimple_selectors)then()elseletnext_nodes=matchcombinatorwith|Descendant->at_node|>simple_ancestors|>up_toroot_node|Child->at_node|>ancestors|>one|IndirectSibling->at_node|>previous_siblings|>elements|>up_toroot_node|AdjacentSibling->at_node|>previous_siblings|>elements|>oneinnext_nodes|>iter(funnode->backwards_traversalnoderest)inbackwards_traversalat_node(List.revselector);false)letselectroot_nodeselector=letroot_node=forget_typeroot_nodeinletcandidates=matchsimple_parentroot_nodewith|None->descendantsroot_node|Someparent->descendantsparentincandidates|>elements|>filter(matches_selectorselectorroot_node)letis_decimal_charc=((Char.codec)>=(Char.code'0'))&&((Char.codec)<=(Char.code'9'))letis_hexadecimal_charc=(is_decimal_charc)||(c>='a'&&c<='f')||(c>='A'&&c<='F')letis_identifier_charc=letc=Char.lowercase_asciicin((Char.codec)>=(Char.code'a')&&(Char.codec)<=(Char.code'z'))||(is_decimal_charc)||(c=='-')||(c=='_')letis_whitespace_charc=c=' '||c='\t'||c='\n'||c='\r'letis_continuation_simple_selector_start_charc=(c=='.')||(c=='#')||(c=='[')||(c==':')lethexadecimal_value=function|'A'|'a'->0xA|'B'|'b'->0xB|'C'|'c'->0xC|'D'|'d'->0xD|'E'|'e'->0xE|'F'|'f'->0xF|c->Char.codec-Char.code'0'letparse_errormsg=raise(Parse_errormsg)letrecparse_hexadecimal_escapevaluecountstream=ifcount>=6thenvalueelsematchStream.peekstreamwith|Some(' '|'\t'|'\n')->Stream.junkstream;value|Some'\r'->beginmatchStream.npeek2streamwith|['\r';'\n']->Stream.junkstream;Stream.junkstream|_->()end;value|Somecwhenis_hexadecimal_charc->Stream.junkstream;letvalue=value*0x10+hexadecimal_valuecinparse_hexadecimal_escapevalue(count+1)stream|_->valueletparse_escape_sequencestream=Stream.junkstream;matchStream.peekstreamwith|None->'\\'|Somecwhenis_hexadecimal_charc->beginmatchparse_hexadecimal_escape00streamwith|nwhenn>0xFF->'\x1A'|n->Char.chrnend|Somec->Stream.junkstream;cletparse_identifierstream=letbuffer=Buffer.create32inbeginmatchStream.peekstreamwith|Some'\\'->()|Somecwhenis_identifier_charc->()|_->(parse_error[@coverageoff])"expected an identifier"end;letrecloop()=matchStream.peekstreamwith|Some'\\'->Buffer.add_charbuffer(parse_escape_sequencestream);loop()|Somecwhenis_identifier_charc->Buffer.add_charbufferc;Stream.junkstream;loop()|_->Buffer.contentsbufferinloop()letparse_type_selectorstream=matchStream.peekstreamwith|Some'*'->Stream.junkstream;Universal|_->tryletname=parse_identifierstreaminNamenamewith_->parse_error"expected tag name or '*'"letparse_attribute_operatorstream=matchStream.npeek2streamwith|['=';_]->Stream.junkstream;"="|[c;'=']->Stream.junkstream;Stream.junkstream;Printf.sprintf"%c="c|_->parse_error"expected attribute operator"letparse_quoted_stringstream=matchStream.peekstreamwith|Some('"'asdelim)|Some('\''asdelim)->Stream.junkstream;letbuffer=Buffer.create64inletrecloop()=matchStream.peekstreamwith|Somecwhenc=delim->Stream.junkstream;Buffer.contentsbuffer|Some'\\'->Stream.junkstream;(matchStream.peekstreamwith|Somecwhenc=delim->Buffer.add_charbufferdelim;Stream.junkstream|_->Buffer.add_charbuffer'\\');loop()|Somec->Buffer.add_charbufferc;Stream.junkstream;loop()|None->parse_error"unterminated string"inloop()|_->parse_error"expected quoted string"letparse_stringstream=matchStream.peekstreamwith|Some'"'|Some'\''->parse_quoted_stringstream|_->letbuffer=Buffer.create32inletrecloop()=matchStream.peekstreamwith|Some']'|None->Buffer.contentsbuffer|Somec->Buffer.add_charbufferc;Stream.junkstream;loop()inloop()letconsume_whitespacestream=letrecloop()=matchStream.peekstreamwith|Somecwhenis_whitespace_charc->Stream.junkstream;loop()|_->()inloop()letparse_attribute_selectorstream=Stream.junkstream;consume_whitespacestream;letname=parse_identifierstreaminconsume_whitespacestream;(matchStream.peekstreamwith|None->parse_error"unterminated attribute selector"|Some']'->Stream.junkstream;Presentname|Some_->letoperator=parse_attribute_operatorstreaminconsume_whitespacestream;(matchStream.peekstreamwith|None->parse_error"unterminated attribute selector"|Some']'->parse_error"expected value in attribute selector"|Some_->letvalue=parse_stringstreaminconsume_whitespacestream;(matchStream.peekstreamwith|None->parse_error"unterminated attribute selector"|Some']'->Stream.junkstream;(matchoperatorwith|"="->Exactly(name,value)|"~="->Member(name,value)|"|="->HasDashSeparatedPrefix(name,value)|"^="->Prefix(name,value)|"$="->Suffix(name,value)|"*="->Substring(name,value)|_->Printf.ksprintfparse_error"invalid attribute operator '%s'"operator)|Some_->parse_error"expected end of attribute selector (']')")))letparse_class_selectorstream=Stream.junkstream;letvalue=parse_identifierstreaminMember("class",value)letparse_id_selectorstream=Stream.junkstream;letvalue=parse_identifierstreaminExactly("id",value)letparse_numberstream=letbuffer=Buffer.create16inletrecloop()=matchStream.peekstreamwith|Somecwhenis_decimal_charc->Buffer.add_charbufferc;Stream.junkstream;loop()|_->Buffer.contentsbufferinloop()|>int_of_stringletparse_modular_pattern_tailastream=Stream.junkstream;matchStream.peekstreamwith|Some('+'asc)|Some('-'asc)->Stream.junkstream;(matchStream.peekstreamwith|Somec'whenis_decimal_charc'->letb=parse_numberstreaminletb=ifc='+'thenbmodaelsea-(bmoda)ina,b|_->parse_error"expected number after '+' or '-'")|_->a,0letparse_modular_patternstream=matchStream.peekstreamwith|Some'e'|Some'o'->(matchparse_identifierstreamwith|"even"->(2,0)|"odd"->(2,1)|_->parse_error"expected 'n', 'even', or 'odd'")|Some'n'->parse_modular_pattern_tail1stream|Somecwhenis_decimal_charc->leta=parse_numberstreamin(matchStream.peekstreamwith|Some'n'->parse_modular_pattern_tailastream|_->(0,a))|_->parse_error"expected expression"letparse_parenthesized_valuefstream=matchStream.peekstreamwith|Some'('->Stream.junkstream;consume_whitespacestream;letvalue=fstreaminconsume_whitespacestream;(matchStream.peekstreamwith|Some')'->Stream.junkstream;value|_->parse_error"unterminated '('")|_->parse_error"expected parenthesized expression"letrecparse_pseudo_class_selectorstream=Stream.junkstream;letfunction_=parse_identifierstreamin(matchfunction_with|"root"->Root|"first-child"->NthChild(0,1)|"last-child"->NthLastChild(0,1)|"first-of-type"->NthOfType(0,1)|"last-of-type"->NthLastOfType(0,1)|"only-child"->OnlyChild|"only-of-type"->OnlyOfType|"nth-child"->leta,b=parse_parenthesized_valueparse_modular_patternstreaminNthChild(a,b)|"nth-of-type"->leta,b=parse_parenthesized_valueparse_modular_patternstreaminNthOfType(a,b)|"nth-last-child"->leta,b=parse_parenthesized_valueparse_modular_patternstreaminNthLastChild(a,b)|"nth-last-of-type"->leta,b=parse_parenthesized_valueparse_modular_patternstreaminNthLastOfType(a,b)|"contains"->lets=parse_parenthesized_valueparse_quoted_stringstreaminContents|"empty"->Empty|"has"->letselector=parse_parenthesized_valueparse_simple_selectorstreaminHasselector|"not"->letselector=parse_parenthesized_valueparse_simple_selectorstreaminNotselector|_->Printf.ksprintfparse_error"unknown pseudo-class or pseudo-element ':%s'"function_)andparse_simple_selectorstream=matchStream.peekstreamwith|Some'['->Attribute(parse_attribute_selectorstream)|Some':'->Pseudo_class(parse_pseudo_class_selectorstream)|Some'.'->Attribute(parse_class_selectorstream)|Some'#'->Attribute(parse_id_selectorstream)|Some_->Type(parse_type_selectorstream)|None->parse_error"expected simple selector"letparse_simple_selector_liststream=letfirst=parse_simple_selectorstreaminletrecloopselectors=matchStream.peekstreamwith|Somecwhenis_continuation_simple_selector_start_charc->(parse_simple_selectorstream)::selectors|>loop|_->List.revselectorsinloop[first]letparses=letstream=Stream.of_stringsinletrecloopselectors=consume_whitespacestream;matchStream.peekstreamwith|None->List.revselectors|_->letcombinator=matchStream.peekstreamwith|Some'>'->Stream.junkstream;Child|Some'+'->Stream.junkstream;AdjacentSibling|Some'~'->Stream.junkstream;IndirectSibling|_->Descendantinconsume_whitespacestream;(combinator,parse_simple_selector_liststream)::selectors|>loopinloop[]endletmatches_selectorroot_nodeselectornode=letroot_node=forget_typeroot_nodeinletnode=forget_typenodeinletselector=Selector.parseselectorinSelector.matches_selectorselectorroot_nodenodeletselectselectornode=selector|>Selector.parse|>Selector.selectnodeletselect_oneselectornode=selectselectornode|>firstlet($)nodeselector=node|>select_oneselector|>require_internal(Printf.sprintf"Soup.($): '%s' not found.\n%s"selector"Try Soup.($?) if you'd prefer returning None instead of an exception.")let($?)nodeselector=node|>select_oneselectorlet($$)nodeselector=node|>selectselectormoduleInfix=structlet($)=($)let($?)=($?)let($$)=($$)endletsignalsroot=letroot=forget_typerootinletrectraverseacc=function|{values=`Element{name;attributes;children};_}->letstart_signal=`Start_element(("http://www.w3.org/1999/xhtml",name),List.map(fun(n,v)->("",n),v)attributes)in`End_element::(traverse_list(start_signal::acc)children)|{values=`Document{roots;doctype};_}->letacc=matchdoctypewith|None->acc|Somedoctype->acc@[`Doctypedoctype]intraverse_listaccroots|{values=`Texts;_}->(`Text[s])::accandtraverse_listaccl=List.fold_lefttraverseacclinList.rev(traverse[]root)|>Markup.of_listletpretty_printroot=signalsroot|>Markup.pretty_print|>(funs->Markup.write_htmls)|>Markup.to_stringletto_stringroot=signalsroot|>(funs->Markup.write_htmls)|>Markup.to_stringletrecequal_generalnormalize_childrennn'=letequal_textss'=s=s'inletequal_childrenchildrenchildren'=letchildren=normalize_childrenchildreninletchildren'=normalize_childrenchildren'intryList.iter2(funcc'->ifnot(equal_generalnormalize_childrencc')thenraise_notrace(Invalid_argument"not equal"))childrenchildren';truewithInvalid_argument_->falseinletequal_elementvaluesvalues'=(values.name=values'.name)&&beginletsort=List.sort(funattrattr'->compare(fstattr)(fstattr'))in(sortvalues.attributes)=(sortvalues'.attributes)end&&equal_childrenvalues.childrenvalues'.childreninletequal_documentvaluesvalues'=equal_childrenvalues.rootsvalues'.rootsinmatchn,n'with|{values=`Texts;_},{values=`Texts';_}->equal_textss'|{values=`Elementv;_},{values=`Elementv';_}->equal_elementvv'|{values=`Documentv;_},{values=`Documentv';_}->equal_documentvv'|_->falseletequalnn'=equal_general(normalize_children(funs->s))(forget_typen)(forget_typen')letequal_modulo_whitespacenn'=equal_general(normalize_childrenString.trim)(forget_typen)(forget_typen')letmutate_child_listfnode=matchnode.valueswith|`Elementvalues->values.children<-fvalues.children|`Documentvalues->values.roots<-fvalues.roots|`Text_->failwith"Soup.mutate_child_list: node has no children"letstrip_documentnode=ifis_documentnodethenletchildren=node|>children|>to_listin(children|>List.iter(funchild->child.parent<-None);mutate_child_list(fun_->[])node);childrenelse[node]letdeletenode=matchnode.parentwith|None->()|Someparent->mutate_child_list(List.filter(funchild->child!=(forget_typenode)))parent;node.parent<-Noneletinsert_at_indexkelementnode=letelement=forget_typeelementinletnode=forget_typenodeindeletenode;letnodes=strip_documentnodeinmutate_child_list(funl->letrecloopprefixindex=function|[]->(List.revprefix)@nodes|x::l'->ifk<=indexthen(List.revprefix)@nodes@(x::l')elseloop(x::prefix)(index+1)l'inloop[]1l)element;nodes|>List.iter(funnode->node.parent<-Someelement)letappend_childelementnode=insert_at_index((element|>children|>count)+1)elementnodeletprepend_childelementnode=insert_at_index1elementnodeletinsert_beforetargetnode=insert_at_index(index_oftarget)(parenttarget|>require_internal"Soup.insert_before: target node has no parent")nodeletinsert_aftertargetnode=insert_at_index((index_oftarget)+1)(parenttarget|>require_internal"Soup.insert_after: target node has no parent")nodeletclearnode=mutate_child_list(funchildren->children|>List.iter(funchild->child.parent<-None);[])nodeletreplacetargetnode=deletenode;letparent=parenttarget|>require_internal"Soup.replace: target node has no parent"inletindex=index_oftargetindeletetarget;insert_at_indexindexparentnodeletswaptargetelement=letinternal="Soup.swap: internal error: non-element node given"inlettarget_children=child_listtarget|>require_internalinternalinletelement_children=child_listelement|>require_internalinternalintarget_children|>List.iter(funchild->child.parent<-Someelement);element_children|>List.iter(funchild->child.parent<-Sometarget);mutate_child_list(fun_->element_children)target;mutate_child_list(fun_->target_children)element;replacetargetelementletwraptargetelement=deleteelement;clearelement;replacetargetelement;append_childelementtargetletunwrapnode=letparent=parentnode|>require_internal"Soup.unwrap: node has no parent"inletindex=index_ofnodeindeletenode;letchildren=matchchild_listnodewith|None->[]|Somel->lin(tryclearnodewithFailure_->());List.revchildren|>List.iter(insert_at_indexindexparent)letappend_rootdocumentnode=deletenode;mutate_child_list(funf->f@[forget_typenode])document;node.parent<-Somedocumentletprepend_rootdocumentnode=deletenode;mutate_child_list(funf->(forget_typenode)::f)document;node.parent<-Somedocumentletset_namenew_name=function|{values=`Elemente;_}->e.name<-new_name|>String.trim|>String.lowercase_ascii|_->failwith"Soup.set_name: internal error: not an element"[@coverageoff]letdelete_attributename=function|{values=`Elemente;_}->e.attributes<-e.attributes|>List.filter(fun(name',_)->name'<>name)|_->failwith"Soup.delete_attribute: internal error: not an element"[@coverageoff]letset_attributenamevalue=function|{values=`Elemente;_}->e.attributes<-e.attributes|>List.filter(fun(name',_)->name'<>name)|>funattributes->(name,value)::attributes|_->failwith"Soup.set_attribute: internal error: not an element"[@coverageoff]letset_classesclasseselement=classes|>String.concat" "|>funv->set_attribute"class"velementletadd_classclass_element=letclasses=classeselementinifList.memclass_classesthen()elseset_classes(class_::classes)elementletremove_classclass_element=classeselement|>List.filter(func->c<>class_)|>function|[]->delete_attribute"class"element|v->set_classesvelementletcreate_element?id?class_?classes?(attributes=[])?inner_textname=letchildren=matchinner_textwith|None->[]|Somes->[create_texts]inletelement=create_elementname[]childreninattributes|>List.iter(fun(n,v)->set_attributenvelement);(matchclasseswith|None->()|Someclasses->classes|>String.concat" "|>funv->set_attribute"class"velement);(matchclass_with|None->()|Someclass_->set_attribute"class"class_element);(matchidwith|None->()|Someid->set_attribute"id"idelement);elementmoduleR=structletselect_onesn=select_onesn|>require_internal"Soup.R.select_one: None"letattributesn=attributesn|>require_internal"Soup.R.attribute: None"letidn=idn|>require_internal"Soup.R.id: None"letelementn=elementn|>require_internal"Soup.R.element: None"letleaf_textn=leaf_textn|>require_internal"Soup.R.leaf_text: None"letnthnt=nthnt|>require_internal"Soup.R.nth: None"letfirstt=firstt|>require_internal"Soup.R.first: None"letlastt=lastt|>require_internal"Soup.R.last: None"lettagsn=tagsn|>require_internal"Soup.R.tag: None"letparentn=parentn|>require_internal"Soup.R.parent: None"letchildn=childn|>require_internal"Soup.R.child: None"letchild_elementn=child_elementn|>require_internal"Soup.R.child_element: None"letnext_siblingn=next_siblingn|>require_internal"Soup.R.next_sibling: None"letprevious_siblingn=previous_siblingn|>require_internal"Soup.R.previous_sibling: None"letnext_elementn=next_elementn|>require_internal"Soup.R.next_element: None"letprevious_elementn=previous_elementn|>require_internal"Soup.R.previous_element: None"endletread_channelchannel=Markup.channelchannel|>Markup.to_stringletread_filepath=Markup.filepath|>fst|>Markup.to_stringletwrite_channel=output_stringletwrite_filepathdata=Markup.stringdata|>Markup.to_filepath[@@coverageoff]