123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297(* This file is part of Markup.ml, released under the BSD 2-clause license. See
doc/LICENSE for details, or visit https://github.com/aantron/markup.ml. *)(* Aliases for reducing the number of deprecation warnings. *)moduleString=structincludeStringletlowercase=lowercaseendmoduleChar=structincludeCharletlowercase=lowercaseendtype'acont='a->unittype'acps=exncont->'acont->unittypelocation=int*intletcompare_locations(line,column)(line',column')=matchline-line'with|0->column-column'|order->ordertypename=string*stringlet(|>)xf=fxlet(@@)fx=fxletxml_ns="http://www.w3.org/XML/1998/namespace"letxmlns_ns="http://www.w3.org/2000/xmlns/"letxlink_ns="http://www.w3.org/1999/xlink"lethtml_ns="http://www.w3.org/1999/xhtml"letsvg_ns="http://www.w3.org/2000/svg"letmathml_ns="http://www.w3.org/1998/Math/MathML"moduleToken_tag=structtypet={name:string;attributes:(string*string)list;self_closing:bool}endtypexml_declaration={version:string;encoding:stringoption;standalone:booloption}typedoctype={doctype_name:stringoption;public_identifier:stringoption;system_identifier:stringoption;raw_text:stringoption;force_quirks:bool}typesignal=[`Start_elementofname*(name*string)list|`End_element|`Textofstringlist|`Xmlofxml_declaration|`Doctypeofdoctype|`PIofstring*string|`Commentofstring]typecontent_signal=[`Start_elementofname*(name*string)list|`End_element|`Textofstringlist]typegeneral_token=[`Xmlofxml_declaration|`Doctypeofdoctype|`StartofToken_tag.t|`EndofToken_tag.t|`Charsofstringlist|`Charofint|`PIofstring*string|`Commentofstring|`EOF]letu_rep=Uchar.to_intUutf.u_repletadd_utf_8bufferc=Uutf.Buffer.add_utf_8buffer(Uchar.unsafe_of_intc)letformat_char=Printf.sprintf"U+%04X"(* Type constraints are necessary to avoid polymorphic comparison, which would
greatly reduce performance: https://github.com/aantron/markup.ml/pull/15. *)letis_in_range(lower:int)(upper:int)c=c>=lower&&c<=upper(* HTML 8.2.2.5. *)letis_control_character=function|0x000B->true|cwhenis_in_range0x00010x0008c->true|cwhenis_in_range0x000E0x001Fc->true|cwhenis_in_range0x007F0x009Fc->true|_->false(* HTML 8.2.2.5. *)letis_non_character=function|cwhenis_in_range0xFDD00xFDEFc->true|cwhen(cland0xFFFF=0xFFFF)||(cland0xFFFF=0xFFFE)->true|_->falseletis_digit=is_in_range0x00300x0039letis_hex_digit=function|cwhenis_digitc->true|cwhenis_in_range0x00410x0046c->true|cwhenis_in_range0x00610x0066c->true|_->falseletis_scalar=function|cwhen(c>=0x10FFFF)||((c>=0xD800)&&(c<=0xDFFF))->false|_->trueletis_uppercase=is_in_range0x00410x005Aletis_lowercase=is_in_range0x00610x007Aletis_alphabetic=function|cwhenis_uppercasec->true|cwhenis_lowercasec->true|_->falseletis_alphanumeric=function|cwhenis_alphabeticc->true|cwhenis_digitc->true|_->falseletis_whitespacec=c=0x0020||c=0x000A||c=0x0009||c=0x000Dletis_whitespace_onlys=trys|>String.iter(func->ifis_whitespace(int_of_charc)then()elseraiseExit);truewithExit->falseletto_lowercase=function|cwhenis_uppercasec->c+0x20|c->cletis_printable=is_in_range0x00200x007Eletcharc=ifis_printablecthenbeginletbuffer=Buffer.create4inadd_utf_8bufferc;Buffer.contentsbufferendelseformat_charcletis_valid_html_charc=not(is_control_characterc||is_non_characterc)letis_valid_xml_charc=is_in_range0x00200xD7FFc||c=0x0009||c=0x000A||c=0x000D||is_in_range0xE0000xFFFDc||is_in_range0x100000x10FFFFcletsignal_to_string=function|`Comments->Printf.sprintf"<!--%s-->"s|`Doctyped->lettext=matchd.doctype_namewith|None->beginmatchd.raw_textwith|None->""|Somes->" "^send|Somename->matchd.public_identifier,d.system_identifierwith|None,None->name|Somep,None->Printf.sprintf" %s PUBLIC \"%s\""namep|None,Somes->Printf.sprintf" %s SYSTEM \"%s\""names|Somep,Somes->Printf.sprintf" %s PUBLIC \"%s\" \"%s\""namepsinPrintf.sprintf"<!DOCTYPE %s>"text|`Start_element(name,attributes)->letname_to_string=function|"",local_name->local_name|ns,local_name->ns^":"^local_nameinletattributes=attributes|>List.map(fun(name,value)->Printf.sprintf" %s=\"%s\""(name_to_stringname)value)|>String.concat""inPrintf.sprintf"<%s%s>"(name_to_stringname)attributes|`End_element->"</...>"|`Textss->String.concat""ss|`Xmlx->lets=Printf.sprintf"<?xml version=\"%s\">"x.versioninlets=matchx.encodingwith|None->s|Someencoding->Printf.sprintf"%s encoding=\"%s\""sencodinginlets=matchx.standalonewith|None->s|Somestandalone->Printf.sprintf"%s standalone=\"%s\""s(ifstandalonethen"yes"else"no")ins^"?>"|`PI(target,s)->Printf.sprintf"<?%s %s?>"targetslettoken_to_string=function|`Xmlx->signal_to_string(`Xmlx)|`Doctyped->signal_to_string(`Doctyped)|`Startt->letname="",t.Token_tag.nameinletattributes=t.Token_tag.attributes|>List.map(fun(n,v)->("",n),v)inlets=signal_to_string(`Start_element(name,attributes))inifnott.Token_tag.self_closingthenselse(String.subs0(String.lengths-1))^"/>"|`Endt->Printf.sprintf"</%s>"t.Token_tag.name|`Charsss->String.concat""ss|`Chari->chari|`PIv->signal_to_string(`PIv)|`Comments->signal_to_string(`Comments)|`EOF->"EOF"letwhitespace_chars=" \t\n\r"letwhitespace_prefix_lengths=letrecloopindex=ifindex=String.lengthsthenindexelseifString.containswhitespace_charss.[index]thenloop(index+1)elseindexinloop0letwhitespace_suffix_lengths=letreclooprindex=ifrindex=String.lengthsthenrindexelseifString.containswhitespace_charss.[String.lengths-rindex-1]thenloop(rindex+1)elserindexinloop0lettrim_string_lefts=letprefix_length=whitespace_prefix_lengthsinString.subsprefix_length(String.lengths-prefix_length)lettrim_string_rights=letsuffix_length=whitespace_suffix_lengthsinString.subs0(String.lengths-suffix_length)(* String.trim not available for OCaml < 4.00. *)lettrim_strings=s|>trim_string_left|>trim_string_right(* Specialization of List.mem at string list, to avoid polymorphic
comparison. *)letlist_mem_string(s:string)l=List.exists(funs'->s'=s)l