123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451typemailbox={name:phraseoption;local:local;domain:domain*domainlist}anddomain=[`Domainofstringlist|`Addrofaddr|`Literalofstring]andaddr=|IPv4ofIpaddr.V4.t|IPv6ofIpaddr.V6.t|Extof(string*string)andphrase=[`Dot|`Wordofword|`Encodedofstring*raw]listandraw=|Quoted_printableof(string,[`Msgofstring])result|Base64of(string,[`Msgofstring])resultandword=[`Atomofstring|`Stringofstring]andlocal=wordlistandgroup={group:phrase;mailboxes:mailboxlist}andaddress=local*(domain*domainlist)andt=[`Mailboxofmailbox|`Groupofgroup](* Pretty-printers *)moduleFmt=structletpfppffmt=Format.fprintfppffmtletstring=Format.pp_print_stringletchar=Format.pp_print_charletconstppvppf()=ppppfvletalwaysfmtppf()=pfppffmtletquotepp_valppfv=pfppf"@[<1>@<1>\"%a@<1>\"@]"pp_valvletlist~sep:pp_seppp_valppflst=letrecgo=function|[]->()|[x]->pp_valppfx|x::r->pfppf"%a%a"pp_valxpp_sep();goringolstend(* Encoder *)letstr_wordppf=function|`Atomv->Fmt.stringppfv|`Stringv->letescape=function|'\\'->Fmt.stringppf"\\\\"|'"'->Fmt.stringppf"\\\""|'\000'->Fmt.stringppf"\\\000"|'\x07'->Fmt.stringppf"\\a"|'\b'->Fmt.stringppf"\\b"|'\t'->Fmt.stringppf"\\t"|'\n'->Fmt.stringppf"\\n"|'\x0b'->Fmt.stringppf"\\v"|'\x0c'->Fmt.stringppf"\\f"|'\r'->Fmt.stringppf"\\r"|chr->Fmt.charppfchrinFmt.stringppf"\"";String.iterescapev;Fmt.stringppf"\""letstr_localppflocal=letrecgo=function|[]->()|[x]->str_wordppfx|x::r->Fmt.pfppf"%a."str_wordx;goringolocalletstr_domainppf=function|`Domainlst->letrecgo=function|[]->()|[x]->Fmt.stringppfx|x::r->Fmt.pfppf"%s."x;goringolst|`Addr(IPv4v4)->Fmt.pfppf"[%s]"(Ipaddr.V4.to_stringv4)|`Addr(IPv6v6)->Fmt.pfppf"[IPv6:%s]"(Ipaddr.V6.to_stringv6)|`Addr(Ext(key,value))->Fmt.pfppf"[%s:%s]"keyvalue|`Literalv->Fmt.pfppf"[%s]"v[@@@warning"-8"]letstr_raw~charsetppf=function|Quoted_printable(Okv)->letbuf=Buffer.create16inletencoder=Pecu.Inline.encoder(`Bufferbuf)inletrecgoidx=(* XXX(dinosaure): safe due to [`Buffer]. *)let[@warning"-8"]`Ok:[`Ok|`Partial]=ifidx=String.lengthvthenPecu.Inline.encodeencoder`EndelsePecu.Inline.encodeencoder(`Charv.[idx])inifidx<String.lengthvthengo(succidx)ingo0;Fmt.pfppf"=?%s?Q?%s?="(String.lowercase_asciicharset)(Buffer.contentsbuf)|Base64(Okv)->Fmt.pfppf"=?%s?B?%s?="(String.lowercase_asciicharset)(Base64.encode_exn~pad:truev)|_->assertfalse[@@@warning"+8"]letstr_phraseppfphrase=letstr_eltppf=function|`Dot->Fmt.charppf'.'|`Wordw->str_wordppfw|`Encoded(charset,v)->str_raw~charsetppfvinletrecgo=function|[]->()|[x]->str_eltppfx|x::r->Fmt.pfppf"%a "str_eltx;goringophraseletstr_mailboxppf=function|{name=None;local;domain=(domain,[])}->Fmt.pfppf"%a@%a"str_locallocalstr_domaindomain|{name=Somename;local;domain=(domain,[])}->Fmt.pfppf"%a <%a@%a>"str_phrasenamestr_locallocalstr_domaindomain|{name;local;domain=(x,r)}->let()=matchnamewith|Somename->Fmt.pfppf"%a <"str_phrasename|None->Fmt.stringppf"<"inletrecgo=function|[]->()|[e]->Fmt.pfppf"@%a"str_domaine|h::t->Fmt.pfppf"@%a,"str_domainh;gotingor;Fmt.pfppf":%a@%a>"str_locallocalstr_domainxletstr_addressppf(local,domain)=str_mailboxppf{name=None;local;domain;}letstr_groupppf{group;mailboxes;}=Fmt.pfppf"%a: "str_phrasegroup;letrecgo=function|[]->()|[x]->str_mailboxppfx|x::r->Fmt.pfppf"%a, "str_mailboxx;goringomailboxes;Fmt.stringppf";"letstr_addressesppflst=letrecgo=function|[]->()|[`Mailboxx]->str_mailboxppfx|[`Groupx]->str_groupppfx|`Mailboxx::r->Fmt.pfppf"%a, "str_mailboxx;gor|`Groupx::r->Fmt.pfppf"%a, "str_groupx;goringolsttype'afmt=Format.formatter->'a->unitletpp_addrppf=function|IPv4ipv4->Fmt.pfppf"[%s]"(Ipaddr.V4.to_stringipv4)|IPv6ipv6->Fmt.pfppf"[IPv6:%s]"(Ipaddr.V6.to_stringipv6)|Ext(key,value)->Fmt.pfppf"[%s:%s]"keyvalueletpp_domainppf=function|`Domainlst->Fmt.list~sep:(Fmt.constFmt.string".")Fmt.stringppflst|`Addraddr->pp_addrppfaddr|`Literallit->Fmt.pfppf"[%s]"litletpp_wordppf=function|`Atomatom->Fmt.stringppfatom|`Stringstr->Fmt.quoteFmt.stringppfstrletpp_localppflst=Fmt.list~sep:(Fmt.constFmt.char'.')pp_wordppflstletpp_rawppf=function|Quoted_printable(Oks)->Fmt.pfppf"quoted-printable:%s"s|Base64(Oks)->Fmt.pfppf"base64:%s"s|Quoted_printable(Error(`Msg_))|Base64(Error(`Msg_))->Fmt.stringppf"#error"letpp_phraseppfphrase=letpp_elemppf=function|`Dot->Fmt.stringppf"."|`Wordx->Fmt.pfppf"%a"pp_wordx|`Encoded(_,raw)->Fmt.pfppf"<@[<hov>%a@]>"pp_rawrawinFmt.list~sep:(Fmt.always"@ ")pp_elemppfphraseletpp_mailboxppf=function|{name=None;local;domain=domain,[]}->Fmt.pfppf"@[<0>%a@%a@]"pp_locallocalpp_domaindomain|{name=None;local;domain=first,rest}->letppppfdomain=Fmt.pfppf"@%a"pp_domaindomaininFmt.pfppf"@[<1><%a:%a@%a>@]"(Fmt.list~sep:(Fmt.constFmt.char',')pp)restpp_locallocalpp_domainfirst|{name=Somename;local;domain}->letpp_addrppf(local,domains)=matchdomainswith|domain,[]->Fmt.pfppf"@[<1><%a@%a>@]"pp_locallocalpp_domaindomain|domain,rest->letppppfdomain=Fmt.pfppf"@%a"pp_domaindomaininFmt.pfppf"@[<1><%a:%a@%a>@]"(Fmt.list~sep:(Fmt.constFmt.string",")pp)restpp_locallocalpp_domaindomaininFmt.pfppf"@[<hov>%a@]@ %a"pp_phrasenamepp_addr(local,domain)letpp_groupppf{group;mailboxes}=Fmt.pfppf"@[<hov>%a@]:@ @[<hov>%a@]"pp_phrasegroupFmt.(list~sep:(always",@ ")pp_mailbox)mailboxesletpp_addressppf(local,domain)=pp_mailboxppf{name=None;local;domain}letppppf=function|`Mailboxmailbox->pp_mailboxppfmailbox|`Groupgroup->pp_groupppfgroup(* Equal *)(* XXX(dinosaure): CFWS (useless white space and comment) are already deleted by
parser below.
However, some parts are semantically equal, like:
- raw with base64 encoding or quoted-printable encoding: if produced content
is equal, these values are semantically equal
- order of domains (RFC did not explain any specific process about order of
domains)
- RFC 1034 explains domains are case-insensitive
- IPv4 could be equal to a subset of IPv6
- [`Atom] and [`String] could be semantically equal (it's a /word/) (RFC 5321
explains local-part - which contains /word/ - SHOULD be case-sensitive)
So, for all of these, we implement two kinds of [equal]:
- a strict implementation which strictly checks if two addresses are equal
structurally
- a semantic implementation which follows rules above *)type'aequal='a->'a->booltype'acompare='a->'a->intletcase_sensitiveab=String.compareabletcase_insensitiveab=String.(compare(lowercase_asciia)(lowercase_asciib))letequal_word~compareab=matcha,bwith|`Atoma,`Atomb|`Stringa,`Atomb|`Atoma,`Stringb|`Stringa,`Stringb->compareab=0letcompare_word~compareab=matcha,bwith|`Atoma,`Atomb|`Stringa,`Atomb|`Atoma,`Stringb|`Stringa,`Stringb->compareabletequal_raw~compareab=matcha,bwith|Quoted_printable(Oka),Quoted_printable(Okb)|Base64(Oka),Base64(Okb)|Base64(Oka),Quoted_printable(Okb)|Quoted_printable(Oka),Base64(Okb)->compareab=0|_,_->false(* XXX(dinosaure): both error return [false]. *)letinf=(-1)andsup=1letcompare_raw~compareab=matcha,bwith|Quoted_printable(Oka),Quoted_printable(Okb)|Base64(Oka),Base64(Okb)|Base64(Oka),Quoted_printable(Okb)|Quoted_printable(Oka),Base64(Okb)->compareab|(Quoted_printable(Error_)|Base64(Error_)),(Quoted_printable(Ok_)|Base64(Ok_))->sup|(Quoted_printable(Ok_)|Base64(Ok_)),(Quoted_printable(Error_)|Base64(Error_))->inf|_,_->0(* XXX(dinosaure): both error are equal. *)letcompare_raw_with_string~compareab=matchawith|Quoted_printable(Oka)->compareab|Base64(Oka)->compareab|_->supletcompare_string_with_raw~compareab=matchbwith|Quoted_printable(Okb)->compareab|Base64(Okb)->compareab|_->infletequal_phraseab=ifList.lengtha<>List.lengthbthenfalseelseletcompareab=case_insensitiveabinList.for_all2(funab->matcha,bwith|`Encoded(_,a),`Encoded(_,b)->equal_raw~compareab|`Dot,`Dot->true|`Worda,`Wordb->equal_word~compareab|`Encoded(_,a),`Word(`Atomb|`Stringb)->compare_raw_with_string~compareab=0|`Word(`Atoma|`Stringa),`Encoded(_,b)->compare_string_with_raw~compareab=0|_,_->false)abletcompare_phraseab=letcompare=case_insensitiveinletrecgoab=matcha,bwith|[],[]->0|_::_,[]->sup|[],_::_->inf|a::ar,b::br->matcha,bwith|`Worda,`Wordb->letres=compare_word~compareabinifres=0thengoarbrelseres|`Encoded(_,a),`Encoded(_,b)->letres=compare_raw~compareabinifres=0thengoarbrelseres|`Dot,`Dot->goarbr|`Encoded(_,a),`Word(`Atomb|`Stringb)->letres=compare_raw_with_string~compareabinifres=0thengoarbrelseres|`Word(`Atoma|`Stringa),`Encoded(_,b)->letres=compare_string_with_raw~compareabinifres=0thengoarbrelseres|`Dot,_->sup|`Word_,`Dot->inf|`Word_,_->sup|`Encoded_,`Dot->inf|`Encoded_,_->supingoabletequal_addrab=matcha,bwith|IPv4ipv4,IPv6ipv6|IPv6ipv6,IPv4ipv4->Ipaddr.(compare(V4ipv4)(V6ipv6))=0|IPv6a,IPv6b->Ipaddr.V6.compareab=0|IPv4a,IPv4b->Ipaddr.V4.compareab=0|Ext(ldh_a,content_a),Ext(ldh_b,content_b)->String.equalldh_aldh_b&&String.equalcontent_acontent_b(* XXX(dinosaure): RFC 5321 does not explain if Ldh token is case-insensitive. *)|_,_->falseletcompare_addrab=matcha,bwith|IPv4ipv4,IPv6ipv6|IPv6ipv6,IPv4ipv4->Ipaddr.(compare(V4ipv4)(V6ipv6))|IPv6a,IPv6b->Ipaddr.V6.compareab|IPv4a,IPv4b->Ipaddr.V4.compareab|Ext(ldh_a,content_a),Ext(ldh_b,content_b)->letret=String.compareldh_aldh_binifret=0thenString.comparecontent_acontent_belseret(* XXX(dinosaure): lexicographic compare. *)|IPv6_,_->sup|IPv4_,_->sup|Ext_,(IPv4_|IPv6_)->infletcompare_domainab=matcha,bwith|`Domaina,`Domainb->letrecgoab=matcha,bwith|[],[]->0|a::ar,b::br->letres=case_insensitiveabinifres=0thengoarbrelseres|[],_::_->inf|_::_,[]->supin(* compare [] [0] = -1 && compare [0] [] = 1 *)goab|`Literala,`Literalb->case_insensitiveab|`Addra,`Addrb->compare_addrab|`Domain_,_->sup|`Literal_,`Domain_->inf|`Literal_,_->sup|`Addr_,(`Domain_|`Literal_)->inf|`Addr_,_->supletcompare_word?case_sensitive:(c=false)ab=matcha,bwith|`Atoma,`Atomb|`Stringa,`Stringb|`Atoma,`Stringb|`Stringa,`Atomb->ifnotcthencase_insensitiveabelsecase_sensitiveabletcompare_local?case_sensitiveab=letrecgoab=matcha,bwith|_::_,[]->sup|[],_::_->inf|a::ar,b::br->letres=compare_word?case_sensitiveabinifres=0thengoarbrelseres|[],[]->0ingoabletequal_domainab=matcha,bwith|`Domaina,`Domainb->ifList.lengtha<>List.lengthbthenfalseelseList.for_all2(funab->case_insensitiveab=0)ab|`Literala,`Literalb->case_insensitiveab=0|`Addra,`Addrb->equal_addrab|_,_->false(* XXX(dinosaure) we should resolve domain and compare with IP
address if they are equal or not. *)letequal_domainsab=ifList.lengtha<>List.lengthbthenfalseelseleta=List.sortcompare_domainainletb=List.sortcompare_domainbinList.for_all2(funab->equal_domainab)abletequal_domains(a,ar)(b,br)=equal_domains(a::ar)(b::br)letcompare_domainsab=letrecgoab=matcha,bwith|_::_,[]->sup|[],_::_->inf|a::ar,b::br->letres=compare_domainabinifres=0thengoarbrelseres|[],[]->0ingo(List.sortcompare_domaina)(List.sortcompare_domainb)letcompare_domains(a,ar)(b,br)=compare_domains(a::ar)(b::br)letequal_local?case_sensitive:(case=false)ab=letcompareab=ifnotcasethencase_insensitiveabelsecase_sensitiveabinifList.lengtha<>List.lengthbthenfalseelseList.for_all2(funab->equal_word~compareab)abletequal_mailbox?case_sensitiveab=letequal_nameab=matcha,bwith|Some_,None|None,Some_|None,None->true|Somea,Someb->equal_phraseabinequal_local?case_sensitivea.localb.local&&equal_domainsa.domainb.domain&&equal_namea.nameb.nameletcompare_mailbox?case_sensitiveab=letres=compare_domainsa.domainb.domaininifres=0thenletres=compare_local?case_sensitivea.localb.localinifres=0thenmatcha.name,b.namewith|Some_,None->sup|None,Some_->inf|Somea,Someb->compare_phraseab|None,None->0elsereselseresletcompare_groupab=letrecgoab=matcha,bwith|[],[]->0|_::_,[]->sup|[],_::_->inf|a::ar,b::br->letres=compare_mailboxabinifres=0thengoarbrelseresinletres=compare_phrasea.groupb.groupinifres=0thengo(List.sortcompare_mailboxa.mailboxes)(List.sortcompare_mailboxb.mailboxes)elseresletequal_groupab=letrecgoab=matcha,bwith|[],[]->true|_::_,[]|[],_::_->false|a::ar,b::br->letres=equal_mailboxabinifresthengoarbrelseresinequal_phrasea.groupb.group&&go(List.sortcompare_mailboxa.mailboxes)(List.sortcompare_mailboxb.mailboxes)letcompare_addressab=compare_mailbox{name=None;local=fsta;domain=snda}{name=None;local=fstb;domain=sndb}letequal_addressab=equal_mailbox{name=None;local=fsta;domain=snda}{name=None;local=fstb;domain=sndb}letequal_setab=matcha,bwith|`Group_,`Mailbox_|`Mailbox_,`Group_->false|`Groupa,`Groupb->equal_groupab|`Mailboxa,`Mailboxb->equal_mailboxabletcompare_setab=matcha,bwith|`Group_,`Mailbox_->sup|`Mailbox_,`Group_->inf|`Groupa,`Groupb->compare_groupab|`Mailboxa,`Mailboxb->compare_mailboxabmoduleParser=struct[@@@warning"-32"]openAngstrom(* XXX(dinosaure): about each comment, because we don't have a software to
prove these implementations, we check all /tokens/ by hands. All
occurrences of token in RFCs appears: ABNF and comments. It's useful to not
forget something when we implement them. It's little annoying to do this
but some crazy people have the power to decide about how to write an
e-mail - not my fault.
Then, a dependence /by hands/ again show you why we need to implement
token.
Finally, a little comment by the implementor to explain what we have and
what we _don't_ have. *)(* From RFC 5234 (used in RFC 5322)
VCHAR = %x21-7E
; visible (printing) characters
Dependence
VCHAR <- quoted-pair
quoted-pair <- ccontent & qcontent
ccontent <- comment
qcontent <- quoted-string
quoted-string <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letis_vchar=function'\x21'..'\x7e'->true|_->false(* From RFC 5322
obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
%d11 / ; characters that do not
%d12 / ; include the carriage
%d14-31 / ; return, line feed, and
%d127 ; white space characters
Dependence
obs-NO-WS-CTL <- obs-ctext
obs-ctext <- ctext
ctext <- ccontent
ccontent <- comment
comment <- CFWS
*)letis_obs_no_ws_ctl=function|'\001'..'\008'|'\011'|'\012'|'\014'..'\031'|'\127'->true|_->false(* From RFC 822
ctext = <any CHAR excluding "(", ; => may be folded
")", BACKSLASH & CR, & including
linear-white-space>
From RFC 1522
5. Use of encoded-words in message headers
(2) An encoded-word may appear within a comment delimited by "(" and
")", i.e., wherever a "ctext" is allowed. More precisely, the
RFC 822 ABNF definition for "comment" is amended as follows:
comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
A "Q"-encoded encoded-word which appears in a comment MUST NOT
contain the characters "(", ")" or DQUOTE encoded-word that
appears in a "comment" MUST be separated from any adjacent
encoded-word or "ctext" by linear-white-space.
7. Conformance
A mail reading program claiming compliance with this specification
must be able to distinguish encoded-words from "text", "ctext", or
"word"s, according to the rules in section 6, anytime they appear in
appropriate places in message headers. It must support both the "B"
and "Q" encodings for any character set which it supports. The
program must be able to display the unencoded text if the character
From RFC 2047
Update from RFC 1522:
+ clarification: an 'encoded-word' may appear immediately following
the initial "(" or immediately before the final ")" that delimits a
comment, not just adjacent to "(" and ")" *within* *ctext.
From RFC 2822
ctext = NO-WS-CTL / ; Non white space controls
%d33-39 / ; The rest of the US-ASCII
%d42-91 / ; characters not including "(",
%d93-126 ; ")", or BACKSLASH
From RFC 5322
ctext = %d33-39 / ; Printable US-ASCII
%d42-91 / ; characters not including
%d93-126 / ; "(", ")", or BACKSLASH
obs-ctext
obs-ctext = obs-NO-WS-CTL
Update from RFC 2822
+ Removed NO-WS-CTL from ctext
From RFC 5335
ctext =/ UTF8-xtra-char
UTF8-xtra-char = UTF8-2 / UTF8-3 / UTF8-4
UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail /
%xE1-EC 2(UTF8-tail) /
%xED %x80-9F UTF8-tail /
%xEE-EF 2(UTF8-tail)
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) /
%xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
From RFC 6532
ctext =/ UTF8-non-ascii
Dependence
ctext <- ccontent
ccontent <- comment
comment <- CFWS
XXX(dinosaure):
- about UTF-8, the process is out of this scope where we check only one byte here
- about compliance with RFC 1522, it's out of scope where we check only one byte here
This code is a translation of RFC 5322's ABNF.
*)letis_ctext=function|'\033'..'\039'|'\042'..'\091'|'\093'..'\126'->true|c->is_obs_no_ws_ctlc(* From RFC 822
qtext = <any CHAR excepting DQUOTE, ; => may be folded
BACKSLASH & CR, and including
linear-white-space>
From RFC 2822
qtext = NO-WS-CTL / ; Non white space controls
%d33 / ; The rest of the US-ASCII
%d35-91 / ; characters not including BACKSLASH
%d93-126 ; or the quote character
From RFC 5322
qtext = %d33 / ; Printable US-ASCII
%d35-91 / ; characters not including
%d93-126 / ; BACKSLASH or the quote character
obs-qtext
obs-qtext = obs-NO-WS-CTL
From RFC 5335
See [is_ctext] for UTF8-xtra-char.
utf8-qtext = qtext / UTF8-xtra-char
From RFC 6532
qtext =/ UTF8-non-ascii
Dependence
qtext <- qcontent
qcontent <- quoted-string
quoted-string <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
XXX(dinosaure):
- about UTF-8, the process is out of this scope where we check only one byte here
This code is a translation of RFC 5322's ABNF.
*)letis_qtext=function|'\033'|'\035'..'\091'|'\093'..'\126'->true|c->is_obs_no_ws_ctlc(* From RFC 822
The ABNF of atext is not explicit from RFC 822 but the relic could be find here:
atom = 1*<any CHAR except specials, SPACE and CTLs>
From RFC 2822
atext = ALPHA / DIGIT / ; Any character except controls,
"!" / "#" / ; SP, and specials.
"$" / "%" / ; Used for atoms
"&" / "'" /
"*" / "+" /
"-" / "/" /
"=" / "?" /
"^" / "_" /
"`" / "{" /
"|" / "}" /
"~"
From RFC 5322
atext = ALPHA / DIGIT / ; Printable US-ASCII
"!" / "#" / ; characters not including
"$" / "%" / ; specials. Used for atoms.
"&" / "'" /
"*" / "+" /
"-" / "/" /
"=" / "?" /
"^" / "_" /
"`" / "{" /
"|" / "}" /
"~"
From 5335
utf8-atext = ALPHA / DIGIT /
"!" / "#" / ; Any character except
"$" / "%" / ; controls, SP, and specials.
"&" / "'" / ; Used for atoms.
"*" / "+" /
"-" / "/" /
"=" / "?" /
"^" / "_" /
"`" / "{" /
"|" / "}" /
"~" /
UTF8-xtra-char
UTF8-xtra-char: see is_ctext
This means that all the [RFC2822] constructs that build upon these
will permit UTF-8 characters, including comments and quoted strings.
We do not change the syntax of <atext> in order to allow UTF8
characters in <addr-spec>. This would also allow UTF-8 characters in
<message-id>, which is not allowed due to the limitation described in
Section 4.5. Instead, <utf8-atext> is added to meet this
requirement.
From RFC 6532
atext =/ UTF8-non-ascii
Dependence
atext <- atom
atom <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
XXX(dinosaure):
- about UTF-8, the process is out of this scope where we check only one byte here
This code is a translation of RFC 5322's ABNF.
*)letis_atext=function|'a'..'z'|'A'..'Z'|'0'..'9'|'!'|'#'|'$'|'%'|'&'|'\''|'*'|'+'|'-'|'/'|'='|'?'|'^'|'_'|'`'|'{'|'}'|'|'|'~'->true|_->falseletis_cr=(=)'\r'letis_lf=(=)'\n'letis_d0=(=)'\000'(* From RFC 822
LWSP-char = SPACE / HTAB ; semantics = SPACE
From RFC 2822 and RFC 5322, we did not find any occurrence of LWSP-char,
it replaced by WSP. However, these RFCs does not provide an ABNF to describe
WSP (described by RFC5234).
*)letis_wsp=function'\x09'|'\x20'->true|_->false(* From RFC 822
quoted-pair = BACKSLASH CHAR ; may quote any char
CHAR is case-sensitive
From RFC 2822
quoted-pair = (BACKSLASH text) / obs-qp
text = %d1-9 / ; Characters excluding CR and LF
%d11 /
%d12 /
%d14-127 /
obs-text
obs-text = *LF *CR *(obs-char *LF *CR)
obs-char = %d0-9 / %d11 / ; %d0-127 except CR and
%d12 / %d14-127 ; LF
obs-qp = BACKSLASH (%d0-127)
From RFC 5322
quoted-pair = (BACKSLASH (VCHAR / WSP)) / obs-qp
obs-qp = BACKSLASH (%d0 / obs-NO-WS-CTL / LF / CR)
From RFC 5335
See [is_ctext] for UTF8-xtra-char.
utf8-text = %d1-9 / ; all UTF-8 characters except
%d11-12 / ; US-ASCII NUL, CR, and LF
%d14-127 /
UTF8-xtra-char
utf8-quoted-pair = (BACKSLASH utf8-text) / obs-qp
Dependence
quoted-pair <- ccontent & qcontent
ccontent <- comment
qcontent <- quoted-string
quoted-string <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
XXX(dinosaure): we can factorize this code by [fun _ -> true]
*)letis_quoted_pairchr=is_vcharchr||is_wspchr||is_d0chr||is_obs_no_ws_ctlchr||is_lfchr||is_crchr(* From RFC 822
dtext = <any CHAR excluding "[", ; => may be folded
"]", BACKSLASH & CR, & including
linear-white-space>
From RFC 2822
dtext = NO-WS-CTL / ; Non white space controls
%d33-90 / ; The rest of the US-ASCII
%d94-126 ; characters not including "[",
; "]", or BACKSLASH
From RFC 5322
Update from RFC 2822:
+ Removed NO-WS-CTL from dtext
dtext = %d33-90 / ; Printable US-ASCII
%d94-126 / ; characters not including
obs-dtext ; "[", "]", or BACKSLASH
obs-dtext = obs-NO-WS-CTL / quoted-pair
Dependence
dtext <- domain-literal
domain-literal <- domain
domain <- addr-spec
addr-spec <- mailbox
XXX(dinosaure): [quoted-pair] can not be processed here where we handle only one byte.
*)letis_dtext=function|'\033'..'\090'|'\094'..'\126'->true|c->is_obs_no_ws_ctlcletof_escaped_character=function|'\x61'->'\x07'(* "\a" *)|'\x62'->'\x08'(* "\b" *)|'\x74'->'\x09'(* "\t" *)|'\x6E'->'\x0A'(* "\n" *)|'\x76'->'\x0B'(* "\v" *)|'\x66'->'\x0C'(* "\f" *)|'\x72'->'\x0D'(* "\r" *)|c->c(* See [is_quoted_pair] *)letquoted_pair_ignore,quoted_pair=letquoted_char=char'\\'*>satisfyis_quoted_pairinquoted_char*>return(),quoted_char>>|of_escaped_characterletwsp=satisfyis_wsp(* From RFC 822
Each header field can be viewed as a single, logical line of
ASCII characters, comprising a field-name and a field-body.
For convenience, the field-body portion of this conceptual
entity can be split into a multiple-line representation; this
is called "folding". The general rule is that wherever there
may be linear-white-space (NOT simply LWSP-chars), a CRLF
immediately followed by AT LEAST one LWSP-char may instead be
inserted. Thus, the single line
To: "Joe & J. Harvey" <ddd @Org>, JJV @ BBN
can be represented as:
To: "Joe & J. Harvey" <ddd @ Org>,
JJV@BBN
and
To: "Joe & J. Harvey"
<ddd@ Org>, JJV
@BBN
and
To: "Joe &
J. Harvey" <ddd @ Org>, JJV @ BBN
The process of moving from this folded multiple-line
representation of a header field to its single line represen-
tation is called "unfolding". Unfolding is accomplished by
regarding CRLF immediately followed by a LWSP-char as
equivalent to the LWSP-char.
Note: While the standard permits folding wherever linear-
white-space is permitted, it is recommended that struc-
tured fields, such as those containing addresses, limit
folding to higher-level syntactic breaks. For address
fields, it is recommended that such folding occur
between addresses, after the separating comma.
From RFC 2822
White space characters, including white space used in folding
(described in section 2.2.3), may appear between many elements in
header field bodies. Also, strings of characters that are treated as
comments may be included in structured field bodies as characters
enclosed in parentheses. The following defines the folding white
space (FWS) and comment constructs.
Strings of characters enclosed in parentheses are considered comments
so long as they do not appear within a "quoted-string", as defined in
section 3.2.5. Comments may nest.
There are several places in this standard where comments and FWS may
be freely inserted. To accommodate that syntax, an additional token
for "CFWS" is defined for places where comments and/or FWS can occur.
However, where CFWS occurs in this standard, it MUST NOT be inserted
in such a way that any line of a folded header field is made up
entirely of WSP characters and nothing else.
FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space
obs-FWS
In the obsolete syntax, any amount of folding white space MAY be
inserted where the obs-FWS rule is allowed. This creates the
possibility of having two consecutive "folds" in a line, and
therefore the possibility that a line which makes up a folded header
field could be composed entirely of white space.
obs-FWS = 1*WSP *(CRLF 1*WSP)
From RFC 5322
White space characters, including white space used in folding
(described in section 2.2.3), may appear between many elements in
header field bodies. Also, strings of characters that are treated as
comments may be included in structured field bodies as characters
enclosed in parentheses. The following defines the folding white
space (FWS) and comment constructs.
Strings of characters enclosed in parentheses are considered comments
so long as they do not appear within a "quoted-string", as defined in
section 3.2.4. Comments may nest.
There are several places in this specification where comments and FWS
may be freely inserted. To accommodate that syntax, an additional
token for "CFWS" is defined for places where comments and/or FWS can
occur. However, where CFWS occurs in this specification, it MUST NOT
be inserted in such a way that any line of a folded header field is
made up entirely of WSP characters and nothing else.
FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
; Folding white space
In the obsolete syntax, any amount of folding white space MAY be
inserted where the obs-FWS rule is allowed. This creates the
possibility of having two consecutive "folds" in a line, and
therefore the possibility that a line which makes up a folded header
field could be composed entirely of white space.
obs-FWS = 1*WSP *(CRLF 1*WSP)
Dependence
FWS <- CFWS
val fws: (bool, bool, bool) t
- the first bool say if we have WSP BEFORE CRLF
- the second bool say if we have CRLF
- the third bool say if we have WSP AFTER CRLF
Impossible case: (true, false, true), we set to [true] the third value
only if we found a CRLF, so the third bool __could__ be [true] only
if the second bool __is__ [true].
XXX(dinosaure): [FWS] is a special token about mail (according RFC 822)
and should never occur in real/usual inputs (like value of a form). [FWS] complexifies
the way to parse an email address at the end. However, [emile] should be used in usual
context where input does not have any [FWS] token.
Be aware, if you want to extract an email address from an email, we should do a first
pass with [unstrctrd] to remove [FWS] token. Then, output can be handle by [emile].
*)letfws=take_while1is_wsp(* From RFC 822
comment = "(" *(ctext / quoted-pair / comment) ")"
From RFC 2822
ccontent = ctext / quoted-pair / comment
comment = "(" *([FWS] ccontent) [FWS] ")"
From RFC 5322
ccontent = ctext / quoted-pair / comment
comment = "(" *([FWS] ccontent) [FWS] ")"
Dependence
comment <- CFWS
*)letcomment=fix@@funcomment->letccontent=peek_char_fail<?>"comment">>=function|'('->comment|'\\'->quoted_pair_ignore|cwhenis_ctextc->skip_whileis_ctext(* TODO: replace skip_while and handle unicode. *)|_->fail"comment"inchar'('*>many(option""fws*>ccontent)*>option""fws*>char')'*>return()(* From RFC 822
See [obs_fws] and [fws].
From RFC 2822
CFWS = *([FWS] comment) (([FWS] comment) / FWS)
From RFC 5322
CFWS = (1*([FWS] comment) [FWS]) / FWS
Update from RFC 2822:
+ Simplified CFWS syntax.
Dependence
CFWS is needed for all
*)letcfws=(many1(option""fws*>comment)*>option""fws<|>fws)*>return()letcfws=cfws<?>"cfws"letis_ascii=function'\000'..'\127'->true|_->falseletuchar_is_asciix=(Uchar.to_intx)>=0&&(Uchar.to_intx)<=0x7fletwith_uutfis=letdecoder=Uutf.decoder~encoding:`UTF_8`Manualinletbuf=Buffer.create0x100inletrecgobyte_count=matchUutf.decodedecoderwith|`Await->`Continue|`Malformed_->`Error"Invalid UTF-8 character"|`Ucharucharwhenuchar_is_asciiuchar->ifis(Uchar.to_charuchar)then(Uutf.Buffer.add_utf_8bufuchar;gobyte_count)else(`End(Uutf.decoder_byte_countdecoder-byte_count-1))|`Ucharuchar->Uutf.Buffer.add_utf_8bufuchar;gobyte_count|`End->(`End(Uutf.decoder_byte_countdecoder-byte_count))inletscanbuf~off~len=letsrc=Bigstringaf.substringbuf~off~leninUutf.Manual.srcdecoder(Bytes.unsafe_of_stringsrc)0len;go(Uutf.decoder_byte_countdecoder)infix@@funm->available>>=funlen->Unsafe.peeklenscan>>=function|`Errorerr->failerr|`Continue->advancelen>>=fun()->m|`Endlen->advancelen>>=fun()->return(Buffer.contentsbuf)letwith_uutf1is=available>>=funn->ifn>0then(with_uutfis>>=funs->ifString.lengths>0thenreturnselsefail"with_uutf1")elsefail"with_uutf1"(* From RFC 822
The ABNF of qcontent is not explicit from RFC 822 but the relic could be find here:
quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or
From RFC 2822
qcontent = qtext / quoted-pair
From RFC 5322
qcontent = qtext / quoted-pair
From RFC 5335
utf8-qcontent = utf8-qtext / utf8-quoted-pair
Dependence
qcontent <- quoted-string
quoted-string <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letqcontent=with_uutf1is_qtext(* TODO: replace take_while and handle unicode. *)<|>(quoted_pair>>|String.make1)letqcontent=qcontent<?>"qcontent"(* From RFC 822
quoted-string = DQUOTE *(qtext/quoted-pair) DQUOTE; Regular qtext or
; quoted chars.
From RFC 2047
+ An 'encoded-word' MUST NOT appear within a 'quoted-string'
From RFC 2822
quoted-string = [CFWS]
DQUOTE *([FWS] qcontent) [FWS] DQUOTE
[CFWS]
A quoted-string is treated as a unit. That is, quoted-string is
identical to atom, semantically. Since a quoted-string is allowed to
contain FWS, folding is permitted. Also note that since quoted-pair
is allowed in a quoted-string, the quote and backslash characters may
appear in a quoted-string so long as they appear as a quoted-pair.
Semantically, neither the optional CFWS outside of the quote
characters nor the quote characters themselves are part of the
quoted-string; the quoted-string is what is contained between the two
quote characters. As stated earlier, the BACKSLASH in any quoted-pair
and the CRLF in any FWS/CFWS that appears within the quoted-string are
semantically "invisible" and therefore not part of the quoted-string
either.
XXX(dinosaure): in other words, space(s) in [FWS] are "visible" between DQUOTE.
From RFC 5322
quoted-string = [CFWS]
DQUOTE *([FWS] qcontent) [FWS] DQUOTE
[CFWS]
The explanation does not change from RFC 2822.
Dependence
quoted-string <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
XXX(dinosaure): currently, this implementation has a bug about multiple spaces in
[quoted-string]. We need to update [fws] to count how many space(s) we skip.
TODO: optimize and count space(s).
*)letquoted_string=option()cfws*>char'"'*>(many(option""fws>>=funfws->qcontent>>|funs->fws^s)>>=funpre->option""fws>>|funfws->pre@[fws])<*char'"'>>|String.concat""<*option()cfws(* From RFC 822
atom = 1*<any CHAR except specials, SPACE and CTLs>
Difference from RFC 733:
- Atoms may not contain SPACE.
From RFC 2822
atom = [CFWS] 1*atext [CFWS]
From RFC 5322
atom = [CFWS] 1*atext [CFWS]
From RFC 5335
utf8-atom = [CFWS] 1*utf8-atext [CFWS]
Dependence
atom <- word
word <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letatom=option()cfws*>with_uutf1is_atext<*option()cfwsletatom=atom<?>"atom"(* From RFC 822
word = atom / quoted-string
From RFC 2822
word = atom / quoted-string
From RFC 5322
word = atom / quoted-string
Dependence
word <- atom
atom <- obs-local-part & dot-aton
dot-atom <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letword=atom>>|(funs->`Atoms)<|>(quoted_string>>|funs->`Strings)letword=word<?>"word"(* From RFC 2822
dot-atom-text = 1*atext *("." 1*atext)
From RFC 5322
dot-atom-text = 1*atext *("." 1*atext)
Dependence
dot-atom-text <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letdot_atom_text=sep_by1(char'.')(with_uutf1is_atext)letdot_atom_text=dot_atom_text<?>"dot-atom-text"(* From RFC 2822
dot-atom = [CFWS] dot-atom-text [CFWS]
From RFC 5322
dot-atom = [CFWS] dot-atom-text [CFWS]
Dependence
dot-atom <- local-part
local-part <- addr-spec
addr-spec <- mailbox
*)letdot_atom=option()cfws*>dot_atom_text<*option()cfwsletdot_atom=dot_atom<?>"dot-atom"(* From RFC 822
local-part = word *("." word) ; uninterpreted
; case-preserved
The local-part of an addr-spec in a mailbox specification
(i.e., the host's name for the mailbox) is understood to be
whatever the receiving mail protocol server allows. For exam-
ple, some systems do not understand mailbox references of the
form "P. D. Q. Bach", but others do.
This specification treats periods (".") as lexical separators.
Hence, their presence in local-parts which are not quoted-
strings, is detected. However, such occurrences carry NO
semantics. That is, if a local-part has periods within it, an
address parser will divide the local-part into several tokens,
but the sequence of tokens will be treated as one uninter-
preted unit. The sequence will be re-assembled, when the
address is passed outside of the system such as to a mail pro-
tocol service.
For example, the address:
First.Last@Registry.Org
is legal and does not require the local-part to be surrounded
with quotation-marks. (However, "First Last" DOES require
quoting.) The local-part of the address, when passed outside
of the mail system, within the Registry.Org domain, is
"First.Last", again without quotation marks.
Fron RFC 2822
local-part = dot-atom / quoted-string / obs-local-part
obs-local-part = word *("." word)
The local-part portion is a domain dependent string. In addresses,
it is simply interpreted on the particular host as a name of a
particular mailbox.
Update:
+ CFWS within local-parts and domains not allowed.*
From RFC 5322
local-part = dot-atom / quoted-string / obs-local-part
obs-local-part = word *("." word)
Dependence
local-part <- addr-spec
addr-spec <- mailbox
XXX(dinosaure): local-part MUST not be empty.
*)letobs_local_part=sep_by1(char'.')wordletobs_local_part=obs_local_part<?>"obs-local-part"letlocal_part=letlength=function|`Atoms->String.lengths|`Strings->String.lengthsinobs_local_part<|>(dot_atom>>|List.map(funx->`Atomx))<|>(quoted_string>>|funs->[`Strings])>>=funlocal->ifList.fold_left(funax->a+lengthx)0local>0thenreturnlocalelsefail"local-part empty"letobs_domain=lift2(funxr->x::r)atom(many1(char'.'*>atom))(* From RFC 822
domain-literal = "[" *(dtext / quoted-pair) "]"
o Square brackets ("[" and "]") are used to indicate the
presence of a domain-literal, which the appropriate
name-domain is to use directly, bypassing normal
name-resolution mechanisms.
Domain-literals which refer to domains within the ARPA Inter-
net specify 32-bit Internet addresses, in four 8-bit fields
noted in decimal, as described in Request for Comments #820,
"Assigned Numbers." For example:
[10.0.3.19]
Note: THE USE OF DOMAIN-LITERALS IS STRONGLY DISCOURAGED. It
is permitted only as a means of bypassing temporary
system limitations, such as name tables which are not
complete.
From RFC 2822
domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
From RFC 5322
domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
Dependence
domain-literal <- domain
domain <- e-mail
*)letdomain_literal=option()cfws*>char'['*>(many(option""fws*>(with_uutf1is_dtext<|>(quoted_pair>>|String.make1)))>>|String.concat"")<*option""fws<*char']'<*option()cfws(* From RFC 5321
Let-dig = ALPHA / DIGIT
Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
address-literal = "[" ( IPv4-address-literal /
IPv6-address-literal /
General-address-literal ) "]"
; See Section 4.1.3
IPv4-address-literal = Snum 3("." Snum)
IPv6-address-literal = "IPv6:" IPv6-addr
General-address-literal = Standardized-tag ":" 1*dcontent
Standardized-tag = Ldh-str
; Standardized-tag MUST be specified in a
; Standards-Track RFC and registered with IANA
dcontent = %d33-90 / ; Printable US-ASCII
%d94-126 ; excl. "[", BACKSLASH, "]"
Snum = 1*3DIGIT
; representing a decimal integer
; value in the range 0 through 255
IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
IPv6-hex = 1*4HEXDIG
IPv6-full = IPv6-hex 7(":" IPv6-hex)
IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
[IPv6-hex *5(":" IPv6-hex)]
; The "::" represents at least 2 16-bit groups of
; zeros. No more than 6 groups in addition to the
; "::" may be present.
IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
[IPv6-hex *3(":" IPv6-hex) ":"]
IPv4-address-literal
; The "::" represents at least 2 16-bit groups of
; zeros. No more than 4 groups in addition to the
; "::" and IPv4-address-literal may be present.
XXX(dinosaure): about IPv4 and IPv6 parser, we use [Ipaddr].
*)letis_dcontent=function|'\033'..'\090'|'\094'..'\126'->true|_->falseletipv4_addr=letipv4_address_literals=letpos=ref0intryletipv4=Ipaddr.V4.of_string_rawsposinif!pos=String.lengthsthenreturn(IPv4ipv4)elsefail"IPv4"withIpaddr.Parse_error_->fail"IPv4"intake_while1is_dcontent>>=ipv4_address_literalletipv6_addr=letipv6_address_literals=letpos=ref0intryletipv6=Ipaddr.V6.of_string_rawsposinif!pos=String.lengthsthenreturn(IPv6ipv6)elsefail"IPv6"withIpaddr.Parse_error_->fail"IPv6"instring"IPv6:"*>take_while1is_dcontent>>=ipv6_address_literalletlet_dig=satisfy(function|'a'..'z'|'A'..'Z'|'0'..'9'->true|_->false)letldh_str=take_while1(function|'a'..'z'|'A'..'Z'|'0'..'9'|'-'->true|_->false)>>=funldh->ifldh.[String.lengthldh-1]='-'thenfail"invalid ldh-str"elsereturnldhletgeneral_address_literal=ldh_str<*char':'>>=funldh->take_while1is_dcontent>>|funvalue->Ext(ldh,value)(* From RFC 5321
Let-dig = ALPHA / DIGIT
Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
address-literal = "[" ( IPv4-address-literal /
IPv6-address-literal /
General-address-literal ) "]"
; See Section 4.1.3
Sometimes a host is not known to the domain name system and
communication (and, in particular, communication to report and repair
the error) is blocked. To bypass this barrier, a special literal
form of the address is allowed as an alternative to a domain name.
For IPv4 addresses, this form uses four small decimal integers
separated by dots and enclosed by brackets such as [123.255.37.2],
which indicates an (IPv4) Internet Address in sequence-of-octets
form. For IPv6 and other forms of addressing that might eventually
be standardized, the form consists of a standardized "tag" that
identifies the address syntax, a colon, and the address itself, in a
format specified as part of the relevant standards (i.e., RFC 4291
[8] for IPv6).
Specifically:
IPv4-address-literal = Snum 3("." Snum)
IPv6-address-literal = "IPv6:" IPv6-addr
General-address-literal = Standardized-tag ":" 1*dcontent
Standardized-tag = Ldh-str
; Standardized-tag MUST be specified in a
; Standards-Track RFC and registered with IANA
dcontent = %d33-90 / ; Printable US-ASCII
%d94-126 ; excl. "[", BACKSLASH, "]"
Snum = 1*3DIGIT
; representing a decimal integer
; value in the range 0 through 255
IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
IPv6-hex = 1*4HEXDIG
IPv6-full = IPv6-hex 7(":" IPv6-hex)
IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
[IPv6-hex *5(":" IPv6-hex)]
; The "::" represents at least 2 16-bit groups of
; zeros. No more than 6 groups in addition to the
; "::" may be present.
IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
[IPv6-hex *3(":" IPv6-hex) ":"]
IPv4-address-literal
; The "::" represents at least 2 16-bit groups of
; zeros. No more than 4 groups in addition to the
; "::" and IPv4-address-literal may be present.
XXX(dinosaure): we use the [Ipaddr] parser about IPv4 and IPv6. Then, the input
should be a [general_address_literal]. However we decided to accept any input which
respect [dtext] as a `Literal (see [domain]).
*)letaddress_literal=ipv4_addr<|>ipv6_addr<|>general_address_literal(* From RFC 822
domain = sub-domain *("." sub-domain)
sub-domain = domain-ref / domain-literal
domain-ref = atom ; symbolic reference
6.2.1. DOMAINS
A name-domain is a set of registered (mail) names. A name-
domain specification resolves to a subordinate name-domain
specification or to a terminal domain-dependent string.
Hence, domain specification is extensible, permitting any
number of registration levels.
Name-domains model a global, logical, hierarchical addressing
scheme. The model is logical, in that an address specifica-
tion is related to name registration and is not necessarily
tied to transmission path. The model's hierarchy is a
directed graph, called an in-tree, such that there is a single
path from the root of the tree to any node in the hierarchy.
If more than one path actually exists, they are considered to
be different addresses.
The root node is common to all addresses; consequently, it is
not referenced. Its children constitute "top-level" name-
domains. Usually, a service has access to its own full domain
specification and to the names of all top-level name-domains.
The "top" of the domain addressing hierarchy -- a child of the
root -- is indicated by the right-most field, in a domain
specification. Its child is specified to the left, its child
to the left, and so on.
Some groups provide formal registration services; these con-
stitute name-domains that are independent logically of
specific machines. In addition, networks and machines impli-
citly compose name-domains, since their membership usually is
registered in name tables.
In the case of formal registration, an organization implements
a (distributed) data base which provides an address-to-route
mapping service for addresses of the form:
person@registry.organization
Note that "organization" is a logical entity, separate from
any particular communication network.
A mechanism for accessing "organization" is universally avail-
able. That mechanism, in turn, seeks an instantiation of the
registry; its location is not indicated in the address specif-
ication. It is assumed that the system which operates under
the name "organization" knows how to find a subordinate regis-
try. The registry will then use the "person" string to deter-
mine where to send the mail specification.
The latter, network-oriented case permits simple, direct,
attachment-related address specification, such as:
user@host.network
Once the network is accessed, it is expected that a message
will go directly to the host and that the host will resolve
the user name, placing the message in the user's mailbox.
6.2.2. ABBREVIATED DOMAIN SPECIFICATION
Since any number of levels is possible within the domain
hierarchy, specification of a fully qualified address can
become inconvenient. This standard permits abbreviated domain
specification, in a special case:
For the address of the sender, call the left-most
sub-domain Level N. In a header address, if all of
the sub-domains above (i.e., to the right of) Level N
are the same as those of the sender, then they do not
have to appear in the specification. Otherwise, the
address must be fully qualified.
This feature is subject to approval by local sub-
domains. Individual sub-domains may require their
member systems, which originate mail, to provide full
domain specification only. When permitted, abbrevia-
tions may be present only while the message stays
within the sub-domain of the sender.
Use of this mechanism requires the sender's sub-domain
to reserve the names of all top-level domains, so that
full specifications can be distinguished from abbrevi-
ated specifications.
For example, if a sender's address is:
sender@registry-A.registry-1.organization-X
and one recipient's address is:
recipient@registry-B.registry-1.organization-X
and another's is:
recipient@registry-C.registry-2.organization-X
then ".registry-1.organization-X" need not be specified in the
the message, but "registry-C.registry-2" DOES have to be
specified. That is, the first two addresses may be abbrevi-
ated, but the third address must be fully specified.
When a message crosses a domain boundary, all addresses must
be specified in the full format, ending with the top-level
name-domain in the right-most field. It is the responsibility
of mail forwarding services to ensure that addresses conform
with this requirement. In the case of abbreviated addresses,
the relaying service must make the necessary expansions. It
should be noted that it often is difficult for such a service
to locate all occurrences of address abbreviations. For exam-
ple, it will not be possible to find such abbreviations within
the body of the message. The "Return-Path" field can aid
recipients in recovering from these errors.
Note: When passing any portion of an addr-spec onto a process
which does not interpret data according to this stan-
dard (e.g., mail protocol servers). There must be NO
LWSP-chars preceding or following the at-sign or any
delimiting period ("."), such as shown in the above
examples, and only ONE SPACE between contiguous
<word>s.
6.2.3. DOMAIN TERMS
A domain-ref must be THE official name of a registry, network,
or host. It is a symbolic reference, within a name sub-
domain. At times, it is necessary to bypass standard mechan-
isms for resolving such references, using more primitive
information, such as a network host address rather than its
associated host name.
To permit such references, this standard provides the domain-
literal construct. Its contents must conform with the needs
of the sub-domain in which it is interpreted.
Domain-literals which refer to domains within the ARPA Inter-
net specify 32-bit Internet addresses, in four 8-bit fields
noted in decimal, as described in Request for Comments #820,
"Assigned Numbers." For example:
[10.0.3.19]
Note: THE USE OF DOMAIN-LITERALS IS STRONGLY DISCOURAGED. It
is permitted only as a means of bypassing temporary
system limitations, such as name tables which are not
complete.
The names of "top-level" domains, and the names of domains
under in the ARPA Internet, are registered with the Network
Information Center, SRI International, Menlo Park, California.
From RFC 2822
domain = dot-atom / domain-literal / obs-domain
obs-domain = atom *("." atom)
Update:
+ CFWS within local-parts and domains not allowed.*
From RFC 5322
domain = dot-atom / domain-literal / obs-domain
obs-domain = atom *("." atom)
XXX(dinosaure): from the RFC 5322, we should accept any domain as
[`Literal] and let the user to resolve it. Currently, we fail when
we catch a [`Literal] and do the best effort where we follow
RFC 5321. But may be it's inconvenient (or not?) to fail. TODO!
*)letdomain=letof_string~errorps=matchparse_string~consume:Allpswith|Okv->returnv|Error_->failerrorinlet_literals=return(`Literals)inletaddrs=of_string~error:"address-literal"address_literals>>|funaddr->`Addraddrinobs_domain>>|(fundomain->`Domaindomain)<|>(domain_literal>>=funs->addrs)<|>(dot_atom>>|fundomain->`Domaindomain)(* From RFC 2822
obs-id-left = local-part
no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE
id-left = dot-atom-text / no-fold-quote / obs-id-left
From RFC 5322
id-left = dot-atom-text / obs-id-left
obs-id-left = local-part
XXX(dinosaure): we took the RFC 5322's ABNF, the no-fold-quote token
is available on the local-part as quoted-string.
*)letid_left=local_part<|>(dot_atom_text>>|List.map(funx->`Atomx))(* From RFC 2822
no-fold-literal = "[" *(dtext / quoted-pair) "]"
From RFC 5322
no-fold-literal = "[" *dtext "]"
Dependence
no-fold-literal <- id-right
id-right <- e-mail
*)letno_fold_literal=char'['*>with_uutfis_dtext(* TODO: replace take_while and handle unicode. *)<*char']'(* From RFC 2822
id-right = dot-atom-text / no-fold-literal / obs-id-right
obs-id-right = domain
From RFC 5322
id-right = dot-atom-text / no-fold-literal / obs-id-right
obs-id-right = domain
*)letid_right=no_fold_literal>>|(funliteral->`Literalliteral)<|>domain<|>(dot_atom_text>>|fundomain->`Domaindomain)(* From RFC 822
addr-spec = local-part "@" domain ; global address
msg-id = "<" addr-spec ">" ; Unique message id
From RFC 2822
msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
Update:
+ CFWS within msg-id not allowed.*
The message identifier (msg-id) is similar in syntax to an angle-addr
construct without the internal CFWS.
From RFC 5322
msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
Update:
+ Removed no-fold-quote from msg-id. Clarified syntax
The message identifier (msg-id) itself MUST be a globally unique
identifier for a message. The generator of the message identifier
MUST guarantee that the msg-id is unique. There are several
algorithms that can be used to accomplish this. Since the msg-id has
a similar syntax to addr-spec (identical except that quoted strings,
comments, and folding white space are not allowed), a good method is
to put the domain name (or a domain literal IP address) of the host
on which the message identifier was created on the right-hand side of
the "@" (since domain names and IP addresses are normally unique),
and put a combination of the current absolute date and time along
with some other currently unique (perhaps sequential) identifier
available on the system (for example, a process id number) on the
left-hand side. Though other algorithms will work, it is RECOMMENDED
that the right-hand side contain some domain identifier (either of
the host itself or otherwise) such that the generator of the message
identifier can guarantee the uniqueness of the left-hand side within
the scope of that domain.
Semantically, the angle bracket characters are not part of the
msg-id; the msg-id is what is contained between the two angle bracket
characters.
Dependence
msg-id __is not__ used by mailbox
*)letmsg_id=option()cfws*>lift2(funxy->x,y)(char'<'*>id_left)(char'@'*>id_right<*char'>')<*option()cfwsletfilter_mappredicatelst=List.fold_right(funxa->matchpredicatexwithSomex->x::a|None->a)lst[](* From RFC 822
addr-spec = local-part "@" domain ; global address
From RFC 2822
An addr-spec is a specific Internet identifier that contains a
locally interpreted string followed by the at-sign character ("@",
ASCII value 64) followed by an Internet domain. The locally
interpreted string is either a quoted-string or a dot-atom. If the
string can be represented as a dot-atom (that is, it contains no
characters other than atext characters or "." surrounded by atext
characters), then the dot-atom form SHOULD be used and the
quoted-string form SHOULD NOT be used. Comments and folding white
space SHOULD NOT be used around the "@" in the addr-spec.
addr-spec = local-part "@" domain
From RFC 5322
Note: A liberal syntax for the domain portion of addr-spec is
given here. However, the domain portion contains addressing
information specified by and used in other protocols (e.g.,
[RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
incumbent upon implementations to conform to the syntax of
addresses for the context in which they are used.
addr-spec = local-part "@" domain
Dependence
addr-spec <- mailbox
XXX(dinosaure): about domain, we follow RFC 5321.
*)letaddr_spec=lift2(funlocald->{name=None;local;domain=d,[]})(local_part<?>"local-part")(char'@'>>=fun_->(domain<?>"domain"))letaddr_spec=addr_spec<?>"addr-spec"letobs_domain_list=many(cfws<|>char','*>return())*>char'@'*>domain>>=funfirst->many(char','*>option()cfws*>optionNone(char'@'*>domain>>|funx->Somex))>>|filter_map(funx->x)>>|funrest->first::restletobs_route=obs_domain_list<*char':'letobs_angle_addr=option()cfws*>char'<'*>obs_route>>=fundomains->addr_spec>>=(function|{domain=_,[];_}asaddr->return{addrwithdomain=fstaddr.domain,domains}|_->fail"Invalid addr-spec")<*char'>'<*option()cfws(* From RFC 822
The ABNF of angle-addr is not explicit from RFC 822 but the relic could be find here,
as a part of mailbox:
mailbox = addr-spec ; simple address
/ phrase route-addr ; name & addr-spec
From RFC 2822
obs-domain-list = "@" domain *( *(CFWS / "," ) [CFWS] "@" domain)
obs-route = [CFWS] obs-domain-list ":" [CFWS]
obs-angle-addr = [CFWS] "<" [obs-route] addr-spec ">" [CFWS]
angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
From RFC 5322
obs-domain-list = *(CFWS / ",") "@" domain
*("," [CFWS] ["@" domain])
obs-route = obs-domain-list ":"
obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
Dependence
angle-addr <- name-addr
name-addr <- mailbox
*)letangle_addr=option()cfws*>char'<'*>addr_spec<*char'>'<*option()cfws<|>obs_angle_addr(* From RFC 822
phrase = 1*word ; Sequence of words
From RFC 2047
(3) As a replacement for a 'word' entity within a 'phrase', for example,
one that precedes an address in a From, To, or Cc header. The ABNF
definition for 'phrase' from RFC 822 thus becomes:
phrase = 1*( encoded-word / word )
In this case the set of characters that may be used in a "Q"-encoded
'encoded-word' is restricted to: <upper and lower case ASCII
letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
(underscore, ASCII 95.)>. An 'encoded-word' that appears within a
'phrase' MUST be separated from any adjacent 'word', 'text' or
'special' by 'linear-white-space'.
encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
charset = token ; see section 3
encoding = token ; see section 4
token = 1*<Any CHAR except SPACE, CTLs, and especials>
especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
<"> / "/" / "[" / "]" / "?" / "." / "="
encoded-text = 1*<Any printable ASCII character other than "?"
or SPACE>
; (but see "Use of encoded-words in message
; headers", section 5)
From RFC 2822
obs-phrase = word *(word / "." / CFWS)
phrase = 1*word / obs-phrase
Update:
+ Period allowed in obsolete form of phrase.
From RFC 5322
phrase = 1*word / obs-phrase
Note: The "period" (or "full stop") character (".") in obs-phrase
is not a form that was allowed in earlier versions of this or any
other specification. Period (nor any other character from
specials) was not allowed in phrase because it introduced a
parsing difficulty distinguishing between phrases and portions of
an addr-spec (see section 4.4). It appears here because the
period character is currently used in many messages in the
display-name portion of addresses, especially for initials in
names, and therefore must be interpreted properly.
obs-phrase = word *(word / "." / CFWS)
Dependence
phrase <- display-name
display-name <- name-addr
name-addr <- mailbox
*)letis_especials=function|'('|')'|'<'|'>'|'@'|','|';'|':'|'"'|'/'|'['|']'|'?'|'.'|'='->true|_->falseletis_ctl=function'\000'..'\031'->true|_->falseletis_space=(=)' 'lettoken=take_while1(funchr->not(is_especialschr||is_ctlchr||is_spacechr))letis_b64=function|'A'..'Z'|'a'..'z'|'0'..'9'|'+'|'/'->true|_->falseletbase64=take_till((=)'?')>>|funx->Base64.decodexletis_hex=function|'0'..'9'|'a'..'f'|'A'..'F'->true|_->falselethexab=letauxcode=matchcodewith|'0'..'9'->Char.codecode-Char.code'0'+0|'a'..'f'->Char.codecode-Char.code'a'+10|'A'..'F'->Char.codecode-Char.code'A'+10|_->assertfalseinChar.chr((auxa*16)+auxb)lethex=char'='*>satisfyis_hex>>=funa->satisfyis_hex>>=funb->return(hexab)letquoted_printable=take_till((=)'?')>>|funs->letdecoder=Pecu.Inline.decoder(`Strings)inletresult=Buffer.create16inletrecgo()=matchPecu.Inline.decodedecoderwith|`Await->assertfalse(* XXX(dinosaure): impossible case with [src = `String _] *)|`Charchr->Buffer.add_charresultchr;go()|`End->Ok(Buffer.contentsresult)|`Malformederr->Error(`Msgerr)ingo()letencoded_word=string"=?"*>token>>=funcharset->char'?'*>satisfy(function'Q'|'B'->true|_->false)>>=(function'Q'->return`Q|'B'->return`B|_->assertfalse)>>=funencoding->char'?'(* XXX(dinosaure): in this part, we allocate in both cases a buffer, it
could be interesting to find an other way to return decoded content
(Base64 or Quoted-Printable). *)>>=fun_->(matchencodingwith|`B->base64>>|funv->Base64v|`Q->quoted_printable>>|funv->Quoted_printablev)>>=fundecoded->string"?="*>return(charset,decoded)(* XXX(dinosaure): I did not find mention of CFWS token which surrounds
encoded-word. However, this code come from Mr. MIME which passes all tests.
So, I decide to let CFWS token but we need to understand why. TODO! *)letextended_word=option()cfws*>(encoded_word>>|funx->`Encodedx)<*option()cfws<|>(word>>|funx->`Wordx)letobs_phrase=extended_word>>=funfirst->fix(funm->lift2(function|(`Dot|`Word_|`Encoded_)asx->funr->x::r|`CFWS->funr->r)(extended_word<|>(char'.'>>|fun_->`Dot)<|>(cfws>>|fun()->`CFWS))m<|>return[])>>|funrest->first::restletphrase=obs_phrase<|>many1extended_word(* From RFC 822
The ABNF of name-addr is not explicit from RFC 822 but the relic could be find here:
mailbox = addr-spec ; simple address
/ phrase route-addr ; name & addr-spec
From RFC 2822
display-name = phrase
name-addr = [display-name] angle-addr
Note: Some legacy implementations used the simple form where the
addr-spec appears without the angle brackets, but included the name
of the recipient in parentheses as a comment following the addr-spec.
Since the meaning of the information in a comment is unspecified,
implementations SHOULD use the full name-addr form of the mailbox,
instead of the legacy form, to specify the display name associated
with a mailbox. Also, because some legacy implementations interpret
the comment, comments generally SHOULD NOT be used in address fields
to avoid confusing such implementations.
From RFC 5322
name-addr = [display-name] angle-addr
display-name = phrase
Dependence
name-addr <- mailbox
*)letdisplay_name=phraseletname_addr=optionNone(display_name>>|funx->Somex)>>=funname->angle_addr>>|funaddr->{addrwithname}letname_addr=name_addr<?>"name-addr"(* Last (but not least).
Discard RFC 720.
Discard RFC 724.
Discard RFC 733.
From RFC 822
mailbox = addr-spec ; simple address
/ phrase route-addr ; name & addr-spec
From RFC 2822
mailbox = name-addr / addr-spec
From RFC 5322
mailbox = name-addr / addr-spec
*)letmailbox=name_addr<|>addr_spec<?>"mailbox"letobs_mbox_list=letrest=fix(funm->lift2(function`Mailboxx->funr->x::r|`Sep->funr->r)(char','*>option`Sep(mailbox>>|(funm->`Mailboxm)<|>(cfws>>|fun()->`Sep)))m<|>return[])inmany(option()cfws*>char',')*>mailbox>>=funx->rest>>|funr->x::rletobs_group_list=many1(option()cfws*>char',')*>option()cfwsletmailbox_list=obs_mbox_list<|>(mailbox>>=funx->many(char','*>mailbox)>>|funr->x::r)letgroup_list=mailbox_list<|>(obs_group_list>>|fun()->[])<|>(cfws>>|fun()->[])letgroup=display_name>>=fungroup->char':'*>(option[]group_list<?>"group-list")>>=funmailboxes->char';'*>option()cfws>>|fun_->{group;mailboxes}letaddress=group>>|(fung->`Groupg)<|>(mailbox>>|funm->`Mailboxm)letobs_addr_list=letrest=fix@@funm->lift2(function`Addrx->funr->x::r|`Sep->funr->r)(char','*>option`Sep(address>>|(funa->`Addra)<|>(cfws>>|fun()->`Sep)))m<|>return[]inmany(option()cfws*>char',')*>address>>=funx->rest>>|funr->x::rletaddress_list=obs_addr_list<?>"obs-addr-list"<|>(address>>=(funx->many(char','*>address)>>|funr->x::r)<?>"regular-address-list")endtypeerror=[`Invalidofstring*string]letpp_errorppf=function|`Invalid(committed,rest)->Fmt.pfppf"Invalid email address: %s%s"committedrestletof_stringparsersrctmpoffmax=letopenAngstrom.Unbufferedinletreck1len=function|Done(committed,v)->Ok(committed,v)|Partial{continue;committed;}->k1(len-committed)(continuetmp~off:committed~len:(len-committed)Complete)|Fail(committed,_,_)->letcommitted,rest=String.subsrc0committed,String.subsrccommitted(String.lengthsrc-committed)inError(`Invalid(committed,rest))andk0poscur=function|Done(committed,v)->Ok(committed,v)|Fail(committed,_,_)->letcommitted,rest=String.subsrc0committed,String.subsrccommitted(String.lengthsrc-committed)inError(`Invalid(committed,rest))|Partial{continue;committed;}->letlen=min(Bigstringaf.lengthtmp-committed)(max-pos)inBigstringaf.blittmp~src_off:committedtmp~dst_off:0~len:cur;Bigstringaf.blit_from_stringsrc~src_off:offtmp~dst_off:cur~len;matchmax-(pos+len)with|0->k1(cur+len)(continuetmp~off:0~len:(cur+len)Complete)|_->k0(pos+len)(cur+len)(continuetmp~off:0~len:(cur+len)Incomplete)ink000(Angstrom.Unbuffered.parseparser)letof_string_with_crlfparsersrctmpoffmax=letparser=letopenAngstrominparser<*char'\r'<*char'\n'<*commitinof_stringparsersrctmpoffmaxletwith_tmpkparsersrcofflen=lettmp=Bigstringaf.createleninkparsersrctmpofflenletwith_off_and_lenkparsersrc=letlen=String.lengthsrcinwith_tmpkparsersrc0lenletrr_mapf=functionOkv->Ok(fv)|Error_aserr->errlet(>|=)af=rr_mapfamoduleList=structletof_string_raw~off~len?(tmp=Bigstringaf.createlen)src=of_stringParser.address_listsrctmpofflenletof_string_with_crlfsrc=with_off_and_lenof_string_with_crlfParser.address_listsrc>|=sndletof_stringsrc=with_off_and_lenof_stringParser.address_listsrc>|=sndletto_stringlst=Format.asprintf"%a"str_addresseslstendletaddress_of_string_with_crlfsrc=with_off_and_lenof_string_with_crlfParser.addr_specsrc>|=fun(_,{local;domain;_})->local,domainletaddress_of_stringsrc=with_off_and_lenof_stringParser.addr_specsrc>|=fun(_,{local;domain;_})->local,domainletaddress_of_string_raw~off~len?(tmp=Bigstringaf.createlen)src=of_stringParser.addr_specsrctmpofflen>|=fun(committed,{local;domain;_})->(committed,(local,domain))letset_of_string_raw~off~len?(tmp=Bigstringaf.createlen)src=of_stringParser.addresssrctmpofflenletset_of_string_with_crlfsrc=with_off_and_lenof_string_with_crlfParser.addresssrc>|=sndletset_of_stringsrc=with_off_and_lenof_stringParser.addresssrc>|=sndletof_string_raw~off~len?(tmp=Bigstringaf.createlen)src=of_stringParser.mailboxsrctmpofflenletof_string_with_crlfsrc=with_off_and_lenof_string_with_crlfParser.mailboxsrc>|=sndletof_stringsrc=with_off_and_lenof_stringParser.mailboxsrc>|=sndletto_stringmailbox=Format.asprintf"%a"str_mailboxmailboxletset_to_string=function|`Mailboxm->Format.asprintf"%a"str_mailboxm|`Groupg->Format.asprintf"%a"str_groupgletaddress_to_stringaddress=Format.asprintf"%a"str_addressaddress