123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105openCoreopenAngstrommoduleLet_syntax=structletbindt~f=t>>=fletmapt~f=t>>|fletbothab=lift2Tuple2.createabendletws=take_while1Char.is_whitespaceletcharset=choice(* The following might not be an exhaustive list. We can add to this as we encounter
more cases. *)[string_ci"US-ASCII">>|const`Ascii;string_ci"UTF-8">>|const`Utf8;string_ci"ISO-8859-1">>|const`Latin1;string_ci"ISO-8859-2">>|const`Latin2;string_ci"GB2312">>|const`GB2312;string_ci"WINDOWS-1252">>|const`Windows1252];;letencoding:[`Base64|`Quoted_printable]Angstrom.t=choice[string_ci"B">>|const`Base64;string_ci"Q">>|const`Quoted_printable];;letparser_:stringAngstrom.t=let%bind()=string"=?">>|ignoreandcharset=charsetand()=string"?">>|ignoreandencoding=encodingand()=string"?">>|ignoreanddata=take_while(function|'?'->false|c->(not(Char.is_whitespacec))&&Char.is_printc)and()=string"?=">>|ignoreinlet%binddata=matchencodingwith|`Quoted_printable->(* RFC2047 deviates slightly from common quoted printable.
In particular
4.2(2) - Underscore may be used to encode space, and
4.2(3)- underscore must be encoded.
This substituion handles that decoding step. *)letdata=String.substr_replace_alldata~pattern:"_"~with_:" "inletdata_bstr,_=Quoted_printable_lexer.decode_quoted_printable(String.lengthdata)(Lexing.from_stringdata)inreturn(Bigbuffer.contentsdata_bstr)|`Base64->(matchBase64.decodedatawith|Okdata->returndata|Error(`Msgmsg)->failmsg)inmatchcharsetwith|`Ascii|`Utf8|`Latin1|`Latin2|`GB2312|`Windows1252->returndata;;letparser_many:stringAngstrom.t=many(choice[(let%maphd=parser_andtl=(* RFC2047 6.2 When displaying a particular header field that contains
multiple 'encoded-word's, any 'linear-white-space' that separates a
pair of adjacent 'encoded-word's is ignored. *)many(let%bind(_:string)=option""wsinparser_)inhd::tl);(let%mapc=choice[take_while1(function|'='->false|c->not(Char.is_whitespacec));string"="(* Collapse Line breaks as per
RFC822 - 3.1.1 Unfolding is accomplished by regarding CRLF immediately
followed by an LWSP-char as equivalent to the LWSP-char.
RFC822 - 3.1.3 Rules of (un)folding apply to these (unstructured) fields *);(let%bind(_:string)=choice[string"\r\n";string"\n"]inws)(* The RFC is ambiguous on what should happen if there is a lone CRLF, so we
ignore those, and treat these as regular white space. The RFC is also
ambiguous on how to treat multiple consecutive whitespaces, so we do the
conservative thing and leave them exactly as is. *);ws]in[c])])>>|List.concat>>|String.concat~sep:"";;letdecodestr=Angstrom.parse_string~consume:Prefixparser_manystr|>Result.map_error~f:Error.of_string;;