123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753letio_buffer_size=65536letinvalid_argfmt=Format.ksprintf(funs->invalid_args)fmtletinvalid_encode()=invalid_arg"Expected `Await encode"letinvalid_boundsofflen=invalid_arg"Invalid bounds (off: %d, len: %d)"offlenletmalformedsourceoffposlen=`Malformed(Bytes.sub_stringsource(off+pos)len)letunsafe_bytesourceoffpos=Bytes.unsafe_getsource(off+pos)letunsafe_blit=Bytes.unsafe_blitletunsafe_chr=Char.unsafe_chrletunsafe_set_chrsourceoffchr=Bytes.unsafe_setsourceoffchr(* Base character decoders. They assume enough data. *)letr_reprsourceofflen=(* assert (0 <= j && 0 <= l && j + l <= String.length s); *)(* assert (l = 3); *)leta=unsafe_bytesourceoff1inletb=unsafe_bytesourceoff2inletof_hex=function|'0'..'9'aschr->Char.codechr-Char.code'0'|'A'..'F'aschr->Char.codechr-Char.code'A'+10|_->assertfalsein(* (General 8bit representation) Any octet, except a CR or LF that is part of
a CRLF line break of the canonical (standard) form of the data being
encoded, may be represented by an "=" followed by a two digit hexadecimal
representation of the octet's value. The digits of the hexadecimal
alphabet, for this purpose, are "0123456789ABCDEF". Uppercase letters must
be used; lowercase letters are not allowed. Thus, for example, the decimal
value 12 (US-ASCII form feed) can be represented by "=0C", and the decimal
value 61 (US- ASCII EQUAL SIGN) can be represented by "=3D". This rule
must be followed except when the following rules allow an alternative
encoding.
See RFC2045 § 6.7. *)match(unsafe_bytesourceoff0,a,b)with|'=',('0'..'9'|'A'..'F'),('0'..'9'|'A'..'F')->`Repr((of_hexa*16)+of_hexb)|'=','\r','\n'->`Soft_line_break|_e,_a,_b->malformedsourceoff0lenletr_chrchr=`Chrchrletr_wspwsp=`Wspwspletr_line_breaksourceofflen=(* assert (0 <= j && 0 <= l && j + l <= String.length s); *)(* assert (l = 2); *)matchBytes.sub_stringsourceofflenwith|"\r\n"->`Line_break|_->malformedsourceoff0lentypesrc=[`Channelofin_channel|`Stringofstring|`Manual]typedecode=[`Await|`End|`Malformedofstring|`Lineofstring|`Dataofstring]typeinput=[`Malformedofstring|`Soft_line_break|`Line_break|`Wspofchar|`Reprofint|`Chrofchar|`End](* [quoted-printable] has two kind to break a line but only one is relevant:
[`Line_break]. [`Soft_line_break] must be used if longer lines are to be
encoded with the quoted-printable encoding.
This provides a mechanism with which long lines are encoded in such a way as
to be restored by the user agent. The 76 character limit does not count the
trailing CRLF, but counts all other characters, including any equal signs.
[`Wsp] must not be represented at the end of the encoded line. We keep a
different buffer to store them and decide if they are followed by a
printable character (like "="), we decoded them as printable whitespaces.
[`Repr] is a decoded 8 bits value.
[`Chr] is only a printable character. *)typedecoder={src:src;mutablei:Bytes.t;mutablei_off:int;mutablei_pos:int;mutablei_len:int;t:Buffer.t;w:Buffer.t;h:Bytes.t;mutableh_len:int;mutableh_need:int;mutableunsafe:bool;mutablebyte_count:int;mutablelimit_count:int;mutablepp:decoder->input->decode;mutablek:decoder->decode}(* On decodes that overlap two (or more) [d.i] buffers, we use [t_fill] to copy
the input data to [d.t] and decode from there. If the [d.i] buffers are not
too small this is faster than continuation based byte per byte writes.
End of input is sgnaled by [d.i_pos = 0] and [d.i_len = min_int] which
implies that [i_rem d < 0] is [true]. *)leti_remdecoder=decoder.i_len-decoder.i_pos+1letend_of_inputdecoder=decoder.i<-Bytes.empty;decoder.i_off<-0;decoder.i_pos<-0;decoder.i_len<-min_intletsrcdecodersourceofflen=ifoff<0||len<0||off+len>Bytes.lengthsourcetheninvalid_boundsofflenelseiflen=0thenend_of_inputdecoderelse(decoder.i<-source;decoder.i_off<-off;decoder.i_pos<-0;decoder.i_len<-len-1)letrefillkdecoder=matchdecoder.srcwith|`Manual->decoder.k<-k;`Await|`String_->end_of_inputdecoder;kdecoder|`Channelic->letlen=inputicdecoder.i0(Bytes.lengthdecoder.i)insrcdecoderdecoder.i0len;kdecoderletdangerousdecoderv=decoder.unsafe<-vletresetdecoder=decoder.limit_count<-0letretkvbyte_countdecoder=decoder.k<-k;decoder.byte_count<-decoder.byte_count+byte_count;decoder.limit_count<-decoder.limit_count+byte_count;ifdecoder.limit_count>78thendangerousdecodertrue;decoder.ppdecodervletmalformed_linesourceofflendecoder=Buffer.add_bufferdecoder.tdecoder.w;Buffer.add_subbytesdecoder.tsourceofflen;letline=Buffer.contentsdecoder.tinBuffer.cleardecoder.w;Buffer.cleardecoder.t;`Malformedlinelett_needdecoderneed=decoder.h_len<-0;decoder.h_need<-needletrect_fillkdecoder=letblitdecoderlen=unsafe_blitdecoder.i(decoder.i_off+decoder.i_pos)decoder.hdecoder.h_lenlen;decoder.i_pos<-decoder.i_pos+len;decoder.h_len<-decoder.h_len+leninletrem=i_remdecoderinifrem<0(* end of input *)thenkdecoderelseletneed=decoder.h_need-decoder.h_leninifrem<needthen(blitdecoderrem;refill(t_fillk)decoder)else(blitdecoderneed;kdecoder)letrect_decode_quoted_printabledecoder=ifdecoder.h_len<decoder.h_needthenretdecode_quoted_printable(malformed_linedecoder.h0decoder.h_lendecoder)decoder.h_lendecoderelseretdecode_quoted_printable(r_reprdecoder.h0decoder.h_len)decoder.h_lendecoderandt_decode_line_breakdecoder=ifdecoder.h_len<decoder.h_needthenretdecode_quoted_printable(malformed_linedecoder.h0decoder.h_lendecoder)decoder.h_lendecoderelseretdecode_quoted_printable(r_line_breakdecoder.h0decoder.h_len)decoder.h_lendecoderanddecode_quoted_printabledecoder=letrem=i_remdecoderinifrem<=0thenifrem<0thenret(fun_decoder->`End)`End0decoderelserefilldecode_quoted_printabledecoderelsematchunsafe_bytedecoder.idecoder.i_offdecoder.i_poswith|('\009'|'\032')aswsp->(* HT | SPACE *)decoder.i_pos<-decoder.i_pos+1;retdecode_quoted_printable(r_wspwsp)1decoder|'\013'->(* CR *)(* TODO: optimize it! *)t_needdecoder2;t_fillt_decode_line_breakdecoder|'='->(* TODO: optimize it! *)t_needdecoder3;t_fillt_decode_quoted_printabledecoder|('\033'..'\060'|'\062'..'\126')aschr->Buffer.add_bufferdecoder.tdecoder.w;Buffer.cleardecoder.w;decoder.i_pos<-decoder.i_pos+1;retdecode_quoted_printable(r_chrchr)1decoder|_->(* XXX(dinosaure): If characters other than HT, CR, LF or octets with
decimal values greater than 126 found in incoming quoted-printable
data by a decoder, a robust implementation might exclude them from
the decoded data and warn the user that illegal characters were
discovered. See RFC2045 § 6.7. *)letj=decoder.i_posindecoder.i_pos<-decoder.i_pos+1;retdecode_quoted_printable(malformeddecoder.idecoder.i_offj1)1decoderletf_fill_bytebytedecoder=Buffer.add_chardecoder.t(unsafe_chrbyte);decoder.kdecoderletf_fill_chrchrdecoder=Buffer.add_chardecoder.tchr;decoder.kdecoderletpp_quoted_printabledecoder=function|`Soft_line_break->Buffer.add_bufferdecoder.tdecoder.w;letdata=Buffer.contentsdecoder.tinBuffer.cleardecoder.w;Buffer.cleardecoder.t;resetdecoder;`Datadata|`Line_break->letline=Buffer.contentsdecoder.tinBuffer.cleardecoder.w;Buffer.cleardecoder.t;resetdecoder;`Lineline|`End->Buffer.add_bufferdecoder.tdecoder.w;letdata=Buffer.contentsdecoder.tinBuffer.cleardecoder.w;Buffer.cleardecoder.t;`Datadata|`Wspwsp->Buffer.add_chardecoder.wwsp;decoder.kdecoder|`Reprbyte->f_fill_bytebytedecoder|`Chrchr->f_fill_chrchrdecoder|`Malformed_asv->vletdecodersrc=letpp=pp_quoted_printableinletk=decode_quoted_printableinleti,i_off,i_pos,i_len=matchsrcwith|`Manual->(Bytes.empty,0,1,0)|`Channel_->(Bytes.createio_buffer_size,0,1,0)|`Strings->(Bytes.unsafe_of_strings,0,0,String.lengths-1)in{src;i_off;i_pos;i_len;i;t=Buffer.create80;w=Buffer.create80;h=Bytes.create3;h_need=0;h_len=0;unsafe=false;limit_count=0;byte_count=0;pp;k}letdecodedecoder=decoder.kdecoderletdecoder_byte_countdecoder=decoder.byte_countletdecoder_srcdecoder=decoder.srcletdecoder_dangerousdecoder=decoder.unsafemoduleInline=struct(* XXX(dinosaure): I want structural typing and row polymophism on record,
please. *)typeunsafe_char=chartypedecode=[`Await|`End|`Malformedofstring|`Charofunsafe_char]typeinput=[`Malformedofstring|`Wsp|`Chrofchar|`Reprofint|`End]letr_reprsourceofflen=(* assert (0 <= j && 0 <= l && j + l <= String.length s); *)(* assert (l = 3); *)leta=unsafe_bytesourceoff1inletb=unsafe_bytesourceoff2inletof_hex=function|'0'..'9'aschr->Char.codechr-Char.code'0'|'A'..'F'aschr->Char.codechr-Char.code'A'+10|'a'..'f'aschr->Char.codechr-Char.code'a'+10(* RFC 2047 says: uppercase SHOULD be used for hexadecimal digits. *)|_->assertfalseinmatch(unsafe_bytesourceoff0,a,b)with|'=',('0'..'9'|'A'..'F'|'a'..'f'),('0'..'9'|'A'..'F'|'a'..'f')->`Repr((of_hexa*16)+of_hexb)|_e,_a,_b->malformedsourceoff0lenletr_wsp=`Wsptypedecoder={src:src;mutablei:Bytes.t;mutablei_off:int;mutablei_pos:int;mutablei_len:int;h:Bytes.t;mutableh_len:int;mutableh_need:int;mutablebyte_count:int;mutablepp:decoder->input->decode;mutablek:decoder->decode}leti_remdecoder=decoder.i_len-decoder.i_pos+1letend_of_inputdecoder=decoder.i<-Bytes.empty;decoder.i_off<-0;decoder.i_pos<-0;decoder.i_len<-min_intletsrcdecodersourceofflen=ifoff<0||len<0||off+len>Bytes.lengthsourcetheninvalid_boundsofflenelseiflen=0thenend_of_inputdecoderelse(decoder.i<-source;decoder.i_off<-off;decoder.i_pos<-0;decoder.i_len<-len-1)letrefillkdecoder=matchdecoder.srcwith|`Manual->decoder.k<-k;`Await|`String_->end_of_inputdecoder;kdecoder|`Channelic->letlen=inputicdecoder.i0(Bytes.lengthdecoder.i)insrcdecoderdecoder.i0len;kdecoderletretkvbyte_countdecoder=decoder.k<-k;decoder.byte_count<-decoder.byte_count+byte_count;decoder.ppdecodervlett_needdecoderneed=decoder.h_len<-0;decoder.h_need<-needletrect_fillkdecoder=letblitdecoderlen=unsafe_blitdecoder.i(decoder.i_off+decoder.i_pos)decoder.hdecoder.h_lenlen;decoder.i_pos<-decoder.i_pos+len;decoder.h_len<-decoder.h_len+leninletrem=i_remdecoderinifrem<0(* end of input *)thenkdecoderelseletneed=decoder.h_need-decoder.h_leninifrem<needthen(blitdecoderrem;refill(t_fillk)decoder)else(blitdecoderneed;kdecoder)letrect_decode_inline_quoted_printabledecoder=ifdecoder.h_len<decoder.h_needthenretdecode_inline_quoted_printable(malformeddecoder.h00decoder.h_len)decoder.h_lendecoder(* XXX(dinosaure): malformed line? *)elseretdecode_inline_quoted_printable(r_reprdecoder.h0decoder.h_len)decoder.h_lendecoderanddecode_inline_quoted_printabledecoder=letrem=i_remdecoderinifrem<=0thenifrem<0thenret(fun_decoder->`End)`End0decoderelserefilldecode_inline_quoted_printabledecoderelsematchunsafe_bytedecoder.idecoder.i_offdecoder.i_poswith|'_'->decoder.i_pos<-decoder.i_pos+1;retdecode_inline_quoted_printabler_wsp1decoder|'='->t_needdecoder3;t_fillt_decode_inline_quoted_printabledecoder|('\033'..'\060'|'\062'..'\126')aschr->decoder.i_pos<-decoder.i_pos+1;retdecode_inline_quoted_printable(r_chrchr)1decoder|_->letj=decoder.i_posindecoder.i_pos<-decoder.i_pos+1;retdecode_inline_quoted_printable(malformeddecoder.idecoder.i_offj1)1decoderletpp_inline_quoted_printable_decoder=function|`Wsp->`Char' '|`Chrchr->`Charchr|`Reprbyte->`Char(unsafe_chrbyte)|`End->`End|`Malformed_asv->vletdecodersrc=letpp=pp_inline_quoted_printableinletk=decode_inline_quoted_printableinleti,i_off,i_pos,i_len=matchsrcwith|`Manual->(Bytes.empty,0,1,0)|`Channel_->(Bytes.createio_buffer_size,0,1,0)|`Strings->(Bytes.unsafe_of_strings,0,0,String.lengths-1)in{src;i_off;i_pos;i_len;i;h=Bytes.create3;h_need=0;h_len=0;byte_count=0;pp;k}letdecodedecoder=decoder.kdecoderletdecoder_byte_countdecoder=decoder.byte_countletdecoder_srcdecoder=decoder.srctypedst=[`Channelofout_channel|`BufferofBuffer.t|`Manual]typeencode=[`Await|`End|`Charofunsafe_char]typeencoder={dst:dst;mutableo:Bytes.t;mutableo_off:int;mutableo_pos:int;mutableo_len:int;t:Bytes.t;mutablet_pos:int;mutablet_len:int;mutablek:encoder->encode->[`Ok|`Partial]}leto_remencoder=encoder.o_len-encoder.o_pos+1letdstencodersourceofflen=ifoff<0||len<0||off+len>Bytes.lengthsourcetheninvalid_boundsofflen;encoder.o<-source;encoder.o_off<-off;encoder.o_pos<-0;encoder.o_len<-len-1letdst_rem=o_remletpartialkencoder=function|`Await->kencoder|`Char_|`End->invalid_encode()letflushkencoder=matchencoder.dstwith|`Manual->encoder.k<-partialk;`Partial|`Channeloc->outputocencoder.oencoder.o_offencoder.o_pos;encoder.o_pos<-0;kencoder|`Bufferb->leto=Bytes.unsafe_to_stringencoder.oinBuffer.add_substringboencoder.o_offencoder.o_pos;encoder.o_pos<-0;kencoderlett_rangeencoderlen=encoder.t_pos<-0;encoder.t_len<-lenletrect_flushkencoder=letblitencoderlen=unsafe_blitencoder.tencoder.t_posencoder.oencoder.o_poslen;encoder.o_pos<-encoder.o_pos+len;encoder.t_pos<-encoder.t_pos+leninletrem=o_remencoderinletlen=encoder.t_len-encoder.t_pos+1inifrem<lenthen(blitencoderrem;flush(t_flushk)encoder)else(blitencoderlen;kencoder)letto_hexcode=matchChar.unsafe_chrcodewith|'\000'..'\009'->Char.chr(Char.code'0'+code)|'\010'..'\015'->Char.chr(Char.code'A'+code-10)|_->assertfalseletrecencode_quoted_printableencoderv=letkencoder=encoder.k<-encode_quoted_printable;`Okinmatchvwith|`Await->kencoder|`End->flushkencoder|`Charchr->letrem=o_remencoderinifrem<1thenflush(funencoder->encode_quoted_printableencoderv)encoderelsematchchrwith|' '->unsafe_set_chrencoder.o(encoder.o_off+encoder.o_pos)'_';encoder.o_pos<-encoder.o_pos+1;kencoder|'\033'..'\060'|'\062'..'\126'->unsafe_set_chrencoder.o(encoder.o_off+encoder.o_pos)chr;encoder.o_pos<-encoder.o_pos+1;kencoder|unsafe_chr->lethi=to_hex(Char.codeunsafe_chr/16)inletlo=to_hex(Char.codeunsafe_chrmod16)inlets,j,k=ifrem<3then(t_rangeencoder3;(encoder.t,0,t_flushk))elseletj=encoder.o_posinencoder.o_pos<-encoder.o_pos+3;(encoder.o,encoder.o_off+j,k)inunsafe_set_chrsj'=';unsafe_set_chrs(j+1)hi;unsafe_set_chrs(j+2)lo;kencoderletencoderdst=leto,o_off,o_pos,o_len=matchdstwith|`Manual->(Bytes.empty,1,0,0)|`Buffer_|`Channel_->(Bytes.createio_buffer_size,0,0,io_buffer_size-1)in{dst;o_off;o_pos;o_len;o;t=Bytes.create3;t_pos=1;t_len=0;k=encode_quoted_printable}letencodeencoderv=encoder.kencodervletencoder_dstencoder=encoder.dstend(* Encode *)typeunsafe_char=chartypedst=[`Channelofout_channel|`BufferofBuffer.t|`Manual]typeencode=[`Await|`End|`Charofunsafe_char|`Line_break]typeencoder={dst:dst;mutableo:Bytes.t;mutableo_off:int;mutableo_pos:int;mutableo_len:int;t:Bytes.t;mutablet_pos:int;mutablet_len:int;mutablec_col:int;mutablek:encoder->encode->[`Ok|`Partial]}leto_remencoder=encoder.o_len-encoder.o_pos+1letdstencodersourceofflen=ifoff<0||len<0||off+len>Bytes.lengthsourcetheninvalid_boundsofflen;encoder.o<-source;encoder.o_off<-off;encoder.o_pos<-0;encoder.o_len<-len-1letdst_rem=o_remletpartialkencoder=function|`Await->kencoder|`Char_|`Line_break|`End->invalid_encode()letflushkencoder=matchencoder.dstwith|`Manual->encoder.k<-partialk;`Partial|`Channeloc->outputocencoder.oencoder.o_offencoder.o_pos;encoder.o_pos<-0;kencoder|`Bufferb->leto=Bytes.unsafe_to_stringencoder.oinBuffer.add_substringboencoder.o_offencoder.o_pos;encoder.o_pos<-0;kencoderlett_rangeencoderlen=encoder.t_pos<-0;encoder.t_len<-lenletrect_flushkencoder=letblitencoderlen=unsafe_blitencoder.tencoder.t_posencoder.oencoder.o_poslen;encoder.o_pos<-encoder.o_pos+len;encoder.t_pos<-encoder.t_pos+leninletrem=o_remencoderinletlen=encoder.t_len-encoder.t_pos+1inifrem<lenthen(blitencoderrem;flush(t_flushk)encoder)else(blitencoderlen;kencoder)letto_hexcode=matchChar.unsafe_chrcodewith|'\000'..'\009'->Char.chr(Char.code'0'+code)|'\010'..'\015'->Char.chr(Char.code'A'+code-10)|_->assertfalseletrecencode_quoted_printableencoderv=letkcol_countencoder=encoder.c_col<-encoder.c_col+col_count;encoder.k<-encode_quoted_printable;`Okinmatchvwith|`Await->k0encoder|`End->flush(k0)encoder|`Line_break->letrem=o_remencoderinlets,j,k=ifrem<2then(t_rangeencoder2;(encoder.t,0,t_flush(k2)))elseletj=encoder.o_posinencoder.o_pos<-encoder.o_pos+2;(encoder.o,encoder.o_off+j,k2)inunsafe_set_chrsj'\r';unsafe_set_chrs(j+1)'\n';encoder.c_col<-0;kencoder|`Charchr->(letrem=o_remencoderinifrem<1thenflush(funencoder->encode_quoted_printableencoderv)encoderelseifencoder.c_col=75thenencode_soft_line_break(funencoder->encode_quoted_printableencoderv)encoderelsematchchrwith|'\033'..'\060'|'\062'..'\126'->unsafe_set_chrencoder.o(encoder.o_off+encoder.o_pos)chr;encoder.o_pos<-encoder.o_pos+1;k1encoder|unsafe_chr->ifencoder.c_col<73then(lethi=to_hex(Char.codeunsafe_chr/16)inletlo=to_hex(Char.codeunsafe_chrmod16)inlets,j,k=ifrem<3then(t_rangeencoder3;(encoder.t,0,t_flush(k3)))elseletj=encoder.o_posinencoder.o_pos<-encoder.o_pos+3;(encoder.o,encoder.o_off+j,k3)inunsafe_set_chrsj'=';unsafe_set_chrs(j+1)hi;unsafe_set_chrs(j+2)lo;kencoder)elseencode_soft_line_break(funencoder->encode_quoted_printableencoderv)encoder)andencode_soft_line_breakkencoder=letrem=o_remencoderinlets,j,k=ifrem<3then(t_rangeencoder3;(encoder.t,0,t_flushk))elseletj=encoder.o_posinencoder.o_pos<-encoder.o_pos+3;(encoder.o,encoder.o_off+j,k)inunsafe_set_chrsj'=';unsafe_set_chrs(j+1)'\r';unsafe_set_chrs(j+2)'\n';encoder.c_col<-0;kencoderletencoderdst=leto,o_off,o_pos,o_len=matchdstwith|`Manual->(Bytes.empty,1,0,0)|`Buffer_|`Channel_->(Bytes.createio_buffer_size,0,0,io_buffer_size-1)in{dst;o_off;o_pos;o_len;o;t=Bytes.create3;t_pos=1;t_len=0;c_col=0;k=encode_quoted_printable}letencodeencoderv=encoder.kencodervletencoder_dstencoder=encoder.dst