(*----------------------------------------------------------------------------
Copyright (c) 2016 Inhabited Type LLC.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of his contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------*)

include Angstrom

(* Character-class predicates used by the parsers below. *)
module P = struct
  (* Space or horizontal tab. *)
  let is_space =
    function | ' ' | '\t' -> true | _ -> false

  (* Carriage return. *)
  let is_cr =
    function | '\r' -> true | _ -> false

  (* Space, horizontal tab or colon. *)
  let is_space_or_colon =
    function | ' ' | '\t' | ':' -> true | _ -> false

  (* Hexadecimal digit, either case. *)
  let is_hex =
    function | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true | _ -> false

  (* Decimal digit. *)
  let is_digit =
    function '0' .. '9' -> true | _ -> false

  (* Separator characters, including space and horizontal tab. *)
  let is_separator =
    function
      | ')' | '(' | '<' | '>' | '@' | ',' | ';' | ':' | '\\' | '"'
      | '/' | '[' | ']' | '?' | '=' | '{' | '}' | ' ' | '\t' -> true
      | _ -> false

  (* True for every character that is neither a control character, DEL, a
   * separator, nor a space.
   *
   * '\t' ('\009') is already covered by the control-character range at the
   * top of the match. ' ' ('\032') is NOT inside that range, so it has to be
   * listed explicitly; otherwise a space would be accepted as a token
   * character. *)
  let is_token =
    function
      | '\000' .. '\031' | '\127'
      | ')' | '(' | '<' | '>' | '@' | ',' | ';' | ':' | '\\' | '"'
      | '/' | '[' | ']' | '?' | '=' | '{' | '}' | ' ' (* | '\t' *) -> false
      | _ -> true
end

(* A parser that returns [()]. *)
let unit = return ()

(* One or more characters accepted by [P.is_token]. *)
let token = take_while1 P.is_token

(* Skips any run (possibly empty) of characters accepted by [P.is_space]. *)
let spaces = skip_while P.is_space

(* Parses a single decimal digit and returns its integer value. *)
let digit =
  satisfy P.is_digit
  >>| function
    | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5
    | '6' -> 6 | '7' -> 7 | '8' -> 8 | '9' -> 9 | _ -> assert false

(* Matches the literal "\r\n"; labelled "eol" in failure marks. *)
let eol = string "\r\n" <?> "eol"

(* [hex str] converts a string of hexadecimal digits into an [int64] and
 * returns it as a parser result. The parser fails with "hex" when [str] cannot
 * be converted, or when the converted value is negative.
 *
 * The sign check matters: [Int64.of_string] reads "0x"-prefixed input as an
 * unsigned quantity, so a value above [Int64.max_int] (e.g. 16 'F' digits)
 * wraps around to a negative number instead of raising. Such a value must
 * never be used as a body/chunk length. *)
let hex str =
  match Int64.of_string ("0x" ^ str) with
  | n when Int64.compare n 0L >= 0 -> return n
  | _ -> fail "hex"
  | exception Failure _ -> fail "hex"

(* Takes everything up to the next '\r' and then requires [eol]. *)
let skip_line = take_till P.is_cr *> eol

(* Parses "HTTP/<digit>.<digit>" into a [Version] record. *)
let version =
  string "HTTP/" *>
  lift2 (fun major minor -> { Version.major; minor })
    (digit <* char '.')
    digit

(* Parses one "name: value" header line, terminated by [eol], into a
 * [(name, value)] pair. The value is passed through [String.trim]. *)
let header =
  (* From RFC7230§3.2.4:

       "No whitespace is allowed between the header field-name and colon.  In
       the past, differences in the handling of such whitespace have led to
       security vulnerabilities in request routing and response handling.  A
       server MUST reject any received request message that contains whitespace
       between a header field-name and colon with a response code of 400 (Bad
       Request).  A proxy MUST remove any such whitespace from a response
       message before forwarding the message downstream."

    This can be detected by checking the message and marks in a parse failure,
    which should look like this when serialized "... > header > :". *)
  lift2 (fun key value -> (key, value))
    (take_till P.is_space_or_colon <* char ':' <* spaces)
    (take_till P.is_cr <* eol >>| String.trim)
  <* commit
  <?> "header"

(* Parses [header] lines until the next character is '\r' (which is left
 * unconsumed), and converts the collected list with [Headers.of_list]. *)
let headers =
  let cons x xs = x :: xs in
  fix (fun headers ->
    let _emp = return [] in
    let _rec = lift2 cons header headers in
    peek_char_fail
    >>= function
      | '\r' -> _emp
      | _    -> _rec)
  >>| Headers.of_list

(* Parses a request head: method, ' ', target, ' ', version, [eol], the
 * headers, and a final [eol]. The result is built with [Request.create]. *)
let request =
  let meth = take_till P.is_space >>| Method.of_string in
  lift4 (fun meth target version headers ->
    Request.create ~version ~headers meth target)
    (meth                 <* char ' ')
    (take_till P.is_space <* char ' ')
    (version              <* eol <* commit)
    (headers              <* eol)

(* Parses a response head: version, ' ', status code, ' ', reason phrase,
 * [eol], the headers, and a final [eol]. The result is built with
 * [Response.create]. *)
let response =
  (* The status code must be between one and three decimal digits. *)
  let status =
    take_while P.is_digit
    >>= fun str ->
      if String.length str = 0
      then fail "status-code empty"
      else (
        if String.length str > 3
        then fail (Printf.sprintf "status-code too long: %S" str)
        else return (Status.of_string str))
  in
  lift4 (fun version status reason headers ->
    Response.create ~reason ~version ~headers status)
    (version              <* char ' ')
    (status               <* char ' ')
    (take_till P.is_cr    <* eol <* commit)
    (headers              <* eol)

(* Closes the body reader and returns the [commit] parser. *)
let finish body =
  Body.Reader.close body;
  commit

(* [schedule_size body n] consumes [n] bytes of input and then commits. If the
 * body's Faraday buffer is already closed the bytes are skipped with
 * [advance]; otherwise they are taken as a bigstring and scheduled into that
 * buffer. *)
let schedule_size body n =
  let faraday = Body.Reader.unsafe_faraday body in
  (* XXX(seliopou): performance regression due to switching to a single output
   * format in Faraday. Once a specialized operation is exposed to avoid the
   * intermediate copy, this should be back to the original performance. *)
  begin if Faraday.is_closed faraday
  then advance n
  else take_bigstring n >>| fun s -> Faraday.schedule_bigstring faraday s
  end *> commit

(* [body ~encoding body] parses a message body according to [encoding]
 * ([`Fixed n], [`Chunked] or [`Close_delimited]), feeding the data to [body]
 * through [schedule_size] and calling [finish body] once the body is
 * complete. *)
let body ~encoding body =
  (* Reads exactly [n] bytes, in pieces no larger than what is currently
   * available. Fails with [unexpected] if the input ends first. *)
  let rec fixed n ~unexpected =
    if n = 0L
    then unit
    else
      at_end_of_input
      >>= function
        | true  ->
          commit *> fail unexpected
        | false ->
          available >>= fun m ->
          let m' = Int64.(min (of_int m) n) in
          let n' = Int64.sub n m' in
          schedule_size body (Int64.to_int m') >>= fun () -> fixed n' ~unexpected
  in
  match encoding with
  | `Fixed n ->
    fixed n ~unexpected:"expected more from fixed body"
    >>= fun () -> finish body
  | `Chunked ->
    (* XXX(seliopou): The [eol] in this parser should really parse a collection
     * of "chunk extensions", as defined in RFC7230§4.1. These do not show up
     * in the wild very frequently, and the httpaf API has no way of exposing
     * them to the user, so for now the parser does not attempt to recognize
     * them. This means that any chunked messages that contain chunk extensions
     * will fail to parse. *)
    fix (fun p ->
      let _hex =
        (take_while1 P.is_hex >>= fun size -> hex size)
        (* Consumes the CRLF that ends the chunk-size line. Chunk extensions
         * are not recognized here (see the note above). *)
        <* (eol *> commit)
      in
      _hex >>= fun size ->
      if size = 0L
      then eol *> finish body
      else fixed size ~unexpected:"expected more from body chunk" *> eol *> p)
  | `Close_delimited ->
    (* Keep scheduling whatever is available until the end of input. *)
    fix (fun p ->
      let _rec = (available >>= fun n -> schedule_size body n) *> p in
      at_end_of_input
      >>= function
        | true  -> finish body
        | false -> _rec)

(* Incremental driver around the parsers above, built on
 * [Angstrom.Unbuffered]. *)
module Reader = struct
  module AU = Angstrom.Unbuffered

  type request_error = [
    | `Bad_request of Request.t
    | `Parse of string list * string ]

  type response_error = [
    | `Invalid_response_body_length of Response.t
    | `Parse of string list * string ]

  type 'error parse_state =
    | Done
    | Fail    of 'error
    | Partial of (Bigstringaf.t -> off:int -> len:int -> AU.more -> (unit, 'error) result AU.state)

  type 'error t =
    { parser              : (unit, 'error) result Angstrom.t
    ; mutable parse_state : 'error parse_state
      (* The state of the parse for the current request *)
    ; mutable closed      : bool
      (* Whether the input source has left the building, indicating that no
       * further input will be received. *)
    ; mutable wakeup      : Optional_thunk.t
    }

  type request  = request_error t
  type response = response_error t

  (* A fresh reader for [parser]: state [Done], not closed, no wakeup
   * callback registered. *)
  let create parser =
    { parser
    ; parse_state = Done
    ; closed      = false
    ; wakeup      = Optional_thunk.none
    }

  (* A parser that returns [Ok ()]. *)
  let ok = return (Ok ())

  let is_closed t =
    t.closed

  (* Registers [k] as the wakeup callback. Raises [Failure] if the reader is
   * closed or if a callback is already registered. *)
  let on_wakeup t k =
    if is_closed t
    then failwith "on_wakeup on closed reader"
    else if Optional_thunk.is_some t.wakeup
    then failwith "on_wakeup: only one callback can be registered at a time"
    else t.wakeup <- Optional_thunk.some k

  (* Clears the registered callback first, then calls it if there was one. *)
  let wakeup t =
    let f = t.wakeup in
    t.wakeup <- Optional_thunk.none;
    Optional_thunk.call_if_some f

  (* Builds a reader that parses a request head, hands the request and a body
   * reader to [handler], and then parses the body according to
   * [Request.body_length]. A [`Bad_request] length is reported as an [Error]
   * result without calling [handler]. *)
  let request ~wakeup handler =
    let parser handler =
      request <* commit >>= fun request ->
      match Request.body_length request with
      | `Error `Bad_request -> return (Error (`Bad_request request))
      | `Fixed 0L  ->
        handler request (Body.Reader.create_empty ());
        ok
      | `Fixed _ | `Chunked as encoding ->
        let request_body =
          Body.Reader.create Bigstringaf.empty
            ~when_ready_to_read:(Optional_thunk.some wakeup)
        in
        handler request request_body;
        body ~encoding request_body *> ok
    in
    create (parser handler)

  (* Builds a reader that parses a response head, pairs it with the first
   * entry of [request_queue] whose state is [Awaiting_response], hands the
   * response and a body reader to that entry's [response_handler], and then
   * parses the body according to [Response.body_length]. *)
  let response request_queue =
    let parser t request_queue =
      response <* commit >>= fun response ->
      assert (not (Queue.is_empty request_queue));
      (* A local exception is used to leave [Queue.iter] as soon as the first
       * matching entry is found. *)
      let exception Local of Respd.t in
      let respd = match
        (Queue.iter (fun respd ->
          if respd.Respd.state = Awaiting_response then
            raise (Local respd)) request_queue)
      with
      | exception Local respd -> respd
      | _ -> assert false
      in
      let request = Respd.request respd in
      let proxy = false in
      match Response.body_length ~request_method:request.meth response with
      | `Error `Bad_gateway           -> assert (not proxy); assert false
      | `Error `Internal_server_error ->
        return (Error (`Invalid_response_body_length response))
      | `Fixed 0L ->
        respd.response_handler response (Body.Reader.create_empty ());
        ok
      | `Fixed _ | `Chunked | `Close_delimited as encoding ->
        (* We do not trust the length provided in the [`Fixed] case, as the
           client could DOS easily. *)
        let response_body =
          Body.Reader.create Bigstringaf.empty
            ~when_ready_to_read:(Optional_thunk.some (fun () ->
              wakeup (Lazy.force t)))
        in
        respd.response_handler response response_body;
        body ~encoding response_body *> ok
    in
    (* The body's ready-to-read callback needs the reader itself, so the
     * reader is tied to its own parser through a lazy value. *)
    let rec t = lazy (create (parser t request_queue)) in
    Lazy.force t
  ;;

  (* [transition t state] records the outcome of a parser step in
   * [t.parse_state] and returns the number of bytes consumed (or committed,
   * for a partial state). *)
  let transition t state =
    match state with
    | AU.Done(consumed, Ok ()) ->
      t.parse_state <- Done;
      consumed
    | AU.Done(consumed, Error error) ->
      t.parse_state <- Fail error;
      consumed
    | AU.Fail(consumed, marks, msg) ->
      t.parse_state <- Fail (`Parse(marks, msg));
      consumed
    | AU.Partial { committed; continue } ->
      t.parse_state <- Partial continue;
      committed
  (* [start t state] records the initial state of a freshly started parser.
   * The state must not be [AU.Done] and must not have consumed any input. *)
  and start t state =
    match state with
    | AU.Done _         -> failwith "httpaf.Parse.unable to start parser"
    | AU.Fail(0, marks, msg) ->
      t.parse_state <- Fail (`Parse(marks, msg))
    | AU.Partial { committed = 0; continue } ->
      t.parse_state <- Partial continue
    | _ -> assert false
  ;;

  (* Feeds [len] bytes of [bs] starting at [off] to the parser and returns the
   * number of bytes consumed. If a parse finishes with input left over, a new
   * parse is started on the remainder. *)
  let rec _read_with_more t bs ~off ~len more =
    let initial = match t.parse_state with Done -> true | _ -> false in
    let consumed =
      match t.parse_state with
      | Fail _ -> 0
      (* Don't feed empty input when we're at a request boundary *)
      | Done when len = 0 -> 0
      | Done   ->
        start t (AU.parse t.parser);
        _read_with_more t bs ~off ~len more
      | Partial continue ->
        transition t (continue bs more ~off ~len)
    in
    (* Special case where the parser just started and was fed a zero-length
     * bigstring. Avoid putting the parser in an error state in this scenario.
     * If we were already in a `Partial` state, return the error. *)
    if initial && len = 0 then t.parse_state <- Done;
    match t.parse_state with
    | Done when consumed < len ->
      let off = off + consumed
      and len = len - consumed in
      consumed + _read_with_more t bs ~off ~len more
    | _ -> consumed
  ;;

  (* Same as [_read_with_more], and additionally marks the reader as closed
   * when [more] is [Complete]. *)
  let read_with_more t bs ~off ~len more =
    let consumed = _read_with_more t bs ~off ~len more in
    (match more with
    | Complete -> t.closed <- true
    | Incomplete -> ());
    consumed

  (* Marks the reader as closed. *)
  let force_close t =
    t.closed <- true
  ;;

  (* Reports the reader's next action: [`Error] after a failed parse,
   * [`Close] once closed, [`Start] when no parse is in progress, and [`Read]
   * when a parse is waiting for more input. A failure takes precedence over
   * being closed. *)
  let next t =
    match t.parse_state with
    | Fail failure -> `Error failure
    | _ when t.closed -> `Close
    | Done      -> `Start
    | Partial _ -> `Read
  ;;
end