123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428(* This code is inspired by mimestring.ml from OcamlNet *)(* Copyright Gerd Stolpmann, Patrick Doane *)(* Modified for Ocsigen/Lwt by Nataliya Guts and Vincent Balat *)(*VVV Check wether we should support int64 for large files? *)openLwt.InfixmoduleS=Ocsigen_lib.Netstring_pcreletsection=Lwt_log.Section.make"ocsigen:server:multipart"exceptionMultipart_errorofstringexceptionOcsigen_upload_forbiddenletmatch_endresult=snd(Pcre.get_substring_ofsresult0)letcr_or_lf_re=S.regexp"[\013\n]"letheader_stripped_re=S.regexp"([^ \t\r\n:]+):[ \t]*((.*[^ \t\r\n])?([ \t\r]*\n[ \t](.*[^ \t\r\n])?)*)[ \t\r]*\n"letheader_unstripped_re=S.regexp"([^ \t\r\n:]+):([ \t]*.*\n([ \t].*\n)*)"(* This much simpler expression returns the name and the unstripped
value. *)letempty_line_re=S.regexp"\013?\n";;letend_of_header_re=S.regexp"\n\013?\n";;letscan_header?(downcase=true)?(unfold=true)?(strip=false)parstr~start_pos~end_pos=letheader_re=ifunfold||stripthenheader_stripped_reelseheader_unstripped_reinletrecparse_headeril=matchS.string_matchheader_reparstriwith|Somer->leti'=match_endrinifi'>end_posthenraise(Multipart_error"Mimestring.scan_header");letname=ifdowncasethenString.lowercase_ascii(S.matched_groupr1parstr)elseS.matched_groupr1parstrinletvalue_with_crlf=S.matched_groupr2parstrinletvalue=ifunfoldthenS.global_replacecr_or_lf_re""value_with_crlfelsevalue_with_crlfinparse_headeri'((name,value)::l)|None->(* The header must end with an empty line *)(matchS.string_matchempty_line_reparstriwith|Somer'->List.revl,match_endr'|None->raise(Multipart_error"Mimestring.scan_header"))inparse_headerstart_pos[]letread_header?downcase?unfold?strips=letrecfind_end_of_headers=Lwt.catch(fun()->letb=Ocsigen_stream.current_buffersin(* Maybe the header is empty. In this case, there is an empty
line right at the beginning *)matchS.string_matchempty_line_reb0with|Somer->Lwt.return(s,match_endr)|None->(* Search for an empty line *)Lwt.return(s,match_end(snd(S.search_forwardend_of_header_reb0))))(function|Not_found->Ocsigen_stream.enlarge_streams>>=(function|Ocsigen_stream.Finished_->Lwt.failOcsigen_stream.Stream_too_small|Ocsigen_stream.Cont_ass->find_end_of_headers)|e->Lwt.faile)infind_end_of_headers>>=fun(s,end_pos)->letb=Ocsigen_stream.current_buffersinleth,_=scan_header?downcase?unfold?stripb~start_pos:0~end_posinOcsigen_stream.skips(Int64.of_intend_pos)>>=funs->Lwt.return(s,h)letlf_re=S.regexp"[\n]"letrecsearch_windowsrestart=tryLwt.return(s,snd(S.search_forwardre(Ocsigen_stream.current_buffers)start))withNot_found->Ocsigen_stream.enlarge_streams>>=function|Ocsigen_stream.Finished_->Lwt.failOcsigen_stream.Stream_too_small|Ocsigen_stream.Cont_ass->search_windowsrestartletsearch_end_of_linesk=(* Search LF beginning at position k *)Lwt.catch(fun()->search_windowslf_rek>>=fun(s,x)->Lwt.return(s,match_endx))(function|Not_found->Lwt.fail(Multipart_error"read_multipart_body: MIME boundary without line end")|e->Lwt.faile)letsearch_first_boundary~boundarys=(* Search boundary per regexp; return the position of the
character immediately following the boundary (on the same
line), or raise Not_found. *)letre=S.regexp("\n--"^Pcre.quoteboundary)insearch_windowsre0>>=fun(s,x)->Lwt.return(s,match_endx)letcheck_beginning_is_boundary~boundarys=letdel="--"^boundaryinletldel=String.lengthdelinOcsigen_stream.stream_wants(ldel+2)>>=function|Ocsigen_stream.Finished_asstr2->Lwt.return(str2,false,false)|Ocsigen_stream.Cont(ss,_f)asstr2->letlong=String.lengthssinletisdelim=(long>=ldel)&&(String.subss0ldel=del)inletislast=isdelim&&(String.subssldel2="--")inLwt.return(str2,isdelim,islast)letrecparse_parts~boundary~decode_partsuses_crlf=(* PRE: [s] is at the beginning of the next part. [uses_crlf] must
be true if CRLF is used as EOL sequence, and false if only LF is
used as EOL sequence. *)letdelimiter=(ifuses_crlfthen"\r"else"")^"\n--"^boundaryinOcsigen_stream.substreamdelimiters>>=funa->decode_parta>>=fun(y,s)->(* Now the position of [s] is at the beginning of the delimiter.
Check if there is a "--" after the delimiter (==> last part) *)letl_delimiter=String.lengthdelimiterinOcsigen_stream.nexts>>=funs->Ocsigen_stream.stream_wants(l_delimiter+2)>>=funs->letlast_part=matchswith|Ocsigen_stream.Finished_->false|Ocsigen_stream.Cont(ss,_f)->letlong=String.lengthssinlong>=l_delimiter+2&&ss.[l_delimiter]='-'&&ss.[l_delimiter+1]='-'iniflast_partthenLwt.return[y]elsesearch_end_of_lines2>>=fun(s,k)->(* [k]: Beginning of next part *)Ocsigen_stream.skips(Int64.of_intk)>>=funs->parse_parts~boundary~decode_partsuses_crlf>>=funl->Lwt.return(y::l)letread_multipart_body~boundary~decode_parts=(* Check whether s directly begins with a boundary *)check_beginning_is_boundary~boundarys>>=fun(s,b,islast)->ifislastthenLwt.return[]elseifbthen(* Move to the beginning of the next line *)search_end_of_lines0>>=fun(s,k_eol)->letuses_crlf=(Ocsigen_stream.current_buffers).[k_eol-2]='\r'inOcsigen_stream.skips(Int64.of_intk_eol)>>=funs->(* Begin with first part: *)parse_parts~boundary~decode_partsuses_crlfelse(* Look for the first boundary *)Lwt.catch(fun()->search_first_boundary~boundarys>>=fun(s,k_eob)->search_end_of_linesk_eob>>=fun(s,k_eol)->letuses_crlf=(Ocsigen_stream.current_buffers).[k_eol-2]='\r'in(* Printf.printf "k_eol=%d\n" k_eol; *)Ocsigen_stream.skips(Int64.of_intk_eol)>>=funs->(* Begin with first part: *)parse_parts~boundary~decode_partsuses_crlf)(function|Not_found->(* No boundary at all, empty body *)Lwt.return[]|e->Lwt.faile)letempty_stream=Ocsigen_stream.get(Ocsigen_stream.make(fun()->Ocsigen_stream.emptyNone))letdecode_part~max_size~create~add~stopstream=read_headerstream>>=fun(s,header)->letp=createheaderinletrecwhile_streamsize=function|Ocsigen_stream.FinishedNone->Lwt.return(size,empty_stream)|Ocsigen_stream.Finished(Somess)->Lwt.return(size,ss)|Ocsigen_stream.Cont(stri,f)->letlong=String.lengthstriinletsize2=Int64.addsize(Int64.of_intlong)inifmatchmax_sizewith|None->false|Somem->Int64.comparesize2m>0thenLwt.failOcsigen_lib.Ocsigen_Request_too_longelseifstri=""thenOcsigen_stream.nextf>>=while_streamsizeelseaddpstri>>=fun()->Ocsigen_stream.nextf>>=while_streamsize2inLwt.catch(fun()->while_streamInt64.zeros>>=fun(size,s)->stopsizep>>=funr->Lwt.return(r,s))(funerror->stopInt64.zerop>>=fun_->Lwt.failerror)letscan_multipart_body_from_stream?max_size~boundary~create~add~stops=letdecode_part=decode_part~max_size~create~add~stopinLwt.catch(fun()->(* read the multipart body: *)Ocsigen_stream.nexts>>=funs->read_multipart_body~boundary~decode_parts>>=fun_->Lwt.return())(function|Ocsigen_stream.Stream_too_small->Lwt.failOcsigen_lib.Ocsigen_Bad_Request|e->Lwt.faile)letget_boundaryctparams=List.assoc"boundary"ctparamsletcounter=letc=ref(Random.int1000000)infun()->c:=!c+1;!cletfieldfieldcontent_disp=let(_,res)=S.search_forward(S.regexp(field^"=.([^\"]*).;?"))content_disp0inS.matched_groupres1content_displetparse_content_types=matchOcsigen_lib.String.split';'swith|[]->None|a::l->trylettyp,subtype=Ocsigen_lib.String.sep'/'ainletparams=tryList.map(Ocsigen_lib.String.sep'=')lwithNot_found->[]in(*VVV If syntax error, we return no parameter at all *)Some((typ,subtype),params)(*VVV If syntax error in type, we return None *)withNot_found->Nonetypecontent_type=(string*string)*(string*string)listtypefile_info={tmp_filename:string;filesize:int64;raw_original_filename:string;file_content_type:((string*string)*(string*string)list)option}typepost_data=(string*string)list*(string*file_info)listletpost_params_form_urlencodedbody_gen__=Lwt.catch(fun()->letbody=Ocsigen_stream.getbody_genin(* BY, adapted from a previous comment. Should this stream be
consumed in case of error? *)Ocsigen_stream.string_of_stream(Ocsigen_config.get_maxrequestbodysizeinmemory())body>>=funr->letr=Ocsigen_lib.Url.fixup_url_stringrinletl=Uri.query_of_encodedr|>List.map(fun(s,l)->List.map(funv->s,v)l)|>List.concatinLwt.return(l,[]))(function|Ocsigen_stream.String_too_large->Lwt.failOcsigen_lib.Input_is_too_large|e->Lwt.faile)letpost_params_multipart_form_datactparamsbody_genupload_dirmax_size=(* Same question here, should this stream be consumed after an
error? *)letbody=Ocsigen_stream.getbody_genandboundary=get_boundaryctparamsandparams=ref[]andfiles=ref[]andfilenames=ref[]inletrecaddps=matchpwith|_,`No_fileto_buf->Buffer.add_stringto_bufs;Lwt.return()|_,`Some_file(_,_,wh,_)->letlen=String.lengthsinletr=Unix.write_substringwhs0leninifr<lenthen(*XXXX Inefficient if s is long *)addp(String.subsr(len-r))elseLwt_unix.yield()inletcreatehs=letcontent_type=tryletct=List.assoc"content-type"hsinparse_content_typectwith_->Noneinletcd=List.assoc"content-disposition"hsinletp_name=field"name"cdintryletstore=field"filename"cdinmatchupload_dirwith|Somedname->letfname=Printf.sprintf"%s/%f-%d"dname(Unix.gettimeofday())(counter())inletfd=Unix.openfilefname[Unix.O_CREAT;Unix.O_TRUNC;Unix.O_WRONLY;Unix.O_NONBLOCK]0o666inLwt_log.ign_info~section("Upload file opened: "^fname);filenames:=fname::!filenames;p_name,`Some_file(fname,store,fd,content_type)|None->raiseOcsigen_upload_forbiddenwithNot_found->p_name,`No_file(Buffer.create1024)andstopfilesize=function|p_name,`No_fileto_buf->params:=!params@[p_name,Buffer.contentsto_buf];Lwt.return()(* in the end ? *)|p_name,`Some_file(tmp_filename,raw_original_filename,wh,file_content_type)->letfile_info={tmp_filename;filesize;raw_original_filename;file_content_type;}infiles:=!files@[p_name,file_info];Unix.closewh;Lwt.return()inscan_multipart_body_from_stream?max_size~boundary~create~add~stopbody>>=fun()->(*VVV Does scan_multipart_body_from_stream read until the end or
only what it needs? If we do not consume here, the following
request will be read only when this one is finished ... *)Ocsigen_stream.consumebody_gen>>=fun()->Lwt.return(!params,!files)letpost_params~content_typebody_gen=let(ct,cst),ctparams=content_typeinmatchString.lowercase_asciict,String.lowercase_asciicstwith|"application","x-www-form-urlencoded"->Some(body_gen|>Cohttp_lwt.Body.to_stream|>Ocsigen_stream.of_lwt_stream|>post_params_form_urlencoded)|"multipart","form-data"->Some(body_gen|>Cohttp_lwt.Body.to_stream|>Ocsigen_stream.of_lwt_stream|>post_params_multipart_form_datactparams)|_->None