123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438(**************************************************************************)(* *)(* OCaml *)(* *)(* Pierre Weis and Xavier Leroy, projet Cristal, INRIA Rocquencourt *)(* *)(* Copyright 1999 Institut National de Recherche en Informatique et *)(* en Automatique. *)(* *)(* All rights reserved. This file is distributed under the terms of *)(* the GNU Lesser General Public License version 2.1, with the *)(* special exception on linking described in the file LICENSE. *)(* *)(**************************************************************************)(* Extensible buffers *)typet={mutablebuffer:bytes;mutableposition:int;mutablelength:int;initial_buffer:bytes}(* Invariants: all parts of the code preserve the invariants that:
- [0 <= b.position <= b.length]
- [b.length = Bytes.length b.buffer]
Note in particular that [b.position = b.length] is legal,
it means that the buffer is full and will have to be extended
before any further addition. *)letcreaten=letn=ifn<1then1elseninletn=ifn>Sys.max_string_lengththenSys.max_string_lengthelseninlets=Bytes.createnin{buffer=s;position=0;length=n;initial_buffer=s}letcontentsb=Bytes.sub_stringb.buffer0b.positionletto_bytesb=Bytes.subb.buffer0b.positionletsubbofslen=ifofs<0||len<0||ofs>b.position-lentheninvalid_arg"Buffer.sub"elseBytes.sub_stringb.bufferofslenletblitsrcsrcoffdstdstofflen=iflen<0||srcoff<0||srcoff>src.position-len||dstoff<0||dstoff>(Bytes.lengthdst)-lentheninvalid_arg"Buffer.blit"elseBytes.unsafe_blitsrc.buffersrcoffdstdstofflenletnthbofs=ifofs<0||ofs>=b.positiontheninvalid_arg"Buffer.nth"elseBytes.unsafe_getb.bufferofsletlengthb=b.positionletclearb=b.position<-0letresetb=b.position<-0;b.buffer<-b.initial_buffer;b.length<-Bytes.lengthb.buffer(* [resize b more] ensures that [b.position + more <= b.length] holds
by dynamically extending [b.buffer] if necessary -- and thus
increasing [b.length].
In particular, after [resize b more] is called, a direct access of
size [more] at [b.position] will always be in-bounds, so that
(unsafe_{get,set}) may be used for performance.
*)letresizebmore=letold_pos=b.positioninletold_len=b.lengthinletnew_len=refold_leninwhileold_pos+more>!new_lendonew_len:=2*!new_lendone;if!new_len>Sys.max_string_lengththenbeginifold_pos+more<=Sys.max_string_lengththennew_len:=Sys.max_string_lengthelsefailwith"Buffer.add: cannot grow buffer"end;letnew_buffer=Bytes.create!new_lenin(* PR#6148: let's keep using [blit] rather than [unsafe_blit] in
this tricky function that is slow anyway. *)Bytes.blitb.buffer0new_buffer0b.position;b.buffer<-new_buffer;b.length<-!new_len;assert(b.position+more<=b.length);assert(old_pos+more<=b.length);()(* Note: there are various situations (preemptive threads, signals and
gc finalizers) where OCaml code may be run asynchronously; in
particular, there may be a race with another user of [b], changing
its mutable fields in the middle of the [resize] call. The Buffer
module does not provide any correctness guarantee if that happens,
but we must still ensure that the datastructure invariants hold for
memory-safety -- as we plan to use [unsafe_{get,set}].
There are two potential allocation points in this function,
[ref] and [Bytes.create], but all reads and writes to the fields
of [b] happen before both of them or after both of them.
We therefore assume that [b.position] may change at these allocations,
and check that the [b.position + more <= b.length] postcondition
holds for both values of [b.position], before or after the function
is called. More precisely, the following invariants must hold if the
function returns correctly, in addition to the usual buffer invariants:
- [old(b.position) + more <= new(b.length)]
- [new(b.position) + more <= new(b.length)]
- [old(b.length) <= new(b.length)]
Note: [b.position + more <= old(b.length)] does *not*
hold in general, as it is precisely the case where you need
to call [resize] to increase [b.length].
Note: [assert] above does not mean that we know the conditions
always hold, but that the function may return correctly
only if they hold.
Note: the other functions in this module does not need
to be checked with this level of scrutiny, given that they
read/write the buffer immediately after checking that
[b.position + more <= b.length] hold or calling [resize].
*)letadd_charbc=letpos=b.positioninifpos>=b.lengththenresizeb1;Bytes.unsafe_setb.bufferposc;b.position<-pos+1letadd_utf_8_ucharbu=matchUchar.to_intuwith|uwhenu<0->assertfalse|uwhenu<=0x007F->add_charb(Char.unsafe_chru)|uwhenu<=0x07FF->letpos=b.positioninifpos+2>b.lengththenresizeb2;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(0xC0lor(ulsr6)));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(0x80lor(uland0x3F)));b.position<-pos+2|uwhenu<=0xFFFF->letpos=b.positioninifpos+3>b.lengththenresizeb3;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(0xE0lor(ulsr12)));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(0x80lor((ulsr6)land0x3F)));Bytes.unsafe_setb.buffer(pos+2)(Char.unsafe_chr(0x80lor(uland0x3F)));b.position<-pos+3|uwhenu<=0x10FFFF->letpos=b.positioninifpos+4>b.lengththenresizeb4;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(0xF0lor(ulsr18)));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(0x80lor((ulsr12)land0x3F)));Bytes.unsafe_setb.buffer(pos+2)(Char.unsafe_chr(0x80lor((ulsr6)land0x3F)));Bytes.unsafe_setb.buffer(pos+3)(Char.unsafe_chr(0x80lor(uland0x3F)));b.position<-pos+4|_->assertfalseletadd_utf_16be_ucharbu=matchUchar.to_intuwith|uwhenu<0->assertfalse|uwhenu<=0xFFFF->letpos=b.positioninifpos+2>b.lengththenresizeb2;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(ulsr8));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(uland0xFF));b.position<-pos+2|uwhenu<=0x10FFFF->letu'=u-0x10000inlethi=0xD800lor(u'lsr10)inletlo=0xDC00lor(u'land0x3FF)inletpos=b.positioninifpos+4>b.lengththenresizeb4;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(hilsr8));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(hiland0xFF));Bytes.unsafe_setb.buffer(pos+2)(Char.unsafe_chr(lolsr8));Bytes.unsafe_setb.buffer(pos+3)(Char.unsafe_chr(loland0xFF));b.position<-pos+4|_->assertfalseletadd_utf_16le_ucharbu=matchUchar.to_intuwith|uwhenu<0->assertfalse|uwhenu<=0xFFFF->letpos=b.positioninifpos+2>b.lengththenresizeb2;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(uland0xFF));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(ulsr8));b.position<-pos+2|uwhenu<=0x10FFFF->letu'=u-0x10000inlethi=0xD800lor(u'lsr10)inletlo=0xDC00lor(u'land0x3FF)inletpos=b.positioninifpos+4>b.lengththenresizeb4;Bytes.unsafe_setb.buffer(pos)(Char.unsafe_chr(hiland0xFF));Bytes.unsafe_setb.buffer(pos+1)(Char.unsafe_chr(hilsr8));Bytes.unsafe_setb.buffer(pos+2)(Char.unsafe_chr(loland0xFF));Bytes.unsafe_setb.buffer(pos+3)(Char.unsafe_chr(lolsr8));b.position<-pos+4|_->assertfalseletadd_substringbsoffsetlen=ifoffset<0||len<0||offset>String.lengths-lentheninvalid_arg"Buffer.add_substring/add_subbytes";letnew_position=b.position+leninifnew_position>b.lengththenresizeblen;Bytes.unsafe_blit_stringsoffsetb.bufferb.positionlen;b.position<-new_positionletadd_subbytesbsoffsetlen=add_substringb(Bytes.unsafe_to_strings)offsetlenletadd_stringbs=letlen=String.lengthsinletnew_position=b.position+leninifnew_position>b.lengththenresizeblen;Bytes.unsafe_blit_strings0b.bufferb.positionlen;b.position<-new_positionletadd_bytesbs=add_stringb(Bytes.unsafe_to_strings)letadd_bufferbbs=add_subbytesbbs.buffer0bs.position(* this (private) function could move into the standard library *)letreally_input_up_toicbufofslen=letrecloopicbuf~already_read~ofs~to_read=ifto_read=0thenalready_readelsebeginletr=inputicbufofsto_readinifr=0thenalready_readelsebeginletalready_read=already_read+rinletofs=ofs+rinletto_read=to_read-rinloopicbuf~already_read~ofs~to_readendendinloopicbuf~already_read:0~ofs~to_read:lenletunsafe_add_channel_up_tobiclen=ifb.position+len>b.lengththenresizeblen;letn=really_input_up_toicb.bufferb.positionlenin(* The assertion below may fail in weird scenario where
threaded/finalizer code, run asynchronously during the
[really_input_up_to] call, races on the buffer; we don't ensure
correctness in this case, but need to preserve the invariants for
memory-safety (see discussion of [resize]). *)assert(b.position+n<=b.length);b.position<-b.position+n;nletadd_channelbiclen=iflen<0||len>Sys.max_string_lengththen(* PR#5004 *)invalid_arg"Buffer.add_channel";letn=unsafe_add_channel_up_tobiclenin(* It is intentional that a consumer catching End_of_file
will see the data written (see #6719, #7136). *)ifn<lenthenraiseEnd_of_file;()letoutput_bufferocb=outputocb.buffer0b.positionletclosing=function|'('->')'|'{'->'}'|_->assertfalse(* opening and closing: open and close characters, typically ( and )
k: balance of opening and closing chars
s: the string where we are searching
start: the index where we start the search. *)letadvance_to_closingopeningclosingksstart=letrecadvancekilim=ifi>=limthenraiseNot_foundelseifs.[i]=openingthenadvance(k+1)(i+1)limelseifs.[i]=closingthenifk=0thenielseadvance(k-1)(i+1)limelseadvancek(i+1)liminadvancekstart(String.lengths)letadvance_to_non_alphasstart=letrecadvanceilim=ifi>=limthenlimelsematchs.[i]with|'a'..'z'|'A'..'Z'|'0'..'9'|'_'->advance(i+1)lim|_->iinadvancestart(String.lengths)(* We are just at the beginning of an ident in s, starting at start. *)letfind_identsstartlim=ifstart>=limthenraiseNot_foundelsematchs.[start]with(* Parenthesized ident ? *)|'('|'{'asc->letnew_start=start+1inletstop=advance_to_closingc(closingc)0snew_startinString.subsnew_start(stop-start-1),stop+1(* Regular ident *)|_->letstop=advance_to_non_alphas(start+1)inString.subsstart(stop-start),stop(* Substitute $ident, $(ident), or ${ident} in s,
according to the function mapping f. *)letadd_substitutebfs=letlim=String.lengthsinletrecsubstpreviousi=ifi<limthenbeginmatchs.[i]with|'$'ascurrentwhenprevious='\\'->add_charbcurrent;subst' '(i+1)|'$'->letj=i+1inletident,next_i=find_identsjliminadd_stringb(fident);subst' 'next_i|currentwhenprevious=='\\'->add_charb'\\';add_charbcurrent;subst' '(i+1)|'\\'ascurrent->substcurrent(i+1)|current->add_charbcurrent;substcurrent(i+1)endelseifprevious='\\'thenadd_charbpreviousinsubst' '0lettruncateblen=iflen<0||len>lengthbtheninvalid_arg"Buffer.truncate"elseb.position<-len(** {1 Iterators} *)letto_seqb=letrecauxi()=(* Note that b.position is not a constant and cannot be lifted out of aux *)ifi>=b.positionthenSeq.Nilelseletx=Bytes.unsafe_getb.bufferiinSeq.Cons(x,aux(i+1))inaux0letto_seqib=letrecauxi()=(* Note that b.position is not a constant and cannot be lifted out of aux *)ifi>=b.positionthenSeq.Nilelseletx=Bytes.unsafe_getb.bufferiinSeq.Cons((i,x),aux(i+1))inaux0letadd_seqbseq=Seq.iter(add_charb)seqletof_seqi=letb=create32inadd_seqbi;b(** {6 Binary encoding of integers} *)externalunsafe_set_int8:bytes->int->int->unit="%bytes_unsafe_set"externalunsafe_set_int16:bytes->int->int->unit="%caml_bytes_set16u"externalunsafe_set_int32:bytes->int->int32->unit="%caml_bytes_set32u"externalunsafe_set_int64:bytes->int->int64->unit="%caml_bytes_set64u"externalswap16:int->int="%bswap16"externalswap32:int32->int32="%bswap_int32"externalswap64:int64->int64="%bswap_int64"letadd_int8bx=letnew_position=b.position+1inifnew_position>b.lengththenresizeb1;unsafe_set_int8b.bufferb.positionx;b.position<-new_positionletadd_int16_nebx=letnew_position=b.position+2inifnew_position>b.lengththenresizeb2;unsafe_set_int16b.bufferb.positionx;b.position<-new_positionletadd_int32_nebx=letnew_position=b.position+4inifnew_position>b.lengththenresizeb4;unsafe_set_int32b.bufferb.positionx;b.position<-new_positionletadd_int64_nebx=letnew_position=b.position+8inifnew_position>b.lengththenresizeb8;unsafe_set_int64b.bufferb.positionx;b.position<-new_positionletadd_int16_lebx=add_int16_neb(ifSys.big_endianthenswap16xelsex)letadd_int16_bebx=add_int16_neb(ifSys.big_endianthenxelseswap16x)letadd_int32_lebx=add_int32_neb(ifSys.big_endianthenswap32xelsex)letadd_int32_bebx=add_int32_neb(ifSys.big_endianthenxelseswap32x)letadd_int64_lebx=add_int64_neb(ifSys.big_endianthenswap64xelsex)letadd_int64_bebx=add_int64_neb(ifSys.big_endianthenxelseswap64x)letadd_uint8=add_int8letadd_uint16_ne=add_int16_neletadd_uint16_le=add_int16_leletadd_uint16_be=add_int16_be