123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195open!ImportmoduleInt=Int0moduleString=String0moduleArray=Array0(* We maintain the property that all values of type [t] do not have the tag
[double_array_tag]. Some functions below assume this in order to avoid testing the
tag, and will segfault if this property doesn't hold. *)typet=Stdlib.Obj.tarrayletinvariantt=assert(Stdlib.Obj.tag(Stdlib.Obj.reprt)<>Stdlib.Obj.double_array_tag);;letlength=Array.length(* would check for float arrays in 32 bit, but whatever *)letsexp_of_tt=Sexp.Atom(String.concat~sep:""["<Obj_array.t of length ";Int.to_string(lengtht);">"]);;letzero_obj=Stdlib.Obj.repr(0:int)(* We call [Array.create] with a value that is not a float so that the array doesn't get
tagged with [Double_array_tag]. *)letcreate_zero~len=Array.create~lenzero_objletempty=[||]typenot_a_float=|Not_a_float_0|Not_a_float_1ofintlet_not_a_float_0=Not_a_float_0let_not_a_float_1=Not_a_float_142letgetti=(* Make the compiler believe [t] is an array not containing floats so it does not check
if [t] is tagged with [Double_array_tag]. It is NOT ok to use [int array] since (if
this function is inlined and the array contains in-heap boxed values) wrong register
typing may result, leading to a failure to register necessary GC roots. *)Stdlib.Obj.repr(* [Sys.opaque_identity] is required on the array because this code breaks the usual
assumptions about array kinds that the Flambda 2 optimiser can see. *)((Sys.opaque_identity(Stdlib.Obj.magic(t:t):not_a_floatarray)).(i):not_a_float);;let[@inlinealways]unsafe_getti=(* Make the compiler believe [t] is an array not containing floats so it does not check
if [t] is tagged with [Double_array_tag]. *)Stdlib.Obj.repr(Array.unsafe_get(Sys.opaque_identity(Obj_local.magic(t:t):not_a_floatarray))i:not_a_float);;let[@inlinealways]unsafe_set_with_caml_modifytiobj=(* Same comment as [unsafe_get]. Sys.opaque_identity prevents the compiler from
potentially wrongly guessing the type of the array based on the type of element, that
is prevent the implication: (Obj.tag obj = Obj.double_tag) => (Obj.tag t =
Obj.double_array_tag) which flambda has tried in the past (at least that's assuming
the compiler respects Sys.opaque_identity, which is not always the case). *)Array.unsafe_set(Sys.opaque_identity(Obj_local.magic(t:t):not_a_floatarray))i(Stdlib.Obj.obj(Sys.opaque_identityobj):not_a_float);;let[@inlinealways]set_with_caml_modifytiobj=(* same as unsafe_set_with_caml_modify but safe *)(Sys.opaque_identity(Stdlib.Obj.magic(t:t):not_a_floatarray)).(i)<-(Stdlib.Obj.obj(Sys.opaque_identityobj):not_a_float);;let[@inlinealways]unsafe_set_int_assuming_currently_inttiint=(* This skips [caml_modify], which is OK if both the old and new values are integers. *)Array.unsafe_set(Sys.opaque_identity(Obj_local.magic(t:t):intarray))i(Sys.opaque_identityint);;(* For [set] and [unsafe_set], if a pointer is involved, we first do a physical-equality
test to see if the pointer is changing. If not, we don't need to do the [set], which
saves a call to [caml_modify]. We think this physical-equality test is worth it
because it is very cheap (both values are already available from the [is_int] test)
and because [caml_modify] is expensive. *)letsettiobj=(* We use [get] first but then we use [Array.unsafe_set] since we know that [i] is
valid. *)letold_obj=gettiinifStdlib.Obj.is_intold_obj&&Stdlib.Obj.is_intobjthenunsafe_set_int_assuming_currently_intti(Stdlib.Obj.objobj:int)elseifnot(phys_equalold_objobj)thenunsafe_set_with_caml_modifytiobj;;let[@inlinealways]unsafe_settiobj=letold_obj=unsafe_gettiinifStdlib.Obj.is_intold_obj&&Stdlib.Obj.is_intobjthenunsafe_set_int_assuming_currently_intti(Stdlib.Obj.objobj:int)elseifnot(phys_equalold_objobj)thenunsafe_set_with_caml_modifytiobj;;let[@inlinealways]unsafe_set_omit_phys_equal_checktiobj=letold_obj=unsafe_gettiinifStdlib.Obj.is_intold_obj&&Stdlib.Obj.is_intobjthenunsafe_set_int_assuming_currently_intti(Stdlib.Obj.objobj:int)elseunsafe_set_with_caml_modifytiobj;;letswaptij=leta=gettiinletb=gettjinunsafe_settib;unsafe_settja;;letcreate~lenx=(* If we can, use [Array.create] directly. Even though [is_int] check is subsumed by
the tag check, checking it is much faster, since it avoids a C function call. *)ifStdlib.Obj.is_intx||Stdlib.Obj.tagx<>Stdlib.Obj.double_tagthenArray.create~lenxelse((* Otherwise use [create_zero] and set the contents *)lett=create_zero~leninletx=Sys.opaque_identityxinfori=0tolen-1dounsafe_set_with_caml_modifytixdone;t);;letsingletonobj=create~len:1obj(* Pre-condition: t.(i) is an integer. *)letunsafe_set_assuming_currently_inttiobj=ifStdlib.Obj.is_intobjthenunsafe_set_int_assuming_currently_intti(Stdlib.Obj.objobj:int)else(* [t.(i)] is an integer and [obj] is not, so we do not need to check if they are
equal. *)unsafe_set_with_caml_modifytiobj;;letunsafe_set_inttiint=letold_obj=unsafe_gettiinifStdlib.Obj.is_intold_objthenunsafe_set_int_assuming_currently_inttiintelseunsafe_set_with_caml_modifyti(Stdlib.Obj.reprint);;letunsafe_clear_if_pointerti=letold_obj=unsafe_gettiinifnot(Stdlib.Obj.is_intold_obj)thenunsafe_set_with_caml_modifyti(Stdlib.Obj.repr0);;(** [unsafe_blit] is like [Array.blit], except it uses our own for-loop to avoid
caml_modify when possible. Its performance is still not comparable to a memcpy. *)letunsafe_blit~src~src_pos~dst~dst_pos~len=(* When [phys_equal src dst], we need to check whether [dst_pos < src_pos] and have the
for loop go in the right direction so that we don't overwrite data that we still need
to read. When [not (phys_equal src dst)], doing this is harmless. From a
memory-performance perspective, it doesn't matter whether one loops up or down.
Constant-stride access, forward or backward, should be indistinguishable (at least on
an intel i7). So, we don't do a check for [phys_equal src dst] and always loop up in
that case. *)ifdst_pos<src_posthenfori=0tolen-1dounsafe_setdst(dst_pos+i)(unsafe_getsrc(src_pos+i))doneelsefori=len-1downto0dounsafe_setdst(dst_pos+i)(unsafe_getsrc(src_pos+i))done;;includeBlit.Make(structtypenonrect=tletcreate=create_zeroletlength=lengthletunsafe_blit=unsafe_blitend)letcopysrc=letdst=create_zero~len:(lengthsrc)inblito~src~dst();dst;;