src/owl/dense/owl_dense_ndarray_generic.ml"(*
* OWL - OCaml Scientific and Engineering Computing
* Copyright (c) 2016-2018 Liang Wang <liang.wang@cl.cam.ac.uk>
*)[@@@warning"-32"]openOwl_typesopenBigarrayopenOwl_ndarraytype('a,'b)t=('a,'b,c_layout)Genarray.ttype('a,'b)kind=('a,'b)Bigarray.kind(* Basic functions from Genarray module *)letemptykinddimension=Genarray.createkindc_layoutdimensionletgetxi=Genarray.getxiletsetxia=Genarray.setxialetnum_dimsx=Genarray.num_dimsxletshapex=Genarray.dimsxletnth_dimxi=Genarray.nth_dimxiletnumelx=Owl_utils.numelxletkindx=Genarray.kindxletlayoutx=Genarray.layoutxletsize_in_bytesx=Genarray.size_in_bytesxletsub_left=Genarray.sub_leftletsub_right=Genarray.sub_rightletslice_left=Genarray.slice_leftletslice_right=Genarray.slice_rightletcopyx=lety=empty(kindx)(shapex)in_owl_copy(kindx)(numelx)~ofsx:0~incx:1~ofsy:0~incy:1xy;yletcopy_~outsrc=ifOwl_ndarray._owl_ndarray_same_dataoutsrc=falsethen(letk=kindsrcinletn=numelsrcinletm=numeloutinassert(m=n);_owl_copykn~ofsx:0~incx:1~ofsy:0~incy:1srcout)letget_fancyaxisx=Owl_slicing.get_fancy_list_typaxisxletget_fancy_~outaxisx=Owl_slicing.get_fancy_list_typ_axisxoutletset_fancyaxisxy=Owl_slicing.set_fancy_list_typaxisxyletset_fancy_~outaxisxy=ifOwl_ndarray._owl_ndarray_same_dataoutx=falsethencopy_~outx;Owl_slicing.set_fancy_list_typaxisoutyletget_sliceaxisx=Owl_slicing.get_slice_list_typaxisxletget_slice_~outaxisx=Owl_slicing.get_slice_list_typ_axisxoutletset_sliceaxisxy=Owl_slicing.set_slice_list_typaxisxyletset_slice_~outaxisxy=ifOwl_ndarray._owl_ndarray_same_dataoutx=falsethencopy_~outx;Owl_slicing.set_slice_list_typaxisoutyletfillxa=Genarray.fillxaletreshapexd=letminus_one=Owl_utils.Array.countd(-1)inassert(minus_one<=1);ifminus_one=0thenreshapexdelse(letn=numelxinletm=Array.fold_right(*)d(-1)inlete=Array.map(funa->ifa=-1thenn/melsea)dinreshapexe)letreshape_~outx=ifOwl_ndarray._owl_ndarray_same_dataoutx=falsethencopy_~outxletresetx=Genarray.fillx(Owl_const.zero(kindx))letmmapfd?poskindshareddims=Unix.map_filefd?poskindc_layoutshareddimsletflattenx=reshapex[|numelx|]letinitkdf=letx=emptykdinlety=array1_of_genarray(flattenx)inletn=numelxinfori=0ton-1doA
rray1.unsafe_setyi(fi)done;xletinit_ndkdf=letx=emptykdinlety=array1_of_genarray(flattenx)inletn=numelxinlets=Owl_utils.calc_stridedinletj=Array.copysinfori=0ton-1doOwl_utils.index_1d_ndijs;Array1.unsafe_setyi(fj)done;xletsame_shapexy=(shapex)=(shapey)letsame_dataxy=Owl_ndarray._owl_ndarray_same_dataxyletreversex=lety=copyxinletn=numelxin_owl_copy(kindx)n~ofsx:0~incx:1~ofsy:(n-1)~incy:(-1)xy;yletreverse_~outx=ifOwl_ndarray._owl_ndarray_same_dataoutx=falsethen(copy_~outx);reverseout|>ignoreletrepeatxreps=(* check the validity of reps *)ifArray.exists((>)1)repsthenfailwith"repeat: repetition must be >= 1";let_kind=kindxinletx_dims=num_dimsxinassert(Array.lengthreps=x_dims);(* case 1: all repeats equal to 1 *)if(Array.for_all((=)1)reps)=truethencopyxelse(letx_shape=shapexinlety_shape=Array.map2(*)x_shaperepsinlety=empty_kindy_shapein(* case 2 : vector input *)if(x_dims=1)then(Owl_ndarray_repeat._ndarray_repeat_axis_kindxy0reps.(0))(* case 3: only one axis to be repeated *)elseif(Owl_utils_array.countreps1=x_dims-1)then(letr=ref(-1)inleta=ref(-1)inwhile!r=-1&&!a<x_dimsdoa:=!a+1;ifreps.(!a)!=1thenr:=reps.(!a)done;Owl_ndarray_repeat._ndarray_repeat_axis_kindxy!a!r)(* general case *)else(letreps'=reps|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inletx_shape'=x_shape|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inOwl_ndarray_repeat._ndarray_repeat_kindxyreps'x_shape';);reshapeyy_shape)letrepeat_~outxreps=(* check the validity of reps *)ifArray.exists((>)1)repsthenfailwith"repeat: repetition must be >= 1";let_kind=kindxinletx_dims=num_dimsxinassert(Array.lengthreps=x_dims);(* case 1: all repeats equal to 1 *)if(Array.for_all((=)1)reps)=truethencopy_x~outelse((* case 2 : vector input *)if(x_dims=1)then(Owl_ndarray_repeat._ndarray_repeat_axis_kindxout0reps.(0))(* case 3: only one axis to be repeated 
*)elseif(Owl_utils_array.countreps1=x_dims-1)then(letr=ref(-1)inleta=ref(-1)inwhile!r=-1&&!a<x_dimsdoa:=!a+1;ifreps.(!a)!=1thenr:=reps.(!a)done;Owl_ndarray_repeat._ndarray_repeat_axis_kindxout!a!r)(* general case *)else(letreps'=reps|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inletx_shape'=shapex|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inOwl_ndarray_repeat._ndarray_repeat_kindxoutreps'x_shape'))lettilexreps=(* check the validity of reps *)ifArray.exists((>)1)repsthenfailwith"tile: repitition must be >= 1";(* case 1: all repeats equal to 1 *)if(Array.for_all((=)1)reps)=truethencopyxelse((* align and promote the shape *)leta=num_dimsxinletb=Array.lengthrepsinletx,reps=matcha<bwith|true->letd=Owl_utils.Array.pad`Left1(b-a)(shapex)in(reshapexd),reps|false->letr=Owl_utils.Array.pad`Left1(a-b)repsinx,rinletx_shape=shapexinlety_shape=Array.map2(*)x_shaperepsinlet_kind=kindxinlety=empty_kindy_shapeinletx_dims=num_dimsxin(* case 2 : vector input *)if(x_dims=1)then(Owl_ndarray_repeat._ndarray_tile_axis_kindxy0reps.(0))(* case 3: only one axis to be repeated *)elseif(Owl_utils_array.countreps1=x_dims-1)then(letr=ref(-1)inletax=ref(-1)inwhile!r=-1&&!ax<x_dimsdoax:=!ax+1;ifreps.(!ax)!=1thenr:=reps.(!ax)done;Owl_ndarray_repeat._ndarray_tile_axis_kindxy!ax!r)(* general case *)else(letreps'=reps|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inletx_shape'=x_shape|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inOwl_ndarray_repeat._ndarray_tile_kindxyreps'x_shape');y)lettile_~outxreps=(* check the validity of reps *)ifArray.exists((>)1)repsthenfailwith"tile: repitition must be >= 1";(* case 1: all repeats equal to 1 *)if(Array.for_all((=)1)reps)=truethencopy_x~outelse((* align and promote the shape 
*)leta=num_dimsxinletb=Array.lengthrepsinletx,reps=matcha<bwith|true->letd=Owl_utils.Array.pad`Left1(b-a)(shapex)in(reshapexd),reps|false->letr=Owl_utils.Array.pad`Left1(a-b)repsinx,rinlet_kind=kindxinletx_dims=num_dimsxin(* case 2 : vector input *)if(x_dims=1)then(Owl_ndarray_repeat._ndarray_tile_axis_kindxout0reps.(0))(* case 3: only one axis to be repeated *)elseif(Owl_utils_array.countreps1=x_dims-1)then(letr=ref(-1)inletax=ref(-1)inwhile!r=-1&&!ax<x_dimsdoax:=!ax+1;ifreps.(!ax)!=1thenr:=reps.(!ax)done;Owl_ndarray_repeat._ndarray_tile_axis_kindxout!ax!r)(* general case *)else(letreps'=reps|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inletx_shape'=shapex|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inOwl_ndarray_repeat._ndarray_tile_kindxoutreps'x_shape'))letconcatenate?(axis=0)xs=letaxis=Owl_utils.adjust_indexaxis(num_dimsxs.(0))in(* get the shapes of all inputs and etc. *)letshapes=Array.mapshapexsinletshape0=Array.copyshapes.(0)inshape0.(axis)<-0;letacc_dim=ref0in(* validate all the input shapes; update step_sz *)letstep_sz=Array.(make(lengthxs)0)inArray.iteri(funishape1->step_sz.(i)<-(Owl_utils.calc_sliceshape1).(axis);acc_dim:=!acc_dim+shape1.(axis);shape1.(axis)<-0;assert(shape0=shape1);)shapes;(* allocalte space for new array *)let_kind=kindxs.(0)inshape0.(axis)<-!acc_dim;lety=empty_kindshape0in(* calculate the number of copies *)letslice_sz=(Owl_utils.calc_sliceshape0).(axis)inletm=numely/slice_szinletn=Array.lengthxsin(* init the copy location for all inputs *)letx_ofs=Array.maken0in(* copy data in the flattened space *)lety_ofs=ref0infor_i=0tom-1doforj=0ton-1do_owl_copy_kindstep_sz.(j)~ofsx:x_ofs.(j)~incx:1~ofsy:!y_ofs~incy:1xs.(j)y;x_ofs.(j)<-x_ofs.(j)+step_sz.(j);y_ofs:=!y_ofs+step_sz.(j);done;done;(* all done, return the combined result 
*)yletconcat_verticalx1x2=concatenate~axis:0[|x1;x2|]letconcat_horizontalx1x2=concatenate~axis:(num_dimsx1-1)[|x1;x2|]letconcat_vhxs=Array.map(concatenate~axis:1)xs|>concatenate~axis:0letsqueeze?(axis=[||])x=leta=matchArray.lengthaxiswith|0->Array.init(num_dimsx)(funi->i)|_->axisinlets=Owl_utils.Array.filteri(funiv->not(v==1&&Array.memia))(shapex)inreshapexsletexpand?(hi=false)xd=letd0=d-(num_dimsx)inmatchd0>0with|true->(ifhi=truethenOwl_utils.Array.pad`Right1d0(shapex)|>reshapexelseOwl_utils.Array.pad`Left1d0(shapex)|>reshapex)|false->xletresize?(head=true)xd=letn0=numelxinletn1=Array.fold_left(funab->a*b)1dinletofsx,ofsy=matchhead,n0<n1with|true,true->0,0|true,false->0,0|false,true->0,(n1-n0)|false,false->(n0-n1),0inmatchn0<n1with|true->(letk=kindxinlety=emptykdinfilly(Owl_const.zerok);_owl_copykn0~ofsx~incx:1~ofsy~incy:1xy;y)|false->(let_x=reshape_1xn0inlet_y=Array1.sub_xofsxn1|>genarray_of_array1inreshape_yd)letsortx=lety=copyxinOwl_ndarray._owl_sort(kindy)(numely)y;yletsort_x=Owl_ndarray._owl_sort(kindx)(numelx)xletstridesx=x|>shape|>Owl_utils.calc_strideletslice_sizex=x|>shape|>Owl_utils.calc_sliceletind=Owl_utils.indleti1d=Owl_utils.i1d(* align and calculate the output shape for broadcasting over [x0] and [x1] *)letbroadcast_align_shapex0x1=(* align the rank of inputs *)letd0=num_dimsx0inletd1=num_dimsx1inletd3=maxd0d1inlety0=expand~hi:falsex0d3inlety1=expand~hi:falsex1d3in(* check whether the shape is valid *)lets0=shapey0inlets1=shapey1inArray.iter2(funab->Owl_exception.(check(not(a<>1&&b<>1&&a<>b))NOT_BROADCASTABLE);)s0s1;(* calculate the output shape *)lets2=Array.map2maxs0s1in(* calculate the strides *)lett0=Owl_utils.calc_strides0|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inlett1=Owl_utils.calc_strides1|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1inlett2=Owl_utils.calc_strides2|>Array.mapInt64.of_int|>Array1.of_arrayint64c_layout|>genarray_of_array1in(* return aligned arrays, shapes, strides 
*)y0,y1,s0,s1,s2,t0,t1,t2(* general broadcast operation for add/sub/mul/div and etc.
This function compares the dimensions element-wise, from the highest to the
lowest, using the following broadcast rules (same as numpy): two dimensions
are compatible if 1. they are equal; or 2. either of them is 1.
*)

(* [broadcast_op ?out op x0 x1] aligns [x0] and [x1] with
   [broadcast_align_shape], then calls the kernel [op] with the aligned inputs,
   their int64 stride arrays, the destination and its stride array.
   The destination is [out] when supplied, otherwise a fresh array of kind
   [kind x0] with the broadcast shape. The destination is returned. *)
let broadcast_op ?out op x0 x1 =
  let y0, y1, _s0, _s1, out_shape, t0, t1, t2 = broadcast_align_shape x0 x1 in
  let dst =
    match out with
    | None   -> empty (kind x0) out_shape
    | Some o -> o
  in
  op y0 t0 y1 t1 dst t2;
  dst


(* the functions below broadcast among three variables x, y and z. *)

(* [broadcast_align_shape2 x0 x1 x2] left-pads the three shapes with 1s to a
   common rank, reshapes the inputs accordingly, computes the element-wise
   maximum shape [s3] and checks that every input dimension is either 1 or
   equal to the corresponding dimension of [s3] (NOT_BROADCASTABLE otherwise).
   Returns the reshaped arrays, the four shapes and the four stride arrays. *)
let broadcast_align_shape2 x0 x1 x2 =
  let s0, s1, s2 =
    Owl_utils_array.align3 `Left 1 (shape x0) (shape x1) (shape x2)
  in
  let y0 = reshape x0 s0 in
  let y1 = reshape x1 s1 in
  let y2 = reshape x2 s2 in
  let s3 = Owl_utils_array.map3 (fun a b c -> max a (max b c)) s0 s1 s2 in

  (* a dimension fits the output when it is 1 or already the output size *)
  let ensure_fits dim target =
    Owl_exception.(check (dim = 1 || dim = target) NOT_BROADCASTABLE)
  in
  Owl_utils_array.iter4 (fun a b c d ->
    ensure_fits a d;
    ensure_fits b d;
    ensure_fits c d
  ) s0 s1 s2 s3;

  (* strides of a shape, stored as a one-dimensional int64 ndarray *)
  let int64_strides s =
    Owl_utils.calc_stride s
    |> Array.map Int64.of_int
    |> Array1.of_array int64 c_layout
    |> genarray_of_array1
  in
  let t0 = int64_strides s0 in
  let t1 = int64_strides s1 in
  let t2 = int64_strides s2 in
  let t3 = int64_strides s3 in
  (* aligned arrays, then shapes, then strides *)
  y0, y1, y2, s0, s1, s2, s3, t0, t1, t2, t3


(* [broadcast_op2 ?out op x0 x1 x2] is the three-input counterpart of
   [broadcast_op]: the kernel [op] receives each aligned input followed by its
   stride array, then the destination and its stride array. The destination is
   [out] when given, otherwise a fresh array of kind [kind x0]. *)
let broadcast_op2 ?out op x0 x1 x2 =
  let y0, y1, y2, _s0, _s1, _s2, out_shape, t0, t1, t2, t3 =
    broadcast_align_shape2 x0 x1 x2
  in
  let dst =
    match out with
    | None   -> empty (kind x0) out_shape
    | Some o -> o
  in
  op y0 t0 y1 t1 y2 t2 dst t3;
  dst


(* mathematical functions
*)letmin_ix=lety=flattenx|>array1_of_genarrayinleti=_owl_min_i(kindx)(numelx)xinlets=Owl_utils.calc_stride(shapex)inletj=Array.copysinOwl_utils.index_1d_ndijs;y.{i},jletmax_ix=lety=flattenx|>array1_of_genarrayinleti=_owl_max_i(kindx)(numelx)xinlets=Owl_utils.calc_stride(shapex)inletj=Array.copysinOwl_utils.index_1d_ndijs;y.{i},jletminmax_ix=min_ix,max_ixletmin'x=x|>min_i|>fstletmax'x=x|>max_i|>fstletminmax'x=letminx_i,maxx_i=minmax_ixinfstminx_i,fstmaxx_iletaddxy=matchsame_shapexywith|true->(lety=copyyin_owl_add(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_add(kindx))xyletsubxy=matchsame_shapexywith|true->(lety=copyyin_owl_sub(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_sub(kindx))xyletmulxy=matchsame_shapexywith|true->(lety=copyyin_owl_mul(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_mul(kindx))xyletdivxy=matchsame_shapexywith|true->(lety=copyyin_owl_div(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_div(kindx))xyletadd_scalarxa=letx=copyxin_owl_add_scalar(kindx)(numelx)xxa;xletsub_scalarxa=add_scalarx(_neg_elt(kindx)a)letmul_scalarxa=letx=copyxin_owl_mul_scalar(kindx)(numelx)xxa;xletdiv_scalarxa=letx=copyxin_owl_div_scalar(kindx)(numelx)xxa;xletpowxy=matchsame_shapexywith|true->(lety=copyyin_owl_pow(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_pow(kindx))xyletatan2xy=matchsame_shapexywith|true->(lety=copyyin_owl_atan2(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_atan2(kindx))xylethypotxy=matchsame_shapexywith|true->(lety=copyyin_owl_hypot(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_hypot(kindx))xyletmin2xy=matchsame_shapexywith|true->(lety=copyyin_owl_min2(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_min2(kindx))xyletmax2xy=matchsame_shapexywith|true->(lety=copyyin_owl_max2(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_max2(kindx))xyletfmodxy=matchsame_shapexywith|true->(lety=copyyin_owl_fmod(kindx)(numelx)xyy;y)|false->broadcast_op(_owl_broadcast_fmod(kindx))xyletfmo
d_scalarxa=lety=empty(kindx)(shapex)in_owl_fmod_scalar(kindx)(numely)xya;yletscalar_fmodax=lety=empty(kindx)(shapex)in_owl_scalar_fmod(kindx)(numely)xya;yletfmaxyz=letxshp=shapexinletyshp=shapeyinletzshp=shapezinletrshp=Owl_utils_infer_shape.broadcast2xshpyshpzshpinletout=empty(kindx)rshpinifxshp=yshp&&yshp=zshpthenOwl_ndarray_fma._ndarray_fma(kindx)(numelx)xyzoutelse(let_op=Owl_ndarray_fma._ndarray_fma_broadcast(kindx)inbroadcast_op2_op~outxyz|>ignore);outletfma_?outxyz=letout=matchoutwithSomeo->o|None->xinletxshp=shapexinletyshp=shapeyinletzshp=shapezinifxshp=yshp&&yshp=zshpthenOwl_ndarray_fma._ndarray_fma(kindx)(numelx)xyzoutelse(let_op=Owl_ndarray_fma._ndarray_fma_broadcast(kindx)inbroadcast_op2_op~outxyz|>ignore)letssqr_diff'xy=_owl_ssqr_diff(kindx)(numelx)xyletabsx=lety=copyxin_owl_abs(kindx)(numely)xy;yletabs2x=lety=copyxin_owl_abs2(kindx)(numely)xy;yletconjx=lety=copyxin_owl_conj(kindx)(numely)xy;yletnegx=lety=copyxin_owl_neg(kindx)(numely)xy;yletrecix=lety=copyxin_owl_reci(kindx)(numely)xy;yletsignumx=lety=copyxin_owl_signum(kindx)(numely)xy;yletsqrx=lety=copyxin_owl_sqr(kindx)(numely)xy;yletsqrtx=lety=copyxin_owl_sqrt(kindx)(numely)xy;yletcbrtx=lety=copyxin_owl_cbrt(kindx)(numely)xy;yletexpx=lety=copyxin_owl_exp(kindx)(numely)xy;yletexp2x=lety=copyxin_owl_exp2(kindx)(numely)xy;yletexp10x=lety=copyxin_owl_exp10(kindx)(numely)xy;yletexpm1x=lety=copyxin_owl_expm1(kindx)(numely)xy;yletlogx=lety=copyxin_owl_log(kindx)(numely)xy;yletlog10x=lety=copyxin_owl_log10(kindx)(numely)xy;yletlog2x=lety=copyxin_owl_log2(kindx)(numely)xy;yletlog1px=lety=copyxin_owl_log1p(kindx)(numely)xy;yletsinx=lety=copyxin_owl_sin(kindx)(numely)xy;yletcosx=lety=copyxin_owl_cos(kindx)(numely)xy;ylettanx=lety=copyxin_owl_tan(kindx)(numely)xy;yletasinx=lety=copyxin_owl_asin(kindx)(numely)xy;yletacosx=lety=copyxin_owl_acos(kindx)(numely)xy;yletatanx=lety=copyxin_owl_atan(kindx)(numely)xy;yletsinhx=lety=copyxin_owl_sinh(kindx)(numely)xy;yletcoshx=lety=copyxin_owl_cosh(kindx)(numely)xy;yletta
nhx=lety=copyxin_owl_tanh(kindx)(numely)xy;yletasinhx=lety=copyxin_owl_asinh(kindx)(numely)xy;yletacoshx=lety=copyxin_owl_acosh(kindx)(numely)xy;yletatanhx=lety=copyxin_owl_atanh(kindx)(numely)xy;yletfloorx=lety=copyxin_owl_floor(kindx)(numely)xy;yletceilx=lety=copyxin_owl_ceil(kindx)(numely)xy;yletroundx=lety=copyxin_owl_round(kindx)(numely)xy;ylettruncx=lety=copyxin_owl_trunc(kindx)(numely)xy;yletfixx=lety=copyxin_owl_fix(kindx)(numely)xy;yletanglex=lety=copyxin_owl_angle(kindx)(numely)xy;yletprojx=lety=copyxin_owl_proj(kindx)(numely)xy;yleterfx=lety=copyxin_owl_erf(kindx)(numely)xy;yleterfcx=lety=copyxin_owl_erfc(kindx)(numely)xy;yletlogisticx=lety=copyxin_owl_logistic(kindx)(numely)xy;yletrelux=lety=copyxin_owl_relu(kindx)(numely)xy;yletelu?(alpha=1.0)x=lety=empty(kindx)(shapex)in_owl_elu(kindx)(numelx)xyalpha;yletleaky_relu?(alpha=0.2)x=lety=empty(kindx)(shapex)in_owl_leaky_relu(kindx)(numelx)xyalpha;yletsoftplusx=lety=copyxin_owl_softplus(kindx)(numely)xy;yletsoftsignx=lety=copyxin_owl_softsign(kindx)(numely)xy;yletsigmoidx=lety=copyxin_owl_sigmoid(kindx)(numely)xy;yletssqr'xa=_owl_ssqr(kindx)(numelx)axletl1norm'x=let_kind=kindxin_owl_l1norm_kind(numelx)x|>_float_typ_elt_kindletl2norm_sqr'x=let_kind=kindxin_owl_l2norm_sqr_kind(numelx)x|>_float_typ_elt_kindletl2norm'x=let_kind=kindxin_owl_l2norm_sqr_kind(numelx)x|>Owl_maths.sqrt|>_float_typ_elt_kindletlog_sum_exp'x=_owl_log_sum_exp(kindx)(numelx)xletscalar_powax=letx=copyxin_owl_scalar_pow(kindx)(numelx)xxa;xletpow_scalarxa=letx=copyxin_owl_pow_scalar(kindx)(numelx)xxa;xletscalar_atan2ax=letx=copyxin_owl_scalar_atan2(kindx)(numelx)xxa;xletatan2_scalarxa=letx=copyxin_owl_atan2_scalar(kindx)(numelx)xxa;xletscalar_addax=letx=copyxin_owl_add_scalar(kindx)(numelx)xxa;xletscalar_subax=letx=copyxin_owl_scalar_sub(kindx)(numelx)xxa;xletscalar_mulax=letx=copyxinletx'=flattenx|>array1_of_genarrayinOwl_cblas_basic.scal(numelx)ax'1;xletscalar_divax=letx=copyxin_owl_scalar_div(kindx)(numelx)xxa;xletreci_tol?tolx=lettol=matc
htolwith|Somet->t|None->_float_typ_elt(kindx)(Owl_utils.epsFloat32)inlety=copyxin_owl_reci_tol(kindx)(numely)xytol;y(* element-wise comparison functions *)letelt_equalxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_equal(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_equal(kindx))xyletelt_not_equalxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_not_equal(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_not_equal(kindx))xyletelt_lessxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_less(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_less(kindx))xyletelt_greaterxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_greater(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_greater(kindx))xyletelt_less_equalxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_less_equal(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_less_equal(kindx))xyletelt_greater_equalxy=matchsame_shapexywith|true->(letz=empty(kindx)(shapex)in_owl_elt_greater_equal(kindx)(numelz)xyz;z)|false->broadcast_op(_owl_broadcast_elt_greater_equal(kindx))xyletelt_equal_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_equal_scalar(kindx)(numelx)xya;yletelt_not_equal_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_not_equal_scalar(kindx)(numelx)xya;yletelt_less_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_less_scalar(kindx)(numelx)xya;yletelt_greater_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_greater_scalar(kindx)(numelx)xya;yletelt_less_equal_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_less_equal_scalar(kindx)(numelx)xya;yletelt_greater_equal_scalarxa=lety=empty(kindx)(shapex)in_owl_elt_greater_equal_scalar(kindx)(numelx)xya;yletuniformk?a?bd=leta=matchawithSomea->a|None->Owl_const.zerokinletb=matchbwithSomeb->b|None->Owl_const.onekinletx=emptykdin_owl_uniformk(numelx)xab;xletuniform_?a?b~out=letk=kindoutinleta=matchawithSomea->a|None->Owl_const.zerokinletb=matchbwithSom
eb->b|None->Owl_const.onekin_owl_uniformk(numelout)outabletgaussiank?mu?sigmad=letmu=matchmuwithSomea->a|None->Owl_const.zerokinletsigma=matchsigmawithSomea->a|None->Owl_const.onekinletx=emptykdin_owl_gaussiank(numelx)xmusigma;xletgaussian_?mu?sigma~out=letk=kindoutinletmu=matchmuwithSomea->a|None->Owl_const.zerokinletsigma=matchsigmawithSomea->a|None->Owl_const.onekin_owl_gaussiank(numelout)outmusigmaletlinspacekabn=letx=emptyk[|n|]in_owl_linspaceknabx;xletlogspacek?(base=Owl_const.e)abn=letx=emptyk[|n|]in(ifbase=2.then_owl_logspace_2knabxelseifbase=10.then_owl_logspace_10knabxelseifbase=Owl_const.ethen_owl_logspace_eknabxelse_owl_logspace_baseknbaseabx);xletbernoullik?(p=0.5)d=assert(p>=0.&&p<=1.);letx=emptykdin(_owl_bernoullik)(numelx)xp0;xletbernoulli_?(p=0.5)~out=assert(p>=0.&&p<=1.);letk=kindoutin(_owl_bernoullik)(numelout)outp0letcreatekinddimensiona=letx=emptykinddimensioninlet_=fillxainxletcreate_~outa=filloutaletzeroskinddimension=createkinddimension(Owl_const.zerokind)letzeros_~out=resetoutletoneskinddimension=createkinddimension(Owl_const.onekind)letones_~out=fillout(Owl_const.one(kindout))letsequentialk?a?stepdimension=leta=matchawith|Somea->a|None->Owl_const.zerokinletstep=matchstepwith|Somestep->step|None->Owl_const.onekinletx=emptykdimensionin_owl_sequentialk(numelx)xastep;xletsequential_?a?step~out=letk=kindoutinleta=matchawith|Somea->a|None->Owl_const.zerokinletstep=matchstepwith|Somestep->step|None->Owl_const.onekin_owl_sequentialk(numelout)outastepletdropout?(rate=0.5)x=assert(rate>=0.&&rate<=1.);letx=copyxin_owl_dropout(kindx)(numelx)xrate0;xletargsortx=lety=sequentialInt64(shapex)inOwl_ndarray._owl_argsort(kindx)(numelx)xy;y(* advanced operations 
*)

(* [iteri f x] calls [f i a] for every element [a] of [x], where [i] is the
   index of [a] in the flattened view of [x]. *)
let iteri f x =
  let x' = flatten x |> array1_of_genarray in
  for i = 0 to (Array1.dim x') - 1 do
    let a = Array1.unsafe_get x' i in
    f i a
  done


(* [iter f x] is [iteri] without the index. *)
let iter f x =
  let x' = flatten x |> array1_of_genarray in
  for i = 0 to (Array1.dim x') - 1 do
    let a = Array1.unsafe_get x' i in
    f a
  done


(* [iter2i f x y] calls [f i a b] on the elements of [x] and [y] at the same
   flattened index [i]. Asserts that both arrays have the same shape. *)
let iter2i f x y =
  assert (same_shape x y);
  let x' = flatten x |> array1_of_genarray in
  let y' = flatten y |> array1_of_genarray in
  for i = 0 to (Array1.dim x') - 1 do
    let a = Array1.unsafe_get x' i in
    let b = Array1.unsafe_get y' i in
    f i a b
  done


(* [iter2 f x y] is [iter2i] without the index. *)
let iter2 f x y =
  assert (same_shape x y);
  let x' = flatten x |> array1_of_genarray in
  let y' = flatten y |> array1_of_genarray in
  for i = 0 to (Array1.dim x') - 1 do
    let a = Array1.unsafe_get x' i in
    let b = Array1.unsafe_get y' i in
    f a b
  done


(* [mapi f x] returns a copy of [x] in which every element [a] at flattened
   index [i] is replaced by [f i a]. [x] itself is not modified. *)
let mapi f x =
  let y = copy x in
  let y' = flatten y |> array1_of_genarray in
  for i = 0 to (Array1.dim y') - 1 do
    let a = Array1.unsafe_get y' i in
    Array1.unsafe_set y' i (f i a)
  done;
  y


(* [map f x] is [mapi] without the index. *)
let map f x =
  let y = copy x in
  let y' = flatten y |> array1_of_genarray in
  for i = 0 to (Array1.dim y') - 1 do
    let a = Array1.unsafe_get y' i in
    Array1.unsafe_set y' i (f a)
  done;
  y


(* [map2i f x y] returns a copy of [x] whose element at flattened index [i] is
   [f i a b], with [a] taken from [x] and [b] from [y]. Asserts same shape. *)
let map2i f x y =
  assert (same_shape x y);
  let z = copy x in
  let y' = flatten y |> array1_of_genarray in
  let z' = flatten z |> array1_of_genarray in
  for i = 0 to (Array1.dim z') - 1 do
    let a = Array1.unsafe_get z' i in
    let b = Array1.unsafe_get y' i in
    Array1.unsafe_set z' i (f i a b)
  done;
  z


(* [map2 f x y] is [map2i] without the index. *)
let map2 f x y =
  assert (same_shape x y);
  let z = copy x in
  let y' = flatten y |> array1_of_genarray in
  let z' = flatten z |> array1_of_genarray in
  for i = 0 to (Array1.dim z') - 1 do
    let a = Array1.unsafe_get z' i in
    let b = Array1.unsafe_get y' i in
    Array1.unsafe_set z' i (f a b)
  done;
  z


(* The [_nd] variants pass [Owl_utils.ind x i] to [f] instead of the flattened
   index [i]. *)
let iteri_nd f x = iteri (fun i a -> f (Owl_utils.ind x i) a) x


let mapi_nd f x = mapi (fun i a -> f (Owl_utils.ind x i) a) x


let iter2i_nd f x y =
  assert (same_shape x y);
  iter2i (fun i a b -> f (Owl_utils.ind x i) a b) x y


let map2i_nd f x y =
  assert (same_shape x y);
  map2i (fun i a b -> f (Owl_utils.ind x i) a b) x y


(* [iteri_slice ?axis f x] reshapes [x] so that all dimensions up to and
   including [axis] are merged into the leading dimension of the trailing
   shape, then calls [f i s] on [m] consecutive [sub_left] views of length [n],
   where [m = numel x / (strides x).(axis)] and [n] is the size of dimension
   [axis + 1]. The shape after [axis] must be non-empty because its first
   element is read. *)
let iteri_slice ?(axis=0) f x =
  let d = num_dims x in
  let axis = Owl_utils.adjust_index axis d in
  let m = (numel x) / (strides x).(axis) in
  let s = Array.sub (shape x) (axis + 1) (d - axis - 1) in
  let n = s.(0) in
  s.(0) <- m * s.(0);
  let y = reshape x s in
  (* start one step before the first slice; the loop advances before use *)
  let ofs = ref (-n) in
  for i = 0 to m - 1 do
    ofs := !ofs + n;
    f i (sub_left y !ofs n)
  done


let iter_slice ?axis f x = iteri_slice ?axis (fun _ y -> f y) x


(* [mapi_slice ?axis f x] visits the same slices as [iteri_slice] and collects
   the results of [f i s] into an array of length [m]. *)
let mapi_slice ?(axis=0) f x =
  let d = num_dims x in
  let axis = Owl_utils.adjust_index axis d in
  let m = (numel x) / (strides x).(axis) in
  let s = Array.sub (shape x) (axis + 1) (d - axis - 1) in
  let n = s.(0) in
  s.(0) <- m * s.(0);
  let y = reshape x s in
  let ofs = ref (-n) in
  Array.init m (fun i ->
    ofs := !ofs + n;
    f i (sub_left y !ofs n)
  )


let map_slice ?axis f x = mapi_slice ?axis (fun _ y -> f y) x


(* [filteri_slice ?axis f x] returns the slices [s] for which [f i s] holds. *)
let filteri_slice ?axis f x =
  let s = Owl_utils.Stack.make () in
  iteri_slice ?axis (fun i y ->
    if f i y then Owl_utils.Stack.push s y
  ) x;
  Owl_utils.Stack.to_array s


let filter_slice ?axis f x = filteri_slice ?axis (fun _ y -> f y) x


(* [foldi_slice ?axis f a x] folds [f i acc s] over the slices of [x],
   starting from the accumulator [a]. *)
let foldi_slice ?axis f a x =
  let acc = ref a in
  iteri_slice ?axis (fun i y -> acc := f i !acc y) x;
  !acc


(* NOTE(review): the third parameter is forwarded to [foldi_slice] as the
   initial accumulator, so this is a partial application that still expects the
   ndarray; kept as is to preserve the existing signature. *)
let fold_slice ?axis f x = foldi_slice ?axis (fun _ y -> f y) x


(* manipulation functions *)

(* Fails unless [axis] has length [d], every entry lies in [0, d) and no entry
   is repeated. *)
let _check_transpose_axis axis d =
  let info = "check_transpose_axis fails" in
  if Array.length axis <> d then failwith info;
  let h = Hashtbl.create 16 in
  Array.iter (fun x ->
    if x < 0 || x >= d then failwith info;
    if Hashtbl.mem h x then failwith info;
    Hashtbl.add h x 0
  ) axis


(* [matrix_transpose x] reads the first two dimensions [m] and [n] of [x] and
   returns a fresh [n] by [m] array filled by [Owl_matrix._matrix_transpose]. *)
let matrix_transpose x =
  let k = kind x in
  let s = shape x in
  let m, n = s.(0), s.(1) in
  let y = empty k [|n;m|] in
  Owl_matrix._matrix_transpose k x y;
  y


(* Same as [matrix_transpose] but writes into the caller-provided [out]. *)
let matrix_transpose_ ~out x =
  let k = kind x in
  Owl_matrix._matrix_transpose k x out


(* [transpose ?axis x] permutes the dimensions of [x] according to [axis]
   (default: reversed order). The identity permutation returns a copy, the
   two-dimensional case goes through [matrix_transpose], and the general case
   calls [Owl_ndarray._ndarray_transpose] with int32 stride arrays. *)
let transpose ?axis x =
  let d = num_dims x in
  let a = match axis with
    | Some a -> a
    | None   -> Array.init d (fun i -> d - i - 1)
  in
  (* trivial case *)
  if a = Array.init d (fun i -> i) then copy x
  else (
    (* check if axis is a correct permutation *)
    _check_transpose_axis a d;
    if d = 2 then matrix_transpose x
    else (
      let sx = shape x in
      let sy = Array.map (fun j -> sx.(j)) a in
      let y = empty (kind x) sy in
      (* calculate the inverse of the permutation *)
      let b = Array.make d 0 in
      Array.iteri (fun i j -> b.(j) <- i) a;
      (* strides of the output, reordered through the inverse permutation *)
      let _incy = strides y in
      let _incy = Array.map (fun j -> Int32.of_int _incy.(j)) b in
      let _incx = Array.map Int32.of_int (strides x) in
      let incx = Array1.of_array Int32 C_layout _incx |> genarray_of_array1 in
      let incy = Array1.of_array Int32 C_layout _incy |> genarray_of_array1 in
      Owl_ndarray._ndarray_transpose (kind x) x y incx incy;
      y
    )
  )


(* In-place counterpart of [transpose]: the result is written into [out]. The
   output strides are computed from the permuted shape rather than from [out]. *)
let transpose_ ~out ?axis x =
  let d = num_dims x in
  let a = match axis with
    | Some a -> a
    | None   -> Array.init d (fun i -> d - i - 1)
  in
  (* trivial case *)
  if a = Array.init d (fun i -> i) then copy_ ~out x
  else (
    (* check if axis is a correct permutation *)
    _check_transpose_axis a d;
    if d = 2 then matrix_transpose_ ~out x
    else (
      let sx = shape x in
      let sy = Array.map (fun j -> sx.(j)) a in
      (* calculate the inverse of the permutation *)
      let b = Array.make d 0 in
      Array.iteri (fun i j -> b.(j) <- i) a;
      let _incy = Owl_utils.calc_stride sy in
      let _incy = Array.map (fun j -> Int32.of_int _incy.(j)) b in
      let _incx = Array.map Int32.of_int (strides x) in
      let incx = Array1.of_array Int32 C_layout _incx |> genarray_of_array1 in
      let incy = Array1.of_array Int32 C_layout _incy |> genarray_of_array1 in
      Owl_ndarray._ndarray_transpose (kind x) x out incx incy
    )
  )


(* [swap a0 a1 x] transposes [x] with dimensions [a0] and [a1] exchanged. *)
let swap a0 a1 x =
  let d = num_dims x in
  let a = Array.init d (fun i -> i) in
  let t = a.(a0) in
  a.(a0) <- a.(a1);
  a.(a1) <- t;
  transpose ~axis:a x


(* [filteri f x] returns the flattened indices [i] for which [f i a] holds. *)
let filteri f x =
  let s = Owl_utils.Stack.make () in
  iteri (fun i y ->
    if f i y then Owl_utils.Stack.push s i
  ) x;
  Owl_utils.Stack.to_array s


let filter f x = filteri (fun _ y -> f y) x


(* Like [filteri], but [f] receives and the result contains the values of
   [Owl_utils.ind x i] instead of flattened indices. *)
let filteri_nd f x =
  let s = Owl_utils.Stack.make () in
  iteri (fun i y ->
    let i' = Owl_utils.ind x i in
    if f i' y then Owl_utils.Stack.push s i'
  ) x;
  Owl_utils.Stack.to_array s


(* [flip ?axis x] takes the slice [-1;0] along [axis] and the full range along
   every other dimension. *)
let flip ?(axis=0) x =
  let a = Array.init (num_dims x) (fun _ -> R_ [||]) in
  a.(axis) <- R_ [|-1;0|];
  Owl_slicing.get_slice_array_typ a x


(* [rotate x degree] rotates [x] by a multiple of 90 degrees (asserted). The
   number of quarter turns is [k = (degree mod 360) / 90]. Arrays with fewer
   than two dimensions, or [k = 0], are simply copied. Each case copies either
   row by row or column by column with [_owl_copy], depending on which of [m]
   (first dimension) and [n] (remaining elements per row) is smaller. *)
let rotate x degree =
  assert (degree mod 90 = 0);
  let k = (degree mod 360) / 90 in
  let _kind = kind x in

  if num_dims x < 2 || k = 0 then
    copy x
  else if k = 1 then (
    let sx = shape x in
    let sy = Array.copy sx in
    sy.(0) <- sx.(1);
    sy.(1) <- sx.(0);
    let y = empty _kind sy in

    let m = sx.(0) in
    let n = (numel x) / m in

    if m <= n then (
      let ofsx = ref 0 in
      for i = 1 to m do
        _owl_copy _kind n ~ofsx:!ofsx ~incx:1 ~ofsy:(m - i) ~incy:m x y;
        ofsx := !ofsx + n
      done
    )
    else (
      let ofsy = ref (m - 1) in
      for i = 0 to n - 1 do
        _owl_copy _kind m ~ofsx:i ~incx:n ~ofsy:!ofsy ~incy:(-1) x y;
        ofsy := !ofsy + m
      done
    );
    y
  )
  else if k = 2 then (
    let sx = shape x in
    let y = empty _kind sx in

    let m = sx.(0) in
    let n = (numel x) / m in

    if m <= n then (
      let ofsx = ref 0 in
      let ofsy = ref (m * n - 1) in
      for _i = 0 to m - 1 do
        _owl_copy _kind n ~ofsx:!ofsx ~incx:1 ~ofsy:!ofsy ~incy:(-1) x y;
        ofsx := !ofsx + n;
        ofsy := !ofsy - n
      done
    )
    else (
      let ofsy = m * n - 1 in
      for i = 0 to n - 1 do
        _owl_copy _kind m ~ofsx:i ~incx:n ~ofsy:(ofsy - i) ~incy:(-n) x y
      done
    );
    y
  )
  else (
    (* remaining values of k, i.e. three quarter turns *)
    let sx = shape x in
    let sy = Array.copy sx in
    sy.(0) <- sx.(1);
    sy.(1) <- sx.(0);
    let y = empty (kind x) sy in

    let m = sx.(0) in
    let n = (numel x) / m in

    if m <= n then (
      let ofsx = ref 0 in
      let ofsy = (n - 1) * m in
      for i = 0 to m - 1 do
        _owl_copy _kind n ~ofsx:!ofsx ~incx:1 ~ofsy:(ofsy + i) ~incy:(-m) x y;
        ofsx := !ofsx + n
      done
    )
    else (
      let ofsy = ref ((n - 1) * m) in
      for i = 0 to n - 1 do
        _owl_copy _kind m ~ofsx:i ~incx:n ~ofsy:!ofsy ~incy:1 x y;
        ofsy := !ofsy - m
      done
    );
    y
  )


(* [get_index x axis] takes one coordinate array per dimension of [x]
   (asserted), transposes them into [n] full nd indices, where [n] is the
   length of [axis.(0)], and returns the elements of [x] at those indices. *)
let get_index x axis =
  let d = num_dims x in
  assert (Array.length axis = d);
  let n = Array.length axis.(0) in
  let indices = Array.make_matrix n d 0 in
  Array.iteri (fun j a ->
    Array.iteri (fun i b -> indices.(i).(j) <- b) a
  ) axis;
  Array.map (fun i -> Bigarray.Genarray.get x i) indices


(* [set_index x axis a] builds the nd indices as in [get_index] and stores
   [a.(i)] at the i-th index, or [a.(0)] everywhere when [a] has length 1. *)
let set_index x axis a =
  let d = num_dims x in
  assert (Array.length axis = d);
  let n = Array.length axis.(0) in
  let indices = Array.make_matrix n d 0 in
  Array.iteri (fun j a ->
    Array.iteri (fun i b -> indices.(i).(j) <- b) a
  ) axis;
  if Array.length a = 1 then
    Array.iteri (fun _i j -> Bigarray.Genarray.set x j a.(0)) indices
  else
    Array.iteri (fun i j -> Bigarray.Genarray.set x j a.(i)) indices


(* some comparison functions *)

(* Each predicate below calls the matching [_owl_*] kernel and is true when
   the kernel returns 1. *)
let is_zero x = _owl_is_zero (kind x) (numel x) x = 1

let is_positive x = _owl_is_positive (kind x) (numel x) x = 1

let is_negative x = _owl_is_negative (kind x) (numel x) x = 1

let is_nonnegative x = _owl_is_nonnegative (kind x) (numel x) x = 1

let is_nonpositive x = _owl_is_nonpositive (kind x) (numel x) x = 1

let is_normal x = _owl_is_normal (kind x) (numel x) x = 1

let not_nan x = _owl_not_nan (kind x) (numel x) x = 1

let not_inf x = _owl_not_inf (kind x) (numel x) x = 1

(* structural equality and inequality of the two arrays *)
let equal x y = ( = ) x y

let not_equal x y = ( <> ) x y

let greater x y = _owl_greater (kind x) (numel x) x y = 1

let less x y = _owl_less (kind x) (numel x) x y = 1

let greater_equal x y = _owl_greater_equal (kind x) (numel x) x y = 1

let less_equal x y = _owl_less_equal (kind x) (numel x) x y = 1

let equal_scalar x a = _owl_equal_scalar (kind x) (numel x) x a = 1

(* Fixed: this used to call [_owl_equal_scalar], which made it return exactly
   the same result as [equal_scalar].
   NOTE(review): [_owl_not_equal_scalar] is expected to be provided by the
   opened [Owl_ndarray] module next to the other scalar kernels; confirm. *)
let not_equal_scalar x a = _owl_not_equal_scalar (kind x) (numel x) x a = 1

let less_scalar x a = _owl_less_scalar (kind x) (numel x) x a = 1

let greater_scalar x a = _owl_greater_scalar (kind x) (numel x) x a = 1

let less_equal_scalar x a = _owl_less_equal_scalar (kind x) (numel x) x a = 1

let greater_equal_scalar x a = _owl_greater_equal_scalar (kind x) (numel x) x a = 1


(* [approx_equal ?eps x y] is true when [_owl_approx_equal] returns 1 for the
   tolerance [eps] (default [Owl_utils.eps Float32]). *)
let approx_equal ?eps x y =
  let eps = match eps with
    | Some eps -> eps
    | None     -> Owl_utils.eps Float32
  in
  _owl_approx_equal (kind x) (numel x) x y eps = 1


(* Scalar counterpart of [approx_equal]. *)
let approx_equal_scalar ?eps x a =
  let eps = match eps with
    | Some eps -> eps
    | None     -> Owl_utils.eps Float32
  in
  _owl_approx_equal_scalar (kind x) (numel x) x a eps = 1


(* [approx_elt_equal ?eps x y] creates an array [z] shaped like [x] and filled
   with [eps] converted to the element type, passes [x], [y] and [z] to
   [_owl_approx_elt_equal], and returns [z]. Only float and complex kinds are
   supported; other kinds raise [Failure]. *)
let approx_elt_equal ?eps x y =
  let eps = match eps with
    | Some eps -> eps
    | None     -> Owl_utils.eps Float32
  in
  let _eps : type a b. (a, b) kind -> float -> a =
    fun k a -> match k with
    | Float32   -> a
    | Float64   -> a
    | Complex32 -> Complex.({re = a; im = 0.})
    | Complex64 -> Complex.({re = a; im = 0.})
    | _         -> failwith "Owl_dense_ndarray_generic:approx_elt_equal"
  in
  let k = kind x in
  let z = create k (shape x) (_eps k eps) in
  _owl_approx_elt_equal k (numel z) x y z;
  z


(* Scalar counterpart of [approx_elt_equal]; the error message now names this
   function instead of [approx_elt_equal]. *)
let approx_elt_equal_scalar ?eps x a =
  let eps = match eps with
    | Some eps -> eps
    | None     -> Owl_utils.eps Float32
  in
  let _eps : type a b. (a, b) kind -> float -> a =
    fun k a -> match k with
    | Float32   -> a
    | Float64   -> a
    | Complex32 -> Complex.({re = a; im = 0.})
    | Complex64 -> Complex.({re = a; im = 0.})
    | _         -> failwith "Owl_dense_ndarray_generic:approx_elt_equal_scalar"
  in
  let k = kind x in
  let y = create k (shape x) (_eps k eps) in
  _owl_approx_elt_equal_scalar k (numel y) x y a;
  y


(* [exists f x] is true when [f] holds for at least one element of [x]; the
   scan stops at the first match. A private local exception is used for the
   early exit so that a [Failure] raised by [f] itself is no longer swallowed
   and propagates to the caller. *)
let exists f x =
  let exception Found in
  try
    iter (fun y -> if f y then raise Found) x;
    false
  with Found -> true


let not_exists f x = not (exists f x)


(* [for_all f x] holds when no element of [x] violates [f]. *)
let for_all f x = let g y = not (f y) in not_exists g x


let nnz x = _owl_nnz (kind x) (numel x) x


(* ratio of [nnz x] to the total number of elements *)
let density x = (nnz x |> float_of_int) /. (numel x |> float_of_int)


(* input/output functions *)

(* prints an integer index array between square brackets on stdout *)
let print_index i =
  Printf.printf "[ ";
  Array.iter (fun x -> Printf.printf "%i " x) i;
  Printf.printf "] "


let print_element k v =
  let s = (Owl_utils.elt_to_str k) v in
  Printf.printf "%s" s


(* [print] forwards to [Owl_pretty.print_dsnda]. When not supplied, [max_col]
   defaults to the size [n] of the last dimension and [max_row] to
   [numel x / n]. *)
let print ?max_row ?max_col ?header ?fmt x =
  let n = (shape x).(num_dims x - 1) in
  let max_row = match max_row with
    | Some a -> Some a
    | None   -> Some ((numel x) / n)
  in
  let max_col = match max_col with
    | Some a -> Some a
    | None   -> Some n
  in
  Owl_pretty.print_dsnda ?max_row ?max_col ?header ?elt_to_str_fun:fmt x


let pp_dsnda formatter x = Owl_pretty.pp_dsnda formatter x


let save x f = Owl_io.marshal_to_file x f


(* the kind argument is accepted but not used *)
let load _k f = Owl_io.marshal_from_file f


(* [of_array k x d] builds an ndarray of kind [k] and shape [d] from the flat
   array [x]; asserts that [x] has as many elements as [d] describes. *)
let of_array k x d =
  let n = Array.fold_left (fun a b -> a * b) 1 d in
  assert (Array.length x = n);
  let y = Array1.of_array k C_layout x |> genarray_of_array1 in
  reshape y d


(* [to_array x] returns the elements of [x] in flattened order. *)
let to_array x =
  let n = numel x in
  let y = flatten x |> array1_of_genarray in
  Array.init n (fun i -> y.{i})


(* [complex real_kind complex_kind re im] allocates an array of kind
   [complex_kind] shaped like [re] and lets [_owl_to_complex] fill it from
   [re] and [im], which must have the same shape (asserted). *)
let complex
  : type a b c d. (a, b) kind -> (c, d) kind -> (a, b) t -> (a, b) t -> (c, d) t
  = fun real_kind complex_kind re im ->
  assert (shape re = shape im);
  let x = empty complex_kind (shape re) in
  _owl_to_complex real_kind complex_kind (numel re) re im x;
  x


(* Same as [complex] but the inputs are handed to [_owl_polar]. *)
let polar
  : type a b c d. (a, b) kind -> (c, d) kind -> (a, b) t -> (a, b) t -> (c, d) t
  = fun real_kind complex_kind rho theta ->
  assert (shape rho = shape theta);
  let x = empty complex_kind (shape rho) in
  _owl_polar real_kind complex_kind (numel rho) rho theta x;
  x


(* math operations. code might be verbose for performance concern. *)

let re_c2s x =
  let y = empty Float32 (shape x) in
  _owl_re_c2s (numel x) x y;
  y

let re_z2d x =
  let y = empty Float64 (shape x) in
  _owl_re_z2d (numel x) x y;
  y

let im_c2s x =
  let y = empty Float32 (shape x) in
  _owl_im_c2s (numel x) x y;
  y

let im_z2d x =
  let y = empty Float64 (shape x) in
  _owl_im_z2d (numel x) x y;
  y

let abs_c2s x = abs x |> re_c2s

let abs_z2d x = abs x |> re_z2d

let abs2_c2s x = abs2 x |> re_c2s

let abs2_z2d x = abs2 x |> re_z2d


(* cast functions *)

(* [cast dst_typ x] converts [x] to the kind [dst_typ]. Identical source and
   destination kinds return a copy; the supported float and complex pairs call
   the matching [_owl_cast_*] kernel; anything else raises [Failure]. *)
let cast
  : type a b c d. (a, b) kind -> (c, d) t -> (a, b) t
  = fun dst_typ x ->
  let src_typ = kind x in
  let y = empty dst_typ (shape x) in
  match src_typ, dst_typ with
  | Float32, Float32     -> copy x
  | Float64, Float64     -> copy x
  | Complex32, Complex32 -> copy x
  | Complex64, Complex64 -> copy x
  | Float32, Float64     -> _owl_cast_s2d (numel x) x y; y
  | Float64, Float32     -> _owl_cast_d2s (numel x) x y; y
  | Float32, Complex32   -> _owl_cast_s2c (numel x) x y; y
  | Float64, Complex64   -> _owl_cast_d2z (numel x) x y; y
  | Float32, Complex64   -> _owl_cast_s2z (numel x) x y; y
  | Float64, Complex32   -> _owl_cast_d2c (numel x) x y; y
  | Complex32, Complex64 -> _owl_cast_c2z (numel x) x y; y
  | Complex64, Complex32 -> _owl_cast_z2c (numel x) x y; y
  | _                    -> failwith "Owl_dense_ndarray_generic:cast"


let cast_s2d x = cast Float64 x

let cast_d2s x = cast Float32 x

let cast_c2z x = cast Complex64 x

let cast_z2c x = cast Complex32 x

let cast_s2c x = cast Complex32 x

let cast_d2z x = cast Complex64 x

let cast_s2z x = cast Complex64 x

let cast_d2c x = cast Complex32 x


(* padding and its helper functions *)

(* [_expand_padding_index d s] right-pads [d] with [0;0] entries, using the
   difference between the lengths of [s] and [d] as the count, then normalises
   each entry: an empty entry becomes [0;0] and a single value [x] becomes
   [x;x]. *)
let _expand_padding_index d s =
  let ls = Array.length s in
  let ld = Array.length d in
  let d = Owl_utils.Array.pad `Right [|0;0|] (ls - ld) d in
  Array.map (function
    | [||]  -> [|0;0|]
    | [|x|] -> [|x;x|]
    | x     -> x
  ) d


(*
p1: padding index
ls: slice size of the source
l0: stride size of the source
l1: stride size of the destination
i0: current source nd index
i1: current destination nd index
d0: current depth of index
d1: depth threshold
s0: shape of the source
s1: shape of the destination
x0: source
x1: destination
*)
let rec _copy_to_padding p1 ls l0 l1 i0 i1 d0 d1 s0 s1 x0 x1 =
  if d0 >= d1 then begin
    (* Depth threshold reached: turn both nd indices into flat offsets and
       copy one block of ls.(d0) elements from the source to the destination. *)
    (* print_index i0; Printf.printf " === "; print_index i1; print_endline ""; *)
    let src_ofs = Owl_utils.index_nd_1d i0 l0 in
    let dst_ofs = Owl_utils.index_nd_1d i1 l1 in
    _owl_copy (kind x0) ls.(d0) ~ofsx:src_ofs ~incx:1 ~ofsy:dst_ofs ~incy:1 x0 x1
  end
  else
    (* Walk dimension d0 of the source; the destination index is shifted by
       the leading padding of that dimension. *)
    for pos = 0 to s0.(d0) - 1 do
      i0.(d0) <- pos;
      i1.(d0) <- pos + p1.(d0).(0);
      _copy_to_padding p1 ls l0 l1 i0 i1 (d0 + 1) d1 s0 s1 x0 x1;
      (* reset both indices at this depth once the deeper levels are done *)
      i0.(d0) <- 0;
      i1.(d0) <- p1.(d0).(0)
    done


(* according to the expanded padding index, calculate the highest dimension
with padding, so we can figure out the minimum continuous block size.
*)
let _highest_padding_dimension p =
  (* Scan from the last dimension towards the first and return the index of
     the first entry that differs from [|0;0|]. Returns 0 when no entry is
     padded and -1 when [p] is empty. Written as a plain recursive search:
     no exception is raised for control flow and nothing is caught blindly. *)
  let rec search i =
    if i <= 0 || p.(i) <> [|0;0|] then i
    else search (i - 1)
  in
  search (Array.length p - 1)


(* [pad ?v d x] returns a new array holding [x] surrounded by padding.
   [d] is the padding spec (converted with [Owl_utils.llss2aarr] and
   normalised by [_expand_padding_index]); each dimension grows by the sum of
   its two padding amounts. The result is pre-filled with [v], which defaults
   to the zero of the kind of [x]. *)
let pad ?v d x =
  let k = kind x in
  let v = match v with
    | Some v -> v
    | None   -> Owl_const.zero k
  in
  let s0 = shape x in
  let p1 = _expand_padding_index (Owl_utils.llss2aarr d) s0 in
  let s1 = Array.map2 (fun m n -> m + n.(0) + n.(1)) s0 p1 in
  let y = create k s1 v in
  (* prepare variables for block copying *)
  let ls = Owl_utils.calc_slice s0 in
  let l0 = Owl_utils.calc_stride s0 in
  let l1 = Owl_utils.calc_stride s1 in
  let i0 = Array.make (num_dims x) 0 in
  let i1 = Array.map (fun a -> a.(0)) p1 in
  let d0 = 0 in
  let d1 = _highest_padding_dimension p1 in
  _copy_to_padding p1 ls l0 l1 i0 i1 d0 d1 s0 s1 x y;
  y


(* NOTE
The following functions (i.e., conv2d* and conv3d* and etc.) are for neural
network functionality. Currently I keep them here because Algodiff functor
uses this module as parameter. In future, I might wrap them into separate
modules to reduce the complexity of the generic module.
*)(* conv2d: 4d input and 4d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_channel]
kernel: [kernel_column; kernel_row; input_channel; output_channel]
stride: [column_stride; row_stride]
output: [batch; output_column; output_row; output_channel]
*)
let conv2d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  (* plain convolution: the input (dilation) strides are fixed to 1 *)
  let col_in_stride = 1 and row_in_stride = 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows row_stride col_stride
  in
  let output = empty (kind input) [|batches; output_cols; output_rows; out_channel|] in
  (* the stub takes the padding mode as an integer: SAME is 0, VALID is 1 *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_spatial_conv (kind input) input kernel output
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride pad_typ row_in_stride col_in_stride;
  output


(* Same computation as conv2d, but the result is written into the
   caller-supplied [out] instead of a freshly allocated array. *)
let conv2d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = 1 and row_in_stride = 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows row_stride col_stride
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_spatial_conv (kind input) input kernel out
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride pad_typ row_in_stride col_in_stride


(* gradient of conv2d w.r.t the input 
*)
let conv2d_backward_input input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = 1 and row_in_stride = 1 in

  (* the gradient has the same kind and shape as the input *)
  let input' = empty (kind input) (shape input) in
  _owl_spatial_conv_backward_input (kind input') input' kernel output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride;
  input'


(* Same as conv2d_backward_input, but the gradient is written into the
   caller-supplied [out]. *)
let conv2d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = 1 and row_in_stride = 1 in

  _owl_spatial_conv_backward_input (kind input) out kernel output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride


(* gradient of conv2d w.r.t the kernel 
*)
let conv2d_backward_kernel input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = 1 and row_in_stride = 1 in

  (* the gradient has the same kind and shape as the kernel *)
  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_spatial_conv_backward_kernel (kind input) input kernel' output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride;
  kernel'


(* Same as conv2d_backward_kernel, but the gradient is written into the
   caller-supplied [out]. *)
let conv2d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = 1 and row_in_stride = 1 in

  _owl_spatial_conv_backward_kernel (kind input) input out output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride


(* conv3d: 5d input and 5d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_depth; input_channel]
kernel: [kernel_column; kernel_row; kernel_depth; input_channel; output_channel]
stride: [column_stride; row_stride; depth_stride]
output: [batch; output_column; output_row; output_dpts; output_channel]
*)
let conv3d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in
  let output =
    empty (kind input) [|batches; output_cols; output_rows; output_dpts; out_channel|]
  in
  (* the stub takes the padding mode as an integer: SAME is 0, VALID is 1 *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_cuboid_conv (kind input) input kernel output
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride pad_typ;
  output


(* Same computation as conv3d, but the result is written into the
   caller-supplied [out]. *)
let conv3d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_cuboid_conv (kind input) input kernel out
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride pad_typ


(* gradient of conv3d w.r.t the input *)
let conv3d_backward_input input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  (* the gradient has the same kind and shape as the input *)
  let input' = empty (kind input) (shape input) in
  _owl_cuboid_conv_backward_input (kind input') input' kernel output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride;
  input'


(* Same as conv3d_backward_input, but the gradient is written into the
   caller-supplied [out]. *)
let conv3d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  _owl_cuboid_conv_backward_input (kind input) out kernel output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride


(* gradient of conv3d w.r.t the kernel *)
let conv3d_backward_kernel input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  (* the gradient has the same kind and shape as the kernel *)
  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_cuboid_conv_backward_kernel (kind input) input kernel' output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride;
  kernel'


(* Same as conv3d_backward_kernel, but the gradient is written into the
   caller-supplied [out]. *)
let conv3d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in

  _owl_cuboid_conv_backward_kernel (kind input) input out output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride


(* conv1d: 3d input and 3d kernel, refer to tensorflow doc
input : [batch; input_column; input_channel]
kernel: [kernel_column; input_channel; output_channel]
stride: [column_stride]
output: [batch; output_column; output_channel]
*)
let conv1d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  (* lift the 3d input to 4d by inserting a unit dimension after the batch *)
  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and in_channel = ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  (* lift the 3d kernel to 4d by prepending a unit dimension *)
  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and out_channel = kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let stride = [|1; stride.(0)|] in

  (* run the 2d convolution, then drop the unit dimension again *)
  let output = conv2d ~padding input kernel stride in
  let output_cols = (shape output).(2) in
  reshape output [|batches; output_cols; out_channel|]


(* Same computation as conv1d, delegating to conv2d_ so that the result is
   written into the caller-supplied [out]. *)
let conv1d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and in_channel = ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and out_channel = kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let stride = [|1; stride.(0)|] in
  conv2d_ ~out ~padding input kernel stride


(* gradient of conv1d w.r.t the input 
*)
let conv1d_backward_input input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  (* lift input, kernel and output gradient to 4d with a unit row dimension *)
  let input_shp = shape input in
  let batches = input_shp.(0) and input_cols = input_shp.(1)
  and in_channel = input_shp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and out_channel = kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  (* compute the 2d gradient and give it the original 3d input shape *)
  let input' = conv2d_backward_input input kernel stride output' in
  reshape input' input_shp


(* Same as conv1d_backward_input, delegating to conv2d_backward_input_ so
   that the gradient is written into the caller-supplied [out]. *)
let conv1d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) and input_cols = input_shp.(1)
  and in_channel = input_shp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and out_channel = kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in
  conv2d_backward_input_ ~out input kernel stride output'


(* gradient of conv1d w.r.t the kernel 
*)
let conv1d_backward_kernel input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  (* lift input, kernel and output gradient to 4d with a unit row dimension *)
  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and in_channel = ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) and out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  (* compute the 2d gradient and give it the original 3d kernel shape *)
  let kernel' = conv2d_backward_kernel input kernel stride output' in
  reshape kernel' kernel_shp


(* Same as conv1d_backward_kernel, delegating to conv2d_backward_kernel_ so
   that the gradient is written into the caller-supplied [out]. *)
let conv1d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and in_channel = ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) and out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in
  conv2d_backward_kernel_ ~out input kernel stride output'


(* dilated_conv2d: 4d input and 4d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_channel]
kernel: [kernel_column; kernel_row; input_channel; output_channel]
stride: [column_stride; row_stride]
rate : [col_dilation_rate; row_dilation_rate]
output: [batch; output_column; output_row; output_channel]
*)
let dilated_conv2d ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  (* kernel extent once the dilation gaps are accounted for *)
  let kernel_cols_up = kernel_cols + (kernel_cols - 1) * (col_in_stride - 1) in
  let kernel_rows_up = kernel_rows + (kernel_rows - 1) * (row_in_stride - 1) in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols_up kernel_rows_up row_stride col_stride
  in
  let output = empty (kind input) [|batches; output_cols; output_rows; out_channel|] in
  (* the stub takes the padding mode as an integer: SAME is 0, VALID is 1 *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_dilated_spatial_conv (kind input) input kernel output
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride pad_typ row_in_stride col_in_stride;
  output


(* Same computation as dilated_conv2d, but the result is written into the
   caller-supplied [out]. *)
let dilated_conv2d_ ~out ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  let kernel_cols_up = kernel_cols + (kernel_cols - 1) * (col_in_stride - 1) in
  let kernel_rows_up = kernel_rows + (kernel_rows - 1) * (row_in_stride - 1) in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols_up kernel_rows_up row_stride col_stride
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_dilated_spatial_conv (kind input) input kernel out
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride pad_typ row_in_stride col_in_stride


(* gradient of dilated_conv2d w.r.t the input *)
let dilated_conv2d_backward_input input kernel stride rate output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  (* the gradient has the same kind and shape as the input *)
  let input' = empty (kind input) (shape input) in
  _owl_dilated_spatial_conv_backward_input (kind input') input' kernel output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride;
  input'


(* Same as dilated_conv2d_backward_input, but the gradient is written into
   the caller-supplied [out]. *)
let dilated_conv2d_backward_input_ ~out input kernel stride rate output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  _owl_dilated_spatial_conv_backward_input (kind input) out kernel output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride


(* gradient of dilated_conv2d w.r.t the kernel *)
let dilated_conv2d_backward_kernel input kernel stride rate output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  (* the gradient has the same kind and shape as the kernel *)
  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_dilated_spatial_conv_backward_kernel (kind input) input kernel' output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride;
  kernel'


(* Same as dilated_conv2d_backward_kernel, but the gradient is written into
   the caller-supplied [out]. *)
let dilated_conv2d_backward_kernel_ ~out input kernel stride rate output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);
  assert (Array.length rate = 2);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1)
  and input_rows = ishp.(2) and in_channel = ishp.(3) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and out_channel = kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1) in

  _owl_dilated_spatial_conv_backward_kernel (kind input) input out output'
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows output_cols output_rows out_channel
    row_stride col_stride row_in_stride col_in_stride


(* dilated_conv3d: 5d input and 5d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_depth; input_channel]
kernel: [kernel_column; kernel_row; kernel_depth; input_channel; output_channel]
stride: [column_stride; row_stride; depth_stride]
rate : [col_dilation_rate; row_dilation_rate; depth_dilation_rate]
output: [batch; output_column; output_row; output_dpts; output_channel]
*)
let dilated_conv3d ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  (* kernel extent once the dilation gaps are accounted for *)
  let kernel_cols_up = kernel_cols + (kernel_cols - 1) * (col_in_stride - 1) in
  let kernel_rows_up = kernel_rows + (kernel_rows - 1) * (row_in_stride - 1) in
  let kernel_dpts_up = kernel_dpts + (kernel_dpts - 1) * (dpt_in_stride - 1) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols_up kernel_rows_up kernel_dpts_up
      row_stride col_stride dpt_stride
  in
  let output =
    empty (kind input) [|batches; output_cols; output_rows; output_dpts; out_channel|]
  in
  (* the stub takes the padding mode as an integer: SAME is 0, VALID is 1 *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_dilated_cuboid_conv (kind input) input kernel output
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride pad_typ;
  output


(* Same computation as dilated_conv3d, but the result is written into the
   caller-supplied [out]. *)
let dilated_conv3d_ ~out ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  let kernel_cols_up = kernel_cols + (kernel_cols - 1) * (col_in_stride - 1) in
  let kernel_rows_up = kernel_rows + (kernel_rows - 1) * (row_in_stride - 1) in
  let kernel_dpts_up = kernel_dpts + (kernel_dpts - 1) * (dpt_in_stride - 1) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols_up kernel_rows_up kernel_dpts_up
      row_stride col_stride dpt_stride
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_dilated_cuboid_conv (kind input) input kernel out
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride pad_typ


(* gradient of dilated_conv3d w.r.t the input *)
let dilated_conv3d_backward_input input kernel stride rate output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  (* the gradient has the same kind and shape as the input *)
  let input' = empty (kind input) (shape input) in
  _owl_dilated_cuboid_conv_backward_input (kind input') input' kernel output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride;
  input'


(* Same as dilated_conv3d_backward_input, but the gradient is written into
   the caller-supplied [out]. *)
let dilated_conv3d_backward_input_ ~out input kernel stride rate output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  _owl_dilated_cuboid_conv_backward_input (kind input) out kernel output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride


(* gradient of dilated_conv3d w.r.t the kernel 
*)
let dilated_conv3d_backward_kernel input kernel stride rate output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  (* the gradient has the same kind and shape as the kernel *)
  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_dilated_cuboid_conv_backward_kernel (kind input) input kernel' output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride;
  kernel'


(* Same as dilated_conv3d_backward_kernel, but the gradient is written into
   the caller-supplied [out]. *)
let dilated_conv3d_backward_kernel_ ~out input kernel stride rate output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);
  assert (Array.length rate = 3);

  let ishp = shape input in
  let batches = ishp.(0) and input_cols = ishp.(1) and input_rows = ishp.(2)
  and input_dpts = ishp.(3) and in_channel = ishp.(4) in

  let kshp = shape kernel in
  let kernel_cols = kshp.(0) and kernel_rows = kshp.(1)
  and kernel_dpts = kshp.(2) and out_channel = kshp.(4) in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols = oshp.(1) and output_rows = oshp.(2)
  and output_dpts = oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride = stride.(0) and row_stride = stride.(1)
  and dpt_stride = stride.(2) in
  let col_in_stride = rate.(0) and row_in_stride = rate.(1)
  and dpt_in_stride = rate.(2) in

  _owl_dilated_cuboid_conv_backward_kernel (kind input) input out output'
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts out_channel
    dpt_stride row_stride col_stride
    dpt_in_stride row_in_stride col_in_stride


(* dilated_conv1d: 3d input and 3d kernel, refer to tensorflow doc
input : [batch; input_column; input_channel]
kernel: [kernel_column; input_channel; output_channel]
stride: [column_stride]
rate  : [column_dilation_rate]
output: [batch; output_column; output_channel]
*)

(* Implemented on top of [dilated_conv2d]: input and kernel are reshaped to 4d
   with a singleton leading spatial dimension, and the 1-element [stride] is
   lifted to [|1; col_stride|].

   The 1-element [rate] is lifted the same way to [|1; col_rate|]; previously
   it was forwarded untouched while every other argument was lifted to 2d.
   A [rate] whose length is not 1 is passed through unchanged so that callers
   already supplying a 2-element rate keep working.

   Returns a 3d array [batch; output_column; output_channel]. *)
let dilated_conv1d ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let col_stride = stride.(0) in
  let stride = [|1; col_stride|] in

  (* lift the dilation rate exactly like the stride; 1 is used along the
     singleton dimension *)
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  let output = dilated_conv2d ~padding input kernel stride rate in
  (* the 2d result is [batches; 1; output_cols; out_channel] after the
     reshape above, so the column count sits at index 2 *)
  let output_shp = shape output in
  let output_cols = output_shp.(2) in
  reshape output [|batches; output_cols; out_channel|]


(* In-place variant of [dilated_conv1d]: same reshaping and the same lifting
   of [stride] and [rate], but the work is delegated to [dilated_conv2d_]
   with the caller-supplied [out]. *)
let dilated_conv1d_ ~out ?(padding=SAME) input kernel stride rate =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let col_stride = stride.(0) in
  let stride = [|1; col_stride|] in
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  dilated_conv2d_ ~out ~padding input kernel stride rate


(* gradient of dilated_conv1d w.r.t the input 
*)

(* Implemented on top of [dilated_conv2d_backward_input]: [input], [kernel]
   and [output'] are reshaped to 4d with a singleton row dimension and the
   1-element [stride] is lifted to [|1; col_stride|].

   The 1-element [rate] is lifted the same way to [|1; col_rate|]; previously
   it was forwarded untouched. A [rate] whose length is not 1 is passed
   through unchanged for backward compatibility.

   The result is reshaped back to the original 3d shape of [input]. *)
let dilated_conv1d_backward_input input kernel stride rate output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input_rows = 1 in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel_rows = 1 in
  let kernel = reshape kernel [|kernel_rows; kernel_cols; in_channel; out_channel|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  assert (batches = output'_shp.(0));
  assert (out_channel = output'_shp.(2));
  let output_rows = 1 in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in

  (* lift the dilation rate exactly like the stride *)
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  let input' = dilated_conv2d_backward_input input kernel stride rate output' in
  reshape input' input_shp


(* In-place variant of [dilated_conv1d_backward_input]: same reshaping and
   lifting, delegated to [dilated_conv2d_backward_input_] with [out]. *)
let dilated_conv1d_backward_input_ ~out input kernel stride rate output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input_rows = 1 in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel_rows = 1 in
  let kernel = reshape kernel [|kernel_rows; kernel_cols; in_channel; out_channel|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  assert (batches = output'_shp.(0));
  assert (out_channel = output'_shp.(2));
  let output_rows = 1 in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  dilated_conv2d_backward_input_ ~out input kernel stride rate output'


(* gradient of dilated_conv1d w.r.t the kernel 
*)

(* Implemented on top of [dilated_conv2d_backward_kernel]: [input], [kernel]
   and [output'] are reshaped to 4d with a singleton row dimension and the
   1-element [stride] is lifted to [|1; col_stride|].

   The 1-element [rate] is lifted the same way to [|1; col_rate|]; previously
   it was forwarded untouched. A [rate] whose length is not 1 is passed
   through unchanged for backward compatibility.

   The result is reshaped back to the original 3d shape of [kernel]. *)
let dilated_conv1d_backward_kernel input kernel stride rate output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input_rows = 1 in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel_rows = 1 in
  let kernel = reshape kernel [|kernel_rows; kernel_cols; in_channel; out_channel|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  assert (batches = output'_shp.(0));
  assert (out_channel = output'_shp.(2));
  let output_rows = 1 in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in

  (* lift the dilation rate exactly like the stride *)
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  let kernel' = dilated_conv2d_backward_kernel input kernel stride rate output' in
  reshape kernel' kernel_shp


(* In-place variant of [dilated_conv1d_backward_kernel]: same reshaping and
   lifting, delegated to [dilated_conv2d_backward_kernel_] with [out]. *)
let dilated_conv1d_backward_kernel_ ~out input kernel stride rate output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let in_channel = input_shp.(2) in
  let input_rows = 1 in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols = kernel_shp.(0) in
  let out_channel = kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel_rows = 1 in
  let kernel = reshape kernel [|kernel_rows; kernel_cols; in_channel; out_channel|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  assert (batches = output'_shp.(0));
  assert (out_channel = output'_shp.(2));
  let output_rows = 1 in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in
  let rate = if Array.length rate = 1 then [|1; rate.(0)|] else rate in

  dilated_conv2d_backward_kernel_ ~out input kernel stride rate output'


(* transpose_conv2d: 4d input and 4d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_channel]
kernel: [kernel_column; kernel_row; input_channel; output_channel]
stride: [column_stride; row_stride]
output: [batch; output_column; output_row; output_channel]
*)

(* Out-of-place variant: the output extent is inferred with
   [Owl_utils_infer_shape.calc_transpose_conv2d_output_shape], a fresh array
   of that shape is allocated, and the work is done by the backward-input
   routine of the spatial convolution. [input] and [kernel] must be 4d and
   [stride] must hold 2 elements. *)
let transpose_conv2d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride, row_stride = stride.(0), stride.(1) in
  (* in-strides are fixed to 1 here *)
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_transpose_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in
  let output = empty (kind input) [|batches; output_cols; output_rows; out_channel|] in

  _owl_spatial_conv_backward_input (kind input)
    output kernel input
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    row_in_stride col_in_stride;
  output


(* In-place variant of [transpose_conv2d]: identical checks and shape
   inference, but [out] takes the place of the freshly allocated output. *)
let transpose_conv2d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_transpose_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in

  _owl_spatial_conv_backward_input (kind input)
    out kernel input
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    row_in_stride col_in_stride


(* gradient of transpose_conv2d w.r.t the kernel 
*)

(* Out-of-place variant: allocates an array with the kind and shape of
   [kernel], lets the spatial backward-kernel routine fill it, and returns it.
   [input], [kernel] and [output'] must be 4d; [stride] holds 2 elements. *)
let transpose_conv2d_backward_kernel input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols, output_rows = oshp.(1), oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_spatial_conv_backward_kernel (kind input)
    output' kernel' input
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    row_in_stride col_in_stride;
  kernel'


(* In-place variant of [transpose_conv2d_backward_kernel]: [out] takes the
   place of the freshly allocated kernel gradient. *)
let transpose_conv2d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols, output_rows = oshp.(1), oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  _owl_spatial_conv_backward_kernel (kind input)
    output' out input
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    row_in_stride col_in_stride


(* gradient of transpose_conv2d w.r.t the input 
*)

(* Out-of-place variant: allocates an array with the kind and shape of
   [input], runs the forward spatial convolution routine on [output'] with a
   placeholder padding code of 0, and returns the filled array.
   [input], [kernel] and [output'] must be 4d; [stride] holds 2 elements. *)
let transpose_conv2d_backward_input input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols, output_rows = oshp.(1), oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let input' = empty (kind input) (shape input) in
  let dummy_pad_typ = 0 in
  _owl_spatial_conv (kind input)
    output' kernel input'
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    dummy_pad_typ
    row_in_stride col_in_stride;
  input'


(* In-place variant of [transpose_conv2d_backward_input]: [out] takes the
   place of the freshly allocated input gradient. *)
let transpose_conv2d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 4);
  assert (num_dims kernel = 4);
  assert (num_dims output' = 4);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, out_channel = kshp.(0), kshp.(1), kshp.(3) in
  assert (in_channel = kshp.(2));

  let oshp = shape output' in
  let output_cols, output_rows = oshp.(1), oshp.(2) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(3));

  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let dummy_pad_typ = 0 in
  _owl_spatial_conv (kind input)
    output' kernel out
    batches output_cols output_rows out_channel
    kernel_cols kernel_rows
    input_cols input_rows in_channel
    row_stride col_stride
    dummy_pad_typ
    row_in_stride col_in_stride


(* transpose_conv3d: 5d input and 5d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_depth; input_channel]
kernel: [kernel_column; kernel_row; kernel_depth; input_channel; output_channel]
stride: [column_stride; row_stride; depth_stride]
output: [batch; output_column; output_row; output_dpts; output_channel]
*)

(* Out-of-place variant: the output extent is inferred with
   [Owl_utils_infer_shape.calc_transpose_conv3d_output_shape], a fresh array
   of that shape is allocated, and the work is done by the backward-input
   routine of the cuboid convolution. [input] and [kernel] must be 5d and
   [stride] must hold 3 elements. *)
let transpose_conv3d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_transpose_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in
  let output =
    empty (kind input) [|batches; output_cols; output_rows; output_dpts; out_channel|]
  in

  _owl_cuboid_conv_backward_input (kind input)
    output kernel input
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride;
  output


(* In-place variant of [transpose_conv3d]: identical checks and shape
   inference, but [out] takes the place of the freshly allocated output. *)
let transpose_conv3d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_transpose_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in

  _owl_cuboid_conv_backward_input (kind input)
    out kernel input
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride


(* gradient of transpose_conv3d w.r.t the input *)
let transpose_conv3d_backward_input input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols, output_rows, output_dpts = oshp.(1), oshp.(2), oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  (* destination has the kind and shape of [input]; the padding code handed
     to the forward routine is a placeholder 0 *)
  let input' = empty (kind input) (shape input) in
  let dummy_pad_typ = 0 in
  _owl_cuboid_conv (kind input)
    output' kernel input'
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride
    dummy_pad_typ;
  input'


(* In-place variant of [transpose_conv3d_backward_input]: [out] takes the
   place of the freshly allocated input gradient. *)
let transpose_conv3d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols, output_rows, output_dpts = oshp.(1), oshp.(2), oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let dummy_pad_typ = 0 in
  _owl_cuboid_conv (kind input)
    output' kernel out
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride
    dummy_pad_typ


(* gradient of transpose_conv3d w.r.t the kernel *)
let transpose_conv3d_backward_kernel input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols, output_rows, output_dpts = oshp.(1), oshp.(2), oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  (* destination has the kind and shape of [kernel] *)
  let kernel' = empty (kind kernel) (shape kernel) in
  _owl_cuboid_conv_backward_kernel (kind input)
    output' kernel' input
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride;
  kernel'


(* In-place variant of [transpose_conv3d_backward_kernel]: [out] takes the
   place of the freshly allocated kernel gradient. *)
let transpose_conv3d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 5);
  assert (num_dims kernel = 5);
  assert (num_dims output' = 5);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kshp = shape kernel in
  let kernel_cols, kernel_rows, kernel_dpts, out_channel =
    kshp.(0), kshp.(1), kshp.(2), kshp.(4)
  in
  assert (in_channel = kshp.(3));

  let oshp = shape output' in
  let output_cols, output_rows, output_dpts = oshp.(1), oshp.(2), oshp.(3) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(4));

  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  _owl_cuboid_conv_backward_kernel (kind input)
    output' out input
    batches output_cols output_rows output_dpts out_channel
    kernel_cols kernel_rows kernel_dpts
    input_cols input_rows input_dpts in_channel
    dpt_stride row_stride col_stride


(* transpose_conv1d is implemented on top of [transpose_conv2d]: the 3d input
   and kernel are reshaped to 4d with a singleton leading spatial dimension,
   the 1-element [stride] becomes [|1; col_stride|], and the 4d result is
   reshaped back to [batches; output_cols; out_channel]. *)
let transpose_conv1d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols, out_channel = kshp.(0), kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let stride = [|1; stride.(0)|] in

  let output = transpose_conv2d ~padding input kernel stride in
  let output_cols = (shape output).(2) in
  reshape output [|batches; output_cols; out_channel|]


(* In-place variant of [transpose_conv1d]: same reshaping, delegated to
   [transpose_conv2d_] with the caller-supplied [out]. *)
let transpose_conv1d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols, out_channel = kshp.(0), kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let stride = [|1; stride.(0)|] in

  transpose_conv2d_ ~out ~padding input kernel stride


(* gradient of transpose_conv1d w.r.t the input 
*)

(* Implemented on top of [transpose_conv2d_backward_input]: [input], [kernel]
   and [output'] are reshaped to 4d with a singleton row dimension, [stride]
   becomes [|1; col_stride|], and the 4d result is reshaped back to the
   original 3d shape of [input]. *)
let transpose_conv1d_backward_input input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches, input_cols, in_channel =
    input_shp.(0), input_shp.(1), input_shp.(2)
  in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols, out_channel = kshp.(0), kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  let input' = transpose_conv2d_backward_input input kernel stride output' in
  reshape input' input_shp


(* In-place variant of [transpose_conv1d_backward_input]: same reshaping,
   delegated to [transpose_conv2d_backward_input_] with [out]. *)
let transpose_conv1d_backward_input_ ~out input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches, input_cols, in_channel =
    input_shp.(0), input_shp.(1), input_shp.(2)
  in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kshp = shape kernel in
  let kernel_cols, out_channel = kshp.(0), kshp.(2) in
  assert (in_channel = kshp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  transpose_conv2d_backward_input_ ~out input kernel stride output'


(* gradient of transpose_conv1d w.r.t the kernel 
*)

(* Implemented on top of [transpose_conv2d_backward_kernel]: [input],
   [kernel] and [output'] are reshaped to 4d with a singleton row dimension,
   [stride] becomes [|1; col_stride|], and the 4d result is reshaped back to
   the original 3d shape of [kernel]. *)
let transpose_conv1d_backward_kernel input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols, out_channel = kernel_shp.(0), kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  let kernel' = transpose_conv2d_backward_kernel input kernel stride output' in
  reshape kernel' kernel_shp


(* In-place variant of [transpose_conv1d_backward_kernel]: same reshaping,
   delegated to [transpose_conv2d_backward_kernel_] with [out]. *)
let transpose_conv1d_backward_kernel_ ~out input kernel stride output' =
  assert (num_dims input = 3);
  assert (num_dims kernel = 3);
  assert (num_dims output' = 3);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel_shp = shape kernel in
  let kernel_cols, out_channel = kernel_shp.(0), kernel_shp.(2) in
  assert (in_channel = kernel_shp.(1));
  let kernel = reshape kernel [|1; kernel_cols; in_channel; out_channel|] in

  let oshp = shape output' in
  let output_cols = oshp.(1) in
  assert (batches = oshp.(0));
  assert (out_channel = oshp.(2));
  let output' = reshape output' [|batches; 1; output_cols; out_channel|] in

  let stride = [|1; stride.(0)|] in

  transpose_conv2d_backward_kernel_ ~out input kernel stride output'


(* max_pool2d: 4d input and 2d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_channel]
kernel: [kernel_column; kernel_row]
stride: [column_stride; row_stride]
output: [batch; output_column; output_row; input_channel]
*)

(* Out-of-place variant: the output extent comes from
   [Owl_utils_infer_shape.calc_conv2d_output_shape]; a fresh array of shape
   [batches; output_cols; output_rows; in_channel] is allocated, filled by
   the native max-pooling routine and returned. [padding] is encoded as
   0 for SAME and 1 for VALID. *)
let max_pool2d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kernel_cols, kernel_rows = kernel.(0), kernel.(1) in
  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in
  let output = empty (kind input) [|batches; output_cols; output_rows; in_channel|] in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_spatial_max_pooling (kind input)
    input output
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows
    output_cols output_rows
    row_stride col_stride
    pad_typ
    row_in_stride col_in_stride;
  output


(* In-place variant of [max_pool2d]: identical checks and shape inference,
   but [out] takes the place of the freshly allocated output. *)
let max_pool2d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kernel_cols, kernel_rows = kernel.(0), kernel.(1) in
  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_spatial_max_pooling (kind input)
    input out
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows
    output_cols output_rows
    row_stride col_stride
    pad_typ
    row_in_stride col_in_stride


(* max_pool1d: 3d input and 1d kernel, refer to tensorflow doc
input : [batch; input_column; input_channel]
kernel: [kernel_column]
stride: [column_stride]
output: [batch; output_column; input_channel]
*)

(* Implemented on top of [max_pool2d]: the input is reshaped to
   [batches; 1; input_cols; in_channel], [kernel] and [stride] are lifted to
   [|1; k|] and [|1; s|], and the 4d result is reshaped back to
   [batches; output_cols; in_channel]. *)
let max_pool1d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel = [|1; kernel.(0)|] in
  let stride = [|1; stride.(0)|] in

  let output = max_pool2d ~padding input kernel stride in
  let output_cols = (shape output).(2) in
  reshape output [|batches; output_cols; in_channel|]


(* In-place variant of [max_pool1d]: same lifting to 2d, delegated to
   [max_pool2d_] with the caller-supplied [out]. *)
let max_pool1d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel = [|1; kernel.(0)|] in
  let stride = [|1; stride.(0)|] in

  max_pool2d_ ~padding ~out input kernel stride


(* similar to max_pool2d 
*)

(* Out-of-place variant: the output extent comes from
   [Owl_utils_infer_shape.calc_conv2d_output_shape]; a fresh array of shape
   [batches; output_cols; output_rows; in_channel] is allocated, filled by
   the native average-pooling routine and returned. [padding] is encoded as
   0 for SAME and 1 for VALID. *)
let avg_pool2d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kernel_cols, kernel_rows = kernel.(0), kernel.(1) in
  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in
  let output = empty (kind input) [|batches; output_cols; output_rows; in_channel|] in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_spatial_avg_pooling (kind input)
    input output
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows
    output_cols output_rows
    row_stride col_stride
    pad_typ
    row_in_stride col_in_stride;
  output


(* In-place variant of [avg_pool2d]: identical checks and shape inference,
   but [out] takes the place of the freshly allocated output. *)
let avg_pool2d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let ishp = shape input in
  let batches, input_cols, input_rows, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3)
  in

  let kernel_cols, kernel_rows = kernel.(0), kernel.(1) in
  let col_stride, row_stride = stride.(0), stride.(1) in
  let col_in_stride, row_in_stride = 1, 1 in

  let output_cols, output_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape
      padding input_cols input_rows kernel_cols kernel_rows
      row_stride col_stride
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_spatial_avg_pooling (kind input)
    input out
    batches input_cols input_rows in_channel
    kernel_cols kernel_rows
    output_cols output_rows
    row_stride col_stride
    pad_typ
    row_in_stride col_in_stride


(* similar to max_pool1d 
*)

(* Implemented on top of [avg_pool2d]: the input is reshaped to
   [batches; 1; input_cols; in_channel], [kernel] and [stride] are lifted to
   [|1; k|] and [|1; s|], and the 4d result is reshaped back to
   [batches; output_cols; in_channel]. *)
let avg_pool1d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel = [|1; kernel.(0)|] in
  let stride = [|1; stride.(0)|] in

  let output = avg_pool2d ~padding input kernel stride in
  let output_cols = (shape output).(2) in
  reshape output [|batches; output_cols; in_channel|]


(* In-place variant of [avg_pool1d]: same lifting to 2d, delegated to
   [avg_pool2d_] with the caller-supplied [out]. *)
let avg_pool1d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let ishp = shape input in
  let batches, input_cols, in_channel = ishp.(0), ishp.(1), ishp.(2) in
  let input = reshape input [|batches; 1; input_cols; in_channel|] in

  let kernel = [|1; kernel.(0)|] in
  let stride = [|1; stride.(0)|] in

  avg_pool2d_ ~out ~padding input kernel stride


(* max_pool3d: 5d input and 3d kernel, refer to tensorflow doc
input : [batch; input_column; input_row; input_depth; input_channel]
kernel: [kernel_column; kernel_row; kernel_depth]
stride: [column_stride; row_stride; depth_stride]
output: [batch; output_column; output_row; output_dpts; input_channel]
*)

(* Out-of-place variant: the output extent comes from
   [Owl_utils_infer_shape.calc_conv3d_output_shape]; a fresh array of shape
   [batches; output_cols; output_rows; output_dpts; in_channel] is allocated,
   filled by the native cuboid max-pooling routine and returned. [padding] is
   encoded as 0 for SAME and 1 for VALID. *)
let max_pool3d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kernel_cols, kernel_rows, kernel_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in
  let output =
    empty (kind input) [|batches; output_cols; output_rows; output_dpts; in_channel|]
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_cuboid_max_pooling (kind input)
    input output
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts
    dpt_stride row_stride col_stride
    pad_typ;
  output


(* In-place variant of [max_pool3d]: identical checks and shape inference,
   but [out] takes the place of the freshly allocated output. *)
let max_pool3d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kernel_cols, kernel_rows, kernel_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_cuboid_max_pooling (kind input)
    input out
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts
    dpt_stride row_stride col_stride
    pad_typ


(* similar to max_pool3d 
*)

(* Out-of-place variant: the output extent comes from
   [Owl_utils_infer_shape.calc_conv3d_output_shape]; a fresh array of shape
   [batches; output_cols; output_rows; output_dpts; in_channel] is allocated,
   filled by the native cuboid average-pooling routine and returned.
   [padding] is encoded as 0 for SAME and 1 for VALID. *)
let avg_pool3d ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kernel_cols, kernel_rows, kernel_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in
  let output =
    empty (kind input) [|batches; output_cols; output_rows; output_dpts; in_channel|]
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_cuboid_avg_pooling (kind input)
    input output
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts
    dpt_stride row_stride col_stride
    pad_typ;
  output


(* In-place variant of [avg_pool3d]: identical checks and shape inference,
   but [out] takes the place of the freshly allocated output. *)
let avg_pool3d_ ~out ?(padding=SAME) input kernel stride =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let ishp = shape input in
  let batches, input_cols, input_rows, input_dpts, in_channel =
    ishp.(0), ishp.(1), ishp.(2), ishp.(3), ishp.(4)
  in

  let kernel_cols, kernel_rows, kernel_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let col_stride, row_stride, dpt_stride = stride.(0), stride.(1), stride.(2) in

  let output_cols, output_rows, output_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape
      padding input_cols input_rows input_dpts
      kernel_cols kernel_rows kernel_dpts
      row_stride col_stride dpt_stride
  in

  let pad_typ =
    match padding with
    | SAME  -> 0
    | VALID -> 1
  in

  _owl_cuboid_avg_pooling (kind input)
    input out
    batches input_cols input_rows input_dpts in_channel
    kernel_cols kernel_rows kernel_dpts
    output_cols output_rows output_dpts
    dpt_stride row_stride col_stride
    pad_typ


(* similar to max_pool2d, but also return the flatten indices of the max values 
*)
(* Returns the pair [(output, argmax)]. Both have shape
   [|batches; output_cols; output_rows; in_channel|]; [argmax] is an int64
   ndarray filled in by the C stub alongside the pooled values. *)
let max_pool2d_argmax ?(padding=SAME) input kernel stride =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let dims = shape input in
  let batches, in_cols, in_rows, channels =
    dims.(0), dims.(1), dims.(2), dims.(3)
  in
  let k_cols, k_rows = kernel.(0), kernel.(1) in
  let s_col, s_row = stride.(0), stride.(1) in

  let out_cols, out_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape padding
      in_cols in_rows k_cols k_rows s_row s_col
  in
  let out_shape = [|batches; out_cols; out_rows; channels|] in
  let output = empty (kind input) out_shape in
  let argmax = Genarray.create int64 c_layout out_shape in

  let pad_top, pad_left, _, _ =
    Owl_utils_infer_shape.calc_conv2d_padding
      in_cols in_rows k_cols k_rows out_cols out_rows s_row s_col
  in

  _owl_spatial_max_pooling_argmax (kind input) input output argmax
    batches in_cols in_rows channels
    k_cols k_rows
    out_cols out_rows
    s_row s_col
    pad_top pad_left;
  output, argmax


(* calculate the gradient of max_pool3d 
*)
(* [output'] is the gradient flowing back from the pooled output; the
   returned ndarray has the same kind and shape as [input].
   NOTE(review): the strides are handed to the C stub as col, row, dpt here,
   whereas the forward [max_pool3d] passes them as dpt, row, col - confirm
   against the stub signatures. *)
let max_pool3d_backward padding input kernel stride output' =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let dims = shape input in
  let batches, in_cols, in_rows, in_dpts, channels =
    dims.(0), dims.(1), dims.(2), dims.(3), dims.(4)
  in
  let k_cols, k_rows, k_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let s_col, s_row, s_dpt = stride.(0), stride.(1), stride.(2) in

  let out_cols, out_rows, out_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape padding
      in_cols in_rows in_dpts k_cols k_rows k_dpts s_row s_col s_dpt
  in
  (* the C stub takes the padding mode as an int: 0 for SAME, 1 for VALID *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in
  let input' = empty (kind input) (shape input) in

  _owl_cuboid_max_pooling_backward (kind input) input output' input'
    batches in_cols in_rows in_dpts channels
    k_cols k_rows k_dpts
    out_cols out_rows out_dpts
    s_col s_row s_dpt
    pad_typ;
  input'


(* Same computation as [max_pool3d_backward], but the input gradient is
   written into the caller-supplied [out]. *)
let max_pool3d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let dims = shape input in
  let batches, in_cols, in_rows, in_dpts, channels =
    dims.(0), dims.(1), dims.(2), dims.(3), dims.(4)
  in
  let k_cols, k_rows, k_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let s_col, s_row, s_dpt = stride.(0), stride.(1), stride.(2) in

  let out_cols, out_rows, out_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape padding
      in_cols in_rows in_dpts k_cols k_rows k_dpts s_row s_col s_dpt
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_cuboid_max_pooling_backward (kind input) input output' out
    batches in_cols in_rows in_dpts channels
    k_cols k_rows k_dpts
    out_cols out_rows out_dpts
    s_col s_row s_dpt
    pad_typ


(* calculate the gradient of max_pool2d 
*)
(* [output'] is the gradient flowing back from the pooled output; the
   returned ndarray has the same kind and shape as [input]. *)
let max_pool2d_backward padding input kernel stride output' =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let dims = shape input in
  let batches, in_cols, in_rows, channels =
    dims.(0), dims.(1), dims.(2), dims.(3)
  in
  let k_cols, k_rows = kernel.(0), kernel.(1) in
  let s_col, s_row = stride.(0), stride.(1) in

  let out_cols, out_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape padding
      in_cols in_rows k_cols k_rows s_row s_col
  in
  let pad_top, pad_left, _, _ =
    Owl_utils_infer_shape.calc_conv2d_padding
      in_cols in_rows k_cols k_rows out_cols out_rows s_row s_col
  in
  let input' = empty (kind input) (shape input) in

  _owl_spatial_max_pooling_backward (kind input) input output' input'
    batches in_cols in_rows channels
    k_cols k_rows
    out_cols out_rows
    s_row s_col
    pad_top pad_left;
  input'


(* Same computation as [max_pool2d_backward], but the input gradient is
   written into the caller-supplied [out]. *)
let max_pool2d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let dims = shape input in
  let batches, in_cols, in_rows, channels =
    dims.(0), dims.(1), dims.(2), dims.(3)
  in
  let k_cols, k_rows = kernel.(0), kernel.(1) in
  let s_col, s_row = stride.(0), stride.(1) in

  let out_cols, out_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape padding
      in_cols in_rows k_cols k_rows s_row s_col
  in
  let pad_top, pad_left, _, _ =
    Owl_utils_infer_shape.calc_conv2d_padding
      in_cols in_rows k_cols k_rows out_cols out_rows s_row s_col
  in

  _owl_spatial_max_pooling_backward (kind input) input output' out
    batches in_cols in_rows channels
    k_cols k_rows
    out_cols out_rows
    s_row s_col
    pad_top pad_left


(* calculate the gradient of max_pool1d 
*)
(* Implemented on top of [max_pool2d_backward]: the 3-d [input] and
   [output'] are reshaped to 4-d by inserting a unit dimension at index 1,
   and the kernel and stride are padded with a leading 1. The result is
   reshaped back to the original shape of [input]. *)
let max_pool1d_backward padding input kernel stride output' =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches, in_cols, channels =
    input_shp.(0), input_shp.(1), input_shp.(2)
  in
  let input4 = reshape input [|batches; 1; in_cols; channels|] in
  let kernel2 = [|1; kernel.(0)|] in
  let stride2 = [|1; stride.(0)|] in

  let out_shp = shape output' in
  let output4 = reshape output' [|batches; 1; out_shp.(1); out_shp.(2)|] in

  let input' = max_pool2d_backward padding input4 kernel2 stride2 output4 in
  reshape input' input_shp


(* Same reshaping as [max_pool1d_backward], delegating to
   [max_pool2d_backward_] so that the gradient is written into [out]. *)
let max_pool1d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);

  let input_shp = shape input in
  let batches, in_cols, channels =
    input_shp.(0), input_shp.(1), input_shp.(2)
  in
  let input4 = reshape input [|batches; 1; in_cols; channels|] in
  let kernel2 = [|1; kernel.(0)|] in
  let stride2 = [|1; stride.(0)|] in

  let out_shp = shape output' in
  let output4 = reshape output' [|batches; 1; out_shp.(1); out_shp.(2)|] in

  max_pool2d_backward_ ~out padding input4 kernel2 stride2 output4


(* calculate the gradient of avg_pool3d 
*)
(* [output'] is the gradient flowing back from the pooled output; the
   returned ndarray has the same kind and shape as [input]. Only the shape
   and kind of [input] are used; its contents are not passed to the stub. *)
let avg_pool3d_backward padding input kernel stride output' =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let dims = shape input in
  let batches, in_cols, in_rows, in_dpts, channels =
    dims.(0), dims.(1), dims.(2), dims.(3), dims.(4)
  in
  let k_cols, k_rows, k_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let s_col, s_row, s_dpt = stride.(0), stride.(1), stride.(2) in

  let out_cols, out_rows, out_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape padding
      in_cols in_rows in_dpts k_cols k_rows k_dpts s_row s_col s_dpt
  in
  (* the C stub takes the padding mode as an int: 0 for SAME, 1 for VALID *)
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in
  let input' = empty (kind input) (shape input) in

  _owl_cuboid_avg_pooling_backward (kind input) input' output'
    batches in_cols in_rows in_dpts channels
    k_cols k_rows k_dpts
    out_cols out_rows out_dpts
    s_col s_row s_dpt
    pad_typ;
  input'


(* Same computation as [avg_pool3d_backward], but the input gradient is
   written into the caller-supplied [out]. *)
let avg_pool3d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 5);
  assert (Array.length kernel = 3);
  assert (Array.length stride = 3);

  let dims = shape input in
  let batches, in_cols, in_rows, in_dpts, channels =
    dims.(0), dims.(1), dims.(2), dims.(3), dims.(4)
  in
  let k_cols, k_rows, k_dpts = kernel.(0), kernel.(1), kernel.(2) in
  let s_col, s_row, s_dpt = stride.(0), stride.(1), stride.(2) in

  let out_cols, out_rows, out_dpts =
    Owl_utils_infer_shape.calc_conv3d_output_shape padding
      in_cols in_rows in_dpts k_cols k_rows k_dpts s_row s_col s_dpt
  in
  let pad_typ = match padding with SAME -> 0 | VALID -> 1 in

  _owl_cuboid_avg_pooling_backward (kind input) out output'
    batches in_cols in_rows in_dpts channels
    k_cols k_rows k_dpts
    out_cols out_rows out_dpts
    s_col s_row s_dpt
    pad_typ


(* calculate the gradient of avg_pool2d 
*)
(* [output'] is the gradient flowing back from the pooled output; the
   returned ndarray has the same kind and shape as [input]. Only the shape
   and kind of [input] are used; its contents are not passed to the stub. *)
let avg_pool2d_backward padding input kernel stride output' =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let dims = shape input in
  let batches, in_cols, in_rows, channels =
    dims.(0), dims.(1), dims.(2), dims.(3)
  in
  let k_cols, k_rows = kernel.(0), kernel.(1) in
  let s_col, s_row = stride.(0), stride.(1) in

  let out_cols, out_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape padding
      in_cols in_rows k_cols k_rows s_row s_col
  in
  let pad_top, pad_left, _, _ =
    Owl_utils_infer_shape.calc_conv2d_padding
      in_cols in_rows k_cols k_rows out_cols out_rows s_row s_col
  in
  let input' = empty (kind input) (shape input) in

  _owl_spatial_avg_pooling_backward (kind input) input' output'
    batches in_cols in_rows channels
    k_cols k_rows
    out_cols out_rows
    s_row s_col
    pad_top pad_left;
  input'


(* Same computation as [avg_pool2d_backward], but the input gradient is
   written into the caller-supplied [out]. *)
let avg_pool2d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 4);
  assert (Array.length kernel = 2);
  assert (Array.length stride = 2);

  let dims = shape input in
  let batches, in_cols, in_rows, channels =
    dims.(0), dims.(1), dims.(2), dims.(3)
  in
  let k_cols, k_rows = kernel.(0), kernel.(1) in
  let s_col, s_row = stride.(0), stride.(1) in

  let out_cols, out_rows =
    Owl_utils_infer_shape.calc_conv2d_output_shape padding
      in_cols in_rows k_cols k_rows s_row s_col
  in
  let pad_top, pad_left, _, _ =
    Owl_utils_infer_shape.calc_conv2d_padding
      in_cols in_rows k_cols k_rows out_cols out_rows s_row s_col
  in

  _owl_spatial_avg_pooling_backward (kind input) out output'
    batches in_cols in_rows channels
    k_cols k_rows
    out_cols out_rows
    s_row s_col
    pad_top pad_left


(* calculate the gradient of avg_pool1d 
*)
(* Implemented on top of [avg_pool2d_backward]: the 3-d [input] and
   [output'] are reshaped to 4-d by inserting a unit dimension at index 1,
   and the kernel and stride are padded with a leading 1. The result is
   reshaped back to the original shape of [input]. *)
let avg_pool1d_backward padding input kernel stride output' =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);
  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let input_rows = 1 in
  let in_channel = input_shp.(2) in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_cols = kernel.(0) in
  let kernel_rows = 1 in
  let kernel = [|kernel_rows; kernel_cols|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  let output_rows = 1 in
  let out_channel = output'_shp.(2) in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  let input' = avg_pool2d_backward padding input kernel stride output' in
  reshape input' input_shp


(* Same reshaping as [avg_pool1d_backward], delegating to
   [avg_pool2d_backward_] so that the gradient is written into [out]. *)
let avg_pool1d_backward_ ~out padding input kernel stride output' =
  assert (num_dims input = 3);
  assert (Array.length kernel = 1);
  assert (Array.length stride = 1);
  let input_shp = shape input in
  let batches = input_shp.(0) in
  let input_cols = input_shp.(1) in
  let input_rows = 1 in
  let in_channel = input_shp.(2) in
  let input = reshape input [|batches; input_rows; input_cols; in_channel|] in

  let kernel_cols = kernel.(0) in
  let kernel_rows = 1 in
  let kernel = [|kernel_rows; kernel_cols|] in

  let col_stride = stride.(0) in
  let row_stride = 1 in
  let stride = [|row_stride; col_stride|] in

  let output'_shp = shape output' in
  let output_cols = output'_shp.(1) in
  let output_rows = 1 in
  let out_channel = output'_shp.(2) in
  let output' = reshape output' [|batches; output_rows; output_cols; out_channel|] in

  avg_pool2d_backward_ ~out padding input kernel stride output'


(* One differencing pass along axis [a], which must already be a valid
   non-negative index. The result has one element fewer along [a]. *)
let _diff a x =
  let _stride = strides x in
  let _slicez = slice_size x in
  let m = (numel x) / _slicez.(a) in
  let n = _slicez.(a) - _stride.(a) in
  let incx_m = _slicez.(a) in
  let incx_n = 1 in
  let incy_m = _slicez.(a) - _stride.(a) in
  let incy_n = 1 in
  let ofsx = _stride.(a) in
  let ofsy = 0 in

  let k = kind x in
  let s = shape x in
  s.(a) <- s.(a) - 1;
  let y = empty k s in
  _owl_diff k m n x ofsx incx_m incx_n y ofsy incy_m incy_n;
  y


(* Applies [_diff] [n] times along [axis] (default: last axis). [n] must be
   smaller than the size of [x] along that axis. *)
let diff ?(axis=(-1)) ?(n=1) x =
  let d = num_dims x in
  let a = Owl_utils.adjust_index axis d in
  assert (n < nth_dim x a);
  let y = ref x in
  for _i = 1 to n do
    y := _diff a !y
  done;
  !y


(* Returns a zero-initialised ndarray of shape [shape idx @ [|depth|]] which
   is then filled by [_owl_one_hot]. *)
let one_hot depth idx =
  let sx = shape idx in
  let sy = Array.append sx [|depth|] in
  let k = kind idx in
  let n = numel idx in
  let y = zeros (kind idx) sy in
  _owl_one_hot k n ~ofsx:0 ~incx:1 ~ofsy:0 ~incy:depth idx y;
  y


(* In-place variant of [one_hot]: [out] is reset to zero and then filled. *)
let one_hot_ ~out depth idx =
  let k = kind idx in
  let n = numel idx in
  reset out;
  _owl_one_hot k n ~ofsx:0 ~incx:1 ~ofsy:0 ~incy:depth idx out


(* TODO: optimise performance, slow along the low dimension *)
(* Computes the offsets and increments for a cumulative operation along
   [axis] (default: last axis) and hands them to [_cumop]. *)
let cumulative_op ?(axis=(-1)) _cumop x y =
  let d = num_dims x in
  let a = Owl_utils.adjust_index axis d in

  let _stride = strides x in
  let _slicez = slice_size x in
  let m = (numel x) / _slicez.(a) in
  let n = _slicez.(a) - _stride.(a) in
  let incx_m = _slicez.(a) in
  let incx_n = 1 in
  let incy_m = _slicez.(a) in
  let incy_n = 1 in
  let ofsx = 0 in
  let ofsy = _stride.(a) in

  _cumop m n x ofsx incx_m incx_n y ofsy incy_m incy_n


(* The four functions below work on a copy of [x], so [x] is untouched. *)
let cumsum ?axis x =
  let x = copy x in
  let _cumop = _owl_cumsum (kind x) in
  cumulative_op ?axis _cumop x x;
  x


let cumprod ?axis x =
  let x = copy x in
  let _cumop = _owl_cumprod (kind x) in
  cumulative_op ?axis _cumop x x;
  x


let cummin ?axis x =
  let x = copy x in
  let _cumop = _owl_cummin (kind x) in
  cumulative_op ?axis _cumop x x;
  x


let cummax ?axis x =
  let x = copy x in
  let _cumop = _owl_cummax (kind x) in
  cumulative_op ?axis _cumop x x;
  x


(* Returns a pair of fresh ndarrays filled by [_owl_modf]; the first starts
   as a copy of [x], the second has the same kind and shape. *)
let modf x =
  let x = copy x in
  let y = empty (kind x) (shape x) in
  (* the last parameter zero is just a dummy parameter *)
  _owl_modf (kind x) (numel x) x y (Owl_const.zero (kind x));
  x, y


(* Cuts [x] along its first dimension into consecutive pieces whose sizes
   are given by [parts]; the sizes must add up to the size of dimension 0.
   Each piece is obtained with [sub_left].
   The start of piece [i] is the sum of the sizes of pieces [0 .. i-1]. The
   previous running-offset code added [parts.(i)] instead of [parts.(i-1)],
   which is only right when all parts are equal, and it indexed [parts.(0)]
   even for an empty [parts]. *)
let sub_ndarray parts x =
  let n = Array.fold_left (+) 0 parts in
  assert (n = (shape x).(0));
  let m = Array.length parts in
  (* prefix sums: offsets.(i) = parts.(0) + ... + parts.(i-1) *)
  let offsets = Array.make m 0 in
  for i = 1 to m - 1 do
    offsets.(i) <- offsets.(i - 1) + parts.(i - 1)
  done;
  Array.init m (fun i -> sub_left x offsets.(i) parts.(i))


(* Splits [x] along [axis] (default 0) into slices whose sizes are given by
   [parts]; the sizes must add up to the size of [x] along that axis.
   [axis] is normalised against the number of dimensions of [x]. It used to
   be normalised against the sum of [parts], which mis-resolved negative
   axes. *)
let split ?(axis=0) parts x =
  let x_shp = shape x in
  let x_dim = num_dims x in
  let _d = Array.fold_left ( + ) 0 parts in
  let a = Owl_utils.adjust_index axis x_dim in
  assert (a < x_dim);
  assert (_d = x_shp.(a));
  let _pos = ref 0 in
  let slices = Array.map (fun d ->
    let s_def = Array.make x_dim (R_ [||]) in
    s_def.(a) <- R_ [|!_pos; !_pos + d - 1|];
    _pos := !_pos + d;
    Owl_slicing.get_slice_array_typ s_def x
  ) parts
  in
  slices


(* [parts] is an array of rows of [(height, width)] pairs. [x] is first cut
   along dimension 0 using the height of the first pair of every row, and
   each piece is then split along axis 1 using the widths of its row. *)
let split_vh parts x =
  assert (num_dims x >= 2);
  let parts_a0 = Array.map (fun p -> fst p.(0)) parts in
  Array.mapi (fun i part ->
    let parts_a1 = Array.map snd parts.(i) in
    split ~axis:1 parts_a1 part
  ) (sub_ndarray parts_a0 x)


let sum' x = _owl_sum (kind x) (numel x) x


let prod' x = _owl_prod (kind x) (numel x) x


(* TODO: performance can be optimised by removing embedded loops *)
(*
generic fold function *)
(* [foldi ?axis f a x] folds [f index acc elt] over [x], starting from [a].
   With [~axis] the result has the shape returned by
   [Owl_utils.reduce_params]; without it the result is a one-element
   ndarray. *)
let foldi ?axis f a x =
  let x' = flatten x |> array1_of_genarray in
  match axis with
  | Some axis -> (
      let m, n, o, s = Owl_utils.reduce_params axis x in
      let start_x = ref 0 in
      let start_y = ref 0 in
      let incy = ref 0 in
      let k = ref 0 in

      let y = create (kind x) s a in
      let y' = flatten y |> array1_of_genarray in

      for _i = 0 to m - 1 do
        for j = 0 to n - 1 do
          let b = Array1.unsafe_get y' (!start_y + !incy) in
          let c = Array1.unsafe_get x' (!start_x + j) in
          Array1.unsafe_set y' (!start_y + !incy) (f !k b c);
          (* incy cycles through 0 .. o-1 *)
          if !incy + 1 = o then incy := 0
          else incy := !incy + 1;
          k := !k + 1;
        done;
        start_x := !start_x + n;
        start_y := !start_y + o;
      done;
      y
    )
  | None -> (
      let b = ref a in
      for i = 0 to (numel x) - 1 do
        let c = Array1.unsafe_get x' i in
        b := f i !b c
      done;
      create (kind x) [|1|] !b
    )


let fold ?axis f a x = foldi ?axis (fun _ b c -> f b c) a x


let foldi_nd ?axis f a x = foldi ?axis (fun i b c -> f (Owl_utils.ind x i) b c) a x


(* generic scan function *)
(* Works on a copy of [x]; each element along [axis] (default: last axis)
   is replaced by [f index previous current]. *)
let scani ?(axis=(-1)) f x =
  let d = num_dims x in
  let a = Owl_utils.adjust_index axis d in

  let _stride = strides x in
  let _slicez = slice_size x in
  let m = (numel x) / _slicez.(a) in
  let n = _slicez.(a) - _stride.(a) in
  let incx = _slicez.(a) in
  let incy = _slicez.(a) in
  let start_x = ref 0 in
  let start_y = ref _stride.(a) in
  let k = ref 0 in

  let y = copy x in
  let y' = flatten y |> array1_of_genarray in

  for _i = 0 to m - 1 do
    for j = 0 to n - 1 do
      let b = Array1.unsafe_get y' (!start_x + j) in
      let c = Array1.unsafe_get y' (!start_y + j) in
      Array1.unsafe_set y' (!start_y + j) (f !k b c);
      k := !k + 1
    done;
    start_x := !start_x + incx;
    start_y := !start_y + incy;
  done;
  y


let scan ?axis f x = scani ?axis (fun _ a b -> f a b) x


let scani_nd ?axis f x = scani ?axis (fun i a b -> f (Owl_utils.ind x i) a b) x


(* Sum along [axis], or of all elements (as a one-element ndarray) when no
   axis is given. *)
let sum ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = zeros _kind s in
      _owl_sum_along _kind m n o x y;
      y
    )
  | None   -> _owl_sum _kind (numel x) x |> create _kind [|1|]


let sum_ ~out ~axis x =
  let _kind = kind x in
  let m, n, o, _s = Owl_utils.reduce_params axis x in
  (* TODO: this can be optimised, only need to reset first slice actually. *)
  reset out;
  _owl_sum_along _kind m n o x out


let prod ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = ones _kind s in
      _owl_prod_along _kind m n o x y;
      y
    )
  | None   -> _owl_prod _kind (numel x) x |> create _kind [|1|]


let min ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = create _kind s (Owl_const.pos_inf _kind) in
      _owl_min_along _kind m n o x y;
      y
    )
  | None   -> min' x |> create _kind [|1|]


let min_ ~out ~axis x =
  let _kind = kind x in
  let m, n, o, _s = Owl_utils.reduce_params axis x in
  (* TODO: this can be optimised, only need to reset first slice actually. *)
  fill out (Owl_const.pos_inf _kind);
  _owl_min_along _kind m n o x out


let max ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = create _kind s (Owl_const.neg_inf _kind) in
      _owl_max_along _kind m n o x y;
      y
    )
  | None   -> max' x |> create _kind [|1|]


let max_ ~out ~axis x =
  let _kind = kind x in
  let m, n, o, _s = Owl_utils.reduce_params axis x in
  (* TODO: this can be optimised, only need to reset first slice actually. *)
  fill out (Owl_const.neg_inf _kind);
  _owl_max_along _kind m n o x out


let minmax ?axis x = min ?axis x, max ?axis x


let mean' x =
  let _kind = kind x in
  let _numel = numel x in
  let y = _owl_sum _kind _numel x in
  _mean_elt _kind y _numel


(* Mean along [axis], or of all elements (as a one-element ndarray) when no
   axis is given. The axis is normalised first, as [var] and [std] do, so
   that a negative axis can be used to index the shape. *)
let mean ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let a = Owl_utils.adjust_index a (num_dims x) in
      let y = sum ~axis:a x in
      let n = (shape x).(a) |> float_of_int |> _float_typ_elt _kind in
      _owl_div_scalar _kind (numel y) y y n;
      y
    )
  | None   -> mean' x |> create _kind [|1|]


(* Variance of all elements; the divisor is [max 1 (numel x - 1)]. *)
let var' x =
  let _kind = kind x in
  let mu = mean' x in
  let y = sub_scalar x mu in
  _owl_sqr _kind (numel y) y y;
  let y = sum' y in
  let n = (numel x) - 1 |> Pervasives.max 1 |> float_of_int |> _float_typ_elt _kind in
  _div_elt _kind y n


let var ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let a = Owl_utils.adjust_index a (num_dims x) in
      let mu = mean ~axis:a x in
      let y = sub x mu in
      _owl_sqr _kind (numel y) y y;
      let y = sum ~axis:a y in
      let n = (shape x).(a) - 1 |> Pervasives.max 1 |> float_of_int |> _float_typ_elt _kind in
      _owl_div_scalar _kind (numel y) y y n;
      y
    )
  | None   -> var' x |> create _kind [|1|]


(* Square root of [var'], computed the same way. *)
let std' x =
  let _kind = kind x in
  let mu = mean' x in
  let y = sub_scalar x mu in
  _owl_sqr _kind (numel y) y y;
  let y = sum' y in
  let n = (numel x) - 1 |> Pervasives.max 1 |> float_of_int |> _float_typ_elt _kind in
  _div_elt _kind y n |> _sqrt_elt _kind


let std ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let a = Owl_utils.adjust_index a (num_dims x) in
      let mu = mean ~axis:a x in
      let y = sub x mu in
      _owl_sqr _kind (numel y) y y;
      let y = sum ~axis:a y in
      let n = (shape x).(a) - 1 |> Pervasives.max 1 |> float_of_int |> _float_typ_elt _kind in
      _owl_div_scalar _kind (numel y) y y n;
      _owl_sqrt _kind (numel y) y y;
      y
    )
  | None   -> std' x |> create _kind [|1|]


let l1norm ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = zeros _kind s in
      _owl_l1norm_along _kind m n o x y;
      y
    )
  | None   -> l1norm' x |> create _kind [|1|]


let l2norm_sqr ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = zeros _kind s in
      _owl_l2norm_sqr_along _kind m n o x y;
      y
    )
  | None   -> l2norm_sqr' x |> create _kind [|1|]


let l2norm ?axis x =
  let _kind = kind x in
  match axis with
  | Some a -> (
      let m, n, o, s = Owl_utils.reduce_params a x in
      let y = zeros _kind s in
      _owl_l2norm_sqr_along _kind m n o x y;
      _owl_sqrt _kind (numel y) y y;
      y
    )
  | None   -> l2norm' x |> create _kind [|1|]


(* p-norm along [axis]. [p = 1.] and [p = 2.] dispatch to [l1norm] and
   [l2norm]; [infinity] and [neg_infinity] take the max / min of [abs x];
   any other [p] computes [(sum (abs x ** p)) ** (1/p)]. *)
let vecnorm ?axis ?(p=2.) x =
  if p = 1. then l1norm ?axis x
  else if p = 2. then l2norm ?axis x
  else (
    let y = abs x in
    if p = infinity then max ?axis y
    else if p = neg_infinity then min ?axis y
    else (
      let q = _float_typ_elt (kind x) (1. /. p) in
      let p = _float_typ_elt (kind x) p in
      let z = pow_scalar y p |> sum ?axis in
      pow_scalar z q
    )
  )


let vecnorm' ?p x =
  let y = vecnorm ?p x in
  get y [|0|]


(* this function is used for searching top/bottom values in [x] *)
(* Keeps the (at most) [n] elements that win under [cmp_fun] in [vls], with
   their flat positions in [idx], then converts the positions into n-d
   indices. [neg_ext] is the initial filler value of [vls]. *)
let _search_close_to_extreme x n neg_ext cmp_fun =
  let m = numel x in
  let n = Pervasives.min n m in
  let vls = Array.make n neg_ext in
  let idx = Array.make n max_int in
  let y = flatten x |> array1_of_genarray in
  let l = n - 1 in

  let _insert vls idx x p =
    for q = l downto 0 do
      if cmp_fun x vls.(q) then (
        if q < l then (
          vls.(q+1) <- vls.(q);
          idx.(q+1) <- idx.(q);
        );
        vls.(q) <- x;
        idx.(q) <- p;
      )
    done
  in

  for i = 0 to m - 1 do
    if cmp_fun y.{i} vls.(l) then _insert vls idx y.{i} i
  done;

  let k = num_dims x in
  let s = strides x in
  Array.map (fun i ->
    let j = Array.make k 0 in
    Owl_utils.index_1d_nd i j s;
    j
  ) idx


(* FIXME:
the (<) and (>) functions need to be changed for complex numbers, since
the Pervasives module may compare complex numbers in a different way.
*)
let top x n = _search_close_to_extreme x n (Owl_const.neg_inf (kind x)) ( > )


let bottom x n = _search_close_to_extreme x n (Owl_const.pos_inf (kind x)) ( < )


(* functions which modify the data in-place, not so pure *)

(* Shared body of the binary in-place operators. The result goes to [out],
   which defaults to [x]. When [x] and [y] have the same shape, [same_op] is
   applied element-wise; otherwise [bcast_op] is run through [broadcast_op]
   and [out] must already have the broadcast shape. *)
let _inplace_binary_op same_op bcast_op ?out x y =
  let out = match out with Some o -> o | None -> x in
  let sx = shape x in
  let sy = shape y in
  if sx = sy then same_op (kind x) (numel x) x y out
  else (
    let so = Owl_utils_infer_shape.broadcast1 sx sy in
    assert (shape out = so);
    broadcast_op (bcast_op (kind x)) x y ~out |> ignore
  )


(* Shared body of the ndarray-with-scalar in-place operators; the result
   goes to [out], which defaults to [x]. *)
let _inplace_scalar_op op ?out x a =
  let out = match out with Some o -> o | None -> x in
  op (kind x) (numel x) x out a


(* Shared body of the unary in-place operators; the result goes to [out],
   which defaults to [x]. *)
let _inplace_unary_op op ?out x =
  let out = match out with Some o -> o | None -> x in
  op (kind x) (numel x) x out


let add_ ?out x y = _inplace_binary_op _owl_add _owl_broadcast_add ?out x y

let sub_ ?out x y = _inplace_binary_op _owl_sub _owl_broadcast_sub ?out x y

let mul_ ?out x y = _inplace_binary_op _owl_mul _owl_broadcast_mul ?out x y

let div_ ?out x y = _inplace_binary_op _owl_div _owl_broadcast_div ?out x y

let pow_ ?out x y = _inplace_binary_op _owl_pow _owl_broadcast_pow ?out x y

let atan2_ ?out x y = _inplace_binary_op _owl_atan2 _owl_broadcast_atan2 ?out x y

let hypot_ ?out x y = _inplace_binary_op _owl_hypot _owl_broadcast_hypot ?out x y

let fmod_ ?out x y = _inplace_binary_op _owl_fmod _owl_broadcast_fmod ?out x y

let min2_ ?out x y = _inplace_binary_op _owl_min2 _owl_broadcast_min2 ?out x y

let max2_ ?out x y = _inplace_binary_op _owl_max2 _owl_broadcast_max2 ?out x y

let elt_equal_ ?out x y =
  _inplace_binary_op _owl_elt_equal _owl_broadcast_elt_equal ?out x y

let elt_not_equal_ ?out x y =
  _inplace_binary_op _owl_elt_not_equal _owl_broadcast_elt_not_equal ?out x y

let elt_less_ ?out x y =
  _inplace_binary_op _owl_elt_less _owl_broadcast_elt_less ?out x y

let elt_greater_ ?out x y =
  _inplace_binary_op _owl_elt_greater _owl_broadcast_elt_greater ?out x y

let elt_less_equal_ ?out x y =
  _inplace_binary_op _owl_elt_less_equal _owl_broadcast_elt_less_equal ?out x y

(* The same-shape branch used to call [_owl_elt_equal] and always wrote into
   [x]; it now uses the greater-or-equal kernel and honours [out]. *)
let elt_greater_equal_ ?out x y =
  _inplace_binary_op _owl_elt_greater_equal _owl_broadcast_elt_greater_equal ?out x y


let elt_equal_scalar_ ?out x a = _inplace_scalar_op _owl_elt_equal_scalar ?out x a

let elt_not_equal_scalar_ ?out x a = _inplace_scalar_op _owl_elt_not_equal_scalar ?out x a

let elt_less_scalar_ ?out x a = _inplace_scalar_op _owl_elt_less_scalar ?out x a

let elt_greater_scalar_ ?out x a = _inplace_scalar_op _owl_elt_greater_scalar ?out x a

let elt_less_equal_scalar_ ?out x a = _inplace_scalar_op _owl_elt_less_equal_scalar ?out x a

let elt_greater_equal_scalar_ ?out x a = _inplace_scalar_op _owl_elt_greater_equal_scalar ?out x a

let add_scalar_ ?out x a = _inplace_scalar_op _owl_add_scalar ?out x a

(* subtraction is expressed as addition of the negated scalar *)
let sub_scalar_ ?out x a =
  let out = match out with Some o -> o | None -> x in
  add_scalar_ ~out x (_neg_elt (kind x) a)

let mul_scalar_ ?out x a = _inplace_scalar_op _owl_mul_scalar ?out x a

let div_scalar_ ?out x a = _inplace_scalar_op _owl_div_scalar ?out x a

let pow_scalar_ ?out x a = _inplace_scalar_op _owl_pow_scalar ?out x a

let atan2_scalar_ ?out x a = _inplace_scalar_op _owl_atan2_scalar ?out x a

let fmod_scalar_ ?out x a = _inplace_scalar_op _owl_fmod_scalar ?out x a

(* scalar-first variants: note the [a x] argument order *)
let scalar_add_ ?out a x = _inplace_scalar_op _owl_add_scalar ?out x a

let scalar_sub_ ?out a x = _inplace_scalar_op _owl_scalar_sub ?out x a

let scalar_mul_ ?out a x = _inplace_scalar_op _owl_mul_scalar ?out x a

let scalar_div_ ?out a x = _inplace_scalar_op _owl_scalar_div ?out x a

let scalar_pow_ ?out a x = _inplace_scalar_op _owl_scalar_pow ?out x a

let scalar_atan2_ ?out a x = _inplace_scalar_op _owl_scalar_atan2 ?out x a

let scalar_fmod_ ?out a x = _inplace_scalar_op _owl_scalar_fmod ?out x a


let conj_ ?out x = _inplace_unary_op _owl_conj ?out x

let abs_ ?out x = _inplace_unary_op _owl_abs ?out x

let neg_ ?out x = _inplace_unary_op _owl_neg ?out x

let reci_ ?out x = _inplace_unary_op _owl_reci ?out x

let signum_ ?out x = _inplace_unary_op _owl_signum ?out x

let sqr_ ?out x = _inplace_unary_op _owl_sqr ?out x

let sqrt_ ?out x = _inplace_unary_op _owl_sqrt ?out x

let cbrt_ ?out x = _inplace_unary_op _owl_cbrt ?out x

let exp_ ?out x = _inplace_unary_op _owl_exp ?out x

let exp2_ ?out x = _inplace_unary_op _owl_exp2 ?out x

let exp10_ ?out x = _inplace_unary_op _owl_exp10 ?out x

let expm1_ ?out x = _inplace_unary_op _owl_expm1 ?out x

let log_ ?out x = _inplace_unary_op _owl_log ?out x

let log2_ ?out x = _inplace_unary_op _owl_log2 ?out x

let log10_ ?out x = _inplace_unary_op _owl_log10 ?out x

let log1p_ ?out x = _inplace_unary_op _owl_log1p ?out x

let sin_ ?out x = _inplace_unary_op _owl_sin ?out x

let cos_ ?out x = _inplace_unary_op _owl_cos ?out x

let tan_ ?out x = _inplace_unary_op _owl_tan ?out x

let asin_ ?out x = _inplace_unary_op _owl_asin ?out x

let acos_ ?out x = _inplace_unary_op _owl_acos ?out x

let atan_ ?out x = _inplace_unary_op _owl_atan ?out x

let sinh_ ?out x = _inplace_unary_op _owl_sinh ?out x

let cosh_ ?out x = _inplace_unary_op _owl_cosh ?out x

let tanh_ ?out x = _inplace_unary_op _owl_tanh ?out x

let asinh_ ?out x = _inplace_unary_op _owl_asinh ?out x

let acosh_ ?out x = _inplace_unary_op _owl_acosh ?out x

let atanh_ ?out x = _inplace_unary_op _owl_atanh ?out x

let floor_ ?out x = _inplace_unary_op _owl_floor ?out x

let ceil_ ?out x = _inplace_unary_op _owl_ceil ?out x

let round_ ?out x = _inplace_unary_op _owl_round ?out x

let trunc_ ?out x = _inplace_unary_op _owl_trunc ?out x

let fix_ ?out x = _inplace_unary_op _owl_fix ?out x

let erf_ ?out x = _inplace_unary_op _owl_erf ?out x

let erfc_ ?out x = _inplace_unary_op _owl_erfc ?out x

let relu_ ?out x = _inplace_unary_op _owl_relu ?out x

let softplus_ ?out x = _inplace_unary_op _owl_softplus ?out x

let softsign_ ?out x = _inplace_unary_op _owl_softsign ?out x

let sigmoid_ ?out x = _inplace_unary_op _owl_sigmoid ?out x


(* Softmax along [axis] (default: last axis), computed on a copy of [x]:
   subtract the per-slice max, exponentiate, divide by the per-slice sum. *)
let softmax ?(axis=(-1)) x =
  let x = copy x in
  let axis = Owl_utils.adjust_index axis (num_dims x) in
  sub_ ~out:x x (max ~axis x);
  exp_ ~out:x x;
  let a = sum ~axis x in
  div_ ~out:x x a;
  x


(* In-place softmax; the result goes to [out], which defaults to [x].
   After the first step the intermediate result lives in [out], so the
   remaining steps read from [out]. They used to read from [x], which gave
   [x / sum x] whenever a distinct [out] was supplied. *)
let softmax_ ?out ?(axis=(-1)) x =
  let out = match out with Some o -> o | None -> x in
  let axis = Owl_utils.adjust_index axis (num_dims x) in
  sub_ ~out x (max ~axis x);
  exp_ ~out out;
  let a = sum ~axis out in
  div_ ~out out a


let cumsum_ ?out ?axis x =
  let out = match out with Some o -> o | None -> x in
  let _cumop = _owl_cumsum (kind x) in
  cumulative_op ?axis _cumop x out


let cumprod_ ?out ?axis x =
  let out = match out with Some o -> o | None -> x in
  let _cumop = _owl_cumprod (kind x) in
  cumulative_op ?axis _cumop x out


let cummin_ ?out ?axis x =
  let out = match out with Some o -> o | None -> x in
  let _cumop = _owl_cummin (kind x) in
  cumulative_op ?axis _cumop x out


let cummax_ ?out ?axis x =
  let out = match out with Some o -> o | None -> x in
  let _cumop = _owl_cummax (kind x) in
  cumulative_op ?axis _cumop x out


(* Returns [- sum (x * log y)] as a scalar; works on a copy of [y]. *)
let cross_entropy' x y =
  let y = copy y in
  log_ ~out:y y;
  mul_ ~out:y y x;
  _neg_elt (kind y) (sum' y)


(* [rate] must lie in [0, 1]. When [out] is a different value from [x], the
   data of [x] is first copied into [out], which is then processed by
   [_owl_dropout]. *)
let dropout_ ?out ?(rate=0.5) x =
  assert (rate >= 0. && rate <= 1.);
  let out = match out with Some o -> o | None -> x in
  if not (out == x) then copy_ ~out x;
  _owl_dropout (kind x) (numel x) out rate 0


let fused_adagrad_ ?out ~rate ~eps x =
  let out = match out with Some o -> o | None -> x in
  _owl_fused_adagrad (kind x) (numel x) rate eps x out


(* Clips in place; a missing bound defaults to minus / plus infinity of the
   kind of [x]. *)
let clip_by_value_ ?out ?amin ?amax x =
  let out = match out with Some o -> o | None -> x in
  if same_data out x = false then copy_ ~out x;
  let k = kind x in
  let amin = match amin with
    | Some a -> a
    | None   -> Owl_const.neg_inf k
  in
  let amax = match amax with
    | Some a -> a
    | None   -> Owl_const.pos_inf k
  in
  _owl_clip_by_value k (numel x) amin amax out


let clip_by_value ?amin ?amax x =
  let out = copy x in
  clip_by_value_ ~out ?amin ?amax out;
  out


(* If the l2 norm of [x] exceeds [t], [x] is scaled by [t / norm] into
   [out]; otherwise [x] is copied to [out] unchanged (when they differ). *)
let clip_by_l2norm_ ?out t x =
  let out = match out with Some o -> o | None -> x in
  let a = l2norm' x in
  if a > t then (
    let b = _div_elt (kind x) t a in
    mul_scalar_ ~out x b
  )
  else (
    if same_data out x = false then copy_ ~out x
  )


let clip_by_l2norm t x =
  let out = copy x in
  clip_by_l2norm_ ~out t out;
  out


(** Matrix functions 
*)
(* A rectangular region of a matrix: (a, b) is the top-left corner and
   (c, d) the bottom-right corner, both inclusive. *)
type area = { a : int; b : int; c : int; d : int }


let area a b c d = { a; b; c; d }


(* the area covering the whole of [x], using its first two dimensions *)
let area_of x =
  let dims = shape x in
  let rows, cols = dims.(0), dims.(1) in
  area 0 0 (rows - 1) (cols - 1)


let area_of_row x i =
  let cols = (shape x).(1) in
  area i 0 i (cols - 1)


let area_of_col x i =
  let rows = (shape x).(0) in
  area 0 i (rows - 1) i


(* two areas are equal when they have the same height and width *)
let equal_area r1 r2 =
  let same_height = r1.c - r1.a = r2.c - r2.a in
  let same_width = r1.d - r1.b = r2.d - r2.b in
  same_height && same_width


let same_area r1 r2 = (r1 = r2)


(* element-by-element copy of area [r1] of [x1] into area [r2] of [x2] *)
let copy_area_to x1 r1 x2 r2 =
  assert (equal_area r1 r2);
  let height = r1.c - r1.a in
  let width = r1.d - r1.b in
  for i = 0 to height do
    for j = 0 to width do
      let v = get x1 [|r1.a + i; r1.b + j|] in
      set x2 [|r2.a + i; r2.b + j|] v
    done
  done


(* NOTE(review): the freshly allocated ndarray is filled but not returned;
   the function evaluates to the unit result of [copy_area_to] - confirm
   whether callers expect the copy back. *)
let copy_area x r =
  let y = empty (kind x) [|r.c - r.a + 1; r.d - r.b + 1|] in
  copy_area_to x r y (area_of y)


let _matrix_shape x =
  let dims = shape x in
  assert (Array.length dims = 2);
  dims.(0), dims.(1)


let row_num x =
  assert (num_dims x = 2);
  (shape x).(0)


let col_num x =
  assert (num_dims x = 2);
  (shape x).(1)


(* row [i] of [x] as a 1 x n matrix, built from [slice_left] *)
let row x i =
  let m, n = _matrix_shape x in
  let i = Owl_utils.adjust_index i m in
  Bigarray.Genarray.slice_left x [|i|]
  |> fun y -> reshape y [|1; n|]


(* column [j] of [x] copied into a fresh m x 1 matrix *)
let col x j =
  let m, n = _matrix_shape x in
  let j = Owl_utils.adjust_index j n in
  let _kind = kind x in
  let y = empty _kind [|m; 1|] in
  _owl_copy _kind m ~ofsx:j ~incx:n ~ofsy:0 ~incy:1 x y;
  y


let copy_row_to v x i =
  let u = row x i in
  copy_ ~out:u v


let copy_col_to v x i =
  let src = area_of v in
  let dst = area_of_col x i in
  copy_area_to v src x dst


(* NOTE: same implementation code as that in Owl_linalg_generic 
*)
(* Matrix product of two 2-d ndarrays through [Owl_cblas_basic.gemm]; the
   inner dimensions must agree. *)
let dot x1 x2 =
  let m, k = _matrix_shape x1 in
  let l, n = _matrix_shape x2 in
  assert (k = l);

  let _kind = kind x1 in
  let alpha = Owl_const.one _kind in
  let beta = Owl_const.zero _kind in
  let x3 = empty _kind [|m; n|] in

  let a = flatten x1 |> Bigarray.array1_of_genarray in
  let b = flatten x2 |> Bigarray.array1_of_genarray in
  let c = flatten x3 |> Bigarray.array1_of_genarray in

  let layout = Owl_cblas_basic.CblasRowMajor in
  let transa = Owl_cblas_basic.CblasNoTrans in
  let transb = Owl_cblas_basic.CblasNoTrans in
  Owl_cblas_basic.gemm layout transa transb m n k alpha a k b n beta c n;
  x3


let dot_ ?(transa=false) ?(transb=false) ?alpha ?beta ~c a b =
  Owl_cblas.gemm ~transa ~transb ?alpha ?beta ~a ~b ~c


(* n x n matrix of kind [k] with ones on the main diagonal *)
let eye k n =
  let x = zeros k [|n;n|] in
  let y = Bigarray.array2_of_genarray x in
  let a = Owl_const.one k in
  for i = 0 to n - 1 do
    Bigarray.Array2.unsafe_set y i i a
  done;
  x


(* The [k]-th diagonal of a matrix as a 1 x l row vector; [k >= 0] selects
   diagonals starting in the first row, [k < 0] diagonals starting in the
   first column. *)
let diag ?(k=0) x =
  let m, n = _matrix_shape x in
  let l = match k >= 0 with
    | true  -> Pervasives.(max 0 (min m (n - k)))
    | false -> Pervasives.(max 0 (min n (m + k)))
  in
  let i, j = match k >= 0 with
    | true  -> 0, k
    | false -> Pervasives.abs k, 0
  in
  let y = empty (kind x) [|1;l|] in
  for k = 0 to l - 1 do
    set y [|0; k|] (get x [|i + k; j + k|])
  done;
  y


let trace x = sum' (diag x)


let to_rows x = Array.init (row_num x) (fun i -> row x i)


let to_cols x = Array.init (col_num x) (fun i -> col x i)


let of_rows l =
  let x = empty (kind l.(0)) [|(Array.length l); (col_num l.(0))|] in
  Array.iteri (fun i v -> copy_row_to v x i) l;
  x


let of_cols l =
  let x = empty (kind l.(0)) [|(row_num l.(0)); (Array.length l)|] in
  Array.iteri (fun i v -> copy_col_to v x i) l;
  x


let of_arrays k x = Array2.of_array k C_layout x |> genarray_of_array2


let to_arrays x =
  let s = shape x in
  let m = s.(0) in
  let n = s.(1) in
  let a0 = Owl_const.zero (kind x) in
  let x = array2_of_genarray x in
  let y = Array.init m (fun _ -> Array.make n a0) in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      y.(i).(j) <- x.{i,j}
    done
  done;
  y


(* the rows of [x] listed in [l], stacked in that order *)
let rows x l =
  let m, n = Array.length l, col_num x in
  let y = empty (kind x) [|m;n|] in
  Array.iteri (fun i j ->
    copy_row_to (row x j) y i
  ) l;
  y


(* the columns of [x] listed in [l], placed side by side in that order *)
let cols x l =
  let m, n = _matrix_shape x in
  let nl = Array.length l in
  let _kind = kind x in
  let y = empty _kind [|m;nl|] in
  Array.iteri (fun i j ->
    let j = Owl_utils.adjust_index j n in
    _owl_copy _kind m ~ofsx:j ~incx:n ~ofsy:i ~incy:nl x y;
  ) l;
  y


(* Draws [c] row indices with [Owl_stats.sample] (or [Owl_stats.choose]
   when [replacement] is false) and returns the selected rows together with
   the indices. *)
let draw_rows ?(replacement=true) x c =
  let a = Array.init (row_num x) (fun i -> i) in
  let l = match replacement with
    | true  -> Owl_stats.sample a c
    | false -> Owl_stats.choose a c
  in
  rows x l, l


(* Column counterpart of [draw_rows]. *)
let draw_cols ?(replacement=true) x c =
  let a = Array.init (col_num x) (fun i -> i) in
  let l = match replacement with
    | true  -> Owl_stats.sample a c
    | false -> Owl_stats.choose a c
  in
  cols x l, l


(* Draws rows from [x] and returns the rows of [y] at the same indices. *)
let draw_rows2 ?(replacement=true) x y c =
  let x_rows, l = draw_rows ~replacement x c in
  x_rows, rows y l, l


(* Draws columns from [x] and returns the columns of [y] at the same
   indices. This used to call [draw_rows], which sampled row indices and
   returned rows of [x]. *)
let draw_cols2 ?(replacement=true) x y c =
  let x_cols, l = draw_cols ~replacement x c in
  x_cols, cols y l, l


(*
Similar to sum_rows in matrix, sum all the slices along an axis.
The default [axis] is the highest dimension. E.g., for [x] of [|2;3;4;5|],
[sum_slices ~axis:2] returns an ndarray of shape [|4;5|].
currently, the operation is done using [gemm], fast but uses more memory.
*)

let sum_slices ?axis x =
  (* fall back to the last dimension when no axis is given *)
  let axis =
    match axis with
    | None   -> num_dims x - 1
    | Some a -> a
  in
  (* view [x] as a 2d matrix holding one slice per row *)
  let dims = shape x in
  let slice_len = (Owl_utils.calc_slice dims).(axis) in
  let slice_num = (numel x) / slice_len in
  let mat = reshape x [|slice_num; slice_len|] in
  (* a row vector of ones: its product with [mat] adds all the rows up *)
  let ones_row = ones (kind x) [|1; slice_num|] in
  let summed = dot ones_row mat in
  (* give the result the dimensions of [x] from [axis] onwards *)
  let tail_len = Array.length dims - axis in
  reshape summed (Array.sub dims axis tail_len)


(*
Similar to ``sum``, but sums the elements along multiple axes specified in an
array. E.g., for [x] of [|2;3;4;5|], [sum_reduce ~axis:[|1;3|] x] returns an
ndarray of shape [|2;1;4;1|]; if axis not specified, it returns an ndarray of
shape [|1;1;1;1|].
*)

let sum_reduce ?axis x =
  let _kind = kind x in
  let _dims = num_dims x in
  (* Sum of all the elements, stored in an ndarray that has as many
     dimensions as [x], each of size 1. *)
  let sum_everything () =
    create _kind (Array.make _dims 1) (_owl_sum _kind (numel x) x)
  in
  match axis with
  | None   -> sum_everything ()
  | Some a ->
    let x_shape = shape x in
    let dims' = Owl_utils.squeeze_continuous_dims x_shape a in
    let groups = Array.length dims' in
    if groups = 1 then sum_everything ()
    else begin
      (* TODO: optimise with C implementation *)
      (* Visit the dimensions of the squeezed shape in order, summing along
         every other one; [reduce_now] tells whether position [i] is
         summed. *)
      let rec collapse i reduce_now acc =
        if i >= groups then acc
        else if reduce_now then begin
          let m, n, o, s = Owl_utils.reduce_params i acc in
          let z = zeros _kind s in
          _owl_sum_along _kind m n o acc z;
          collapse (i + 1) false z
        end
        else collapse (i + 1) true acc
      in
      let start = reshape x dims' in
      (* the first position is summed only when axis 0 was requested *)
      let first_reduced = Array.mem 0 a in
      let reduced = collapse 0 first_reduced start in
      (* the result keeps the rank of [x], with every requested axis set to 1 *)
      let y_shape = Array.copy x_shape in
      Array.iter (fun j -> y_shape.(j) <- 1) a;
      reshape reduced y_shape
    end


(* [slide ?axis ?ofs ?step ~window x] gathers windows of [window] elements
   along one axis of [x], starting at offset [ofs] and advancing by [step].
   In the result the chosen axis is replaced by two dimensions [n] and
   [window], [n] being the number of windows. [axis] defaults to [-1]
   (negative values count from the end), [ofs] to [0] and [step] to [1]. *)
let slide ?(axis=(-1)) ?(ofs=0) ?(step=1) ~window x =
  let rank = num_dims x in
  let ax = if axis < 0 then rank + axis else axis in
  let dims = shape x in
  assert (ax < rank);
  assert (ofs + window <= dims.(ax));

  let stride_ax = (strides x).(ax) in
  let slice_ax = (slice_size x).(ax) in
  let m = (numel x) / slice_ax in
  let n = (dims.(ax) - ofs - window) / step + 1 in
  let o = stride_ax * window in
  let ofsx_m = stride_ax * ofs in
  let incx_n = stride_ax * step in
  (* the output first stores the n windows back to back along [ax] *)
  dims.(ax) <- n * window;

  let y = empty (kind x) dims in
  let incy_m = (slice_size y).(ax) in
  Owl_ndarray._ndarray_slide (kind x) x y m n o ofsx_m slice_ax incx_n incy_m o;
  (* split the widened axis into the pair [n; window] *)
  reshape y (Owl_utils.Array.replace ax 1 dims [|n; window|])


(* [draw ?axis x n] picks [n] indices along [axis] with
   [Owl_stats.uniform_int_rvs] between [0] and [nth_dim x axis - 1] and
   returns the corresponding samples of [x] together with the indices.
   [axis] defaults to [0] and goes through [Owl_utils.adjust_index]. *)
let draw ?(axis=0) x n =
  let rank = num_dims x in
  let axis = Owl_utils.adjust_index axis rank in
  let upper = nth_dim x axis - 1 in
  let indices = Array.init n (fun _ -> Owl_stats.uniform_int_rvs ~a:0 ~b:upper) in
  (* explicit index list on [axis], an empty range spec on the other axes *)
  let selector i = if i = axis then L_ indices else R_ [||] in
  let samples = Owl_slicing.get_fancy_array_typ (Array.init rank selector) x in
  samples, indices


(* Checks the index pairs handed to [contract1]: both members of a pair must
   be valid axes of [x], refer to dimensions of equal size, and differ from
   each other. *)
let _contract1_check_indices idx x =
  let dims = shape x in
  let rank = num_dims x in
  let in_range a = a >= 0 && a < rank in
  let valid (i, j) =
    in_range i && in_range j && dims.(i) = dims.(j) && i <> j
  in
  Array.for_all valid idx


(* [contract1 index_pairs x] contracts [x] over the pairs of axes listed in
   [index_pairs]. Fails with an assertion when [x] has fewer than two
   dimensions or when the pairs are rejected by [_contract1_check_indices].
   The result has the dimensions of [x] that appear in no pair. *)
let contract1 index_pairs x =
  let rank = num_dims x in
  assert (rank > 1);
  assert (_contract1_check_indices index_pairs x);

  (* int array -> int64 ndarray, the form in which increments are passed on *)
  let to_int64_nd v =
    Array.map Int64.of_int v |> Array1.of_array int64 c_layout |> genarray_of_array1
  in
  let join a b = Owl_utils.Array.(a @ b) in

  (* axes that survive come first, contracted axes last *)
  let contracted = Owl_utils.Array.of_tuples index_pairs in
  let all_axes = Owl_utils.Array.range 0 (rank - 1) in
  let kept = Owl_utils.Array.complement all_axes contracted in
  let order = join kept contracted in
  let reorder v = Owl_utils.Array.permute order v in

  let src_shape = shape x in
  let src_strides = strides x in
  (* the destination has size 1 along every contracted axis *)
  let dst_shape = Array.copy src_shape in
  Owl_utils.Array.set_n dst_shape contracted 1;
  let dst_strides = Owl_utils.calc_stride dst_shape in

  let p = reshape x (reorder src_shape) in
  let dst_shape_perm = reorder dst_shape in
  let q = zeros (kind x) dst_shape_perm in
  let incp = to_int64_nd (reorder src_strides) in
  let incq = to_int64_nd (reorder dst_strides) in

  let rtd = rank - (Array.length contracted) in
  Owl_ndarray._ndarray_contract_one (kind x) p q incp incq (Int64.of_int rtd);
  (* drop the trailing contracted dimensions *)
  reshape q (Array.sub dst_shape_perm 0 rtd)


(* Checks the index pairs handed to [contract2]: the first member of a pair
   must be a valid axis of [x], the second a valid axis of [y], and the two
   dimensions must have the same size. *)
let _contract2_check_indices idx x y =
  let x_dims = shape x in
  let x_rank = num_dims x in
  let y_dims = shape y in
  let y_rank = num_dims y in
  let valid (i, j) =
    i >= 0 && i < x_rank && j >= 0 && j < y_rank && x_dims.(i) = y_dims.(j)
  in
  Array.for_all valid idx


(* [contract2 index_pairs x y] contracts [x] with [y] over the pairs of axes
   in [index_pairs]; each pair names an axis of [x] and an axis of [y]. Fails
   with an assertion when the pairs are rejected by
   [_contract2_check_indices]. The result has the uncontracted dimensions of
   [x] followed by the uncontracted dimensions of [y]. *)
let contract2 index_pairs x y =
  assert (_contract2_check_indices index_pairs x y);

  (* int array -> int64 ndarray, the form in which shapes and increments are
     passed on *)
  let to_int64_nd v =
    Array.map Int64.of_int v |> Array1.of_array int64 c_layout |> genarray_of_array1
  in
  let join a b = Owl_utils.Array.(a @ b) in

  (* For one operand: put the uncontracted axes first and the contracted ones
     last, then return both counts with the permuted shape and strides. *)
  let arrange pick nd =
    let inner = Owl_utils.Array.map pick index_pairs in
    let all_axes = Owl_utils.Array.range 0 (num_dims nd - 1) in
    let outer = Owl_utils.Array.complement all_axes inner in
    let order = join outer inner in
    let shp = Owl_utils.Array.permute order (shape nd) in
    let inc = Owl_utils.Array.permute order (strides nd) in
    Array.length outer, Array.length inner, shp, inc
  in
  let outer_nx, inner_nx, shpx, incx = arrange fst x in
  let outer_ny, inner_ny, shpy, incy = arrange snd y in
  assert (inner_nx = inner_ny);

  let shpz = join (Array.sub shpx 0 outer_nx) (Array.sub shpy 0 outer_ny) in
  let z = zeros (kind x) shpz in

  (* loop over the dimensions of [z], then over the contracted dimensions *)
  let loop0 = join shpz (Owl_utils.Array.sub shpx outer_nx inner_nx) in
  (* zero increments are inserted where an operand does not take part in a
     loop dimension *)
  let incx0 = Owl_utils.Array.insert incx (Owl_utils.Array.make outer_ny 0) outer_nx in
  let incy0 = Owl_utils.Array.insert incy (Owl_utils.Array.make outer_nx 0) 0 in
  let incz0 = join (strides z) (Owl_utils.Array.make inner_nx 0) in

  let loop1 = to_int64_nd loop0 in
  let incx1 = to_int64_nd incx0 in
  let incy1 = to_int64_nd incy0 in
  let incz1 = to_int64_nd incz0 in
  let ndims = Int64.of_int (Array.length loop0) in
  Owl_ndarray._ndarray_contract_two (kind x) x y z incx1 incy1 incz1 loop1 ndims;
  z


(* Helper functions *)

(* Identity: returns its argument unchanged. *)
let float_to_elt = fun x -> x

(* Identity: returns its argument unchanged. *)
let elt_to_float = fun x -> x



(* ends here *)