123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373exceptionValueErrorofstringletrowmi=ifi<Array.lengthmthenArray.copym.(i)elsefailwith(sprintf"invalid row index %d"i)letcolumnmi=ifArray.for_all~f:(funrow->i<Array.lengthrow)mthenArray.init(Array.lengthm)~f:(funj->m.(j).(i))elsefailwith(sprintf"invalid column index %d"i)letis_rectangulara=letdimension=Array.lengthainArray.for_alla~f:(funsuba->Array.lengthsuba=dimension)lettransposea=ifnot(is_rectangulara)theninvalid_arg"Math.transpose: not-rectangular";letn_rows=Array.lengthainifArray.lengtha=0||Array.lengtha.(0)=0then[||]elseletn_cols=Array.lengtha.(0)inletans=Array.make_matrix~dimx:n_cols~dimy:n_rowsa.(0).(0)infori=0ton_rows-1doforj=0toArray.lengtha.(i)-1doans.(j).(i)<-a.(i).(j)donedone;ansletlog?basex=matchbasewith|None->Stdlib.logx|Someb->(Stdlib.logx)/.(Stdlib.logb)letlog10=Stdlib.log10letlog2=log~base:2.0letevenx=(xmod2)=0letoddx=(xmod2)<>0letmina=Option.value_exn~message:"Math.min: empty"(Array.reducea~f:Poly.min)letmaxa=Option.value_exn~message:"Math.max: empty"(Array.reducea~f:Poly.max)letprangeaddsteplohi=letrecfaccx=ifPoly.(x>hi)thenList.revaccelseletnext=addxstepinf(x::acc)nextinf[]loletrange_ints=prange(+)letrange_floats=prange(+.)letrangestepfirstlast=assertFloat.(step>0.0);letn=(((last-.first)/.step)|>Float.abs|>Float.round_up|>Int.of_float)+1inleta=Array.create~len:n0.0inlet(op,comp)=Float.(iffirst<=lastthen((+.),(<=))else((-.),(>=)))inArray.iteri~f:(funi_->a.(i)<-opfirst(Float.of_inti*.step))a;ifcompa.(n-1)lastthenaelseArray.suba~pos:0~len:(n-1)letmeana=letn=Array.lengthainassert(n>0);(Array.fold~f:(+.)~init:0.a)/.(Float.of_intn)letvariancea=letn=Array.lengthainassert(n>1);letavrg=meanainletfv=letdiff=v-.avrgindiff*.diffinleta=Array.map~fain(Array.fold~f:(+.)~init:0.a)/.(Float.of_int(n-1))letrmsa=Array.map~f:(funx->x*.x)a|>mean|>sqrtletstdvx=variancex|>sqrtletmediana=letn=Array.lengthainassert(n>0);leta=Array.copyainArray.sort~compare:Stdlib.comparea;ifoddnthena.((n+1)/2-1)elseletm=(n+1)/2in(a.(m-1)+.a.(m))/.2.0letpseudomediana=letn=Array.lengthainassert(n>0);ifn=1thena.(0)elseletnn=n*(n-1)/2inletaverages=Array.create~len:nn0.0inletidx=ref0infori=0ton-2doforj=i+1ton-1doaverages.(!idx)<-(a.(i)+.a.(j))/.2.0;incridxdonedone;medianaveragesletmada=assert(Array.lengtha>0);letmed=medianainleta=Array.map~f:(funv->Float.abs(v-.med))ainmedianaletquantile_normalizationaa=assert(is_rectangularaa);ifArray.lengthaa=0||Array.lengthaa.(0)=0||Array.lengthaa.(0)=1thenArray.copyaaelseletnum_expts=Float.of_int(Array.lengthaa.(0))inletnum_pts=Array.lengthaainletcomp1(a,_)(b,_)=Stdlib.compareabinletcomp2(_,a)(_,b)=Stdlib.compareabinletaa=transposeaainletaa=Array.map~f:(Array.mapi~f:(funab->(a,b)))aain(Array.iter~f:(Array.sort~compare:comp2))aa;letavgi=(Array.fold~f:(funsumexpt->sndexpt.(i)+.sum)~init:0.0aa)/.num_exptsinletnorms=Array.initnum_pts~f:avginletaa=Array.map~f:(Array.mapi~f:(funi(idx,_)->idx,norms.(i)))aainArray.iter~f:(Array.sort~compare:comp1)aa;transpose(Array.map~f:(Array.map~f:snd)aa)lethistogram(typet)?(cmp=Stdlib.compare)arr=letmoduleM=structincludeMap.Make(structtypet_=t(* required only because OCaml doesn't have type non-rec definitions *)typet=t_letcompare=cmpletsexp_of_t_=assertfalselett_of_sexp_=assertfalseend)endinletf(mp:intM.t)(a:t)=matchM.findmpawith|Somee->M.setmp~key:a~data:(e+1)|None->M.setmp~key:a~data:1inletmp=Array.fold~f~init:M.emptyarrinletans=M.fold~f:(fun~key~dataans->(key,data)::ans)mp~init:[]inArray.of_list(List.revans)letprediction_valuestptnfpfn=lettp=Float.of_inttpinlettn=Float.of_inttninletfp=Float.of_intfpinletfn=Float.of_intfninletsensitivity=tp/.(tp+.fn)inletspecificity=tn/.(fp+.tn)inletpos_prediction_accuracy=tp/.(tp+.fp)inletneg_prediction_accuracy=tn/.(tn+.fn)insensitivity,specificity,pos_prediction_accuracy,neg_prediction_accuracyletpearson(a1:floatarray)(a2:floatarray)=leta1avg,a2avg=(meana1),(meana2)inleta1sd,a2sd=(stdva1),(stdva2)inleta1,a2=(Array.to_lista1),(Array.to_lista2)inletfacce1e2=(((e1-.a1avg)/.a1sd)*.((e2-.a2avg)/.a2sd))+.accin(List.fold2_exn~f~init:0.a1a2)/.(Float.of_int((List.lengtha1)-1))letrankarr=letarr=Array.copyarrinletarr=Array.mapi~f:(funia->a,i)arrinArray.sort~compare:(fun(a,_)(b,_)->Stdlib.compareab)arr;letg_ilans=letcount=List.lengthilinletn=count+(List.lengthans)inlethi=Float.of_intninletlo=Float.of_int(n-count+1)inletrank=(hi+.lo)/.2.in(List.map~f:(funi->rank,i)il)@ansinletf(prev,il,ans)(x,i)=(* prev is the value that was equal *)letcount=List.lengthilin(* il is list of original indices in
reverse for items that were equal *)ifcount=0then(* ans is list of ranks and original index pairs
in reverse *)x,[i],anselseifPoly.(x=prev)thenx,i::il,anselsex,[i],gprevilansinletprev,il,ans=Array.fold~f~init:(0.,[],[])arrinletans=gprevilansinletans=List.sort~compare:(fun(_,a)(_,b)->Stdlib.compareab)ansinArray.of_list(List.map~f:fstans)letspearman(arr1:floatarray)(arr2:floatarray)=letarr1,arr2=rankarr1,rankarr2inpearsonarr1arr2letcndx=(* Modified from C++ code by David Koppstein. Found from
www.sitmo.com/doc/Calculating_the_Cumulative_Normal_Distribution *)letb1,b2,b3,b4,b5,p,c=0.319381530,-0.356563782,1.781477937,-1.821255978,1.330274429,0.2316419,0.39894228inifFloat.(x>=0.)thenlett=1./.(1.+.(p*.x))in(1.-.(c*.(exp(-.x*.x/.2.))*.t*.(t*.(t*.(t*.((t*.b5)+.b4)+.b3)+.b2)+.b1)))elselett=1./.(1.-.p*.x)inc*.(exp(-.x*.x/.2.))*.t*.(t*.(t*.(t*.((t*.b5)+.b4)+.b3)+.b2)+.b1)letltqnormp=(*
Modified from python code by David Koppstein. Original comments follow below.
First version was written in perl, by Peter J. Acklam, 2000-07-19.
Second version was ported to python, by Dan Field, 2004-05-03.
*)ifFloat.(p<=0.||p>=1.)thenraise(ValueError("Argument to ltqnorm "^(Float.to_stringp)^" must be in open interval (0,1)"))else(* Coefficients in rational approximations. *)leta=[|-3.969683028665376e+01;2.209460984245205e+02;-2.759285104469687e+02;1.383577518672690e+02;-3.066479806614716e+01;2.506628277459239e+00|]inletb=[|-5.447609879822406e+01;1.615858368580409e+02;-1.556989798598866e+02;6.680131188771972e+01;-1.328068155288572e+01|]inletc=[|-7.784894002430293e-03;-3.223964580411365e-01;-2.400758277161838e+00;-2.549732539343734e+00;4.374664141464968e+00;2.938163982698783e+00|]inletd=[|7.784695709041462e-03;3.224671290700398e-01;2.445134137142996e+00;3.754408661907416e+00|]in(* Define break-points. *)letplow=0.02425inletphigh=1.-.plowinletfq=(((((c.(0)*.q+.c.(1))*.q+.c.(2))*.q+.c.(3))*.q+.c.(4))*.q+.c.(5))/.((((d.(0)*.q+.d.(1))*.q+.d.(2))*.q+.d.(3))*.q+.1.)in(* Rational approximation for lower region: *)ifFloat.(p<plow)thenletq=sqrt((-2.)*.(logp))infq(* Rational approximation for upper region: *)elseifFloat.(phigh<p)thenletq=sqrt((-2.)*.(log(1.-.p)))infq(* Rational approximation for central region: *)elseletq=p-.0.5inletr=q*.qin(((((a.(0)*.r+.a.(1))*.r+.a.(2))*.r+.a.(3))*.r+.a.(4))*.r+.a.(5))*.q/.(((((b.(0)*.r+.b.(1))*.r+.b.(2))*.r+.b.(3))*.r+.b.(4))*.r+.1.)letwilcoxon_rank_sum_to_zarr1arr2=letl1,l2=(Array.lengtharr1),(Array.lengtharr2)inletranked=rank(Array.appendarr1arr2)inletarr1=Array.subranked~pos:0~len:l1inletl1,l2=(Float.of_intl1),(Float.of_intl2)inletsum1=letfaccelem=elem+.accinArray.fold~f~init:0.arr1inletexpectation=(l1*.(l1+.l2+.1.))/.2.inletvar=(l1*.l2*.((l1+.l2+.1.)/.12.))in(sum1-.expectation)/.(sqrtvar)letwilcoxon_rank_sum_to_parr1arr2=(* assumes a two-tailed distribution *)letz=wilcoxon_rank_sum_to_zarr1arr2in2.*.(1.-.(cnd(Float.absz)))letwilcoxon_rank_sum?(alpha=0.05)arr1arr2=Float.(wilcoxon_rank_sum_to_parr1arr2<alpha)letidxsort(cmp:'a->'a->int)(a:'aarray):intarray=leta=Array.mapia~f:(funib->(i,b))inArray.sort~compare:(funab->cmp(snda)(sndb))a;Array.map~f:fstaletfind_regions?(max_gap=0)preda=ifmax_gap<0thenfailwith("max gap must be non-negative but is "^(string_of_intmax_gap));letsize=Array.lengthainletans=ref[]in(* Add region built up thus far, if any, to ans.
* curr_index is one beyond what will be considered for inclusion in region *)letadd_regioncurr_indexstart_indexcurrGap=ifstart_index>=0thenletfinish_index=curr_index-currGap-1inans:=(start_index,finish_index)::!ansin(* i is current array index.
* start_index is index of a region that has started to be built, -1
if none started yet.
* currGap is number of previous contiguous items failing pred *)letrecloopistart_indexcurrGap=ifi=sizethenadd_regionistart_indexcurrGapelse(ifpreda.(i)thenifstart_index>=0thenloop(i+1)start_index0elseloop(i+1)i0else(ifcurrGap>=max_gapthen(add_regionistart_indexcurrGap;loop(i+1)(-1)(currGap+1))elseloop(i+1)start_index(currGap+1)))inloop0(-1)0;Array.of_list(List.rev!ans)letfind_min_window?(init_direction="fwd")apredi=letsize=Array.lengthainifsize<1then[||]elseletv=Range.make_unsafe0(size-1)inletpredv=predv.Range.lov.Range.hiinletans=Range.find_min_range~init_directionvprediinmatchanswith|None->[||]|Someans->Array.suba~pos:ans.Range.lo~len:(ans.Range.hi-ans.Range.lo+1)letfactorialn=ifn<2then1elseletrecauxaccn=ifn<2thenaccelseaux(n*acc)(n-1)inaux1nletepsilonfinitfin=letrecauxaccn=ifn=finthenaccelseaux(acc+.(fnfin))(n+1)inaux0.initletshuffleresult=letresult=Array.copyresultinfori=Array.lengthresult-1downto0doletother=Random.int(i+1)andtmp=result.(i)inresult.(i)<-result.(other);result.(other)<-tmpdone;result