123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360moduleOLS=structmoduleCi95=struct(* 95% confidence interval *)typet={r:float;l:float}letppppfx=Fmt.pfppf"@[<hov>%f to %f@]"x.rx.lletbad={r=neg_infinity;l=neg_infinity}end(* Linear regression inputs *)letmake_lr_inputs?indices~responder~predictorsm=letresponder_accessor=Measurement_raw.get~label:responderinletpredictors_accessor=Array.map(funlabel->Measurement_raw.get~label)predictorsinmatchindiceswith|Someindices->(Array.map(funi->Array.map(funa->am.(i))predictors_accessor)indices,Array.map(funi->responder_accessorm.(i))indices)|None->(Array.init(Array.lengthm)(funi->Array.map(funa->am.(i))predictors_accessor),Array.init(Array.lengthm)(funi->responder_accessorm.(i)))typet={predictors:stringarray;responder:string;value:(v,[`Msgofstring])result}andv={estimates:floatarray;ci95:Ci95.tarrayoption;r_square:floatoption}letr_squarem~responder~predictorsr=letpredictors_matrix,responder_vector=make_lr_inputs~responder~predictorsminletsum_responder=Array.fold_left(+.)0.responder_vectorinletmean=sum_responder/.float(Array.lengthresponder_vector)inlettot_ss=ref0.inletres_ss=ref0.inletpredictedi=letx=ref0.inforj=0toArray.lengthr-1dox:=!x+.(predictors_matrix.(i).(j)*.r.(j))done;!xinfori=0toArray.lengthresponder_vector-1dotot_ss:=!tot_ss+.((responder_vector.(i)-.mean)**2.);res_ss:=!res_ss+.((responder_vector.(i)-.predictedi)**2.)done;1.-.(!res_ss/.!tot_ss)(* XXX(dinosaure): see core_bench and [(1/e)^bootstrap_threshold <
0.05/predictors] which describe area on top of logarithm curve (where
maximum seems close to 6~7). *)letbootstrap_threshold=10letcan_bootstrap~responder~predictorsm=letmatrix,_=make_lr_inputs~responder~predictorsminletnon_zero=Array.make(Array.lengthpredictors)0inletnon_zero_cols=ref0inArray.iter(funrow->fori=0toArray.lengthnon_zero-1doifrow.(i)<>0.0then(non_zero.(i)<-non_zero.(i)+1;ifnon_zero.(i)=bootstrap_thresholdthenincrnon_zero_cols)done)matrix;if!non_zero_cols=Array.lengthnon_zerothentrueelsefalselet()=Random.self_init()letrandom_indices_in_place~maxarr=letlen=Array.lengtharrinfori=0tolen-1doarr.(i)<-Random.intmaxdoneletquantile_of_array?(failures=0)~len~low~higharr=Array.sort(compare:float->float->int)arr;letindexq=int_of_float((floatlen*.q)+.(0.5*.floatfailures))inletextended_geti=ifi>=lentheninfinityelsearr.(i)inletl=extended_get((min:int->int->int)(indexlow)(len-1))inletr=extended_get((max:int->int->int)(indexhigh)failures)inCi95.{l;r}letbootstrap~trialsm~responder~predictors=letp=Array.lengthpredictorsinmatchcan_bootstrap~responder~predictorsmwith|false->assertfalse|true->letbootstrap_fails=ref0inletindices=Array.make(Array.lengthm)0inletbootstrap_coeffs=Array.initp(fun_->Array.maketrials0.0)infori=0totrials-1dorandom_indices_in_placeindices~max:(Array.lengthm);letmatrix,vector=make_lr_inputs~indices~responder~predictorsminmatchLinear_algebra.ols~in_place:truematrixvectorwith|Okbt_ols_result->forp=0top-1dobootstrap_coeffs.(p).(i)<-bt_ols_result.(p)done|_->incrbootstrap_fails;forp=0top-1dobootstrap_coeffs.(p).(i)<-neg_infinitydonedone;Array.initp(funi->iftrials=0thenCi95.badelsequantile_of_arraybootstrap_coeffs.(i)~failures:!bootstrap_fails~len:trials~low:0.025~high:0.975)(* Ordinary Least Square *)letols?bootstrap:(trials=0)?r_square:(do_r_square=false)~responder~predictorsm=letmatrix,vector=make_lr_inputs~responder~predictorsminmatchLinear_algebra.ols~in_place:truematrixvectorwith|Okestimates->letr_square=ifdo_r_squarethenSome(r_squarem~responder~predictorsestimates)elseNoneinletci95=matchtrialswith|0->None|trials->Some(bootstrap~trials~responder~predictorsm)in{predictors;responder;value=Ok{estimates;ci95;r_square}}|Error_aserr->{predictors;responder;value=err}letpp~predictors~responderppfv=Fmt.pfppf"{ @[";fori=0toArray.lengthpredictors-1doFmt.pfppf"%s per %s = %f"responderpredictors.(i)v.estimates.(i);(matchv.ci95with|Someci95->Fmt.pfppf" (confidence: %a)"Ci95.ppci95.(i)|None->());Fmt.pfppf";@ "done;Fmt.pfppf"r² = %a@] }"Fmt.(Dump.optionfloat)v.r_squareletppppfx=matchx.valuewith|Okv->pp~predictors:x.predictors~responder:x.responderppfv|Error(`Msgerr)->Format.fprintfppf"%s"errletpredictors{predictors;_}=Array.to_listpredictorsletresponder{responder;_}=responderletestimates{value;_}=matchvaluewith|Ok{estimates;_}->Some(Array.to_listestimates)|Error_->Noneletr_square{value;_}=matchvaluewithOk{r_square;_}->r_square|Error_->NoneendmoduleRANSAC=struct(* returns [a, b] such that [f(x) = a*x + b] minimize the distance between
[sum(fun (x -> (f(x) - v(x))^2)] *)letaffine_adjustment(r:(float*float)array)=letlen=float(Array.lengthr)inletmean_x=letsum_x=Array.fold_right(fun(x,_)acc->x+.acc)r0.insum_x/.leninletmean_y=letsum_y=Array.fold_right(fun(_,y)acc->y+.acc)r0.insum_y/.leninletvariance_x=letsumvar=Array.fold_right(fun(x,_)acc->letv=x-.mean_xin(v*.v)+.acc)r0.insumvar/.leninletcovariance_x_y=letsumcovar=Array.fold_right(fun(x,y)acc->letv=(x-.mean_x)*.(y-.mean_y)inv+.acc)r0.insumcovar/.leninleta=covariance_x_y/.variance_xinletb=mean_y-.(a*.mean_x)in(a,b)letqualitydata(a,b)=letacc=ref0.infori=0toArray.lengthdata-1doletx,y=data.(i)inletdiff=letd=(a*.x)+.b-.yind*.dinacc:=!acc+.diffdone;!acc/.float(Array.lengthdata)letransac_filter_distance(x,y)(a,b)=letlevel=maxx(maxy(maxab))inabs_float((a*.x)+.b-.y)/.levelletransac_paramdata={Ransac.model=affine_adjustment;data;subset_size=10;rounds=100;distance=ransac_filter_distance;filter_distance=0.05;minimum_valid=Array.lengthdata/3;error=quality}letsuma=Array.fold_left(+.)0.aletstandard_error~a~b(r:(float*float)array)=letestimatex=(a*.x)+.binletdy(x,y)=letd=y-.estimatexind*.dinletsum_dy=sum(Array.mapdyr)inletmean_x=sum(Array.map(fun(x,_)->x)r)/.float(Array.lengthr)inletdx(x,_)=letd=x-.mean_xind*.dinsqrt(sum_dy/.float(Array.lengthr-2))/.sqrt(sum(Array.mapdxr))typet={predictor:string;responder:string;mean_value:float;constant:float;max_value:float*float;min_value:float*float;standard_error:float}letppppft=Fmt.pfppf"{ @[<hov>%s per %s = %f;@ standard-error = %f;@] }"t.respondert.predictort.mean_valuet.standard_errorletresult_column~predictor~responderm=(Measurement_raw.get~label:predictorm,Measurement_raw.get~label:responderm)letransac?(filter_outliers=true)~predictor~responderml=leta=Array.map(result_column~predictor~responder)mlinletmean_value,constant=iffilter_outliersthenmatchRansac.ransac(ransac_parama)with|None->(* Couldn't extract a model, just return crude affine adjustment *)affine_adjustmenta|Some{Ransac.model;_}->modelelseaffine_adjustmentainletmin_value=Array.fold_left(fun(row_min,val_min)(row,value)->letvalue=(value-.constant)/.rowinifval_min<value||value<=0.then(row_min,val_min)else(row,value))(0.,max_float)ainletcorrect_floatf=classify_floatf=FP_normalinletmax_value=Array.fold_left(fun(row_max,val_max)(row,value)->letvalue=(value-.constant)/.rowinifval_max>value||not(correct_floatvalue)then(row_max,val_max)else(row,value))(0.,min_float)ainletstandard_error=standard_error~a:mean_value~b:constantain{predictor;responder;mean_value;constant;min_value;max_value;standard_error}letresponder{responder;_}=responderletpredictor{predictor;_}=predictorletmean{mean_value;_}=mean_valueletconstant{constant;_}=constantletmin{min_value;_}=min_valueletmax{max_value;_}=max_valueleterror{standard_error;_}=standard_errorendtype'at=|OLS:{predictors:stringarray;r_square:bool;bootstrap:int}->OLS.tt|RANSAC:{filter_outliers:bool;predictor:string}->RANSAC.ttletols~r_square~bootstrap~predictors=OLS{predictors;r_square;bootstrap}letransac~filter_outliers~predictor=RANSAC{filter_outliers;predictor}letone:typea.at->Measure.witness->Benchmark.t->a=funkinde{lr=m;_}->letlabel=Measure.labeleinmatchkindwith|OLS{predictors;r_square;bootstrap}->OLS.ols~bootstrap~r_square~predictors~responder:labelm|RANSAC{filter_outliers;predictor}->RANSAC.ransac~filter_outliers~predictor~responder:labelmletall:typea.at->Measure.witness->(string,Benchmark.t)Hashtbl.t->(string,a)Hashtbl.t=funkindems->letret=Hashtbl.create(Hashtbl.lengthms)inHashtbl.iter(funnamem->Hashtbl.addretname(onekindem))ms;retletmerge:typea.at->Measure.witnesslist->(string,a)Hashtbl.tlist->(string,(string,a)Hashtbl.t)Hashtbl.t=fun_instancesresults->letret=Hashtbl.create(List.lengthinstances)inList.iter2(funinstanceresult->Hashtbl.addret(Measure.labelinstance)result)instancesresults;ret