(* Performance measures for regression models
cf. chapter 12 "regression models" in book
Varnek, A. ed., 2017. Tutorials in chemoinformatics. John Wiley & Sons. *)

module A = BatArray
module L = BatList

(* [square v] is [v] multiplied by itself. *)
let square (v: float): float =
  v *. v

(** Root Mean Squared Error
[rmse exp pred] is the square root of the mean of the squared
differences between [exp] and [pred], paired by position.
The result is [nan] when both lists are empty (zero divided by zero).
@raise Assert_failure if the two lists have different lengths. *)
let rmse (l1: float list) (l2: float list): float =
  let m = List.length l1 in
  let n = List.length l2 in
  assert (m = n);
  (* walk both lists in lockstep: same left-to-right summation as an
     indexed array fold, without copying the lists into arrays first *)
  let sum_squared_diffs =
    List.fold_left2 (fun acc x y -> acc +. square (x -. y)) 0.0 l1 l2 in
  sqrt (sum_squared_diffs /. float n)

(** Mean Absolute Error
[mae exp pred] is the mean of the absolute differences between [exp]
and [pred], paired by position.
The result is [nan] when both lists are empty (zero divided by zero).
@raise Assert_failure if the two lists have different lengths. *)
let mae (l1: float list) (l2: float list): float =
  let m = List.length l1 in
  let n = List.length l2 in
  assert (m = n);
  (* walk both lists in lockstep: same left-to-right summation as an
     indexed array fold, without copying the lists into arrays first *)
  let sum_abs_diffs =
    List.fold_left2 (fun acc x y -> acc +. abs_float (x -. y)) 0.0 l1 l2 in
  sum_abs_diffs /. float n

(** standard deviation of residuals
[std_dev_res exp pred] is [sqrt (sse /. float (n - 2))], where [sse] is
the sum of the squared differences between [exp] and [pred], paired by
position, and [n] is the number of samples.
The divisor is [n - 2], not [n]: with fewer than three samples it is
zero or negative and the result is not a usable number ([nan],
[infinity] or [-0.]). No check is made for that case.
@raise Assert_failure if the two lists have different lengths. *)
let std_dev_res (l1: float list) (l2: float list): float =
  let m = List.length l1 in
  let n = List.length l2 in
  assert (m = n);
  (* walk both lists in lockstep: same left-to-right summation as an
     indexed array fold, without copying the lists into arrays first *)
  let sum_squared_diffs =
    List.fold_left2 (fun acc x y -> acc +. square (x -. y)) 0.0 l1 l2 in
  sqrt (sum_squared_diffs /. float (n - 2))

(** coefficient of determination
[r2 exp pred] is [1 - ss_res / ss_tot], where [ss_res] is the sum of
the squared differences between [exp] and [pred], paired by position,
and [ss_tot] is the sum of the squared differences between each [exp]
value and the average of [exp].
When [ss_tot] is zero (every [exp] value equal to their average) the
division is by zero and the result is [neg_infinity] or [nan].
@raise Assert_failure if the two lists have different lengths. *)
let r2 (l1: float list) (l2: float list): float =
  let observed = A.of_list l1 in
  let predicted = A.of_list l2 in
  let nb_observed = A.length observed in
  let nb_predicted = A.length predicted in
  assert (nb_observed = nb_predicted);
  let mean_observed = A.favg observed in
  (* accumulate both sums of squares in a single left-to-right pass *)
  let ss_res, ss_tot =
    A.fold_lefti (fun (res, tot) i x ->
        (res +. square (x -. predicted.(i)),
         tot +. square (x -. mean_observed))
      ) (0.0, 0.0) observed in
  1.0 -. (ss_res /. ss_tot)

(** raw Regression Error Characteristic Curve
(raw means not scaled by a null model)
[raw_REC_curve exp pred] returns a list of [(err_tol, frac_ok)] points.
The tolerances [err_tol] come from [L.frange 0.0 `To max_err 100], where
[max_err] is the largest absolute difference between [exp] and [pred]
paired by position. [frac_ok] is the fraction (between 0 and 1) of
samples whose absolute error is at most [err_tol].
NOTE(review): when every prediction is exact, [max_err] is 0 and the
outcome depends on how [L.frange] treats equal bounds - to be confirmed.
Cf. Bi, J. and Bennett, K.P., 2003.
Regression error characteristic curves.
In Proceedings of the 20th international conference on machine learning
(ICML-03) (pp. 43-50).
@raise Invalid_argument if both lists are empty. A length mismatch is
not asserted here; it is left to [A.map2], which is expected to reject
it (presumably also with [Invalid_argument] - to be confirmed). *)
let raw_REC_curve (l1: float list) (l2: float list): (float * float) list =
  let a1 = A.of_list l1 in
  let a2 = A.of_list l2 in
  let n = A.length a1 in
  (* absolute error of each prediction *)
  let errors = A.map2 (fun x y -> abs_float (x -. y)) a1 a2 in
  (* checked after map2 so that a length mismatch is still reported first;
     without this, the max_err lookup below fails with an opaque
     out-of-bounds Invalid_argument *)
  if n = 0 then invalid_arg "raw_REC_curve: empty input";
  A.sort BatFloat.compare errors;
  (* errors are now in increasing order: the last one is the largest *)
  let max_err = errors.(n - 1) in
  (* 100 steps on the X axis *)
  let xs = L.frange 0.0 `To max_err 100 in
  let total = float n in
  (* WARNING: not very efficient algorithm; every tolerance rescans
     the whole error array *)
  let fraction_within err_tol =
    let ok_count =
      A.fold_left (fun acc err ->
          if err <= err_tol then acc + 1 else acc
        ) 0 errors in
    float ok_count /. total in
  L.map (fun err_tol -> (err_tol, fraction_within err_tol)) xs