(* Copyright (C) 2020, Francois Berenger
   Yamanishi laboratory,
   Department of Bioscience and Bioinformatics,
   Faculty of Computer Science and Systems Engineering,
   Kyushu Institute of Technology,
   680-4 Kawazu, Iizuka, Fukuoka, 820-8502, Japan. *)

open Printf

(* Everything from BatList is re-exported; the unqualified list functions
   used below (iter, iteri, map, fold_left, rev, length, take, takedrop,
   flatten, rev_append, shuffle) are resolved through this include. *)
include BatList

(* [to_string to_str l] renders [l] as a bracketed, semicolon-separated
   string, converting each element with [to_str].
   Elements are not escaped: an element whose rendering contains a
   semicolon or a closing bracket will not survive [of_string]. *)
let to_string to_str l =
  let buff = Buffer.create 80 in
  Buffer.add_char buff '[';
  iteri (fun i x ->
      (* separator goes before every element except the first *)
      if i > 0 then Buffer.add_char buff ';';
      Buffer.add_string buff (to_str x);
    ) l;
  Buffer.add_char buff ']';
  Buffer.contents buff

(* [of_string of_str s] parses a string produced by [to_string]:
   one character is chopped from each end of [s] (expected to be the
   brackets; they are not checked), the remainder is split on semicolons
   and each field is converted with [of_str].
   Fails with [Failure] if a closing bracket remains after chopping,
   i.e. nested lists are not supported. *)
let of_string of_str s =
  let s' = BatString.chop ~l:1 ~r:1 s in
  if s' = "" then
    (* the empty list case was not handled in the past *)
    []
  else
    begin
      if String.contains s' ']' then
        failwith ("MyList.of_string: sub lists inside: " ^ s);
      map of_str (BatString.split_on_string s' ~by:";")
    end

(* [filter_count p l]: number of elements of [l] satisfying [p] *)
let filter_count p l =
  fold_left (fun acc x ->
      if p x then acc + 1
      else acc
    ) 0 l

(* [filter_counts p l] returns [(ok, ko)]: how many elements of [l]
   satisfy [p], and how many do not. [p] is called once per element. *)
let filter_counts p l =
  let ok_count = ref 0 in
  let ko_count = ref 0 in
  iter (fun x ->
      if p x then incr ok_count
      else incr ko_count
    ) l;
  (!ok_count, !ko_count)

(* [filter_map p f l]: only map [f] on elements satisfying [p];
   the relative order of the kept elements is preserved.
   NOTE(review): this presumably shadows the [filter_map] brought in by
   [include BatList], which has a different signature -- confirm that
   callers expect this two-function variant. *)
let filter_map p f l =
  let res =
    fold_left (fun acc x ->
        if p x then (f x) :: acc
        else acc
      ) [] l in
  (* the fold accumulates in reverse *)
  rev res

(* split a list into n parts (the last part might have
   a different number of elements).
   The first [n - 1] parts each receive [m] elements, where [m] is
   [length l / n] rounded to an integer; the last part receives whatever
   remains. For [n <= 1] the loop does not run and the result is [[l]]. *)
let nparts n l =
  let len = length l in
  let res = ref [] in
  let curr = ref l in
  let m = BatFloat.round_to_int (float len /. float n) in
  for _ = 1 to n - 1 do
    let xs, ys = takedrop m !curr in
    curr := ys;
    res := xs :: !res
  done;
  (* parts were pushed in reverse; the leftover becomes the last part *)
  rev (!curr :: !res)

(* create folds of cross validation; each fold consists in (train, test).
   [l] is cut into [n] test sets with [nparts]; for each test set, the
   training set is the concatenation of all the other parts, in their
   original order.
   The folds are accumulated by prepending, so they come out in reverse
   order of the test sets (the last part of [l] is the first test set). *)
let cv_folds n l =
  let test_sets = nparts n l in
  let rec loop acc prev curr =
    match curr with
    | [] -> acc
    | x :: xs ->
      (* [prev] holds the already visited parts, most recent first;
         [rev_append] puts them back in order in front of [xs] *)
      let before_after = flatten (rev_append prev xs) in
      let prev' = x :: prev in
      let train_test = (before_after, x) in
      let acc' = train_test :: acc in
      loop acc' prev' xs in
  loop [] [] test_sets

(* dump list to file: one element per line, each rendered by [to_string]
   and followed by a newline. The output channel is obtained from
   [Utls.with_out_file]. *)
let to_file (fn: string) (to_string: 'a -> string) (l: 'a list): unit =
  Utls.with_out_file fn (fun out ->
      iter (fun x -> fprintf out "%s\n" (to_string x)) l
    )

(* List.combine for 4 lists.
   Raises [Invalid_argument] if the four lists do not all have
   the same length. *)
let combine4 l1 l2 l3 l4 =
  let rec loop acc = function
    | ([], [], [], []) -> rev acc
    | (w :: ws, x :: xs, y :: ys, z :: zs) ->
      loop ((w, x, y, z) :: acc) (ws, xs, ys, zs)
    | _ -> raise (Invalid_argument "MyList.combine4: list lengths differ")
  in
  loop [] (l1, l2, l3, l4)

(* alias *)
let fold = fold_left

(* [really_take n l]: like [take n l], but asserts that exactly [n]
   elements were obtained. The check is an [assert], so it disappears
   if the code is compiled with assertions disabled. *)
let really_take n l =
  let res = take n l in
  assert(length res = n);
  res

(* non reproducible randomization of a list: a fresh self-initialized
   random state is created at each call *)
let random_shuffle l =
  let rng = BatRandom.State.make_self_init () in
  shuffle ~state:rng l

(* [rev_combine l1 l2]: pair up the elements of [l1] and [l2] like
   List.combine, but return the pairs in reverse order (tail-recursive,
   no final reversal).
   Raises [Invalid_argument] if the lists have different lengths. *)
let rev_combine l1 l2 =
  let rec loop acc l r =
    match (l, r) with
    | ([], []) -> acc
    | (x :: xs, y :: ys) -> loop ((x, y) :: acc) xs ys
    | _ -> raise (Invalid_argument "MyList.rev_combine: list lengths differ")
  in
  loop [] l1 l2

(* filter using bit-mask [m]: keep the elements of [l] whose corresponding
   boolean in [m] is true; order is preserved.
   Raises [Invalid_argument] (from [rev_combine]) if [m] and [l] have
   different lengths. *)
let filter_mask m l =
  let rec loop acc = function
    | [] -> acc
    | (p, x) :: rest -> loop (if p then x :: acc else acc) rest
  in
  (* the pairs arrive reversed and prepending reverses them again,
     so the output is in the original order *)
  loop [] (rev_combine m l)

(* should be in batteries soon.
   [fold_while p f init li] folds [f] over the longest prefix of [li]
   whose elements all satisfy [p]. Returns the accumulator together with
   the unprocessed suffix, which starts at the first element rejected by
   [p] (or is empty if every element was consumed). *)
let fold_while p f init li =
  let rec loop acc = function
    | [] -> (acc, [])
    | (x :: xs) as l ->
      if p x then
        loop (f acc x) xs
      else
        (acc, l)
  in
  loop init li