123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899(* Keep only the N top scoring elements in memory.
WARNING: we will have several elements with equal scores when screening
a huge database *)moduleL=Listtype'at={max_size:int;(* max number of (top scoring) elements *)mutablecurr_size:int;(* how many elts currently *)mutablemin_score:float;(* For a given score, elements are in LIFO order *)mutableelements:(float*'alist)list}(* this does not update the count, on purpose because drop_lowest_score
* is called when there is one score too much *)letdrop_lowest_scoret=matcht.elementswith|[]->assert(false)|(score,elts)::rest->matcheltswith|[]->assert(false)|[_x]->(* this whole score class is dropped, since it has no more members *)t.elements<-rest|(_x::y::zs)->(* just drop the last element that came in with that score *)t.elements<-(score,y::zs)::rest(* peek at the currently known min score *)letpeek_scoret=matcht.elementswith|[]->assert(false)|(score,_elts)::_rest->scoreletinserttscorex=letrecloopacc=function|[]->L.rev_appendacc[(score,[x])]|(score',elts)::rest->ifscore'<scorethenloop((score',elts)::acc)restelseifscore'=scorethenL.rev_appendacc((score',x::elts)::rest)else(* score' > score *)L.rev_appendacc((score,[x])::(score',elts)::rest)int.elements<-loop[]t.elementsletget_min_scoret=t.min_scoreletget_curr_sizet=t.curr_sizeletget_max_sizet=t.max_size(* when we insert an element *)letupdate_boundtscore=ifscore<t.min_scorethent.min_score<-score(* after we drop one *)letrecompute_boundt=t.min_score<-peek_scoretletcreate(max_size:int):'at=assert(max_size>0);letcurr_size=0inletmin_score=max_floatinletelements=[]in{max_size;curr_size;elements;min_score}letadd(t:'at)(score:float)(x:'a):unit=ift.curr_size<t.max_sizethenbegin(* don't filter, as long as there are not enough elements *)inserttscorex;t.curr_size<-t.curr_size+1;update_boundtscoreendelsebegin(* enforce data structure invariant *)assert(t.curr_size=t.max_size);ifscore>t.min_scorethenbegininserttscorex;drop_lowest_scoret;recompute_boundtendendlethigh_scores_first(t:'at):(float*'a)list=(* put scores in decreasing order *)L.fold_left(funacc1(score,elts)->(* put back elements in FIFO order *)L.fold_left(funacc2x->(score,x)::acc2)acc1elts)[]t.elements