123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324(*
* Bitset - Efficient bit sets
* Copyright (C) 2003 Nicolas Cannasse
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version,
* with the special exception on linking described in file LICENSE.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)typeinternletbcreate:int->intern=Obj.magicBytes.createexternalfast_get:intern->int->int="%string_unsafe_get"letfast_set:intern ->int->int->unit =Obj.magicBytes.unsafe_setexternalfast_bool:int->bool="%identity"letfast_blit:intern ->int->intern->int->int->unit =Obj.magicBytes.blitletfast_fill :intern ->int->int->int ->unit =Obj.magicBytes.fillletfast_length :intern->int=Obj.magicBytes.lengthletbget sndx=assert(ndx>=0&&ndx<fast_lengths);fast_getsndxletbsetsndxv=assert(ndx>=0&&ndx<fast_lengths);fast_setsndxvletbblitsrcsrcoff dstdstoff len =assert(srcoff>=0&&dstoff>=0&&len>=0);fast_blit srcsrcoffdstdstofflenletbfilldststartlenc=assert (start>=0&&len>=0);fast_fill dststartlencexceptionNegative_indexofstringtypet={mutabledata:intern;mutablelen:int;}leterrorfname=raise(Negative_index fname)letempty()={data=bcreate0;len=0;}letint_size=7(* value used to round up index *)letlog_int_size=3(* number of shifts *)letcreaten=ifn<0thenerror"create";letsize=(n+int_size)lsrlog_int_sizeinlet b=bcreatesizeinbfillb0size0;{data=b;len=size;}letcopyt=letb=bcreatet.leninbblitt.data0b0t.len;{data=b;len=t.len}letclone=copyletsettx=ifx<0thenerror"set";letpos=xlsrlog_int_sizeanddelta =xlandint_sizeinletsize=t.leninifpos>=sizethenbeginletb=bcreate(pos+1)inbblitt.data 0b0size;bfillbsize(pos-size+1)0;t.len<-pos+1;t.data<-b;end;bsett.datapos((bgett.datapos)lor(1lsldelta))letunsettx=ifx<0thenerror"unset";letpos=xlsrlog_int_sizeanddelta =xlandint_sizeinifpos<t.lenthenbsett.datapos((bgett.datapos)land(0xFFlxor(1lsldelta)))let toggletx=ifx<0thenerror"toggle";letpos=xlsrlog_int_sizeanddelta =xlandint_sizeinletsize=t.leninifpos>=sizethenbeginletb=bcreate(pos+1)inbblitt.data 0b0size;bfillbsize(pos-size+1)0;t.len<-pos+1;t.data<-b;end;bsett.datapos((bgett.datapos)lxor(1lsldelta))letputt=function|true->sett|false->unsettletis_settx=ifx<0thenerror"is_set";letpos=xlsrlog_int_sizeanddelta =xlandint_sizeinletsize=t.leninifpos<sizethenfast_bool(((bgett.datapos)lsrdelta)land1)elsefalseexceptionBreak_intofint(* Find highest set element or raise Not_found *)letfind_msbt=(* Find highest set bit in a byte. Does not work with zero. *)letbyte_msbb=assert(b<>0);letrecloopn=ifbland(1lsln)=0thenloop(n-1)elseninloop7inletn=t.len -1andbuf=t.dataintryfori=ndownto0doletbyte=bgetbufiinifbyte<>0thenraise(Break_int ((ilsllog_int_size)+(byte_msbbyte)))done;raise Not_foundwithBreak_intn->n|_->raiseNot_foundletcompare t1t2=letsome_msbb=trySome(find_msbb)withNot_found->Noneinmatch(some_msbt1,some_msbt2)with(None,Some_)->-1(* 0-y -> -1 *)|(Some_,None)->1(* x-0 -> 1 *)|(None,None)->0(* 0-0 -> 0 *)|(Somea,Someb)->(* x-y *)ifa<bthen-1elseifa>bthen1elsebegin(* MSBs differ, we need to scan arrays until we find a
difference *)letndx=alsrlog_int_sizeinassert(ndx<t1.len&&ndx<t2.len);tryfori=ndxdownto0doletb1=bgett1.dataiandb2=bgett2.dataiinifb1<>b2thenraise(Break_int (compareb1b2))done;0withBreak_intres->resendletequals t1t2=comparet1t2=0letpartial_counttx=letrecnbitsx=ifx=0then0elseiffast_bool(xland1)then1+(nbits(xlsr1))elsenbits(xlsr1)inlet size=t.leninletpos=xlsrlog_int_sizeanddelta =xlandint_sizeinletrecloopnacc=ifn=sizethenaccelseletx=bgett.dataninloop(n+1)(acc+nbitsx)inifpos>=sizethen0elseloop(pos+1)(nbits((bgett.datapos)lsrdelta))letcountt=partial_countt0(* Find the first set bit in the bit array *)letfind_first_setbn=(* TODO thereare many ways to speed this up. Lookup table would be
one way to speed this up. *)letfind_lsbb=assert(b<>0);letrecloopn=ifbland(1lsln)<>0thennelseloop(n+1)inloop0inletbuf=b.datainletrecfind_bitbyte_ndxbit_offs=ifbyte_ndx >= b.lenthenNoneelseletbyte=(bgetbufbyte_ndx)lsrbit_offsinifbyte=0thenfind_bit (byte_ndx+1)0elseSome((find_lsbbyte)+(byte_ndxlsllog_int_size)+bit_offs)infind_bit(nlsrlog_int_size)(nland int_size)let enumt=letrecmaken=letcur=refninletrecnext()=matchfind_first_sett!curwithSomeelem->cur:=(elem+1);elem|None->raiseEnum.No_more_elementsinEnum.make~next~count:(fun()->partial_countt!cur)~clone:(fun()->make!cur)inmake0letraw_createsize=letb=bcreate sizeinbfillb0size0;{data=b;len =size}letinterab=letmax_size=maxa.lenb.leninletd=raw_createmax_sizeinletsl=mina.lenb.leninletabuf=a.dataandbbuf=b.datain(* Note:rest of the array is set to zero automatically *)fori=0tosl-1dobsetd.datai((bgetabufi)land(bgetbbufi))done;d(* Note: rest of the array is handled automatically correct, since we
took a copy of the bigger set. *)letunionab=letd=ifa.len>b.lenthencopyaelsecopybinletsl=mina.lenb.leninletabuf=a.dataandbbuf=b.datainfori=0tosl-1dobsetd.datai((bgetabufi)lor(bgetbbufi))done;dlet diffab=letmaxlen=maxa.lenb.leninletbuf=bcreatemaxleninbblita.data0buf0a.len;letsl=mina.lenb.leninletabuf=a.dataandbbuf=b.datainfori=0tosl-1dobsetbufi((bgetabufi)land (lnot(bgetbbufi)))done;{data =buf;len=maxlen}let sym_diffab=letmaxlen=maxa.lenb.leninletbuf=bcreatemaxlenin(* Copy larger (assumes missing bits are zero) *)bblit(ifa.len>b.lenthena.dataelseb.data)0buf0maxlen;letsl=mina.lenb.leninletabuf=a.dataandbbuf=b.datainfori=0tosl-1dobsetbufi((bgetabufi)lxor (bgetbbufi))done;{data =buf;len=maxlen}(* TODO the following set operations can be made faster if you do the
set operation in-place instead of taking a copy. But be careful
when the sizes of the bitvector strings differ. *)letintersecttt'=letd=intertt'int.data<-d.data;t.len<-d.lenletdifferentiatett'=letd=difftt'int.data<-d.data;t.len<-d.lenletunitett'=letd=uniontt'int.data<-d.data;t.len<-d.lenletdifferentiate_symtt'=letd=sym_diff tt'int.data<-d.data;t.len<-d.len