123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225(*****************************************************************************)(* *)(* Open Source License *)(* Copyright (c) 2019,2020 DaiLambda, Inc. <contact@dailambda.jp> *)(* *)(* Permission is hereby granted, free of charge, to any person obtaining a *)(* copy of this software and associated documentation files (the "Software"),*)(* to deal in the Software without restriction, including without limitation *)(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)(* and/or sell copies of the Software, and to permit persons to whom the *)(* Software is furnished to do so, subject to the following conditions: *)(* *)(* The above copyright notice and this permission notice shall be included *)(* in all copies or substantial portions of the Software. *)(* *)(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)(* DEALINGS IN THE SOFTWARE. *)(* *)(*****************************************************************************)(*
Caching of small Value.t
Strategy:
* Value.t larger than max_leaf_size is never cached.
* Any Value.t equal to or smaller than max_leaf_size is cached for small
amount of time.
* Each Value.t in the cache has its score. Score decays gradually.
* Once its score becomes 0, the Value.t may be removed from the cache.
* If cache hits, the Value.t gains some score.
*)openUtilsopenArray.SyntaxtypeError.t+=Hashconsofstringlet()=Error.register_printer@@function|Hashconss->Some("Hashcons: "^s)|_->Nonetypeconfig={max_leaf_size:int;max_bytes_commit:int;max_bytes_absolute:int;shrink_ratio:float}letconfig_enabled={max_leaf_size=36;max_bytes_commit=10_000_000(* 10MB *);max_bytes_absolute=50_000_000(* 50MB *);shrink_ratio=0.8}letconfig_disabled={max_leaf_size=0;max_bytes_commit=0;max_bytes_absolute=0;shrink_ratio=0.0}letcheck_configc=ifc.max_leaf_size>=0&&c.max_bytes_commit<=c.max_bytes_absolute&&0.<=c.shrink_ratio&&c.shrink_ratio<=1.0thenOk()elseError()(* Number of blocks for the value of the given size in bytes *)letblockssize=ifsize<=32then2elseifsize<=64then3else(* assuming using only 1 chunk *)(size+6+31)/32typecontents={index:Index.t;mutablefreq:int}typet={tbl:(Value.t,contents)Hashtbl.t;config:config;mutablecurrent_bytes:int;mutablesaved_blocks:int;by_size:intarray;scores:intarray;mutablewarned_level:int}letconfigt=t.configletis_disabledt=t.config.max_leaf_size=0letmax_freq=10000letentry_byteslen=(len/8+2)*Sys.word_size/8+61(* 61 bytes per Hashtbl entry *)letscorelenfreq=letblocks=blocksleninletgain=blocks*freqinletcost=entry_byteslen/8ingain/costletmax_scorelen=scorelenmax_freqletscorev{freq;_}=score(Value.lengthv)freqletentry_bytesv=entry_bytes(Value.lengthv)letcreateconfig={tbl=Hashtbl.create101;config;current_bytes=0;saved_blocks=0;by_size=Array.makeconfig.max_leaf_size0;scores=Array.make(max_scoreconfig.max_leaf_size+1)0;warned_level=1;}letestimated_size_in_bytest=t.current_bytesletstatppft=Format.fprintfppf"hashcons saved: %d saved: %.2f MB current_bytes: %d max: %d@."t.saved_blocks(floatt.saved_blocks*.floatSys.word_size/.8_000_000.0)t.current_bytest.config.max_bytes_commitletstat_tableppft=Format.fprintfppf"hashcons buckets:@.";Array.iteri(funin->Format.fprintfppf"bksz %2d %6d@."(i+1)n)t.by_sizeletfindtv=letlen=Value.lengthviniflen=0||len>t.config.max_leaf_sizethenError(Hashcons"hashcons: too large or 0")elsematchHashtbl.find_optt.tblvwith|None->OkNone|Somec->letb=entry_bytesvinlets=scorevcint.scores.!(s)<-t.scores.(s)-b;c.freq<-Int.minmax_freq(c.freq+100);lets=scorevcint.scores.!(s)<-t.scores.(s)+b;t.saved_blocks<-t.saved_blocks+blockslen;Ok(Somec.index)letshrink'tthreshold=ift.current_bytes<=thresholdthen()elsebeginletdown_to=int_of_float@@floatthreshold*.t.config.shrink_ratioinLog.notice"hashcons: shrinking from %d to %d ..."t.current_bytesdown_to;ifEnvconf.use_reachable_wordsthenLog.notice"hashcons: %.2f MB to..."(Utils.reachable_mbst);letorg_current_bytes=t.current_bytesinFormat.kasprintf(Log.debug"%s")"%a"statt;Format.kasprintf(Log.debug"%s")"%a"stat_tablet;let(),secs=with_time@@fun()->letbound_score=letb=t.current_bytes-down_toinletrecfis=lets=s+t.scores.(i)int.scores.!(i)<-0;ifb<=sthenielsef(i+1)sinf00inHashtbl.filter_map_inplace(funvc->letcost=entry_bytesvinlets=scorevcinifs<=bound_scorethenbegint.current_bytes<-t.current_bytes-cost;leti=Value.lengthv-1int.by_size.!(i)<-t.by_size.!(i)-1;Noneendelsebegint.scores.!(s)<-t.scores.(s)-cost;c.freq<-c.freq*9/10;lets=scorevcint.scores.!(s)<-t.scores.(s)+cost;Somecend)t.tbl;assert(t.current_bytes<=down_to)inLog.notice"hashcons: shrank from %d to %d in %a"org_current_bytest.current_bytesMtime.Span.ppsecs;ifEnvconf.use_reachable_wordsthenLog.notice"hashcons: shrank to %.2f MB"(Utils.reachable_mbst);Format.kasprintf(Log.notice"%s")"%a"statt;Format.kasprintf(Log.notice"%s")"%a"stat_tabletendletshrinkt=ifis_disabledtthen()elseshrink'tt.config.max_bytes_commitletaddtvindex=letlen=Value.lengthviniflen=0||len>t.config.max_leaf_sizethenError(Hashcons"hashcons: too large or 0")elsebeginmatchHashtbl.find_optt.tblvwith|Some{index=index';freq}->(* Let's use newer index *)Hashtbl.replacet.tblv{index=Index.maxindexindex';freq};Ok()|None->letc={index;freq=100}inletb=entry_bytesvinHashtbl.replacet.tblvc;t.current_bytes<-t.current_bytes+b;t.by_size.!(len-1)<-t.by_size.!(len-1)+1;lets=scorevcint.scores.(s)<-t.scores.(s)+b;shrink'tt.config.max_bytes_absolute;Ok()end