123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344(*
* Copyright (c) 2018-2022 Tarides <contact@tarides.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)open!ImportopenSnapshot_intfletrm_indexpath=letpath_index=Filename.concatpath"index"inSys.readdirpath_index|>Array.iter(funname->Unix.unlink(Filename.concatpath_indexname));Unix.rmdirpath_index;Unix.rmdirpathmoduleMake(Args:Args)=structmoduleHashes=Irmin.Hash.Set.Make(Args.Hash)openArgsmoduleInode_pack=Inode.PackmodulePack_index=Pack_index.Make(Hash)letpp_hash=Irmin.Type.ppHash.tletpp_key=Irmin.Type.ppInode_pack.Key.tletpp_kind=Irmin.Type.ppPack_value.Kind.tletpp_snapshot=Irmin.Type.ppInode.Snapshot.inode_tmoduleExport=structmoduleValue_unit=structtypet=unit[@@derivingirmin]letencode_=""letencoded_size=0letdecode__=()endmoduleIndex=Index_unix.Make(Pack_index.Key)(Value_unit)(Index.Cache.Unbounded)typet={fm:Fm.t;dispatcher:Dispatcher.t;log_size:int;inode_pack:readInode_pack.t;contents_pack:readContents_pack.t;}letvconfigcontents_packinode_pack=(* In order to read from the pack files, we need to open at least two
files: suffix and control. We just open the file manager for
simplicity. *)letfm=Fm.open_roconfig|>Fm.Errs.raise_if_errorinletdispatcher=Dispatcher.vfm|>Fm.Errs.raise_if_errorinletlog_size=Conf.index_log_sizeconfigin{fm;dispatcher;log_size;inode_pack;contents_pack}letcloset=Fm.closet.fmletkey_of_hashhasht=Inode_pack.index_direct_with_kindthash|>Option.getletlength_of_hashhasht=letkey,_=key_of_hashhashtinmatchPack_key.inspectkeywith|Indexed_->(* This case cannot happen, as [key_of_hash] converts an
indexed key to a direct one. *)assertfalse|Direct{length;_}->lengthletio_read_and_decode_entry_prefix~offt=letentry_prefix:Inode_pack.Entry_prefix.t=Inode_pack.read_and_decode_entry_prefix~offt.dispatcherinletlength=matchInode_pack.Entry_prefix.total_entry_lengthentry_prefixwith|Somelength->length|None->(* If the length is not on disk, the object is in index. *)length_of_hashentry_prefix.hasht.inode_packinletkey=Pack_key.v_direct~offset:off~lengthentry_prefix.hashin(key,entry_prefix.kind)(* Get the childrens offsets and then read their keys at that offset. *)letdecode_children_offsets~off~lent=letbuf=Bytes.createleninlet_=Dispatcher.read_exnt.dispatcher~off~lenbufinletentry_of_offsetoffset=[%log.debug"key_of_offset: %a"Int63.ppoffset];io_read_and_decode_entry_prefix~off:offsettinletentry_of_hashhash=key_of_hashhasht.inode_packin(* Bytes.unsafe_to_string usage: buf is created locally, uniquely owned; we assume
Dispatcher.read_exn returns unique ownership; then call to Bytes.unsafe_to_string
gives up unique ownership of buf. This is safe. *)Inode.Raw.decode_children_offsets~entry_of_offset~entry_of_hash(Bytes.unsafe_to_stringbuf)(* safe: see comment above *)(ref0)typevisit={visited:Hash.t->bool;set_visit:Hash.t->unit}letitertvf_contentsf_inodes(root_key,root_kind)=lettotal_visited=ref0inletset_visith=incrtotal_visited;v.set_visithinletrecaux(key,kind)=matchPack_key.inspectkeywith|Indexed_->(* This case cannot happen:
- either the root key is indexed, in which case it converted to a
direct key just before the call to [aux];
- or one of the children of a node is indexed, in which case
[Inode.Raw.decode_children_offsets] converts it to a direct key
before the call to [aux]. *)assertfalse|Direct{length;offset;hash;_}->ifv.visitedhashthenLwt.return_unitelse(set_visithash;[%log.debug"visit hash: %a, %a"pp_hashhashpp_kindkind];(* [unsafe_find] decodes the values based on their kind, we need
to detect the type in order to call the correspoding
[unsafe_find].*)matchkindwith|Contents->(letvalue=Contents_pack.unsafe_find~check_integrity:falset.contents_packkeyinmatchvaluewith|None->Fmt.failwith"contents not found in store. Key: %a "pp_keykey|Somevalue->letsnapshot_blob=valueinf_contentssnapshot_blob)|Inode_v1_unstable|Inode_v1_stable|Inode_v2_root|Inode_v2_nonroot->(letchildren=decode_children_offsets~off:offset~len:lengthtinlet*()=Lwt_list.iter_s(funkey->auxkey)childreninletvalue=Inode_pack.unsafe_find~check_integrity:falset.inode_packkeyinmatchvaluewith|None->Fmt.failwith"node not found in store. Key: %a "pp_keykey|Somevalue->letsnapshot_inode=Inode.to_snapshotvaluein[%log.debug"iter inode snapshot: %a"pp_snapshotsnapshot_inode];f_inodessnapshot_inode)|Commit_v1|Commit_v2->(* The traversal starts with a node, it never iters over
commits. *)assertfalse|Dangling_parent_commit->assertfalse)in(* In case the root node of a tree is indexed, we need to convert it to a
direct key first. *)letroot_key=matchPack_key.inspectroot_keywith|Indexedhash->key_of_hashhasht.inode_pack|>fst|Direct_->root_keyinlet*()=aux(root_key,root_kind)inLwt.return!total_visitedletrun_in_memorytf_contentsf_inodesroot_key=[%log.info"iter in memory"];letvisited_hash=Hashes.create~initial_slots:100_000()inletvisitedh=Hashes.memvisited_hashhinletset_visith=matchHashes.addvisited_hashhwith|`Duplicate->Fmt.failwith"should not visit hash twice. Hash: %a "pp_hashh|`Ok->()initert{visited;set_visit}f_contentsf_inodesroot_keyletrun_on_diskpathtf_contentsf_inodesroot_key=[%log.info"iter on disk"];letindex=Index.v~fresh:true~readonly:false~log_size:t.log_sizepathinletvisitedh=Index.memindexhinletset_visith=ifvisitedhthenFmt.failwith"Should not visit hash twice. Hash: %a "pp_hashhelseIndex.replaceindexh()inlet*total=itert{visited;set_visit}f_contentsf_inodesroot_keyinIndex.closeindex;rm_indexpath;Lwt.returntotalletrun?on_disk=matchon_diskwith|None->run_in_memory|Some(`Pathpath)->run_on_diskpathendmoduleImport=structmoduleValue=structtypet=int63*int[@@derivingirmin]letencoded_size=(64/8)+(32/8)letencode((off,len):t)=letbuf=Bytes.createencoded_sizeinBytes.set_int64_bebuf0(Int63.to_int64off);Bytes.set_int32_bebuf8(Int32.of_intlen);(* Bytes.unsafe_to_string usage: buf is local, uniquely owned; we assume the
Bytes.set... functions return unique ownership; then Bytes.unsafe_to_string
gives up unique ownership of buf to get shared ownership of the resulting
string, which is then returned. buf is no longer accessible. This is safe. *)Bytes.unsafe_to_stringbufletdecodespos:t=(* Bytes.unsafe_of_string usage: s is shared; buf is shared (we cannot mutate it);
we assume Bytes.get_... functions need shared ownership only. This usage is
safe. *)letbuf=Bytes.unsafe_of_stringsinletoff=Bytes.get_int64_bebufpos|>Int63.of_int64inletlen=Bytes.get_int32_bebuf(pos+8)|>Int32.to_intin(off,len)endmoduleIndex=Index_unix.Make(Pack_index.Key)(Value)(Index.Cache.Unbounded)typepath=stringtypet={inode_pack:readInode_pack.t;contents_pack:readContents_pack.t;visited:Hash.t->Hash.tPack_key.t;set_visit:Hash.t->Hash.tPack_key.t->unit;index:(path*Index.t)option;}letsave_contentstb:Hash.tPack_key.tLwt.t=let*key=Contents_pack.batcht.contents_pack(funwriter->Contents_pack.addwriterb)inlethash=Inode.Key.to_hashkeyint.set_visithashkey;Lwt.returnkeyletsave_inodesti:Hash.tPack_key.tLwt.t=letinode=Inode.of_snapshott.inode_pack~index:t.visitediinletkey=Inode.save~allow_non_root:truet.inode_packinodeinlethash=Inode.Key.to_hashkeyint.set_visithashkey;Lwt.returnkeylethash_not_foundh=Fmt.failwith"You are trying to save to the backend an inode that contains pointers \
to objects unknown to the backend. Hash: %a"pp_hashhletsave_reuse_indexinodes=[%log.info"save reuse index "];(* objects are added to index by [save_contents] and [save_inodes]
functions. *)letset_visit__=()inletvisitedh=matchInode_pack.index_directinodeshwith|Somex->x|None->hash_not_foundhin(set_visit,visited,None)letsave_in_memory()=[%log.info"save in memory"];lettbl:(Hash.t,Hash.tPack_key.t)Hashtbl.t=Hashtbl.create10inletset_visithk=Hashtbl.addtblhkinletvisitedh=matchHashtbl.find_opttblhwith|Somex->x|None->hash_not_foundhin(set_visit,visited,None)letsave_on_disklog_sizepath=(* Make sure we are not reusing the same index as irmin-pack. *)letpath=path^"_tmp"in[%log.info"save on disk: %s"path];letindex=Index.v~fresh:true~readonly:false~log_sizepathinletset_visithk=letoffset,length=matchPack_key.inspectkwith|Direct{offset;length;_}->(offset,length)|Indexed_->(* Visited objects have direct keys. *)assertfalseinIndex.replaceindexh(offset,length)inletvisitedh=tryletoffset,length=Index.findindexhinletkey=Pack_key.v_direct~offset~lengthhinkeywithNot_found->hash_not_foundhin(set_visit,visited,Some(path,index))letv?on_disklog_sizecontents_packinode_pack=letset_visit,visited,index=matchon_diskwith|None->save_in_memory()|Some(`Pathpath)->save_on_disklog_sizepath|Some`Reuse->save_reuse_indexinode_packin{inode_pack;contents_pack;visited;set_visit;index}letcloset=Option.iter(fun(path,index)->Index.closeindex;rm_indexpath)t.indexendend