
(*
 * Copyright (c) 2018-2022 Tarides <contact@tarides.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *)

open! Import
include Checks_intf

(** Cmdliner term that configures logging from the command line.

    It installs a [Logs] reporter that writes [Logs.App] messages to stderr,
    prefixed with a bold cyan [">> "], and every other level to stdout. The
    style renderer and the log level are taken from the [Fmt_cli] and
    [Logs_cli] terms. *)
let setup_log =
  let init style_renderer level =
    let format_reporter =
      let report _src level ~over k msgf =
        (* Signal completion to [Logs] before resuming the continuation. *)
        let k _ =
          over ();
          k ()
        in
        msgf @@ fun ?header:_ ?tags:_ fmt ->
        match level with
        | Logs.App ->
            Fmt.kpf k Fmt.stderr
              ("@[<v 0>%a" ^^ fmt ^^ "@]@.")
              Fmt.(styled `Bold (styled (`Fg `Cyan) string))
              ">> "
        | _ -> Fmt.kpf k Fmt.stdout ("@[<v 0>" ^^ fmt ^^ "@]@.")
      in
      { Logs.report }
    in
    Fmt_tty.setup_std_outputs ?style_renderer ();
    Logs.set_level level;
    Logs.set_reporter format_reporter
  in
  Cmdliner.Term.(const init $ Fmt_cli.style_renderer () $ Logs_cli.level ())

(** Required positional argument (position 0): path of the store on disk. *)
let path =
  let open Cmdliner.Arg in
  required
  @@ pos 0 (some string) None
  @@ info ~doc:"Path to the Irmin store on disk" ~docv:"PATH" []

(** [Cmdliner.Term.info] with the deprecation alert silenced. *)
let deprecated_info = (Cmdliner.Term.info [@alert "-deprecated"])

(** [ppf_or_null ppf] is [p] when [ppf] is [Some p]; otherwise it is a fresh
    formatter writing to the null device ([/dev/null] or [NUL]).

    @raise Invalid_argument if [Sys.os_type] is not a known OS type. *)
let ppf_or_null ppf =
  let null =
    match Sys.os_type with
    | "Unix" | "Cygwin" -> "/dev/null"
    | "Win32" -> "NUL"
    | _ -> invalid_arg "invalid os type"
  in
  match ppf with
  | Some p -> p
  | None -> open_out null |> Format.formatter_of_out_channel

module Make (Store : Store) = struct
  module Hash = Store.Hash
  module Index = Pack_index.Make (Hash)

  (** Read basic metrics from an existing store. *)
  module Stat = struct
    type size = Bytes of int [@@deriving irmin]

    type io = { size : size; offset : int63; version : Version.t }
    [@@deriving irmin]

    type objects = { nb_commits : int; nb_nodes : int; nb_contents : int }
    [@@deriving irmin]

    type t = { hash_size : size; log_size : int; objects : objects }
    [@@deriving irmin]

    (* Open the index under [root] read-only and count its entries by kind,
       reporting progress on stderr. *)
    let traverse_index ~root log_size =
      let index = Index.v_exn ~readonly:true ~fresh:false ~log_size root in
      let ppf = Format.err_formatter in
      let bar, (progress_contents, progress_nodes, progress_commits) =
        Utils.Object_counter.start ppf
      in
      let f _ (_, _, (kind : Pack_value.Kind.t)) =
        match kind with
        | Contents -> progress_contents ()
        | Inode_v1_stable | Inode_v1_unstable | Inode_v2_root | Inode_v2_nonroot
          ->
            progress_nodes ()
        | Commit_v1 | Commit_v2 -> progress_commits ()
        | Dangling_parent_commit -> assert false
      in
      Index.iter f index;
      let nb_contents, nb_nodes, nb_commits =
        Utils.Object_counter.finalise_with_stats bar
      in
      { nb_contents; nb_nodes; nb_commits }

    let conf root = Conf.init ~readonly:true ~fresh:false ~no_migrate:true root

    (* Print the statistics of the store at [root] as non-minified JSON on
       stdout. *)
    let run ~root =
      [%logs.app "Getting statistics for store: `%s'@," root];
      let log_size = conf root |> Conf.index_log_size in
      let objects = traverse_index ~root log_size in
      { hash_size = Bytes Hash.hash_size; log_size; objects }
      |> Irmin.Type.pp_json ~minify:false t Fmt.stdout;
      Lwt.return_unit

    let term_internal =
      Cmdliner.Term.(const (fun root () -> Lwt_main.run (run ~root)) $ path)

    let term =
      let doc = "Print high-level statistics about the store." in
      Cmdliner.Term.(term_internal $ setup_log, deprecated_info ~doc "stat")
  end

  (** [reconstruct-index] command: rebuilds the index by traversing the pack
      file, either in place or into the [DEST] path. *)
  module Reconstruct_index = struct
    let conf ~index_log_size root =
      Conf.init ~readonly:false ~fresh:false ?index_log_size ~no_migrate:true
        root

    let dest =
      let open Cmdliner.Arg in
      value
      & pos 1 (some string) None
      @@ info ~doc:"Path to the new index file" ~docv:"DEST" []

    let index_log_size =
      let open Cmdliner.Arg in
      value
      & opt (some int) None
      @@ info ~doc:"Size of the index log file" [ "index-log-size" ]

    let run ~root ~output ?index_log_size () =
      let conf = conf ~index_log_size root in
      match output with
      | None -> Store.traverse_pack_file (`Reconstruct_index `In_place) conf
      | Some p -> Store.traverse_pack_file (`Reconstruct_index (`Output p)) conf

    let term_internal =
      Cmdliner.Term.(
        const (fun root output index_log_size () ->
            run ~root ~output ?index_log_size ())
        $ path
        $ dest
        $ index_log_size)

    let term =
      let doc = "Reconstruct index from an existing pack file." in
      Cmdliner.Term.(
        term_internal $ setup_log, deprecated_info ~doc "reconstruct-index")
  end

  (** [integrity-check-index] command: traverses the pack file with
      [`Check_index], or [`Check_and_fix_index] when [--auto-repair] is set. *)
  module Integrity_check_index = struct
    let conf root = Conf.init ~readonly:true ~fresh:false ~no_migrate:true root

    let run ~root ~auto_repair () =
      let conf = conf root in
      if auto_repair then Store.traverse_pack_file `Check_and_fix_index conf
      else Store.traverse_pack_file `Check_index conf

    let auto_repair =
      let open Cmdliner.Arg in
      value & (flag @@ info ~doc:"Add missing entries in index" [ "auto-repair" ])

    let term_internal =
      Cmdliner.Term.(
        const (fun root auto_repair () -> run ~root ~auto_repair ())
        $ path
        $ auto_repair)

    let term =
      let doc = "Check index integrity." in
      Cmdliner.Term.(
        term_internal $ setup_log, deprecated_info ~doc "integrity-check-index")
  end

  (** [integrity-check] command: runs [Store.integrity_check] from the given
      heads (or all heads of the repository) and reports the outcome. *)
  module Integrity_check = struct
    let conf root always =
      let indexing_strategy =
        if always then Irmin_pack.Indexing_strategy.always
        else Irmin_pack.Indexing_strategy.minimal
      in
      Conf.init ~readonly:false ~fresh:false ~no_migrate:true ~indexing_strategy
        root

    (* Successes are printed on [ppf] (or discarded when [ppf] is absent);
       errors are always printed on stderr. [name], when given, prefixes the
       message. *)
    let handle_result ?ppf ?name res =
      let ppf = ppf_or_null ppf in
      let name = match name with Some x -> x ^ ": " | None -> "" in
      match res with
      | Ok (`Fixed n) -> Fmt.pf ppf "%sOk -- fixed %d\n%!" name n
      | Ok `No_error -> Fmt.pf ppf "%sOk\n%!" name
      | Error (`Cannot_fix x) ->
          Printf.eprintf "%sError -- cannot fix: %s\n%!" name x
      | Error (`Corrupted x) ->
          Printf.eprintf "%sError -- corrupted: %d\n%!" name x

    let run ?ppf ~root ~auto_repair ~always ~heads () =
      let conf = conf root always in
      let* repo = Store.Repo.v conf in
      let* heads =
        match heads with
        | None -> Store.Repo.heads repo
        | Some heads ->
            (* Hashes that parse but match no commit are dropped by
               [filter_map_s]; unparsable hashes fail the whole run. *)
            Lwt_list.filter_map_s
              (fun x ->
                match Repr.of_string Store.Hash.t x with
                | Ok x -> Store.Commit.of_hash repo x
                | Error (`Msg m) -> Fmt.kstr Lwt.fail_with "Invalid hash %S" m)
              heads
      in
      let* result = Store.integrity_check ?ppf ~auto_repair ~heads repo in
      let+ () = Store.Repo.close repo in
      handle_result ?ppf ?name:None result

    let heads =
      let open Cmdliner.Arg in
      value
      & opt (some (list ~sep:',' string)) None
      & info [ "heads" ] ~doc:"List of head commit hashes" ~docv:"HEADS"

    let auto_repair =
      let open Cmdliner.Arg in
      value
      & (flag @@ info ~doc:"Automatically repair issues" [ "auto-repair" ])

    let always =
      let open Cmdliner.Arg in
      value & (flag @@ info ~doc:"Use always indexing strategy" [ "always" ])

    let term_internal =
      Cmdliner.Term.(
        const (fun root auto_repair always heads () ->
            Lwt_main.run
              (run ~ppf:Format.err_formatter ~root ~auto_repair ~always ~heads
                 ()))
        $ path
        $ auto_repair
        $ always
        $ heads)

    let term =
      let doc = "Check integrity of an existing store." in
      Cmdliner.Term.(
        term_internal $ setup_log, deprecated_info ~doc "integrity-check")
  end

  (** [integrity-check-inodes] command: runs [Store.integrity_check_inodes]
      from the given heads (or all heads of the repository). *)
  module Integrity_check_inodes = struct
    let conf root = Conf.init ~readonly:true ~fresh:false ~no_migrate:true root

    let heads =
      let open Cmdliner.Arg in
      value
      & opt (some (list ~sep:',' string)) None
      & info [ "heads" ] ~doc:"List of head commit hashes" ~docv:"HEADS"

    let run ~root ~heads =
      let conf = conf root in
      let* repo = Store.Repo.v conf in
      let* heads =
        match heads with
        | None -> Store.Repo.heads repo
        | Some heads ->
            Lwt_list.filter_map_s
              (fun x ->
                match Repr.of_string Store.Hash.t x with
                | Ok x -> Store.Commit.of_hash repo x
                | Error (`Msg m) -> Fmt.kstr Lwt.fail_with "Invalid hash %S" m)
              heads
      in
      let* () =
        Store.integrity_check_inodes ~heads repo >|= function
        | Ok `No_error -> [%logs.app "Ok"]
        | Error (`Cannot_fix msg) -> Fmt.failwith "Error: %s" msg
      in
      Store.Repo.close repo

    let term_internal =
      Cmdliner.Term.(
        const (fun root heads () -> Lwt_main.run (run ~root ~heads))
        $ path
        $ heads)

    let term =
      let doc = "Check integrity of inodes in an existing store." in
      Cmdliner.Term.(
        term_internal $ setup_log,
        deprecated_info ~doc "integrity-check-inodes" )
  end

  (** [stat-store] command: runs [Store.stats] on one commit, which is either
      the one given with [--commit] or the unique head of the repository. *)
  module Stats_commit = struct
    let conf root = Conf.init ~readonly:true ~fresh:false ~no_migrate:true root

    let commit =
      let open Cmdliner.Arg in
      value
      & opt (some string) None
      & info [ "commit" ]
          ~doc:"The commit whose underlying tree is traversed." ~docv:"COMMIT"

    let dump_blob_paths_to =
      let open Cmdliner.Arg in
      value
      & opt (some string) None
      & info
          [ "dump_blob_paths_to" ]
          ~doc:"Print all paths to a blob in the tree in a file."

    let run ~root ~commit ~dump_blob_paths_to () =
      let conf = conf root in
      let* repo = Store.Repo.v conf in
      let* commit =
        match commit with
        | None -> (
            (* Without an explicit commit, exactly one head must exist. *)
            let* heads = Store.Repo.heads repo in
            match heads with
            | [] -> Lwt.fail_with "No heads found"
            | [ head ] -> Lwt.return head
            | ls ->
                Fmt.kstr Lwt.fail_with
                  "Several heads found, please specify one. Heads = %a"
                  Fmt.(list ~sep:comma Store.Commit.pp_hash)
                  ls)
        | Some hash -> (
            match Repr.of_string Store.Hash.t hash with
            | Ok x -> (
                Store.Commit.of_hash repo x >>= function
                | None ->
                    Fmt.kstr Lwt.fail_with "Commit with hash %s not found" hash
                | Some x -> Lwt.return x)
            | Error (`Msg m) -> Fmt.kstr Lwt.fail_with "Invalid hash %S" m)
      in
      let* () = Store.stats ~dump_blob_paths_to ~commit repo in
      Store.Repo.close repo

    let term_internal =
      Cmdliner.Term.(
        const (fun root commit dump_blob_paths_to () ->
            Lwt_main.run (run ~root ~commit ~dump_blob_paths_to ()))
        $ path
        $ commit
        $ dump_blob_paths_to)

    let term =
      let doc =
        "Traverse one commit, specified with the --commit argument, in the \
         store for stats. If no commit is specified the current head is used."
      in
      Cmdliner.Term.(term_internal $ setup_log, deprecated_info ~doc "stat-store")
  end

  (** Command-line entry point grouping all the sub-commands above. *)
  module Cli = struct
    open Cmdliner

    (* Evaluates the chosen sub-command and exits the process; the trailing
       [assert false] only serves to give the function the [empty] type. *)
    let main
        ?(terms =
          [
            Stat.term;
            Reconstruct_index.term;
            Integrity_check.term;
            Integrity_check_inodes.term;
            Integrity_check_index.term;
            Stats_commit.term;
          ]) () : empty =
      let default =
        let default_info =
          let doc = "Check Irmin data-stores." in
          deprecated_info ~doc "irmin-fsck"
        in
        Term.(ret (const (`Help (`Auto, None))), default_info)
      in
      let deprecated_eval_choice = (Term.eval_choice [@alert "-deprecated"]) in
      let deprecated_exit = (Term.exit [@alert "-deprecated"]) in
      deprecated_eval_choice default terms |> deprecated_exit;
      assert false
  end

  let cli = Cli.main
end

module Integrity_checks
    (XKey : Pack_key.S)
    (X : Irmin.Backend.S
           with type Commit.key = XKey.t
            and type Node.key = XKey.t
            and type Schema.Hash.t = XKey.hash)
    (Index : Pack_index.S) =
struct
  (** [check_always ?ppf ~auto_repair ~check index] applies [check] to every
      binding of [index].

      Without [auto_repair], the result is [Ok `No_error] or
      [Error (`Corrupted n)], where [n] counts wrong-hash and absent entries.
      With [auto_repair], absent entries are filtered out of the index
      ([Ok (`Fixed n)]), and a wrong hash yields [Error (`Cannot_fix _)]. *)
  let check_always ?ppf ~auto_repair ~check index =
    let ppf = ppf_or_null ppf in
    Fmt.pf ppf "Running the integrity_check.\n%!";
    let nb_absent = ref 0 in
    let nb_corrupted = ref 0 in
    let exception Cannot_fix in
    let counter, (progress_contents, progress_nodes, progress_commits) =
      Utils.Object_counter.start ppf
    in
    let f (k, (offset, length, (kind : Pack_value.Kind.t))) =
      match kind with
      | Contents ->
          progress_contents ();
          check ~kind:`Contents ~offset ~length k
      | Inode_v1_stable | Inode_v1_unstable | Inode_v2_root | Inode_v2_nonroot
        ->
          progress_nodes ();
          check ~kind:`Node ~offset ~length k
      | Commit_v1 | Commit_v2 ->
          progress_commits ();
          check ~kind:`Commit ~offset ~length k
      | Dangling_parent_commit -> assert false
    in
    let result =
      if auto_repair then
        try
          Index.filter index (fun binding ->
              match f binding with
              | Ok () -> true
              | Error `Wrong_hash -> raise Cannot_fix
              | Error `Absent_value ->
                  incr nb_absent;
                  false);
          if !nb_absent = 0 then Ok `No_error else Ok (`Fixed !nb_absent)
        with Cannot_fix -> Error (`Cannot_fix "Not implemented")
      else (
        Index.iter
          (fun k v ->
            match f (k, v) with
            | Ok () -> ()
            | Error `Wrong_hash -> incr nb_corrupted
            | Error `Absent_value -> incr nb_absent)
          index;
        if !nb_absent = 0 && !nb_corrupted = 0 then Ok `No_error
        else Error (`Corrupted (!nb_corrupted + !nb_absent)))
    in
    Utils.Object_counter.finalise counter;
    result

  (** [check_minimal ?ppf ~pred ~iter ~check ~recompute_hash t] walks the
      store with [iter], checking contents with [check] and nodes by comparing
      their key hash with [recompute_hash].

      The result is [Ok `No_error], or [Error (`Cannot_fix msg)] where [msg]
      lists every inconsistency that was recorded. *)
  let check_minimal ?ppf ~pred ~iter ~check ~recompute_hash t =
    let ppf = ppf_or_null ppf in
    Fmt.pf ppf "Running the integrity_check.\n%!";
    let errors = ref [] in
    let counter, (progress_contents, progress_nodes, progress_commits) =
      Utils.Object_counter.start ppf
    in
    let pp_hash = Irmin.Type.pp X.Hash.t in
    let equal_hash = Irmin.Type.(unstage (equal X.Hash.t)) in
    let add_error err hash =
      let msg =
        match err with
        | `Wrong_hash -> Fmt.str "Wrong_hash %a" pp_hash hash
        | `Absent_value -> Fmt.str "Absent_value for hash %a" pp_hash hash
      in
      errors := msg :: !errors
    in
    let check_contents key =
      match Pack_key.inspect key with
      | Indexed _hash ->
          (* TODO: The goal here is to check a "one commit" store, generated
             by a gc, in which indexed keys cannot occur. We might want to
             extend this to stores that have both indexed and direct keys. *)
          Lwt.fail_with
            "Not supported for stores which have entries obtained with irmin < \
             3.0. If all entries were added with irmin < 3.0, please use \
             [integrity_check] instead."
      | Direct { offset; length; hash; _ } -> (
          let result = check ~offset ~length hash in
          match result with
          | Ok () -> Lwt.return_unit
          | Error err ->
              add_error err hash;
              Lwt.return_unit)
    in
    (* Commits are read from disk and checked by the [find] function in [pred].
       We need to explicitly check the contents and the nodes. *)
    let contents key =
      progress_contents ();
      check_contents key
    in
    (* [Lwt.catch] is required here: a plain [try ... with] around a promise
       does not see exceptions raised inside the [>|=] callback nor rejected
       promises, so failures would escape instead of being recorded. *)
    let pred_node repo key =
      Lwt.catch
        (fun () ->
          X.Node.find (X.Repo.node_t repo) key >|= function
          | None ->
              Fmt.failwith "node with hash %a not found" pp_hash
                (XKey.to_hash key)
          | Some v ->
              let preds = pred v in
              List.rev_map
                (function
                  | s, `Inode x ->
                      assert (s = None);
                      `Node x
                  | _, `Node x -> `Node x
                  | _, `Contents x -> `Contents x)
                preds)
        (fun _exn ->
          add_error `Wrong_hash (XKey.to_hash key);
          Lwt.return [])
    in
    let check_nodes key =
      X.Node.find (X.Repo.node_t t) key >|= function
      | None ->
          Fmt.failwith "node with hash %a not found" pp_hash (XKey.to_hash key)
      | Some v ->
          let h = XKey.to_hash key in
          let h' = recompute_hash v in
          if not (equal_hash h h') then add_error `Wrong_hash h
    in
    let node key =
      progress_nodes ();
      check_nodes key
    in
    (* Only visit the nodes of the commits and not the parents of the commit.
       As in [pred_node], failures are captured with [Lwt.catch]. *)
    let pred_commit repo k =
      Lwt.catch
        (fun () ->
          progress_commits ();
          X.Commit.find (X.Repo.commit_t repo) k >|= function
          | None -> []
          | Some c ->
              let node = X.Commit.Val.node c in
              [ `Node node ])
        (fun _exn ->
          add_error `Wrong_hash (XKey.to_hash k);
          Lwt.return [])
    in
    let+ () = iter ~contents ~node ~pred_node ~pred_commit t in
    Utils.Object_counter.finalise counter;
    if !errors = [] then Ok `No_error
    else
      Fmt.kstr
        (fun x -> Error (`Cannot_fix x))
        "Inconsistencies found: %a"
        Fmt.(list ~sep:comma string)
        !errors

  (** [check_inodes ?ppf ~iter ~pred ~check t] walks the store with [iter] and
      applies [check] to every node.

      The result is [Ok `No_error], or [Error (`Cannot_fix msg)] where [msg]
      lists the messages returned by [check] and the traversal failures. *)
  let check_inodes ?ppf ~iter ~pred ~check t =
    let ppf = ppf_or_null ppf in
    Fmt.pf ppf "Check integrity for inodes.\n%!";
    let counter, (_, progress_nodes, progress_commits) =
      Utils.Object_counter.start ppf
    in
    let errors = ref [] in
    let pred_node repo key =
      Lwt.catch
        (fun () -> pred repo key)
        (fun _ ->
          errors := "Error in repo iter" :: !errors;
          Lwt.return [])
    in
    let node k =
      progress_nodes ();
      check k >|= function Ok () -> () | Error msg -> errors := msg :: !errors
    in
    let commit _ =
      progress_commits ();
      Lwt.return_unit
    in
    let+ () = iter ~pred_node ~node ~commit t in
    Utils.Object_counter.finalise counter;
    if !errors = [] then Ok `No_error
    else
      Fmt.kstr
        (fun x -> Error (`Cannot_fix x))
        "Inconsistent inodes found %a"
        Fmt.(list ~sep:comma string)
        !errors
end

module Stats (S : sig
  type step

  val step_t : step Irmin.Type.t

  module Hash : Irmin.Hash.S
end) =
struct
  type step = Node of S.step | Inode
  type path = step list

  (** Per-node metrics accumulated during the traversal. Paths are stored in
      reverse order; they are reversed when printed. *)
  module Metrics : sig
    type max
    type node

    val max_length : node -> int
    val all_paths : node -> path list
    val mp : node -> max
    val maximum : max -> int
    val maximal_count : max -> int
    val representative : max -> path

    val v :
      ?maximal_count:int -> maximum:int -> representative:path -> unit -> max

    val empty_root_node : node
    val empty_node : node
    val empty_max : max
    val update_node : node -> node -> step -> int -> node
    val update_width : node -> int -> max -> max
    val pp : max Fmt.t
    val pp_all_paths : node Fmt.t
  end = struct
    type max = { maximum : int; maximal_count : int; representative : path }

    type node = {
      all_paths : path list; (* All paths to a node. *)
      max_length : int; (* The max length of a path to a node. *)
      mp : max;
          (* The maximum size of a membership proof: the number of siblings at
             every level along the path. *)
    }

    let max_length { max_length; _ } = max_length
    let all_paths { all_paths; _ } = all_paths
    let mp { mp; _ } = mp
    let maximum { maximum; _ } = maximum
    let representative { representative; _ } = representative
    let maximal_count { maximal_count; _ } = maximal_count

    let v ?(maximal_count = 1) ~maximum ~representative () =
      { maximum; maximal_count; representative }

    let empty_max = { maximum = 0; maximal_count = 0; representative = [] }

    let empty_root_node =
      let mp = empty_max in
      { all_paths = [ [] ]; max_length = 0; mp }

    let empty_node =
      let mp = empty_max in
      { all_paths = []; max_length = 0; mp }

    (* Record one more occurrence of the current maximum. *)
    let incr ({ maximal_count; _ } as t) =
      { t with maximal_count = maximal_count + 1 }

    (* Keep the larger of [stat_pred] and the value reached through [k]
       ([stat_k.maximum + nb_siblings]); on a non-zero tie, bump the count. *)
    let update_mp stat_k stat_pred step nb_siblings =
      let mp = stat_k.maximum + nb_siblings in
      if stat_pred.maximum > mp then stat_pred
      else if stat_pred.maximum = mp && not (mp = 0) then incr stat_pred
      else
        let path_to_k = stat_k.representative in
        let new_path_to_pred = step :: path_to_k in
        v ~maximum:mp ~representative:new_path_to_pred ()

    (* NOTE(review): [List.hd] raises if [stat_k.all_paths] is empty, which is
       the case for [empty_node]; confirm callers never reach this branch
       with an unvisited node. *)
    let update_width stat_k width_k max_width =
      if max_width.maximum > width_k then max_width
      else if max_width.maximum = width_k then incr max_width
      else
        let representative = List.hd stat_k.all_paths in
        v ~maximum:width_k ~representative ()

    (* Extend every path to [k] with [step_k_to_n] and add the results to the
       paths already known for [n]. *)
    let update_path paths_to_k step_k_to_n paths_to_n =
      let new_paths_to_n =
        List.rev_map (fun rev_path -> step_k_to_n :: rev_path) paths_to_k
      in
      List.rev_append new_paths_to_n paths_to_n

    let update_node stat_k stat_pred step_k_to_pred nb_siblings =
      let all_paths, max_length =
        match step_k_to_pred with
        | Inode ->
            (* Do not update if pred is an inode. *)
            (stat_k.all_paths, stat_k.max_length)
        | Node _ ->
            let paths_to_pred =
              update_path stat_k.all_paths step_k_to_pred stat_pred.all_paths
            in
            let length =
              (* The new current length to pred. *)
              let lk = stat_k.max_length + 1 in
              (* The previous max length to pred. *)
              let ln = stat_pred.max_length in
              max lk ln
            in
            (paths_to_pred, length)
      in
      let mp = update_mp stat_k.mp stat_pred.mp step_k_to_pred nb_siblings in
      let stat_pred' = { all_paths; max_length; mp } in
      stat_pred'

    let pp_step ppf = function
      | Inode -> Fmt.pf ppf "-"
      | Node x -> Fmt.pf ppf "%a" (Irmin.Type.pp S.step_t) x

    let pp_path = Fmt.list ~sep:(Fmt.any "/") pp_step

    let pp_all_paths fmt stats =
      List.iter
        (fun l -> Fmt.pf fmt "%a\n" pp_path (List.rev l))
        stats.all_paths

    let pp =
      let open Fmt.Dump in
      record
        [
          field "maximum" (fun t -> t.maximum) Fmt.int;
          field "maximal_count" (fun t -> t.maximal_count) Fmt.int;
          field "representative" (fun t -> List.rev t.representative) pp_path;
        ]
  end

  type t = {
    visited : (S.Hash.t, Metrics.node) Hashtbl.t;
    mutable max_width : Metrics.max;
    mutable max_mp : int;
    mutable max_length : int;
  }

  let v () =
    let visited = Hashtbl.create 100 in
    let max_width = Metrics.empty_max in
    { visited; max_width; max_length = 0; max_mp = 0 }

  (* Metrics recorded for [k], or [Metrics.empty_node] if [k] is unknown. *)
  let get t k =
    try Hashtbl.find t.visited k with Not_found -> Metrics.empty_node

  let visit_node t k preds ~nb_children ~width =
    let preds =
      List.map
        (function None, x -> (Inode, x) | Some s, x -> (Node s, x))
        preds
    in
    let stat_k = get t k in
    let visit step pred =
      let stat_pred = get t pred in
      let nb_siblings = nb_children - 1 in
      let stat_pred' = Metrics.update_node stat_k stat_pred step nb_siblings in
      Hashtbl.replace t.visited pred stat_pred'
    in
    let () =
      List.iter
        (function
          | Inode, `Inode x -> visit Inode x
          | Node s, `Node x -> visit (Node s) x
          | Node s, `Contents x -> visit (Node s) x
          | _ -> assert false)
        preds
    in
    (* Once we updated its preds we can remove the node from the
       table. If it's a max width, we update the max_width stats. *)
    Hashtbl.remove t.visited k;
    t.max_width <- Metrics.update_width stat_k width t.max_width

  let visit_commit t root_node =
    let stat = Metrics.empty_root_node in
    Hashtbl.replace t.visited root_node stat

  (* Update the max length and max_mp while traversing the contents. *)
  let visit_contents t k =
    let stat = get t k in
    let max_length = Metrics.max_length stat in
    if max_length > t.max_length then t.max_length <- max_length;
    let maximum = Metrics.mp stat |> Metrics.maximum in
    if maximum > t.max_mp then t.max_mp <- maximum

  (* Log the three maxima (width, path-adjacent nodes, length) and, when
     [dump_blob_paths_to] is [Some filename], write every recorded path to
     that file. *)
  let pp_results ~dump_blob_paths_to t =
    [%log.app "Max width = %a" Metrics.pp t.max_width];
    let maximal_count, representative =
      Hashtbl.fold
        (fun _ (stat : Metrics.node) ((counter, _) as acc) ->
          let maximum = Metrics.mp stat |> Metrics.maximum in
          if maximum = t.max_mp then
            let maximal_count = Metrics.mp stat |> Metrics.maximal_count in
            let counter' = counter + maximal_count in
            let repr = Metrics.mp stat |> Metrics.representative in
            (counter', repr)
          else acc)
        t.visited (0, [])
    in
    let max_mp =
      Metrics.v ~maximal_count ~representative ~maximum:t.max_mp ()
    in
    [%log.app "Max number of path-adjacent nodes = %a" Metrics.pp max_mp];
    (* Count all paths that have max length. *)
    let maximal_count, representative =
      Hashtbl.fold
        (fun _ (stat : Metrics.node) acc ->
          if Metrics.max_length stat = t.max_length then
            List.fold_left
              (fun ((counter, _) as acc) l ->
                if List.length l = t.max_length then (counter + 1, l) else acc)
              acc (Metrics.all_paths stat)
          else acc)
        t.visited (0, [])
    in
    let max_length =
      Metrics.v ~maximal_count ~representative ~maximum:t.max_length ()
    in
    [%log.app "Max length = %a" Metrics.pp max_length];
    match dump_blob_paths_to with
    | None -> ()
    | Some filename ->
        let chan = open_out filename in
        let fmt = Format.formatter_of_out_channel chan in
        Hashtbl.iter (fun _ stats -> Metrics.pp_all_paths fmt stats) t.visited;
        Fmt.flush fmt ();
        close_out chan
end