123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342openLwt.Infix(* This is rather complicated, because (unlike btrfs):
- zfs won't let you delete datasets that other datasets are cloned from.
However, you can "promote" a dataset, so that it switches roles with its parent.
- Some zfs commands use "--" to separate options from arguments, but others interpret it as an argument!
- Sometimes we need "datasets" and at other times we need pathnames (the difference is a leading '/')
Example of zfs promotion:
1. Create ds1.
2. Create snapshots ds1@snap1, ds1@snap2, ds1@snap3.
3. Create clones of ds1@snap2: clone1, clone2, clone3.
At this point:
- ds1 has clones {clone1, clone2, clone3} and snapshots {snap1, snap2, snap3}.
4. Promote clone2.
Now:
- clone2 has clones {clone1, ds1, clone3} and snapshots {snap1, snap2}.
- ds1 has no clones and snapshots {snap3}.
*)letstrf=Printf.sprintftypecache={lock:Lwt_mutex.t;mutablegen:int;(* Version counter. *)mutablen_clones:int;}typet={pool:string;prefix:string;(* To be prepended to `pool` to give the full path to the pool *)caches:(string,cache)Hashtbl.t;mutablenext:int;}letdefault_snapshot="snap"moduleDataset:sigtypedatasetvalstate:datasetvalcache_tmp_group:datasetvalgroups:datasetlistvalresult:S.id->datasetvalcache:string->datasetvalcache_tmp:int->string->datasetvalfull_name:?snapshot:string->t->dataset->stringvalpath:?snapshot:string->t->dataset->stringvalexists:?snapshot:string->t->dataset->boolvalif_missing:?snapshot:string->t->dataset->(unit->unitLwt.t)->unitLwt.tend=structtypedataset=stringletstate="state"letresult_group="result"letcache_group="cache"letcache_tmp_group="cache-tmp"letgroups=[state;result_group;cache_group;cache_tmp_group]letresultid="result/"^idletcachename="cache/"^Escape.cachenameletcache_tmpiname=strf"cache-tmp/%d-%s"i(Escape.cachename)letfull_name?snapshottds=matchsnapshotwith|None->strf"%s/%s"t.poolds|Somesnapshot->strf"%s/%s@%s"t.pooldssnapshotletpath?snapshottds=matchsnapshotwith|None->strf"%s%s/%s"t.prefixt.poolds|Somesnapshot->strf"%s%s/%s/.zfs/snapshot/%s"t.prefixt.pooldssnapshotletexists?snapshottds=matchOs.check_dir(path?snapshottds)with|`Missing->false|`Present->trueletif_missing?snapshottdsfn=ifexists?snapshottdsthenLwt.return_unitelsefn()endletuser=`Unix{Obuilder_spec.uid=Unix.getuid();gid=Unix.getgid()}moduleZfs=structletchown~usertds=let{Obuilder_spec.uid;gid}=matchuserwith`Unixuser->user|`Windows_->assertfalseinOs.sudo["chown";strf"%d:%d"uidgid;Dataset.pathtds]letcreatetds=Os.sudo["zfs";"create";"--";Dataset.full_nametds]letdestroytdsmode=letopts=matchmodewith|`Only->[]|`And_snapshots->["-r"]|`And_snapshots_and_clones->["-R"]inOs.sudo(["zfs";"destroy"]@opts@["--";Dataset.full_nametds])letdestroy_snapshottdssnapshotmode=letopts=matchmodewith|`Defer->["-d"]|`Recurse->["-R"]|`Immediate->[]inOs.sudo(["zfs";"destroy"]@opts@["--";Dataset.full_nametds^"@"^snapshot])letclonet~src~snapshotdst=Os.sudo["zfs";"clone";"--";Dataset.full_nametsrc~snapshot;Dataset.full_nametdst]letsnapshottds~snapshot=Os.sudo["zfs";"snapshot";"--";Dataset.full_nametds~snapshot]letpromotetds=Os.sudo["zfs";"promote";Dataset.full_nametds]letrenamet~oldds=Os.sudo["zfs";"rename";"--";Dataset.full_nametold;Dataset.full_nametds]letrename_snapshottds~oldsnapshot=Os.sudo["zfs";"rename";"--";Dataset.full_nametds~snapshot:old;Dataset.full_nametds~snapshot]endletdelete_if_existstdsmode=ifDataset.existstdsthenZfs.destroytdsmodeelseLwt.return_unitletstate_dirt=Dataset.pathtDataset.stateletroott=t.poolletprefix_and_poolpath=letpool=Filename.basenamepathinmatchFilename.chop_suffix_opt~suffix:poolpathwith|Some""->("/",pool)(* Preserves original behaviour *)|Someprefix->(prefix,pool)|None->failwith("Failed to get preffix from: "^path)letcreate~path=letprefix,pool=prefix_and_poolpathinlett={pool;prefix;caches=Hashtbl.create10;next=0}in(* Ensure any left-over temporary datasets are removed before we start. *)delete_if_existst(Dataset.cache_tmp_group)`And_snapshots_and_clones>>=fun()->Dataset.groups|>Lwt_list.iter_s(fungroup->Dataset.if_missingtgroup(fun()->Zfs.createtgroup)>>=fun()->Zfs.chown~usertgroup)>>=fun()->Lwt.returnt(* The builder will always delete child datasets before their parent.
It's possible that we crashed after cloning this but before recording that
in the database. So any clones of this dataset must be unregistered junk. *)letdeletetid=delete_if_existst(Dataset.resultid)`And_snapshots_and_clones(* We start by either creating a new dataset or by cloning base@snap (if [base] is given).
On success, we snapshot the clone as clone@snap.
On failure, we destroy the clone. This will always succeed because we can't have
tagged it or created further clones at this point. *)letbuildt?base~idfn=Log.debug(funf->f"zfs: build %S"id);letds=Dataset.resultidin(* We have to create the dataset in its final location because ZFS can't
rename it while we have the log file open (which we need to do). But
we don't create the snapshot unless the build succeeds. If we crash
with a partially written directory, `result` will see there is no
snapshot and we'll end up here and delete it. *)delete_if_existstds`Only>>=fun()->letclone=Dataset.pathtdsinbeginmatchbasewith|None->Zfs.createtds>>=fun()->Zfs.chown~usertds|Somebase->letsrc=Dataset.resultbaseinZfs.clonet~src~snapshot:default_snapshotdsend>>=fun()->Lwt.try_bind(fun()->fnclone)(function|Ok()->Log.debug(funf->f"zfs: build %S succeeded"id);Zfs.snapshottds~snapshot:default_snapshot>>=fun()->(* ZFS can't delete the clone while the snapshot still exists. So I guess we'll just
keep it around? *)Lwt_result.return()|Error_ase->Log.debug(funf->f"zfs: build %S failed"id);Zfs.destroytds`Only>>=fun()->Lwt.returne)(funex->Log.warn(funf->f"Uncaught exception from %S build function: %a"idFmt.exnex);Zfs.destroytds`Only>>=fun()->Lwt.failex)letresulttid=letds=Dataset.resultidinletpath=Dataset.pathtds~snapshot:default_snapshotinifSys.file_existspaththenLwt.return_somepathelseLwt.return_noneletlog_filetid=resulttid>|=function|Somedir->Filename.concatdir"log"|None->letds=Dataset.resultidinletclone=Dataset.pathtdsinFilename.concatclone"log"letget_cachetname=matchHashtbl.find_optt.cachesnamewith|Somec->c|None->letc={lock=Lwt_mutex.create();gen=0;n_clones=0}inHashtbl.addt.cachesnamec;c(* Return the name of an unused temporary dataset, based on [name]. *)letget_tmp_dstname=lettmp_ds=Dataset.cache_tmpt.nextnameint.next<-t.next+1;tmp_ds(* Properties you can assume after taking the lock:
- Either we have a dataset with the latest snapshot of the cache
(main@snap), or it doesn't exist yet (in which case we create it and
snapshot immediately).
- Any other tags on main are marked for deletion, but some clones
still depend on them. They will all be older than "snap".
- There may be clones of main. These clones have no snapshots, and no
further sub-clones.
We clone main@snap, and then let the user write to that (tmp) with the lock
released.
When the user releases tmp, we retake the lock and then either:
- Destroy tmp, or
- Replace main with tmp (see comments in code).
Crash recovery:
- We might crash before making the main@snap tag. If main is missing this tag,
it is safe to create it, since we must have been just about to do that.
*)letcache~usertname:(string*(unit->unitLwt.t))Lwt.t=letcache=get_cachetnameinLwt_mutex.with_lockcache.lock@@fun()->Log.debug(funf->f"zfs: get cache %S"(name:>string));letgen=cache.geninletmain_ds=Dataset.cachenameinlettmp_ds=get_tmp_dstnamein(* Create the cache as an empty directory if it doesn't exist. *)Dataset.if_missingtmain_ds(fun()->Zfs.createtmain_ds)>>=fun()->(* Ensure we have the snapshot. This is needed on first creation, and
also to recover from crashes. *)Dataset.if_missingtmain_ds~snapshot:default_snapshot(fun()->Zfs.chown~usertmain_ds>>=fun()->Zfs.snapshottmain_ds~snapshot:default_snapshot)>>=fun()->cache.n_clones<-cache.n_clones+1;Zfs.clonet~src:main_ds~snapshot:default_snapshottmp_ds>>=fun()->letrelease()=Lwt_mutex.with_lockcache.lock@@fun()->Log.debug(funf->f"zfs: release cache %S"(name:>string));cache.n_clones<-cache.n_clones-1;ifcache.gen=genthen((* main_ds hasn't changed since we cloned it. Update it. *)(* todo: check if tmp_ds has changed. *)cache.gen<-cache.gen+1;(* Rename main to something temporary, so if we crash here then we'll
just start again with an empty cache next time. *)letdelete_me=get_tmp_dstnameinZfs.renamet~old:main_dsdelete_me>>=fun()->Zfs.promotettmp_ds>>=fun()->(* At this point:
- All the other clones of main are now clones of tmp_ds.
- main@snap has moved to tmp@snap.
- Any other tags were older than snap and so have also moved to tmp. *)Zfs.destroytdelete_me`Only>>=fun()->(* Move the old @snap tag out of the way. *)letarchive_name=strf"old-%d"genin(* We know [archive_name] doesn't exist because [gen] is unique for
this process, and we delete stale tmp dirs from previous runs at start-up,
which would remove any such deferred tags. *)Zfs.rename_snapshotttmp_ds~old:default_snapshotarchive_name>>=fun()->(* Mark the archived snapshot for removal. If other clones are using it,
this will defer the deletion until they're done *)Zfs.destroy_snapshotttmp_dsarchive_name`Defer>>=fun()->(* Create the new snapshot and rename this as the new main_ds. *)Zfs.snapshotttmp_ds~snapshot:default_snapshot>>=fun()->Zfs.renamet~old:tmp_dsmain_ds)else((* We have no snapshots or clones here. *)Lwt.catch(fun()->Zfs.destroyttmp_ds`Only)(funex->Log.warn(funf->f"Error trying to release cache (will retry): %a"Fmt.exnex);(* XXX: Don't know what's causing this. By the time fuser runs, the problem has disappeared! *)Unix.system(strf"fuser -mv %S"(Dataset.pathttmp_ds))|>ignore;Lwt_unix.sleep10.0>>=fun()->Zfs.destroyttmp_ds`Only))inLwt.return(Dataset.pathttmp_ds,release)letdelete_cachetname=letcache=get_cachetnameinLwt_mutex.with_lockcache.lock@@fun()->Log.debug(funf->f"zfs: delete_cache %S"(name:>string));ifcache.n_clones>0thenLwt_result.fail`Busyelse(letmain_ds=Dataset.cachenameinifDataset.existstmain_dsthen(Zfs.destroytmain_ds`And_snapshots>>=fun()->Lwt_result.return())elseLwt_result.return())letcomplete_deletes_t=(* The man-page says "Pending changes are generally accounted for within a few seconds" *)Lwt_unix.sleep5.0