open Lwt.Infix
open Sexplib.Conv

(* Path concatenation shorthand: [dir / name]. *)
let ( / ) = Filename.concat

(* Runtime state of the sandbox, built by [create]. *)
type t = {
  runc_state_dir : string;      (* Passed to runc as its [--root] directory. *)
  fast_sync : bool;             (* When set, a seccomp rule turns sync syscalls into no-ops. *)
  arches : string list;         (* Seccomp architecture names; may be empty. *)
}

(* User-supplied configuration (see [cmdliner]). *)
type config = {
  fast_sync : bool;
} [@@deriving sexp]

(* Return the trimmed first line printed by [uname -m].
   Raises [Failure] if the command does not exit with status 0.
   If the command prints nothing, [input_line] raises [End_of_file]; we catch
   it so that the child is always reaped with [close_process_in] and a failed
   command is reported through the [Failure] below rather than escaping as
   [End_of_file]. *)
let get_machine () =
  let ch = Unix.open_process_in "uname -m" in
  let arch = try input_line ch with End_of_file -> "" in
  match Unix.close_process_in ch with
  | Unix.WEXITED 0 -> String.trim arch
  | _ -> failwith "Failed to get arch with 'uname -m'"

(* Seccomp architecture names to allow for the current machine.
   Returns the empty list on non-Unix systems and for machine types
   not listed here. *)
let get_arches () =
  if Sys.unix then (
    match get_machine () with
    | "x86_64" -> ["SCMP_ARCH_X86_64"; "SCMP_ARCH_X86"; "SCMP_ARCH_X32"]
    | "aarch64" -> ["SCMP_ARCH_AARCH64"; "SCMP_ARCH_ARM"]
    | _ -> []
  ) else (
    []
  )

(* File name (inside the per-run config directory) holding the [id]th secret. *)
let secret_file id = "secret-" ^ string_of_int id

(* Builds the JSON value that is written to runc's config.json. *)
module Json_config = struct
  (* One entry of the "mounts" array: mount [src] of type [ty] at [dst]. *)
  let mount ?(options=[]) ~ty ~src dst =
    `Assoc [
      "destination", `String dst;
      "type", `String ty;
      "source", `String src;
      "options", `List (List.map (fun x -> `String x) options);
    ]

  (* Convert user-requested mounts to bind mounts, adding "ro" when [readonly]. *)
  let user_mounts =
    List.map @@ fun { Config.Mount.src; dst; readonly } ->
    let options = [ "bind"; "nosuid"; "nodev"; ] in
    mount ~ty:"bind" ~src dst
      ~options:(if readonly then "ro" :: options else options)

  (* A JSON list of strings. *)
  let strings xs = `List (List.map (fun x -> `String x) xs)

  (* One entry of the "namespaces" array. *)
  let namespace x = `Assoc [ "type", `String x ]

  (* This is a subset of the capabilities that Docker uses by default.
     These control what root can do in the container.
     If the init process is non-root, permitted, effective and ambient sets are cleared.
     See capabilities(7) for full details. *)
  let default_linux_caps = [
    "CAP_CHOWN";                (* Make arbitrary changes to file UIDs and GIDs *)
    "CAP_DAC_OVERRIDE";         (* Bypass file read, write, and execute permission checks. *)
    "CAP_FSETID";               (* Set SUID/SGID bits. *)
    "CAP_FOWNER";               (* Bypass permission checks. *)
    "CAP_MKNOD";                (* Create special files using mknod. *)
    "CAP_SETGID";               (* Make arbitrary manipulations of process GIDs. *)
    "CAP_SETUID";               (* Make arbitrary manipulations of process UIDs. *)
    "CAP_SETFCAP";              (* Set arbitrary capabilities on a file. *)
    "CAP_SETPCAP";              (* Add any capability from bounding set to inheritable set. *)
    "CAP_SYS_CHROOT";           (* Use chroot. *)
    "CAP_KILL";                 (* Bypass permission checks for sending signals. *)
    "CAP_AUDIT_WRITE"           (* Write records to kernel auditing log. *)
    (* Allowed by Docker, but disabled here (because we use host networking):
    "CAP_NET_RAW";              (* Use RAW and PACKET sockets / bind to any address *)
    "CAP_NET_BIND_SERVICE";     (* Bind a socket to Internet domain privileged ports. *)
    *)
  ]

  (* Seccomp syscall rules: a single rule when [fast_sync] is set, none otherwise. *)
  let seccomp_syscalls ~fast_sync =
    if fast_sync then [
      `Assoc [
        (* Sync calls are pointless for the builder, because if the computer crashes then we'll
           just throw the build dir away and start again. And btrfs sync is really slow.
           Based on https://bblank.thinkmo.de/using-seccomp-to-filter-sync-operations.html
           Note: requires runc >= v1.0.0-rc92. *)
        "names", strings [
          "fsync";
          "fdatasync";
          "msync";
          "sync";
          "syncfs";
          "sync_file_range";
        ];
        "action", `String "SCMP_ACT_ERRNO";
        "errnoRet", `Int 0;     (* Return error "success" *)
      ];
    ] else [
    ]

  (* The "seccomp" section: allow by default, plus the rules above.
     The "architectures" field is only emitted when [t.arches] is non-empty. *)
  let seccomp_policy (t : t) =
    let fields = [
      "defaultAction", `String "SCMP_ACT_ALLOW";
      "syscalls", `List (seccomp_syscalls ~fast_sync:t.fast_sync);
    ] @ (if t.arches = [] then [] else ["architectures", strings t.arches])
    in
    `Assoc fields

  (* Build the complete config.json value for one build step.
     [config_dir] is where the "hosts" file and the secret files are looked up;
     the container root is [results_dir/rootfs].
     Asserts that no entrypoint is set and that the user is a [`Unix] user.
     Raises [Failure] for any network setting other than [[]] or [["host"]]. *)
  let make {Config.cwd; argv; hostname; user; env; mounts; network; mount_secrets; entrypoint} t ~config_dir ~results_dir : Yojson.Safe.t =
    assert (entrypoint = None);
    let user =
      let { Obuilder_spec.uid; gid } =
        match user with
        | `Unix user -> user
        | `Windows _ -> assert false (* runc not supported on Windows *)
      in
      `Assoc [
        "uid", `Int uid;
        "gid", `Int gid;
      ]
    in
    (* With host networking we do not create a network namespace. *)
    let network_ns =
      match network with
      | ["host"] -> []
      | [] -> ["network"]
      | xs -> Fmt.failwith "Unsupported network configuration %a" Fmt.Dump.(list string) xs
    in
    let namespaces = network_ns @ ["pid"; "ipc"; "uts"; "mount"] in
    `Assoc [
      "ociVersion", `String "1.0.1-dev";
      "process", `Assoc [
        "terminal", `Bool false;
        "user", user;
        "args", strings argv;
        "env", strings (List.map (fun (k, v) -> Printf.sprintf "%s=%s" k v) env);
        "cwd", `String cwd;
        "capabilities", `Assoc [
          "bounding", strings default_linux_caps;       (* Limits capabilities gained on execve. *)
          "effective", strings default_linux_caps;      (* Checked by kernel to decide access *)
          "inheritable", strings default_linux_caps;    (* Preserved across an execve (if root, or cap in ambient set) *)
          "permitted", strings default_linux_caps;      (* Limiting superset for the effective capabilities *)
        ];
        "rlimits", `List [
          `Assoc [
            "type", `String "RLIMIT_NOFILE";
            "hard", `Int 1024;
            "soft", `Int 1024
          ];
        ];
        "noNewPrivileges", `Bool false;
      ];
      "root", `Assoc [
        "path", `String (results_dir / "rootfs");
        "readonly", `Bool false;
      ];
      "hostname", `String hostname;
      "mounts", `List (
        mount "/proc"
          ~options:[      (* TODO: copy to others? *)
            "nosuid";
            "noexec";
            "nodev";
          ]
          ~ty:"proc"
          ~src:"proc" ::
        mount "/dev"
          ~ty:"tmpfs"
          ~src:"tmpfs"
          ~options:[
            "nosuid";
            "strictatime";
            "mode=755";
            "size=65536k";
          ] ::
        mount "/dev/pts"
          ~ty:"devpts"
          ~src:"devpts"
          ~options:[
            "nosuid";
            "noexec";
            "newinstance";
            "ptmxmode=0666";
            "mode=0620";
            "gid=5";      (* tty *)
          ] ::
        mount "/sys"      (* This is how Docker does it. runc's default is a bit different. *)
          ~ty:"sysfs"
          ~src:"sysfs"
          ~options:[
            "nosuid";
            "noexec";
            "nodev";
            "ro";
          ] ::
        mount "/sys/fs/cgroup"
          ~ty:"cgroup"
          ~src:"cgroup"
          ~options:[
            "ro";
            "nosuid";
            "noexec";
            "nodev";
          ] ::
        mount "/dev/shm"
          ~ty:"tmpfs"
          ~src:"shm"
          ~options:[
            "nosuid";
            "noexec";
            "nodev";
            "mode=1777";
            "size=65536k";
          ] ::
        mount "/dev/mqueue"
          ~ty:"mqueue"
          ~src:"mqueue"
          ~options:[
            "nosuid";
            "noexec";
            "nodev";
          ] ::
        mount "/etc/hosts"
          ~ty:"bind"
          ~src:(config_dir / "hosts")
          ~options:[
            "ro";
            "rbind";
            "rprivate"
          ] ::
        (* The host's resolv.conf is only exposed with host networking. *)
        (if network = ["host"] then
           [ mount "/etc/resolv.conf"
               ~ty:"bind"
               ~src:"/etc/resolv.conf"
               ~options:[
                 "ro";
                 "rbind";
                 "rprivate"
               ]
           ]
         else []
        ) @
        (* Secret [id] is read from [config_dir/secret_file id]; [run] writes
           the files using the same numbering. *)
        List.mapi (fun id { Config.Secret.target; _ } ->
            mount target
              ~ty:"bind"
              ~src:(config_dir / secret_file id)
              ~options:[
                "rbind";
                "rprivate";
                "ro";
              ]
          ) mount_secrets @
        user_mounts mounts
      );
      "linux", `Assoc [
        "namespaces", `List (List.map namespace namespaces);
        "maskedPaths", strings [
          "/proc/acpi";
          "/proc/asound";
          "/proc/kcore";
          "/proc/keys";
          "/proc/latency_stats";
          "/proc/timer_list";
          "/proc/timer_stats";
          "/proc/sched_debug";
          "/sys/firmware";
          "/proc/scsi"
        ];
        "readonlyPaths", strings [
          "/proc/bus";
          "/proc/fs";
          "/proc/irq";
          "/proc/sys";
          "/proc/sysrq-trigger"
        ];
        "seccomp", seccomp_policy t;
      ];
    ]
end

(* Counter used to give each runc container started by [run] a distinct id. *)
let next_id = ref 0

(* Run [config] in a runc container whose root is [results_dir/rootfs].
   A private temporary directory receives config.json, the hosts file and one
   file per secret. The container's stdout and stderr share one pipe, which
   is copied to [log]. When [cancelled] terminates while the container is
   still running, "runc kill <id> KILL" is issued, retrying every 10s on error.
   Returns the result of the runc command, or [Error `Cancelled] if [cancelled]
   had terminated by the time the command and the log copy finished. *)
let run ~cancelled ?stdin:stdin ~log t config results_dir =
  Lwt_io.with_temp_dir ~perm:0o700 ~prefix:"obuilder-runc-" @@ fun tmp ->
  let json_config = Json_config.make config ~config_dir:tmp ~results_dir t in
  Os.write_file ~path:(tmp / "config.json") (Yojson.Safe.pretty_to_string json_config ^ "\n") >>= fun () ->
  (* Must be a single hosts entry: a line holding only "builder" has no address. *)
  Os.write_file ~path:(tmp / "hosts") "127.0.0.1 localhost builder" >>= fun () ->
  (* Write secrets as secret-0, secret-1, ... matching the mounts in [Json_config.make]. *)
  Lwt_list.fold_left_s
    (fun id Config.Secret.{value; _} ->
       Os.write_file ~path:(tmp / secret_file id) value >|= fun () ->
       id + 1
    ) 0 config.mount_secrets
  >>= fun _ ->
  let id = string_of_int !next_id in
  incr next_id;
  Os.with_pipe_from_child @@ fun ~r:out_r ~w:out_w ->
  let cmd = ["runc"; "--root"; t.runc_state_dir; "run"; id] in
  let stdout = `FD_move_safely out_w in
  let stderr = stdout in
  let copy_log = Build_log.copy ~src:out_r ~dst:log in
  let proc =
    let stdin = Option.map (fun x -> `FD_move_safely x) stdin in
    let pp f = Os.pp_cmd f ("", config.argv) in
    Os.sudo_result ~cwd:tmp ?stdin ~stdout ~stderr ~pp cmd
  in
  Lwt.on_termination cancelled (fun () ->
      let rec aux () =
        if Lwt.is_sleeping proc then (
          let pp f = Fmt.pf f "runc kill %S" id in
          Os.sudo_result ~cwd:tmp ["runc"; "--root"; t.runc_state_dir; "kill"; id; "KILL"] ~pp >>= function
          | Ok () -> Lwt.return_unit
          | Error (`Msg m) ->
            (* This might be because it hasn't been created yet, so retry. *)
            Log.warn (fun f -> f "kill failed: %s (will retry in 10s)" m);
            Lwt_unix.sleep 10.0 >>= aux
        ) else Lwt.return_unit  (* Process has already finished *)
      in
      Lwt.async aux
    );
  proc >>= fun r ->
  copy_log >>= fun () ->
  if Lwt.is_sleeping cancelled then Lwt.return (r :> (unit, [`Msg of string | `Cancelled]) result)
  else Lwt_result.fail `Cancelled

(* Force-delete every container listed in the runc state directory [dir],
   one at a time, logging a warning for each. *)
let clean_runc dir =
  Sys.readdir dir
  |> Array.to_list
  |> Lwt_list.iter_s (fun item ->
      Log.warn (fun f -> f "Removing left-over runc container %S" item);
      Os.sudo ["runc"; "--root"; dir; "delete"; "--force"; item]
    )

(* Create the sandbox state: ensure [state_dir] exists, detect the seccomp
   architectures, then remove any containers left over in [state_dir]. *)
let create ~state_dir (c : config) =
  Os.ensure_dir state_dir;
  let arches = get_arches () in
  Log.info (fun f -> f "Architectures for multi-arch system: %a" Fmt.(Dump.list string) arches);
  clean_runc state_dir >|= fun () ->
  { runc_state_dir = state_dir; fast_sync = c.fast_sync; arches }

open Cmdliner

(* The [--fast-sync] command-line flag. *)
let fast_sync =
  Arg.value @@
  Arg.flag @@
  Arg.info
    ~doc:"Ignore sync syscalls (requires runc >= 1.0.0-rc92)."
    ["fast-sync"]

(* Command-line term producing a [config]. *)
let cmdliner : config Term.t =
  let make fast_sync =
    { fast_sync }
  in
  Term.(const make $ fast_sync)