diff --git a/runner/go.mod b/runner/go.mod index 260fb880a..f338e78ed 100644 --- a/runner/go.mod +++ b/runner/go.mod @@ -23,6 +23,7 @@ require ( github.com/urfave/cli/v3 v3.6.1 golang.org/x/crypto v0.22.0 golang.org/x/sys v0.26.0 + kernel.org/pub/linux/libs/security/libcap/cap v1.2.77 ) require ( @@ -84,4 +85,5 @@ require ( gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools/v3 v3.5.1 // indirect + kernel.org/pub/linux/libs/security/libcap/psx v1.2.77 // indirect ) diff --git a/runner/go.sum b/runner/go.sum index 20c4568f9..655ea59dc 100644 --- a/runner/go.sum +++ b/runner/go.sum @@ -321,3 +321,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU= gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= +kernel.org/pub/linux/libs/security/libcap/cap v1.2.77 h1:iQtQTjFUOcTT19fI8sTCzYXsjeVs56et3D8AbKS2Uks= +kernel.org/pub/linux/libs/security/libcap/cap v1.2.77/go.mod h1:oV+IO8kGh0B7TxErbydDe2+BRmi9g/W0CkpVV+QBTJU= +kernel.org/pub/linux/libs/security/libcap/psx v1.2.77 h1:Z06sMOzc0GNCwp6efaVrIrz4ywGJ1v+DP0pjVkOfDuA= +kernel.org/pub/linux/libs/security/libcap/psx v1.2.77/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24= diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index ea2ef6393..85a59408f 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -26,6 +26,7 @@ import ( "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/connections" + cap "github.com/dstackai/dstack/runner/internal/linux/capabilities" linuxuser "github.com/dstackai/dstack/runner/internal/linux/user" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/schemas" @@ 
-467,10 +468,19 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error } cmd.Dir = ex.jobWorkingDir - // Strictly speaking, we need CAP_SETUID and CAP_GUID (for Cmd.Start()-> - // Cmd.SysProcAttr.Credential) and CAP_CHOWN (for startCommand()->os.Chown()), - // but for the sake of simplicity we instead check if we are root or not - if ex.currentUser.IsRoot() { + // CAP_SET{UID,GID} for startCommand() -> Cmd.Start() -> set{uid,gid,groups} syscalls during fork-exec + // CAP_CHOWN for startCommand() -> os.Chown(pts.Name()) + if missing, err := cap.Check(cap.SETUID, cap.SETGID, cap.CHOWN); err != nil { + log.Error( + ctx, "Failed to check capabilities, won't try to set process credentials", + "err", err, "user", ex.currentUser, + ) + } else if len(missing) > 0 { + log.Info( + ctx, "Required capabilities are missing, cannot set process credentials", + "missing", missing, "user", ex.currentUser, + ) + } else { log.Trace(ctx, "Using credentials", "user", ex.jobUser) if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} @@ -480,8 +490,6 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error return fmt.Errorf("prepare process credentials: %w", err) } cmd.SysProcAttr.Credential = creds - } else { - log.Info(ctx, "Current user is not root, cannot set process credentials", "user", ex.currentUser) } envMap := NewEnvMap(ParseEnvList(os.Environ()), jobEnvs, ex.secrets) @@ -509,11 +517,15 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error // Note: we already set RLIMIT_MEMLOCK to unlimited in the shim if we've detected IB devices // (see configureHpcNetworkingIfAvailable() function), but, as it's on the shim side, it only works // with VM-based backends. - rlimitMemlock := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY} - // TODO: Check if we have CAP_SYS_RESOURCE. In container environments, even root usually doesn't have - // this capability. 
// Capability names a Linux capability in container style: upper case, without
// the CAP_ prefix. In this darwin build it is a plain string placeholder so
// that code referring to capabilities still compiles.
type Capability string

// Capabilities that callers of this package may ask about.
const (
	SETUID       = Capability("SETUID")
	SETGID       = Capability("SETGID")
	CHOWN        = Capability("CHOWN")
	SYS_RESOURCE = Capability("SYS_RESOURCE")
)

// errNotSupported is the single error value returned by every query in this
// build. Sharing one value (instead of building a new error per call) keeps
// the results of Has and Check comparable with each other.
var errNotSupported = errors.New("not supported")

// Has always reports false together with a "not supported" error in this
// build: capabilities are not queried here.
func Has(c Capability) (bool, error) {
	return false, errNotSupported
}

// Check always returns a nil slice together with a "not supported" error in
// this build. A nil slice must not be read as "nothing is missing"; callers
// have to look at the error first.
func Check(cs ...Capability) (missing []Capability, err error) {
	return nil, errNotSupported
}
Capability(cap.CHOWN) + SYS_RESOURCE = Capability(cap.SYS_RESOURCE) +) + +// String returns a text representation of the capability in the form used by container folks: +// UPPER_CASE, no CAP_ prefix: cap_sys_admin -> SYS_ADMIN +func (c Capability) String() string { + return strings.ToUpper(cap.Value(c).String()[4:]) +} + +// Has returns true if the current process has the specified capability in its effective set +func Has(c Capability) (bool, error) { + set, err := cap.GetPID(0) + if err != nil { + return false, err + } + return set.GetFlag(cap.Effective, cap.Value(c)) +} + +// Check checks and returns those capabilities that are _missing_ from the effective set +// of the current process +func Check(cs ...Capability) (missing []Capability, err error) { + set, err := cap.GetPID(0) + if err != nil { + return nil, err + } + for _, c := range cs { + ok, err := set.GetFlag(cap.Effective, cap.Value(c)) + if err != nil { + return nil, err + } + if !ok { + missing = append(missing, c) + } + } + return missing, nil +}