Skip to content

Commit

Permalink
chown cgroup to process uid in container namespace
Browse files Browse the repository at this point in the history
Delegating cgroups to the container enables more complex workloads,
including systemd-based workloads.  The OCI runtime-spec was
recently updated to explicitly admit such delegation, through
specification of cgroup ownership semantics:

  opencontainers/runtime-spec#1123

Pursuant to the updated OCI runtime-spec, change the ownership of
the container's cgroup directory and particular files therein, when
using cgroups v2 and when the cgroupfs is to be mounted read/write.

As a result of this change, systemd workloads can run in isolated
user namespaces on OpenShift when the sandbox's cgroupfs is mounted
read/write.

It might be possible to implement this feature in other cgroup
managers, but that work is deferred.

Signed-off-by: Fraser Tweedale <[email protected]>
  • Loading branch information
frasertweedale committed Nov 29, 2021
1 parent b6fa634 commit 9391cd3
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
39 changes: 39 additions & 0 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package systemd

import (
"bufio"
"fmt"
"math"
"os"
"path/filepath"
"strconv"
"strings"
Expand Down Expand Up @@ -288,9 +290,46 @@ func (m *unifiedManager) Apply(pid int) error {
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
return err
}

if c.OwnerUID != nil {
filesToChown, err := cgroupFilesToChown()
if err != nil {
return err
}

for _, v := range filesToChown {
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
if err != nil {
return err
}
}
}

return nil
}

// The kernel exposes a list of files that should be chowned to the delegate
// uid in /sys/kernel/cgroup/delegate. If the file is not present
// (Linux < 4.15), use the initial values mentioned in cgroups(7).
func cgroupFilesToChown() ([]string, error) {
filesToChown := []string{"."} // the directory itself must be chowned
const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"
f, err := os.Open(cgroupDelegateFile)
if err == nil {
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
filesToChown = append(filesToChown, scanner.Text())
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
}
} else {
filesToChown = append(filesToChown, "cgroup.procs", "cgroup.subtree_control", "cgroup.threads")
}
return filesToChown, nil
}

func (m *unifiedManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/configs/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ type Cgroup struct {

// Rootless tells if rootless cgroups should be used.
Rootless bool

// The host UID that should own the cgroup, or nil to accept
// the default ownership. This should only be set when the
// cgroupfs is to be mounted read/write.
// Not all cgroup manager implementations support changing
// the ownership.
OwnerUID *int `json:"owner_uid,omitempty"`
}

type Resources struct {
Expand Down

0 comments on commit 9391cd3

Please sign in to comment.