Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
virtcontainers/pkg/cgroups: implement cgroup manager
Browse files Browse the repository at this point in the history
cgroup manager is in charge to create and setup cgroups for
virtual containers, for example it adds /dev/kvm and
/dev/vhost-net to the list of cgroup devices in order to have
virtual containers working.

fixes #2438
fixes #2419

Signed-off-by: Julio Montes <[email protected]>
  • Loading branch information
Julio Montes committed Feb 21, 2020
1 parent 03cdf6c commit ea82922
Show file tree
Hide file tree
Showing 4 changed files with 434 additions and 45 deletions.
321 changes: 321 additions & 0 deletions virtcontainers/pkg/cgroups/manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package cgroups

import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"

"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

type Config struct {
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
// If nil, New() will create one.
Cgroups *configs.Cgroup

// CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
CgroupPaths map[string]string

// Resources represents the runtime resource constraints
Resources specs.LinuxResources

// CgroupPath is the OCI spec cgroup path
CgroupPath string
}

type Manager struct {
sync.Mutex
mgr libcontcgroups.Manager
}

const (
// file in the cgroup that contains the pids
cgroupProcs = "cgroup.procs"
)

var (
// If set to true, expects cgroupsPath to be of form "slice:prefix:name", otherwise cgroups creation will fail
systemdCgroup *bool

cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)

func EnableSystemdCgroup() {
systemd := true
systemdCgroup = &systemd
}

func UseSystemdCgroup() bool {
if systemdCgroup != nil {
return *systemdCgroup
}
return false
}

// returns the list of devices that a hypervisor may need
func hypervisorDevices() []specs.LinuxDeviceCgroup {
wildcard := int64(-1)
devicemapperMajor := int64(253)

devices := []specs.LinuxDeviceCgroup{}

devices = append(devices,
// hypervisor needs access to all devicemapper devices,
// since they can be hotplugged in the VM.
specs.LinuxDeviceCgroup{
Allow: true,
Type: "b",
Major: &devicemapperMajor,
Minor: &wildcard,
Access: "rwm",
})

// Processes running in a device-cgroup are constrained, they have acccess
// only to the devices listed in the devices.list file.
// In order to run Virtual Machines and create virtqueues, hypervisors
// need access to certain character devices in the host, like kvm and vhost-net.
hypervisorDevices := []string{
"/dev/kvm", // To run virtual machines
"/dev/vhost-net", // To create virtqueues
}

for _, device := range hypervisorDevices {
var st unix.Stat_t
linuxDevice := specs.LinuxDeviceCgroup{
Allow: true,
Access: "rwm",
}

if err := unix.Stat(device, &st); err != nil {
cgroupsLogger.WithError(err).WithField("device", device).Warn("Could not get device information")
continue
}

switch st.Mode & unix.S_IFMT {
case unix.S_IFCHR:
linuxDevice.Type = "c"
case unix.S_IFBLK:
linuxDevice.Type = "b"
}

major := int64(unix.Major(st.Rdev))
minor := int64(unix.Minor(st.Rdev))
linuxDevice.Major = &major
linuxDevice.Minor = &minor

devices = append(devices, linuxDevice)
}

return devices
}

// New creates a new CgroupManager
func New(config *Config) (*Manager, error) {
var err error
useSystemdCgroup := UseSystemdCgroup()

devices := []specs.LinuxDeviceCgroup{}
copy(devices, config.Resources.Devices)
devices = append(devices, hypervisorDevices()...)
// Do not modify original devices
config.Resources.Devices = devices

newSpec := specs.Spec{
Linux: &specs.Linux{
Resources: &config.Resources,
},
}

rootless := rootless.IsRootless()

cgroups := config.Cgroups
cgroupPaths := config.CgroupPaths

// Create a new cgroup if the current one is nil
// this cgroups must be saved later
if cgroups == nil {
if config.CgroupPath == "" && !rootless {
cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
}

newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
if err != nil {
return nil, fmt.Errorf("Invalid cgroup path: %v", err)
}

if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
// cgroup name is taken from spec
CgroupName: "",
UseSystemdCgroup: useSystemdCgroup,
Spec: &newSpec,
RootlessCgroups: rootless,
}); err != nil {
return nil, fmt.Errorf("Could not create cgroup config: %v", err)
}
}

// Set cgroupPaths to nil when the map is empty, it can and will be
// populated by `Manager.Apply()` when the runtime or any other process
// is moved to the cgroup.
if len(cgroupPaths) == 0 {
cgroupPaths = nil
}

if useSystemdCgroup {
systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager()
if err != nil {
return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
}
libcontcgroupssystemd.UseSystemd()
return &Manager{
mgr: systemdCgroupFunc(cgroups, cgroupPaths),
}, nil
}

return &Manager{
mgr: &libcontcgroupsfs.Manager{
Cgroups: cgroups,
Rootless: rootless,
Paths: cgroupPaths,
},
}, nil
}

// read all the pids in cgroupPath
func readPids(cgroupPath string) ([]int, error) {
pids := []int{}
f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
if err != nil {
return nil, err
}
defer f.Close()
buf := bufio.NewScanner(f)

for buf.Scan() {
if t := buf.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
pids = append(pids, pid)
}
}
return pids, nil
}

// write the pids into cgroup.procs
func writePids(pids []int, cgroupPath string) error {
cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs)
for _, pid := range pids {
if err := ioutil.WriteFile(cgroupProcsPath,
[]byte(strconv.Itoa(pid)),
os.FileMode(0),
); err != nil {
return err
}
}
return nil
}

func (m *Manager) logger() *logrus.Entry {
return cgroupsLogger.WithField("source", "cgroup-manager")
}

// move all the processes in the current cgroup to the parent
func (m *Manager) moveToParent() error {
m.Lock()
defer m.Unlock()
for _, cgroupPath := range m.mgr.GetPaths() {
pids, err := readPids(cgroupPath)
if err != nil {
return err
}

if len(pids) == 0 {
// no pids in this cgroup
continue
}

cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))
if err = writePids(pids, cgroupParentPath); err != nil {
if !strings.Contains(err.Error(), "no such process") {
return err
}
}
}
return nil
}

// Add pid to cgroups
func (m *Manager) Add(pid int) error {
if rootless.IsRootless() {
m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
return nil
}

m.Lock()
defer m.Unlock()
return m.mgr.Apply(pid)
}

// Apply constraints
func (m *Manager) Apply() error {
if rootless.IsRootless() {
m.logger().Debug("Unable to apply constraints: running rootless")
return nil
}

cgroups, err := m.GetCgroups()
if err != nil {
return err
}

m.Lock()
defer m.Unlock()
return m.mgr.Set(&configs.Config{
Cgroups: cgroups,
})
}

func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
m.Lock()
defer m.Unlock()
return m.mgr.GetCgroups()
}

func (m *Manager) GetPaths() map[string]string {
m.Lock()
defer m.Unlock()
return m.mgr.GetPaths()
}

func (m *Manager) Destroy() error {
// cgroup can't be destroyed if it contains running processes
if err := m.moveToParent(); err != nil {
return fmt.Errorf("Could not move processes into parent cgroup: %v", err)
}

m.Lock()
defer m.Unlock()
return m.mgr.Destroy()
}
55 changes: 55 additions & 0 deletions virtcontainers/pkg/cgroups/manager_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package cgroups

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestEnableSystemdCgroup(t *testing.T) {
assert := assert.New(t)

orgSystemdCgroup := systemdCgroup
defer func() {
systemdCgroup = orgSystemdCgroup
}()

useSystemdCgroup := UseSystemdCgroup()
if systemdCgroup != nil {
assert.Equal(*systemdCgroup, useSystemdCgroup)
} else {
assert.False(useSystemdCgroup)
}

EnableSystemdCgroup()
assert.True(UseSystemdCgroup())
}

func TestNew(t *testing.T) {
assert := assert.New(t)
useSystemdCgroup := false
orgSystemdCgroup := systemdCgroup
defer func() {
systemdCgroup = orgSystemdCgroup
}()
systemdCgroup = &useSystemdCgroup

c := &Config{
Cgroups: nil,
CgroupPath: "",
}

mgr, err := New(c)
assert.NoError(err)
assert.NotNil(mgr.mgr)

useSystemdCgroup = true
mgr, err = New(c)
assert.Error(err)
assert.Nil(mgr)
}
Loading

0 comments on commit ea82922

Please sign in to comment.