From 4d470e513b31fb02bcfc4666f7976022a87e715b Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Tue, 29 May 2018 11:02:27 -0700 Subject: [PATCH] shm: Create shared /dev/shm This commit checks the size of "/dev/shm" for the sandbox container which is then used to create the shared memory inside the guest. kata agent then uses this size to set up a sandbox level ephemeral storage for shm. The containers then simply bind mount this sandbox level shm. With this, we will now be able to support docker --shm-size option as well have a shared shm within containers in a pod, since they are supposed to be in the same IPC namespace. Fixes #356 Signed-off-by: Archana Shinde --- virtcontainers/kata_agent.go | 45 ++++++++++++++++--- virtcontainers/kata_agent_test.go | 39 ++++++++++++++-- virtcontainers/mount.go | 4 ++ virtcontainers/pkg/oci/utils.go | 34 ++++++++++++++ virtcontainers/pkg/oci/utils_test.go | 66 ++++++++++++++++++++++++++++ virtcontainers/sandbox.go | 5 +++ 6 files changed, 185 insertions(+), 8 deletions(-) diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 6ef55bbeb3..b49efae9dc 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -42,12 +42,15 @@ var ( kataHostSharedDir = "/run/kata-containers/shared/sandboxes/" kataGuestSharedDir = "/run/kata-containers/shared/containers/" mountGuest9pTag = "kataShared" + kataGuestSandboxDir = "/run/kata-containers/sandbox/" type9pFs = "9p" vsockSocketScheme = "vsock" kata9pDevType = "9p" kataBlkDevType = "blk" kataSCSIDevType = "scsi" sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L", "nodev"} + shmDir = "shm" + kataEphemeralDevType = "ephemeral" ) // KataAgentConfig is a structure storing information needed @@ -505,9 +508,27 @@ func (k *kataAgent) startSandbox(sandbox *Sandbox) error { Options: sharedDir9pOptions, } + storages := []*grpc.Storage{sharedVolume} + + if sandbox.shmSize > 0 { + path := filepath.Join(kataGuestSandboxDir, shmDir) + shmSizeOption := fmt.Sprintf("size=%d", sandbox.shmSize) + + shmStorage := &grpc.Storage{ + Driver: kataEphemeralDevType, + MountPoint: path, + Source: "shm", + Fstype: "tmpfs", + Options: []string{"noexec", "nosuid", "nodev", "mode=1777", shmSizeOption}, + } + + storages = append(storages, shmStorage) + } + req := &grpc.CreateSandboxRequest{ - Hostname: hostname, - Storages: []*grpc.Storage{sharedVolume}, + Hostname: hostname, + Storages: storages, + SandboxPidns: false, } _, err = k.sendReq(req) @@ -611,13 +632,25 @@ func constraintGRPCSpec(grpcSpec *grpc.Spec) { } } grpcSpec.Linux.Namespaces = tmpNamespaces +} - // Handle /dev/shm mount +func (k *kataAgent) handleShm(grpcSpec *grpc.Spec, sandbox *Sandbox) { for idx, mnt := range grpcSpec.Mounts { - if mnt.Destination == "/dev/shm" { + if mnt.Destination != "/dev/shm" { + continue + } + + if sandbox.shmSize > 0 { + grpcSpec.Mounts[idx].Type = "bind" + grpcSpec.Mounts[idx].Options = []string{"rbind"} + grpcSpec.Mounts[idx].Source = filepath.Join(kataGuestSandboxDir, shmDir) + k.Logger().WithField("shm-size", sandbox.shmSize).Info("Using sandbox shm") + } else { + sizeOption := fmt.Sprintf("size=%d", DefaultShmSize) grpcSpec.Mounts[idx].Type = "tmpfs" grpcSpec.Mounts[idx].Source = "shm" - grpcSpec.Mounts[idx].Options = []string{"noexec", "nosuid", "nodev", "mode=1777", "size=65536k"} + grpcSpec.Mounts[idx].Options = []string{"noexec", "nosuid", "nodev", "mode=1777", sizeOption} + k.Logger().WithField("shm-size", sizeOption).Info("Setting up a separate shm for container") } } } @@ -785,6 +818,8 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process, // irrelevant information to the agent. constraintGRPCSpec(grpcSpec) + k.handleShm(grpcSpec, sandbox) + req := &grpc.CreateContainerRequest{ ContainerId: c.id, ExecId: c.id, diff --git a/virtcontainers/kata_agent_test.go b/virtcontainers/kata_agent_test.go index 0140ebcae8..92846e8919 100644 --- a/virtcontainers/kata_agent_test.go +++ b/virtcontainers/kata_agent_test.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "net" "os" + "path/filepath" "reflect" "testing" @@ -458,10 +459,42 @@ func TestConstraintGRPCSpec(t *testing.T) { // check mounts assert.Len(g.Mounts, 1) +} + +func TestHandleShm(t *testing.T) { + assert := assert.New(t) + k := kataAgent{} + sandbox := &Sandbox{ + shmSize: 8192, + } + + g := &pb.Spec{ + Hooks: &pb.Hooks{}, + Mounts: []pb.Mount{ + {Destination: "/dev/shm"}, + }, + } + + k.handleShm(g, sandbox) + + assert.Len(g.Mounts, 1) assert.NotEmpty(g.Mounts[0].Destination) - assert.NotEmpty(g.Mounts[0].Type) - assert.NotEmpty(g.Mounts[0].Source) - assert.NotEmpty(g.Mounts[0].Options) + assert.Equal(g.Mounts[0].Destination, "/dev/shm") + assert.Equal(g.Mounts[0].Type, "bind") + assert.NotEmpty(g.Mounts[0].Source, filepath.Join(kataGuestSharedDir, shmDir)) + assert.Equal(g.Mounts[0].Options, []string{"rbind"}) + + sandbox.shmSize = 0 + k.handleShm(g, sandbox) + + assert.Len(g.Mounts, 1) + assert.NotEmpty(g.Mounts[0].Destination) + assert.Equal(g.Mounts[0].Destination, "/dev/shm") + assert.Equal(g.Mounts[0].Type, "tmpfs") + assert.Equal(g.Mounts[0].Source, "shm") + + sizeOption := fmt.Sprintf("size=%d", DefaultShmSize) + assert.Equal(g.Mounts[0].Options, []string{"noexec", "nosuid", "nodev", "mode=1777", sizeOption}) } func testIsPidNamespacePresent(grpcSpec *pb.Spec) bool { diff --git a/virtcontainers/mount.go b/virtcontainers/mount.go index e6d5d3cba9..c68d9093ae 100644 --- a/virtcontainers/mount.go +++ b/virtcontainers/mount.go @@ -18,6 +18,10 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/drivers" ) +// DefaultShmSize is the default shm size to be used in case host +// IPC is used. +const DefaultShmSize = 65536 * 1024 + var rootfsDir = "rootfs" var systemMountPrefixes = []string{"/proc", "/sys"} diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index de3dcdd30b..cc68cfc214 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -13,6 +13,7 @@ import ( "path/filepath" "strconv" "strings" + "syscall" criContainerdAnnotations "github.com/containerd/cri-containerd/pkg/annotations" crioAnnotations "github.com/kubernetes-incubator/cri-o/pkg/annotations" @@ -481,6 +482,11 @@ func SandboxConfig(ocispec CompatOCISpec, runtime RuntimeConfig, bundlePath, cid return vc.SandboxConfig{}, err } + shmSize, err := getShmSize(containerConfig) + if err != nil { + return vc.SandboxConfig{}, err + } + networkConfig, err := networkConfig(ocispec, runtime) if err != nil { return vc.SandboxConfig{}, err @@ -526,6 +532,8 @@ func SandboxConfig(ocispec CompatOCISpec, runtime RuntimeConfig, bundlePath, cid vcAnnotations.ConfigJSONKey: string(ociSpecJSON), vcAnnotations.BundlePathKey: bundlePath, }, + + ShmSize: shmSize, } addAssetAnnotations(ocispec, &sandboxConfig) @@ -610,6 +618,32 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det return containerConfig, nil } +func getShmSize(c vc.ContainerConfig) (uint64, error) { + var shmSize uint64 + + for _, m := range c.Mounts { + if m.Destination != "/dev/shm" { + continue + } + + shmSize = vc.DefaultShmSize + + if m.Type == "bind" && m.Source != "/dev/shm" { + var s syscall.Statfs_t + + if err := syscall.Statfs(m.Source, &s); err != nil { + return 0, err + } + shmSize = uint64(s.Bsize) * s.Blocks + } + break + } + + ociLog.Infof("shm-size detected: %d", shmSize) + + return shmSize, nil +} + // StatusToOCIState translates a virtcontainers container status into an OCI state. func StatusToOCIState(status vc.ContainerStatus) spec.State { return spec.State{ diff --git a/virtcontainers/pkg/oci/utils_test.go b/virtcontainers/pkg/oci/utils_test.go index be3dffc95f..e007986d26 100644 --- a/virtcontainers/pkg/oci/utils_test.go +++ b/virtcontainers/pkg/oci/utils_test.go @@ -13,11 +13,13 @@ import ( "path" "path/filepath" "reflect" + "strconv" "testing" "github.com/kubernetes-incubator/cri-o/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" + "golang.org/x/sys/unix" vc "github.com/kata-containers/runtime/virtcontainers" "github.com/kata-containers/runtime/virtcontainers/device/config" @@ -839,6 +841,70 @@ func TestCompatOCISpecWithStruct(t *testing.T) { assert.Nil(t, err, "This test should not fail") } +func TestGetShmSize(t *testing.T) { + containerConfig := vc.ContainerConfig{ + Mounts: []vc.Mount{}, + } + + shmSize, err := getShmSize(containerConfig) + assert.Nil(t, err) + assert.Equal(t, shmSize, uint64(0)) + + m := vc.Mount{ + Source: "/dev/shm", + Destination: "/dev/shm", + Type: "tmpfs", + Options: nil, + } + + containerConfig.Mounts = append(containerConfig.Mounts, m) + shmSize, err = getShmSize(containerConfig) + assert.Nil(t, err) + assert.Equal(t, shmSize, uint64(vc.DefaultShmSize)) + + containerConfig.Mounts[0].Source = "/var/run/shared/shm" + containerConfig.Mounts[0].Type = "bind" + _, err = getShmSize(containerConfig) + assert.NotNil(t, err) +} + +func TestGetShmSizeBindMounted(t *testing.T) { + if os.Geteuid() != 0 { + t.Skip("Test disabled as requires root privileges") + } + + dir, err := ioutil.TempDir("", "") + assert.Nil(t, err) + defer os.RemoveAll(dir) + + shmPath := filepath.Join(dir, "shm") + err = os.Mkdir(shmPath, 0700) + assert.Nil(t, err) + + size := 8192 + + shmOptions := "mode=1777,size=" + strconv.Itoa(size) + err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, shmOptions) + assert.Nil(t, err) + + defer unix.Unmount(shmPath, 0) + + containerConfig := vc.ContainerConfig{ + Mounts: []vc.Mount{ + { + Source: shmPath, + Destination: "/dev/shm", + Type: "bind", + Options: nil, + }, + }, + } + + shmSize, err := getShmSize(containerConfig) + assert.Nil(t, err) + assert.Equal(t, shmSize, uint64(size)) +} + func TestMain(m *testing.M) { /* Create temp bundle directory if necessary */ err := os.MkdirAll(tempBundlePath, dirMode) diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 7b7459e5ac..bf17fc5d9c 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -356,6 +356,8 @@ type SandboxConfig struct { // Annotations keys must be unique strings and must be name-spaced // with e.g. reverse domain notation (org.clearlinux.key). Annotations map[string]string + + ShmSize uint64 } // valid checks that the sandbox configuration is valid. @@ -459,6 +461,8 @@ type Sandbox struct { annotationsLock *sync.RWMutex wg *sync.WaitGroup + + shmSize uint64 } // ID returns the sandbox identifier string. @@ -738,6 +742,7 @@ func newSandbox(sandboxConfig SandboxConfig) (*Sandbox, error) { state: State{}, annotationsLock: &sync.RWMutex{}, wg: &sync.WaitGroup{}, + shmSize: sandboxConfig.ShmSize, } if err = globalSandboxList.addSandbox(s); err != nil {