diff --git a/virtcontainers/container.go b/virtcontainers/container.go
index 37117d7e38..4dad304171 100644
--- a/virtcontainers/container.go
+++ b/virtcontainers/container.go
@@ -499,7 +499,7 @@ func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, hostMountDir, gu
 }
 
 // mountSharedDirMounts handles bind-mounts by bind-mounting to the host shared
-// directory which is mounted through 9pfs in the VM.
+// directory which is mounted through virtiofs/9pfs in the VM.
 // It also updates the container mount list with the HostPath info, and stores
 // container mounts to the storage. This way, we will have the HostPath info
 // available when we will need to unmount those mounts.
@@ -522,6 +522,18 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, hostMountDir, guestShare
 			continue
 		}
 
+		// Check if mount is a block device file. If it is, the block device will be attached to the host
+		// instead of passing this as a shared mount:
+		if len(m.BlockDeviceID) > 0 {
+			// Attach this block device, all other devices passed in the config have been attached at this point
+			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
+				return nil, nil, err
+			}
+			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
+			continue
+		}
+
+		// For non-block based mounts, we are only interested in bind mounts
 		if m.Type != "bind" {
 			continue
 		}
@@ -533,17 +545,6 @@ func (c *Container) mountSharedDirMounts(hostSharedDir, hostMountDir, guestShare
 			continue
 		}
 
-		// Check if mount is a block device file. If it is, the block device will be attached to the host
-		// instead of passing this as a shared mount.
-		if len(m.BlockDeviceID) > 0 {
-			// Attach this block device, all other devices passed in the config have been attached at this point
-			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
-				return nil, nil, err
-			}
-			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
-			continue
-		}
-
 		// Ignore /dev, directories and all other device files. We handle
 		// only regular files in /dev. It does not make sense to pass the host
 		// device nodes to the guest.
@@ -639,6 +640,9 @@ func filterDevices(c *Container, devices []ContainerDevice) (ret []ContainerDevi
 	return
 }
 
+// Add any mount-based block devices to the device manager and save the
+// device ID for the particular mount. This occurs when the mount's source
+// is a block device.
 func (c *Container) createBlockDevices() error {
 	if !c.checkBlockDeviceSupport() {
 		c.Logger().Warn("Block device not supported")
@@ -647,13 +651,18 @@ func (c *Container) createBlockDevices() error {
 
 	// iterate all mounts and create block device if it's block based.
 	for i, m := range c.mounts {
-		if len(m.BlockDeviceID) > 0 || m.Type != "bind" {
+		if len(m.BlockDeviceID) > 0 {
 			// Non-empty m.BlockDeviceID indicates there's already one device
 			// associated with the mount, so no need to create a new device for it
 			// and we only create block devices for bind mounts
 			continue
 		}
 
+		if m.Type != "bind" {
+			// We only handle bind mounts here
+			continue
+		}
+
 		var stat unix.Stat_t
 		if err := unix.Stat(m.Source, &stat); err != nil {
 			return fmt.Errorf("stat %q failed: %v", m.Source, err)
@@ -681,7 +690,6 @@ func (c *Container) createBlockDevices() error {
 
 		if err == nil && di != nil {
 			b, err := c.sandbox.devManager.NewDevice(*di)
-
 			if err != nil {
 				// Do not return an error, try to create
 				// devices for other mounts
@@ -750,11 +758,12 @@ func newContainer(sandbox *Sandbox, contConfig *ContainerConfig) (*Container, er
 		}
 	}
 
-	// Go to next step for first created container
+	// If any mounts are block devices, add them to the device manager
 	if err := c.createMounts(); err != nil {
 		return nil, err
 	}
 
+	// Add container's devices to sandbox's device-manager
 	if err := c.createDevices(contConfig); err != nil {
 		return nil, err
 	}
@@ -792,11 +801,7 @@ func (c *Container) createMounts() error {
 	}
 
 	// Create block devices for newly created container
-	if err := c.createBlockDevices(); err != nil {
-		return err
-	}
-
-	return nil
+	return c.createBlockDevices()
 }
 
 func (c *Container) createDevices(contConfig *ContainerConfig) error {
@@ -878,6 +883,7 @@ func (c *Container) create() (err error) {
 	}()
 
 	if c.checkBlockDeviceSupport() {
+		// If the rootfs is backed by a block device, go ahead and hotplug it to the guest
 		if err = c.hotplugDrive(); err != nil {
 			return
 		}
@@ -1315,11 +1321,14 @@ func (c *Container) resume() error {
 	return c.setContainerState(types.StateRunning)
 }
 
+// hotplugDrive will attempt to hotplug the container rootfs if it is backed by a
+// block device
 func (c *Container) hotplugDrive() error {
 	var dev device
 	var err error
 
-	// container rootfs is blockdevice backed and isn't mounted
+	// Check to see if the rootfs is an unmounted block device (source) or if the
+	// mount (target) is backed by a block device:
 	if !c.rootFs.Mounted {
 		dev, err = getDeviceForPath(c.rootFs.Source)
 		// there is no "rootfs" dir on block device backed rootfs
@@ -1381,6 +1390,7 @@ func (c *Container) hotplugDrive() error {
 	return c.setStateFstype(fsType)
 }
 
+// plugDevice will attach the rootfs if block device support is enabled (this is rootfs specific)
 func (c *Container) plugDevice(devicePath string) error {
 	var stat unix.Stat_t
 	if err := unix.Stat(devicePath, &stat); err != nil {
diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go
index a45b630b2d..bc9e2cc2a5 100644
--- a/virtcontainers/kata_agent.go
+++ b/virtcontainers/kata_agent.go
@@ -1333,7 +1333,6 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		}
 
 	case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI:
-
 		rootfs.Driver = kataSCSIDevType
 		rootfs.Source = blockDrive.SCSIAddr
 
 	default:
@@ -1348,8 +1347,8 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		}
 
 		// Ensure container mount destination exists
-		// TODO: remove dependency on shared fs path. shared fs is just one kind of storage sources.
-		// we should not always use shared fs path for all kinds of storage. Stead, all storage
+		// TODO: remove dependency on shared fs path. shared fs is just one kind of storage source.
+		// we should not always use shared fs path for all kinds of storage. Instead, all storage
 		// should be bind mounted to a tmpfs path for containers to use.
 		if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil {
 			return nil, err
@@ -1357,13 +1356,10 @@ func (k *kataAgent) buildContainerRootfs(sandbox *Sandbox, c *Container, rootPat
 		return rootfs, nil
 	}
 
-	// This is not a block based device rootfs.
-	// We are going to bind mount it into the 9pfs
-	// shared drive between the host and the guest.
-	// With 9pfs we don't need to ask the agent to
-	// mount the rootfs as the shared directory
-	// (kataGuestSharedDir) is already mounted in the
-	// guest. We only need to mount the rootfs from
+	// This is not a block based device rootfs. We are going to bind mount it into the shared drive
+	// between the host and the guest.
+	// With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory
+	// (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from
 	// the host and it will show up in the guest.
 	if err := bindMountContainerRootfs(k.ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil {
 		return nil, err
@@ -1403,9 +1399,14 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process,
 		}
 	}()
 
+	// Setup rootfs -- if it's block based, we'll receive a non-nil storage object representing
+	// the block device for the rootfs, which is utilized for mounting in the guest. Non-block based
+	// rootfs has already been handled (bind mounted into the shared directory)
 	if rootfs, err = k.buildContainerRootfs(sandbox, c, rootPathParent); err != nil {
 		return nil, err
-	} else if rootfs != nil {
+	}
+
+	if rootfs != nil {
 		// Add rootfs to the list of container storage.
 		// We only need to do this for block based rootfs, as we
 		// want the agent to mount it into the right location
@@ -1454,6 +1455,7 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process,
 	if err != nil {
 		return nil, err
 	}
+
 	if err := k.replaceOCIMountsForStorages(ociSpec, volumeStorages); err != nil {
 		return nil, err
 	}
@@ -1580,7 +1582,7 @@ func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, r
 
 // handleDeviceBlockVolume handles volume that is block device file
 // and DeviceBlock type.
-func (k *kataAgent) handleDeviceBlockVolume(c *Container, device api.Device) (*grpc.Storage, error) {
+func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
 	vol := &grpc.Storage{}
 
 	blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
@@ -1615,12 +1617,22 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, device api.Device) (*g
 		return nil, fmt.Errorf("Unknown block device driver: %s", c.sandbox.config.HypervisorConfig.BlockDeviceDriver)
 	}
 
+	vol.MountPoint = m.Destination
+
+	// If no explicit FS type or options were set, use what is provided for the particular mount:
+	if vol.Fstype == "" {
+		vol.Fstype = m.Type
+	}
+	if len(vol.Options) == 0 {
+		vol.Options = m.Options
+	}
+
 	return vol, nil
 }
 
 // handleVhostUserBlkVolume handles volume that is block device file
 // and VhostUserBlk type.
-func (k *kataAgent) handleVhostUserBlkVolume(c *Container, device api.Device) (*grpc.Storage, error) {
+func (k *kataAgent) handleVhostUserBlkVolume(c *Container, m Mount, device api.Device) (*grpc.Storage, error) {
 	vol := &grpc.Storage{}
 
 	d, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs)
@@ -1631,6 +1643,9 @@ func (k *kataAgent) handleVhostUserBlkVolume(c *Container, device api.Device) (*
 
 	vol.Driver = kataBlkDevType
 	vol.Source = d.PCIAddr
+	vol.Fstype = "bind"
+	vol.Options = []string{"bind"}
+	vol.MountPoint = m.Destination
 
 	return vol, nil
 }
@@ -1663,9 +1678,9 @@ func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) {
 		var err error
 		switch device.DeviceType() {
 		case config.DeviceBlock:
-			vol, err = k.handleDeviceBlockVolume(c, device)
+			vol, err = k.handleDeviceBlockVolume(c, m, device)
 		case config.VhostUserBlk:
-			vol, err = k.handleVhostUserBlkVolume(c, device)
+			vol, err = k.handleVhostUserBlkVolume(c, m, device)
 		default:
 			k.Logger().Error("Unknown device type")
 			continue
@@ -1675,14 +1690,6 @@ func (k *kataAgent) handleBlockVolumes(c *Container) ([]*grpc.Storage, error) {
 			return nil, err
 		}
 
-		vol.MountPoint = m.Destination
-		if vol.Fstype == "" {
-			vol.Fstype = "bind"
-		}
-		if len(vol.Options) == 0 {
-			vol.Options = []string{"bind"}
-		}
-
 		volumeStorages = append(volumeStorages, vol)
 	}
 
diff --git a/virtcontainers/kata_agent_test.go b/virtcontainers/kata_agent_test.go
index 8e8f8da10c..686954900a 100644
--- a/virtcontainers/kata_agent_test.go
+++ b/virtcontainers/kata_agent_test.go
@@ -413,6 +413,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 	tests := []struct {
 		BlockDeviceDriver string
+		inputMount        Mount
 		inputDev          *drivers.BlockDevice
 		resultVol         *pb.Storage
 	}{
@@ -424,6 +425,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 					Format: testBlkDriveFormat,
 				},
 			},
+			inputMount: Mount{},
 			resultVol: &pb.Storage{
 				Driver: kataNvdimmDevType,
 				Source: fmt.Sprintf("/dev/pmem%s", testNvdimmID),
@@ -433,18 +435,25 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 			},
 		},
 		{
 			BlockDeviceDriver: config.VirtioBlockCCW,
+			inputMount: Mount{
+				Type:    "bind",
+				Options: []string{"ro"},
+			},
 			inputDev: &drivers.BlockDevice{
 				BlockDrive: &config.BlockDrive{
 					DevNo: testDevNo,
 				},
 			},
 			resultVol: &pb.Storage{
-				Driver: kataBlkCCWDevType,
-				Source: testDevNo,
+				Driver:  kataBlkCCWDevType,
+				Source:  testDevNo,
+				Fstype:  "bind",
+				Options: []string{"ro"},
 			},
 		},
 		{
 			BlockDeviceDriver: config.VirtioBlock,
+			inputMount: Mount{},
 			inputDev: &drivers.BlockDevice{
 				BlockDrive: &config.BlockDrive{
 					PCIAddr: testPCIAddr,
 				},
 			},
@@ -505,7 +514,7 @@ func TestHandleDeviceBlockVolume(t *testing.T) {
 		},
 	}
 
-		vol, _ := k.handleDeviceBlockVolume(c, test.inputDev)
+		vol, _ := k.handleDeviceBlockVolume(c, test.inputMount, test.inputDev)
 
 		assert.True(t, reflect.DeepEqual(vol, test.resultVol),
 			"Volume didn't match: got %+v, expecting %+v",
 			vol, test.resultVol)
@@ -521,22 +530,26 @@ func TestHandleBlockVolume(t *testing.T) {
 	containers := map[string]*Container{}
 	containers[c.id] = c
 
-	// Create a VhostUserBlk device and a DeviceBlock device
+	// Create devices for VhostUserBlk, standard DeviceBlock and directly-assigned block device
 	vDevID := "MockVhostUserBlk"
 	bDevID := "MockDeviceBlock"
+	dDevID := "MockDeviceBlockDirect"
 	vDestination := "/VhostUserBlk/destination"
 	bDestination := "/DeviceBlock/destination"
+	dDestination := "/DeviceDirectBlock/destination"
 	vPCIAddr := "0001:01"
 	bPCIAddr := "0002:01"
-
+	dPCIAddr := "0003:01"
 	vDev := drivers.NewVhostUserBlkDevice(&config.DeviceInfo{ID: vDevID})
 	bDev := drivers.NewBlockDevice(&config.DeviceInfo{ID: bDevID})
+	dDev := drivers.NewBlockDevice(&config.DeviceInfo{ID: dDevID})
 
 	vDev.VhostUserDeviceAttrs = &config.VhostUserDeviceAttrs{PCIAddr: vPCIAddr}
 	bDev.BlockDrive = &config.BlockDrive{PCIAddr: bPCIAddr}
+	dDev.BlockDrive = &config.BlockDrive{PCIAddr: dPCIAddr}
 
 	var devices []api.Device
-	devices = append(devices, vDev, bDev)
+	devices = append(devices, vDev, bDev, dDev)
 
 	// Create a VhostUserBlk mount and a DeviceBlock mount
 	var mounts []Mount
@@ -547,8 +560,16 @@ func TestHandleBlockVolume(t *testing.T) {
 	bMount := Mount{
 		BlockDeviceID: bDevID,
 		Destination:   bDestination,
+		Type:          "bind",
+		Options:       []string{"bind"},
+	}
+	dMount := Mount{
+		BlockDeviceID: dDevID,
+		Destination:   dDestination,
+		Type:          "ext4",
+		Options:       []string{"ro"},
 	}
-	mounts = append(mounts, vMount, bMount)
+	mounts = append(mounts, vMount, bMount, dMount)
 
 	tmpDir := "/vhost/user/dir"
 	dm := manager.NewDeviceManager(manager.VirtioBlock, true, tmpDir, devices)
@@ -583,9 +604,17 @@ func TestHandleBlockVolume(t *testing.T) {
 		Driver:     kataBlkDevType,
 		Source:     bPCIAddr,
 	}
+	dStorage := &pb.Storage{
+		MountPoint: dDestination,
+		Fstype:     "ext4",
+		Options:    []string{"ro"},
+		Driver:     kataBlkDevType,
+		Source:     dPCIAddr,
+	}
 
 	assert.Equal(t, vStorage, volumeStorages[0], "Error while handle VhostUserBlk type block volume")
 	assert.Equal(t, bStorage, volumeStorages[1], "Error while handle BlockDevice type block volume")
+	assert.Equal(t, dStorage, volumeStorages[2], "Error while handle direct BlockDevice type block volume")
 }
 
 func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) {
diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go
index 2ac8cec41b..dbc6b3762b 100644
--- a/virtcontainers/qemu.go
+++ b/virtcontainers/qemu.go
@@ -1105,9 +1105,9 @@ func (q *qemu) hotplugAddBlockDevice(drive *config.BlockDrive, op operation, dev
 	}
 
 	if q.config.BlockDeviceCacheSet {
-		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush)
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAddWithCache(q.qmpMonitorCh.ctx, drive.File, drive.ID, q.config.BlockDeviceCacheDirect, q.config.BlockDeviceCacheNoflush, false)
 	} else {
-		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID)
+		err = q.qmpMonitorCh.qmp.ExecuteBlockdevAdd(q.qmpMonitorCh.ctx, drive.File, drive.ID, false)
 	}
 	if err != nil {
 		return err
diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
index dcc1894dd5..220cb086f9 100644
--- a/virtcontainers/sandbox.go
+++ b/virtcontainers/sandbox.go
@@ -1148,13 +1148,13 @@ func (s *Sandbox) fetchContainers() error {
 // This should be called only when the sandbox is already created.
 // It will add new container config to sandbox.config.Containers
 func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) {
-	// Create the container.
+	// Create the container object, add devices to the sandbox's device-manager:
 	c, err := newContainer(s, &contConfig)
 	if err != nil {
 		return nil, err
 	}
 
-	// Update sandbox config.
+	// Update sandbox config to include the new container's config
 	s.config.Containers = append(s.config.Containers, contConfig)
 
 	defer func() {
@@ -1170,6 +1170,7 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
 		}
 	}()
 
+	// create the container
 	err = c.create()
 	if err != nil {
 		return nil, err
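Taken together, the kata_agent.go changes move the mount-point/fstype/options assignment out of handleBlockVolumes and into the per-device handlers: vhost-user block volumes are always bind mounted, while DeviceBlock volumes now inherit the filesystem type and options of the container mount whenever the handler did not set them explicitly -- which is exactly the directly-assigned volume case (ext4/ro in the tests above). The following is a minimal, self-contained Go sketch of that fallback rule, not the real implementation: Storage and Mount here are simplified stand-ins for grpc.Storage and virtcontainers' Mount, and the values are borrowed from the test data above.

```go
package main

import "fmt"

// Simplified stand-ins for grpc.Storage and virtcontainers' Mount --
// field names mirror the diff; everything else is illustrative only.
type Storage struct {
	Driver     string
	Source     string
	Fstype     string
	Options    []string
	MountPoint string
}

type Mount struct {
	Destination string
	Type        string
	Options     []string
}

// applyMountInfo mirrors the fallback added to handleDeviceBlockVolume in the
// diff above: the mount point always comes from the container mount, while
// Fstype and Options are taken from the mount only when they were not already
// set explicitly on the storage object.
func applyMountInfo(vol *Storage, m Mount) {
	vol.MountPoint = m.Destination
	if vol.Fstype == "" {
		vol.Fstype = m.Type
	}
	if len(vol.Options) == 0 {
		vol.Options = m.Options
	}
}

func main() {
	// A bind mount backed by a block device behaves as before:
	bind := &Storage{Driver: "blk", Source: "0002:01"}
	applyMountInfo(bind, Mount{
		Destination: "/DeviceBlock/destination",
		Type:        "bind",
		Options:     []string{"bind"},
	})
	fmt.Printf("bind:   %+v\n", bind)

	// A directly-assigned volume is now mounted in the guest with the
	// mount's own filesystem type and options (here ext4, read-only):
	direct := &Storage{Driver: "blk", Source: "0003:01"}
	applyMountInfo(direct, Mount{
		Destination: "/DeviceDirectBlock/destination",
		Type:        "ext4",
		Options:     []string{"ro"},
	})
	fmt.Printf("direct: %+v\n", direct)
}
```

Running the sketch prints a bind-type storage for the bind mount and an ext4/ro storage for the directly-assigned device, mirroring bStorage and dStorage in TestHandleBlockVolume.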