Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
config: add option to control hotplug timeout of block devices
Browse files Browse the repository at this point in the history
This adds an option to the agent to control the hotplug timeout of block devices.
Retains the previous behaviour of defaulting to 3 seconds if not specified.
Can be increased when block device hot plugging is taking longer than expected.

Fixes #683

Signed-off-by: Alex Price <[email protected]>
  • Loading branch information
awprice committed Nov 1, 2019
1 parent e648ab2 commit 1ee8516
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 7 deletions.
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* [Enable trace support](#enable-trace-support)
* [Enable debug console](#enable-debug-console)
* [`cpuset` cgroup details](#cpuset-cgroup-details)
* [Hotplug Timeout](#hotplug-timeout)

This project implements an agent called `kata-agent` that runs inside a virtual machine (VM).

Expand Down Expand Up @@ -64,5 +65,17 @@ CONNECT 1026

See the [cpuset cgroup documentation](documentation/features/cpuset.md).

## Hotplug Timeout

[1]: https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md
When hot plugging devices into the Kata VM, the agent will wait by default for 3 seconds
for the device to be plugged in and the corresponding add uevent for the device. If the timeout
is reached without the above happening, the hot plug action will fail.

The length of the timeout can be increased by specifying the `agent.hotplug_timeout` to the guest
kernel command line. For example, `agent.hotplug_timeout=10s` will increase the timeout to 10 seconds.
The value of the option is in the [Go duration format][2].

Any invalid values used for `agent.hotplug_timeout` will fall back to the default of 3 seconds.

[1]: https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md
[2]: https://golang.org/pkg/time/#ParseDuration
3 changes: 3 additions & 0 deletions agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ var logsVSockPort = uint32(0)
// Specify a vsock port where debug console is attached.
var debugConsoleVSockPort = uint32(0)

// Timeout waiting for a device to be hotplugged
var hotplugTimeout = 3 * time.Second

// commType is used to denote the communication channel type used.
type commType int

Expand Down
11 changes: 11 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"io/ioutil"
"strconv"
"strings"
"time"

"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
Expand All @@ -25,6 +26,7 @@ const (
useVsockFlag = optionPrefix + "use_vsock"
debugConsoleFlag = optionPrefix + "debug_console"
debugConsoleVPortFlag = optionPrefix + "debug_console_vport"
hotplugTimeoutFlag = optionPrefix + "hotplug_timeout"
kernelCmdlineFile = "/proc/cmdline"
traceModeStatic = "static"
traceModeDynamic = "dynamic"
Expand Down Expand Up @@ -121,6 +123,15 @@ func (c *agentConfig) parseCmdlineOption(option string) error {
}
debugConsole = true
debugConsoleVSockPort = uint32(port)
case hotplugTimeoutFlag:
timeout, err := time.ParseDuration(split[valuePosition])
if err != nil {
return err
}
// Only use the provided timeout if a positive value is provided
if timeout > 0 {
hotplugTimeout = timeout
}
case traceModeFlag:
switch split[valuePosition] {
case traceTypeIsolated:
Expand Down
43 changes: 43 additions & 0 deletions config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"reflect"
"testing"
"time"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -408,3 +409,45 @@ func TestParseCmdlineOptionDebugConsoleVPort(t *testing.T) {
assert.Equal(debugConsoleVSockPort, d.expectedVPort)
}
}

func TestParseCmdlineOptionHotplugTimeout(t *testing.T) {
assert := assert.New(t)

a := &agentConfig{}

type testData struct {
option string
shouldErr bool
expectedHotplugTimeout time.Duration
}

data := []testData{
{"", false, 3 * time.Second},
{"hotpug_timout", false, 3 * time.Second},
{"hotplug_timeout", false, 3 * time.Second},
{"hotplug_timeout=1h", false, 3 * time.Second},
{"agnt.hotplug_timeout=1h", false, 3 * time.Second},
{"agent.hotplug_timeout=3h", false, 3 * time.Hour},
{"agent.hotplug_timeout=1s", false, 1 * time.Second},
{"agent.hotplug_timeout=0s", false, 3 * time.Second},
{"agent.hotplug_timeout=0", false, 3 * time.Second},
{"agent.hotplug_timeout=100ms", false, 100 * time.Millisecond},
{"agent.hotplug_timeout=-1", true, 3 * time.Second},
{"agent.hotplug_timeout=foobar", true, 3 * time.Second},
{"agent.hotplug_timeout=5.0", true, 3 * time.Second},
}

for i, d := range data {
// reset the hotplug timeout
hotplugTimeout = 3 * time.Second

err := a.parseCmdlineOption(d.option)
if d.shouldErr {
assert.Error(err)
} else {
assert.NoError(err)
}

assert.Equal(d.expectedHotplugTimeout, hotplugTimeout, "test %d (%+v)", i, d)
}
}
7 changes: 3 additions & 4 deletions device.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ var (
pciBusRescanFile = sysfsDir + "/bus/pci/rescan"
pciBusPathFormat = "%s/%s/pci_bus/"
systemDevPath = "/dev"
timeoutHotplug = 3
getSCSIDevPath = getSCSIDevPathImpl
getPCIDeviceName = getPCIDeviceNameImpl
getDevicePCIAddress = getDevicePCIAddressImpl
Expand Down Expand Up @@ -158,14 +157,14 @@ func getDeviceName(s *sandbox, devID string) (string, error) {
fieldLogger.Infof("Waiting on channel for device: %s notification", devID)
select {
case devName = <-notifyChan:
case <-time.After(time.Duration(timeoutHotplug) * time.Second):
case <-time.After(hotplugTimeout):
s.Lock()
delete(s.deviceWatchers, devID)
s.Unlock()

return "", grpcStatus.Errorf(codes.DeadlineExceeded,
"Timeout reached after %ds waiting for device %s",
timeoutHotplug, devID)
"Timeout reached after %s waiting for device %s",
hotplugTimeout, devID)
}
}

Expand Down
4 changes: 2 additions & 2 deletions device_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,11 +647,11 @@ func TestGetSCSIDevPath(t *testing.T) {
assert := assert.New(t)

savedFunc := scanSCSIBus
savedTimeout := timeoutHotplug
savedTimeout := hotplugTimeout

defer func() {
scanSCSIBus = savedFunc
timeoutHotplug = savedTimeout
hotplugTimeout = savedTimeout
}()

scanSCSIBus = func(_ string) error {
Expand Down

0 comments on commit 1ee8516

Please sign in to comment.