Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
signal: Switch to standard signal handling
Browse files Browse the repository at this point in the history
Change from using the golang "debug" package for providing backtraces to
using the Kata Project standard signal handling code:

- A trace is written to the agent's logger if a fatal signal is received
  or an internal error is detected.

- For consistency with other components, it is possible to enable a
  coredump on fatal error. However, this is not desirable even with debug
  enabled, so the agent will only attempt to dump core if the new developer
  mode is enabled (by specifying `agent.devmode` on the guest kernel
  command line).

Signed-off-by: James O. D. Hunt <[email protected]>
  • Loading branch information
jodh-intel committed May 8, 2018
1 parent f8081ca commit a0880aa
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 10 deletions.
40 changes: 30 additions & 10 deletions agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package main

import (
"errors"
"flag"
"fmt"
"io"
Expand All @@ -17,7 +18,6 @@ import (
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"strings"
"sync"
"syscall"
Expand Down Expand Up @@ -97,6 +97,9 @@ var agentLog = logrus.WithFields(agentFields)
// version is the agent version. This variable is populated at build time.
var version = "unknown"

// crashOnError, if true, makes die() send SIGABRT in an attempt to produce a
// coredump when an internal error occurs or a fatal signal is received.
// It is set to true when the agent.devmode option is parsed from the
// command-line options.
var crashOnError = false

// This is the list of file descriptors we can properly close after the process
// has been started. When the new process is exec(), those file descriptors are
// duplicated and it is our responsibility to close them since we have opened
Expand Down Expand Up @@ -363,15 +366,26 @@ func (s *sandbox) signalHandlerLoop(sigCh chan os.Signal) {
for sig := range sigCh {
logger := agentLog.WithField("signal", sig)

switch sig {
case unix.SIGCHLD:
if sig == unix.SIGCHLD {
if err := s.subreaper.reap(); err != nil {
logger.WithError(err).Error("failed to reap")
return
continue
}
default:
logger.Info("ignoring unexpected signal")
}

nativeSignal, ok := sig.(syscall.Signal)
if !ok {
err := errors.New("unknown signal")
logger.WithError(err).Error("failed to handle signal")
continue
}

if fatalSignal(nativeSignal) {
logger.Error("received fatal signal")
die()
}

logger.Info("ignoring unexpected signal")
}
}

Expand All @@ -385,6 +399,10 @@ func (s *sandbox) setupSignalHandler() error {
sigCh := make(chan os.Signal, 512)
signal.Notify(sigCh, unix.SIGCHLD)

for _, sig := range handledSignals() {
signal.Notify(sigCh, sig)
}

go s.signalHandlerLoop(sigCh)

return nil
Expand Down Expand Up @@ -648,9 +666,6 @@ func initAgentAsInit() error {
}

func init() {
// Force full stacktrace on internal error
debug.SetTraceback("system")

if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
Expand All @@ -662,7 +677,7 @@ func init() {
}
}

func main() {
func realMain() {
var err error
var showVersion bool

Expand Down Expand Up @@ -730,3 +745,8 @@ func main() {

s.wg.Wait()
}

// main is the program entry point. It runs realMain with handlePanic
// deferred, so a panic that propagates out of realMain on this goroutine is
// passed to handlePanic.
func main() {
// handlePanic must be deferred directly: it calls recover(), which only
// has an effect when invoked by the deferred function itself.
defer handlePanic()
realMain()
}
3 changes: 3 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
const (
optionPrefix = "agent."
logLevelFlag = optionPrefix + "log"
devModeFlag = optionPrefix + "devmode"
kernelCmdlineFile = "/proc/cmdline"
)

Expand Down Expand Up @@ -83,6 +84,8 @@ func (c *agentConfig) parseCmdlineOption(option string) error {
return err
}
c.logLevel = level
case devModeFlag:
crashOnError = true
default:
if strings.HasPrefix(split[optionPosition], optionPrefix) {
return grpcStatus.Errorf(codes.NotFound, "Unknown option %s", split[optionPosition])
Expand Down
87 changes: 87 additions & 0 deletions signals.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2018 Intel Corporation.
//
// SPDX-License-Identifier: Apache-2.0
//

package main

import (
"bytes"
"fmt"
"os"
"os/signal"
"runtime/pprof"
"strings"
"syscall"
)

// handledSignalsMap is the list of handled signals: handledSignals() returns
// its keys and fatalSignal() reports the value stored for a given signal.
//
// The value is true if receiving the signal should be fatal. Every signal
// currently listed is marked fatal.
var handledSignalsMap = map[syscall.Signal]bool{
syscall.SIGABRT: true,
syscall.SIGBUS: true,
syscall.SIGILL: true,
syscall.SIGQUIT: true,
syscall.SIGSEGV: true,
syscall.SIGSTKFLT: true,
syscall.SIGSYS: true,
syscall.SIGTRAP: true,
}

// handlePanic recovers from a panic on the calling goroutine, logs the panic
// value to the agent logger and then terminates the agent by calling die().
//
// It must be deferred directly (`defer handlePanic()`), because recover()
// only has an effect when called by the deferred function itself. If no
// panic is in progress it does nothing.
func handlePanic() {
	r := recover()
	if r == nil {
		return
	}

	// Format with %v rather than %s: a panic value can be of any type, and
	// %s renders values that are not strings, errors or Stringers as
	// "%!s(type=value)" noise instead of the value itself.
	msg := fmt.Sprintf("%v", r)
	agentLog.WithField("panic", msg).Error("fatal error")

	die()
}

// backtrace writes every registered pprof profile, in its most verbose text
// form, to the agent logger at error level, one log entry per output line.
func backtrace() {
	buf := &bytes.Buffer{}

	for _, p := range pprof.Profiles() {
		// The magic number requests a full stacktrace. See
		// https://golang.org/pkg/runtime/pprof/#Profile.WriteTo.
		//
		// The profile is written directly rather than being looked up
		// again by name, which avoids dereferencing a nil result from
		// pprof.Lookup().
		if err := p.WriteTo(buf, 2); err != nil {
			agentLog.WithError(err).WithField("profile", p.Name()).Error("failed to write profile")
		}
	}

	// Log line by line so that each line of the trace becomes its own
	// log record.
	for _, line := range strings.Split(buf.String(), "\n") {
		agentLog.Error(line)
	}
}

// fatalSignal reports whether sig is present in handledSignalsMap and marked
// there as fatal. Signals missing from the map are never fatal.
func fatalSignal(sig syscall.Signal) bool {
	// Indexing the map with an absent key yields the zero value (false),
	// which is exactly the answer wanted for unknown signals.
	return handledSignalsMap[sig]
}

// handledSignals returns every signal listed in handledSignalsMap.
//
// The result is built by ranging over the map, so callers must not rely on
// the order of the returned signals.
func handledSignals() []syscall.Signal {
	var list []syscall.Signal

	for s := range handledSignalsMap {
		list = append(list, s)
	}

	return list
}

// die logs a backtrace and terminates the agent.
//
// If crashOnError is set, SIGABRT notification is reset and SIGABRT is sent
// in an attempt to produce a coredump. Should that not terminate the
// process, or if crashOnError is not set, the agent exits with status 1.
func die() {
	backtrace()

	if crashOnError {
		// Stop SIGABRT being relayed to the agent's own signal channel so
		// that the signal sent below is not simply consumed by the
		// signal handler loop.
		signal.Reset(syscall.SIGABRT)

		// NOTE(review): pid 0 addresses every process in the caller's
		// process group, not just the agent itself - confirm this is
		// intended rather than signalling syscall.Getpid().
		if err := syscall.Kill(0, syscall.SIGABRT); err != nil {
			agentLog.WithError(err).Error("failed to send SIGABRT")
		}
	}

	os.Exit(1)
}
99 changes: 99 additions & 0 deletions signals_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package main

import (
"bytes"
"reflect"
goruntime "runtime"
"sort"
"strings"
"syscall"
"testing"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
)

func TestSignalHandledSignalsMap(t *testing.T) {
assert := assert.New(t)

for sig, fatal := range handledSignalsMap {
result := fatalSignal(sig)
if fatal {
assert.True(result)
} else {
assert.False(result)
}
}
}

// TestSignalHandledSignals checks that handledSignals() returns exactly the
// set of signals used as keys in handledSignalsMap.
func TestSignalHandledSignals(t *testing.T) {
	assert := assert.New(t)

	// sortSignals orders a slice by signal number so that two slices built
	// from (randomly ordered) map iteration can be compared.
	sortSignals := func(sigs []syscall.Signal) {
		sort.Slice(sigs, func(a, b int) bool {
			return int(sigs[a]) < int(sigs[b])
		})
	}

	var want []syscall.Signal
	for sig := range handledSignalsMap {
		want = append(want, sig)
	}
	sortSignals(want)

	have := handledSignals()
	sortSignals(have)

	assert.True(reflect.DeepEqual(want, have))
}

// TestSignalFatalSignalInvalidSignal checks that a signal which is not listed
// in handledSignalsMap (SIGXCPU) is not reported as fatal.
func TestSignalFatalSignalInvalidSignal(t *testing.T) {
	assert.False(t, fatalSignal(syscall.SIGXCPU))
}

// TestSignalBacktrace checks that backtrace() writes a trace to the agent
// logger at error level and that the trace mentions the calling function.
func TestSignalBacktrace(t *testing.T) {
	assert := assert.New(t)

	savedLog := agentLog
	defer func() {
		agentLog = savedLog
	}()

	// create buffer to save logger output
	buf := &bytes.Buffer{}

	// Use a private logger instead of reconfiguring the logger behind the
	// existing agentLog entry: changing Out/Formatter on a shared logger
	// would not be undone by restoring agentLog above and would leak into
	// every test that runs afterwards.
	logger := logrus.New()
	logger.Formatter = &logrus.TextFormatter{
		DisableColors: true,
	}

	// capture output to buffer
	logger.Out = buf

	agentLog = logger.WithField("test-agent-logger", true)

	// determine name of *this* function
	pc := make([]uintptr, 1)
	goruntime.Callers(1, pc)
	fn := goruntime.FuncForPC(pc[0])
	name := fn.Name()

	backtrace()

	b := buf.String()

	// very basic tests to check if a backtrace was produced
	assert.True(strings.Contains(b, "contention:"))
	assert.True(strings.Contains(b, `level=error`))
	assert.True(strings.Contains(b, name))
}

0 comments on commit a0880aa

Please sign in to comment.