Skip to content

Commit

Permalink
agent: add grpc endpoint to retrieve oom events
Browse files Browse the repository at this point in the history
This commit adds a new GRPC endpoint to retrieve OOM events
from the running containers. This uses a single unbuffered channel
to collect OOM events from multiple OOM event channels. The GRPC endpoint
will block waiting for new OOM events.

fixes kata-containers#751

Signed-off-by: Alex Price <[email protected]>
  • Loading branch information
awprice committed Mar 13, 2020
1 parent d40e3d0 commit 91cb641
Show file tree
Hide file tree
Showing 7 changed files with 532 additions and 180 deletions.
15 changes: 15 additions & 0 deletions agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ type sandbox struct {
sandboxPidNs bool
storages map[string]*sandboxStorage
stopServer chan struct{}
oomEvents chan string
}

var agentFields = logrus.Fields{
Expand Down Expand Up @@ -787,6 +788,19 @@ func (s *sandbox) listenToUdevEvents() {
}
}

func (s *sandbox) runOOMEventMonitor(ch <-chan struct{}, containerID string) {
go func() {
for {
_, ok := <-ch
if !ok {
return
}
agentLog.WithField("container_id", containerID).Info("Received OOM event")
s.oomEvents <- containerID
}
}()
}

// This loop is meant to be run inside a separate Go routine.
func (s *sandbox) signalHandlerLoop(sigCh chan os.Signal, errCh chan error) {
// Lock OS thread as subreaper is a thread local capability
Expand Down Expand Up @@ -1492,6 +1506,7 @@ func realMain() error {
deviceWatchers: make(map[string](chan string)),
storages: make(map[string]*sandboxStorage),
stopServer: make(chan struct{}),
oomEvents: make(chan string),
}

rootSpan, rootContext, err = setupTracing(agentName)
Expand Down
18 changes: 18 additions & 0 deletions agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -954,3 +954,21 @@ func TestSetupDebugConsole(t *testing.T) {
assert.NoError(err, msg)
}
}

func TestRunOOMEventMonitor(t *testing.T) {
assert := assert.New(t)

cid := "foo"
eventChan := make(chan struct{})
s := &sandbox{
oomEvents: make(chan string),
}

s.runOOMEventMonitor(eventChan, cid)

eventChan <- struct{}{}
oomEvent := <- s.oomEvents
assert.Equal(cid, oomEvent)

close(eventChan)
}
12 changes: 12 additions & 0 deletions grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,13 @@ func (a *agentGRPC) StartContainer(ctx context.Context, req *pb.StartContainerRe
return emptyResp, err
}

// Add the container to the OOM event monitor
oomCh, err := ctr.container.NotifyOOM()
if err != nil {
return emptyResp, err
}
a.sandbox.runOOMEventMonitor(oomCh, req.ContainerId)

return emptyResp, nil
}

Expand Down Expand Up @@ -1791,3 +1798,8 @@ func (a *agentGRPC) StopTracing(ctx context.Context, req *pb.StopTracingRequest)

return emptyResp, nil
}

func (a *agentGRPC) GetOOMEvent(ctx context.Context, req *pb.GetOOMEventRequest) (*pb.OOMEvent, error) {
containerID := <-a.sandbox.oomEvents
return &pb.OOMEvent{ContainerId: containerID}, nil
}
46 changes: 46 additions & 0 deletions grpc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1800,3 +1800,49 @@ func TestLoadKernelModule(t *testing.T) {
err = loadKernelModule(m)
assert.NoError(err)
}

func TestGetOOMEvent(t *testing.T) {
assert := assert.New(t)

a := &agentGRPC{
sandbox: &sandbox{
oomEvents: make(chan string),
},
}

cid1 := "foo"
cid2 := "bar"

cid1EventChan := make(chan struct{})
cid2EventChan := make(chan struct{})

// Start the oom monitors
a.sandbox.runOOMEventMonitor(cid1EventChan, cid1)
a.sandbox.runOOMEventMonitor(cid2EventChan, cid2)

// Test sending two oom events
cid1EventChan <- struct{}{}
cid2EventChan <- struct{}{}

req := &pb.GetOOMEventRequest{}

// Retrieve both events
oomEventRes, err := a.GetOOMEvent(context.Background(), req)
assert.NoError(err)
assert.Equal(cid1, oomEventRes.ContainerId)

oomEventRes, err = a.GetOOMEvent(context.Background(), req)
assert.NoError(err)
assert.Equal(cid2, oomEventRes.ContainerId)

close(cid1EventChan)

cid2EventChan <- struct{}{}

// Ensure that cid2 still works
oomEventRes, err = a.GetOOMEvent(context.Background(), req)
assert.NoError(err)
assert.Equal(cid2, oomEventRes.ContainerId)

close(cid2EventChan)
}
Loading

0 comments on commit 91cb641

Please sign in to comment.