From e86c81ff3bb64b71da4f78f5a6f918b327cb268f Mon Sep 17 00:00:00 2001
From: jiangpengfei
Date: Sat, 8 Aug 2020 16:00:45 -0400
Subject: [PATCH 1/3] virtcontainers: fix sandbox delete failure

Fixes: #2882

If an error occurs in `CreateContainer()` after the container has been
created but before the sandbox resources are updated, forcefully
deleting the sandbox returns an error, because the change to
s.config.Containers was never flushed to the persist store.

Signed-off-by: jiangpengfei
---
 virtcontainers/sandbox.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
index 7760a9085b..e6cfbffc56 100644
--- a/virtcontainers/sandbox.go
+++ b/virtcontainers/sandbox.go
@@ -1163,6 +1163,10 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
 	if len(s.config.Containers) > 0 {
 		// delete container config
 		s.config.Containers = s.config.Containers[:len(s.config.Containers)-1]
+		// Flush the change to the persist store.
+		if newErr := s.storeSandbox(); newErr != nil {
+			s.Logger().WithError(newErr).Error("Failed to flush s.config.Containers change into sandbox store")
+		}
 	}
 	if !storeAlreadyExists {
 		if delerr := c.store.Delete(); delerr != nil {

From 42bda650349c3633457bf63a509529a65a8abb0a Mon Sep 17 00:00:00 2001
From: Evan Foster
Date: Mon, 31 Aug 2020 10:12:56 -0600
Subject: [PATCH 2/3] sandbox: Disconnect from agent after VM shutdown

When a one-shot pod dies in CRI-O, the shimv2 process isn't killed until
the pod is actually deleted, even though the VM is already shut down. In
this case, the shim busy-loops trying to talk to the (now dead) agent
over VSOCK. The busy-looping is especially catastrophic for one-shot
pods, which may persist for hours or days, but it affects any shimv2 pod
where Kata is configured to use VSOCK for communication.

To address this, disconnect from the agent after the VM is shut down.

Backport of https://github.com/kata-containers/kata-containers/pull/556
to the kata-containers/runtime master branch. See
github.com/kata-containers/runtime#2719 for details.

Fixes: #2719

Signed-off-by: Evan Foster
---
 virtcontainers/sandbox.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
index e6cfbffc56..0960f58845 100644
--- a/virtcontainers/sandbox.go
+++ b/virtcontainers/sandbox.go
@@ -1598,6 +1598,11 @@ func (s *Sandbox) Stop(force bool) error {
 		return err
 	}
 
+	// Stop communicating with the agent.
+	if err := s.agent.disconnect(); err != nil && !force {
+		return err
+	}
+
 	return nil
 }
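The Stop() change above follows the teardown convention used by both patches so far: every step runs, and with force=true an individual failure is tolerated instead of aborting the whole teardown. The following is a minimal, self-contained Go sketch of that pattern; the stopper type and its methods are illustrative stand-ins, not the real virtcontainers API.

```go
package main

import (
	"errors"
	"fmt"
)

// stopper is an illustrative stand-in for the parts of sandbox teardown
// touched by the patches above (VM shutdown, agent disconnect); it is not
// the real virtcontainers Sandbox type.
type stopper struct {
	vmRunning      bool
	agentConnected bool
}

// stopVM pretends to shut the VM down and always succeeds.
func (s *stopper) stopVM() error {
	s.vmRunning = false
	return nil
}

// disconnectAgent fails when there is no live agent connection, which is the
// kind of error a forced teardown wants to tolerate.
func (s *stopper) disconnectAgent() error {
	if !s.agentConnected {
		return errors.New("agent already disconnected")
	}
	s.agentConnected = false
	return nil
}

// stop mirrors the control flow of Sandbox.Stop() after the second patch:
// shut the VM down first, then drop the agent connection so nothing keeps
// polling a dead VSOCK endpoint; with force=true individual errors do not
// abort the teardown.
func (s *stopper) stop(force bool) error {
	if err := s.stopVM(); err != nil && !force {
		return err
	}
	if err := s.disconnectAgent(); err != nil && !force {
		return err
	}
	return nil
}

func main() {
	s := &stopper{vmRunning: true, agentConnected: false}
	fmt.Println("forced stop:", s.stop(true))  // <nil>: disconnect error is tolerated
	fmt.Println("normal stop:", s.stop(false)) // surfaces "agent already disconnected"
}
```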
From 6f33f4e2f10b035f3050f6aa9c95a12bc37e063a Mon Sep 17 00:00:00 2001
From: Yves Chan
Date: Mon, 28 Sep 2020 22:22:34 +0800
Subject: [PATCH 3/3] shimv2: handle ctx passed by containerd

Sometimes the shim process cannot be shut down because its container
list is not empty. That list is populated by the shim service while a
container is being created. If containerd cancels its Create request
because of a timeout but the runtime ignores the cancellation and keeps
creating the container, the resulting container can never be deleted.
Make sure the ctx passed to the Create service RPC call is honored.

Fixes: #2992

Signed-off-by: Yves Chan
---
 containerd-shim-v2/service.go | 64 +++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/containerd-shim-v2/service.go b/containerd-shim-v2/service.go
index af26d94ef5..65052c1895 100644
--- a/containerd-shim-v2/service.go
+++ b/containerd-shim-v2/service.go
@@ -342,34 +342,46 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	var c *container
-
-	c, err = create(ctx, s, r)
-	if err != nil {
-		return nil, err
-	}
-
-	c.status = task.StatusCreated
-
-	s.containers[r.ID] = c
+	type Result struct {
+		container *container
+		err       error
+	}
+	ch := make(chan Result, 1)
+	go func() {
+		container, err := create(ctx, s, r)
+		ch <- Result{container, err}
+	}()
 
-	s.send(&eventstypes.TaskCreate{
-		ContainerID: r.ID,
-		Bundle:      r.Bundle,
-		Rootfs:      r.Rootfs,
-		IO: &eventstypes.TaskIO{
-			Stdin:    r.Stdin,
-			Stdout:   r.Stdout,
-			Stderr:   r.Stderr,
-			Terminal: r.Terminal,
-		},
-		Checkpoint: r.Checkpoint,
-		Pid:        s.pid,
-	})
+	select {
+	case <-ctx.Done():
+		return nil, errors.Errorf("create container timeout: %v", r.ID)
+	case res := <-ch:
+		if res.err != nil {
+			return nil, res.err
+		}
+		container := res.container
+		container.status = task.StatusCreated
+
+		s.containers[r.ID] = container
+
+		s.send(&eventstypes.TaskCreate{
+			ContainerID: r.ID,
+			Bundle:      r.Bundle,
+			Rootfs:      r.Rootfs,
+			IO: &eventstypes.TaskIO{
+				Stdin:    r.Stdin,
+				Stdout:   r.Stdout,
+				Stderr:   r.Stderr,
+				Terminal: r.Terminal,
+			},
+			Checkpoint: r.Checkpoint,
+			Pid:        s.pid,
+		})
 
-	return &taskAPI.CreateTaskResponse{
-		Pid: s.pid,
-	}, nil
+		return &taskAPI.CreateTaskResponse{
+			Pid: s.pid,
+		}, nil
+	}
 }
 
 // Start a process
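The Create() rewrite in this last patch is the standard Go shape for making a blocking call cancellable: run it in a goroutine, send its outcome over a buffered channel, and select on ctx.Done(). Below is a minimal sketch of that shape, with hypothetical names (result, createWithContext, slowCreate) standing in for the shim's actual types and its create(ctx, s, r) call.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// result pairs a value with an error, mirroring the Result struct the patch
// introduces inside Create().
type result struct {
	value string
	err   error
}

// createWithContext runs a possibly slow create function in a goroutine and
// honors ctx cancellation, as the patched Create() does.
func createWithContext(ctx context.Context, slowCreate func() (string, error)) (string, error) {
	// Buffer of 1 so the goroutine can always deliver its result and exit,
	// even when the caller has already given up.
	ch := make(chan result, 1)
	go func() {
		v, err := slowCreate()
		ch <- result{v, err}
	}()

	select {
	case <-ctx.Done():
		// containerd cancelled the request (e.g. a timeout): report that
		// instead of registering a container that can never be deleted.
		return "", fmt.Errorf("create cancelled: %w", ctx.Err())
	case res := <-ch:
		return res.value, res.err
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	_, err := createWithContext(ctx, func() (string, error) {
		time.Sleep(200 * time.Millisecond) // simulate a slow container create
		return "container-id", nil
	})
	fmt.Println(err) // create cancelled: context deadline exceeded
}
```

Note that on the cancellation path the slow create still runs to completion in the background; its result simply lands in the buffered channel and is dropped, so, as in the patch, the container is never recorded in the service's container map and the shim can later shut down cleanly.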