Skip to content

Commit

Permalink
Pod startup latency with Calico and EKS (aws#1629)
Browse files Browse the repository at this point in the history
* Calico optimization

* make format because of older commits

* Update the annotation

* update env variable
  • Loading branch information
jayanthvn committed Oct 14, 2021
1 parent b9c1b2a commit bd916ed
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 14 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,18 @@ Setting `DISABLE_NETWORK_RESOURCE_PROVISIONING` to `true` will make IPAMD to dep

---

#### `ANNOTATE_POD_IP` (v1.9.3+)

Type: Boolean as a String

Default: `false`

Setting `ANNOTATE_POD_IP` to `true` will allow IPAMD to add the annotation `vpc.amazonaws.com/pod-ips` to the pod, with the pod IP as its value.

There is a known [issue](https://github.com/kubernetes/kubernetes/issues/39113) with kubelet taking time to update `Pod.Status.PodIP`, which leads to Calico being blocked on programming the policy. Setting `ANNOTATE_POD_IP` to `true` makes the AWS VPC CNI write the IP address back to the pod as an annotation, similar to the optimization added in the Calico CNI plugin, to close this race condition.

---

### ENI tags related to Allocation

This plugin interacts with the following tags on ENIs:
Expand Down
5 changes: 4 additions & 1 deletion charts/aws-vpc-cni/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ rules:
verbs: ["list", "watch", "get"]
- apiGroups: [""]
resources:
- pods
- namespaces
verbs: ["list", "watch", "get"]
- apiGroups: [""]
resources:
- pods
verbs: ["list", "watch", "get", "patch"]
- apiGroups: [""]
resources:
- nodes
Expand Down
10 changes: 9 additions & 1 deletion config/master/aws-k8s-cni-cn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@
- "apiGroups":
- ""
"resources":
- "pods"
- "namespaces"
"verbs":
- "list"
- "watch"
- "get"
- "apiGroups":
- ""
"resources":
- "pods"
"verbs":
- "list"
- "watch"
- "get"
- "patch"
- "apiGroups":
- ""
"resources":
Expand Down
10 changes: 9 additions & 1 deletion config/master/aws-k8s-cni-us-gov-east-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@
- "apiGroups":
- ""
"resources":
- "pods"
- "namespaces"
"verbs":
- "list"
- "watch"
- "get"
- "apiGroups":
- ""
"resources":
- "pods"
"verbs":
- "list"
- "watch"
- "get"
- "patch"
- "apiGroups":
- ""
"resources":
Expand Down
10 changes: 9 additions & 1 deletion config/master/aws-k8s-cni-us-gov-west-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@
- "apiGroups":
- ""
"resources":
- "pods"
- "namespaces"
"verbs":
- "list"
- "watch"
- "get"
- "apiGroups":
- ""
"resources":
- "pods"
"verbs":
- "list"
- "watch"
- "get"
- "patch"
- "apiGroups":
- ""
"resources":
Expand Down
10 changes: 9 additions & 1 deletion config/master/aws-k8s-cni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@
- "apiGroups":
- ""
"resources":
- "pods"
- "namespaces"
"verbs":
- "list"
- "watch"
- "get"
- "apiGroups":
- ""
"resources":
- "pods"
"verbs":
- "list"
- "watch"
- "get"
- "patch"
- "apiGroups":
- ""
"resources":
Expand Down
7 changes: 6 additions & 1 deletion config/master/manifests.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,14 @@ local awsnode = {
},
{
apiGroups: [""],
resources: ["pods", "namespaces"],
resources: ["namespaces"],
verbs: ["list", "watch", "get"],
},
{
apiGroups: [""],
resources: ["pods"],
verbs: ["list", "watch", "get", "patch"],
},
{
apiGroups: [""],
resources: ["nodes"],
Expand Down
55 changes: 47 additions & 8 deletions pkg/ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"

"github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils"
"github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig"
Expand Down Expand Up @@ -147,6 +148,14 @@ const (
envManageUntaggedENI = "MANAGE_UNTAGGED_ENI"

eniNodeTagKey = "node.k8s.amazonaws.com/instance_id"

// envAnnotatePodIP is used to annotate pods with their IPs under the vpc.amazonaws.com/pod-ips key.
// Ref: https://github.com/projectcalico/calico/issues/3530
// The annotation can be in one of three states:
// - not present, in which case we fall back to the k8s podIP;
// - present and set to an IP, in which case we use it;
// - present and set to the empty string, which we use to mean "CNI DEL had occurred; networking has been removed from this pod".
// The empty string case helps close a race at pod shutdown where it looks like the pod still has its IP when the IP has been released.
envAnnotatePodIP = "ANNOTATE_POD_IP"
)

var log = logger.Get()
Expand Down Expand Up @@ -229,14 +238,15 @@ type IPAMContext struct {
lastDecreaseIPPool time.Time
// reconcileCooldownCache keeps timestamps of the last time an IP address was unassigned from an ENI,
// so that we don't reconcile and add it back too quickly if IMDS lags behind reality.
reconcileCooldownCache ReconcileCooldownCache
terminating int32 // Flag to warn that the pod is about to shut down.
disableENIProvisioning bool
enablePodENI bool
myNodeName string
enableIpv4PrefixDelegation bool
lastInsufficientCidrError time.Time
enableManageUntaggedMode bool
reconcileCooldownCache ReconcileCooldownCache
terminating int32 // Flag to warn that the pod is about to shut down.
disableENIProvisioning bool
enablePodENI bool
myNodeName string
enablePrefixDelegation bool
lastInsufficientCidrError time.Time
enableManageUntaggedMode bool
enablePodIPAnnotation bool
}

// setUnmanagedENIs will rebuild the set of ENI IDs for ENIs tagged as "no_manage"
Expand Down Expand Up @@ -361,6 +371,7 @@ func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContex

c.enablePodENI = enablePodENI()
c.enableManageUntaggedMode = enableManageUntaggedMode()
c.enablePodIPAnnotation = enablePodIPAnnotation()

err = c.awsClient.FetchInstanceTypeLimits()
if err != nil {
Expand Down Expand Up @@ -1581,6 +1592,10 @@ func enableManageUntaggedMode() bool {
return getEnvBoolWithDefault(envManageUntaggedENI, true)
}

// enablePodIPAnnotation reports whether the ANNOTATE_POD_IP environment
// variable turns on pod IP annotation. The feature is off (false) when the
// variable is not set.
func enablePodIPAnnotation() bool {
	annotate := getEnvBoolWithDefault(envAnnotatePodIP, false)
	return annotate
}

// filterUnmanagedENIs filters out ENIs marked with the "node.k8s.amazonaws.com/no_manage" tag
func (c *IPAMContext) filterUnmanagedENIs(enis []awsutils.ENIMetadata) []awsutils.ENIMetadata {
numFiltered := 0
Expand Down Expand Up @@ -1779,6 +1794,30 @@ func (c *IPAMContext) GetPod(podName, namespace string) (*corev1.Pod, error) {
return &pod, nil
}

// AnnotatePod sets the annotation key to val on the given pod.
//
// The first argument is the pod name and the second its namespace, matching
// the argument order of GetPod and of the existing call sites, which pass
// (K8S_POD_NAME, K8S_POD_NAMESPACE). The pod is fetched, the annotation is set
// on a deep copy, and the difference is sent as a merge patch through the raw
// (uncached) client. The whole fetch-and-patch sequence is retried with
// retry.DefaultBackoff when the API server reports a conflict.
//
// It returns the error from fetching the pod or from the patch call, or nil
// once the patch has been applied.
func (c *IPAMContext) AnnotatePod(podName, podNamespace, key, val string) error {
	ctx := context.TODO()

	return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		// Re-read the pod on every attempt so a retry patches against the latest state.
		pod, err := c.GetPod(podName, podNamespace)
		if err != nil {
			return err
		}

		newPod := pod.DeepCopy()
		// A pod without any annotations has a nil map; writing to it would panic.
		if newPod.Annotations == nil {
			newPod.Annotations = make(map[string]string)
		}
		newPod.Annotations[key] = val

		if err := c.rawK8SClient.Patch(ctx, newPod, client.MergeFrom(pod)); err != nil {
			log.Errorf("Failed to annotate pod %s/%s with %s: %s, error %v", podNamespace, podName, key, val, err)
			return err
		}
		log.Debugf("Annotates pod %s with %s: %s", podName, key, val)
		return nil
	})
}

func (c *IPAMContext) tryUnassignIPsFromENIs() {
log.Debugf("In tryUnassignIPsFromENIs")
eniInfos := c.dataStore.GetENIInfos()
Expand Down
9 changes: 9 additions & 0 deletions pkg/ipamd/rpc_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ import (
const (
// ipamdgRPCaddress is a loopback host:port; presumably the address the ipamd
// gRPC server listens on — confirm against the server setup code.
ipamdgRPCaddress = "127.0.0.1:50051"
// grpcHealthServiceName is presumably the service name registered with the
// gRPC health service — confirm against the health server setup.
grpcHealthServiceName = "grpc.health.v1.aws-node"

// vpccniPodIPKey is the pod annotation key passed to AnnotatePod by
// AddNetwork (with the pod's IPv4 address) and by DelNetwork (with the
// empty string) when pod IP annotation is enabled.
vpccniPodIPKey = "vpc.amazonaws.com/pod-ips"
)

// server controls RPC service responses.
Expand Down Expand Up @@ -162,6 +164,9 @@ func (s *server) AddNetwork(ctx context.Context, in *rpc.AddNetworkRequest) (*rp
}
}

if s.ipamContext.enablePodIPAnnotation {
s.ipamContext.AnnotatePod(in.K8S_POD_NAME, in.K8S_POD_NAMESPACE, vpccniPodIPKey, ipv4Addr)
}
resp := rpc.AddNetworkReply{
Success: err == nil,
IPv4Addr: addr,
Expand Down Expand Up @@ -244,6 +249,10 @@ func (s *server) DelNetwork(ctx context.Context, in *rpc.DelNetworkRequest) (*rp
}
}

if s.ipamContext.enablePodIPAnnotation {
s.ipamContext.AnnotatePod(in.K8S_POD_NAME, in.K8S_POD_NAMESPACE, vpccniPodIPKey, "")
}

log.Infof("Send DelNetworkReply: IPv4Addr %s, DeviceNumber: %d, err: %v", ip, deviceNumber, err)

return &rpc.DelNetworkReply{Success: err == nil, IPv4Addr: ip, DeviceNumber: int32(deviceNumber)}, err
Expand Down

0 comments on commit bd916ed

Please sign in to comment.