Skip to content

Commit 53939cd

Browse files
sushrkyash97
authored and committed
skip leaked ENIs cleanup on unmanaged nodes
1 parent fe59128 commit 53939cd

File tree

3 files changed

+13
-18
lines changed

3 files changed

+13
-18
lines changed

pkg/node/manager/manager.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -123,28 +123,28 @@ func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager
123123
}
124124

125125
func (m *manager) CheckNodeForLeakedENIs(nodeName string) {
126-
managedNode, found := m.GetNode(nodeName)
127-
if !found {
128-
m.Log.Info("Node manager couldn't find the node for reconciliation cleanup", "NodeName", nodeName)
126+
cachedNode, found := m.GetNode(nodeName)
127+
if !found || !cachedNode.IsManaged() {
128+
m.Log.V(1).Info("node not found or not managed by controller, skip reconciliation", "nodeName", nodeName)
129129
return
130130
}
131131

132132
// Only start a goroutine when need to
133-
if time.Now().After(managedNode.GetNextReconciliationTime()) {
133+
if time.Now().After(cachedNode.GetNextReconciliationTime()) {
134134
go func() {
135135
if resourceProvider, found := m.resourceManager.GetResourceProvider(config.ResourceNamePodENI); found {
136136
foundLeakedENI := resourceProvider.ReconcileNode(nodeName)
137137
if foundLeakedENI {
138-
managedNode.SetReconciliationInterval(node.NodeInitialCleanupInterval)
138+
cachedNode.SetReconciliationInterval(node.NodeInitialCleanupInterval)
139139
} else {
140-
interval := wait.Jitter(managedNode.GetReconciliationInterval(), 5)
140+
interval := wait.Jitter(cachedNode.GetReconciliationInterval(), 5)
141141
if interval > node.MaxNodeReconciliationInterval {
142142
interval = node.MaxNodeReconciliationInterval
143143
}
144-
managedNode.SetReconciliationInterval(interval)
144+
cachedNode.SetReconciliationInterval(interval)
145145
}
146-
managedNode.SetNextReconciliationTime(time.Now().Add(managedNode.GetReconciliationInterval()))
147-
m.Log.Info("reconciled cleanup node for leaking branch interfaces", "NodeName", nodeName, "NextInterval", managedNode.GetReconciliationInterval(), "NextReconciliationTime", managedNode.GetNextReconciliationTime())
146+
cachedNode.SetNextReconciliationTime(time.Now().Add(cachedNode.GetReconciliationInterval()))
147+
m.Log.Info("reconciled node to cleanup leaked branch ENIs", "NodeName", nodeName, "NextInterval", cachedNode.GetReconciliationInterval(), "NextReconciliationTime", cachedNode.GetNextReconciliationTime())
148148
} else {
149149
// no SGP provider enabled
150150
return

pkg/provider/branch/provider.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,20 +276,17 @@ func (b *branchENIProvider) ReconcileNode(nodeName string) bool {
276276
log := b.log.WithValues("node", nodeName)
277277
if !isPresent {
278278
// return true to set the node next clean up asap since we don't know why trunk is missing
279-
log.Info("no trunk ENI is pointing to the given node", "nodeName", nodeName)
279+
log.V(1).Info("trunk ENI not found, requeue node", "nodeName", nodeName)
280280
return true
281281
}
282282
podList, err := b.apiWrapper.PodAPI.ListPods(nodeName)
283283
if err != nil {
284284
// return true to set the node next cleanup asap since the LIST call may fail for other reasons
285285
// we should assume that there are leaked resources need to be cleaned up
286-
log.Error(err, "failed fo list pod")
286+
log.Error(err, "failed to list pods, requeue node", "nodeName", nodeName)
287287
return true
288288
}
289289
foundLeakedENI := trunkENI.Reconcile(podList.Items)
290-
291-
log.Info("completed reconcile node cleanup on branch ENIs", "nodeName", nodeName)
292-
293290
return foundLeakedENI
294291
}
295292

pkg/provider/branch/trunk/trunk.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,7 @@ func (t *trunkENI) Reconcile(pods []v1.Pod) bool {
368368
t.deleteQueue = append(t.deleteQueue, eni)
369369
}
370370
delete(t.uidToBranchENIMap, uid)
371-
372-
t.log.Info("trunk controller found leaked branch ENI. the controller pushed leaked ENI to delete queue and deleted pod that doesn't exist anymore", "pod uid", uid,
373-
"eni", branchENIs)
371+
t.log.Info("leaked eni pushed to delete queue, deleted non-existing pod", "pod uid", uid, "eni", branchENIs)
374372
}
375373
}
376374

@@ -505,7 +503,7 @@ func (t *trunkENI) PushBranchENIsToCoolDownQueue(UID string) {
505503
branchENIs, isPresent := t.uidToBranchENIMap[UID]
506504
if !isPresent {
507505
t.log.Info("couldn't find Branch ENI in cache, it could have been released if pod"+
508-
"succeeded/failed before being deleted", "UID", UID, "BranchENIs", branchENIs)
506+
"succeeded/failed before being deleted", "UID", UID)
509507
trunkENIOperationsErrCount.WithLabelValues("get_branch_from_cache").Inc()
510508
return
511509
}

0 commit comments

Comments
 (0)