Skip to content

Commit 418b603

Browse files
authored
leader election bugfix: Delete evicted leader pods (#2210)
* leader election bugfix: Delete evicted leader pods. Before this patch, when the leader pod is hard evicted but not deleted, the leader lock configmap is not garbage collected and subsequent operator pods can never become leader. With this patch, an operator attempting to become the leader is able to delete evicted leader pods, triggering garbage collection and allowing leader election to continue. Sometimes, evicted operator pods will remain, even with this patch. This occurs when the leader operator pod is evicted and a new operator pod is created on the same node; in this case, the new pod will also be evicted. When an operator pod is created on a non-failing node, leader election will delete only the evicted leader pod, leaving any evicted operator pods that were not the leader. To replicate the evicted state, I used a `kind` cluster with 2 worker nodes with altered kubelet configuration and a memory-hog version of the memcached operator. See the [altered operator docs](https://github.com/asmacdo/go-memcahced-operator/blob/explosive-operator/README.md).
1 parent 3b2f684 commit 418b603

File tree

1 file changed

+35
-1
lines changed

1 file changed

+35
-1
lines changed

pkg/leader/leader.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,35 @@ func Become(ctx context.Context, lockName string) error {
109109
log.Info("Became the leader.")
110110
return nil
111111
case apierrors.IsAlreadyExists(err):
112-
log.Info("Not the leader. Waiting.")
112+
existingOwners := existing.GetOwnerReferences()
113+
switch {
114+
case len(existingOwners) != 1:
115+
log.Info("Leader lock configmap must have exactly one owner reference.", "ConfigMap", existing)
116+
case existingOwners[0].Kind != "Pod":
117+
log.Info("Leader lock configmap owner reference must be a pod.", "OwnerReference", existingOwners[0])
118+
default:
119+
leaderPod := &corev1.Pod{}
120+
key = crclient.ObjectKey{Namespace: ns, Name: existingOwners[0].Name}
121+
err = client.Get(ctx, key, leaderPod)
122+
switch {
123+
case apierrors.IsNotFound(err):
124+
log.Info("Leader pod has been deleted, waiting for garbage collection do remove the lock.")
125+
case err != nil:
126+
return err
127+
case isPodEvicted(*leaderPod) && leaderPod.GetDeletionTimestamp() == nil:
128+
log.Info("Operator pod with leader lock has been evicted.", "leader", leaderPod.Name)
129+
log.Info("Deleting evicted leader.")
130+
// Pod may not delete immediately, continue with backoff
131+
err := client.Delete(ctx, leaderPod)
132+
if err != nil {
133+
log.Error(err, "Leader pod could not be deleted.")
134+
}
135+
136+
default:
137+
log.Info("Not the leader. Waiting.")
138+
}
139+
}
140+
113141
select {
114142
case <-time.After(wait.Jitter(backoff, .2)):
115143
if backoff < maxBackoffInterval {
@@ -143,3 +171,9 @@ func myOwnerRef(ctx context.Context, client crclient.Client, ns string) (*metav1
143171
}
144172
return owner, nil
145173
}
174+
175+
func isPodEvicted(pod corev1.Pod) bool {
176+
podFailed := pod.Status.Phase == corev1.PodFailed
177+
podEvicted := pod.Status.Reason == "Evicted"
178+
return podFailed && podEvicted
179+
}

0 commit comments

Comments
 (0)