Skip to content

Commit 5674ec6

Browse files
Merge pull request #1651 from deads2k/fix-ordering-bug
Fix ordering bug
2 parents 77eeebc + a21cc97 commit 5674ec6

File tree

2 files changed

+175
-31
lines changed

2 files changed

+175
-31
lines changed

pkg/operator/revisioncontroller/revision_controller.go

Lines changed: 59 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,14 @@ func (c RevisionController) createRevisionIfNeeded(ctx context.Context, recorder
8989

9090
// check to make sure that the latestRevision has the exact content we expect. No mutation here, so we start creating the next Revision only when it is required
9191
if isLatestRevisionCurrent {
92+
klog.V(4).Infof("Returning early, %d triggered and up to date", latestAvailableRevision)
9293
return false, nil
9394
}
9495

9596
nextRevision := latestAvailableRevision + 1
96-
recorder.Eventf("RevisionTriggered", "new revision %d triggered by %q", nextRevision, reason)
97-
if err := c.createNewRevision(ctx, recorder, nextRevision, reason); err != nil {
97+
recorder.Eventf("StartingNewRevision", "new revision %d triggered by %q", nextRevision, reason)
98+
createdNewRevision, err := c.createNewRevision(ctx, recorder, nextRevision, reason)
99+
if err != nil {
98100
cond := operatorv1.OperatorCondition{
99101
Type: "RevisionControllerDegraded",
100102
Status: operatorv1.ConditionTrue,
@@ -108,6 +110,12 @@ func (c RevisionController) createRevisionIfNeeded(ctx context.Context, recorder
108110
return true, nil
109111
}
110112

113+
if !createdNewRevision {
114+
klog.V(4).Infof("Revision %v not created", nextRevision)
115+
return false, nil
116+
}
117+
recorder.Eventf("RevisionTriggered", "new revision %d triggered by %q", nextRevision, reason)
118+
111119
cond := operatorv1.OperatorCondition{
112120
Type: "RevisionControllerDegraded",
113121
Status: operatorv1.ConditionFalse,
@@ -208,55 +216,80 @@ func (c RevisionController) isLatestRevisionCurrent(ctx context.Context, revisio
208216
return true, ""
209217
}
210218

211-
func (c RevisionController) createNewRevision(ctx context.Context, recorder events.Recorder, revision int32, reason string) error {
219+
// returns true if we created a revision
220+
func (c RevisionController) createNewRevision(ctx context.Context, recorder events.Recorder, revision int32, reason string) (bool, error) {
212221
// Create a new InProgress status configmap
213-
statusConfigMap := &corev1.ConfigMap{
222+
desiredStatusConfigMap := &corev1.ConfigMap{
214223
ObjectMeta: metav1.ObjectMeta{
215224
Namespace: c.targetNamespace,
216225
Name: nameFor("revision-status", revision),
226+
Annotations: map[string]string{
227+
"operator.openshift.io/revision-ready": "false",
228+
},
217229
},
218230
Data: map[string]string{
219231
"revision": fmt.Sprintf("%d", revision),
220232
"reason": reason,
221233
},
222234
}
223-
statusConfigMap, _, err := resourceapply.ApplyConfigMap(ctx, c.configMapGetter, recorder, statusConfigMap)
224-
if err != nil {
225-
return err
235+
createdStatus, err := c.configMapGetter.ConfigMaps(desiredStatusConfigMap.Namespace).Create(ctx, desiredStatusConfigMap, metav1.CreateOptions{})
236+
switch {
237+
case apierrors.IsAlreadyExists(err):
238+
if createdStatus == nil || len(createdStatus.UID) == 0 {
239+
createdStatus, err = c.configMapGetter.ConfigMaps(desiredStatusConfigMap.Namespace).Get(ctx, desiredStatusConfigMap.Name, metav1.GetOptions{})
240+
if err != nil {
241+
return false, err
242+
}
243+
}
244+
// take a live GET here to get current status to check the annotation
245+
if createdStatus.Annotations["operator.openshift.io/revision-ready"] == "true" {
246+
// no work to do because our cache is out of date and when we're updated, we will be able to see the result
247+
klog.Infof("down the branch indicating that our cache was out of date and we're trying to recreate a revision.")
248+
return false, nil
249+
}
250+
// update the sync and continue
251+
case err != nil:
252+
return false, err
226253
}
227254

228255
ownerRefs := []metav1.OwnerReference{{
229256
APIVersion: "v1",
230257
Kind: "ConfigMap",
231-
Name: statusConfigMap.Name,
232-
UID: statusConfigMap.UID,
258+
Name: createdStatus.Name,
259+
UID: createdStatus.UID,
233260
}}
234261

235262
for _, cm := range c.configMaps {
236263
obj, _, err := resourceapply.SyncConfigMap(ctx, c.configMapGetter, recorder, c.targetNamespace, cm.Name, c.targetNamespace, nameFor(cm.Name, revision), ownerRefs)
237264
if err != nil {
238-
return err
265+
return false, err
239266
}
240267
if obj == nil && !cm.Optional {
241-
return apierrors.NewNotFound(corev1.Resource("configmaps"), cm.Name)
268+
return false, apierrors.NewNotFound(corev1.Resource("configmaps"), cm.Name)
242269
}
243270
}
244271
for _, s := range c.secrets {
245272
obj, _, err := resourceapply.SyncSecret(ctx, c.secretGetter, recorder, c.targetNamespace, s.Name, c.targetNamespace, nameFor(s.Name, revision), ownerRefs)
246273
if err != nil {
247-
return err
274+
return false, err
248275
}
249276
if obj == nil && !s.Optional {
250-
return apierrors.NewNotFound(corev1.Resource("secrets"), s.Name)
277+
return false, apierrors.NewNotFound(corev1.Resource("secrets"), s.Name)
251278
}
252279
}
253280

254-
return nil
281+
createdStatus.Annotations["operator.openshift.io/revision-ready"] = "true"
282+
if _, err := c.configMapGetter.ConfigMaps(createdStatus.Namespace).Update(ctx, createdStatus, metav1.UpdateOptions{}); err != nil {
283+
return false, err
284+
}
285+
286+
return true, nil
255287
}
256288

257289
// getLatestAvailableRevision returns the latest known revision to the operator
258290
// This is determined by checking revision status configmaps.
259291
func (c RevisionController) getLatestAvailableRevision(ctx context.Context) (int32, error) {
292+
// this appears to use a cached getter. I conceded that past-David should have explicitly used Listers
260293
configMaps, err := c.configMapGetter.ConfigMaps(c.targetNamespace).List(ctx, metav1.ListOptions{})
261294
if err != nil {
262295
return 0, err
@@ -281,7 +314,7 @@ func (c RevisionController) getLatestAvailableRevision(ctx context.Context) (int
281314
}
282315

283316
func (c RevisionController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
284-
operatorSpec, _, latestAvailableRevision, resourceVersion, err := c.operatorClient.GetLatestRevisionState()
317+
operatorSpec, _, latestAvailableRevisionSeenByOperator, resourceVersion, err := c.operatorClient.GetLatestRevisionState()
285318
if err != nil {
286319
return err
287320
}
@@ -290,26 +323,22 @@ func (c RevisionController) sync(ctx context.Context, syncCtx factory.SyncContex
290323
return nil
291324
}
292325

293-
// If the operator status has 0 as its latest available revision, this is either the first revision
294-
// or possibly the operator resource was deleted and reset back to 0, which is not what we want so check configmaps
295-
if latestAvailableRevision == 0 {
296-
// Check to see if current revision is accurate and if not, search through configmaps for latest revision
297-
latestRevision, err := c.getLatestAvailableRevision(ctx)
326+
// If the operator status's latest available revision is not the same as the observed latest revision, update the operator.
327+
latestObservedRevision, err := c.getLatestAvailableRevision(ctx)
328+
if err != nil {
329+
return err
330+
}
331+
if latestObservedRevision != 0 && latestAvailableRevisionSeenByOperator != latestObservedRevision {
332+
// Then make sure that revision number is what's in the operator status
333+
_, _, err := c.operatorClient.UpdateLatestRevisionOperatorStatus(ctx, latestObservedRevision)
298334
if err != nil {
299335
return err
300336
}
301-
if latestRevision != 0 {
302-
// Then make sure that revision number is what's in the operator status
303-
_, _, err := c.operatorClient.UpdateLatestRevisionOperatorStatus(ctx, latestRevision)
304-
if err != nil {
305-
return err
306-
}
307-
// regardless of whether we made a change, requeue to rerun the sync with updated status
308-
return factory.SyntheticRequeueError
309-
}
337+
// regardless of whether we made a change, requeue to rerun the sync with updated status
338+
return factory.SyntheticRequeueError
310339
}
311340

312-
requeue, syncErr := c.createRevisionIfNeeded(ctx, syncCtx.Recorder(), latestAvailableRevision, resourceVersion)
341+
requeue, syncErr := c.createRevisionIfNeeded(ctx, syncCtx.Recorder(), latestAvailableRevisionSeenByOperator, resourceVersion)
313342
if requeue && syncErr == nil {
314343
return factory.SyntheticRequeueError
315344
}

pkg/operator/revisioncontroller/revision_controller_test.go

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package revisioncontroller
22

33
import (
44
"context"
5+
"fmt"
6+
"reflect"
57
"strings"
68
"testing"
79
"time"
@@ -15,6 +17,7 @@ import (
1517
"k8s.io/client-go/informers"
1618
"k8s.io/client-go/kubernetes/fake"
1719
clienttesting "k8s.io/client-go/testing"
20+
"k8s.io/klog/v2"
1821

1922
operatorv1 "github.com/openshift/api/operator/v1"
2023
"github.com/openshift/library-go/pkg/operator/events"
@@ -27,6 +30,11 @@ func filterCreateActions(actions []clienttesting.Action) []runtime.Object {
2730
if !isCreate {
2831
continue
2932
}
33+
// Filter out Update actions as both clienttesting.CreateAction
34+
// and clienttesting.UpdateAction implement the same interface
35+
if reflect.TypeOf(a) == reflect.TypeOf(clienttesting.UpdateActionImpl{}) {
36+
continue
37+
}
3038
_, isEvent := createAction.GetObject().(*v1.Event)
3139
if isEvent {
3240
continue
@@ -101,7 +109,7 @@ func TestRevisionController(t *testing.T) {
101109
},
102110
validateActions: func(t *testing.T, actions []clienttesting.Action, kclient *fake.Clientset) {
103111
updatedObjects := filterUpdateActions(actions)
104-
if len(updatedObjects) != 3 {
112+
if len(updatedObjects) != 4 {
105113
t.Errorf("expected 4 updated objects, but got %v", len(updatedObjects))
106114
}
107115
_, err := kclient.CoreV1().ConfigMaps(targetNamespace).Get(context.TODO(), "revision-status-2", metav1.GetOptions{})
@@ -532,3 +540,110 @@ func TestRevisionController(t *testing.T) {
532540
})
533541
}
534542
}
543+
544+
type fakeStaticPodLatestRevisionClient struct {
545+
v1helpers.StaticPodOperatorClient
546+
client *StaticPodLatestRevisionClient
547+
updateLatestRevisionOperatorStatusErrs bool
548+
}
549+
550+
var _ LatestRevisionClient = &fakeStaticPodLatestRevisionClient{}
551+
552+
func (c fakeStaticPodLatestRevisionClient) GetLatestRevisionState() (*operatorv1.OperatorSpec, *operatorv1.OperatorStatus, int32, string, error) {
553+
return c.client.GetLatestRevisionState()
554+
}
555+
556+
func (c fakeStaticPodLatestRevisionClient) UpdateLatestRevisionOperatorStatus(ctx context.Context, latestAvailableRevision int32, updateFuncs ...v1helpers.UpdateStatusFunc) (*operatorv1.OperatorStatus, bool, error) {
557+
if c.updateLatestRevisionOperatorStatusErrs {
558+
return nil, false, fmt.Errorf("Operation cannot be fulfilled on kubeapiservers.operator.openshift.io \"cluster\": the object has been modified; please apply your changes to the latest version and try again")
559+
}
560+
return c.client.UpdateLatestRevisionOperatorStatus(ctx, latestAvailableRevision, updateFuncs...)
561+
}
562+
563+
func TestRevisionControllerRevisionCreatedFailedStatusUpdate(t *testing.T) {
564+
startingObjects := []runtime.Object{
565+
&v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-secret", Namespace: targetNamespace}},
566+
&v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-secret-2", Namespace: targetNamespace}},
567+
&v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "test-config", Namespace: targetNamespace}, Data: map[string]string{"key": "value", "key2": "value"}},
568+
&v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "test-config-2", Namespace: targetNamespace}, Data: map[string]string{"key": "value"}},
569+
&v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "revision-status-2", Namespace: targetNamespace, Annotations: map[string]string{"operator.openshift.io/revision-ready": "false"}}},
570+
}
571+
572+
testConfigs := []RevisionResource{{Name: "test-config"}, {Name: "test-config-opt", Optional: true}}
573+
testSecrets := []RevisionResource{{Name: "test-secret"}, {Name: "test-secret-opt", Optional: true}}
574+
575+
staticPodOperatorClient := v1helpers.NewFakeStaticPodOperatorClient(
576+
&operatorv1.StaticPodOperatorSpec{
577+
OperatorSpec: operatorv1.OperatorSpec{
578+
ManagementState: operatorv1.Managed,
579+
},
580+
},
581+
&operatorv1.StaticPodOperatorStatus{
582+
LatestAvailableRevision: 2,
583+
NodeStatuses: []operatorv1.NodeStatus{
584+
{
585+
NodeName: "test-node-1",
586+
CurrentRevision: 0,
587+
TargetRevision: 0,
588+
},
589+
},
590+
},
591+
nil,
592+
nil,
593+
)
594+
595+
fakeStaticPodOperatosClient := &fakeStaticPodLatestRevisionClient{client: &StaticPodLatestRevisionClient{StaticPodOperatorClient: staticPodOperatorClient}, StaticPodOperatorClient: staticPodOperatorClient}
596+
597+
kubeClient := fake.NewSimpleClientset(startingObjects...)
598+
eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &v1.ObjectReference{})
599+
600+
c := NewRevisionController(
601+
targetNamespace,
602+
testConfigs,
603+
testSecrets,
604+
informers.NewSharedInformerFactoryWithOptions(kubeClient, 1*time.Minute, informers.WithNamespace(targetNamespace)),
605+
fakeStaticPodOperatosClient,
606+
kubeClient.CoreV1(),
607+
kubeClient.CoreV1(),
608+
eventRecorder,
609+
)
610+
611+
klog.Infof("Running NewRevisionController.Sync with UpdateLatestRevisionOperatorStatus returning an error")
612+
// make the first UpdateLatestRevisionOperatorStatus call fail
613+
fakeStaticPodOperatosClient.updateLatestRevisionOperatorStatusErrs = true
614+
syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder))
615+
klog.Infof("Validating NewRevisionController.Sync returned an error: %v", syncErr)
616+
if syncErr == nil {
617+
t.Errorf("expected error after running NewRevisionController.Sync, got nil")
618+
return
619+
}
620+
_, status, _, statusErr := staticPodOperatorClient.GetStaticPodOperatorState()
621+
if statusErr != nil {
622+
t.Errorf("unexpected status err: %v", statusErr)
623+
return
624+
}
625+
klog.Infof("Validating status.LatestAvailableRevision (%v) has not changed", status.LatestAvailableRevision)
626+
if status.LatestAvailableRevision != 2 {
627+
t.Errorf("unexpected status.LatestAvailableRevision: %v, expected 2", status.LatestAvailableRevision)
628+
return
629+
}
630+
631+
klog.Infof("Running NewRevisionController.Sync with UpdateLatestRevisionOperatorStatus succeeding")
632+
// make the second UpdateLatestRevisionOperatorStatus call to succeed
633+
fakeStaticPodOperatosClient.updateLatestRevisionOperatorStatusErrs = false
634+
syncErr = c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder))
635+
if syncErr != nil && syncErr != factory.SyntheticRequeueError {
636+
t.Errorf("unexpected error after running NewRevisionController.Sync: %v", syncErr)
637+
return
638+
}
639+
_, status, _, statusErr = staticPodOperatorClient.GetStaticPodOperatorState()
640+
if statusErr != nil {
641+
t.Errorf("unexpected status err: %v", statusErr)
642+
return
643+
}
644+
klog.Infof("Validating status.LatestAvailableRevision (%v) changed", status.LatestAvailableRevision)
645+
if status.LatestAvailableRevision != 3 {
646+
t.Errorf("unexpected status.LatestAvailableRevision: %v, expected 3", status.LatestAvailableRevision)
647+
return
648+
}
649+
}

0 commit comments

Comments
 (0)