Skip to content

Commit b11215a

Browse files
Merge pull request #2077 from hasbro17/invalid-og-on-installplan-status
Indicate invalid OperatorGroup on InstallPlan status
2 parents 0bc157a + 2450a37 commit b11215a

File tree

7 files changed

+366
-90
lines changed

7 files changed

+366
-90
lines changed

doc/design/architecture.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,10 @@ The OLM Operator will then pickup the installation and carry it through to compl
9393
### InstallPlan Control Loop
9494

9595
```
96-
None --> Planning +------>------->------> Installing --> Complete
97-
| ^
98-
v |
99-
+--> RequiresApproval --+
96+
None --> Planning +------>------->------> Installing +---> Complete
97+
| ^ |
98+
v | v
99+
+--> RequiresApproval --+ Failed
100100
```
101101

102102
| Phase | Description |
@@ -106,6 +106,7 @@ None --> Planning +------>------->------> Installing --> Complete
106106
| RequiresApproval | occurs when using manual approval, will not transition phase until `approved` field is true |
107107
| Installing | resolved resources in the InstallPlan `Status` block are being created |
108108
| Complete | all resolved resources in the `Status` block exist |
109+
| Failed | occurs when resources fail to install or there is an invalid OperatorGroup |
109110

110111
### Subscription Control Loop
111112

pkg/controller/operators/catalog/operator.go

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,10 +1331,44 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
13311331
return
13321332
}
13331333

1334+
// Complete and Failed are terminal phases
1335+
if plan.Status.Phase == v1alpha1.InstallPlanPhaseFailed || plan.Status.Phase == v1alpha1.InstallPlanPhaseComplete {
1336+
return
1337+
}
1338+
13341339
querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace())
13351340
ref, err := querier()
13361341
if err != nil {
1337-
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
1342+
1343+
// Retry sync if non-fatal error
1344+
if !scoped.IsOperatorGroupError(err) {
1345+
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
1346+
return
1347+
}
1348+
1349+
// Mark the InstallPlan as failed for a fatal Operator Group related error
1350+
logger.Infof("attenuated service account query failed - %v", err)
1351+
ipFailError := fmt.Errorf("invalid operator group - %v", err)
1352+
now := o.now()
1353+
out := plan.DeepCopy()
1354+
out.Status.SetCondition(v1alpha1.ConditionFailed(v1alpha1.InstallPlanInstalled,
1355+
v1alpha1.InstallPlanReasonInstallCheckFailed, ipFailError.Error(), &now))
1356+
out.Status.Phase = v1alpha1.InstallPlanPhaseFailed
1357+
1358+
logger.Info("transitioning InstallPlan to failed")
1359+
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), out, metav1.UpdateOptions{}); err != nil {
1360+
updateErr := errors.New("error updating InstallPlan status: " + err.Error())
1361+
logger = logger.WithField("updateError", updateErr)
1362+
logger.Errorf("error transitioning InstallPlan to failed")
1363+
1364+
// retry sync with error to update InstallPlan status
1365+
syncError = fmt.Errorf("InstallPlan failed: %s and error updating InstallPlan status as failed: %s", ipFailError, updateErr)
1366+
return
1367+
}
1368+
1369+
// Requeue subscription to propagate SubscriptionInstallPlanFailed condtion to subscription
1370+
o.requeueSubscriptionForInstallPlan(plan, logger)
1371+
13381372
return
13391373
}
13401374

@@ -1394,19 +1428,7 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
13941428
defer o.ipQueueSet.RequeueAfter(outInstallPlan.GetNamespace(), outInstallPlan.GetName(), time.Second*5)
13951429
}
13961430

1397-
defer func() {
1398-
// Notify subscription loop of installplan changes
1399-
if owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind); len(owners) > 0 {
1400-
for _, owner := range owners {
1401-
logger.WithField("owner", owner).Debug("requeueing installplan owner")
1402-
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
1403-
logger.WithError(err).Warn("error requeuing installplan owner")
1404-
}
1405-
}
1406-
return
1407-
}
1408-
logger.Trace("no installplan owner subscriptions found to requeue")
1409-
}()
1431+
defer o.requeueSubscriptionForInstallPlan(plan, logger)
14101432

14111433
// Update InstallPlan with status of transition. Log errors if we can't write them to the status.
14121434
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), outInstallPlan, metav1.UpdateOptions{}); err != nil {
@@ -1423,6 +1445,23 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
14231445
return
14241446
}
14251447

1448+
func (o *Operator) requeueSubscriptionForInstallPlan(plan *v1alpha1.InstallPlan, logger *logrus.Entry) {
1449+
// Notify subscription loop of installplan changes
1450+
owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind)
1451+
1452+
if len(owners) == 0 {
1453+
logger.Trace("no installplan owner subscriptions found to requeue")
1454+
return
1455+
}
1456+
1457+
for _, owner := range owners {
1458+
logger.WithField("owner", owner).Debug("requeueing installplan owner")
1459+
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
1460+
logger.WithError(err).Warn("error requeuing installplan owner")
1461+
}
1462+
}
1463+
}
1464+
14261465
type installPlanTransitioner interface {
14271466
ResolvePlan(*v1alpha1.InstallPlan) error
14281467
ExecutePlan(*v1alpha1.InstallPlan) error

pkg/controller/operators/catalog/operator_test.go

Lines changed: 89 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
4242
apiregistrationfake "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/fake"
4343

44+
operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
4445
"github.com/operator-framework/api/pkg/operators/v1alpha1"
4546
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned/fake"
4647
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/informers/externalversions"
@@ -142,38 +143,75 @@ func TestTransitionInstallPlan(t *testing.T) {
142143

143144
func TestSyncInstallPlanUnhappy(t *testing.T) {
144145
namespace := "ns"
146+
ipWithSteps := withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
147+
[]*v1alpha1.Step{
148+
{
149+
Resource: v1alpha1.StepResource{
150+
CatalogSource: "catalog",
151+
CatalogSourceNamespace: namespace,
152+
Group: "",
153+
Version: "v1",
154+
Kind: "ServiceAccount",
155+
Name: "sa",
156+
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
157+
objectReference("init secret"))),
158+
},
159+
Status: v1alpha1.StepStatusUnknown,
160+
},
161+
},
162+
)
145163

146164
tests := []struct {
147-
testName string
148-
in *v1alpha1.InstallPlan
149-
err error
165+
testName string
166+
err error
167+
in *v1alpha1.InstallPlan
168+
expectedPhase v1alpha1.InstallPlanPhase
169+
170+
clientObjs []runtime.Object
150171
}{
151172
{
152-
testName: "NoStatus",
153-
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
154-
err: nil,
173+
testName: "NoStatus",
174+
err: nil,
175+
expectedPhase: v1alpha1.InstallPlanPhaseNone,
176+
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
155177
},
156178
{
157-
// This checks that installplans are not applied when no operatorgroup is present
158-
testName: "HasSteps/NoOperatorGroup",
159-
in: withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
160-
[]*v1alpha1.Step{
161-
{
162-
Resource: v1alpha1.StepResource{
163-
CatalogSource: "catalog",
164-
CatalogSourceNamespace: namespace,
165-
Group: "",
166-
Version: "v1",
167-
Kind: "ServiceAccount",
168-
Name: "sa",
169-
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
170-
objectReference("init secret"))),
171-
},
172-
Status: v1alpha1.StepStatusUnknown,
173-
},
174-
},
175-
),
176-
err: fmt.Errorf("attenuated service account query failed - no operator group found that is managing this namespace"),
179+
// This checks that an installplan is marked as failed when no operatorgroup is present
180+
testName: "HasSteps/NoOperatorGroup",
181+
err: nil,
182+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
183+
in: ipWithSteps,
184+
},
185+
{
186+
// This checks that an installplan is marked as failed when multiple operator groups are present for the same namespace
187+
testName: "HasSteps/TooManyOperatorGroups",
188+
err: nil,
189+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
190+
in: ipWithSteps,
191+
clientObjs: []runtime.Object{
192+
operatorGroup("og1", "sa", namespace,
193+
&corev1.ObjectReference{
194+
Kind: "ServiceAccount",
195+
Namespace: namespace,
196+
Name: "sa",
197+
}),
198+
operatorGroup("og2", "sa", namespace,
199+
&corev1.ObjectReference{
200+
Kind: "ServiceAccount",
201+
Namespace: namespace,
202+
Name: "sa",
203+
}),
204+
},
205+
},
206+
{
207+
// This checks that an installplan is marked as failed when no service account is synced for the operator group, i.e the service account ref doesn't exist
208+
testName: "HasSteps/NonExistentServiceAccount",
209+
err: nil,
210+
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
211+
in: ipWithSteps,
212+
clientObjs: []runtime.Object{
213+
operatorGroup("og", "sa1", namespace, nil),
214+
},
177215
},
178216
}
179217

@@ -182,11 +220,18 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
182220
ctx, cancel := context.WithCancel(context.TODO())
183221
defer cancel()
184222

185-
op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.in))
223+
tt.clientObjs = append(tt.clientObjs, tt.in)
224+
225+
op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.clientObjs...))
186226
require.NoError(t, err)
187227

188228
err = op.syncInstallPlans(tt.in)
189229
require.Equal(t, tt.err, err)
230+
231+
ip, err := op.client.OperatorsV1alpha1().InstallPlans(namespace).Get(ctx, tt.in.Name, metav1.GetOptions{})
232+
require.NoError(t, err)
233+
234+
require.Equal(t, tt.expectedPhase, ip.Status.Phase)
190235
})
191236
}
192237
}
@@ -1535,3 +1580,20 @@ func apiResourcesForObjects(objs []runtime.Object) []*metav1.APIResourceList {
15351580
}
15361581
return apis
15371582
}
1583+
1584+
func operatorGroup(ogName, saName, namespace string, saRef *corev1.ObjectReference) *operatorsv1.OperatorGroup {
1585+
return &operatorsv1.OperatorGroup{
1586+
ObjectMeta: metav1.ObjectMeta{
1587+
Name: ogName,
1588+
Namespace: namespace,
1589+
},
1590+
Spec: operatorsv1.OperatorGroupSpec{
1591+
TargetNamespaces: []string{namespace},
1592+
ServiceAccountName: saName,
1593+
},
1594+
Status: operatorsv1.OperatorGroupStatus{
1595+
Namespaces: []string{namespace},
1596+
ServiceAccountRef: saRef,
1597+
},
1598+
}
1599+
}

pkg/lib/scoped/error.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package scoped
2+
3+
// operatorGroupError is an error type returned by the service account querier when
4+
// there is an invalid operatorGroup (zero groups, multiple groups, non-existent service account)
5+
type operatorGroupError struct {
6+
s string
7+
}
8+
9+
func NewOperatorGroupError(s string) error {
10+
return operatorGroupError{s: s}
11+
}
12+
13+
func (e operatorGroupError) Error() string {
14+
return e.s
15+
}
16+
17+
func (e operatorGroupError) IsOperatorGroupError() bool {
18+
return true
19+
}
20+
21+
// IsOperatorGroupError checks if an error is an operator group error
22+
// This lets us classify multiple errors as operatorGroupError without
23+
// defining and checking all the specific error value types
24+
func IsOperatorGroupError(err error) bool {
25+
type operatorGroupError interface {
26+
IsOperatorGroupError() bool
27+
}
28+
ogErr, ok := err.(operatorGroupError)
29+
return ok && ogErr.IsOperatorGroupError()
30+
}

pkg/lib/scoped/querier.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
5555
}
5656

5757
if len(list.Items) == 0 {
58-
err = fmt.Errorf("no operator group found that is managing this namespace")
58+
err = NewOperatorGroupError("no operator group found that is managing this namespace")
5959
return
6060
}
6161

@@ -70,12 +70,12 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
7070
}
7171

7272
if len(groups) == 0 {
73-
err = fmt.Errorf("no operator group found that is managing this namespace")
73+
err = NewOperatorGroupError("no operator group found that is managing this namespace")
7474
return
7575
}
7676

7777
if len(groups) > 1 {
78-
err = fmt.Errorf("more than one operator group(s) are managing this namespace count=%d", len(groups))
78+
err = NewOperatorGroupError(fmt.Sprintf("more than one operator group(s) are managing this namespace count=%d", len(groups)))
7979
return
8080
}
8181

@@ -86,7 +86,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
8686
}
8787

8888
if !group.HasServiceAccountSynced() {
89-
err = fmt.Errorf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName())
89+
err = NewOperatorGroupError(fmt.Sprintf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName()))
9090
return
9191
}
9292

pkg/lib/scoped/querier_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ func TestUserDefinedServiceAccountQuerier(t *testing.T) {
149149
}
150150
gotReference, err := f.NamespaceQuerier(tt.namespace)()
151151
if tt.wantErr {
152-
require.Equal(t, tt.err, err)
152+
require.Equal(t, tt.err.Error(), err.Error())
153153
} else {
154154
require.Nil(t, tt.err)
155155
}

0 commit comments

Comments
 (0)