Skip to content

Indicate invalid OperatorGroup on InstallPlan status #2077

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions doc/design/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ The OLM Operator will then pickup the installation and carry it through to compl
### InstallPlan Control Loop

```
None --> Planning +------>------->------> Installing --> Complete
| ^
v |
+--> RequiresApproval --+
None --> Planning +------>------->------> Installing +---> Complete
| ^ |
v | v
+--> RequiresApproval --+ Failed
```

| Phase | Description |
Expand All @@ -106,6 +106,7 @@ None --> Planning +------>------->------> Installing --> Complete
| RequiresApproval | occurs when using manual approval, will not transition phase until `approved` field is true |
| Installing | resolved resources in the InstallPlan `Status` block are being created |
| Complete | all resolved resources in the `Status` block exist |
| Failed | occurs when resources fail to install or there is an invalid OperatorGroup |

### Subscription Control Loop

Expand Down
67 changes: 53 additions & 14 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1331,10 +1331,44 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
return
}

// Complete and Failed are terminal phases
if plan.Status.Phase == v1alpha1.InstallPlanPhaseFailed || plan.Status.Phase == v1alpha1.InstallPlanPhaseComplete {
return
}

querier := o.serviceAccountQuerier.NamespaceQuerier(plan.GetNamespace())
ref, err := querier()
if err != nil {
syncError = fmt.Errorf("attenuated service account query failed - %v", err)

// Retry sync if non-fatal error
if !scoped.IsOperatorGroupError(err) {
syncError = fmt.Errorf("attenuated service account query failed - %v", err)
return
}

// Mark the InstallPlan as failed for a fatal Operator Group related error
logger.Infof("attenuated service account query failed - %v", err)
ipFailError := fmt.Errorf("invalid operator group - %v", err)
now := o.now()
out := plan.DeepCopy()
out.Status.SetCondition(v1alpha1.ConditionFailed(v1alpha1.InstallPlanInstalled,
v1alpha1.InstallPlanReasonInstallCheckFailed, ipFailError.Error(), &now))
out.Status.Phase = v1alpha1.InstallPlanPhaseFailed

logger.Info("transitioning InstallPlan to failed")
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), out, metav1.UpdateOptions{}); err != nil {
updateErr := errors.New("error updating InstallPlan status: " + err.Error())
logger = logger.WithField("updateError", updateErr)
logger.Errorf("error transitioning InstallPlan to failed")

// retry sync with error to update InstallPlan status
syncError = fmt.Errorf("InstallPlan failed: %s and error updating InstallPlan status as failed: %s", ipFailError, updateErr)
return
}

// Requeue subscription to propagate SubscriptionInstallPlanFailed condtion to subscription
o.requeueSubscriptionForInstallPlan(plan, logger)

return
}

Expand Down Expand Up @@ -1394,19 +1428,7 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
defer o.ipQueueSet.RequeueAfter(outInstallPlan.GetNamespace(), outInstallPlan.GetName(), time.Second*5)
}

defer func() {
// Notify subscription loop of installplan changes
if owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind); len(owners) > 0 {
for _, owner := range owners {
logger.WithField("owner", owner).Debug("requeueing installplan owner")
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
logger.WithError(err).Warn("error requeuing installplan owner")
}
}
return
}
logger.Trace("no installplan owner subscriptions found to requeue")
}()
defer o.requeueSubscriptionForInstallPlan(plan, logger)

// Update InstallPlan with status of transition. Log errors if we can't write them to the status.
if _, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), outInstallPlan, metav1.UpdateOptions{}); err != nil {
Expand All @@ -1423,6 +1445,23 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
return
}

func (o *Operator) requeueSubscriptionForInstallPlan(plan *v1alpha1.InstallPlan, logger *logrus.Entry) {
// Notify subscription loop of installplan changes
owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind)

if len(owners) == 0 {
logger.Trace("no installplan owner subscriptions found to requeue")
return
}

for _, owner := range owners {
logger.WithField("owner", owner).Debug("requeueing installplan owner")
if err := o.subQueueSet.Requeue(plan.GetNamespace(), owner.Name); err != nil {
logger.WithError(err).Warn("error requeuing installplan owner")
}
}
}

type installPlanTransitioner interface {
ResolvePlan(*v1alpha1.InstallPlan) error
ExecutePlan(*v1alpha1.InstallPlan) error
Expand Down
116 changes: 89 additions & 27 deletions pkg/controller/operators/catalog/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
apiregistrationfake "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/fake"

operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
"github.com/operator-framework/api/pkg/operators/v1alpha1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned/fake"
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/informers/externalversions"
Expand Down Expand Up @@ -142,38 +143,75 @@ func TestTransitionInstallPlan(t *testing.T) {

func TestSyncInstallPlanUnhappy(t *testing.T) {
namespace := "ns"
ipWithSteps := withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
[]*v1alpha1.Step{
{
Resource: v1alpha1.StepResource{
CatalogSource: "catalog",
CatalogSourceNamespace: namespace,
Group: "",
Version: "v1",
Kind: "ServiceAccount",
Name: "sa",
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
objectReference("init secret"))),
},
Status: v1alpha1.StepStatusUnknown,
},
},
)

tests := []struct {
testName string
in *v1alpha1.InstallPlan
err error
testName string
err error
in *v1alpha1.InstallPlan
expectedPhase v1alpha1.InstallPlanPhase

clientObjs []runtime.Object
}{
{
testName: "NoStatus",
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
err: nil,
testName: "NoStatus",
err: nil,
expectedPhase: v1alpha1.InstallPlanPhaseNone,
in: installPlan("p", namespace, v1alpha1.InstallPlanPhaseNone),
},
{
// This checks that installplans are not applied when no operatorgroup is present
testName: "HasSteps/NoOperatorGroup",
in: withSteps(installPlan("p", namespace, v1alpha1.InstallPlanPhaseInstalling, "csv"),
[]*v1alpha1.Step{
{
Resource: v1alpha1.StepResource{
CatalogSource: "catalog",
CatalogSourceNamespace: namespace,
Group: "",
Version: "v1",
Kind: "ServiceAccount",
Name: "sa",
Manifest: toManifest(t, serviceAccount("sa", namespace, "",
objectReference("init secret"))),
},
Status: v1alpha1.StepStatusUnknown,
},
},
),
err: fmt.Errorf("attenuated service account query failed - no operator group found that is managing this namespace"),
// This checks that an installplan is marked as failed when no operatorgroup is present
testName: "HasSteps/NoOperatorGroup",
err: nil,
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
in: ipWithSteps,
},
{
// This checks that an installplan is marked as failed when multiple operator groups are present for the same namespace
testName: "HasSteps/TooManyOperatorGroups",
err: nil,
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
in: ipWithSteps,
clientObjs: []runtime.Object{
operatorGroup("og1", "sa", namespace,
&corev1.ObjectReference{
Kind: "ServiceAccount",
Namespace: namespace,
Name: "sa",
}),
operatorGroup("og2", "sa", namespace,
&corev1.ObjectReference{
Kind: "ServiceAccount",
Namespace: namespace,
Name: "sa",
}),
},
},
{
// This checks that an installplan is marked as failed when no service account is synced for the operator group, i.e the service account ref doesn't exist
testName: "HasSteps/NonExistentServiceAccount",
err: nil,
expectedPhase: v1alpha1.InstallPlanPhaseFailed,
in: ipWithSteps,
clientObjs: []runtime.Object{
operatorGroup("og", "sa1", namespace, nil),
},
},
}

Expand All @@ -182,11 +220,18 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.in))
tt.clientObjs = append(tt.clientObjs, tt.in)

op, err := NewFakeOperator(ctx, namespace, []string{namespace}, withClientObjs(tt.clientObjs...))
require.NoError(t, err)

err = op.syncInstallPlans(tt.in)
require.Equal(t, tt.err, err)

ip, err := op.client.OperatorsV1alpha1().InstallPlans(namespace).Get(ctx, tt.in.Name, metav1.GetOptions{})
require.NoError(t, err)

require.Equal(t, tt.expectedPhase, ip.Status.Phase)
})
}
}
Expand Down Expand Up @@ -1535,3 +1580,20 @@ func apiResourcesForObjects(objs []runtime.Object) []*metav1.APIResourceList {
}
return apis
}

func operatorGroup(ogName, saName, namespace string, saRef *corev1.ObjectReference) *operatorsv1.OperatorGroup {
return &operatorsv1.OperatorGroup{
ObjectMeta: metav1.ObjectMeta{
Name: ogName,
Namespace: namespace,
},
Spec: operatorsv1.OperatorGroupSpec{
TargetNamespaces: []string{namespace},
ServiceAccountName: saName,
},
Status: operatorsv1.OperatorGroupStatus{
Namespaces: []string{namespace},
ServiceAccountRef: saRef,
},
}
}
30 changes: 30 additions & 0 deletions pkg/lib/scoped/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package scoped

// operatorGroupError is an error type returned by the service account querier when
// there is an invalid operatorGroup (zero groups, multiple groups, non-existent service account)
type operatorGroupError struct {
s string
}

func NewOperatorGroupError(s string) error {
return operatorGroupError{s: s}
}

func (e operatorGroupError) Error() string {
return e.s
}

func (e operatorGroupError) IsOperatorGroupError() bool {
return true
}

// IsOperatorGroupError checks if an error is an operator group error
// This lets us classify multiple errors as operatorGroupError without
// defining and checking all the specific error value types
func IsOperatorGroupError(err error) bool {
type operatorGroupError interface {
IsOperatorGroupError() bool
}
ogErr, ok := err.(operatorGroupError)
return ok && ogErr.IsOperatorGroupError()
}
8 changes: 4 additions & 4 deletions pkg/lib/scoped/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
}

if len(list.Items) == 0 {
err = fmt.Errorf("no operator group found that is managing this namespace")
err = NewOperatorGroupError("no operator group found that is managing this namespace")
return
}

Expand All @@ -70,12 +70,12 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
}

if len(groups) == 0 {
err = fmt.Errorf("no operator group found that is managing this namespace")
err = NewOperatorGroupError("no operator group found that is managing this namespace")
return
}

if len(groups) > 1 {
err = fmt.Errorf("more than one operator group(s) are managing this namespace count=%d", len(groups))
err = NewOperatorGroupError(fmt.Sprintf("more than one operator group(s) are managing this namespace count=%d", len(groups)))
return
}

Expand All @@ -86,7 +86,7 @@ func queryServiceAccountFromNamespace(logger *logrus.Entry, crclient versioned.I
}

if !group.HasServiceAccountSynced() {
err = fmt.Errorf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName())
err = NewOperatorGroupError(fmt.Sprintf("please make sure the service account exists. sa=%s operatorgroup=%s/%s", group.Spec.ServiceAccountName, group.GetNamespace(), group.GetName()))
return
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/lib/scoped/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func TestUserDefinedServiceAccountQuerier(t *testing.T) {
}
gotReference, err := f.NamespaceQuerier(tt.namespace)()
if tt.wantErr {
require.Equal(t, tt.err, err)
require.Equal(t, tt.err.Error(), err.Error())
} else {
require.Nil(t, tt.err)
}
Expand Down
Loading