Skip to content

Commit c8e2189

Browse files
Merge pull request #2024 from joelanford/fix/upgrade-alert
Bug 1932626: Gracefully handle service unavailable errors from kube-apiserver
2 parents 8ccfdb9 + 41e5126 commit c8e2189

File tree

3 files changed, with 42 additions and 12 deletions.

pkg/controller/operators/olm/apiservices.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -239,8 +239,8 @@ func (a *Operator) areAPIServicesAvailable(csv *v1alpha1.ClusterServiceVersion)
239239
return false, nil
240240
}
241241

242-
if err := a.isGVKRegistered(desc.Group, desc.Version, desc.Kind); err != nil {
243-
return false, nil
242+
if ok, err := a.isGVKRegistered(desc.Group, desc.Version, desc.Kind); !ok || err != nil {
243+
return false, err
244244
}
245245
}
246246

pkg/controller/operators/olm/operator.go

Lines changed: 34 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1565,15 +1565,21 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
15651565
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsReinstall, "calculated deployment install is bad", now, a.recorder)
15661566
return
15671567
}
1568-
if installErr := a.updateInstallStatus(out, installer, strategy, v1alpha1.CSVPhaseInstalling, v1alpha1.CSVReasonWaiting); installErr == nil {
1569-
logger.WithField("strategy", out.Spec.InstallStrategy.StrategyName).Infof("install strategy successful")
1570-
} else {
1568+
if installErr := a.updateInstallStatus(out, installer, strategy, v1alpha1.CSVPhaseInstalling, v1alpha1.CSVReasonWaiting); installErr != nil {
1569+
// Re-sync if kube-apiserver was unavailable
1570+
if k8serrors.IsServiceUnavailable(installErr) {
1571+
logger.WithError(installErr).Info("could not update install status")
1572+
syncError = installErr
1573+
return
1574+
}
15711575
// Set phase to failed if it's been a long time since the last transition (5 minutes)
15721576
if out.Status.LastTransitionTime != nil && a.now().Sub(out.Status.LastTransitionTime.Time) >= 5*time.Minute {
15731577
logger.Warn("install timed out")
15741578
out.SetPhaseWithEvent(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInstallCheckFailed, fmt.Sprintf("install timeout"), now, a.recorder)
1579+
return
15751580
}
15761581
}
1582+
logger.WithField("strategy", out.Spec.InstallStrategy.StrategyName).Infof("install strategy successful")
15771583

15781584
case v1alpha1.CSVPhaseSucceeded:
15791585
// Check if the current CSV is being replaced, return with replacing status if so
@@ -1622,6 +1628,12 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
16221628
return
16231629
}
16241630
if installErr := a.updateInstallStatus(out, installer, strategy, v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonComponentUnhealthy); installErr != nil {
1631+
// Re-sync if kube-apiserver was unavailable
1632+
if k8serrors.IsServiceUnavailable(installErr) {
1633+
logger.WithError(installErr).Info("could not update install status")
1634+
syncError = installErr
1635+
return
1636+
}
16251637
logger.WithField("strategy", out.Spec.InstallStrategy.StrategyName).Warnf("unhealthy component: %s", installErr)
16261638
return
16271639
}
@@ -1704,6 +1716,12 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
17041716
return
17051717
}
17061718
if installErr := a.updateInstallStatus(out, installer, strategy, v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsReinstall); installErr != nil {
1719+
// Re-sync if kube-apiserver was unavailable
1720+
if k8serrors.IsServiceUnavailable(installErr) {
1721+
logger.WithError(installErr).Info("could not update install status")
1722+
syncError = installErr
1723+
return
1724+
}
17071725
logger.WithField("strategy", out.Spec.InstallStrategy.StrategyName).Warnf("needs reinstall: %s", installErr)
17081726
}
17091727

@@ -1782,6 +1800,10 @@ func (a *Operator) updateInstallStatus(csv *v1alpha1.ClusterServiceVersion, inst
17821800
return nil
17831801
}
17841802

1803+
if err := findFirstError(k8serrors.IsServiceUnavailable, strategyErr, apiServiceErr, webhookErr); err != nil {
1804+
return err
1805+
}
1806+
17851807
// installcheck determined we can't progress (e.g. deployment failed to come up in time)
17861808
if install.IsErrorUnrecoverable(strategyErr) {
17871809
csv.SetPhaseWithEventIfChanged(v1alpha1.CSVPhaseFailed, v1alpha1.CSVReasonInstallCheckFailed, fmt.Sprintf("install failed: %s", strategyErr), now, a.recorder)
@@ -1829,6 +1851,15 @@ func (a *Operator) updateInstallStatus(csv *v1alpha1.ClusterServiceVersion, inst
18291851
return nil
18301852
}
18311853

1854+
func findFirstError(f func(error) bool, errs ...error) error {
1855+
for _, err := range errs {
1856+
if f(err) {
1857+
return err
1858+
}
1859+
}
1860+
return nil
1861+
}
1862+
18321863
// parseStrategiesAndUpdateStatus returns a StrategyInstaller and a Strategy for a CSV if it can, else it sets a status on the CSV and returns
18331864
func (a *Operator) parseStrategiesAndUpdateStatus(csv *v1alpha1.ClusterServiceVersion) (install.StrategyInstaller, install.Strategy) {
18341865
strategy, err := a.resolver.UnmarshalStrategy(csv.Spec.InstallStrategy)

pkg/controller/operators/olm/requirements.go

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import (
1111
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1212

1313
"github.com/operator-framework/api/pkg/operators/v1alpha1"
14-
olmErrors "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/errors"
1514
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
1615
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil"
1716
)
@@ -156,7 +155,7 @@ func (a *Operator) requirementStatus(strategyDetailsDeployment *v1alpha1.Strateg
156155
}
157156

158157
// Check if GVK exists
159-
if err := a.isGVKRegistered(r.Group, r.Version, r.Kind); err != nil {
158+
if ok, err := a.isGVKRegistered(r.Group, r.Version, r.Kind); !ok || err != nil {
160159
status.Status = "NotPresent"
161160
met = false
162161
statuses = append(statuses, status)
@@ -219,7 +218,7 @@ func (a *Operator) requirementStatus(strategyDetailsDeployment *v1alpha1.Strateg
219218
Name: name,
220219
}
221220

222-
if err := a.isGVKRegistered(r.Group, r.Version, r.Kind); err != nil {
221+
if ok, err := a.isGVKRegistered(r.Group, r.Version, r.Kind); !ok || err != nil {
223222
status.Status = v1alpha1.RequirementStatusReasonNotPresent
224223
status.Message = "Native API does not exist"
225224
met = false
@@ -388,7 +387,7 @@ func (a *Operator) requirementAndPermissionStatus(csv *v1alpha1.ClusterServiceVe
388387
return met, statuses, nil
389388
}
390389

391-
func (a *Operator) isGVKRegistered(group, version, kind string) error {
390+
func (a *Operator) isGVKRegistered(group, version, kind string) (bool, error) {
392391
logger := a.logger.WithFields(logrus.Fields{
393392
"group": group,
394393
"version": version,
@@ -399,15 +398,15 @@ func (a *Operator) isGVKRegistered(group, version, kind string) error {
399398
resources, err := a.opClient.KubernetesInterface().Discovery().ServerResourcesForGroupVersion(gv.String())
400399
if err != nil {
401400
logger.WithField("err", err).Info("could not query for GVK in api discovery")
402-
return err
401+
return false, err
403402
}
404403

405404
for _, r := range resources.APIResources {
406405
if r.Kind == kind {
407-
return nil
406+
return true, nil
408407
}
409408
}
410409

411410
logger.Info("couldn't find GVK in api discovery")
412-
return olmErrors.GroupVersionKindNotFoundError{group, version, kind}
411+
return false, nil
413412
}

0 commit comments

Comments
 (0)