Skip to content

Commit a130f2c

Browse files
wking authored and openshift-cherrypick-robot committed
pkg/operator/sync: Track lastError in waitForDeploymentRollout
Because otherwise stuck deployments will result in the not-very-useful "timed out waiting for the condition" errors like [1]:

    Oct 17 18:41:52.205 E clusteroperator/machine-api changed Degraded to True: SyncingFailed: Failed when progressing towards operator: 4.3.0-0.ci-2019-10-17-173803 because timed out waiting for the condition

Also use %s instead of %q for formatting the deployment name, because we control the names being monitored and they don't contain whitespace or other potentially-confusing characters.

[1]: https://prow.svc.ci.openshift.org/view/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-upgrade/8809
1 parent 545465b commit a130f2c

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

pkg/operator/sync.go

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -90,26 +90,35 @@ func (optr *Operator) syncClusterAPIController(config OperatorConfig) error {
9090
}
9191

9292
func (optr *Operator) waitForDeploymentRollout(resource *appsv1.Deployment) error {
93-
return wait.Poll(deploymentRolloutPollInterval, deploymentRolloutTimeout, func() (bool, error) {
93+
var lastError error
94+
err := wait.Poll(deploymentRolloutPollInterval, deploymentRolloutTimeout, func() (bool, error) {
9495
d, err := optr.deployLister.Deployments(resource.Namespace).Get(resource.Name)
9596
if apierrors.IsNotFound(err) {
9697
return false, nil
9798
}
9899
if err != nil {
99100
// Do not return error here, as we could be updating the API Server itself, in which case we
100101
// want to continue waiting.
101-
glog.Errorf("Error getting Deployment %q during rollout: %v", resource.Name, err)
102+
lastError = fmt.Errorf("getting Deployment %s during rollout: %v", resource.Name, err)
103+
glog.Error(lastError)
102104
return false, nil
103105
}
104106

105107
if d.DeletionTimestamp != nil {
106-
return false, fmt.Errorf("deployment %q is being deleted", resource.Name)
108+
lastError = nil
109+
return false, fmt.Errorf("deployment %s is being deleted", resource.Name)
107110
}
108111

109112
if d.Generation <= d.Status.ObservedGeneration && d.Status.UpdatedReplicas == d.Status.Replicas && d.Status.UnavailableReplicas == 0 {
113+
lastError = nil
110114
return true, nil
111115
}
112-
glog.V(4).Infof("Deployment %q is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas)
116+
lastError = fmt.Errorf("deployment %s is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas)
117+
glog.V(4).Info(lastError)
113118
return false, nil
114119
})
120+
if lastError != nil {
121+
return lastError
122+
}
123+
return err
115124
}

0 commit comments

Comments
 (0)