Skip to content

Commit 8d44b86

Browse files
committed
pkg/operator/sync: Track lastError in waitForDeploymentRollout
Track the last error seen while polling, because otherwise stuck deployments result in not-very-useful "timed out waiting for the condition" errors like [1]:

    Oct 17 18:41:52.205 E clusteroperator/machine-api changed Degraded to True: SyncingFailed: Failed when progressing towards operator: 4.3.0-0.ci-2019-10-17-173803 because timed out waiting for the condition

Also use %s instead of %q for formatting the deployment name, because we control the names being monitored and they don't contain whitespace or other potentially-confusing characters.

[1]: https://prow.svc.ci.openshift.org/view/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-upgrade/8809
1 parent 2426fa7 commit 8d44b86

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

pkg/operator/sync.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,28 +113,36 @@ func (optr *Operator) syncBaremetalControllers(config *OperatorConfig) error {
113113
}
114114

115115
func (optr *Operator) waitForDeploymentRollout(resource *appsv1.Deployment) error {
116-
return wait.Poll(deploymentRolloutPollInterval, deploymentRolloutTimeout, func() (bool, error) {
116+
var lastError error
117+
err := wait.Poll(deploymentRolloutPollInterval, deploymentRolloutTimeout, func() (bool, error) {
117118
d, err := optr.deployLister.Deployments(resource.Namespace).Get(resource.Name)
118119
if apierrors.IsNotFound(err) {
119120
return false, nil
120121
}
121122
if err != nil {
122123
// Do not return error here, as we could be updating the API Server itself, in which case we
123124
// want to continue waiting.
124-
glog.Errorf("Error getting Deployment %q during rollout: %v", resource.Name, err)
125+
lastError = fmt.Errorf("getting Deployment %s during rollout: %v", resource.Name, err)
126+
glog.Errorf("Error %s", lastError)
125127
return false, nil
126128
}
127129

128130
if d.DeletionTimestamp != nil {
129-
return false, fmt.Errorf("deployment %q is being deleted", resource.Name)
131+
lastError = nil
132+
return false, fmt.Errorf("deployment %s is being deleted", resource.Name)
130133
}
131134

132135
if d.Generation <= d.Status.ObservedGeneration && d.Status.UpdatedReplicas == d.Status.Replicas && d.Status.UnavailableReplicas == 0 {
133136
return true, nil
134137
}
135-
glog.V(4).Infof("Deployment %q is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas)
138+
lastError = fmt.Errorf("deployment %s is not ready. status: (replicas: %d, updated: %d, ready: %d, unavailable: %d)", d.Name, d.Status.Replicas, d.Status.UpdatedReplicas, d.Status.ReadyReplicas, d.Status.UnavailableReplicas)
139+
glog.V(4).Info(lastError)
136140
return false, nil
137141
})
142+
if lastError != nil {
143+
return lastError
144+
}
145+
return err
138146
}
139147

140148
func newDeployment(config *OperatorConfig, features map[string]bool) *appsv1.Deployment {

0 commit comments

Comments
 (0)