Skip to content

Commit f9aaf50

Browse files
Merge pull request #155 from njhale/4.8/fix/invalid-max
Bug 1989711: fix(openshift): block upgrades on invalid max properties (#2302)
2 parents e2cb8ed + 944354f commit f9aaf50

File tree

6 files changed

+262
-201
lines changed

6 files changed

+262
-201
lines changed

staging/operator-lifecycle-manager/pkg/controller/operators/openshift/clusteroperator_controller.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,8 @@ func (r *ClusterOperatorReconciler) setDegraded(_ context.Context, co *ClusterOp
209209
}
210210

211211
const (
212-
IncompatibleOperatorsInstalled = "IncompatibleOperatorsInstalled"
212+
IncompatibleOperatorsInstalled = "IncompatibleOperatorsInstalled"
213+
ErrorCheckingOperatorCompatibility = "ErrorCheckingOperatorCompatibility"
213214
)
214215

215216
func (r *ClusterOperatorReconciler) setUpgradeable(ctx context.Context, co *ClusterOperator) error {
@@ -221,31 +222,39 @@ func (r *ClusterOperatorReconciler) setUpgradeable(ctx context.Context, co *Clus
221222

222223
// Set upgradeable=false if (either/or):
223224
// 1. OLM currently upgrading (takes priorty in the status message)
224-
// 2. Operators currently installed that are incompatible with the next OCP minor version
225+
// 2. Operators currently installed that are incompatible with the next minor version of OpenShift
226+
// 3. An error occurs while determining 2
227+
var err error
225228
if r.syncTracker.SuccessfulSyncs() < 1 || !versionsMatch(co.Status.Versions, r.TargetVersions) {
226229
// OLM is still upgrading
227230
desired.Status = configv1.ConditionFalse
228231
desired.Message = "Waiting for updates to take effect"
229232
} else {
230-
incompatible, err := incompatibleOperators(ctx, r.Client)
233+
var incompatible skews
234+
incompatible, err = incompatibleOperators(ctx, r.Client)
231235
if err != nil {
232-
return err
233-
}
234-
235-
if len(incompatible) > 0 {
236-
// Some incompatible operator is installed
236+
// "Fail closed" when we can't determine compatibility
237+
// Note: Unspecified compatibility = universal compatibility; i.e. operators that don't specify a "maxOpenShiftVersion" property are compatible with everything.
238+
desired.Status = configv1.ConditionFalse
239+
desired.Reason = ErrorCheckingOperatorCompatibility
240+
desired.Message = fmt.Sprintf("Encountered errors while checking compatibility with the next minor version of OpenShift: %s", err)
241+
} else if len(incompatible) > 0 {
242+
// Operators are installed that have incompatible and/or invalid max versions
237243
desired.Status = configv1.ConditionFalse
238244
desired.Reason = IncompatibleOperatorsInstalled
239-
desired.Message = incompatible.String() // TODO: Truncate message to field length
245+
desired.Message = incompatible.String()
240246
}
241247
}
242248

249+
// Only return transient errors likely resolved by retrying immediately
250+
err = transientErrors(err)
251+
243252
current := co.GetCondition(configv1.OperatorUpgradeable)
244253
if conditionsEqual(current, desired) { // Comparison ignores lastUpdated
245-
return nil
254+
return err
246255
}
247256

248257
co.SetCondition(desired)
249258

250-
return nil
259+
return err
251260
}

staging/operator-lifecycle-manager/pkg/controller/operators/openshift/clusteroperator_controller_test.go

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,35 @@ var _ = Describe("ClusterOperator controller", func() {
142142
LastTransitionTime: fixedNow(),
143143
}))
144144

145-
By("setting upgradeable=false when incompatible operators exist")
145+
By("setting upgradeable=false when there's an error determining compatibility")
146+
cv.Status = configv1.ClusterVersionStatus{}
147+
148+
Eventually(func() error {
149+
return k8sClient.Status().Update(ctx, cv)
150+
}).Should(Succeed())
151+
152+
Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
153+
err := k8sClient.Get(ctx, clusterOperatorName, co)
154+
return co.Status.Conditions, err
155+
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
156+
Type: configv1.OperatorUpgradeable,
157+
Status: configv1.ConditionFalse,
158+
Reason: ErrorCheckingOperatorCompatibility,
159+
Message: "Encountered errors while checking compatibility with the next minor version of OpenShift: Desired release version missing from ClusterVersion",
160+
LastTransitionTime: fixedNow(),
161+
}))
162+
163+
cv.Status = configv1.ClusterVersionStatus{
164+
Desired: configv1.Update{
165+
Version: clusterVersion,
166+
},
167+
}
168+
169+
Eventually(func() error {
170+
return k8sClient.Status().Update(ctx, cv)
171+
}).Should(Succeed())
146172

173+
By("setting upgradeable=false when incompatible operators exist")
147174
ns := &corev1.Namespace{}
148175
ns.SetName("nostromo")
149176

@@ -160,12 +187,15 @@ var _ = Describe("ClusterOperator controller", func() {
160187
incompatible.SetName("xenomorph")
161188
incompatible.SetNamespace(ns.GetName())
162189

163-
withMax := func(version string) map[string]string {
164-
maxProperty := &api.Property{
165-
Type: MaxOpenShiftVersionProperty,
166-
Value: version,
190+
withMax := func(versions ...string) map[string]string {
191+
var properties []*api.Property
192+
for _, v := range versions {
193+
properties = append(properties, &api.Property{
194+
Type: MaxOpenShiftVersionProperty,
195+
Value: v,
196+
})
167197
}
168-
value, err := projection.PropertiesAnnotationFromPropertyList([]*api.Property{maxProperty})
198+
value, err := projection.PropertiesAnnotationFromPropertyList(properties)
169199
Expect(err).ToNot(HaveOccurred())
170200

171201
return map[string]string{
@@ -245,5 +275,54 @@ var _ = Describe("ClusterOperator controller", func() {
245275
}.String(),
246276
LastTransitionTime: fixedNow(),
247277
}))
278+
279+
By("setting upgradeable=false when invalid max versions are found")
280+
incompatible.SetAnnotations(withMax(`"garbage"`))
281+
282+
Eventually(func() error {
283+
return k8sClient.Update(ctx, incompatible)
284+
}).Should(Succeed())
285+
286+
_, parseErr := semver.ParseTolerant("garbage")
287+
Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
288+
err := k8sClient.Get(ctx, clusterOperatorName, co)
289+
return co.Status.Conditions, err
290+
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
291+
Type: configv1.OperatorUpgradeable,
292+
Status: configv1.ConditionFalse,
293+
Reason: IncompatibleOperatorsInstalled,
294+
Message: skews{
295+
{
296+
namespace: ns.GetName(),
297+
name: incompatible.GetName(),
298+
err: fmt.Errorf(`Failed to parse "garbage" as semver: %w`, parseErr),
299+
},
300+
}.String(),
301+
LastTransitionTime: fixedNow(),
302+
}))
303+
304+
By("setting upgradeable=false when more than one max version property is defined")
305+
incompatible.SetAnnotations(withMax(fmt.Sprintf(`"%s"`, clusterVersion), fmt.Sprintf(`"%s"`, next.String())))
306+
307+
Eventually(func() error {
308+
return k8sClient.Update(ctx, incompatible)
309+
}).Should(Succeed())
310+
311+
Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
312+
err := k8sClient.Get(ctx, clusterOperatorName, co)
313+
return co.Status.Conditions, err
314+
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
315+
Type: configv1.OperatorUpgradeable,
316+
Status: configv1.ConditionFalse,
317+
Reason: IncompatibleOperatorsInstalled,
318+
Message: skews{
319+
{
320+
namespace: ns.GetName(),
321+
name: incompatible.GetName(),
322+
err: fmt.Errorf(`Defining more than one "%s" property is not allowed`, MaxOpenShiftVersionProperty),
323+
},
324+
}.String(),
325+
LastTransitionTime: fixedNow(),
326+
}))
248327
})
249328
})

staging/operator-lifecycle-manager/pkg/controller/operators/openshift/helpers.go

Lines changed: 63 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package openshift
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"strings"
78

@@ -80,22 +81,47 @@ func versionsMatch(a []configv1.OperandVersion, b []configv1.OperandVersion) boo
8081
type skews []skew
8182

8283
func (s skews) String() string {
83-
var msg []string
84+
msg := make([]string, len(s))
85+
i, j := 0, len(s)-1
8486
for _, sk := range s {
85-
msg = append(msg, sk.String())
87+
m := sk.String()
88+
// Partial order: error skews first
89+
if sk.err != nil {
90+
msg[i] = m
91+
i++
92+
continue
93+
}
94+
msg[j] = m
95+
j--
8696
}
8797

88-
return "The following operators block OpenShift upgrades: " + strings.Join(msg, ",")
98+
return "ClusterServiceVersions blocking cluster upgrade: " + strings.Join(msg, ",")
8999
}
90100

91101
type skew struct {
92102
namespace string
93103
name string
94104
maxOpenShiftVersion string
105+
err error
95106
}
96107

97108
func (s skew) String() string {
98-
return fmt.Sprintf("Operator %s in namespace %s is not compatible with OpenShift versions greater than %s", s.name, s.namespace, s.maxOpenShiftVersion)
109+
if s.err != nil {
110+
return fmt.Sprintf("%s/%s has invalid %s properties: %s", s.namespace, s.name, MaxOpenShiftVersionProperty, s.err)
111+
}
112+
113+
return fmt.Sprintf("%s/%s is incompatible with OpenShift versions greater than %s", s.namespace, s.name, s.maxOpenShiftVersion)
114+
}
115+
116+
type transientError struct {
117+
error
118+
}
119+
120+
// transientErrors returns the result of stripping all wrapped errors not of type transientError from the given error.
121+
func transientErrors(err error) error {
122+
return utilerrors.FilterOut(err, func(e error) bool {
123+
return !errors.As(e, new(transientError))
124+
})
99125
}
100126

101127
func incompatibleOperators(ctx context.Context, cli client.Client) (skews, error) {
@@ -105,58 +131,59 @@ func incompatibleOperators(ctx context.Context, cli client.Client) (skews, error
105131
}
106132

107133
if next == nil {
108-
return nil, nil
134+
// Note: This shouldn't happen
135+
return nil, fmt.Errorf("Failed to determine next OpenShift Y-stream release")
109136
}
110137
next.Minor++
111138

112139
csvList := &operatorsv1alpha1.ClusterServiceVersionList{}
113140
if err := cli.List(ctx, csvList); err != nil {
114-
return nil, err
141+
return nil, &transientError{fmt.Errorf("Failed to list ClusterServiceVersions: %w", err)}
115142
}
116143

117-
var (
118-
s skews
119-
errs []error
120-
)
144+
var incompatible skews
121145
for _, csv := range csvList.Items {
122146
if csv.IsCopied() {
123147
continue
124148
}
125149

150+
s := skew{
151+
name: csv.GetName(),
152+
namespace: csv.GetNamespace(),
153+
}
126154
max, err := maxOpenShiftVersion(&csv)
127155
if err != nil {
128-
errs = append(errs, err)
156+
s.err = err
157+
incompatible = append(incompatible, s)
129158
continue
130159
}
160+
131161
if max == nil || max.GTE(*next) {
132162
continue
133163
}
164+
s.maxOpenShiftVersion = max.String()
134165

135-
s = append(s, skew{
136-
name: csv.GetName(),
137-
namespace: csv.GetNamespace(),
138-
maxOpenShiftVersion: max.String(),
139-
})
166+
incompatible = append(incompatible, s)
140167
}
141168

142-
return s, utilerrors.NewAggregate(errs)
169+
return incompatible, nil
143170
}
144171

145172
func desiredRelease(ctx context.Context, cli client.Client) (*semver.Version, error) {
146173
cv := configv1.ClusterVersion{}
147174
if err := cli.Get(ctx, client.ObjectKey{Name: "version"}, &cv); err != nil { // "version" is the name of OpenShift's ClusterVersion singleton
148-
return nil, err
175+
return nil, &transientError{fmt.Errorf("Failed to get ClusterVersion: %w", err)}
149176
}
150177

151178
v := cv.Status.Desired.Version
152179
if v == "" {
153180
// The release version hasn't been set yet
154-
return nil, nil
181+
return nil, fmt.Errorf("Desired release version missing from ClusterVersion")
155182
}
156183

157184
desired, err := semver.ParseTolerant(v)
158185
if err != nil {
159-
return nil, err
186+
return nil, fmt.Errorf("ClusterVersion has invalid desired release version: %w", err)
160187
}
161188

162189
return &desired, nil
@@ -178,57 +205,36 @@ func maxOpenShiftVersion(csv *operatorsv1alpha1.ClusterServiceVersion) (*semver.
178205
return nil, err
179206
}
180207

181-
// Take the highest semver if there's more than one max version specified
182-
var (
183-
max *semver.Version
184-
dups []semver.Version
185-
errs []error
186-
)
208+
var max *string
187209
for _, property := range properties {
188210
if property.Type != MaxOpenShiftVersionProperty {
189211
continue
190212
}
191213

192-
value := strings.Trim(property.Value, "\"")
193-
if value == "" {
194-
continue
195-
}
196-
197-
version, err := semver.ParseTolerant(value)
198-
if err != nil {
199-
errs = append(errs, err)
200-
continue
214+
if max != nil {
215+
return nil, fmt.Errorf(`Defining more than one "%s" property is not allowed`, MaxOpenShiftVersionProperty)
201216
}
202217

203-
if max == nil {
204-
max = &version
205-
continue
206-
}
207-
if version.LT(*max) {
208-
continue
209-
}
210-
if version.EQ(*max) {
211-
// Found a duplicate, mark it
212-
dups = append(dups, *max)
213-
}
218+
max = &property.Value
219+
}
214220

215-
max = &version
221+
if max == nil {
222+
return nil, nil
216223
}
217224

218-
// Return an error if THE max version has a duplicate (i.e. equivalent version)
219-
// Note: This may not be a problem since there should be no difference as far as blocking upgrades is concerned.
220-
// This is more for clear status messages.
221-
for _, dup := range dups {
222-
if max.EQ(dup) && max.String() != dup.String() { // "1.0.0" vs "1.0.0" is fine, but not "1.0.0" vs "1.0.0+1"
223-
errs = append(errs, fmt.Errorf("max openshift version ambiguous, equivalent versions %s and %s have been specified concurrently", max, dup))
224-
}
225+
// Account for any additional quoting
226+
value := strings.Trim(*max, "\"")
227+
if value == "" {
228+
// Handle "" separately, so parse doesn't treat it as a zero
229+
return nil, fmt.Errorf(`Value cannot be "" (an empty string)`)
225230
}
226231

227-
if len(errs) > 0 {
228-
return nil, utilerrors.NewAggregate(errs)
232+
version, err := semver.ParseTolerant(value)
233+
if err != nil {
234+
return nil, fmt.Errorf(`Failed to parse "%s" as semver: %w`, value, err)
229235
}
230236

231-
return max, nil
237+
return &version, nil
232238
}
233239

234240
func notCopiedSelector() (labels.Selector, error) {

0 commit comments

Comments
 (0)