Bug 1859178: fix(installplans): GC older installplans #1669
@@ -6,6 +6,7 @@ import (
	"errors"
	"fmt"
	"reflect"
	"sort"
	"strings"
	"sync"
	"time"
@@ -22,6 +23,7 @@ import (
	extinf "k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime/schema"
	utilclock "k8s.io/apimachinery/pkg/util/clock"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
@@ -67,6 +69,8 @@ const (
	roleKind = "Role"
	roleBindingKind = "RoleBinding"
	generatedByKey = "olm.generated-by"
	maxInstallPlanCount = 5
	maxDeletesPerSweep = 5
)

// Operator represents a Kubernetes operator that executes InstallPlans by
@@ -777,6 +781,8 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error {
		"id": queueinformer.NewLoopID(),
	})

	o.gcInstallPlans(logger, namespace)

	// get the set of sources that should be used for resolution and best-effort get their connections working
	logger.Debug("resolving sources")
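The new call above hooks garbage collection into syncResolvingNamespace, so old InstallPlans are swept at the start of every namespace resolution sync, before catalog sources are resolved.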
@@ -1180,6 +1186,77 @@ func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan) (bool, *v1alpha1.In
	return unpacked, out, nil
}

// gcInstallPlans garbage collects installplans that are too old
// installplans are ownerrefd to all subscription inputs, so they will not otherwise
// be GCd unless all inputs have been deleted.
func (o *Operator) gcInstallPlans(log logrus.FieldLogger, namespace string) {
	allIps, err := o.lister.OperatorsV1alpha1().InstallPlanLister().InstallPlans(namespace).List(labels.Everything())
	if err != nil {
		log.Warn("unable to list installplans for GC")
	}

	if len(allIps) <= maxInstallPlanCount {
		return
	}

	// we only consider maxDeletesPerSweep more than the allowed number of installplans for delete at one time
	ips := allIps
	if len(ips) > maxInstallPlanCount+maxDeletesPerSweep {
		ips = allIps[:maxInstallPlanCount+maxDeletesPerSweep]
Review thread on the line above:

Comment: so here we limit the number of extra installplans removed to 8 at a time? Makes sense, but for these clusters with 1000s of installplans they could be waiting a while. I was thinking of a more async process that goes through and deletes these extra ips, eventually reporting status back to OLM. It's definitely more work though, and this is an urgent bugfix, so idk.

Comment: I don't fully understand the slicing with indices here. Is this array ordered/sorted? Otherwise, why just randomly pick a bunch of InstallPlans from the first 10 in the array?

Comment: Also, there are a few clusters with InstallPlans on the order of 1000s. I know one of them has at least 1200. Given the sync period of 15 mins (though in reality the namespace sync happens more often due to resource changes) and the limit of 5 IPs per sweep, would this take a bit too much time to clean up, especially in the case of an idle cluster? In fact, I would prefer to keep the InstallPlans that are currently referenced in the Subscriptions and clean up the others at a higher rate. Maybe this can be changed in the future instead of now, due to the urgent need of getting this in.

Comment: IIUC, for each sweep we take up to

njhale marked this conversation as resolved.
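For reference: with the constants introduced in this patch, each sweep considers at most maxInstallPlanCount + maxDeletesPerSweep = 10 InstallPlans from the lister (whose order is not guaranteed, hence the question about sorting above) and, because the loop below stops once only maxInstallPlanCount candidates would remain, deletes at most 5 of them, preferring the lowest spec.generation values. In an otherwise idle namespace syncing every 15 minutes, a backlog of ~1200 InstallPlans would therefore take roughly 240 sweeps, i.e. a couple of days, to drain.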
	}
	byGen := map[int][]*v1alpha1.InstallPlan{}
	for _, ip := range ips {
		gen, ok := byGen[ip.Spec.Generation]
		if !ok {
			gen = make([]*v1alpha1.InstallPlan, 0)
		}
		byGen[ip.Spec.Generation] = append(gen, ip)
	}

	gens := make([]int, 0)
	for i := range byGen {
		gens = append(gens, i)
	}

	sort.Ints(gens)

	toDelete := make([]*v1alpha1.InstallPlan, 0)

	for _, i := range gens {
		g := byGen[i]

		if len(ips)-len(toDelete) <= maxInstallPlanCount {
			break
		}

		// if removing all installplans at this generation doesn't dip below the max, safe to delete all of them
		if len(ips)-len(toDelete)-len(g) >= maxInstallPlanCount {
			toDelete = append(toDelete, g...)
			continue
		}

		// CreationTimestamp sorting shouldn't ever be hit unless there is a bug that causes installplans to be
		// generated without bumping the generation. It is here as a safeguard only.

		// sort by creation time
		sort.Slice(g, func(i, j int) bool {
			if !g[i].CreationTimestamp.Equal(&g[j].CreationTimestamp) {
				return g[i].CreationTimestamp.Before(&g[j].CreationTimestamp)
			}
			// final fallback to lexicographic sort, in case many installplans are created with the same timestamp
			return g[i].GetName() < g[j].GetName()
		})
		toDelete = append(toDelete, g[:len(ips)-len(toDelete)-maxInstallPlanCount]...)
	}

	for _, i := range toDelete {
		if err := o.client.OperatorsV1alpha1().InstallPlans(namespace).Delete(context.TODO(), i.GetName(), metav1.DeleteOptions{}); err != nil {
Review thread on the Delete call above:

Comment: I'm thinking we want to set the

Comment: If the

			log.WithField("deleting", i.GetName()).WithError(err).Warn("error GCing old installplan - may have already been deleted")
		}
	}
}
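The truncated review thread above appears to concern the options passed to this Delete call. Purely as an illustrative sketch, and not something this PR implements: because toDelete is built from a cache-backed lister, a UID precondition would make the API server refuse the delete if the live object's UID no longer matches the listed copy. The apierrors alias (for k8s.io/apimachinery/pkg/api/errors) and the placement inside gcInstallPlans are assumptions made for the sake of the example.

	// Hypothetical variant of the delete loop (illustration only): a UID
	// precondition guards against removing a different object that now
	// holds the same name as the stale, listed copy.
	for _, i := range toDelete {
		uid := i.GetUID()
		opts := metav1.DeleteOptions{Preconditions: &metav1.Preconditions{UID: &uid}}
		err := o.client.OperatorsV1alpha1().InstallPlans(namespace).Delete(context.TODO(), i.GetName(), opts)
		if err != nil && !apierrors.IsNotFound(err) {
			log.WithField("deleting", i.GetName()).WithError(err).Warn("error GCing old installplan")
		}
	}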

func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
	plan, ok := obj.(*v1alpha1.InstallPlan)
	if !ok {
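One reviewer above would prefer to keep the InstallPlans that Subscriptions still reference and garbage-collect the rest at a higher rate. That is not part of this change, but a minimal sketch of such a pre-filter, assuming it sits in the same package as gcInstallPlans and that Subscription.Status.InstallPlanRef points at the currently referenced InstallPlan, might look like this:

// filterReferencedInstallPlans is a hypothetical helper (not in this PR): it
// drops from the GC candidate set any InstallPlan that a Subscription in the
// namespace still references via status.installPlanRef.
func filterReferencedInstallPlans(subs []*v1alpha1.Subscription, ips []*v1alpha1.InstallPlan) []*v1alpha1.InstallPlan {
	referenced := map[string]struct{}{}
	for _, sub := range subs {
		if ref := sub.Status.InstallPlanRef; ref != nil {
			referenced[ref.Name] = struct{}{}
		}
	}

	kept := make([]*v1alpha1.InstallPlan, 0, len(ips))
	for _, ip := range ips {
		if _, ok := referenced[ip.GetName()]; ok {
			// still the current plan for some Subscription; keep it out of the GC candidates
			continue
		}
		kept = append(kept, ip)
	}
	return kept
}

gcInstallPlans could apply such a filter to allIps before the count-based limits, so the per-sweep delete budget is spent only on unreferenced plans.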