Skip to content

Bug 1859755: fix(installplans): GC older installplans #1675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"reflect"
"sort"
"strings"
"sync"
"time"
Expand All @@ -22,6 +23,7 @@ import (
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
utilclock "k8s.io/apimachinery/pkg/util/clock"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -66,6 +68,8 @@ const (
roleKind = "Role"
roleBindingKind = "RoleBinding"
generatedByKey = "olm.generated-by"
maxInstallPlanCount = 5
maxDeletesPerSweep = 5
)

// Operator represents a Kubernetes operator that executes InstallPlans by
Expand Down Expand Up @@ -770,6 +774,8 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error {
"id": queueinformer.NewLoopID(),
})

o.gcInstallPlans(logger, namespace)

// get the set of sources that should be used for resolution and best-effort get their connections working
logger.Debug("resolving sources")

Expand Down Expand Up @@ -1173,6 +1179,77 @@ func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan) (bool, *v1alpha1.In
return unpacked, out, nil
}

// gcInstallPlans garbage collects installplans that are too old
// installplans are ownerrefd to all subscription inputs, so they will not otherwise
// be GCd unless all inputs have been deleted.
func (o *Operator) gcInstallPlans(log logrus.FieldLogger, namespace string) {
allIps, err := o.lister.OperatorsV1alpha1().InstallPlanLister().InstallPlans(namespace).List(labels.Everything())
if err != nil {
log.Warn("unable to list installplans for GC")
}

if len(allIps) <= maxInstallPlanCount {
return
}

// we only consider maxDeletesPerSweep more than the allowed number of installplans for delete at one time
ips := allIps
if len(ips) > maxInstallPlanCount + maxDeletesPerSweep {
ips = allIps[:maxInstallPlanCount+maxDeletesPerSweep]
}

byGen := map[int][]*v1alpha1.InstallPlan{}
for _, ip := range ips {
gen, ok := byGen[ip.Spec.Generation]
if !ok {
gen = make([]*v1alpha1.InstallPlan, 0)
}
byGen[ip.Spec.Generation] = append(gen, ip)
}

gens := make([]int, 0)
for i := range byGen {
gens = append(gens, i)
}

sort.Ints(gens)

toDelete := make([]*v1alpha1.InstallPlan, 0)

for _, i := range gens {
g := byGen[i]

if len(ips)-len(toDelete) <= maxInstallPlanCount {
break
}

// if removing all installplans at this generation doesn't dip below the max, safe to delete all of them
if len(ips)-len(toDelete)-len(g) >= maxInstallPlanCount {
toDelete = append(toDelete, g...)
continue
}

// CreationTimestamp sorting shouldn't ever be hit unless there is a bug that causes installplans to be
// generated without bumping the generation. It is here as a safeguard only.

// sort by creation time
sort.Slice(g, func(i, j int) bool {
if !g[i].CreationTimestamp.Equal(&g[j].CreationTimestamp) {
return g[i].CreationTimestamp.Before(&g[j].CreationTimestamp)
}
// final fallback to lexicographic sort, in case many installplans are created with the same timestamp
return g[i].GetName() < g[j].GetName()
})
toDelete = append(toDelete, g[:len(ips)-len(toDelete)-maxInstallPlanCount]...)
}

for _, i := range toDelete {
if err := o.client.OperatorsV1alpha1().InstallPlans(namespace).Delete(i.GetName(), &metav1.DeleteOptions{}); err != nil {
log.WithField("deleting", i.GetName()).WithError(err).Warn("error GCing old installplan - may have already been deleted")
}
}
}

func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
plan, ok := obj.(*v1alpha1.InstallPlan)
if !ok {
Expand Down
80 changes: 80 additions & 0 deletions pkg/controller/operators/catalog/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ import (
"errors"
"fmt"
"io/ioutil"
"math/rand"
"reflect"
"strings"
"testing"
"testing/quick"
"time"

"github.com/sirupsen/logrus"
Expand All @@ -27,6 +30,7 @@ import (
"k8s.io/apimachinery/pkg/types"
utilclock "k8s.io/apimachinery/pkg/util/clock"
utilyaml "k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/apiserver/pkg/storage/names"
fakedynamic "k8s.io/client-go/dynamic/fake"
"k8s.io/client-go/informers"
k8sfake "k8s.io/client-go/kubernetes/fake"
Expand Down Expand Up @@ -342,6 +346,82 @@ func TestRemoveDeprecatedStoredVersions(t *testing.T) {
}
}

type ipSet []v1alpha1.InstallPlan

func (ipSet) Generate(rand *rand.Rand, size int) reflect.Value {
ips := []v1alpha1.InstallPlan{}

// each i is the generation value
for i := 0; i < rand.Intn(size)+1; i++ {

// generate a few at each generation to account for bugs that don't increment the generation
for j := 0; j < rand.Intn(3); j++ {
ips = append(ips, v1alpha1.InstallPlan{
ObjectMeta: metav1.ObjectMeta{
Namespace: "ns",
Name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%d", i)),
},
Spec: v1alpha1.InstallPlanSpec{
Generation: i,
},
})
}
}
return reflect.ValueOf(ipSet(ips))
}

func TestGCInstallPlans(t *testing.T) {
f := func(ips ipSet) bool {
if len(ips) == 0 {
return true
}
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

var maxGen int64 = 0
for _, i := range ips {
if g := i.Generation; g > maxGen {
maxGen = g
}
}
objs := make([]runtime.Object, 0)
for _, i := range ips {
objs = append(objs, i.DeepCopy())
}
op, err := NewFakeOperator(ctx, "ns", []string{"ns"}, withClientObjs(objs...))
require.NoError(t, err)

out := make([]v1alpha1.InstallPlan, 0)
for {
op.gcInstallPlans(logrus.New(), "ns")
require.NoError(t, err)

outList, err := op.client.OperatorsV1alpha1().InstallPlans("ns").List(metav1.ListOptions{})
require.NoError(t, err)
out = outList.Items

if len(out) <= maxInstallPlanCount {
break
}
}

keptMax := false
for _, o := range out {
if o.Generation == maxGen {
keptMax = true
break
}
}
require.True(t, keptMax)

if len(ips) < maxInstallPlanCount {
return len(out) == len(ips)
}
return len(out) == maxInstallPlanCount
}
require.NoError(t, quick.Check(f, nil))
}

func TestExecutePlan(t *testing.T) {
namespace := "ns"

Expand Down