Skip to content

Sync 2023 10 05 no steve #579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/mikefarah/yq/v3 v3.0.0-20201202084205-8846255d1c37
github.com/onsi/ginkgo/v2 v2.9.5
github.com/openshift/api v3.9.0+incompatible
github.com/operator-framework/api v0.17.8-0.20230908201838-28c6773d2b74
github.com/operator-framework/api v0.17.8-0.20230929142219-7961b0208d99
github.com/operator-framework/operator-lifecycle-manager v0.0.0-00010101000000-000000000000
github.com/operator-framework/operator-registry v1.29.0
github.com/sirupsen/logrus v1.9.2
Expand Down
4 changes: 4 additions & 0 deletions manifests/0000_50_olm_00-olmconfigs.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ spec:
disableCopiedCSVs:
description: DisableCopiedCSVs is used to disable OLM's "Copied CSV" feature for operators installed at the cluster scope, where a cluster scoped operator is one that has been installed in an OperatorGroup that targets all namespaces. When reenabled, OLM will recreate the "Copied CSVs" for each cluster scoped operator.
type: boolean
packageServerSyncInterval:
description: PackageServerSyncInterval is used to define the sync interval for packageserver pods. Packageserver pods periodically check the status of CatalogSources; this specifies the period using duration format (e.g. "60m"). For this parameter, only hours ("h"), minutes ("m"), and seconds ("s") may be specified. When not specified, the period defaults to the value specified within the packageserver.
type: string
pattern: ^([0-9]+(\.[0-9]+)?(s|m|h))+$
status:
description: OLMConfigStatus is the status for an OLMConfig resource.
type: object
Expand Down
4 changes: 4 additions & 0 deletions staging/api/crds/operators.coreos.com_olmconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ spec:
disableCopiedCSVs:
description: DisableCopiedCSVs is used to disable OLM's "Copied CSV" feature for operators installed at the cluster scope, where a cluster scoped operator is one that has been installed in an OperatorGroup that targets all namespaces. When reenabled, OLM will recreate the "Copied CSVs" for each cluster scoped operator.
type: boolean
packageServerSyncInterval:
description: PackageServerSyncInterval is used to define the sync interval for packageserver pods. Packageserver pods periodically check the status of CatalogSources; this specifies the period using duration format (e.g. "60m"). For this parameter, only hours ("h"), minutes ("m"), and seconds ("s") may be specified. When not specified, the period defaults to the value specified within the packageserver.
type: string
pattern: ^([0-9]+(\.[0-9]+)?(s|m|h))+$
status:
description: OLMConfigStatus is the status for an OLMConfig resource.
type: object
Expand Down
2 changes: 1 addition & 1 deletion staging/api/crds/zz_defs.go

Large diffs are not rendered by default.

70 changes: 70 additions & 0 deletions staging/api/pkg/operators/v1/olmconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,84 @@ package v1

import (
"testing"
"time"

"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// boolPointer returns a pointer to a bool holding the value of in.
func boolPointer(in bool) *bool {
	value := in
	return &value
}
// TestPackageServerSyncInterval verifies that OLMConfig.PackageServerSyncInterval
// returns nil whenever the config, its Features, or the interval field is
// unset, and otherwise returns the configured duration.
func TestPackageServerSyncInterval(t *testing.T) {
	five := time.Minute * 5
	one := time.Second * 60

	fiveParsed, err := time.ParseDuration("5m")
	require.NoError(t, err)

	oneParsed, err := time.ParseDuration("60s")
	require.NoError(t, err)

	tests := []struct {
		description string
		olmConfig   *OLMConfig
		expected    *time.Duration
	}{
		{
			description: "NilConfig",
			olmConfig:   nil,
			expected:    nil,
		},
		{
			description: "MissingSpec",
			olmConfig:   &OLMConfig{},
			expected:    nil,
		},
		{
			description: "MissingFeatures",
			olmConfig: &OLMConfig{
				Spec: OLMConfigSpec{},
			},
			expected: nil,
		},
		{
			// Features is present but the interval is left nil, so this case
			// covers the last nil guard rather than repeating MissingFeatures.
			description: "MissingPackageServerInterval",
			olmConfig: &OLMConfig{
				Spec: OLMConfigSpec{
					Features: &Features{},
				},
			},
			expected: nil,
		},
		{
			description: "PackageServerInterval5m",
			olmConfig: &OLMConfig{
				Spec: OLMConfigSpec{
					Features: &Features{
						PackageServerSyncInterval: &metav1.Duration{Duration: fiveParsed},
					},
				},
			},
			expected: &five,
		},
		{
			description: "PackageServerInterval60s",
			olmConfig: &OLMConfig{
				Spec: OLMConfigSpec{
					Features: &Features{
						PackageServerSyncInterval: &metav1.Duration{Duration: oneParsed},
					},
				},
			},
			expected: &one,
		},
	}

	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			require.EqualValues(t, tt.expected, tt.olmConfig.PackageServerSyncInterval())
		})
	}
}
func TestCopiedCSVsAreEnabled(t *testing.T) {
tests := []struct {
description string
Expand Down
19 changes: 19 additions & 0 deletions staging/api/pkg/operators/v1/olmconfig_types.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package v1

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -23,6 +25,16 @@ type Features struct {
// When reenabled, OLM will recreate the "Copied CSVs" for each
// cluster scoped operator.
DisableCopiedCSVs *bool `json:"disableCopiedCSVs,omitempty"`
// PackageServerSyncInterval is used to define the sync interval for
// packageserver pods. Packageserver pods periodically check the
// status of CatalogSources; this specifies the period using duration
// format (e.g. "60m"). For this parameter, only hours ("h"), minutes
// ("m"), and seconds ("s") may be specified. When not specified, the
// period defaults to the value specified within the packageserver.
// +optional
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(s|m|h))+$"
PackageServerSyncInterval *metav1.Duration `json:"packageServerSyncInterval,omitempty"`
}

// OLMConfigStatus is the status for an OLMConfig resource.
Expand Down Expand Up @@ -69,3 +81,10 @@ func (config *OLMConfig) CopiedCSVsAreEnabled() bool {

return !*config.Spec.Features.DisableCopiedCSVs
}

// PackageServerSyncInterval returns the sync interval stored in
// Spec.Features.PackageServerSyncInterval. It returns nil when the receiver,
// its Features, or the interval field is nil. The returned pointer refers to
// the Duration held inside the config, not to a copy.
func (config *OLMConfig) PackageServerSyncInterval() *time.Duration {
	if config == nil {
		return nil
	}
	features := config.Spec.Features
	if features == nil {
		return nil
	}
	interval := features.PackageServerSyncInterval
	if interval == nil {
		return nil
	}
	return &interval.Duration
}
5 changes: 5 additions & 0 deletions staging/api/pkg/operators/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions staging/operator-lifecycle-manager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ Learn more about the components used by OLM by reading about the [architecture]

OLM standardizes interactions with operators by requiring that the interface to an operator be via the Kubernetes API. Because we expect users to define the interfaces to their applications, OLM currently uses CRDs to define the Kubernetes API interactions.

Examples: [EtcdCluster CRD](https://github.com/redhat-openshift-ecosystem/community-operators-prod/blob/main/operators/etcd/0.9.4/etcdclusters.etcd.database.coreos.com.crd.yaml),
[EtcdBackup CRD](https://github.com/redhat-openshift-ecosystem/community-operators-prod/blob/main/operators/etcd/0.9.4/etcdbackups.etcd.database.coreos.com.crd.yaml)
Examples: [EtcdCluster CRD](https://github.com/redhat-openshift-ecosystem/community-operators-prod/blob/main/operators/etcd/0.9.4/manifests/etcdclusters.etcd.database.coreos.com.crd.yaml),
[EtcdBackup CRD](https://github.com/redhat-openshift-ecosystem/community-operators-prod/blob/main/operators/etcd/0.9.4/manifests/etcdbackups.etcd.database.coreos.com.crd.yaml)

## Descriptors

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ spec:
disableCopiedCSVs:
description: DisableCopiedCSVs is used to disable OLM's "Copied CSV" feature for operators installed at the cluster scope, where a cluster scoped operator is one that has been installed in an OperatorGroup that targets all namespaces. When reenabled, OLM will recreate the "Copied CSVs" for each cluster scoped operator.
type: boolean
packageServerSyncInterval:
description: PackageServerSyncInterval is used to define the sync interval for packageserver pods. Packageserver pods periodically check the status of CatalogSources; this specifies the period using duration format (e.g. "60m"). For this parameter, only hours ("h"), minutes ("m"), and seconds ("s") may be specified. When not specified, the period defaults to the value specified within the packageserver.
type: string
pattern: ^([0-9]+(\.[0-9]+)?(s|m|h))+$
status:
description: OLMConfigStatus is the status for an OLMConfig resource.
type: object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ spec:
{{- if .Values.debug }}
- --debug
{{- end }}
{{- if .Values.package.interval }}
- --interval
- {{ .Values.package.interval }}
{{- end }}
{{- if .Values.package.commandArgs }}
- {{ .Values.package.commandArgs }}
{{- end }}
Expand Down
2 changes: 1 addition & 1 deletion staging/operator-lifecycle-manager/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ require (
github.com/onsi/gomega v1.27.7
github.com/openshift/api v3.9.0+incompatible
github.com/openshift/client-go v0.0.0-20220525160904-9e1acff93e4a
github.com/operator-framework/api v0.17.8-0.20230908201838-28c6773d2b74
github.com/operator-framework/api v0.17.8-0.20230929142219-7961b0208d99
github.com/operator-framework/operator-registry v1.29.0
github.com/otiai10/copy v1.12.0
github.com/pkg/errors v0.9.1
Expand Down
4 changes: 2 additions & 2 deletions staging/operator-lifecycle-manager/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,8 @@ github.com/openshift/api v0.0.0-20221021112143-4226c2167e40 h1:PxjGCA72RtsdHWToZ
github.com/openshift/api v0.0.0-20221021112143-4226c2167e40/go.mod h1:aQ6LDasvHMvHZXqLHnX2GRmnfTWCF/iIwz8EMTTIE9A=
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c h1:CV76yFOTXmq9VciBR3Bve5ZWzSxdft7gaMVB3kS0rwg=
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c/go.mod h1:lFMO8mLHXWFzSdYvGNo8ivF9SfF6zInA8ZGw4phRnUE=
github.com/operator-framework/api v0.17.8-0.20230908201838-28c6773d2b74 h1:BNzxQqrfGRaEuw5SliqTFvloLE76L1MAo/uzbszzrPw=
github.com/operator-framework/api v0.17.8-0.20230908201838-28c6773d2b74/go.mod h1:Wbg136l1Po6zqG2QcTN1QZ8dbT4BQvNlQDM9tmQYvz0=
github.com/operator-framework/api v0.17.8-0.20230929142219-7961b0208d99 h1:0x4FfGvKIEmpXnhqX9OumEnvJWn51zUVwvFulh17tu4=
github.com/operator-framework/api v0.17.8-0.20230929142219-7961b0208d99/go.mod h1:Wbg136l1Po6zqG2QcTN1QZ8dbT4BQvNlQDM9tmQYvz0=
github.com/operator-framework/operator-registry v1.29.0 h1:HMmVTiuOAGoHLzYqR9Lr2QSOqbVzA50++ojNl2mu9f4=
github.com/operator-framework/operator-registry v1.29.0/go.mod h1:4rVQu/cOuCtVt3JzKsAmwyq2lsiu9uPaH9nYNfnqj9o=
github.com/otiai10/copy v1.12.0 h1:cLMgSQnXBs1eehF0Wy/FAGsgDTDmAqFR7rQylBb1nDY=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"crypto/sha256"
"fmt"
"sort"
"strings"
"time"

Expand All @@ -18,6 +19,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8slabels "k8s.io/apimachinery/pkg/labels"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/client-go/kubernetes"
listersbatchv1 "k8s.io/client-go/listers/batch/v1"
listerscorev1 "k8s.io/client-go/listers/core/v1"
Expand All @@ -41,6 +43,13 @@ const (
// e.g 1m30s
BundleUnpackTimeoutAnnotationKey = "operatorframework.io/bundle-unpack-timeout"
BundleUnpackPodLabel = "job-name"

// BundleUnpackRetryMinimumIntervalAnnotationKey sets a minimum interval to wait before
// attempting to recreate a failed unpack job for a bundle.
BundleUnpackRetryMinimumIntervalAnnotationKey = "operatorframework.io/bundle-unpack-min-retry-interval"

// bundleUnpackRefLabel is used to filter for all unpack jobs for a specific bundle.
bundleUnpackRefLabel = "operatorframework.io/bundle-unpack-ref"
)

type BundleUnpackResult struct {
Expand Down Expand Up @@ -89,6 +98,7 @@ func (c *ConfigMapUnpacker) job(cmRef *corev1.ObjectReference, bundlePath string
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
install.OLMManagedLabelKey: install.OLMManagedLabelValue,
bundleUnpackRefLabel: cmRef.Name,
},
},
Spec: batchv1.JobSpec{
Expand Down Expand Up @@ -287,7 +297,7 @@ func (c *ConfigMapUnpacker) job(cmRef *corev1.ObjectReference, bundlePath string
//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Unpacker

type Unpacker interface {
UnpackBundle(lookup *operatorsv1alpha1.BundleLookup, timeout time.Duration) (result *BundleUnpackResult, err error)
UnpackBundle(lookup *operatorsv1alpha1.BundleLookup, timeout, retryInterval time.Duration) (result *BundleUnpackResult, err error)
}

type ConfigMapUnpacker struct {
Expand Down Expand Up @@ -448,7 +458,7 @@ const (
NotUnpackedMessage = "bundle contents have not yet been persisted to installplan status"
)

func (c *ConfigMapUnpacker) UnpackBundle(lookup *operatorsv1alpha1.BundleLookup, timeout time.Duration) (result *BundleUnpackResult, err error) {
func (c *ConfigMapUnpacker) UnpackBundle(lookup *operatorsv1alpha1.BundleLookup, timeout, retryInterval time.Duration) (result *BundleUnpackResult, err error) {
result = newBundleUnpackResult(lookup)

// if bundle lookup failed condition already present, then there is nothing more to do
Expand Down Expand Up @@ -510,7 +520,7 @@ func (c *ConfigMapUnpacker) UnpackBundle(lookup *operatorsv1alpha1.BundleLookup,
secrets = append(secrets, corev1.LocalObjectReference{Name: secretName})
}
var job *batchv1.Job
job, err = c.ensureJob(cmRef, result.Path, secrets, timeout)
job, err = c.ensureJob(cmRef, result.Path, secrets, timeout, retryInterval)
if err != nil || job == nil {
// ensureJob can return nil if the job present does not match the expected job (spec and ownerefs)
// The current job is deleted in that case so UnpackBundle needs to be retried
Expand Down Expand Up @@ -649,16 +659,39 @@ func (c *ConfigMapUnpacker) ensureConfigmap(csRef *corev1.ObjectReference, name
return
}

func (c *ConfigMapUnpacker) ensureJob(cmRef *corev1.ObjectReference, bundlePath string, secrets []corev1.LocalObjectReference, timeout time.Duration) (job *batchv1.Job, err error) {
func (c *ConfigMapUnpacker) ensureJob(cmRef *corev1.ObjectReference, bundlePath string, secrets []corev1.LocalObjectReference, timeout time.Duration, unpackRetryInterval time.Duration) (job *batchv1.Job, err error) {
fresh := c.job(cmRef, bundlePath, secrets, timeout)
job, err = c.jobLister.Jobs(fresh.GetNamespace()).Get(fresh.GetName())
var jobs, toDelete []*batchv1.Job
jobs, err = c.jobLister.Jobs(fresh.GetNamespace()).List(k8slabels.ValidatedSetSelector{bundleUnpackRefLabel: cmRef.Name})
if err != nil {
if apierrors.IsNotFound(err) {
job, err = c.client.BatchV1().Jobs(fresh.GetNamespace()).Create(context.TODO(), fresh, metav1.CreateOptions{})
}

return
}
if len(jobs) == 0 {
job, err = c.client.BatchV1().Jobs(fresh.GetNamespace()).Create(context.TODO(), fresh, metav1.CreateOptions{})
return
}

maxRetainedJobs := 5 // TODO: make this configurable
job, toDelete = sortUnpackJobs(jobs, maxRetainedJobs) // choose latest or on-failed job attempt

// only check for retries if an unpackRetryInterval is specified
if unpackRetryInterval > 0 {
if _, isFailed := getCondition(job, batchv1.JobFailed); isFailed {
// Look for other unpack jobs for the same bundle
if cond, failed := getCondition(job, batchv1.JobFailed); failed {
if time.Now().After(cond.LastTransitionTime.Time.Add(unpackRetryInterval)) {
fresh.SetName(names.SimpleNameGenerator.GenerateName(fresh.GetName()))
job, err = c.client.BatchV1().Jobs(fresh.GetNamespace()).Create(context.TODO(), fresh, metav1.CreateOptions{})
}
}

// cleanup old failed jobs, but don't clean up successful jobs to avoid repeat unpacking
for _, j := range toDelete {
_ = c.client.BatchV1().Jobs(j.GetNamespace()).Delete(context.TODO(), j.GetName(), metav1.DeleteOptions{})
}
return
}
}

if equality.Semantic.DeepDerivative(fresh.GetOwnerReferences(), job.GetOwnerReferences()) && equality.Semantic.DeepDerivative(fresh.Spec, job.Spec) {
return
Expand Down Expand Up @@ -801,6 +834,37 @@ func getCondition(job *batchv1.Job, conditionType batchv1.JobConditionType) (con
return
}

// sortUnpackJobs sorts jobs in place and selects the job to act on together
// with the jobs that may be cleaned up.
//
// Jobs that getCondition does not report as failed sort ahead of failed ones;
// jobs with the same failed status are ordered by the LastTransitionTime of
// the condition returned by getCondition, most recent first. The first job
// after sorting is returned as latest.
//
// toDelete is only populated when len(jobs) exceeds maxRetainedJobs. With
// maxRetainedJobs == 0 it is every job except latest; otherwise it holds the
// jobs ranked directly after the first maxRetainedJobs, capped at
// maxRetainedJobs entries per call. Both results are nil for an empty input.
func sortUnpackJobs(jobs []*batchv1.Job, maxRetainedJobs int) (latest *batchv1.Job, toDelete []*batchv1.Job) {
	total := len(jobs)
	if total == 0 {
		return nil, nil
	}

	// Comparator: non-failed before failed, then newest transition first.
	newerFirst := func(a, b int) bool {
		condA, failedA := getCondition(jobs[a], batchv1.JobFailed)
		condB, failedB := getCondition(jobs[b], batchv1.JobFailed)
		if failedA == failedB {
			return condA.LastTransitionTime.After(condB.LastTransitionTime.Time)
		}
		return !failedA
	}
	sort.Slice(jobs, newerFirst)
	latest = jobs[0]

	switch {
	case total <= maxRetainedJobs:
		// Nothing exceeds the retention limit.
		return latest, nil
	case maxRetainedJobs == 0:
		return latest, jobs[1:]
	}

	// Collect at most maxRetainedJobs jobs, starting right after the retained
	// window; anything ranked beyond that is left untouched by this call.
	for idx := maxRetainedJobs; idx < 2*maxRetainedJobs && idx < total; idx++ {
		toDelete = append(toDelete, jobs[idx])
	}
	return latest, toDelete
}

// OperatorGroupBundleUnpackTimeout returns bundle timeout from annotation if specified.
// If the timeout annotation is not set, return timeout < 0 which is subsequently ignored.
// This overrides the --bundle-unpack-timeout flag value on a per-OperatorGroup basis.
Expand All @@ -827,3 +891,28 @@ func OperatorGroupBundleUnpackTimeout(ogLister v1listers.OperatorGroupNamespaceL

return d, nil
}

// OperatorGroupBundleUnpackRetryInterval reads the minimum bundle unpack retry
// interval from the BundleUnpackRetryMinimumIntervalAnnotationKey annotation of
// the single OperatorGroup returned by ogLister.
//
// It returns 0 and a nil error when the annotation is absent. A non-nil error
// is returned when listing fails, when the lister does not yield exactly one
// OperatorGroup, or when the annotation value cannot be parsed by
// time.ParseDuration. An interval > 0 is the minimum time to wait before a
// failed unpack job is recreated.
func OperatorGroupBundleUnpackRetryInterval(ogLister v1listers.OperatorGroupNamespaceLister) (time.Duration, error) {
	groups, err := ogLister.List(k8slabels.Everything())
	if err != nil {
		return 0, err
	}
	if count := len(groups); count != 1 {
		return 0, fmt.Errorf("found %d operatorGroups, expected 1", count)
	}

	raw, found := groups[0].GetAnnotations()[BundleUnpackRetryMinimumIntervalAnnotationKey]
	if !found {
		return 0, nil
	}

	interval, parseErr := time.ParseDuration(raw)
	if parseErr != nil {
		return 0, fmt.Errorf("failed to parse unpack retry annotation(%s: %s): %w", BundleUnpackRetryMinimumIntervalAnnotationKey, raw, parseErr)
	}
	return interval, nil
}
Loading