Skip to content

Bug 1851095: Delete subscription metric when an operator is uninstalled #1746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions pkg/controller/operators/catalog/subscription/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (s *subscriptionSyncer) Sync(ctx context.Context, event kubestate.ResourceE
return err
}

s.recordMetrics(res)
metrics.EmitSubMetric(res)

logger := s.logger.WithFields(logrus.Fields{
"reconciling": fmt.Sprintf("%T", res),
Expand All @@ -68,8 +68,10 @@ func (s *subscriptionSyncer) Sync(ctx context.Context, event kubestate.ResourceE
initial = initial.Add()
case kubestate.ResourceUpdated:
initial = initial.Update()
metrics.UpdateSubsSyncCounterStorage(res)
case kubestate.ResourceDeleted:
initial = initial.Delete()
metrics.DeleteSubsMetric(res)
}

reconciled, err := s.reconcilers.Reconcile(ctx, initial)
Expand All @@ -85,15 +87,6 @@ func (s *subscriptionSyncer) Sync(ctx context.Context, event kubestate.ResourceE
return nil
}

func (s *subscriptionSyncer) recordMetrics(sub *v1alpha1.Subscription) {
// sub.Spec is not a required field.
if sub.Spec == nil {
return
}

metrics.CounterForSubscription(sub.GetName(), sub.Status.InstalledCSV, sub.Spec.Channel, sub.Spec.Package).Inc()
}

func (s *subscriptionSyncer) Notify(event kubestate.ResourceEvent) {
s.notify(event)
}
Expand Down
53 changes: 53 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,19 @@ var (
},
[]string{NAMESPACE_LABEL, NAME_LABEL, VERSION_LABEL, PHASE_LABEL, REASON_LABEL},
)

// subscriptionSyncCounters keeps a record of the promethues counters emitted by
// Subscription objects. The key of a record is the Subscription name, while the value
// is struct containing label values used in the counter
subscriptionSyncCounters = make(map[string]subscriptionSyncLabelValues)
)

type subscriptionSyncLabelValues struct {
installedCSV string
pkg string
channel string
}

func RegisterOLM() {
prometheus.MustRegister(csvCount)
prometheus.MustRegister(csvSucceeded)
Expand Down Expand Up @@ -217,3 +228,45 @@ func EmitCSVMetric(oldCSV *olmv1alpha1.ClusterServiceVersion, newCSV *olmv1alpha
csvAbnormal.WithLabelValues(newCSV.Namespace, newCSV.Name, newCSV.Spec.Version.String(), string(newCSV.Status.Phase), string(newCSV.Status.Reason)).Set(1)
}
}

func EmitSubMetric(sub *olmv1alpha1.Subscription) {
if sub.Spec == nil {
return
}
SubscriptionSyncCount.WithLabelValues(sub.GetName(), sub.Status.InstalledCSV, sub.Spec.Channel, sub.Spec.Package).Inc()
if _, present := subscriptionSyncCounters[sub.GetName()]; !present {
subscriptionSyncCounters[sub.GetName()] = subscriptionSyncLabelValues{
installedCSV: sub.Status.InstalledCSV,
pkg: sub.Spec.Package,
channel: sub.Spec.Channel,
}
}
}

func DeleteSubsMetric(sub *olmv1alpha1.Subscription) {
if sub.Spec == nil {
return
}
SubscriptionSyncCount.DeleteLabelValues(sub.GetName(), sub.Status.InstalledCSV, sub.Spec.Channel, sub.Spec.Package)
}

func UpdateSubsSyncCounterStorage(sub *olmv1alpha1.Subscription) {
if sub.Spec == nil {
return
}
counterValues := subscriptionSyncCounters[sub.GetName()]

if sub.Spec.Channel != counterValues.channel ||
sub.Spec.Package != counterValues.pkg ||
sub.Status.InstalledCSV != counterValues.installedCSV {

// Delete metric will label values of old Subscription first
SubscriptionSyncCount.DeleteLabelValues(sub.GetName(), counterValues.installedCSV, counterValues.channel, counterValues.pkg)

counterValues.installedCSV = sub.Status.InstalledCSV
counterValues.pkg = sub.Spec.Package
counterValues.channel = sub.Spec.Channel

subscriptionSyncCounters[sub.GetName()] = counterValues
}
}
150 changes: 136 additions & 14 deletions test/e2e/metrics_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,33 @@
package e2e

import (
"strings"
"testing"
"time"

log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/wait"

"github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
)

// TestMetrics tests the metrics endpoint of the OLM pod.
func TestMetricsEndpoint(t *testing.T) {
const (
// RetryInterval defines the frequency at which we check for updates against the
// k8s api when waiting for a specific condition to be true.
RetryInterval = time.Second * 5

// Timeout defines the amount of time we should spend querying the k8s api
// when waiting for a specific condition to be true.
Timeout = time.Minute * 5
)

// TestCSVMetrics tests the metrics endpoint of the OLM pod for metrics emitted by CSVs.
func TestCSVMetrics(t *testing.T) {
c := newKubeClient(t)
crc := newCRClient(t)

Expand All @@ -41,7 +55,7 @@ func TestMetricsEndpoint(t *testing.T) {
_, err = fetchCSV(t, crc, failingCSV.Name, testNamespace, csvFailedChecker)
require.NoError(t, err)

rawOutput, err := getMetricsFromPod(t, c, getOLMPodName(t, c), operatorNamespace, "8081")
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=olm-operator"), operatorNamespace, "8081")
if err != nil {
t.Fatalf("Metrics test failed: %v\n", err)
}
Expand All @@ -56,16 +70,128 @@ func TestMetricsEndpoint(t *testing.T) {

cleanupCSV()

rawOutput, err = getMetricsFromPod(t, c, getOLMPodName(t, c), operatorNamespace, "8081")
rawOutput, err = getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=olm-operator"), operatorNamespace, "8081")
if err != nil {
t.Fatalf("Failed to retrieve metrics from OLM pod because of: %v\n", err)
}
require.NotContains(t, rawOutput, "csv_abnormal{name=\""+failingCSV.Name+"\"")
require.NotContains(t, rawOutput, "csv_succeeded{name=\""+failingCSV.Name+"\"")
}

func getOLMPodName(t *testing.T, client operatorclient.ClientInterface) string {
listOptions := metav1.ListOptions{LabelSelector: "app=olm-operator"}
func TestSubscriptionMetrics(t *testing.T) {
c := newKubeClient(t)
crc := newCRClient(t)

subscriptionCleanup, subscription := createSubscription(t, crc, testNamespace, "metric-subscription", testPackageName, stableChannel, v1alpha1.ApprovalManual)

err := wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=\"metric-subscription\"") &&
strings.Contains(rawOutput, "channel=\""+stableChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return true, nil
}
return false, nil
})
require.NoError(t, err)

updatedSubscription, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(subscription.GetName(), metav1.GetOptions{})
require.NoError(t, err)

updatedSubscription.Spec.Channel = betaChannel
updateSubscription(t, crc, updatedSubscription)

err = wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=\"metric-subscription\"") &&
strings.Contains(rawOutput, "channel=\""+stableChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return false, nil
}
return true, nil
})
require.NoError(t, err)

err = wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=\"metric-subscription\"") &&
strings.Contains(rawOutput, "channel=\""+betaChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return true, nil
}
return false, nil
})
require.NoError(t, err)

updatedSubscription, err = crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(subscription.GetName(), metav1.GetOptions{})
require.NoError(t, err)

updatedSubscription.Spec.Channel = alphaChannel
updateSubscription(t, crc, updatedSubscription)

err = wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=\"metric-subscription\"") &&
strings.Contains(rawOutput, "channel=\""+betaChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return false, nil
}
return true, nil
})
require.NoError(t, err)

err = wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=\"metric-subscription\"") &&
strings.Contains(rawOutput, "channel=\""+alphaChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return true, nil
}
return false, nil
})
require.NoError(t, err)

if subscriptionCleanup != nil {
subscriptionCleanup()
}
err = wait.PollImmediate(RetryInterval, Timeout, func() (done bool, err error) {
rawOutput, err := getMetricsFromPod(t, c, getPodWithLabel(t, c, "app=catalog-operator"), operatorNamespace, "8081")
if err != nil {
return false, err
}
if strings.Contains(rawOutput, "subscription_sync_total") &&
strings.Contains(rawOutput, "name=metric-subscription") &&
strings.Contains(rawOutput, "channel=\""+alphaChannel+"\"") &&
strings.Contains(rawOutput, "package=\""+testPackageName+"\"") {
return false, nil
}
return true, nil
})
require.NoError(t, err)
}

func getPodWithLabel(t *testing.T, client operatorclient.ClientInterface, label string) *corev1.Pod {
listOptions := metav1.ListOptions{LabelSelector: label}
podList, err := client.KubernetesInterface().CoreV1().Pods(operatorNamespace).List(listOptions)
if err != nil {
log.Infof("Error %v\n", err)
Expand All @@ -74,15 +200,11 @@ func getOLMPodName(t *testing.T, client operatorclient.ClientInterface) string {
if len(podList.Items) != 1 {
t.Fatalf("Expected 1 olm-operator pod, got %v", len(podList.Items))
}

podName := podList.Items[0].GetName()
log.Infof("Looking at pod %v in namespace %v", podName, operatorNamespace)
return podName

return &podList.Items[0]
}

func getMetricsFromPod(t *testing.T, client operatorclient.ClientInterface, podName string, namespace string, port string) (string, error) {
olmPod, err := client.KubernetesInterface().CoreV1().Pods(namespace).Get(podName, metav1.GetOptions{})
func getMetricsFromPod(t *testing.T, client operatorclient.ClientInterface, pod *corev1.Pod, namespace string, port string) (string, error) {
olmPod, err := client.KubernetesInterface().CoreV1().Pods(namespace).Get(pod.GetName(), metav1.GetOptions{})
if err != nil {
return "", err
}
Expand Down Expand Up @@ -113,7 +235,7 @@ func getMetricsFromPod(t *testing.T, client operatorclient.ClientInterface, podN
Namespace(namespace).
Resource("pods").
SubResource("proxy").
Name(net.JoinSchemeNamePort(scheme, podName, port)).
Name(net.JoinSchemeNamePort(scheme, pod.GetName(), port)).
Suffix("metrics").
Do().Raw()
if err != nil {
Expand Down
15 changes: 10 additions & 5 deletions test/e2e/subscription_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ func buildSubscriptionCleanupFunc(t *testing.T, crc versioned.Interface, subscri
}
}

func createSubscription(t *testing.T, crc versioned.Interface, namespace, name, packageName, channel string, approval v1alpha1.Approval) cleanupFunc {
func createSubscription(t *testing.T, crc versioned.Interface, namespace, name, packageName, channel string, approval v1alpha1.Approval) (cleanupFunc, *v1alpha1.Subscription) {
subscription := &v1alpha1.Subscription{
TypeMeta: metav1.TypeMeta{
Kind: v1alpha1.SubscriptionKind,
Expand All @@ -407,7 +407,12 @@ func createSubscription(t *testing.T, crc versioned.Interface, namespace, name,

subscription, err := crc.OperatorsV1alpha1().Subscriptions(namespace).Create(subscription)
require.NoError(t, err)
return buildSubscriptionCleanupFunc(t, crc, subscription)
return buildSubscriptionCleanupFunc(t, crc, subscription), subscription
}

func updateSubscription(t *testing.T, crc versioned.Interface, subscription *v1alpha1.Subscription) {
_, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Update(subscription)
require.NoError(t, err)
}

func createSubscriptionForCatalog(t *testing.T, crc versioned.Interface, namespace, name, catalog, packageName, channel, startingCSV string, approval v1alpha1.Approval) cleanupFunc {
Expand Down Expand Up @@ -465,7 +470,7 @@ func TestCreateNewSubscriptionNotInstalled(t *testing.T) {
}()
require.NoError(t, initCatalog(t, c, crc))

cleanup := createSubscription(t, crc, testNamespace, testSubscriptionName, testPackageName, betaChannel, v1alpha1.ApprovalAutomatic)
cleanup, _ := createSubscription(t, crc, testNamespace, testSubscriptionName, testPackageName, betaChannel, v1alpha1.ApprovalAutomatic)
defer cleanup()

subscription, err := fetchSubscription(t, crc, testNamespace, testSubscriptionName, subscriptionStateAtLatestChecker)
Expand Down Expand Up @@ -493,7 +498,7 @@ func TestCreateNewSubscriptionExistingCSV(t *testing.T) {
_, err := createCSV(t, c, crc, stableCSV, testNamespace, false, false)
require.NoError(t, err)

subscriptionCleanup := createSubscription(t, crc, testNamespace, testSubscriptionName, testPackageName, alphaChannel, v1alpha1.ApprovalAutomatic)
subscriptionCleanup, _ := createSubscription(t, crc, testNamespace, testSubscriptionName, testPackageName, alphaChannel, v1alpha1.ApprovalAutomatic)
defer subscriptionCleanup()

subscription, err := fetchSubscription(t, crc, testNamespace, testSubscriptionName, subscriptionStateAtLatestChecker)
Expand Down Expand Up @@ -600,7 +605,7 @@ func TestCreateNewSubscriptionManualApproval(t *testing.T) {
}()
require.NoError(t, initCatalog(t, c, crc))

subscriptionCleanup := createSubscription(t, crc, testNamespace, "manual-subscription", testPackageName, stableChannel, v1alpha1.ApprovalManual)
subscriptionCleanup, _ := createSubscription(t, crc, testNamespace, "manual-subscription", testPackageName, stableChannel, v1alpha1.ApprovalManual)
defer subscriptionCleanup()

subscription, err := fetchSubscription(t, crc, testNamespace, "manual-subscription", subscriptionStateUpgradePendingChecker)
Expand Down