Skip to content

Commit da89fe5

Browse files
authored
Merge pull request kubernetes-sigs#132 from pusher/metrics
Add prometheus metrics to internal controller
2 parents 012d0da + 29fb7a5 commit da89fe5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4239
-1190
lines changed

Gopkg.lock

Lines changed: 12 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ required = ["sigs.k8s.io/testing_frameworks/integration",
2020
"github.com/go-openapi/spec",
2121
"k8s.io/kube-openapi/pkg/common",
2222
"k8s.io/apiextensions-apiserver",
23+
"github.com/prometheus/client_golang/prometheus",
2324
]
2425

2526
[[constraint]]
@@ -54,6 +55,10 @@ required = ["sigs.k8s.io/testing_frameworks/integration",
5455
name = "go.uber.org/zap"
5556
version = "1.8.0"
5657

58+
[[constraint]]
59+
name = "github.com/prometheus/client_golang"
60+
version = "0.9.0"
61+
5762
# these are not listed explicitly until we get version tags,
5863
# since dep doesn't like bare revision dependencies
5964

pkg/builder/builder_suite_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
. "github.com/onsi/gomega"
2424
"k8s.io/client-go/rest"
2525
"sigs.k8s.io/controller-runtime/pkg/envtest"
26+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2627
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
2728
)
2829

@@ -43,9 +44,15 @@ var _ = BeforeSuite(func(done Done) {
4344
cfg, err = testenv.Start()
4445
Expect(err).NotTo(HaveOccurred())
4546

47+
// Prevent the metrics listener being created
48+
metrics.DefaultBindAddress = "0"
49+
4650
close(done)
4751
}, 60)
4852

4953
var _ = AfterSuite(func() {
5054
testenv.Stop()
55+
56+
// Put the DefaultBindAddress back
57+
metrics.DefaultBindAddress = ":8080"
5158
})

pkg/controller/controller_suite_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"k8s.io/client-go/kubernetes"
2525
"k8s.io/client-go/rest"
2626
"sigs.k8s.io/controller-runtime/pkg/envtest"
27+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2728
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
2829
)
2930

@@ -48,9 +49,15 @@ var _ = BeforeSuite(func(done Done) {
4849
clientset, err = kubernetes.NewForConfig(cfg)
4950
Expect(err).NotTo(HaveOccurred())
5051

52+
// Prevent the metrics listener being created
53+
metrics.DefaultBindAddress = "0"
54+
5155
close(done)
5256
}, 60)
5357

5458
var _ = AfterSuite(func() {
5559
testenv.Stop()
60+
61+
// Put the DefaultBindAddress back
62+
metrics.DefaultBindAddress = ":8080"
5663
})

pkg/controller/controller_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,21 @@ var _ = Describe("controller.Controller", func() {
7575

7676
close(done)
7777
})
78+
79+
It("should not return an error if two controllers are registered with different names", func(done Done) {
80+
m, err := manager.New(cfg, manager.Options{})
81+
Expect(err).NotTo(HaveOccurred())
82+
83+
c1, err := controller.New("c1", m, controller.Options{Reconciler: rec})
84+
Expect(err).NotTo(HaveOccurred())
85+
Expect(c1).ToNot(BeNil())
86+
87+
c2, err := controller.New("c2", m, controller.Options{Reconciler: rec})
88+
Expect(err).NotTo(HaveOccurred())
89+
Expect(c2).ToNot(BeNil())
90+
91+
close(done)
92+
})
7893
})
7994
})
8095

pkg/internal/controller/controller.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"sigs.k8s.io/controller-runtime/pkg/cache"
3131
"sigs.k8s.io/controller-runtime/pkg/client"
3232
"sigs.k8s.io/controller-runtime/pkg/handler"
33+
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
3334
"sigs.k8s.io/controller-runtime/pkg/predicate"
3435
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3536
"sigs.k8s.io/controller-runtime/pkg/runtime/inject"
@@ -171,6 +172,10 @@ func (c *Controller) Start(stop <-chan struct{}) error {
171172
func (c *Controller) processNextWorkItem() bool {
172173
// This code copy-pasted from the sample-Controller.
173174

175+
// Update metrics after processing each item
176+
reconcileStartTS := time.Now()
177+
defer c.updateMetrics(time.Now().Sub(reconcileStartTS))
178+
174179
obj, shutdown := c.Queue.Get()
175180
if obj == nil {
176181
// Sometimes the Queue gives us nil items when it starts up
@@ -207,6 +212,7 @@ func (c *Controller) processNextWorkItem() bool {
207212
if result, err := c.Do.Reconcile(req); err != nil {
208213
c.Queue.AddRateLimited(req)
209214
log.Error(err, "Reconciler error", "Controller", c.Name, "Request", req)
215+
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
210216

211217
return false
212218
} else if result.RequeueAfter > 0 {
@@ -233,3 +239,9 @@ func (c *Controller) InjectFunc(f inject.Func) error {
233239
c.SetFields = f
234240
return nil
235241
}
242+
243+
// updateMetrics updates prometheus metrics within the controller
244+
func (c *Controller) updateMetrics(reconcileTime time.Duration) {
245+
ctrlmetrics.QueueLength.WithLabelValues(c.Name).Set(float64(c.Queue.Len()))
246+
ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
247+
}

pkg/internal/controller/controller_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323

2424
. "github.com/onsi/ginkgo"
2525
. "github.com/onsi/gomega"
26+
"github.com/prometheus/client_golang/prometheus"
27+
dto "github.com/prometheus/client_model/go"
2628
"k8s.io/api/apps/v1"
2729
corev1 "k8s.io/api/core/v1"
2830
"k8s.io/apimachinery/pkg/types"
@@ -31,6 +33,7 @@ import (
3133
"sigs.k8s.io/controller-runtime/pkg/cache/informertest"
3234
"sigs.k8s.io/controller-runtime/pkg/controller/controllertest"
3335
"sigs.k8s.io/controller-runtime/pkg/handler"
36+
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
3437
"sigs.k8s.io/controller-runtime/pkg/predicate"
3538
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3639
"sigs.k8s.io/controller-runtime/pkg/reconcile/reconciletest"
@@ -404,6 +407,90 @@ var _ = Describe("controller", func() {
404407
It("should create a new go routine for MaxConcurrentReconciles", func() {
405408
// TODO(community): write this test
406409
})
410+
411+
Context("should update prometheus metrics", func() {
412+
It("should requeue a Request if there is an error and continue processing items", func(done Done) {
413+
ctrlmetrics.QueueLength = prometheus.NewGaugeVec(prometheus.GaugeOpts{
414+
Name: "controller_runtime_reconcile_queue_length",
415+
Help: "Length of reconcile queue per controller",
416+
}, []string{"controller"})
417+
ctrlmetrics.ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
418+
Name: "controller_runtime_reconcile_errors_total",
419+
Help: "Total number of reconcile errors per controller",
420+
}, []string{"controller"})
421+
422+
fakeReconcile.Err = fmt.Errorf("expected error: reconcile")
423+
go func() {
424+
defer GinkgoRecover()
425+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
426+
}()
427+
ctrl.Queue.Add(request)
428+
429+
// Reduce the jitterperiod so we don't have to wait a second before the reconcile function is rerun.
430+
ctrl.JitterPeriod = time.Millisecond
431+
432+
By("Invoking Reconciler which will give an error")
433+
Expect(<-reconciled).To(Equal(request))
434+
var queueLength, reconcileErrs dto.Metric
435+
Eventually(func() error {
436+
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
437+
if queueLength.GetGauge().GetValue() != 1.0 {
438+
return fmt.Errorf("metrics not updated")
439+
}
440+
return nil
441+
}, 2.0).Should(Succeed())
442+
Eventually(func() error {
443+
ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
444+
if reconcileErrs.GetCounter().GetValue() != 1.0 {
445+
return fmt.Errorf("metrics not updated")
446+
}
447+
return nil
448+
}, 2.0).Should(Succeed())
449+
450+
By("Invoking Reconciler a second time without error")
451+
fakeReconcile.Err = nil
452+
Expect(<-reconciled).To(Equal(request))
453+
454+
By("Removing the item from the queue")
455+
Eventually(ctrl.Queue.Len).Should(Equal(0))
456+
Eventually(func() int { return ctrl.Queue.NumRequeues(request) }).Should(Equal(0))
457+
458+
close(done)
459+
}, 2.0)
460+
461+
It("should add a reconcile time to the reconcile time histogram", func(done Done) {
462+
ctrlmetrics.ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
463+
Name: "controller_runtime_reconcile_time_second",
464+
Help: "Length of time per reconcile per controller",
465+
}, []string{"controller"})
466+
467+
go func() {
468+
defer GinkgoRecover()
469+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
470+
}()
471+
ctrl.Queue.Add(request)
472+
473+
By("Invoking Reconciler")
474+
Expect(<-reconciled).To(Equal(request))
475+
476+
By("Removing the item from the queue")
477+
Eventually(ctrl.Queue.Len).Should(Equal(0))
478+
Eventually(func() int { return ctrl.Queue.NumRequeues(request) }).Should(Equal(0))
479+
480+
var reconcileTime dto.Metric
481+
Eventually(func() error {
482+
histObserver := ctrlmetrics.ReconcileTime.WithLabelValues(ctrl.Name)
483+
hist := histObserver.(prometheus.Histogram)
484+
hist.Write(&reconcileTime)
485+
if reconcileTime.GetHistogram().GetSampleCount() != uint64(1) {
486+
return fmt.Errorf("metrics not updated")
487+
}
488+
return nil
489+
}, 2.0).Should(Succeed())
490+
491+
close(done)
492+
}, 4.0)
493+
})
407494
})
408495
})
409496

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
Copyright 2018 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
import (
20+
"github.com/prometheus/client_golang/prometheus"
21+
"sigs.k8s.io/controller-runtime/pkg/metrics"
22+
)
23+
24+
var (
25+
// QueueLength is a prometheus metric which counts the current reconcile
26+
// queue length per controller
27+
QueueLength = prometheus.NewGaugeVec(prometheus.GaugeOpts{
28+
Name: "controller_runtime_reconcile_queue_length",
29+
Help: "Length of reconcile queue per controller",
30+
}, []string{"controller"})
31+
32+
// ReconcileErrors is a prometheus counter metrics which holds the total
33+
// number of errors from the Reconciler
34+
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
35+
Name: "controller_runtime_reconcile_errors_total",
36+
Help: "Total number of reconcile errors per controller",
37+
}, []string{"controller"})
38+
39+
// ReconcileTime is a prometheus metric which keeps track of the duration
40+
// of reconciles
41+
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
42+
Name: "controller_runtime_reconcile_time_seconds",
43+
Help: "Length of time per reconcile per controller",
44+
}, []string{"controller"})
45+
)
46+
47+
func init() {
48+
metrics.Registry.MustRegister(QueueLength)
49+
metrics.Registry.MustRegister(ReconcileErrors)
50+
metrics.Registry.MustRegister(ReconcileTime)
51+
}

0 commit comments

Comments
 (0)