Skip to content

Commit 623b4a6

Browse files
committed
✨ added reconciles_total metric
1 parent 43351af commit 623b4a6

File tree

3 files changed

+143
-7
lines changed

3 files changed

+143
-7
lines changed

pkg/internal/controller/controller.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,13 +214,15 @@ func (c *Controller) processNextWorkItem() bool {
214214
c.Queue.AddRateLimited(req)
215215
log.Error(err, "Reconciler error", "controller", c.Name, "request", req)
216216
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
217-
217+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "error").Inc()
218218
return false
219219
} else if result.RequeueAfter > 0 {
220220
c.Queue.AddAfter(req, result.RequeueAfter)
221+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue_after").Inc()
221222
return true
222223
} else if result.Requeue {
223224
c.Queue.AddRateLimited(req)
225+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue").Inc()
224226
return true
225227
}
226228

@@ -231,6 +233,7 @@ func (c *Controller) processNextWorkItem() bool {
231233
// TODO(directxman12): What does 1 mean? Do we want level constants? Do we want levels at all?
232234
log.V(1).Info("Successfully Reconciled", "controller", c.Name, "request", req)
233235

236+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "success").Inc()
234237
// Return true, don't take a break
235238
return true
236239
}

pkg/internal/controller/controller_test.go

Lines changed: 126 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,14 +408,137 @@ var _ = Describe("controller", func() {
408408
// TODO(community): write this test
409409
})
410410

411+
Context("prometheus metric reconcile_total", func() {
412+
var reconcileTotal dto.Metric
413+
414+
BeforeEach(func() {
415+
ctrlmetrics.ReconcileTotal.Reset()
416+
})
417+
418+
It("should get updated on successful reconciliation", func(done Done) {
419+
Expect(func() error {
420+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "success").Write(&reconcileTotal)
421+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
422+
return fmt.Errorf("metric reconcile total not reset")
423+
}
424+
return nil
425+
}()).Should(Succeed())
426+
427+
go func() {
428+
defer GinkgoRecover()
429+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
430+
}()
431+
By("Invoking Reconciler which will succeed")
432+
ctrl.Queue.Add(request)
433+
434+
Expect(<-reconciled).To(Equal(request))
435+
Eventually(func() error {
436+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "success").Write(&reconcileTotal)
437+
if reconcileTotal.GetCounter().GetValue() != 1.0 {
438+
return fmt.Errorf("metric reconcile total not updated")
439+
}
440+
return nil
441+
}, 2.0).Should(Succeed())
442+
443+
close(done)
444+
}, 2.0)
445+
446+
It("should get updated on reconcile errors", func(done Done) {
447+
Expect(func() error {
448+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "error").Write(&reconcileTotal)
449+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
450+
return fmt.Errorf("metric reconcile total not reset")
451+
}
452+
return nil
453+
}()).Should(Succeed())
454+
455+
fakeReconcile.Err = fmt.Errorf("expected error: reconcile")
456+
go func() {
457+
defer GinkgoRecover()
458+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
459+
}()
460+
By("Invoking Reconciler which will give an error")
461+
ctrl.Queue.Add(request)
462+
463+
Expect(<-reconciled).To(Equal(request))
464+
Eventually(func() error {
465+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "error").Write(&reconcileTotal)
466+
if reconcileTotal.GetCounter().GetValue() != 1.0 {
467+
return fmt.Errorf("metric reconcile total not updated")
468+
}
469+
return nil
470+
}, 2.0).Should(Succeed())
471+
472+
close(done)
473+
}, 2.0)
474+
475+
It("should get updated when reconcile returns with retry enabled", func(done Done) {
476+
Expect(func() error {
477+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue").Write(&reconcileTotal)
478+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
479+
return fmt.Errorf("metric reconcile total not reset")
480+
}
481+
return nil
482+
}()).Should(Succeed())
483+
484+
fakeReconcile.Result.Requeue = true
485+
go func() {
486+
defer GinkgoRecover()
487+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
488+
}()
489+
By("Invoking Reconciler which will return result with Requeue enabled")
490+
ctrl.Queue.Add(request)
491+
492+
Expect(<-reconciled).To(Equal(request))
493+
Eventually(func() error {
494+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue").Write(&reconcileTotal)
495+
if reconcileTotal.GetCounter().GetValue() != 1.0 {
496+
return fmt.Errorf("metric reconcile total not updated")
497+
}
498+
return nil
499+
}, 2.0).Should(Succeed())
500+
501+
close(done)
502+
}, 2.0)
503+
504+
It("should get updated when reconcile returns with retryAfter enabled", func(done Done) {
505+
Expect(func() error {
506+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue_after").Write(&reconcileTotal)
507+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
508+
return fmt.Errorf("metric reconcile total not reset")
509+
}
510+
return nil
511+
}()).Should(Succeed())
512+
513+
fakeReconcile.Result.RequeueAfter = 5 * time.Hour
514+
go func() {
515+
defer GinkgoRecover()
516+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
517+
}()
518+
By("Invoking Reconciler which will return result with requeueAfter enabled")
519+
ctrl.Queue.Add(request)
520+
521+
Expect(<-reconciled).To(Equal(request))
522+
Eventually(func() error {
523+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue_after").Write(&reconcileTotal)
524+
if reconcileTotal.GetCounter().GetValue() != 1.0 {
525+
return fmt.Errorf("metric reconcile total not updated")
526+
}
527+
return nil
528+
}, 2.0).Should(Succeed())
529+
530+
close(done)
531+
}, 2.0)
532+
})
533+
411534
Context("should update prometheus metrics", func() {
412535
It("should requeue a Request if there is an error and continue processing items", func(done Done) {
413536
var queueLength, reconcileErrs dto.Metric
414537
ctrlmetrics.QueueLength.Reset()
415538
Expect(func() error {
416539
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
417540
if queueLength.GetGauge().GetValue() != 0.0 {
418-
return fmt.Errorf("metrics not reset")
541+
return fmt.Errorf("metric queue length not reset")
419542
}
420543
return nil
421544
}()).Should(Succeed())
@@ -424,7 +547,7 @@ var _ = Describe("controller", func() {
424547
Expect(func() error {
425548
ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
426549
if reconcileErrs.GetCounter().GetValue() != 0.0 {
427-
return fmt.Errorf("metrics not reset")
550+
return fmt.Errorf("metric reconcile errors not reset")
428551
}
429552
return nil
430553
}()).Should(Succeed())
@@ -444,7 +567,7 @@ var _ = Describe("controller", func() {
444567
Eventually(func() error {
445568
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
446569
if queueLength.GetGauge().GetValue() != 1.0 {
447-
return fmt.Errorf("metrics not updated")
570+
return fmt.Errorf("metric queue length not updated")
448571
}
449572
return nil
450573
}, 2.0).Should(Succeed())

pkg/internal/controller/metrics/metrics.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,34 @@ var (
2929
Help: "Length of reconcile queue per controller",
3030
}, []string{"controller"})
3131

32+
// ReconcileTotal is a prometheus counter metric which holds the total
33+
// number of reconciliations per controller. It has two labels: the controller label
34+
// refers to the controller name, and the result label refers to the reconcile result,
35+
// i.e. success, error, requeue, or requeue_after.
36+
ReconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
37+
Name: "controller_runtime_reconcile_total",
38+
Help: "Total number of reconciliations per controller",
39+
}, []string{"controller", "result"})
40+
3241
// ReconcileErrors is a prometheus counter metric which holds the total
3342
// number of errors from the Reconciler
3443
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
3544
Name: "controller_runtime_reconcile_errors_total",
36-
Help: "Total number of reconcile errors per controller",
45+
Help: "Total number of reconciliation errors per controller",
3746
}, []string{"controller"})
3847

3948
// ReconcileTime is a prometheus metric which keeps track of the duration
40-
// of reconciles
49+
// of reconciliations
4150
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
4251
Name: "controller_runtime_reconcile_time_seconds",
43-
Help: "Length of time per reconcile per controller",
52+
Help: "Length of time per reconciliation per controller",
4453
}, []string{"controller"})
4554
)
4655

4756
func init() {
4857
metrics.Registry.MustRegister(
4958
QueueLength,
59+
ReconcileTotal,
5060
ReconcileErrors,
5161
ReconcileTime,
5262
// expose process metrics like CPU, Memory, file descriptor usage etc.

0 commit comments

Comments
 (0)