Skip to content

Commit 05643fc

Browse files
committed
Expose client-go metrics
This exposes the client-go metrics (client, workqueue, reflector) in the controller-runtime registry.
1 parent b44385d commit 05643fc

File tree

2 files changed

+300
-0
lines changed

2 files changed

+300
-0
lines changed

Gopkg.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/metrics/client_go_adapter.go

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
/*
2+
Copyright 2018 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
import (
20+
"net/url"
21+
"time"
22+
23+
"github.com/prometheus/client_golang/prometheus"
24+
reflectormetrics "k8s.io/client-go/tools/cache"
25+
clientmetrics "k8s.io/client-go/tools/metrics"
26+
workqueuemetrics "k8s.io/client-go/util/workqueue"
27+
)
28+
29+
// this file contains setup logic to initialize the myriad of places
30+
// that client-go registers metrics. We copy the names and formats
31+
// from Kubernetes so that we match the core controllers.
32+
33+
var (
34+
// client metrics
35+
36+
requestLatency = prometheus.NewHistogramVec(
37+
prometheus.HistogramOpts{
38+
Name: "rest_client_request_latency_seconds",
39+
Help: "Request latency in seconds. Broken down by verb and URL.",
40+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
41+
},
42+
[]string{"verb", "url"},
43+
)
44+
45+
requestResult = prometheus.NewCounterVec(
46+
prometheus.CounterOpts{
47+
Name: "rest_client_requests_total",
48+
Help: "Number of HTTP requests, partitioned by status code, method, and host.",
49+
},
50+
[]string{"code", "method", "host"},
51+
)
52+
53+
// reflector metrics
54+
55+
// TODO(directxman12): update these to be histograms once the metrics overhaul KEP
56+
// PRs start landing.
57+
58+
reflectorSubsystem = "reflector"
59+
60+
listsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
61+
Subsystem: reflectorSubsystem,
62+
Name: "lists_total",
63+
Help: "Total number of API lists done by the reflectors",
64+
}, []string{"name"})
65+
66+
listsDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
67+
Subsystem: reflectorSubsystem,
68+
Name: "list_duration_seconds",
69+
Help: "How long an API list takes to return and decode for the reflectors",
70+
}, []string{"name"})
71+
72+
itemsPerList = prometheus.NewSummaryVec(prometheus.SummaryOpts{
73+
Subsystem: reflectorSubsystem,
74+
Name: "items_per_list",
75+
Help: "How many items an API list returns to the reflectors",
76+
}, []string{"name"})
77+
78+
watchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
79+
Subsystem: reflectorSubsystem,
80+
Name: "watches_total",
81+
Help: "Total number of API watches done by the reflectors",
82+
}, []string{"name"})
83+
84+
shortWatchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
85+
Subsystem: reflectorSubsystem,
86+
Name: "short_watches_total",
87+
Help: "Total number of short API watches done by the reflectors",
88+
}, []string{"name"})
89+
90+
watchDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
91+
Subsystem: reflectorSubsystem,
92+
Name: "watch_duration_seconds",
93+
Help: "How long an API watch takes to return and decode for the reflectors",
94+
}, []string{"name"})
95+
96+
itemsPerWatch = prometheus.NewSummaryVec(prometheus.SummaryOpts{
97+
Subsystem: reflectorSubsystem,
98+
Name: "items_per_watch",
99+
Help: "How many items an API watch returns to the reflectors",
100+
}, []string{"name"})
101+
102+
lastResourceVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
103+
Subsystem: reflectorSubsystem,
104+
Name: "last_resource_version",
105+
Help: "Last resource version seen for the reflectors",
106+
}, []string{"name"})
107+
108+
// workqueue metrics
109+
110+
workQueueSubsystem = "workqueue"
111+
112+
depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{
113+
Subsystem: workQueueSubsystem,
114+
Name: "depth",
115+
Help: "Current depth of workqueue",
116+
}, []string{"name"})
117+
118+
adds = prometheus.NewCounterVec(prometheus.CounterOpts{
119+
Subsystem: workQueueSubsystem,
120+
Name: "adds_total",
121+
Help: "Total number of adds handled by workqueue",
122+
}, []string{"name"})
123+
124+
latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
125+
Subsystem: workQueueSubsystem,
126+
Name: "queue_latency_seconds",
127+
Help: "How long in seconds an item stays in workqueue before being requested.",
128+
Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10),
129+
}, []string{"name"})
130+
131+
workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
132+
Subsystem: workQueueSubsystem,
133+
Name: "work_duration_seconds",
134+
Help: "How long in seconds processing an item from workqueue takes.",
135+
Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10),
136+
}, []string{"name"})
137+
138+
retries = prometheus.NewCounterVec(prometheus.CounterOpts{
139+
Subsystem: workQueueSubsystem,
140+
Name: "retries_total",
141+
Help: "Total number of retries handled by workqueue",
142+
}, []string{"name"})
143+
144+
longestRunning = prometheus.NewGaugeVec(prometheus.GaugeOpts{
145+
Subsystem: workQueueSubsystem,
146+
Name: "longest_running_processor_microseconds",
147+
Help: "How many microseconds has the longest running " +
148+
"processor for workqueue been running.",
149+
}, []string{"name"})
150+
151+
unfinishedWork = prometheus.NewGaugeVec(prometheus.GaugeOpts{
152+
Subsystem: workQueueSubsystem,
153+
Name: "unfinished_work_seconds",
154+
Help: "How many seconds of work has done that " +
155+
"is in progress and hasn't been observed by work_duration. Large " +
156+
"values indicate stuck threads. One can deduce the number of stuck " +
157+
"threads by observing the rate at which this increases.",
158+
}, []string{"name"})
159+
)
160+
161+
func init() {
162+
registerClientMetrics()
163+
registerReflectorMetrics()
164+
registerWorkqueueMetrics()
165+
}
166+
167+
// registerClientMetrics sets up the client latency metrics from client-go
168+
func registerClientMetrics() {
169+
// register the metrics with our registry
170+
Registry.MustRegister(requestLatency)
171+
Registry.MustRegister(requestResult)
172+
173+
// register the metrics with client-go
174+
clientmetrics.Register(&latencyAdapter{metric: requestLatency}, &resultAdapter{metric: requestResult})
175+
}
176+
177+
// registerReflectorMetrics sets up reflector (reconile) loop metrics
178+
func registerReflectorMetrics() {
179+
Registry.MustRegister(listsTotal)
180+
Registry.MustRegister(listsDuration)
181+
Registry.MustRegister(itemsPerList)
182+
Registry.MustRegister(watchesTotal)
183+
Registry.MustRegister(shortWatchesTotal)
184+
Registry.MustRegister(watchDuration)
185+
Registry.MustRegister(itemsPerWatch)
186+
Registry.MustRegister(lastResourceVersion)
187+
188+
reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{})
189+
}
190+
191+
// registerWorkQueueMetrics sets up workqueue (other reconcile) metrics
192+
func registerWorkqueueMetrics() {
193+
Registry.MustRegister(depth)
194+
Registry.MustRegister(adds)
195+
Registry.MustRegister(latency)
196+
Registry.MustRegister(workDuration)
197+
Registry.MustRegister(retries)
198+
Registry.MustRegister(longestRunning)
199+
Registry.MustRegister(unfinishedWork)
200+
201+
workqueuemetrics.SetProvider(workqueueMetricsProvider{})
202+
}
203+
204+
// this section contains adapters, implementations, and other sundry organic, artisanally
205+
// hand-crafted syntax trees required to convince client-go that it actually wants to let
206+
// someone use its metrics.
207+
208+
// Client metrics adapters (method #1 for client-go metrics),
209+
// copied (more-or-less directly) from k8s.io/kubernetes setup code
210+
// (which isn't anywhere in an easily-importable place).
211+
212+
type latencyAdapter struct {
213+
metric *prometheus.HistogramVec
214+
}
215+
216+
func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
217+
l.metric.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
218+
}
219+
220+
type resultAdapter struct {
221+
metric *prometheus.CounterVec
222+
}
223+
224+
func (r *resultAdapter) Increment(code, method, host string) {
225+
r.metric.WithLabelValues(code, method, host).Inc()
226+
}
227+
228+
// Reflector metrics provider (method #2 for client-go metrics),
229+
// copied (more-or-less directly) from k8s.io/kubernetes setup code
230+
// (which isn't anywhere in an easily-importable place).
231+
232+
// reflectorMetricsProvider is a stateless type whose methods return, for a
// given name, the matching child of each package-level reflector collector.
// An instance is handed to reflectormetrics.SetReflectorMetricsProvider.
type reflectorMetricsProvider struct{}
233+
234+
func (reflectorMetricsProvider) NewListsMetric(name string) reflectormetrics.CounterMetric {
235+
return listsTotal.WithLabelValues(name)
236+
}
237+
238+
func (reflectorMetricsProvider) NewListDurationMetric(name string) reflectormetrics.SummaryMetric {
239+
return listsDuration.WithLabelValues(name)
240+
}
241+
242+
func (reflectorMetricsProvider) NewItemsInListMetric(name string) reflectormetrics.SummaryMetric {
243+
return itemsPerList.WithLabelValues(name)
244+
}
245+
246+
func (reflectorMetricsProvider) NewWatchesMetric(name string) reflectormetrics.CounterMetric {
247+
return watchesTotal.WithLabelValues(name)
248+
}
249+
250+
func (reflectorMetricsProvider) NewShortWatchesMetric(name string) reflectormetrics.CounterMetric {
251+
return shortWatchesTotal.WithLabelValues(name)
252+
}
253+
254+
func (reflectorMetricsProvider) NewWatchDurationMetric(name string) reflectormetrics.SummaryMetric {
255+
return watchDuration.WithLabelValues(name)
256+
}
257+
258+
func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetrics.SummaryMetric {
259+
return itemsPerWatch.WithLabelValues(name)
260+
}
261+
262+
func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric {
263+
return lastResourceVersion.WithLabelValues(name)
264+
}
265+
266+
// Workqueue metrics (method #3 for client-go metrics),
267+
// copied (more-or-less directly) from k8s.io/kubernetes setup code
268+
// (which isn't anywhere in an easily-importable place).
269+
// TODO(directxman12): stop "cheating" and calling histograms summaries when we pull in the latest deps
270+
271+
// workqueueMetricsProvider is a stateless type whose methods return, for a
// given name, the matching child of each package-level workqueue collector.
// An instance is handed to workqueuemetrics.SetProvider.
type workqueueMetricsProvider struct{}
272+
273+
func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric {
274+
return depth.WithLabelValues(name)
275+
}
276+
277+
func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric {
278+
return adds.WithLabelValues(name)
279+
}
280+
281+
func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.SummaryMetric {
282+
return latency.WithLabelValues(name)
283+
}
284+
285+
func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.SummaryMetric {
286+
return workDuration.WithLabelValues(name)
287+
}
288+
289+
func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric {
290+
return retries.WithLabelValues(name)
291+
}
292+
293+
func (workqueueMetricsProvider) NewLongestRunningProcessorMicrosecondsMetric(name string) workqueuemetrics.SettableGaugeMetric {
294+
return longestRunning.WithLabelValues(name)
295+
}
296+
297+
func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric {
298+
return unfinishedWork.WithLabelValues(name)
299+
}

0 commit comments

Comments
 (0)