|
| 1 | +/* |
| 2 | +Copyright 2018 The Kubernetes Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package metrics |
| 18 | + |
| 19 | +import ( |
| 20 | + "net/url" |
| 21 | + "time" |
| 22 | + |
| 23 | + "github.com/prometheus/client_golang/prometheus" |
| 24 | + reflectormetrics "k8s.io/client-go/tools/cache" |
| 25 | + clientmetrics "k8s.io/client-go/tools/metrics" |
| 26 | + workqueuemetrics "k8s.io/client-go/util/workqueue" |
| 27 | +) |
| 28 | + |
| 29 | +// this file contains setup logic to initialize the myriad of places |
| 30 | +// that client-go registers metrics. We copy the names and formats |
| 31 | +// from Kubernetes so that we match the core controllers. |
| 32 | + |
| 33 | +var ( |
| 34 | + // client metrics |
| 35 | + |
| 36 | + requestLatency = prometheus.NewHistogramVec( |
| 37 | + prometheus.HistogramOpts{ |
| 38 | + Name: "rest_client_request_latency_seconds", |
| 39 | + Help: "Request latency in seconds. Broken down by verb and URL.", |
| 40 | + Buckets: prometheus.ExponentialBuckets(0.001, 2, 10), |
| 41 | + }, |
| 42 | + []string{"verb", "url"}, |
| 43 | + ) |
| 44 | + |
| 45 | + requestResult = prometheus.NewCounterVec( |
| 46 | + prometheus.CounterOpts{ |
| 47 | + Name: "rest_client_requests_total", |
| 48 | + Help: "Number of HTTP requests, partitioned by status code, method, and host.", |
| 49 | + }, |
| 50 | + []string{"code", "method", "host"}, |
| 51 | + ) |
| 52 | + |
| 53 | + // reflector metrics |
| 54 | + |
| 55 | + // TODO(directxman12): update these to be histograms once the metrics overhaul KEP |
| 56 | + // PRs start landing. |
| 57 | + |
| 58 | + reflectorSubsystem = "reflector" |
| 59 | + |
| 60 | + listsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 61 | + Subsystem: reflectorSubsystem, |
| 62 | + Name: "lists_total", |
| 63 | + Help: "Total number of API lists done by the reflectors", |
| 64 | + }, []string{"name"}) |
| 65 | + |
| 66 | + listsDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 67 | + Subsystem: reflectorSubsystem, |
| 68 | + Name: "list_duration_seconds", |
| 69 | + Help: "How long an API list takes to return and decode for the reflectors", |
| 70 | + }, []string{"name"}) |
| 71 | + |
| 72 | + itemsPerList = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 73 | + Subsystem: reflectorSubsystem, |
| 74 | + Name: "items_per_list", |
| 75 | + Help: "How many items an API list returns to the reflectors", |
| 76 | + }, []string{"name"}) |
| 77 | + |
| 78 | + watchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 79 | + Subsystem: reflectorSubsystem, |
| 80 | + Name: "watches_total", |
| 81 | + Help: "Total number of API watches done by the reflectors", |
| 82 | + }, []string{"name"}) |
| 83 | + |
| 84 | + shortWatchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 85 | + Subsystem: reflectorSubsystem, |
| 86 | + Name: "short_watches_total", |
| 87 | + Help: "Total number of short API watches done by the reflectors", |
| 88 | + }, []string{"name"}) |
| 89 | + |
| 90 | + watchDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 91 | + Subsystem: reflectorSubsystem, |
| 92 | + Name: "watch_duration_seconds", |
| 93 | + Help: "How long an API watch takes to return and decode for the reflectors", |
| 94 | + }, []string{"name"}) |
| 95 | + |
| 96 | + itemsPerWatch = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 97 | + Subsystem: reflectorSubsystem, |
| 98 | + Name: "items_per_watch", |
| 99 | + Help: "How many items an API watch returns to the reflectors", |
| 100 | + }, []string{"name"}) |
| 101 | + |
| 102 | + lastResourceVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
| 103 | + Subsystem: reflectorSubsystem, |
| 104 | + Name: "last_resource_version", |
| 105 | + Help: "Last resource version seen for the reflectors", |
| 106 | + }, []string{"name"}) |
| 107 | + |
| 108 | + // workqueue metrics |
| 109 | + |
| 110 | + workQueueSubsystem = "workqueue" |
| 111 | + |
| 112 | + depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
| 113 | + Subsystem: workQueueSubsystem, |
| 114 | + Name: "depth", |
| 115 | + Help: "Current depth of workqueue", |
| 116 | + }, []string{"name"}) |
| 117 | + |
| 118 | + adds = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 119 | + Subsystem: workQueueSubsystem, |
| 120 | + Name: "adds_total", |
| 121 | + Help: "Total number of adds handled by workqueue", |
| 122 | + }, []string{"name"}) |
| 123 | + |
| 124 | + latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ |
| 125 | + Subsystem: workQueueSubsystem, |
| 126 | + Name: "queue_latency_seconds", |
| 127 | + Help: "How long in seconds an item stays in workqueue before being requested.", |
| 128 | + Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), |
| 129 | + }, []string{"name"}) |
| 130 | + |
| 131 | + workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ |
| 132 | + Subsystem: workQueueSubsystem, |
| 133 | + Name: "work_duration_seconds", |
| 134 | + Help: "How long in seconds processing an item from workqueue takes.", |
| 135 | + Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), |
| 136 | + }, []string{"name"}) |
| 137 | + |
| 138 | + retries = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 139 | + Subsystem: workQueueSubsystem, |
| 140 | + Name: "retries_total", |
| 141 | + Help: "Total number of retries handled by workqueue", |
| 142 | + }, []string{"name"}) |
| 143 | + |
| 144 | + longestRunning = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
| 145 | + Subsystem: workQueueSubsystem, |
| 146 | + Name: "longest_running_processor_microseconds", |
| 147 | + Help: "How many microseconds has the longest running " + |
| 148 | + "processor for workqueue been running.", |
| 149 | + }, []string{"name"}) |
| 150 | + |
| 151 | + unfinishedWork = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
| 152 | + Subsystem: workQueueSubsystem, |
| 153 | + Name: "unfinished_work_seconds", |
| 154 | + Help: "How many seconds of work has done that " + |
| 155 | + "is in progress and hasn't been observed by work_duration. Large " + |
| 156 | + "values indicate stuck threads. One can deduce the number of stuck " + |
| 157 | + "threads by observing the rate at which this increases.", |
| 158 | + }, []string{"name"}) |
| 159 | +) |
| 160 | + |
| 161 | +func init() { |
| 162 | + registerClientMetrics() |
| 163 | + registerReflectorMetrics() |
| 164 | + registerWorkqueueMetrics() |
| 165 | +} |
| 166 | + |
| 167 | +// registerClientMetrics sets up the client latency metrics from client-go |
| 168 | +func registerClientMetrics() { |
| 169 | + // register the metrics with our registry |
| 170 | + Registry.MustRegister(requestLatency) |
| 171 | + Registry.MustRegister(requestResult) |
| 172 | + |
| 173 | + // register the metrics with client-go |
| 174 | + clientmetrics.Register(&latencyAdapter{metric: requestLatency}, &resultAdapter{metric: requestResult}) |
| 175 | +} |
| 176 | + |
| 177 | +// registerReflectorMetrics sets up reflector (reconile) loop metrics |
| 178 | +func registerReflectorMetrics() { |
| 179 | + Registry.MustRegister(listsTotal) |
| 180 | + Registry.MustRegister(listsDuration) |
| 181 | + Registry.MustRegister(itemsPerList) |
| 182 | + Registry.MustRegister(watchesTotal) |
| 183 | + Registry.MustRegister(shortWatchesTotal) |
| 184 | + Registry.MustRegister(watchDuration) |
| 185 | + Registry.MustRegister(itemsPerWatch) |
| 186 | + Registry.MustRegister(lastResourceVersion) |
| 187 | + |
| 188 | + reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{}) |
| 189 | +} |
| 190 | + |
| 191 | +// registerWorkQueueMetrics sets up workqueue (other reconcile) metrics |
| 192 | +func registerWorkqueueMetrics() { |
| 193 | + Registry.MustRegister(depth) |
| 194 | + Registry.MustRegister(adds) |
| 195 | + Registry.MustRegister(latency) |
| 196 | + Registry.MustRegister(workDuration) |
| 197 | + Registry.MustRegister(retries) |
| 198 | + Registry.MustRegister(longestRunning) |
| 199 | + Registry.MustRegister(unfinishedWork) |
| 200 | + |
| 201 | + workqueuemetrics.SetProvider(workqueueMetricsProvider{}) |
| 202 | +} |
| 203 | + |
| 204 | +// this section contains adapters, implementations, and other sundry organic, artisanally |
| 205 | +// hand-crafted syntax trees required to convince client-go that it actually wants to let |
| 206 | +// someone use its metrics. |
| 207 | + |
| 208 | +// Client metrics adapters (method #1 for client-go metrics), |
| 209 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 210 | +// (which isn't anywhere in an easily-importable place). |
| 211 | + |
| 212 | +type latencyAdapter struct { |
| 213 | + metric *prometheus.HistogramVec |
| 214 | +} |
| 215 | + |
| 216 | +func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) { |
| 217 | + l.metric.WithLabelValues(verb, u.String()).Observe(latency.Seconds()) |
| 218 | +} |
| 219 | + |
| 220 | +type resultAdapter struct { |
| 221 | + metric *prometheus.CounterVec |
| 222 | +} |
| 223 | + |
| 224 | +func (r *resultAdapter) Increment(code, method, host string) { |
| 225 | + r.metric.WithLabelValues(code, method, host).Inc() |
| 226 | +} |
| 227 | + |
| 228 | +// Reflector metrics provider (method #2 for client-go metrics), |
| 229 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 230 | +// (which isn't anywhere in an easily-importable place). |
| 231 | + |
| 232 | +type reflectorMetricsProvider struct{} |
| 233 | + |
| 234 | +func (reflectorMetricsProvider) NewListsMetric(name string) reflectormetrics.CounterMetric { |
| 235 | + return listsTotal.WithLabelValues(name) |
| 236 | +} |
| 237 | + |
| 238 | +func (reflectorMetricsProvider) NewListDurationMetric(name string) reflectormetrics.SummaryMetric { |
| 239 | + return listsDuration.WithLabelValues(name) |
| 240 | +} |
| 241 | + |
| 242 | +func (reflectorMetricsProvider) NewItemsInListMetric(name string) reflectormetrics.SummaryMetric { |
| 243 | + return itemsPerList.WithLabelValues(name) |
| 244 | +} |
| 245 | + |
| 246 | +func (reflectorMetricsProvider) NewWatchesMetric(name string) reflectormetrics.CounterMetric { |
| 247 | + return watchesTotal.WithLabelValues(name) |
| 248 | +} |
| 249 | + |
| 250 | +func (reflectorMetricsProvider) NewShortWatchesMetric(name string) reflectormetrics.CounterMetric { |
| 251 | + return shortWatchesTotal.WithLabelValues(name) |
| 252 | +} |
| 253 | + |
| 254 | +func (reflectorMetricsProvider) NewWatchDurationMetric(name string) reflectormetrics.SummaryMetric { |
| 255 | + return watchDuration.WithLabelValues(name) |
| 256 | +} |
| 257 | + |
| 258 | +func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetrics.SummaryMetric { |
| 259 | + return itemsPerWatch.WithLabelValues(name) |
| 260 | +} |
| 261 | + |
| 262 | +func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric { |
| 263 | + return lastResourceVersion.WithLabelValues(name) |
| 264 | +} |
| 265 | + |
| 266 | +// Workqueue metrics (method #3 for client-go metrics), |
| 267 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 268 | +// (which isn't anywhere in an easily-importable place). |
| 269 | +// TODO(directxman12): stop "cheating" and calling histograms summaries when we pull in the latest deps |
| 270 | + |
| 271 | +type workqueueMetricsProvider struct{} |
| 272 | + |
| 273 | +func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric { |
| 274 | + return depth.WithLabelValues(name) |
| 275 | +} |
| 276 | + |
| 277 | +func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric { |
| 278 | + return adds.WithLabelValues(name) |
| 279 | +} |
| 280 | + |
| 281 | +func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.SummaryMetric { |
| 282 | + return latency.WithLabelValues(name) |
| 283 | +} |
| 284 | + |
| 285 | +func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.SummaryMetric { |
| 286 | + return workDuration.WithLabelValues(name) |
| 287 | +} |
| 288 | + |
| 289 | +func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric { |
| 290 | + return retries.WithLabelValues(name) |
| 291 | +} |
| 292 | + |
| 293 | +func (workqueueMetricsProvider) NewLongestRunningProcessorMicrosecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { |
| 294 | + return longestRunning.WithLabelValues(name) |
| 295 | +} |
| 296 | + |
| 297 | +func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { |
| 298 | + return unfinishedWork.WithLabelValues(name) |
| 299 | +} |
0 commit comments