|
| 1 | +/* |
| 2 | +Copyright 2018 The Kubernetes Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package metrics |
| 18 | + |
| 19 | +import ( |
| 20 | + "net/url" |
| 21 | + "time" |
| 22 | + |
| 23 | + "github.com/prometheus/client_golang/prometheus" |
| 24 | + reflectormetrics "k8s.io/client-go/tools/cache" |
| 25 | + clientmetrics "k8s.io/client-go/tools/metrics" |
| 26 | + workqueuemetrics "k8s.io/client-go/util/workqueue" |
| 27 | +) |
| 28 | + |
| 29 | +// this file contains setup logic to initialize the myriad of places |
| 30 | +// that client-go registers metrics. We copy the names and formats |
| 31 | +// from Kubernetes so that we match the core controllers. |
| 32 | + |
| 33 | +var ( |
| 34 | + // client metrics |
| 35 | + |
| 36 | + requestLatency = prometheus.NewHistogramVec( |
| 37 | + prometheus.HistogramOpts{ |
| 38 | + Name: "rest_client_request_latency_seconds", |
| 39 | + Help: "Request latency in seconds. Broken down by verb and URL.", |
| 40 | + Buckets: prometheus.ExponentialBuckets(0.001, 2, 10), |
| 41 | + }, |
| 42 | + []string{"verb", "url"}, |
| 43 | + ) |
| 44 | + |
| 45 | + requestResult = prometheus.NewCounterVec( |
| 46 | + prometheus.CounterOpts{ |
| 47 | + Name: "rest_client_requests_total", |
| 48 | + Help: "Number of HTTP requests, partitioned by status code, method, and host.", |
| 49 | + }, |
| 50 | + []string{"code", "method", "host"}, |
| 51 | + ) |
| 52 | + |
| 53 | + // reflector metrics |
| 54 | + |
| 55 | + // TODO(directxman12): update these to be histograms once the metrics overhaul KEP |
| 56 | + // PRs start landing. |
| 57 | + |
| 58 | + reflectorSubsystem = "reflector" |
| 59 | + |
| 60 | + listsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 61 | + Subsystem: reflectorSubsystem, |
| 62 | + Name: "lists_total", |
| 63 | + Help: "Total number of API lists done by the reflectors", |
| 64 | + }, []string{"name"}) |
| 65 | + |
| 66 | + listsDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 67 | + Subsystem: reflectorSubsystem, |
| 68 | + Name: "list_duration_seconds", |
| 69 | + Help: "How long an API list takes to return and decode for the reflectors", |
| 70 | + }, []string{"name"}) |
| 71 | + |
| 72 | + itemsPerList = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 73 | + Subsystem: reflectorSubsystem, |
| 74 | + Name: "items_per_list", |
| 75 | + Help: "How many items an API list returns to the reflectors", |
| 76 | + }, []string{"name"}) |
| 77 | + |
| 78 | + watchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 79 | + Subsystem: reflectorSubsystem, |
| 80 | + Name: "watches_total", |
| 81 | + Help: "Total number of API watches done by the reflectors", |
| 82 | + }, []string{"name"}) |
| 83 | + |
| 84 | + shortWatchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ |
| 85 | + Subsystem: reflectorSubsystem, |
| 86 | + Name: "short_watches_total", |
| 87 | + Help: "Total number of short API watches done by the reflectors", |
| 88 | + }, []string{"name"}) |
| 89 | + |
| 90 | + watchDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 91 | + Subsystem: reflectorSubsystem, |
| 92 | + Name: "watch_duration_seconds", |
| 93 | + Help: "How long an API watch takes to return and decode for the reflectors", |
| 94 | + }, []string{"name"}) |
| 95 | + |
| 96 | + itemsPerWatch = prometheus.NewSummaryVec(prometheus.SummaryOpts{ |
| 97 | + Subsystem: reflectorSubsystem, |
| 98 | + Name: "items_per_watch", |
| 99 | + Help: "How many items an API watch returns to the reflectors", |
| 100 | + }, []string{"name"}) |
| 101 | + |
| 102 | + lastResourceVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
| 103 | + Subsystem: reflectorSubsystem, |
| 104 | + Name: "last_resource_version", |
| 105 | + Help: "Last resource version seen for the reflectors", |
| 106 | + }, []string{"name"}) |
| 107 | +) |
| 108 | + |
| 109 | +func init() { |
| 110 | + registerClientMetrics() |
| 111 | + registerReflectorMetrics() |
| 112 | + registerWorkqueueMetrics() |
| 113 | +} |
| 114 | + |
| 115 | +// registerClientMetrics sets up the client latency metrics from client-go |
| 116 | +func registerClientMetrics() { |
| 117 | + // register the metrics with our registry |
| 118 | + Registry.MustRegister(requestLatency) |
| 119 | + Registry.MustRegister(requestResult) |
| 120 | + |
| 121 | + // register the metrics with client-go |
| 122 | + clientmetrics.Register(&latencyAdapter{metric: requestLatency}, &resultAdapter{metric: requestResult}) |
| 123 | +} |
| 124 | + |
| 125 | +// registerReflectorMetrics sets up reflector (reconile) loop metrics |
| 126 | +func registerReflectorMetrics() { |
| 127 | + Registry.MustRegister(listsTotal) |
| 128 | + Registry.MustRegister(listsDuration) |
| 129 | + Registry.MustRegister(itemsPerList) |
| 130 | + Registry.MustRegister(watchesTotal) |
| 131 | + Registry.MustRegister(shortWatchesTotal) |
| 132 | + Registry.MustRegister(watchDuration) |
| 133 | + Registry.MustRegister(itemsPerWatch) |
| 134 | + Registry.MustRegister(lastResourceVersion) |
| 135 | + |
| 136 | + reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{}) |
| 137 | +} |
| 138 | + |
| 139 | +// registerWorkQueueMetrics sets up workqueue (other reconcile) metrics |
| 140 | +func registerWorkqueueMetrics() { |
| 141 | + workqueuemetrics.SetProvider(workqueueMetricsProvider{}) |
| 142 | +} |
| 143 | + |
| 144 | +// this section contains adapters, implementations, and other sundry organic, artisanally |
| 145 | +// hand-crafted syntax trees required to convince client-go that it actually wants to let |
| 146 | +// someone use its metrics. |
| 147 | + |
| 148 | +// Client metrics adapters (method #1 for client-go metrics), |
| 149 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 150 | +// (which isn't anywhere in an easily-importable place). |
| 151 | + |
| 152 | +type latencyAdapter struct { |
| 153 | + metric *prometheus.HistogramVec |
| 154 | +} |
| 155 | + |
| 156 | +func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) { |
| 157 | + l.metric.WithLabelValues(verb, u.String()).Observe(latency.Seconds()) |
| 158 | +} |
| 159 | + |
| 160 | +type resultAdapter struct { |
| 161 | + metric *prometheus.CounterVec |
| 162 | +} |
| 163 | + |
| 164 | +func (r *resultAdapter) Increment(code, method, host string) { |
| 165 | + r.metric.WithLabelValues(code, method, host).Inc() |
| 166 | +} |
| 167 | + |
| 168 | +// Reflector metrics provider (method #2 for client-go metrics), |
| 169 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 170 | +// (which isn't anywhere in an easily-importable place). |
| 171 | + |
| 172 | +type reflectorMetricsProvider struct{} |
| 173 | + |
| 174 | +func (reflectorMetricsProvider) NewListsMetric(name string) reflectormetrics.CounterMetric { |
| 175 | + return listsTotal.WithLabelValues(name) |
| 176 | +} |
| 177 | + |
| 178 | +func (reflectorMetricsProvider) NewListDurationMetric(name string) reflectormetrics.SummaryMetric { |
| 179 | + return listsDuration.WithLabelValues(name) |
| 180 | +} |
| 181 | + |
| 182 | +func (reflectorMetricsProvider) NewItemsInListMetric(name string) reflectormetrics.SummaryMetric { |
| 183 | + return itemsPerList.WithLabelValues(name) |
| 184 | +} |
| 185 | + |
| 186 | +func (reflectorMetricsProvider) NewWatchesMetric(name string) reflectormetrics.CounterMetric { |
| 187 | + return watchesTotal.WithLabelValues(name) |
| 188 | +} |
| 189 | + |
| 190 | +func (reflectorMetricsProvider) NewShortWatchesMetric(name string) reflectormetrics.CounterMetric { |
| 191 | + return shortWatchesTotal.WithLabelValues(name) |
| 192 | +} |
| 193 | + |
| 194 | +func (reflectorMetricsProvider) NewWatchDurationMetric(name string) reflectormetrics.SummaryMetric { |
| 195 | + return watchDuration.WithLabelValues(name) |
| 196 | +} |
| 197 | + |
| 198 | +func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetrics.SummaryMetric { |
| 199 | + return itemsPerWatch.WithLabelValues(name) |
| 200 | +} |
| 201 | + |
| 202 | +func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric { |
| 203 | + return lastResourceVersion.WithLabelValues(name) |
| 204 | +} |
| 205 | + |
| 206 | +// Workqueue metrics (method #3 for client-go metrics), |
| 207 | +// copied (more-or-less directly) from k8s.io/kubernetes setup code |
| 208 | +// (which isn't anywhere in an easily-importable place). |
| 209 | + |
| 210 | +// NB(directxman12): these are changed to MustRegister from Register. It's not clear why they weren't |
| 211 | +// MustRegister in the first place, except maybe to not bring down the controller if the metrics fail |
| 212 | +// to register (which shouldn't happen unless there's a duplicate metric). |
| 213 | + |
| 214 | +type workqueueMetricsProvider struct{} |
| 215 | + |
| 216 | +func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric { |
| 217 | + depth := prometheus.NewGauge(prometheus.GaugeOpts{ |
| 218 | + Subsystem: name, |
| 219 | + Name: "depth", |
| 220 | + Help: "Current depth of workqueue: " + name, |
| 221 | + }) |
| 222 | + Registry.MustRegister(depth) |
| 223 | + return depth |
| 224 | +} |
| 225 | + |
| 226 | +func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric { |
| 227 | + adds := prometheus.NewCounter(prometheus.CounterOpts{ |
| 228 | + Subsystem: name, |
| 229 | + Name: "adds", |
| 230 | + Help: "Total number of adds handled by workqueue: " + name, |
| 231 | + }) |
| 232 | + Registry.MustRegister(adds) |
| 233 | + return adds |
| 234 | +} |
| 235 | + |
| 236 | +func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.SummaryMetric { |
| 237 | + latency := prometheus.NewSummary(prometheus.SummaryOpts{ |
| 238 | + Subsystem: name, |
| 239 | + Name: "queue_latency", |
| 240 | + Help: "How long an item stays in workqueue" + name + " before being requested.", |
| 241 | + }) |
| 242 | + Registry.MustRegister(latency) |
| 243 | + return latency |
| 244 | +} |
| 245 | + |
| 246 | +func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.SummaryMetric { |
| 247 | + workDuration := prometheus.NewSummary(prometheus.SummaryOpts{ |
| 248 | + Subsystem: name, |
| 249 | + Name: "work_duration", |
| 250 | + Help: "How long processing an item from workqueue" + name + " takes.", |
| 251 | + }) |
| 252 | + Registry.MustRegister(workDuration) |
| 253 | + return workDuration |
| 254 | +} |
| 255 | + |
| 256 | +func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric { |
| 257 | + retries := prometheus.NewCounter(prometheus.CounterOpts{ |
| 258 | + Subsystem: name, |
| 259 | + Name: "retries", |
| 260 | + Help: "Total number of retries handled by workqueue: " + name, |
| 261 | + }) |
| 262 | + Registry.MustRegister(retries) |
| 263 | + return retries |
| 264 | +} |
0 commit comments