Skip to content

Commit 13538cb

Browse files
authored
FEAT: LBC metric collector (#3941)
1 parent c701a42 commit 13538cb

File tree

10 files changed

+205
-77
lines changed

10 files changed

+205
-77
lines changed

main.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ import (
3939
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
4040
"sigs.k8s.io/aws-load-balancer-controller/pkg/inject"
4141
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
42+
awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
43+
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
4244
"sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
4345
"sigs.k8s.io/aws-load-balancer-controller/pkg/runtime"
4446
"sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding"
@@ -81,7 +83,14 @@ func main() {
8183
ctrl.SetLogger(appLogger)
8284
klog.SetLoggerWithOptions(appLogger, klog.ContextualLogger(true))
8385

84-
cloud, err := aws.NewCloud(controllerCFG.AWSConfig, metrics.Registry, ctrl.Log, nil)
86+
var awsMetricsCollector *awsmetrics.Collector
87+
lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry)
88+
89+
if metrics.Registry != nil {
90+
awsMetricsCollector = awsmetrics.NewCollector(metrics.Registry)
91+
}
92+
93+
cloud, err := aws.NewCloud(controllerCFG.AWSConfig, awsMetricsCollector, ctrl.Log, nil)
8594
if err != nil {
8695
setupLog.Error(err, "unable to initialize AWS cloud")
8796
os.Exit(1)
@@ -113,7 +122,7 @@ func main() {
113122
subnetResolver := networking.NewDefaultSubnetsResolver(azInfoProvider, cloud.EC2(), cloud.VpcID(), controllerCFG.ClusterName, ctrl.Log.WithName("subnets-resolver"))
114123
multiClusterManager := targetgroupbinding.NewMultiClusterManager(mgr.GetClient(), mgr.GetAPIReader(), ctrl.Log)
115124
tgbResManager := targetgroupbinding.NewDefaultResourceManager(mgr.GetClient(), cloud.ELBV2(), cloud.EC2(),
116-
podInfoRepo, sgManager, sgReconciler, vpcInfoProvider, multiClusterManager,
125+
podInfoRepo, sgManager, sgReconciler, vpcInfoProvider, multiClusterManager, lbcMetricsCollector,
117126
cloud.VpcID(), controllerCFG.ClusterName, controllerCFG.FeatureGates.Enabled(config.EndpointsFailOpen), controllerCFG.EnableEndpointSlices, controllerCFG.DisableRestrictedSGRules,
118127
controllerCFG.ServiceTargetENISGTags, mgr.GetEventRecorderFor("targetGroupBinding"), ctrl.Log)
119128
backendSGProvider := networking.NewBackendSGProvider(controllerCFG.ClusterName, controllerCFG.BackendSecurityGroup,

pkg/aws/cloud.go

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
smithymiddleware "github.com/aws/smithy-go/middleware"
1212
"net"
1313
"os"
14-
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/metrics"
1514
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/throttle"
1615
"sigs.k8s.io/aws-load-balancer-controller/pkg/version"
1716
"strings"
@@ -21,11 +20,11 @@ import (
2120
"github.com/aws/aws-sdk-go-v2/service/ec2"
2221
"github.com/go-logr/logr"
2322
"github.com/pkg/errors"
24-
"github.com/prometheus/client_golang/prometheus"
2523
amerrors "k8s.io/apimachinery/pkg/util/errors"
2624
epresolver "sigs.k8s.io/aws-load-balancer-controller/pkg/aws/endpoints"
2725
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/provider"
2826
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
27+
aws_metrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
2928
)
3029

3130
const userAgent = "elbv2.k8s.aws"
@@ -60,7 +59,7 @@ type Cloud interface {
6059
}
6160

6261
// NewCloud constructs new Cloud implementation.
63-
func NewCloud(cfg CloudConfig, metricsRegisterer prometheus.Registerer, logger logr.Logger, awsClientsProvider provider.AWSClientsProvider) (Cloud, error) {
62+
func NewCloud(cfg CloudConfig, metricsCollector *aws_metrics.Collector, logger logr.Logger, awsClientsProvider provider.AWSClientsProvider) (Cloud, error) {
6463
hasIPv4 := true
6564
addrs, err := net.InterfaceAddrs()
6665
if err == nil {
@@ -122,12 +121,8 @@ func NewCloud(cfg CloudConfig, metricsRegisterer prometheus.Registerer, logger l
122121
})
123122
}
124123

125-
if metricsRegisterer != nil {
126-
metricsCollector, err := metrics.NewCollector(metricsRegisterer)
127-
if err != nil {
128-
return nil, errors.Wrapf(err, "failed to initialize sdk metrics collector")
129-
}
130-
awsConfig.APIOptions = metrics.WithSDKMetricCollector(metricsCollector, awsConfig.APIOptions)
124+
if metricsCollector != nil {
125+
awsConfig.APIOptions = aws_metrics.WithSDKMetricCollector(metricsCollector, awsConfig.APIOptions)
131126
}
132127

133128
if awsClientsProvider == nil {

pkg/aws/metrics/collector.go renamed to pkg/metrics/aws/collector.go

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package metrics
1+
package aws
22

33
import (
44
"context"
@@ -18,24 +18,21 @@ const (
1818
sdkMiddlewareCollectAPIRequestMetric = "collectAPIRequestMetric"
1919
)
2020

21-
type collector struct {
21+
type Collector struct {
2222
instruments *instruments
2323
}
2424

25-
func NewCollector(registerer prometheus.Registerer) (*collector, error) {
26-
instruments, err := newInstruments(registerer)
27-
if err != nil {
28-
return nil, err
29-
}
30-
return &collector{
25+
func NewCollector(registerer prometheus.Registerer) *Collector {
26+
instruments := newInstruments(registerer)
27+
return &Collector{
3128
instruments: instruments,
32-
}, nil
29+
}
3330
}
3431

3532
/*
3633
WithSDKMetricCollector is a function that collects prometheus metrics for the AWS SDK Go v2 API calls and requests
3734
*/
38-
func WithSDKMetricCollector(c *collector, apiOptions []func(*smithymiddleware.Stack) error) []func(*smithymiddleware.Stack) error {
35+
func WithSDKMetricCollector(c *Collector, apiOptions []func(*smithymiddleware.Stack) error) []func(*smithymiddleware.Stack) error {
3936
apiOptions = append(apiOptions, func(stack *smithymiddleware.Stack) error {
4037
return WithSDKCallMetricCollector(c)(stack)
4138
}, func(stack *smithymiddleware.Stack) error {
@@ -48,7 +45,7 @@ func WithSDKMetricCollector(c *collector, apiOptions []func(*smithymiddleware.St
4845
WithSDKCallMetricCollector is a middleware for the AWS SDK Go v2 that collects and reports metrics on API calls.
4946
The call metrics are collected after the call is completed
5047
*/
51-
func WithSDKCallMetricCollector(c *collector) func(stack *smithymiddleware.Stack) error {
48+
func WithSDKCallMetricCollector(c *Collector) func(stack *smithymiddleware.Stack) error {
5249
return func(stack *smithymiddleware.Stack) error {
5350
return stack.Initialize.Add(smithymiddleware.InitializeMiddlewareFunc(sdkMiddlewareCollectAPICallMetric, func(
5451
ctx context.Context, input smithymiddleware.InitializeInput, next smithymiddleware.InitializeHandler,
@@ -91,7 +88,7 @@ func WithSDKCallMetricCollector(c *collector) func(stack *smithymiddleware.Stack
9188
WithSDKRequestMetricCollector is a middleware for the AWS SDK Go v2 that collects and reports metrics on API requests.
9289
The request metrics are collected after each retry attempt
9390
*/
94-
func WithSDKRequestMetricCollector(c *collector) func(stack *smithymiddleware.Stack) error {
91+
func WithSDKRequestMetricCollector(c *Collector) func(stack *smithymiddleware.Stack) error {
9592
return func(stack *smithymiddleware.Stack) error {
9693
return stack.Finalize.Add(smithymiddleware.FinalizeMiddlewareFunc(sdkMiddlewareCollectAPIRequestMetric, func(
9794
ctx context.Context, input smithymiddleware.FinalizeInput, next smithymiddleware.FinalizeHandler,

pkg/aws/metrics/collector_test.go renamed to pkg/metrics/aws/collector_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package metrics
1+
package aws
22

33
import (
44
"errors"
Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
package metrics
1+
package aws
22

33
import (
44
"github.com/prometheus/client_golang/prometheus"
55
)
66

77
const (
8-
metricSubsystemAWS = "aws"
8+
metricSubSystem = "aws"
99

1010
metricAPICallsTotal = "api_calls_total"
1111
metricAPICallDurationSeconds = "api_call_duration_seconds"
@@ -31,55 +31,41 @@ type instruments struct {
3131
}
3232

3333
// newInstruments allocates new metrics and registers them with registerer
34-
func newInstruments(registerer prometheus.Registerer) (*instruments, error) {
34+
func newInstruments(registerer prometheus.Registerer) *instruments {
3535
apiCallsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
36-
Subsystem: metricSubsystemAWS,
36+
Subsystem: metricSubSystem,
3737
Name: metricAPICallsTotal,
3838
Help: "Total number of SDK API calls from the customer's code to AWS services",
3939
}, []string{labelService, labelOperation, labelStatusCode, labelErrorCode})
4040
apiCallDurationSeconds := prometheus.NewHistogramVec(prometheus.HistogramOpts{
41-
Subsystem: metricSubsystemAWS,
41+
Subsystem: metricSubSystem,
4242
Name: metricAPICallDurationSeconds,
4343
Help: "Perceived latency from when your code makes an SDK call, includes retries",
4444
}, []string{labelService, labelOperation})
4545
apiCallRetries := prometheus.NewHistogramVec(prometheus.HistogramOpts{
46-
Subsystem: metricSubsystemAWS,
46+
Subsystem: metricSubSystem,
4747
Name: metricAPICallRetries,
4848
Help: "Number of times the SDK retried requests to AWS services for SDK API calls",
4949
Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
5050
}, []string{labelService, labelOperation})
5151

5252
apiRequestsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
53-
Subsystem: metricSubsystemAWS,
53+
Subsystem: metricSubSystem,
5454
Name: metricAPIRequestsTotal,
5555
Help: "Total number of HTTP requests that the SDK made",
5656
}, []string{labelService, labelOperation, labelStatusCode, labelErrorCode})
5757
apiRequestDurationSecond := prometheus.NewHistogramVec(prometheus.HistogramOpts{
58-
Subsystem: metricSubsystemAWS,
58+
Subsystem: metricSubSystem,
5959
Name: metricAPIRequestDurationSeconds,
6060
Help: "Latency of an individual HTTP request to the service endpoint",
6161
}, []string{labelService, labelOperation})
6262

63-
if err := registerer.Register(apiCallsTotal); err != nil {
64-
return nil, err
65-
}
66-
if err := registerer.Register(apiCallDurationSeconds); err != nil {
67-
return nil, err
68-
}
69-
if err := registerer.Register(apiCallRetries); err != nil {
70-
return nil, err
71-
}
72-
if err := registerer.Register(apiRequestsTotal); err != nil {
73-
return nil, err
74-
}
75-
if err := registerer.Register(apiRequestDurationSecond); err != nil {
76-
return nil, err
77-
}
63+
registerer.MustRegister(apiCallsTotal, apiCallDurationSeconds, apiCallRetries, apiRequestsTotal, apiRequestDurationSecond)
7864
return &instruments{
7965
apiCallsTotal: apiCallsTotal,
8066
apiCallDurationSeconds: apiCallDurationSeconds,
8167
apiCallRetries: apiCallRetries,
8268
apiRequestsTotal: apiRequestsTotal,
8369
apiRequestDurationSecond: apiRequestDurationSecond,
84-
}, nil
70+
}
8571
}

pkg/metrics/lbc/collector.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package lbc
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
"time"
6+
)
7+
8+
type MetricCollector interface {
9+
// ObservePodReadinessGateReady records a metric that is useful for determining how fast pods are becoming ready in the load balancer.
10+
// Due to some architectural constraints, we can only emit this metric for pods that are using readiness gates.
11+
ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration)
12+
}
13+
14+
type collector struct {
15+
instruments *instruments
16+
}
17+
18+
type noOpCollector struct{}
19+
20+
func (n *noOpCollector) ObservePodReadinessGateReady(_ string, _ string, _ time.Duration) {
21+
}
22+
23+
func NewCollector(registerer prometheus.Registerer) MetricCollector {
24+
if registerer == nil {
25+
return &noOpCollector{}
26+
}
27+
28+
instruments := newInstruments(registerer)
29+
return &collector{
30+
instruments: instruments,
31+
}
32+
}
33+
34+
func (c *collector) ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration) {
35+
c.instruments.podReadinessFlipSeconds.With(prometheus.Labels{
36+
labelNamespace: namespace,
37+
labelName: tgbName,
38+
}).Observe(duration.Seconds())
39+
}

pkg/metrics/lbc/instruments.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package lbc
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
)
6+
7+
const (
8+
metricSubsystem = "awslbc"
9+
)
10+
11+
// These metrics are exported to be used in unit test validation.
12+
const (
13+
// MetricPodReadinessGateReady tracks the time to flip a readiness gate to true
14+
MetricPodReadinessGateReady = "readiness_gate_ready_seconds"
15+
)
16+
17+
const (
18+
labelNamespace = "namespace"
19+
labelName = "name"
20+
)
21+
22+
type instruments struct {
23+
podReadinessFlipSeconds *prometheus.HistogramVec
24+
}
25+
26+
// newInstruments allocates new metrics and registers them with registerer
27+
func newInstruments(registerer prometheus.Registerer) *instruments {
28+
podReadinessFlipSeconds := prometheus.NewHistogramVec(prometheus.HistogramOpts{
29+
Subsystem: metricSubsystem,
30+
Name: MetricPodReadinessGateReady,
31+
Help: "Latency from pod getting added to the load balancer until the readiness gate is flipped to healthy.",
32+
}, []string{labelNamespace, labelName})
33+
34+
registerer.MustRegister(podReadinessFlipSeconds)
35+
return &instruments{
36+
podReadinessFlipSeconds: podReadinessFlipSeconds,
37+
}
38+
}

pkg/metrics/lbc/mockcollector.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package lbc
2+
3+
import (
4+
"time"
5+
)
6+
7+
type MockCollector struct {
8+
Invocations map[string][]interface{}
9+
}
10+
11+
type MockHistogramMetric struct {
12+
namespace string
13+
name string
14+
duration time.Duration
15+
}
16+
17+
func (m *MockCollector) ObservePodReadinessGateReady(namespace string, tgbName string, d time.Duration) {
18+
m.recordHistogram(MetricPodReadinessGateReady, namespace, tgbName, d)
19+
}
20+
21+
func (m *MockCollector) recordHistogram(metricName string, namespace string, name string, d time.Duration) {
22+
m.Invocations[metricName] = append(m.Invocations[MetricPodReadinessGateReady], MockHistogramMetric{
23+
namespace: namespace,
24+
name: name,
25+
duration: d,
26+
})
27+
}
28+
29+
func NewMockCollector() MetricCollector {
30+
31+
mockInvocations := make(map[string][]interface{})
32+
mockInvocations[MetricPodReadinessGateReady] = make([]interface{}, 0)
33+
34+
return &MockCollector{
35+
Invocations: mockInvocations,
36+
}
37+
}

0 commit comments

Comments
 (0)