Skip to content

Commit 2e740a7

Browse files
authored
fix: health check timeout for services/NLB (#2899)
* fix: health check timeout for services/NLB The annotation for health check timeout was not consumed at all for services * add feature gate ServiceHealthCheckTimeout allows people to set old behavior * feat: rename featuregate
1 parent b5e9427 commit 2e740a7

File tree

5 files changed

+54
-9
lines changed

5 files changed

+54
-9
lines changed

docs/deploy/configurations.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ This document covers configuration of the AWS Load Balancer controller
33

44
!!!warning "limitation"
55
The v2.0.0+ version of AWSLoadBalancerController currently only supports one controller deployment (with one or multiple replicas) per cluster.
6-
6+
77
The AWSLoadBalancerController assumes it's the sole owner of worker node security group rules with the `elbv2.k8s.aws/targetGroupBinding=shared` description; running multiple controller deployments will cause these controllers to compete with each other when updating worker node security group rules.
8-
8+
99
We will remove this limitation in future versions: [tracking issue](https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/2185)
1010

1111
## AWS API Access
@@ -158,3 +158,4 @@ They are a set of key=value pairs that describe AWS load balancer controller feat
158158
| EnableServiceController | string | true | Toggles support for `Service` type resources. |
159159
| EnableIPTargetType | string | true | Used to toggle support for target-type `ip` across `Ingress` and `Service` type resources. |
160160
| SubnetsClusterTagCheck | string | true | Enable or disable the check for `kubernetes.io/cluster/${cluster-name}` during subnet auto-discovery |
161+
| NLBHealthCheckTimeout | string | true | Enable or disable the use of `service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout` for `Service` type resources (NLB) |

pkg/config/feature_gates.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ package config
22

33
import (
44
"fmt"
5-
"github.com/spf13/pflag"
65
"strconv"
76
"strings"
7+
8+
"github.com/spf13/pflag"
89
)
910

1011
type Feature string
@@ -17,6 +18,7 @@ const (
1718
EnableServiceController Feature = "EnableServiceController"
1819
EnableIPTargetType Feature = "EnableIPTargetType"
1920
SubnetsClusterTagCheck Feature = "SubnetsClusterTagCheck"
21+
NLBHealthCheckTimeout Feature = "NLBHealthCheckTimeout"
2022
)
2123

2224
type FeatureGates interface {
@@ -51,6 +53,7 @@ func NewFeatureGates() FeatureGates {
5153
EnableServiceController: true,
5254
EnableIPTargetType: true,
5355
SubnetsClusterTagCheck: true,
56+
NLBHealthCheckTimeout: true,
5457
},
5558
}
5659
}

pkg/service/model_build_target_group.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"k8s.io/apimachinery/pkg/util/intstr"
1919
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
2020
"sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
21+
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
2122
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
2223
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
2324
"sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
@@ -113,6 +114,13 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigDefault(ctx con
113114
if err != nil {
114115
return nil, err
115116
}
117+
var healthCheckTimeoutSeconds *int64
118+
if t.featureGates.Enabled(config.NLBHealthCheckTimeout) {
119+
healthCheckTimeoutSeconds, err = t.buildTargetGroupHealthCheckTimeoutSeconds(ctx, t.defaultHealthCheckTimeout)
120+
if err != nil {
121+
return nil, err
122+
}
123+
}
116124
healthyThresholdCount, err := t.buildTargetGroupHealthCheckHealthyThresholdCount(ctx, t.defaultHealthCheckHealthyThreshold)
117125
if err != nil {
118126
return nil, err
@@ -126,6 +134,7 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigDefault(ctx con
126134
Protocol: &healthCheckProtocol,
127135
Path: healthCheckPathPtr,
128136
IntervalSeconds: &intervalSeconds,
137+
TimeoutSeconds: healthCheckTimeoutSeconds,
129138
HealthyThresholdCount: &healthyThresholdCount,
130139
UnhealthyThresholdCount: &unhealthyThresholdCount,
131140
}, nil
@@ -148,6 +157,13 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigForInstanceMode
148157
if err != nil {
149158
return nil, err
150159
}
160+
var healthCheckTimeoutSeconds *int64
161+
if t.featureGates.Enabled(config.NLBHealthCheckTimeout) {
162+
healthCheckTimeoutSeconds, err = t.buildTargetGroupHealthCheckTimeoutSeconds(ctx, t.defaultHealthCheckTimeoutForInstanceModeLocal)
163+
if err != nil {
164+
return nil, err
165+
}
166+
}
151167
healthyThresholdCount, err := t.buildTargetGroupHealthCheckHealthyThresholdCount(ctx, t.defaultHealthCheckHealthyThresholdForInstanceModeLocal)
152168
if err != nil {
153169
return nil, err
@@ -161,6 +177,7 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckConfigForInstanceMode
161177
Protocol: &healthCheckProtocol,
162178
Path: healthCheckPathPtr,
163179
IntervalSeconds: &intervalSeconds,
180+
TimeoutSeconds: healthCheckTimeoutSeconds,
164181
HealthyThresholdCount: &healthyThresholdCount,
165182
UnhealthyThresholdCount: &unhealthyThresholdCount,
166183
}, nil
@@ -308,12 +325,12 @@ func (t *defaultModelBuildTask) buildTargetGroupHealthCheckIntervalSeconds(_ con
308325
return intervalSeconds, nil
309326
}
310327

311-
func (t *defaultModelBuildTask) buildTargetGroupHealthCheckTimeoutSeconds(_ context.Context, defaultHealthCheckTimeout int64) (int64, error) {
328+
func (t *defaultModelBuildTask) buildTargetGroupHealthCheckTimeoutSeconds(_ context.Context, defaultHealthCheckTimeout int64) (*int64, error) {
312329
timeoutSeconds := defaultHealthCheckTimeout
313330
if _, err := t.annotationParser.ParseInt64Annotation(annotations.SvcLBSuffixHCTimeout, &timeoutSeconds, t.service.Annotations); err != nil {
314-
return 0, err
331+
return nil, err
315332
}
316-
return timeoutSeconds, nil
333+
return &timeoutSeconds, nil
317334
}
318335

319336
func (t *defaultModelBuildTask) buildTargetGroupHealthCheckHealthyThresholdCount(_ context.Context, defaultHealthCheckHealthyThreshold int64) (int64, error) {

pkg/service/model_build_target_group_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"k8s.io/apimachinery/pkg/util/intstr"
1616
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
1717
"sigs.k8s.io/aws-load-balancer-controller/pkg/annotations"
18+
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
1819
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
1920
)
2021

@@ -183,6 +184,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
183184
Port: &trafficPort,
184185
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
185186
IntervalSeconds: aws.Int64(10),
187+
TimeoutSeconds: aws.Int64(10),
186188
HealthyThresholdCount: aws.Int64(3),
187189
UnhealthyThresholdCount: aws.Int64(3),
188190
},
@@ -209,6 +211,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
209211
Protocol: (*elbv2.Protocol)(aws.String("HTTP")),
210212
Path: aws.String("/healthz"),
211213
IntervalSeconds: aws.Int64(10),
214+
TimeoutSeconds: aws.Int64(30),
212215
HealthyThresholdCount: aws.Int64(2),
213216
UnhealthyThresholdCount: aws.Int64(2),
214217
},
@@ -229,6 +232,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
229232
Protocol: (*elbv2.Protocol)(aws.String("HTTP")),
230233
Path: aws.String("/"),
231234
IntervalSeconds: aws.Int64(10),
235+
TimeoutSeconds: aws.Int64(10),
232236
HealthyThresholdCount: aws.Int64(3),
233237
UnhealthyThresholdCount: aws.Int64(3),
234238
},
@@ -284,6 +288,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
284288
Port: &trafficPort,
285289
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
286290
IntervalSeconds: aws.Int64(10),
291+
TimeoutSeconds: aws.Int64(10),
287292
HealthyThresholdCount: aws.Int64(3),
288293
UnhealthyThresholdCount: aws.Int64(3),
289294
},
@@ -304,6 +309,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
304309
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolHTTP))),
305310
Path: aws.String("/healthz"),
306311
IntervalSeconds: aws.Int64(10),
312+
TimeoutSeconds: aws.Int64(6),
307313
HealthyThresholdCount: aws.Int64(2),
308314
UnhealthyThresholdCount: aws.Int64(2),
309315
},
@@ -333,6 +339,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
333339
Port: &port8888,
334340
Protocol: (*elbv2.Protocol)(aws.String(string(elbv2.ProtocolTCP))),
335341
IntervalSeconds: aws.Int64(10),
342+
TimeoutSeconds: aws.Int64(30),
336343
HealthyThresholdCount: aws.Int64(5),
337344
UnhealthyThresholdCount: aws.Int64(5),
338345
},
@@ -345,6 +352,7 @@ func Test_defaultModelBuilderTask_buildTargetHealthCheck(t *testing.T) {
345352
builder := &defaultModelBuildTask{
346353
service: tt.svc,
347354
annotationParser: parser,
355+
featureGates: config.NewFeatureGates(),
348356
defaultAccessLogsS3Bucket: "",
349357
defaultAccessLogsS3Prefix: "",
350358
defaultLoadBalancingCrossZoneEnabled: false,

pkg/service/model_builder_test.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
190190
"port":"traffic-port",
191191
"protocol":"TCP",
192192
"intervalSeconds":10,
193+
"timeoutSeconds":10,
193194
"healthyThresholdCount":3,
194195
"unhealthyThresholdCount":3
195196
},
@@ -334,6 +335,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
334335
"port":"traffic-port",
335336
"protocol":"TCP",
336337
"intervalSeconds":10,
338+
"timeoutSeconds":10,
337339
"healthyThresholdCount":3,
338340
"unhealthyThresholdCount":3
339341
},
@@ -518,6 +520,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
518520
"protocol":"HTTP",
519521
"path":"/healthz",
520522
"intervalSeconds":10,
523+
"timeoutSeconds":30,
521524
"healthyThresholdCount":2,
522525
"unhealthyThresholdCount":2
523526
},
@@ -541,6 +544,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
541544
"protocol":"HTTP",
542545
"path":"/healthz",
543546
"intervalSeconds":10,
547+
"timeoutSeconds":30,
544548
"healthyThresholdCount":2,
545549
"unhealthyThresholdCount":2
546550
},
@@ -852,6 +856,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
852856
"protocol":"HTTP",
853857
"path":"/healthz",
854858
"intervalSeconds":10,
859+
"timeoutSeconds":30,
855860
"healthyThresholdCount":2,
856861
"unhealthyThresholdCount":2
857862
},
@@ -875,6 +880,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
875880
"protocol":"HTTP",
876881
"path":"/healthz",
877882
"intervalSeconds":10,
883+
"timeoutSeconds":30,
878884
"healthyThresholdCount":2,
879885
"unhealthyThresholdCount":2
880886
},
@@ -1180,6 +1186,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
11801186
"port": "traffic-port",
11811187
"protocol":"TCP",
11821188
"intervalSeconds":10,
1189+
"timeoutSeconds":10,
11831190
"healthyThresholdCount":3,
11841191
"unhealthyThresholdCount":3
11851192
},
@@ -1202,6 +1209,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
12021209
"port":"traffic-port",
12031210
"protocol":"TCP",
12041211
"intervalSeconds":10,
1212+
"timeoutSeconds":10,
12051213
"healthyThresholdCount":3,
12061214
"unhealthyThresholdCount":3
12071215
},
@@ -1448,8 +1456,9 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
14481456
"healthCheckConfig":{
14491457
"port": 29123,
14501458
"protocol":"HTTP",
1451-
"path":"/healthz",
1459+
"path":"/healthz",
14521460
"intervalSeconds":10,
1461+
"timeoutSeconds":6,
14531462
"healthyThresholdCount":2,
14541463
"unhealthyThresholdCount":2
14551464
},
@@ -1471,8 +1480,9 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
14711480
"healthCheckConfig":{
14721481
"port": 29123,
14731482
"protocol":"HTTP",
1474-
"path":"/healthz",
1483+
"path":"/healthz",
14751484
"intervalSeconds":10,
1485+
"timeoutSeconds":6,
14761486
"healthyThresholdCount":2,
14771487
"unhealthyThresholdCount":2
14781488
},
@@ -1729,6 +1739,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
17291739
"port":"traffic-port",
17301740
"protocol":"TCP",
17311741
"intervalSeconds":10,
1742+
"timeoutSeconds":10,
17321743
"healthyThresholdCount":3,
17331744
"unhealthyThresholdCount":3
17341745
},
@@ -1941,7 +1952,8 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
19411952
"unhealthyThresholdCount": 3,
19421953
"protocol": "TCP",
19431954
"port": "traffic-port",
1944-
"intervalSeconds": 10
1955+
"intervalSeconds": 10,
1956+
"timeoutSeconds":10
19451957
},
19461958
"targetGroupAttributes": [
19471959
{
@@ -2210,6 +2222,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
22102222
"port": "traffic-port",
22112223
"protocol": "TCP",
22122224
"intervalSeconds": 10,
2225+
"timeoutSeconds":10,
22132226
"healthyThresholdCount": 3,
22142227
"unhealthyThresholdCount": 3
22152228
},
@@ -2355,6 +2368,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
23552368
"port": "traffic-port",
23562369
"protocol": "TCP",
23572370
"intervalSeconds": 10,
2371+
"timeoutSeconds":10,
23582372
"healthyThresholdCount": 3,
23592373
"unhealthyThresholdCount": 3
23602374
},
@@ -2551,6 +2565,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
25512565
"port": "traffic-port",
25522566
"protocol": "TCP",
25532567
"intervalSeconds": 10,
2568+
"timeoutSeconds":10,
25542569
"healthyThresholdCount": 3,
25552570
"unhealthyThresholdCount": 3
25562571
},
@@ -2693,6 +2708,7 @@ func Test_defaultModelBuilderTask_Build(t *testing.T) {
26932708
"port":"traffic-port",
26942709
"protocol":"TCP",
26952710
"intervalSeconds":10,
2711+
"timeoutSeconds":10,
26962712
"healthyThresholdCount":3,
26972713
"unhealthyThresholdCount":3
26982714
},

0 commit comments

Comments
 (0)