Skip to content

Commit 79ae371

Browse files
authored
Add flags to configure controller parameters (#492)
* Add flags to configure controller parameters * update flag names
1 parent b709b9c commit 79ae371

File tree

6 files changed

+46
-44
lines changed

6 files changed

+46
-44
lines changed

controllers/core/configmap_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func (r *ConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
9191
)
9292
}
9393
} else {
94-
r.Log.Error(err, "failed to retrieve branch ENI cool down period from amazon-vpc-cni configmap, will retain the current cooldown period", "cool down period", curCoolDownPeriod)
94+
r.Log.Info("branch ENI cool down period not configured in amazon-vpc-cni configmap, will retain the current cooldown period", "cool down period", curCoolDownPeriod)
9595
}
9696

9797
// Check if the Windows IPAM flag has changed

controllers/core/node_controller.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,7 @@ var (
5757
// one routines to help high rate churn and larger nodes groups restarting
5858
// when the controller has to be restarted for various reasons.
5959
const (
60-
MaxNodeConcurrentReconciles = 10
61-
NodeTerminationFinalizer = "networking.k8s.aws/resource-cleanup"
60+
NodeTerminationFinalizer = "networking.k8s.aws/resource-cleanup"
6261
)
6362

6463
// NodeReconciler reconciles a Node object
@@ -143,7 +142,7 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
143142
return ctrl.Result{}, err
144143
}
145144

146-
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
145+
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, maxConcurrentReconciles int, healthzHandler *rcHealthz.HealthzHandler) error {
147146
// add health check on subpath for node controller
148147
healthzHandler.AddControllersHealthCheckers(
149148
map[string]healthz.Checker{"health-node-controller": r.Check()},
@@ -153,7 +152,7 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHe
153152

154153
return ctrl.NewControllerManagedBy(mgr).
155154
For(&corev1.Node{}).
156-
WithOptions(controller.Options{MaxConcurrentReconciles: MaxNodeConcurrentReconciles}).
155+
WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconciles}).
157156
Owns(&v1alpha1.CNINode{}).
158157
Complete(r)
159158
}

controllers/core/pod_controller.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ type PodReconciler struct {
5656
}
5757

5858
var (
59-
PodRequeueRequest = ctrl.Result{Requeue: true, RequeueAfter: time.Second}
60-
MaxPodConcurrentReconciles = 20
59+
PodRequeueRequest = ctrl.Result{Requeue: true, RequeueAfter: time.Second}
6160
)
6261

6362
// Reconcile handles create/update/delete event by delegating the request to the handler
@@ -192,8 +191,8 @@ func getAggregateResources(pod *v1.Pod) map[string]int64 {
192191
// list of runnable. After Manager acquire the lease the pod controller runnable
193192
// will be started and the Pod events will be sent to Reconcile function
194193
func (r *PodReconciler) SetupWithManager(ctx context.Context, manager ctrl.Manager,
195-
clientSet *kubernetes.Clientset, pageLimit int, syncPeriod time.Duration, healthzHandler *rcHealthz.HealthzHandler) error {
196-
r.Log.Info("The pod controller is using MaxConcurrentReconciles", "Routines", MaxPodConcurrentReconciles)
194+
clientSet *kubernetes.Clientset, pageLimit int, syncPeriod time.Duration, maxConcurrentReconciles int, healthzHandler *rcHealthz.HealthzHandler) error {
195+
r.Log.Info("The pod controller is using MaxConcurrentReconciles", "Routines", maxConcurrentReconciles)
197196

198197
customChecker, err := custom.NewControllerManagedBy(ctx, manager).
199198
WithLogger(r.Log.WithName("custom pod controller")).
@@ -205,7 +204,7 @@ func (r *PodReconciler) SetupWithManager(ctx context.Context, manager ctrl.Manag
205204
}).Options(custom.Options{
206205
PageLimit: pageLimit,
207206
ResyncPeriod: syncPeriod,
208-
MaxConcurrentReconciles: MaxPodConcurrentReconciles,
207+
MaxConcurrentReconciles: maxConcurrentReconciles,
209208
}).UsingConditions(r.Condition).Complete(r)
210209

211210
// add health check on subpath for pod and pod customized controllers

main.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,15 @@ func main() {
108108
var enableWindowsPrefixDelegation bool
109109
var region string
110110
var vpcID string
111+
var nodeWorkerCount int
112+
var userClientQPS int
113+
var userClientBurst int
114+
var instanceClientQPS int
115+
var instanceClientBurst int
116+
var apiServerQPS int
117+
var apiServerBurst int
118+
var maxPodConcurrentReconciles int
119+
var maxNodeConcurrentReconciles int
111120

112121
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080",
113122
"The address the metric endpoint binds to.")
@@ -143,6 +152,17 @@ func main() {
143152
"Enable the feature flag for Windows prefix delegation")
144153
flag.StringVar(&region, "aws-region", "", "The aws region of the k8s cluster")
145154
flag.StringVar(&vpcID, "vpc-id", "", "The VPC ID where EKS cluster is deployed")
155+
flag.IntVar(&nodeWorkerCount, "node-mgr-workers", 10, "The number of node workers")
156+
flag.IntVar(&userClientQPS, "user-client-qps", 12, "The user client QPS rate")
157+
flag.IntVar(&userClientBurst, "user-client-burst", 18, "The user client burst limit")
158+
flag.IntVar(&instanceClientQPS, "instance-client-qps", 12, "The instance client QPS rate")
159+
flag.IntVar(&instanceClientBurst, "instance-client-burst", 18, "The instance client burst limit")
160+
// API Server QPS & burst
161+
// Use the same values as default client (https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/client/config/config.go#L85)
162+
flag.IntVar(&apiServerQPS, "apiserver-qps", 20, "The API server client QPS rate")
163+
flag.IntVar(&apiServerBurst, "apiserver-burst", 30, "The API server client burst limit")
164+
flag.IntVar(&maxPodConcurrentReconciles, "max-pod-reconcile", 20, "The maximum number of concurrent reconciles for pod controller")
165+
flag.IntVar(&maxNodeConcurrentReconciles, "max-node-reconcile", 10, "The maximum number of concurrent reconciles for node controller")
146166

147167
flag.Parse()
148168

@@ -200,8 +220,8 @@ func main() {
200220

201221
kubeConfig := ctrl.GetConfigOrDie()
202222
// Set the API Server QPS and Burst
203-
kubeConfig.QPS = config.DefaultAPIServerQPS
204-
kubeConfig.Burst = config.DefaultAPIServerBurst
223+
kubeConfig.QPS = float32(apiServerQPS)
224+
kubeConfig.Burst = apiServerBurst
205225
kubeConfig.UserAgent = fmt.Sprintf("%s/%s", ec2API.AppName, version.GitVersion)
206226

207227
setupLog.Info("starting the controller with leadership setting",
@@ -270,7 +290,8 @@ func main() {
270290
region = ""
271291
}
272292

273-
ec2Wrapper, err := ec2API.NewEC2Wrapper(roleARN, clusterName, region, setupLog)
293+
ec2Wrapper, err := ec2API.NewEC2Wrapper(roleARN, clusterName, region, instanceClientQPS,
294+
instanceClientBurst, userClientQPS, userClientBurst, setupLog)
274295
if err != nil {
275296
setupLog.Error(err, "unable to create ec2 wrapper")
276297
}
@@ -316,7 +337,7 @@ func main() {
316337
}
317338

318339
nodeManagerWorkers := asyncWorkers.NewDefaultWorkerPool("node async workers",
319-
10, 1, ctrl.Log.WithName("node async workers"), ctx)
340+
nodeWorkerCount, 1, ctrl.Log.WithName("node async workers"), ctx)
320341
nodeManager, err := manager.NewNodeManager(ctrl.Log.WithName("node manager"), resourceManager,
321342
apiWrapper, nodeManagerWorkers, controllerConditions, version.GitVersion, healthzHandler)
322343

@@ -334,7 +355,7 @@ func main() {
334355
K8sAPI: k8sApi,
335356
DataStore: dataStore,
336357
Condition: controllerConditions,
337-
}).SetupWithManager(ctx, mgr, clientSet, listPageLimit, syncPeriod, healthzHandler); err != nil {
358+
}).SetupWithManager(ctx, mgr, clientSet, listPageLimit, syncPeriod, maxPodConcurrentReconciles, healthzHandler); err != nil {
338359
setupLog.Error(err, "unable to create controller", "controller", "pod")
339360
os.Exit(1)
340361
}
@@ -357,7 +378,7 @@ func main() {
357378
Manager: nodeManager,
358379
Conditions: controllerConditions,
359380
Context: ctx,
360-
}).SetupWithManager(mgr, healthzHandler); err != nil {
381+
}).SetupWithManager(mgr, maxNodeConcurrentReconciles, healthzHandler); err != nil {
361382
setupLog.Error(err, "unable to create controller", "controller", "Node")
362383
os.Exit(1)
363384
}

pkg/aws/ec2/api/wrapper.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"strings"
2020
"time"
2121

22-
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
2322
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils"
2423

2524
"github.com/aws/aws-sdk-go/aws"
@@ -363,7 +362,8 @@ type ec2Wrapper struct {
363362

364363
// NewEC2Wrapper takes the roleARN that will be assumed to make all the EC2 API Calls, if no roleARN
365364
// is passed then the ec2 client will be initialized with the instance's service role account.
366-
func NewEC2Wrapper(roleARN, clusterName, region string, log logr.Logger) (EC2Wrapper, error) {
365+
func NewEC2Wrapper(roleARN, clusterName, region string, instanceClientQPS, instanceClientBurst,
366+
userClientQPS, userClientBurst int, log logr.Logger) (EC2Wrapper, error) {
367367
// Register the metrics
368368
prometheusRegister()
369369

@@ -377,28 +377,28 @@ func NewEC2Wrapper(roleARN, clusterName, region string, log logr.Logger) (EC2Wra
377377
// Role ARN is passed, assume the role ARN to make EC2 API Calls
378378
if roleARN != "" {
379379
// Create the instance service client with low QPS, it will be only used fro associate branch to trunk calls
380-
log.Info("Creating INSTANCE service client with configured QPS", "QPS", config.InstanceServiceClientQPS, "Burst", config.InstanceServiceClientBurst)
381-
instanceServiceClient, err := ec2Wrapper.getInstanceServiceClient(config.InstanceServiceClientQPS,
382-
config.InstanceServiceClientBurst, instanceSession)
380+
log.Info("Creating INSTANCE service client with configured QPS", "QPS", instanceClientQPS, "Burst", instanceClientBurst)
381+
instanceServiceClient, err := ec2Wrapper.getInstanceServiceClient(instanceClientQPS, instanceClientBurst,
382+
instanceSession)
383383
if err != nil {
384384
return nil, err
385385
}
386386
ec2Wrapper.instanceServiceClient = instanceServiceClient
387387

388388
// Create the user service client with higher QPS, this will be used to make rest of the EC2 API Calls
389-
log.Info("Creating USER service client with configured QPS", "QPS", config.UserServiceClientQPS, "Burst", config.UserServiceClientQPSBurst)
389+
log.Info("Creating USER service client with configured QPS", "QPS", userClientQPS, "Burst", userClientBurst)
390390
userServiceClient, err := ec2Wrapper.getClientUsingAssumedRole(*instanceSession.Config.Region, roleARN, clusterName, region,
391-
config.UserServiceClientQPS, config.UserServiceClientQPSBurst)
391+
userClientQPS, userClientBurst)
392392
if err != nil {
393393
return nil, err
394394
}
395395
ec2Wrapper.userServiceClient = userServiceClient
396396
} else {
397-
// Role ARN is not provided, assuming that instance service client is whitelisted for ENI branching and use
397+
// Role ARN is not provided, assuming that instance service client is allowlisted for ENI branching and use
398398
// the instance service client as the user service client with higher QPS.
399-
log.Info("Creating INSTANCE service client with configured USER Service QPS", "QPS", config.InstanceServiceClientQPS, "Burst", config.InstanceServiceClientBurst)
400-
instanceServiceClient, err := ec2Wrapper.getInstanceServiceClient(config.UserServiceClientQPS,
401-
config.UserServiceClientQPSBurst, instanceSession)
399+
log.Info("Creating INSTANCE service client with configured USER Service QPS", "QPS", userClientQPS, "Burst", userClientBurst)
400+
instanceServiceClient, err := ec2Wrapper.getInstanceServiceClient(userClientQPS,
401+
userClientBurst, instanceSession)
402402
if err != nil {
403403
return nil, err
404404
}

pkg/config/loader.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,23 +43,6 @@ const (
4343
IPv4PDDefaultWarmIPTargetSize = 1
4444
IPv4PDDefaultMinIPTargetSize = 3
4545
IPv4PDDefaultWarmPrefixTargetSize = 0
46-
47-
// EC2 API QPS for user service client
48-
// Tested: 15 + 20 limits
49-
// Tested: 15 + 8 limits (not seeing significant degradation from 15+20)
50-
// Tested: 12 + 8 limits (not seeing significant degradation from 15+8)
51-
// Larger number seems not make latency better than 12+8
52-
UserServiceClientQPS = 12
53-
UserServiceClientQPSBurst = 18
54-
55-
// EC2 API QPS for instance service client
56-
InstanceServiceClientQPS = 12
57-
InstanceServiceClientBurst = 18
58-
59-
// API Server QPS
60-
// Use the same values as default client (https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/client/config/config.go#L85)
61-
DefaultAPIServerQPS = 20
62-
DefaultAPIServerBurst = 30
6346
)
6447

6548
// LoadResourceConfig returns the Resource Configuration for all resources managed by the VPC Resource Controller. Currently

0 commit comments

Comments
 (0)