@@ -15,12 +15,11 @@ import (
15
15
"time"
16
16
17
17
"github.com/bombsimon/logrusr/v2"
18
+ workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
18
19
"github.com/spf13/cobra"
19
20
corev1 "k8s.io/api/core/v1"
20
21
"k8s.io/apimachinery/pkg/api/errors"
21
22
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22
- "k8s.io/apimachinery/pkg/fields"
23
- "k8s.io/apimachinery/pkg/labels"
24
23
"k8s.io/apimachinery/pkg/runtime"
25
24
"k8s.io/apimachinery/pkg/types"
26
25
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -113,25 +112,13 @@ var runCmd = &cobra.Command{
113
112
log .WithError (err ).Fatal ("unable to bind controller watch event handler" )
114
113
}
115
114
116
- // the pod count reconciler needs an index on spec.nodeName to be able to list pods by node
117
- if err := mgr .GetFieldIndexer ().IndexField (
118
- context .Background (),
119
- & corev1.Pod {},
120
- "spec.nodeName" ,
121
- func (o client.Object ) []string {
122
- pod := o .(* corev1.Pod )
123
- return []string {pod .Spec .NodeName }
124
- }); err != nil {
125
- log .WithError (err ).Fatal ("unable to create index for pod nodeName" )
126
- }
127
-
128
- pc , err := NewPodCountController (mgr .GetClient ())
115
+ wc , err := NewWorkspaceCountController (mgr .GetClient ())
129
116
if err != nil {
130
- log .WithError (err ).Fatal ("unable to create pod count controller" )
117
+ log .WithError (err ).Fatal ("unable to create workspace count controller" )
131
118
}
132
- err = pc .SetupWithManager (mgr )
119
+ err = wc .SetupWithManager (mgr )
133
120
if err != nil {
134
- log .WithError (err ).Fatal ("unable to bind pod count controller" )
121
+ log .WithError (err ).Fatal ("unable to bind workspace count controller" )
135
122
}
136
123
137
124
metrics .Registry .MustRegister (NodeLabelerCounterVec )
@@ -159,6 +146,7 @@ var runCmd = &cobra.Command{
159
146
160
147
func init () {
161
148
utilruntime .Must (clientgoscheme .AddToScheme (scheme ))
149
+ utilruntime .Must (workspacev1 .AddToScheme (scheme ))
162
150
163
151
rootCmd .AddCommand (runCmd )
164
152
}
@@ -274,101 +262,119 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
274
262
return reconcile.Result {}, nil
275
263
}
276
264
277
- type PodCountController struct {
265
+ type WorkspaceCountController struct {
278
266
client.Client
279
267
}
280
268
281
- // NewPodCountController creates a controller that tracks workspace pod counts and updates node annotations
282
- func NewPodCountController (client client.Client ) (* PodCountController , error ) {
283
- return & PodCountController {
269
+ func NewWorkspaceCountController (client client.Client ) (* WorkspaceCountController , error ) {
270
+ return & WorkspaceCountController {
284
271
Client : client ,
285
272
}, nil
286
273
}
287
274
288
- func (pc * PodCountController ) SetupWithManager (mgr ctrl.Manager ) error {
275
+ func (wc * WorkspaceCountController ) SetupWithManager (mgr ctrl.Manager ) error {
289
276
return ctrl .NewControllerManagedBy (mgr ).
290
- Named ("pod -count" ).
291
- For (& corev1. Pod {}).
292
- WithEventFilter (workspacePodFilter ()).
293
- Complete (pc )
277
+ Named ("workspace -count" ).
278
+ For (& workspacev1. Workspace {}).
279
+ WithEventFilter (workspaceFilter ()).
280
+ Complete (wc )
294
281
}
295
282
296
- func workspacePodFilter () predicate.Predicate {
283
+ func workspaceFilter () predicate.Predicate {
297
284
return predicate.Funcs {
298
285
CreateFunc : func (e event.CreateEvent ) bool {
299
- pod := e .Object .(* corev1. Pod )
300
- return pod . Labels [ "component" ] = = "workspace "
286
+ ws := e .Object .(* workspacev1. Workspace )
287
+ return ws . Status . Runtime != nil && ws . Status . Runtime . NodeName ! = ""
301
288
},
302
289
UpdateFunc : func (e event.UpdateEvent ) bool {
303
290
return false
304
291
},
305
292
DeleteFunc : func (e event.DeleteEvent ) bool {
306
- pod := e .Object .(* corev1. Pod )
307
- return pod . Labels [ "component" ] = = "workspace "
293
+ ws := e .Object .(* workspacev1. Workspace )
294
+ return ws . Status . Runtime != nil && ws . Status . Runtime . NodeName ! = ""
308
295
},
309
296
}
310
297
}
311
298
312
- func (pc * PodCountController ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
313
- log .WithField ("request" , req .NamespacedName .String ()).Info ("PodCountController reconciling" )
299
+ func (wc * WorkspaceCountController ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
300
+ log .WithField ("request" , req .NamespacedName .String ()).Info ("WorkspaceCountController reconciling" )
314
301
315
- var pod corev1. Pod
316
- if err := pc .Get (ctx , req .NamespacedName , & pod ); err != nil {
302
+ var ws workspacev1. Workspace
303
+ if err := wc .Get (ctx , req .NamespacedName , & ws ); err != nil {
317
304
if ! errors .IsNotFound (err ) {
318
- log .WithError (err ).WithField ("pod " , req .NamespacedName ).Error ("unable to fetch Pod " )
305
+ log .WithError (err ).WithField ("workspace " , req .NamespacedName ).Error ("unable to fetch Workspace " )
319
306
return ctrl.Result {}, err
320
307
}
308
+ // If workspace not found, do a full reconciliation
309
+ log .WithField ("workspace" , req .NamespacedName ).Info ("Workspace not found, reconciling all nodes" )
310
+ return wc .reconcileAllNodes (ctx )
311
+ }
321
312
322
- log .WithField ("pod" , req .NamespacedName ).Info ("Pod not found, assuming it was deleted, reconciling all nodes" )
313
+ if ws .Status .Runtime != nil && ws .Status .Runtime .NodeName != "" {
314
+ var workspaceList workspacev1.WorkspaceList
315
+ if err := wc .List (ctx , & workspaceList ); err != nil {
316
+ log .WithError (err ).Error ("failed to list workspaces" )
317
+ return ctrl.Result {}, err
318
+ }
323
319
324
- // Pod was deleted, reconcile all nodes
325
- return pc .reconcileAllNodes (ctx )
326
- }
320
+ count := 0
321
+ nodeName := ws .Status .Runtime .NodeName
322
+ for _ , ws := range workspaceList .Items {
323
+ if ws .Status .Runtime != nil &&
324
+ ws .Status .Runtime .NodeName == nodeName &&
325
+ ws .DeletionTimestamp .IsZero () {
326
+ count ++
327
+ }
328
+ }
327
329
328
- if pod .Spec .NodeName == "" {
329
- log .WithField ("pod" , req .NamespacedName ).Info ("Pod has no node, requesting reconciliation" )
330
- return ctrl.Result {RequeueAfter : 5 * time .Second }, nil
330
+ if err := wc .updateNodeAnnotation (ctx , nodeName , count ); err != nil {
331
+ return ctrl.Result {}, err
332
+ }
333
+ log .WithField ("node" , nodeName ).WithField ("count" , count ).Info ("updated node annotation" )
331
334
}
332
335
333
- return pc . reconcileNode ( ctx , pod . Spec . NodeName )
336
+ return ctrl. Result {}, nil
334
337
}
335
338
336
- func (pc * PodCountController ) reconcileAllNodes (ctx context.Context ) (ctrl.Result , error ) {
339
+ func (wc * WorkspaceCountController ) reconcileAllNodes (ctx context.Context ) (ctrl.Result , error ) {
340
+ var workspaceList workspacev1.WorkspaceList
341
+ if err := wc .List (ctx , & workspaceList ); err != nil {
342
+ log .WithError (err ).Error ("failed to list workspaces" )
343
+ return ctrl.Result {}, err
344
+ }
345
+
346
+ workspaceCounts := make (map [string ]int )
347
+ for _ , ws := range workspaceList .Items {
348
+ if ws .Status .Runtime != nil &&
349
+ ws .Status .Runtime .NodeName != "" &&
350
+ ws .DeletionTimestamp .IsZero () {
351
+ workspaceCounts [ws .Status .Runtime .NodeName ]++
352
+ }
353
+ }
354
+
337
355
var nodes corev1.NodeList
338
- if err := pc .List (ctx , & nodes ); err != nil {
356
+ if err := wc .List (ctx , & nodes ); err != nil {
339
357
log .WithError (err ).Error ("failed to list nodes" )
340
358
return ctrl.Result {}, err
341
359
}
342
360
361
+ // Update each node's annotation based on its count
343
362
for _ , node := range nodes .Items {
344
- if _ , err := pc . reconcileNode ( ctx , node .Name ); err != nil {
345
- log . WithError ( err ). WithField ( "node" , node .Name ). Error ( "failed to reconcile node" )
346
- // Continue with other nodes even if one fails
363
+ count := workspaceCounts [ node .Name ]
364
+ if err := wc . updateNodeAnnotation ( ctx , node .Name , count ); err != nil {
365
+ log . WithError ( err ). WithField ( "node" , node . Name ). Error ( "failed to update node" )
347
366
continue
348
367
}
349
- log .WithField ("node" , node .Name ).Info ("reconciled node" )
368
+ log .WithField ("node" , node .Name ).WithField ( "count" , count ). Info ("updated node annotation " )
350
369
}
351
370
352
371
return ctrl.Result {}, nil
353
372
}
354
373
355
- func (pc * PodCountController ) reconcileNode (ctx context.Context , nodeName string ) (ctrl.Result , error ) {
356
- var podList corev1.PodList
357
- err := pc .List (ctx , & podList , & client.ListOptions {
358
- FieldSelector : fields .SelectorFromSet (fields.Set {"spec.nodeName" : nodeName }),
359
- LabelSelector : labels .SelectorFromSet (labels.Set {"component" : "workspace" }),
360
- })
361
- if err != nil {
362
- log .WithError (err ).WithField ("nodeName" , nodeName ).Error ("failed to list pods" )
363
- return ctrl.Result {}, fmt .Errorf ("failed to list pods: %w" , err )
364
- }
365
-
366
- workspaceCount := len (podList .Items )
367
- log .WithField ("nodeName" , nodeName ).WithField ("workspaceCount" , workspaceCount ).Info ("reconciling node" )
368
-
369
- err = retry .RetryOnConflict (retry .DefaultBackoff , func () error {
374
+ func (wc * WorkspaceCountController ) updateNodeAnnotation (ctx context.Context , nodeName string , count int ) error {
375
+ return retry .RetryOnConflict (retry .DefaultBackoff , func () error {
370
376
var node corev1.Node
371
- err := pc .Get (ctx , types.NamespacedName {Name : nodeName }, & node )
377
+ err := wc .Get (ctx , types.NamespacedName {Name : nodeName }, & node )
372
378
if err != nil {
373
379
return fmt .Errorf ("obtaining node %s: %w" , nodeName , err )
374
380
}
@@ -377,22 +383,16 @@ func (pc *PodCountController) reconcileNode(ctx context.Context, nodeName string
377
383
node .Annotations = make (map [string ]string )
378
384
}
379
385
380
- if workspaceCount > 0 {
386
+ if count > 0 {
381
387
node .Annotations ["cluster-autoscaler.kubernetes.io/scale-down-disabled" ] = "true"
382
388
log .WithField ("nodeName" , nodeName ).Info ("disabling scale-down for node" )
383
389
} else {
384
390
delete (node .Annotations , "cluster-autoscaler.kubernetes.io/scale-down-disabled" )
385
391
log .WithField ("nodeName" , nodeName ).Info ("enabling scale-down for node" )
386
392
}
387
393
388
- return pc .Update (ctx , & node )
394
+ return wc .Update (ctx , & node )
389
395
})
390
- if err != nil {
391
- log .WithError (err ).WithField ("nodeName" , nodeName ).Error ("failed to update node" )
392
- return ctrl.Result {}, fmt .Errorf ("failed to update node: %w" , err )
393
- }
394
-
395
- return ctrl.Result {}, nil
396
396
}
397
397
398
398
func updateLabel (label string , add bool , nodeName string , client client.Client ) error {
0 commit comments