@@ -50,6 +50,13 @@ const (
50
50
51
51
registryFacade = "registry-facade"
52
52
wsDaemon = "ws-daemon"
53
+
54
+ // Taint keys for different components
55
+ registryFacadeTaintKey = "gitpod.io/registry-facade-not-ready"
56
+ wsDaemonTaintKey = "gitpod.io/ws-daemon-not-ready"
57
+
58
+ workspacesRegularLabel = "gitpod.io/workload_workspace_regular"
59
+ workspacesHeadlessLabel = "gitpod.io/workload_workspace_headless"
53
60
)
54
61
55
62
var defaultRequeueTime = time .Second * 10
@@ -61,6 +68,15 @@ var runCmd = &cobra.Command{
61
68
Run : func (cmd * cobra.Command , args []string ) {
62
69
ctrl .SetLogger (logrusr .New (log .Log ))
63
70
71
+ kClient , err := client .New (ctrl .GetConfigOrDie (), client.Options {})
72
+ if err != nil {
73
+ log .WithError (err ).Fatal ("unable to create client" )
74
+ }
75
+
76
+ if err := initializeLabels (context .Background (), kClient ); err != nil {
77
+ log .WithError (err ).Fatal ("failed to initialize labels" )
78
+ }
79
+
64
80
mgr , err := ctrl .NewManager (ctrl .GetConfigOrDie (), ctrl.Options {
65
81
Scheme : scheme ,
66
82
HealthProbeBindAddress : ":8086" ,
@@ -84,11 +100,6 @@ var runCmd = &cobra.Command{
84
100
log .WithError (err ).Fatal ("unable to start node-labeler" )
85
101
}
86
102
87
- kClient , err := client .New (ctrl .GetConfigOrDie (), client.Options {})
88
- if err != nil {
89
- log .WithError (err ).Fatal ("unable to create client" )
90
- }
91
-
92
103
r := & PodReconciler {
93
104
kClient ,
94
105
}
@@ -198,21 +209,18 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
198
209
}
199
210
200
211
var (
201
- ipAddress string
202
- port string
203
- component string
204
- labelToUpdate string
212
+ ipAddress string
213
+ port string
214
+ taintKey string
205
215
)
206
216
207
217
switch {
208
218
case strings .HasPrefix (pod .Name , registryFacade ):
209
- component = registryFacade
210
- labelToUpdate = fmt .Sprintf (registryFacadeLabel , namespace )
219
+ taintKey = registryFacadeTaintKey
211
220
ipAddress = pod .Status .HostIP
212
221
port = strconv .Itoa (registryFacadePort )
213
222
case strings .HasPrefix (pod .Name , wsDaemon ):
214
- component = wsDaemon
215
- labelToUpdate = fmt .Sprintf (wsdaemonLabel , namespace )
223
+ taintKey = wsDaemonTaintKey
216
224
ipAddress = pod .Status .PodIP
217
225
port = strconv .Itoa (wsdaemonPort )
218
226
default :
@@ -222,17 +230,17 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
222
230
223
231
if ! pod .ObjectMeta .DeletionTimestamp .IsZero () {
224
232
// the pod is being removed.
225
- // remove the component label from the node
233
+ // add the taint to the node
226
234
time .Sleep (1 * time .Second )
227
- err := updateLabel ( labelToUpdate , false , nodeName , r )
235
+ err := updateNodeTaint ( taintKey , true , nodeName , r )
228
236
if err != nil {
229
237
// this is an edge case when cluster-autoscaler removes a node
230
238
// (all the running pods will be removed after that)
231
239
if errors .IsNotFound (err ) {
232
240
return reconcile.Result {}, nil
233
241
}
234
242
235
- log .WithError (err ).Error ("removing node label " )
243
+ log .WithError (err ).Error ("adding node taint " )
236
244
return reconcile.Result {RequeueAfter : defaultRequeueTime }, err
237
245
}
238
246
@@ -250,8 +258,17 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
250
258
return reconcile.Result {}, fmt .Errorf ("obtaining node %s: %w" , nodeName , err )
251
259
}
252
260
253
- if labelValue , exists := node .Labels [labelToUpdate ]; exists && labelValue == "true" {
254
- // nothing to do, the label already exists.
261
+ // Check if taint exists
262
+ taintExists := false
263
+ for _ , taint := range node .Spec .Taints {
264
+ if taint .Key == taintKey {
265
+ taintExists = true
266
+ break
267
+ }
268
+ }
269
+
270
+ if ! taintExists {
271
+ // nothing to do, the taint doesn't exist.
255
272
return reconcile.Result {}, nil
256
273
}
257
274
@@ -261,7 +278,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
261
278
return reconcile.Result {RequeueAfter : defaultRequeueTime }, nil
262
279
}
263
280
264
- if component == registryFacade {
281
+ if strings . HasPrefix ( pod . Name , registryFacade ) {
265
282
err = checkRegistryFacade (ipAddress , port )
266
283
if err != nil {
267
284
log .WithError (err ).Error ("checking registry-facade" )
@@ -271,15 +288,15 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
271
288
time .Sleep (1 * time .Second )
272
289
}
273
290
274
- err = updateLabel ( labelToUpdate , true , nodeName , r )
291
+ err = updateNodeTaint ( taintKey , false , nodeName , r )
275
292
if err != nil {
276
- log .WithError (err ).Error ("updating node label " )
277
- return reconcile.Result {}, fmt .Errorf ("trying to add the label : %v" , err )
293
+ log .WithError (err ).Error ("removing node taint " )
294
+ return reconcile.Result {}, fmt .Errorf ("trying to remove the taint : %v" , err )
278
295
}
279
296
280
297
readyIn := time .Since (pod .Status .StartTime .Time )
281
- NodeLabelerTimeHistVec .WithLabelValues (component ).Observe (readyIn .Seconds ())
282
- NodeLabelerCounterVec .WithLabelValues (component ).Inc ()
298
+ NodeLabelerTimeHistVec .WithLabelValues (strings . Split ( pod . Name , "-" )[ 0 ] ).Observe (readyIn .Seconds ())
299
+ NodeLabelerCounterVec .WithLabelValues (strings . Split ( pod . Name , "-" )[ 0 ] ).Inc ()
283
300
284
301
return reconcile.Result {}, nil
285
302
}
@@ -485,7 +502,7 @@ func (c *NodeScaledownAnnotationController) updateNodeAnnotation(ctx context.Con
485
502
})
486
503
}
487
504
488
- func updateLabel ( label string , add bool , nodeName string , client client.Client ) error {
505
+ func updateNodeTaint ( taintKey string , add bool , nodeName string , client client.Client ) error {
489
506
return retry .RetryOnConflict (retry .DefaultBackoff , func () error {
490
507
ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
491
508
defer cancel ()
@@ -496,12 +513,36 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
496
513
return err
497
514
}
498
515
516
+ // Create or remove taint
499
517
if add {
500
- node .Labels [label ] = "true"
501
- log .WithField ("label" , label ).WithField ("node" , nodeName ).Info ("adding label to node" )
518
+ // Add taint if it doesn't exist
519
+ taintExists := false
520
+ for _ , taint := range node .Spec .Taints {
521
+ if taint .Key == taintKey {
522
+ taintExists = true
523
+ break
524
+ }
525
+ }
526
+ if ! taintExists {
527
+ node .Spec .Taints = append (node .Spec .Taints , corev1.Taint {
528
+ Key : taintKey ,
529
+ Value : "true" ,
530
+ Effect : corev1 .TaintEffectNoSchedule ,
531
+ })
532
+ log .WithField ("taint" , taintKey ).WithField ("node" , nodeName ).Info ("adding taint to node" )
533
+ }
502
534
} else {
503
- delete (node .Labels , label )
504
- log .WithField ("label" , label ).WithField ("node" , nodeName ).Info ("removing label from node" )
535
+ // Remove taint if it exists
536
+ newTaints := make ([]corev1.Taint , 0 )
537
+ for _ , taint := range node .Spec .Taints {
538
+ if taint .Key != taintKey {
539
+ newTaints = append (newTaints , taint )
540
+ }
541
+ }
542
+ if len (newTaints ) != len (node .Spec .Taints ) {
543
+ node .Spec .Taints = newTaints
544
+ log .WithField ("taint" , taintKey ).WithField ("node" , nodeName ).Info ("removing taint from node" )
545
+ }
505
546
}
506
547
507
548
err = client .Update (ctx , & node )
@@ -569,3 +610,72 @@ func newDefaultTransport() *http.Transport {
569
610
DisableKeepAlives : true ,
570
611
}
571
612
}
613
+
614
+ func initializeLabels (ctx context.Context , kClient client.Client ) error {
615
+ log .Info ("initializing labels on nodes" )
616
+
617
+ var nodes corev1.NodeList
618
+ if err := kClient .List (ctx , & nodes ); err != nil {
619
+ return fmt .Errorf ("failed to list nodes: %w" , err )
620
+ }
621
+
622
+ for _ , node := range nodes .Items {
623
+ if node .Labels == nil {
624
+ continue
625
+ }
626
+ _ , isRegularWorkspaceNode := node .Labels [workspacesRegularLabel ]
627
+ _ , isHeadlessWorkspaceNode := node .Labels [workspacesHeadlessLabel ]
628
+
629
+ if isRegularWorkspaceNode || isHeadlessWorkspaceNode {
630
+ err := updateNodeLabel (node .Name , kClient )
631
+ if err != nil {
632
+ log .WithError (err ).WithField ("node" , node .Name ).Error ("failed to initialize labels on node" )
633
+ }
634
+ }
635
+ }
636
+
637
+ log .Info ("finished initializing labels on nodes" )
638
+ return nil
639
+ }
640
+
641
+ func updateNodeLabel (nodeName string , client client.Client ) error {
642
+ return retry .RetryOnConflict (retry .DefaultBackoff , func () error {
643
+ ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
644
+ defer cancel ()
645
+
646
+ var node corev1.Node
647
+ err := client .Get (ctx , types.NamespacedName {Name : nodeName }, & node )
648
+ if err != nil {
649
+ return err
650
+ }
651
+
652
+ registryFacadeLabelForNamespace := fmt .Sprintf (registryFacadeLabel , namespace )
653
+ wsDaemonLabelForNamespace := fmt .Sprintf (wsdaemonLabel , namespace )
654
+
655
+ needUpdate := false
656
+
657
+ if node .Labels == nil {
658
+ node .Labels = make (map [string ]string )
659
+ }
660
+
661
+ if v := node .Labels [registryFacadeLabelForNamespace ]; v != "true" {
662
+ needUpdate = true
663
+ }
664
+ if v := node .Labels [wsDaemonLabelForNamespace ]; v != "true" {
665
+ needUpdate = true
666
+ }
667
+
668
+ if ! needUpdate {
669
+ return nil
670
+ }
671
+ node .Labels [registryFacadeLabelForNamespace ] = "true"
672
+ node .Labels [wsDaemonLabelForNamespace ] = "true"
673
+
674
+ err = client .Update (ctx , & node )
675
+ if err != nil {
676
+ return err
677
+ }
678
+
679
+ return nil
680
+ })
681
+ }
0 commit comments