@@ -18,22 +18,22 @@ import (
18
18
"github.com/spf13/cobra"
19
19
corev1 "k8s.io/api/core/v1"
20
20
"k8s.io/apimachinery/pkg/api/errors"
21
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21
22
"k8s.io/apimachinery/pkg/runtime"
22
23
"k8s.io/apimachinery/pkg/types"
23
24
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
24
25
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
25
26
_ "k8s.io/client-go/plugin/pkg/client/auth"
26
27
"k8s.io/client-go/util/retry"
28
+ "k8s.io/utils/pointer"
27
29
ctrl "sigs.k8s.io/controller-runtime"
30
+ "sigs.k8s.io/controller-runtime/pkg/builder"
28
31
"sigs.k8s.io/controller-runtime/pkg/client"
29
32
"sigs.k8s.io/controller-runtime/pkg/controller"
30
- "sigs.k8s.io/controller-runtime/pkg/event"
31
- "sigs.k8s.io/controller-runtime/pkg/handler"
32
33
"sigs.k8s.io/controller-runtime/pkg/healthz"
33
34
"sigs.k8s.io/controller-runtime/pkg/metrics"
34
35
"sigs.k8s.io/controller-runtime/pkg/predicate"
35
36
"sigs.k8s.io/controller-runtime/pkg/reconcile"
36
- "sigs.k8s.io/controller-runtime/pkg/source"
37
37
38
38
"github.com/gitpod-io/gitpod/common-go/log"
39
39
)
@@ -60,6 +60,10 @@ var runCmd = &cobra.Command{
60
60
LeaderElection : true ,
61
61
LeaderElectionID : "node-labeler.gitpod.io" ,
62
62
Namespace : namespace ,
63
+ // default sync period is 10h.
64
+ // in case node-labeler is restarted and no change happens, we could waste (at least) 20m on a node
65
+ // that will never run workspaces and the additional nodes cluster-autoscaler adds to compensate
66
+ SyncPeriod : pointer .Duration (1 * time .Minute ),
63
67
})
64
68
if err != nil {
65
69
log .WithError (err ).Fatal ("unable to start node-labeber" )
@@ -74,35 +78,27 @@ var runCmd = &cobra.Command{
74
78
client ,
75
79
}
76
80
77
- c , err := controller .New ("pod-watcher" , mgr , controller.Options {
78
- Reconciler : r ,
79
- MaxConcurrentReconciles : 20 ,
81
+ filterPredicate , err := predicate .LabelSelectorPredicate (metav1.LabelSelector {
82
+ MatchLabels : map [string ]string {
83
+ "required-by-node-labeler" : "true" ,
84
+ },
80
85
})
86
+ if err != nil {
87
+ log .WithError (err ).Fatal ("unable to create predicate" )
88
+ }
89
+
90
+ err = ctrl .NewControllerManagedBy (mgr ).
91
+ Named ("pod-watcher" ).
92
+ For (& corev1.Pod {}, builder .WithPredicates (filterPredicate )).
93
+ WithOptions (controller.Options {MaxConcurrentReconciles : 1 }).
94
+ Complete (r )
81
95
if err != nil {
82
96
log .WithError (err ).Fatal ("unable to bind controller watch event handler" )
83
97
}
84
98
85
99
metrics .Registry .MustRegister (NodeLabelerCounterVec )
86
100
metrics .Registry .MustRegister (NodeLabelerTimeHistVec )
87
101
88
- err = c .Watch (& source.Kind {Type : & corev1.Pod {}}, & handler.EnqueueRequestForObject {}, predicate.Funcs {
89
- CreateFunc : func (ce event.CreateEvent ) bool {
90
- return processPodEvent (ce .Object )
91
- },
92
- UpdateFunc : func (ue event.UpdateEvent ) bool {
93
- return processPodEvent (ue .ObjectNew )
94
- },
95
- DeleteFunc : func (deleteEvent event.DeleteEvent ) bool {
96
- return processPodEvent (deleteEvent .Object )
97
- },
98
- GenericFunc : func (genericEvent event.GenericEvent ) bool {
99
- return false
100
- },
101
- })
102
- if err != nil {
103
- log .WithError (err ).Fatal ("unable to create controller" )
104
- }
105
-
106
102
err = mgr .AddHealthzCheck ("healthz" , healthz .Ping )
107
103
if err != nil {
108
104
log .WithError (err ).Fatal ("unable to set up health check" )
@@ -132,14 +128,6 @@ var (
132
128
scheme = runtime .NewScheme ()
133
129
)
134
130
135
- func processPodEvent (pod client.Object ) bool {
136
- if strings .HasPrefix (pod .GetName (), registryFacade ) || strings .HasPrefix (pod .GetName (), wsDaemon ) {
137
- return true
138
- }
139
-
140
- return false
141
- }
142
-
143
131
type PodReconciler struct {
144
132
client.Client
145
133
}
@@ -165,8 +153,6 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
165
153
port string
166
154
component string
167
155
labelToUpdate string
168
-
169
- waitTimeout time.Duration = 5 * time .Second
170
156
)
171
157
172
158
switch {
@@ -181,7 +167,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
181
167
ipAddress = pod .Status .PodIP
182
168
port = strconv .Itoa (wsdaemonPort )
183
169
default :
184
- log . WithField ( "pod" , pod . Name ). Info ( "Invalid pod. Skipping..." )
170
+ // nothing to do
185
171
return reconcile.Result {}, nil
186
172
}
187
173
@@ -215,26 +201,25 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
215
201
return reconcile.Result {}, fmt .Errorf ("obtaining node %s: %w" , nodeName , err )
216
202
}
217
203
218
- if node .Labels [labelToUpdate ] == "true" {
219
- // Label already exists.
204
+ if labelValue , exists := node .Labels [labelToUpdate ]; exists && labelValue == "true" {
205
+ // nothing to do, the label already exists.
220
206
return reconcile.Result {}, nil
221
207
}
222
208
223
- err = waitForTCPPortToBeReachable (ipAddress , port , 30 * time . Second )
209
+ err = checkTCPPortIsReachable (ipAddress , port )
224
210
if err != nil {
225
- return reconcile.Result {}, fmt .Errorf ("waiting for TCP port: %v" , err )
211
+ log .WithField ("host" , ipAddress ).WithField ("port" , port ).WithField ("pod" , pod .Name ).WithError (err ).Error ("checking if TCP port is open" )
212
+ return reconcile.Result {RequeueAfter : time .Second * 5 }, nil
226
213
}
227
214
228
215
if component == registryFacade {
229
216
err = checkRegistryFacade (ipAddress , port )
230
217
if err != nil {
231
218
log .WithError (err ).Error ("checking registry-facade" )
232
- return reconcile.Result {RequeueAfter : time .Second * 10 }, nil
219
+ return reconcile.Result {RequeueAfter : time .Second * 5 }, nil
233
220
}
234
221
}
235
222
236
- time .Sleep (waitTimeout )
237
-
238
223
err = updateLabel (labelToUpdate , true , nodeName , r )
239
224
if err != nil {
240
225
return reconcile.Result {}, fmt .Errorf ("trying to add the label: %v" , err )
@@ -280,31 +265,14 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
280
265
})
281
266
}
282
267
283
- func waitForTCPPortToBeReachable (host string , port string , timeout time.Duration ) error {
284
- ctx , cancel := context .WithTimeout (context .Background (), timeout )
285
- defer cancel ()
286
-
287
- ticker := time .NewTicker (1 * time .Second )
288
- defer ticker .Stop ()
289
-
290
- for {
291
- select {
292
- case <- ctx .Done ():
293
- return fmt .Errorf ("port %v on host %v never reachable" , port , host )
294
- case <- ticker .C :
295
- conn , err := net .DialTimeout ("tcp" , net .JoinHostPort (host , port ), 500 * time .Millisecond )
296
- if err != nil {
297
- continue
298
- }
299
-
300
- if conn != nil {
301
- conn .Close ()
302
- return nil
303
- }
304
-
305
- continue
306
- }
268
+ func checkTCPPortIsReachable (host string , port string ) error {
269
+ conn , err := net .DialTimeout ("tcp" , net .JoinHostPort (host , port ), 1 * time .Second )
270
+ if err != nil {
271
+ return err
307
272
}
273
+ defer conn .Close ()
274
+
275
+ return nil
308
276
}
309
277
310
278
func checkRegistryFacade (host , port string ) error {
0 commit comments