Skip to content

Commit 9a6fe23

Browse files
committed
Replace watch with builder.ControllerManagedBy
Signed-off-by: Manuel de Brito Fontes <[email protected]>
1 parent 72a58dd commit 9a6fe23

File tree

1 file changed

+40
-63
lines changed
  • components/node-labeler/cmd

1 file changed

+40
-63
lines changed

components/node-labeler/cmd/run.go

Lines changed: 40 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,18 @@ import (
1818
"github.com/spf13/cobra"
1919
corev1 "k8s.io/api/core/v1"
2020
"k8s.io/apimachinery/pkg/api/errors"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
"k8s.io/apimachinery/pkg/runtime"
2223
"k8s.io/apimachinery/pkg/types"
2324
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2425
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
2526
_ "k8s.io/client-go/plugin/pkg/client/auth"
2627
"k8s.io/client-go/util/retry"
28+
"k8s.io/utils/pointer"
2729
ctrl "sigs.k8s.io/controller-runtime"
30+
"sigs.k8s.io/controller-runtime/pkg/builder"
2831
"sigs.k8s.io/controller-runtime/pkg/client"
2932
"sigs.k8s.io/controller-runtime/pkg/controller"
30-
"sigs.k8s.io/controller-runtime/pkg/event"
3133
"sigs.k8s.io/controller-runtime/pkg/handler"
3234
"sigs.k8s.io/controller-runtime/pkg/healthz"
3335
"sigs.k8s.io/controller-runtime/pkg/metrics"
@@ -60,6 +62,10 @@ var runCmd = &cobra.Command{
6062
LeaderElection: true,
6163
LeaderElectionID: "node-labeler.gitpod.io",
6264
Namespace: namespace,
65+
// default sync period is 10h.
66+
// in case node-labeler is restarted and no change happens, we could waste (at least) 20m on a node
67+
// that will never run workspaces, plus the additional nodes cluster-autoscaler adds to compensate
68+
SyncPeriod: pointer.Duration(1 * time.Minute),
6369
})
6470
if err != nil {
6571
log.WithError(err).Fatal("unable to start node-labeber")
@@ -74,35 +80,32 @@ var runCmd = &cobra.Command{
7480
client,
7581
}
7682

77-
c, err := controller.New("pod-watcher", mgr, controller.Options{
78-
Reconciler: r,
79-
MaxConcurrentReconciles: 20,
83+
filterPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{
84+
MatchLabels: map[string]string{
85+
"required-by-node-labeler": "true",
86+
},
8087
})
88+
if err != nil {
89+
log.WithError(err).Fatal("unable to create predicate")
90+
}
91+
92+
err = ctrl.NewControllerManagedBy(mgr).
93+
Named("pod-watcher").
94+
For(&corev1.Pod{}).
95+
Watches(
96+
&source.Kind{Type: &corev1.Pod{}},
97+
&handler.EnqueueRequestForObject{},
98+
builder.WithPredicates(filterPredicate),
99+
).
100+
WithOptions(controller.Options{MaxConcurrentReconciles: 20}).
101+
Complete(r)
81102
if err != nil {
82103
log.WithError(err).Fatal("unable to bind controller watch event handler")
83104
}
84105

85106
metrics.Registry.MustRegister(NodeLabelerCounterVec)
86107
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
87108

88-
err = c.Watch(&source.Kind{Type: &corev1.Pod{}}, &handler.EnqueueRequestForObject{}, predicate.Funcs{
89-
CreateFunc: func(ce event.CreateEvent) bool {
90-
return processPodEvent(ce.Object)
91-
},
92-
UpdateFunc: func(ue event.UpdateEvent) bool {
93-
return processPodEvent(ue.ObjectNew)
94-
},
95-
DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
96-
return processPodEvent(deleteEvent.Object)
97-
},
98-
GenericFunc: func(genericEvent event.GenericEvent) bool {
99-
return false
100-
},
101-
})
102-
if err != nil {
103-
log.WithError(err).Fatal("unable to create controller")
104-
}
105-
106109
err = mgr.AddHealthzCheck("healthz", healthz.Ping)
107110
if err != nil {
108111
log.WithError(err).Fatal("unable to set up health check")
@@ -132,14 +135,6 @@ var (
132135
scheme = runtime.NewScheme()
133136
)
134137

135-
func processPodEvent(pod client.Object) bool {
136-
if strings.HasPrefix(pod.GetName(), registryFacade) || strings.HasPrefix(pod.GetName(), wsDaemon) {
137-
return true
138-
}
139-
140-
return false
141-
}
142-
143138
type PodReconciler struct {
144139
client.Client
145140
}
@@ -165,8 +160,6 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
165160
port string
166161
component string
167162
labelToUpdate string
168-
169-
waitTimeout time.Duration = 5 * time.Second
170163
)
171164

172165
switch {
@@ -181,7 +174,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
181174
ipAddress = pod.Status.PodIP
182175
port = strconv.Itoa(wsdaemonPort)
183176
default:
184-
log.WithField("pod", pod.Name).Info("Invalid pod. Skipping...")
177+
// nothing to do
185178
return reconcile.Result{}, nil
186179
}
187180

@@ -215,14 +208,17 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
215208
return reconcile.Result{}, fmt.Errorf("obtaining node %s: %w", nodeName, err)
216209
}
217210

218-
if node.Labels[labelToUpdate] == "true" {
219-
// Label already exists.
211+
if labelValue, exists := node.Labels[labelToUpdate]; exists && labelValue == "true" {
212+
// nothing to do, the label already exists.
220213
return reconcile.Result{}, nil
221214
}
222215

223-
err = waitForTCPPortToBeReachable(ipAddress, port, 30*time.Second)
216+
err = checkTCPPortIsReachable(ipAddress, port)
224217
if err != nil {
225-
return reconcile.Result{}, fmt.Errorf("waiting for TCP port: %v", err)
218+
log.WithField("host", ipAddress).WithField("port", port).
219+
WithField("pod", pod.Name).
220+
WithError(err).Error("checking if TCP port is open")
221+
return reconcile.Result{RequeueAfter: time.Second * 5}, nil
226222
}
227223

228224
if component == registryFacade {
@@ -233,8 +229,6 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
233229
}
234230
}
235231

236-
time.Sleep(waitTimeout)
237-
238232
err = updateLabel(labelToUpdate, true, nodeName, r)
239233
if err != nil {
240234
return reconcile.Result{}, fmt.Errorf("trying to add the label: %v", err)
@@ -280,31 +274,14 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
280274
})
281275
}
282276

283-
func waitForTCPPortToBeReachable(host string, port string, timeout time.Duration) error {
284-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
285-
defer cancel()
286-
287-
ticker := time.NewTicker(1 * time.Second)
288-
defer ticker.Stop()
289-
290-
for {
291-
select {
292-
case <-ctx.Done():
293-
return fmt.Errorf("port %v on host %v never reachable", port, host)
294-
case <-ticker.C:
295-
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 500*time.Millisecond)
296-
if err != nil {
297-
continue
298-
}
299-
300-
if conn != nil {
301-
conn.Close()
302-
return nil
303-
}
304-
305-
continue
306-
}
277+
func checkTCPPortIsReachable(host string, port string) error {
278+
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 1*time.Second)
279+
if err != nil {
280+
return err
307281
}
282+
defer conn.Close()
283+
284+
return nil
308285
}
309286

310287
func checkRegistryFacade(host, port string) error {

0 commit comments

Comments
 (0)