Skip to content

Commit 9b199da

Browse files
committed
Replace watch with builder.ControllerManagedBy
Signed-off-by: Manuel de Brito Fontes <[email protected]>
1 parent 72a58dd commit 9b199da

File tree

1 file changed

+41
-73
lines changed
  • components/node-labeler/cmd

1 file changed

+41
-73
lines changed

components/node-labeler/cmd/run.go

Lines changed: 41 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@ import (
1818
"github.com/spf13/cobra"
1919
corev1 "k8s.io/api/core/v1"
2020
"k8s.io/apimachinery/pkg/api/errors"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
"k8s.io/apimachinery/pkg/runtime"
2223
"k8s.io/apimachinery/pkg/types"
2324
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2425
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
2526
_ "k8s.io/client-go/plugin/pkg/client/auth"
2627
"k8s.io/client-go/util/retry"
28+
"k8s.io/utils/pointer"
2729
ctrl "sigs.k8s.io/controller-runtime"
30+
"sigs.k8s.io/controller-runtime/pkg/builder"
2831
"sigs.k8s.io/controller-runtime/pkg/client"
2932
"sigs.k8s.io/controller-runtime/pkg/controller"
30-
"sigs.k8s.io/controller-runtime/pkg/event"
31-
"sigs.k8s.io/controller-runtime/pkg/handler"
3233
"sigs.k8s.io/controller-runtime/pkg/healthz"
3334
"sigs.k8s.io/controller-runtime/pkg/metrics"
3435
"sigs.k8s.io/controller-runtime/pkg/predicate"
3536
"sigs.k8s.io/controller-runtime/pkg/reconcile"
36-
"sigs.k8s.io/controller-runtime/pkg/source"
3737

3838
"github.com/gitpod-io/gitpod/common-go/log"
3939
)
@@ -46,6 +46,8 @@ const (
4646
wsDaemon = "ws-daemon"
4747
)
4848

49+
var defaultRequeueTime = time.Second * 10
50+
4951
// runCmd represents the run command
5052
var runCmd = &cobra.Command{
5153
Use: "run",
@@ -60,6 +62,10 @@ var runCmd = &cobra.Command{
6062
LeaderElection: true,
6163
LeaderElectionID: "node-labeler.gitpod.io",
6264
Namespace: namespace,
65+
// default sync period is 10h.
66+
// in case node-labeler is restarted and no change happens, we could waste (at least) 20m on a node
67+
// that will never run workspaces, plus the additional nodes cluster-autoscaler adds to compensate
68+
SyncPeriod: pointer.Duration(2 * time.Minute),
6369
})
6470
if err != nil {
6571
log.WithError(err).Fatal("unable to start node-labeber")
@@ -74,35 +80,29 @@ var runCmd = &cobra.Command{
7480
client,
7581
}
7682

77-
c, err := controller.New("pod-watcher", mgr, controller.Options{
78-
Reconciler: r,
79-
MaxConcurrentReconciles: 20,
83+
componentPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{
84+
MatchExpressions: []metav1.LabelSelectorRequirement{{
85+
Key: "component",
86+
Operator: metav1.LabelSelectorOpIn,
87+
Values: []string{"ws-daemon", "registry-facade"},
88+
}},
8089
})
90+
if err != nil {
91+
log.WithError(err).Fatal("unable to create predicate")
92+
}
93+
94+
err = ctrl.NewControllerManagedBy(mgr).
95+
Named("pod-watcher").
96+
For(&corev1.Pod{}, builder.WithPredicates(predicate.Or(componentPredicate))).
97+
WithOptions(controller.Options{MaxConcurrentReconciles: 1}).
98+
Complete(r)
8199
if err != nil {
82100
log.WithError(err).Fatal("unable to bind controller watch event handler")
83101
}
84102

85103
metrics.Registry.MustRegister(NodeLabelerCounterVec)
86104
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
87105

88-
err = c.Watch(&source.Kind{Type: &corev1.Pod{}}, &handler.EnqueueRequestForObject{}, predicate.Funcs{
89-
CreateFunc: func(ce event.CreateEvent) bool {
90-
return processPodEvent(ce.Object)
91-
},
92-
UpdateFunc: func(ue event.UpdateEvent) bool {
93-
return processPodEvent(ue.ObjectNew)
94-
},
95-
DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
96-
return processPodEvent(deleteEvent.Object)
97-
},
98-
GenericFunc: func(genericEvent event.GenericEvent) bool {
99-
return false
100-
},
101-
})
102-
if err != nil {
103-
log.WithError(err).Fatal("unable to create controller")
104-
}
105-
106106
err = mgr.AddHealthzCheck("healthz", healthz.Ping)
107107
if err != nil {
108108
log.WithError(err).Fatal("unable to set up health check")
@@ -132,14 +132,6 @@ var (
132132
scheme = runtime.NewScheme()
133133
)
134134

135-
func processPodEvent(pod client.Object) bool {
136-
if strings.HasPrefix(pod.GetName(), registryFacade) || strings.HasPrefix(pod.GetName(), wsDaemon) {
137-
return true
138-
}
139-
140-
return false
141-
}
142-
143135
type PodReconciler struct {
144136
client.Client
145137
}
@@ -157,16 +149,14 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
157149

158150
nodeName := pod.Spec.NodeName
159151
if nodeName == "" {
160-
return reconcile.Result{RequeueAfter: time.Second * 10}, err
152+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
161153
}
162154

163155
var (
164156
ipAddress string
165157
port string
166158
component string
167159
labelToUpdate string
168-
169-
waitTimeout time.Duration = 5 * time.Second
170160
)
171161

172162
switch {
@@ -181,7 +171,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
181171
ipAddress = pod.Status.PodIP
182172
port = strconv.Itoa(wsdaemonPort)
183173
default:
184-
log.WithField("pod", pod.Name).Info("Invalid pod. Skipping...")
174+
// nothing to do
185175
return reconcile.Result{}, nil
186176
}
187177

@@ -198,7 +188,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
198188
}
199189

200190
log.WithError(err).Error("removing node label")
201-
return reconcile.Result{RequeueAfter: time.Second * 10}, err
191+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, err
202192
}
203193

204194
return reconcile.Result{}, err
@@ -215,28 +205,28 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
215205
return reconcile.Result{}, fmt.Errorf("obtaining node %s: %w", nodeName, err)
216206
}
217207

218-
if node.Labels[labelToUpdate] == "true" {
219-
// Label already exists.
208+
if labelValue, exists := node.Labels[labelToUpdate]; exists && labelValue == "true" {
209+
// nothing to do, the label already exists.
220210
return reconcile.Result{}, nil
221211
}
222212

223-
err = waitForTCPPortToBeReachable(ipAddress, port, 30*time.Second)
213+
err = checkTCPPortIsReachable(ipAddress, port)
224214
if err != nil {
225-
return reconcile.Result{}, fmt.Errorf("waiting for TCP port: %v", err)
215+
log.WithField("host", ipAddress).WithField("port", port).WithField("pod", pod.Name).WithError(err).Error("checking if TCP port is open")
216+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
226217
}
227218

228219
if component == registryFacade {
229220
err = checkRegistryFacade(ipAddress, port)
230221
if err != nil {
231222
log.WithError(err).Error("checking registry-facade")
232-
return reconcile.Result{RequeueAfter: time.Second * 10}, nil
223+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
233224
}
234225
}
235226

236-
time.Sleep(waitTimeout)
237-
238227
err = updateLabel(labelToUpdate, true, nodeName, r)
239228
if err != nil {
229+
log.WithError(err).Error("updating node label")
240230
return reconcile.Result{}, fmt.Errorf("trying to add the label: %v", err)
241231
}
242232

@@ -258,11 +248,6 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
258248
return err
259249
}
260250

261-
_, hasLabel := node.Labels[label]
262-
if add == hasLabel {
263-
return nil
264-
}
265-
266251
if add {
267252
node.Labels[label] = "true"
268253
log.WithField("label", label).WithField("node", nodeName).Info("adding label to node")
@@ -280,31 +265,14 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
280265
})
281266
}
282267

283-
func waitForTCPPortToBeReachable(host string, port string, timeout time.Duration) error {
284-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
285-
defer cancel()
286-
287-
ticker := time.NewTicker(1 * time.Second)
288-
defer ticker.Stop()
289-
290-
for {
291-
select {
292-
case <-ctx.Done():
293-
return fmt.Errorf("port %v on host %v never reachable", port, host)
294-
case <-ticker.C:
295-
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 500*time.Millisecond)
296-
if err != nil {
297-
continue
298-
}
299-
300-
if conn != nil {
301-
conn.Close()
302-
return nil
303-
}
304-
305-
continue
306-
}
268+
func checkTCPPortIsReachable(host string, port string) error {
269+
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 1*time.Second)
270+
if err != nil {
271+
return err
307272
}
273+
defer conn.Close()
274+
275+
return nil
308276
}
309277

310278
func checkRegistryFacade(host, port string) error {

0 commit comments

Comments
 (0)