Skip to content

Commit 72c0f31

Browse files
committed
Replace watch with builder.ControllerManagedBy
Signed-off-by: Manuel de Brito Fontes <[email protected]>
1 parent 72a58dd commit 72c0f31

File tree

2 files changed

+52
-73
lines changed

2 files changed

+52
-73
lines changed

components/node-labeler/cmd/run.go

Lines changed: 51 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@ import (
1818
"github.com/spf13/cobra"
1919
corev1 "k8s.io/api/core/v1"
2020
"k8s.io/apimachinery/pkg/api/errors"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
"k8s.io/apimachinery/pkg/runtime"
2223
"k8s.io/apimachinery/pkg/types"
2324
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2425
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
2526
_ "k8s.io/client-go/plugin/pkg/client/auth"
2627
"k8s.io/client-go/util/retry"
28+
"k8s.io/utils/pointer"
2729
ctrl "sigs.k8s.io/controller-runtime"
30+
"sigs.k8s.io/controller-runtime/pkg/builder"
2831
"sigs.k8s.io/controller-runtime/pkg/client"
2932
"sigs.k8s.io/controller-runtime/pkg/controller"
30-
"sigs.k8s.io/controller-runtime/pkg/event"
31-
"sigs.k8s.io/controller-runtime/pkg/handler"
3233
"sigs.k8s.io/controller-runtime/pkg/healthz"
3334
"sigs.k8s.io/controller-runtime/pkg/metrics"
3435
"sigs.k8s.io/controller-runtime/pkg/predicate"
3536
"sigs.k8s.io/controller-runtime/pkg/reconcile"
36-
"sigs.k8s.io/controller-runtime/pkg/source"
3737

3838
"github.com/gitpod-io/gitpod/common-go/log"
3939
)
@@ -46,6 +46,8 @@ const (
4646
wsDaemon = "ws-daemon"
4747
)
4848

49+
var defaultRequeueTime = time.Second * 10
50+
4951
// serveCmd represents the serve command
5052
var runCmd = &cobra.Command{
5153
Use: "run",
@@ -60,6 +62,10 @@ var runCmd = &cobra.Command{
6062
LeaderElection: true,
6163
LeaderElectionID: "node-labeler.gitpod.io",
6264
Namespace: namespace,
65+
// default sync period is 10h.
66+
// in case node-labeler is restarted and not change happens, we could waste (at least) 20m in a node
67+
// that never will run workspaces and the additional nodes cluster-autoscaler adds to compensate
68+
SyncPeriod: pointer.Duration(2 * time.Minute),
6369
})
6470
if err != nil {
6571
log.WithError(err).Fatal("unable to start node-labeber")
@@ -74,35 +80,38 @@ var runCmd = &cobra.Command{
7480
client,
7581
}
7682

77-
c, err := controller.New("pod-watcher", mgr, controller.Options{
78-
Reconciler: r,
79-
MaxConcurrentReconciles: 20,
83+
rfPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{
84+
MatchLabels: map[string]string{
85+
"app": "gitpod",
86+
"component": "registry-facade",
87+
},
8088
})
8189
if err != nil {
82-
log.WithError(err).Fatal("unable to bind controller watch event handler")
90+
log.WithError(err).Fatal("unable to create predicate")
8391
}
8492

85-
metrics.Registry.MustRegister(NodeLabelerCounterVec)
86-
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
87-
88-
err = c.Watch(&source.Kind{Type: &corev1.Pod{}}, &handler.EnqueueRequestForObject{}, predicate.Funcs{
89-
CreateFunc: func(ce event.CreateEvent) bool {
90-
return processPodEvent(ce.Object)
91-
},
92-
UpdateFunc: func(ue event.UpdateEvent) bool {
93-
return processPodEvent(ue.ObjectNew)
94-
},
95-
DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
96-
return processPodEvent(deleteEvent.Object)
97-
},
98-
GenericFunc: func(genericEvent event.GenericEvent) bool {
99-
return false
93+
wsPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{
94+
MatchLabels: map[string]string{
95+
"app": "gitpod",
96+
"component": "ws-daemon",
10097
},
10198
})
10299
if err != nil {
103-
log.WithError(err).Fatal("unable to create controller")
100+
log.WithError(err).Fatal("unable to create predicate")
101+
}
102+
103+
err = ctrl.NewControllerManagedBy(mgr).
104+
Named("pod-watcher").
105+
For(&corev1.Pod{}, builder.WithPredicates(predicate.Or(rfPredicate, wsPredicate))).
106+
WithOptions(controller.Options{MaxConcurrentReconciles: 1}).
107+
Complete(r)
108+
if err != nil {
109+
log.WithError(err).Fatal("unable to bind controller watch event handler")
104110
}
105111

112+
metrics.Registry.MustRegister(NodeLabelerCounterVec)
113+
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
114+
106115
err = mgr.AddHealthzCheck("healthz", healthz.Ping)
107116
if err != nil {
108117
log.WithError(err).Fatal("unable to set up health check")
@@ -132,19 +141,13 @@ var (
132141
scheme = runtime.NewScheme()
133142
)
134143

135-
func processPodEvent(pod client.Object) bool {
136-
if strings.HasPrefix(pod.GetName(), registryFacade) || strings.HasPrefix(pod.GetName(), wsDaemon) {
137-
return true
138-
}
139-
140-
return false
141-
}
142-
143144
type PodReconciler struct {
144145
client.Client
145146
}
146147

147148
func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
149+
log.WithField("pod", req.Name).Info("reconciling")
150+
148151
var pod corev1.Pod
149152
err := r.Get(ctx, req.NamespacedName, &pod)
150153
if err != nil {
@@ -157,16 +160,14 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
157160

158161
nodeName := pod.Spec.NodeName
159162
if nodeName == "" {
160-
return reconcile.Result{RequeueAfter: time.Second * 10}, err
163+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
161164
}
162165

163166
var (
164167
ipAddress string
165168
port string
166169
component string
167170
labelToUpdate string
168-
169-
waitTimeout time.Duration = 5 * time.Second
170171
)
171172

172173
switch {
@@ -181,7 +182,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
181182
ipAddress = pod.Status.PodIP
182183
port = strconv.Itoa(wsdaemonPort)
183184
default:
184-
log.WithField("pod", pod.Name).Info("Invalid pod. Skipping...")
185+
// nothing to do
185186
return reconcile.Result{}, nil
186187
}
187188

@@ -198,14 +199,13 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
198199
}
199200

200201
log.WithError(err).Error("removing node label")
201-
return reconcile.Result{RequeueAfter: time.Second * 10}, err
202+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, err
202203
}
203204

204205
return reconcile.Result{}, err
205206
}
206207

207208
if !IsPodReady(&pod) {
208-
// not ready. Wait until the next update.
209209
return reconcile.Result{}, nil
210210
}
211211

@@ -215,28 +215,28 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
215215
return reconcile.Result{}, fmt.Errorf("obtaining node %s: %w", nodeName, err)
216216
}
217217

218-
if node.Labels[labelToUpdate] == "true" {
219-
// Label already exists.
218+
if labelValue, exists := node.Labels[labelToUpdate]; exists && labelValue == "true" {
219+
// nothing to do, the label already exists.
220220
return reconcile.Result{}, nil
221221
}
222222

223-
err = waitForTCPPortToBeReachable(ipAddress, port, 30*time.Second)
223+
err = checkTCPPortIsReachable(ipAddress, port)
224224
if err != nil {
225-
return reconcile.Result{}, fmt.Errorf("waiting for TCP port: %v", err)
225+
log.WithField("host", ipAddress).WithField("port", port).WithField("pod", pod.Name).WithError(err).Error("checking if TCP port is open")
226+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
226227
}
227228

228229
if component == registryFacade {
229230
err = checkRegistryFacade(ipAddress, port)
230231
if err != nil {
231232
log.WithError(err).Error("checking registry-facade")
232-
return reconcile.Result{RequeueAfter: time.Second * 10}, nil
233+
return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
233234
}
234235
}
235236

236-
time.Sleep(waitTimeout)
237-
238237
err = updateLabel(labelToUpdate, true, nodeName, r)
239238
if err != nil {
239+
log.WithError(err).Error("updating node label")
240240
return reconcile.Result{}, fmt.Errorf("trying to add the label: %v", err)
241241
}
242242

@@ -258,11 +258,6 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
258258
return err
259259
}
260260

261-
_, hasLabel := node.Labels[label]
262-
if add == hasLabel {
263-
return nil
264-
}
265-
266261
if add {
267262
node.Labels[label] = "true"
268263
log.WithField("label", label).WithField("node", nodeName).Info("adding label to node")
@@ -280,31 +275,14 @@ func updateLabel(label string, add bool, nodeName string, client client.Client)
280275
})
281276
}
282277

283-
func waitForTCPPortToBeReachable(host string, port string, timeout time.Duration) error {
284-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
285-
defer cancel()
286-
287-
ticker := time.NewTicker(1 * time.Second)
288-
defer ticker.Stop()
289-
290-
for {
291-
select {
292-
case <-ctx.Done():
293-
return fmt.Errorf("port %v on host %v never reachable", port, host)
294-
case <-ticker.C:
295-
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 500*time.Millisecond)
296-
if err != nil {
297-
continue
298-
}
299-
300-
if conn != nil {
301-
conn.Close()
302-
return nil
303-
}
304-
305-
continue
306-
}
278+
func checkTCPPortIsReachable(host string, port string) error {
279+
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), 1*time.Second)
280+
if err != nil {
281+
return err
307282
}
283+
defer conn.Close()
284+
285+
return nil
308286
}
309287

310288
func checkRegistryFacade(host, port string) error {

install/installer/pkg/components/ws-daemon/daemonset.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
func daemonset(ctx *common.RenderContext) ([]runtime.Object, error) {
2424
cfg := ctx.Config
25+
2526
labels := common.CustomizeLabel(ctx, Component, common.TypeMetaDaemonset)
2627

2728
configHash, err := common.ObjectHash(configmap(ctx))

0 commit comments

Comments
 (0)