Skip to content

Commit e76b62e

Browse files
committed
move to node-labeler
1 parent 2806157 commit e76b62e

File tree

9 files changed

+257
-104
lines changed

9 files changed

+257
-104
lines changed

components/node-labeler/BUILD.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ packages:
77
- "go.sum"
88
deps:
99
- components/common-go:lib
10+
- components/ws-manager-api/go:lib
1011
env:
1112
- CGO_ENABLED=0
1213
- GOOS=linux
@@ -36,3 +37,4 @@ packages:
3637
- "go.sum"
3738
deps:
3839
- components/common-go:lib
40+
- components/ws-manager-api/go:lib

components/node-labeler/cmd/run.go

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import (
1919
corev1 "k8s.io/api/core/v1"
2020
"k8s.io/apimachinery/pkg/api/errors"
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
"k8s.io/apimachinery/pkg/fields"
23+
"k8s.io/apimachinery/pkg/labels"
2224
"k8s.io/apimachinery/pkg/runtime"
2325
"k8s.io/apimachinery/pkg/types"
2426
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -31,6 +33,7 @@ import (
3133
"sigs.k8s.io/controller-runtime/pkg/cache"
3234
"sigs.k8s.io/controller-runtime/pkg/client"
3335
"sigs.k8s.io/controller-runtime/pkg/controller"
36+
"sigs.k8s.io/controller-runtime/pkg/event"
3437
"sigs.k8s.io/controller-runtime/pkg/healthz"
3538
"sigs.k8s.io/controller-runtime/pkg/metrics"
3639
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
@@ -81,13 +84,13 @@ var runCmd = &cobra.Command{
8184
log.WithError(err).Fatal("unable to start node-labeler")
8285
}
8386

84-
client, err := client.New(ctrl.GetConfigOrDie(), client.Options{})
87+
kClient, err := client.New(ctrl.GetConfigOrDie(), client.Options{})
8588
if err != nil {
8689
log.WithError(err).Fatal("unable to create client")
8790
}
8891

8992
r := &PodReconciler{
90-
client,
93+
kClient,
9194
}
9295

9396
componentPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{
@@ -110,6 +113,27 @@ var runCmd = &cobra.Command{
110113
log.WithError(err).Fatal("unable to bind controller watch event handler")
111114
}
112115

116+
// the pod count reconciler needs an index on spec.nodeName to be able to list pods by node
117+
if err := mgr.GetFieldIndexer().IndexField(
118+
context.Background(),
119+
&corev1.Pod{},
120+
"spec.nodeName",
121+
func(o client.Object) []string {
122+
pod := o.(*corev1.Pod)
123+
return []string{pod.Spec.NodeName}
124+
}); err != nil {
125+
log.WithError(err).Fatal("unable to create index for pod nodeName")
126+
}
127+
128+
pc, err := NewPodCountController(mgr.GetClient())
129+
if err != nil {
130+
log.WithError(err).Fatal("unable to create pod count controller")
131+
}
132+
err = pc.SetupWithManager(mgr)
133+
if err != nil {
134+
log.WithError(err).Fatal("unable to bind pod count controller")
135+
}
136+
113137
metrics.Registry.MustRegister(NodeLabelerCounterVec)
114138
metrics.Registry.MustRegister(NodeLabelerTimeHistVec)
115139

@@ -135,6 +159,7 @@ var runCmd = &cobra.Command{
135159

136160
func init() {
137161
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
162+
138163
rootCmd.AddCommand(runCmd)
139164
}
140165

@@ -249,6 +274,127 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
249274
return reconcile.Result{}, nil
250275
}
251276

277+
type PodCountController struct {
278+
client.Client
279+
}
280+
281+
// NewPodCountController creates a controller that tracks workspace pod counts and updates node annotations
282+
func NewPodCountController(client client.Client) (*PodCountController, error) {
283+
return &PodCountController{
284+
Client: client,
285+
}, nil
286+
}
287+
288+
func (pc *PodCountController) SetupWithManager(mgr ctrl.Manager) error {
289+
return ctrl.NewControllerManagedBy(mgr).
290+
Named("pod-count").
291+
For(&corev1.Pod{}).
292+
WithEventFilter(workspacePodFilter()).
293+
Complete(pc)
294+
}
295+
296+
func workspacePodFilter() predicate.Predicate {
297+
return predicate.Funcs{
298+
CreateFunc: func(e event.CreateEvent) bool {
299+
pod := e.Object.(*corev1.Pod)
300+
return pod.Labels["component"] == "workspace"
301+
},
302+
UpdateFunc: func(e event.UpdateEvent) bool {
303+
return false
304+
},
305+
DeleteFunc: func(e event.DeleteEvent) bool {
306+
pod := e.Object.(*corev1.Pod)
307+
return pod.Labels["component"] == "workspace"
308+
},
309+
}
310+
}
311+
312+
func (pc *PodCountController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
313+
log.WithField("request", req.NamespacedName.String()).Info("PodCountController reconciling")
314+
315+
var pod corev1.Pod
316+
if err := pc.Get(ctx, req.NamespacedName, &pod); err != nil {
317+
if !errors.IsNotFound(err) {
318+
log.WithError(err).WithField("pod", req.NamespacedName).Error("unable to fetch Pod")
319+
return ctrl.Result{}, err
320+
}
321+
322+
log.WithField("pod", req.NamespacedName).Info("Pod not found, assuming it was deleted, reconciling all nodes")
323+
324+
// Pod was deleted, reconcile all nodes
325+
return pc.reconcileAllNodes(ctx)
326+
}
327+
328+
if pod.Spec.NodeName == "" {
329+
log.WithField("pod", req.NamespacedName).Info("Pod has no node, requesting reconciliation")
330+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
331+
}
332+
333+
return pc.reconcileNode(ctx, pod.Spec.NodeName)
334+
}
335+
336+
func (pc *PodCountController) reconcileAllNodes(ctx context.Context) (ctrl.Result, error) {
337+
var nodes corev1.NodeList
338+
if err := pc.List(ctx, &nodes); err != nil {
339+
log.WithError(err).Error("failed to list nodes")
340+
return ctrl.Result{}, err
341+
}
342+
343+
for _, node := range nodes.Items {
344+
if _, err := pc.reconcileNode(ctx, node.Name); err != nil {
345+
log.WithError(err).WithField("node", node.Name).Error("failed to reconcile node")
346+
// Continue with other nodes even if one fails
347+
continue
348+
}
349+
log.WithField("node", node.Name).Info("reconciled node")
350+
}
351+
352+
return ctrl.Result{}, nil
353+
}
354+
355+
func (pc *PodCountController) reconcileNode(ctx context.Context, nodeName string) (ctrl.Result, error) {
356+
var podList corev1.PodList
357+
err := pc.List(ctx, &podList, &client.ListOptions{
358+
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}),
359+
LabelSelector: labels.SelectorFromSet(labels.Set{"component": "workspace"}),
360+
})
361+
if err != nil {
362+
log.WithError(err).WithField("nodeName", nodeName).Error("failed to list pods")
363+
return ctrl.Result{}, fmt.Errorf("failed to list pods: %w", err)
364+
}
365+
366+
workspaceCount := len(podList.Items)
367+
log.WithField("nodeName", nodeName).WithField("workspaceCount", workspaceCount).Info("reconciling node")
368+
369+
err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
370+
var node corev1.Node
371+
err := pc.Get(ctx, types.NamespacedName{Name: nodeName}, &node)
372+
if err != nil {
373+
return fmt.Errorf("obtaining node %s: %w", nodeName, err)
374+
}
375+
376+
if node.Annotations == nil {
377+
node.Annotations = make(map[string]string)
378+
}
379+
380+
if workspaceCount > 0 {
381+
node.Annotations["cluster-autoscaler.kubernetes.io/scale-down-disabled"] = "true"
382+
log.WithField("nodeName", nodeName).Info("disabling scale-down for node")
383+
} else {
384+
delete(node.Annotations, "cluster-autoscaler.kubernetes.io/scale-down-disabled")
385+
log.WithField("nodeName", nodeName).Info("enabling scale-down for node")
386+
}
387+
388+
return pc.Update(ctx, &node)
389+
})
390+
if err != nil {
391+
log.WithError(err).WithField("nodeName", nodeName).Error("failed to update node")
392+
return ctrl.Result{}, fmt.Errorf("failed to update node: %w", err)
393+
}
394+
395+
return ctrl.Result{}, nil
396+
}
397+
252398
func updateLabel(label string, add bool, nodeName string, client client.Client) error {
253399
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
254400
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)

components/node-labeler/go.mod

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ require (
3535
github.com/google/uuid v1.3.0 // indirect
3636
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
3737
github.com/hashicorp/golang-lru v1.0.2 // indirect
38-
github.com/imdario/mergo v0.3.6 // indirect
38+
github.com/imdario/mergo v0.3.12 // indirect
3939
github.com/inconshreveable/mousetrap v1.1.0 // indirect
4040
github.com/josharian/intern v1.0.0 // indirect
4141
github.com/json-iterator/go v1.1.12 // indirect
@@ -74,6 +74,8 @@ require (
7474
sigs.k8s.io/yaml v1.4.0 // indirect
7575
)
7676

77+
replace github.com/gitpod-io/gitpod/ws-manager/api => ../ws-manager-api/go // leeway
78+
7779
replace github.com/gitpod-io/gitpod/common-go => ../common-go // leeway
7880

7981
replace github.com/gitpod-io/gitpod/components/scrubber => ../scrubber // leeway

components/node-labeler/go.sum

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/ws-daemon/pkg/controller/suite_test.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,27 @@ var _ = BeforeSuite(func() {
7777
Expect(err).ToNot(HaveOccurred())
7878
ctx, cancel = context.WithCancel(context.Background())
7979

80+
By("Setting up workspace controller")
8081
workspaceCtrl, err = NewWorkspaceController(k8sClient, record.NewFakeRecorder(100), NodeName, secretsNamespace, 5, nil, ctrl_metrics.Registry, nil)
8182
Expect(err).NotTo(HaveOccurred())
82-
8383
Expect(workspaceCtrl.SetupWithManager(k8sManager)).To(Succeed())
84+
8485
_ = createNamespace(secretsNamespace)
8586

87+
By("Starting the manager")
8688
go func() {
8789
defer GinkgoRecover()
8890
err = k8sManager.Start(ctx)
8991
Expect(err).ToNot(HaveOccurred(), "failed to run manager")
9092
}()
93+
94+
By("Waiting for controllers to be ready")
95+
DeferCleanup(cancel)
96+
97+
// Wait for controllers to be ready
98+
Eventually(func() bool {
99+
return k8sManager.GetCache().WaitForCacheSync(ctx)
100+
}, time.Second*10, time.Millisecond*100).Should(BeTrue())
91101
})
92102

93103
func createNamespace(name string) *corev1.Namespace {

components/ws-daemon/pkg/controller/workspace_controller.go

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ import (
2626
corev1 "k8s.io/api/core/v1"
2727
"k8s.io/apimachinery/pkg/api/errors"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29-
"k8s.io/apimachinery/pkg/fields"
30-
"k8s.io/apimachinery/pkg/labels"
3129
"k8s.io/apimachinery/pkg/types"
3230
"k8s.io/apimachinery/pkg/util/wait"
3331
"k8s.io/client-go/tools/record"
@@ -83,41 +81,6 @@ func NewWorkspaceController(c client.Client, recorder record.EventRecorder, node
8381
}, nil
8482
}
8583

86-
type PodCountController struct {
87-
client.Client
88-
NodeName string
89-
}
90-
91-
// NewPodCountController creates a controller that tracks workspace pod counts and updates node annotations
92-
func NewPodCountController(client client.Client, nodeName string) (*PodCountController, error) {
93-
return &PodCountController{
94-
Client: client,
95-
NodeName: nodeName,
96-
}, nil
97-
}
98-
99-
func (pc *PodCountController) SetupWithManager(mgr ctrl.Manager) error {
100-
return ctrl.NewControllerManagedBy(mgr).
101-
Named("pod-count").
102-
For(&workspacev1.Workspace{}).
103-
WithEventFilter(podEventFilter(pc.NodeName)).
104-
Complete(pc)
105-
}
106-
107-
func podEventFilter(nodeName string) predicate.Predicate {
108-
return predicate.Funcs{
109-
CreateFunc: func(e event.CreateEvent) bool {
110-
return workspaceFilter(e.Object, nodeName)
111-
},
112-
UpdateFunc: func(e event.UpdateEvent) bool {
113-
return workspaceFilter(e.ObjectNew, nodeName)
114-
},
115-
DeleteFunc: func(e event.DeleteEvent) bool {
116-
return workspaceFilter(e.Object, nodeName)
117-
},
118-
}
119-
}
120-
12184
// SetupWithManager sets up the controller with the Manager.
12285
func (wsc *WorkspaceController) SetupWithManager(mgr ctrl.Manager) error {
12386
return ctrl.NewControllerManagedBy(mgr).
@@ -183,45 +146,6 @@ func (wsc *WorkspaceController) Reconcile(ctx context.Context, req ctrl.Request)
183146
return ctrl.Result{}, nil
184147
}
185148

186-
func (pc *PodCountController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
187-
var podList corev1.PodList
188-
err := pc.List(ctx, &podList, &client.ListOptions{
189-
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": pc.NodeName}),
190-
LabelSelector: labels.SelectorFromSet(labels.Set{"component": "workspace"}),
191-
})
192-
if err != nil {
193-
glog.WithError(err).WithField("nodeName", pc.NodeName).Error("failed to list pods")
194-
return ctrl.Result{}, err
195-
}
196-
workspaceCount := len(podList.Items)
197-
198-
err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
199-
var node corev1.Node
200-
err := pc.Get(ctx, types.NamespacedName{Name: pc.NodeName}, &node)
201-
if err != nil {
202-
return fmt.Errorf("obtaining node %s: %w", pc.NodeName, err)
203-
}
204-
205-
if node.Annotations == nil {
206-
node.Annotations = make(map[string]string)
207-
}
208-
209-
if workspaceCount > 0 {
210-
node.Annotations["cluster-autoscaler.kubernetes.io/scale-down-disabled"] = "true"
211-
} else {
212-
delete(node.Annotations, "cluster-autoscaler.kubernetes.io/scale-down-disabled")
213-
}
214-
215-
return pc.Update(ctx, &node)
216-
})
217-
if err != nil {
218-
glog.WithError(err).WithField("nodeName", pc.NodeName).Error("[failed to update node")
219-
return ctrl.Result{}, err
220-
}
221-
222-
return ctrl.Result{}, nil
223-
}
224-
225149
// latestWorkspace checks if the we have the latest generation of the workspace CR. We do this because
226150
// the cache could be stale and we retrieve a workspace CR that does not have the content init/backup
227151
// conditions even though we have set them previously. This will lead to us performing these operations

0 commit comments

Comments
 (0)