Skip to content

Commit cf4e38f

Browse files
committed
act on ws crds
1 parent e76b62e commit cf4e38f

File tree

6 files changed

+455
-162
lines changed

6 files changed

+455
-162
lines changed

components/node-labeler/cmd/run.go

Lines changed: 75 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@ import (
1515
"time"
1616

1717
"github.com/bombsimon/logrusr/v2"
18+
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1819
"github.com/spf13/cobra"
1920
corev1 "k8s.io/api/core/v1"
2021
"k8s.io/apimachinery/pkg/api/errors"
2122
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22-
"k8s.io/apimachinery/pkg/fields"
23-
"k8s.io/apimachinery/pkg/labels"
2423
"k8s.io/apimachinery/pkg/runtime"
2524
"k8s.io/apimachinery/pkg/types"
2625
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -113,25 +112,13 @@ var runCmd = &cobra.Command{
113112
log.WithError(err).Fatal("unable to bind controller watch event handler")
114113
}
115114

116-
// the pod count reconciler needs an index on spec.nodeName to be able to list pods by node
117-
if err := mgr.GetFieldIndexer().IndexField(
118-
context.Background(),
119-
&corev1.Pod{},
120-
"spec.nodeName",
121-
func(o client.Object) []string {
122-
pod := o.(*corev1.Pod)
123-
return []string{pod.Spec.NodeName}
124-
}); err != nil {
125-
log.WithError(err).Fatal("unable to create index for pod nodeName")
126-
}
127-
128-
pc, err := NewPodCountController(mgr.GetClient())
115+
wc, err := NewWorkspaceCountController(mgr.GetClient())
129116
if err != nil {
130-
log.WithError(err).Fatal("unable to create pod count controller")
117+
log.WithError(err).Fatal("unable to create workspace count controller")
131118
}
132-
err = pc.SetupWithManager(mgr)
119+
err = wc.SetupWithManager(mgr)
133120
if err != nil {
134-
log.WithError(err).Fatal("unable to bind pod count controller")
121+
log.WithError(err).Fatal("unable to bind workspace count controller")
135122
}
136123

137124
metrics.Registry.MustRegister(NodeLabelerCounterVec)
@@ -159,6 +146,7 @@ var runCmd = &cobra.Command{
159146

160147
func init() {
161148
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
149+
utilruntime.Must(workspacev1.AddToScheme(scheme))
162150

163151
rootCmd.AddCommand(runCmd)
164152
}
@@ -274,101 +262,119 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
274262
return reconcile.Result{}, nil
275263
}
276264

277-
type PodCountController struct {
265+
type WorkspaceCountController struct {
278266
client.Client
279267
}
280268

281-
// NewPodCountController creates a controller that tracks workspace pod counts and updates node annotations
282-
func NewPodCountController(client client.Client) (*PodCountController, error) {
283-
return &PodCountController{
269+
func NewWorkspaceCountController(client client.Client) (*WorkspaceCountController, error) {
270+
return &WorkspaceCountController{
284271
Client: client,
285272
}, nil
286273
}
287274

288-
func (pc *PodCountController) SetupWithManager(mgr ctrl.Manager) error {
275+
func (wc *WorkspaceCountController) SetupWithManager(mgr ctrl.Manager) error {
289276
return ctrl.NewControllerManagedBy(mgr).
290-
Named("pod-count").
291-
For(&corev1.Pod{}).
292-
WithEventFilter(workspacePodFilter()).
293-
Complete(pc)
277+
Named("workspace-count").
278+
For(&workspacev1.Workspace{}).
279+
WithEventFilter(workspaceFilter()).
280+
Complete(wc)
294281
}
295282

296-
func workspacePodFilter() predicate.Predicate {
283+
func workspaceFilter() predicate.Predicate {
297284
return predicate.Funcs{
298285
CreateFunc: func(e event.CreateEvent) bool {
299-
pod := e.Object.(*corev1.Pod)
300-
return pod.Labels["component"] == "workspace"
286+
ws := e.Object.(*workspacev1.Workspace)
287+
return ws.Status.Runtime != nil && ws.Status.Runtime.NodeName != ""
301288
},
302289
UpdateFunc: func(e event.UpdateEvent) bool {
303290
return false
304291
},
305292
DeleteFunc: func(e event.DeleteEvent) bool {
306-
pod := e.Object.(*corev1.Pod)
307-
return pod.Labels["component"] == "workspace"
293+
ws := e.Object.(*workspacev1.Workspace)
294+
return ws.Status.Runtime != nil && ws.Status.Runtime.NodeName != ""
308295
},
309296
}
310297
}
311298

312-
func (pc *PodCountController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
313-
log.WithField("request", req.NamespacedName.String()).Info("PodCountController reconciling")
299+
func (wc *WorkspaceCountController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
300+
log.WithField("request", req.NamespacedName.String()).Info("WorkspaceCountController reconciling")
314301

315-
var pod corev1.Pod
316-
if err := pc.Get(ctx, req.NamespacedName, &pod); err != nil {
302+
var ws workspacev1.Workspace
303+
if err := wc.Get(ctx, req.NamespacedName, &ws); err != nil {
317304
if !errors.IsNotFound(err) {
318-
log.WithError(err).WithField("pod", req.NamespacedName).Error("unable to fetch Pod")
305+
log.WithError(err).WithField("workspace", req.NamespacedName).Error("unable to fetch Workspace")
319306
return ctrl.Result{}, err
320307
}
308+
// If workspace not found, do a full reconciliation
309+
log.WithField("workspace", req.NamespacedName).Info("Workspace not found, reconciling all nodes")
310+
return wc.reconcileAllNodes(ctx)
311+
}
321312

322-
log.WithField("pod", req.NamespacedName).Info("Pod not found, assuming it was deleted, reconciling all nodes")
313+
if ws.Status.Runtime != nil && ws.Status.Runtime.NodeName != "" {
314+
var workspaceList workspacev1.WorkspaceList
315+
if err := wc.List(ctx, &workspaceList); err != nil {
316+
log.WithError(err).Error("failed to list workspaces")
317+
return ctrl.Result{}, err
318+
}
323319

324-
// Pod was deleted, reconcile all nodes
325-
return pc.reconcileAllNodes(ctx)
326-
}
320+
count := 0
321+
nodeName := ws.Status.Runtime.NodeName
322+
for _, ws := range workspaceList.Items {
323+
if ws.Status.Runtime != nil &&
324+
ws.Status.Runtime.NodeName == nodeName &&
325+
ws.DeletionTimestamp.IsZero() {
326+
count++
327+
}
328+
}
327329

328-
if pod.Spec.NodeName == "" {
329-
log.WithField("pod", req.NamespacedName).Info("Pod has no node, requesting reconciliation")
330-
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
330+
if err := wc.updateNodeAnnotation(ctx, nodeName, count); err != nil {
331+
return ctrl.Result{}, err
332+
}
333+
log.WithField("node", nodeName).WithField("count", count).Info("updated node annotation")
331334
}
332335

333-
return pc.reconcileNode(ctx, pod.Spec.NodeName)
336+
return ctrl.Result{}, nil
334337
}
335338

336-
func (pc *PodCountController) reconcileAllNodes(ctx context.Context) (ctrl.Result, error) {
339+
func (wc *WorkspaceCountController) reconcileAllNodes(ctx context.Context) (ctrl.Result, error) {
340+
var workspaceList workspacev1.WorkspaceList
341+
if err := wc.List(ctx, &workspaceList); err != nil {
342+
log.WithError(err).Error("failed to list workspaces")
343+
return ctrl.Result{}, err
344+
}
345+
346+
workspaceCounts := make(map[string]int)
347+
for _, ws := range workspaceList.Items {
348+
if ws.Status.Runtime != nil &&
349+
ws.Status.Runtime.NodeName != "" &&
350+
ws.DeletionTimestamp.IsZero() {
351+
workspaceCounts[ws.Status.Runtime.NodeName]++
352+
}
353+
}
354+
337355
var nodes corev1.NodeList
338-
if err := pc.List(ctx, &nodes); err != nil {
356+
if err := wc.List(ctx, &nodes); err != nil {
339357
log.WithError(err).Error("failed to list nodes")
340358
return ctrl.Result{}, err
341359
}
342360

361+
// Update each node's annotation based on its count
343362
for _, node := range nodes.Items {
344-
if _, err := pc.reconcileNode(ctx, node.Name); err != nil {
345-
log.WithError(err).WithField("node", node.Name).Error("failed to reconcile node")
346-
// Continue with other nodes even if one fails
363+
count := workspaceCounts[node.Name]
364+
if err := wc.updateNodeAnnotation(ctx, node.Name, count); err != nil {
365+
log.WithError(err).WithField("node", node.Name).Error("failed to update node")
347366
continue
348367
}
349-
log.WithField("node", node.Name).Info("reconciled node")
368+
log.WithField("node", node.Name).WithField("count", count).Info("updated node annotation")
350369
}
351370

352371
return ctrl.Result{}, nil
353372
}
354373

355-
func (pc *PodCountController) reconcileNode(ctx context.Context, nodeName string) (ctrl.Result, error) {
356-
var podList corev1.PodList
357-
err := pc.List(ctx, &podList, &client.ListOptions{
358-
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}),
359-
LabelSelector: labels.SelectorFromSet(labels.Set{"component": "workspace"}),
360-
})
361-
if err != nil {
362-
log.WithError(err).WithField("nodeName", nodeName).Error("failed to list pods")
363-
return ctrl.Result{}, fmt.Errorf("failed to list pods: %w", err)
364-
}
365-
366-
workspaceCount := len(podList.Items)
367-
log.WithField("nodeName", nodeName).WithField("workspaceCount", workspaceCount).Info("reconciling node")
368-
369-
err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
374+
func (wc *WorkspaceCountController) updateNodeAnnotation(ctx context.Context, nodeName string, count int) error {
375+
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
370376
var node corev1.Node
371-
err := pc.Get(ctx, types.NamespacedName{Name: nodeName}, &node)
377+
err := wc.Get(ctx, types.NamespacedName{Name: nodeName}, &node)
372378
if err != nil {
373379
return fmt.Errorf("obtaining node %s: %w", nodeName, err)
374380
}
@@ -377,22 +383,16 @@ func (pc *PodCountController) reconcileNode(ctx context.Context, nodeName string
377383
node.Annotations = make(map[string]string)
378384
}
379385

380-
if workspaceCount > 0 {
386+
if count > 0 {
381387
node.Annotations["cluster-autoscaler.kubernetes.io/scale-down-disabled"] = "true"
382388
log.WithField("nodeName", nodeName).Info("disabling scale-down for node")
383389
} else {
384390
delete(node.Annotations, "cluster-autoscaler.kubernetes.io/scale-down-disabled")
385391
log.WithField("nodeName", nodeName).Info("enabling scale-down for node")
386392
}
387393

388-
return pc.Update(ctx, &node)
394+
return wc.Update(ctx, &node)
389395
})
390-
if err != nil {
391-
log.WithError(err).WithField("nodeName", nodeName).Error("failed to update node")
392-
return ctrl.Result{}, fmt.Errorf("failed to update node: %w", err)
393-
}
394-
395-
return ctrl.Result{}, nil
396396
}
397397

398398
func updateLabel(label string, add bool, nodeName string, client client.Client) error {

0 commit comments

Comments
 (0)