Skip to content

Commit 687f337

Browse files
authored
Enable leader election in ws-manager-mk2 (v3) (#18539)
* Enable leader election in ws-manager-mk2 * Update go modules * Move workspace activity to CRD * Remove workspace activity * Cleanup * Update ws-manager-mk2 CRD * Cleanup * Restore lastActivity logic * TEST * Disable observability * Start the grpc server after leader election * Bount the source of subscribers to an informer * Cleanup * Avoid deepCopy * Remove goroutine to execute OnReconcile * Refactor last activity to be consistent acrtoss the controllers * Address feedback
1 parent 4200369 commit 687f337

File tree

21 files changed

+492
-180
lines changed

21 files changed

+492
-180
lines changed

components/ws-manager-api/go/crd/v1/workspace_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ type WorkspaceStatus struct {
171171

172172
// +kubebuilder:validation:Optional
173173
Runtime *WorkspaceRuntimeStatus `json:"runtime,omitempty"`
174+
175+
LastActivity *metav1.Time `json:"lastActivity,omitempty"`
174176
}
175177

176178
func (s *WorkspaceStatus) SetCondition(cond metav1.Condition) {

components/ws-manager-api/go/crd/v1/zz_generated.deepcopy.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/ws-manager-mk2/cmd/sample-workspace/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ import (
1010
"log"
1111
"time"
1212

13-
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1413
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1514
"k8s.io/utils/pointer"
1615
"sigs.k8s.io/yaml"
16+
17+
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1718
)
1819

1920
func main() {

components/ws-manager-mk2/config/crd/bases/workspace.gitpod.io_workspaces.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,9 @@ spec:
522522
type: string
523523
type: array
524524
type: object
525+
lastActivity:
526+
format: date-time
527+
type: string
525528
ownerToken:
526529
type: string
527530
phase:

components/ws-manager-mk2/config/manager/manager.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ spec:
3333
containers:
3434
- command:
3535
- /manager
36-
args:
37-
- --leader-elect
3836
image: controller:latest
3937
name: manager
4038
securityContext:

components/ws-manager-mk2/controllers/maintenance_controller.go

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"encoding/json"
1010
"fmt"
11+
"os"
1112
"sync"
1213
"time"
1314

@@ -17,7 +18,12 @@ import (
1718
"k8s.io/apimachinery/pkg/types"
1819
ctrl "sigs.k8s.io/controller-runtime"
1920
"sigs.k8s.io/controller-runtime/pkg/client"
21+
"sigs.k8s.io/controller-runtime/pkg/controller"
22+
"sigs.k8s.io/controller-runtime/pkg/event"
23+
"sigs.k8s.io/controller-runtime/pkg/handler"
2024
"sigs.k8s.io/controller-runtime/pkg/log"
25+
"sigs.k8s.io/controller-runtime/pkg/predicate"
26+
"sigs.k8s.io/controller-runtime/pkg/source"
2127
)
2228

2329
var (
@@ -106,9 +112,46 @@ func (r *MaintenanceReconciler) setEnabledUntil(ctx context.Context, enabledUnti
106112
log.FromContext(ctx).Info("maintenance mode state change", "enabledUntil", enabledUntil)
107113
}
108114

109-
func (r *MaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
110-
return ctrl.NewControllerManagedBy(mgr).
111-
Named("maintenance").
112-
For(&corev1.ConfigMap{}).
113-
Complete(r)
115+
func (r *MaintenanceReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
116+
// We need to use an unmanaged controller to avoid issues when the pod is in standby mode.
117+
// In that scenario, the controllers are not started and don't watch changes and only
118+
// observe the maintenance mode during the initialization.
119+
c, err := controller.NewUnmanaged("maintenance-controller", mgr, controller.Options{Reconciler: r})
120+
if err != nil {
121+
return err
122+
}
123+
124+
go func() {
125+
err = c.Start(ctx)
126+
if err != nil {
127+
log.FromContext(ctx).Error(err, "cannot start maintenance reconciler")
128+
os.Exit(1)
129+
}
130+
}()
131+
132+
return c.Watch(source.Kind(mgr.GetCache(), &corev1.ConfigMap{}), &handler.EnqueueRequestForObject{}, &filterConfigMap{})
133+
}
134+
135+
type filterConfigMap struct {
136+
predicate.Funcs
137+
}
138+
139+
func (f filterConfigMap) Create(e event.CreateEvent) bool {
140+
return f.filter(e.Object)
141+
}
142+
143+
func (f filterConfigMap) Update(e event.UpdateEvent) bool {
144+
return f.filter(e.ObjectNew)
145+
}
146+
147+
func (f filterConfigMap) Generic(e event.GenericEvent) bool {
148+
return f.filter(e.Object)
149+
}
150+
151+
func (f filterConfigMap) filter(obj client.Object) bool {
152+
if obj == nil {
153+
return false
154+
}
155+
156+
return obj.GetName() == configMapKey.Name && obj.GetNamespace() == configMapKey.Namespace
114157
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2+
// Licensed under the GNU Affero General Public License (AGPL).
3+
// See License-AGPL.txt in the project root for license information.
4+
5+
package controllers
6+
7+
import (
8+
"context"
9+
"os"
10+
11+
"k8s.io/apimachinery/pkg/api/errors"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
ctrl "sigs.k8s.io/controller-runtime"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
15+
"sigs.k8s.io/controller-runtime/pkg/controller"
16+
"sigs.k8s.io/controller-runtime/pkg/handler"
17+
"sigs.k8s.io/controller-runtime/pkg/log"
18+
"sigs.k8s.io/controller-runtime/pkg/source"
19+
20+
config "github.com/gitpod-io/gitpod/ws-manager/api/config"
21+
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
22+
)
23+
24+
func NewSubscriberReconciler(c client.Client, cfg *config.Configuration) (*SubscriberReconciler, error) {
25+
reconciler := &SubscriberReconciler{
26+
Client: c,
27+
Config: cfg,
28+
}
29+
30+
return reconciler, nil
31+
}
32+
33+
type SubscriberReconciler struct {
34+
client.Client
35+
36+
Config *config.Configuration
37+
38+
OnReconcile func(ctx context.Context, ws *workspacev1.Workspace)
39+
}
40+
41+
func (r *SubscriberReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
42+
log := log.FromContext(ctx)
43+
44+
var workspace workspacev1.Workspace
45+
if err := r.Get(ctx, req.NamespacedName, &workspace); err != nil {
46+
if !errors.IsNotFound(err) {
47+
log.Error(err, "unable to fetch workspace")
48+
}
49+
50+
return ctrl.Result{}, client.IgnoreNotFound(err)
51+
}
52+
53+
if workspace.Status.Conditions == nil {
54+
workspace.Status.Conditions = []metav1.Condition{}
55+
}
56+
57+
if r.OnReconcile != nil {
58+
r.OnReconcile(ctx, &workspace)
59+
}
60+
61+
return ctrl.Result{}, nil
62+
}
63+
64+
// SetupWithManager sets up the controller with the Manager.
65+
func (r *SubscriberReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
66+
c, err := controller.NewUnmanaged("subscribers-controller", mgr, controller.Options{Reconciler: r})
67+
if err != nil {
68+
return err
69+
}
70+
71+
go func() {
72+
err = c.Start(ctx)
73+
if err != nil {
74+
log.FromContext(ctx).Error(err, "cannot start Subscriber reconciler")
75+
os.Exit(1)
76+
}
77+
}()
78+
79+
return c.Watch(source.Kind(mgr.GetCache(), &workspacev1.Workspace{}), &handler.EnqueueRequestForObject{})
80+
}

components/ws-manager-mk2/controllers/suite_test.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"sigs.k8s.io/controller-runtime/pkg/metrics"
2424

2525
"github.com/gitpod-io/gitpod/common-go/util"
26-
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
2726
"github.com/gitpod-io/gitpod/ws-manager/api/config"
2827
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
2928
//+kubebuilder:scaffold:imports
@@ -50,10 +49,9 @@ func TestAPIs(t *testing.T) {
5049
}
5150

5251
var (
53-
ctx context.Context
54-
cancel context.CancelFunc
55-
wsActivity *activity.WorkspaceActivity
56-
wsMetrics *controllerMetrics
52+
ctx context.Context
53+
cancel context.CancelFunc
54+
wsMetrics *controllerMetrics
5755
)
5856

5957
var _ = BeforeSuite(func() {
@@ -111,8 +109,7 @@ var _ = BeforeSuite(func() {
111109
Expect(err).ToNot(HaveOccurred())
112110
Expect(wsReconciler.SetupWithManager(k8sManager)).To(Succeed())
113111

114-
wsActivity = activity.NewWorkspaceActivity()
115-
timeoutReconciler, err := NewTimeoutReconciler(k8sManager.GetClient(), k8sManager.GetEventRecorderFor("workspace"), conf, wsActivity, maintenance)
112+
timeoutReconciler, err := NewTimeoutReconciler(k8sManager.GetClient(), k8sManager.GetEventRecorderFor("workspace"), conf, maintenance)
116113
Expect(err).ToNot(HaveOccurred())
117114
Expect(timeoutReconciler.SetupWithManager(k8sManager)).To(Succeed())
118115

components/ws-manager-mk2/controllers/timeout_controller.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ import (
2020
"sigs.k8s.io/controller-runtime/pkg/log"
2121

2222
"github.com/gitpod-io/gitpod/common-go/util"
23-
wsactivity "github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
23+
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
2424
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/maintenance"
2525
config "github.com/gitpod-io/gitpod/ws-manager/api/config"
2626
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
2727
)
2828

29-
func NewTimeoutReconciler(c client.Client, recorder record.EventRecorder, cfg config.Configuration, activity *wsactivity.WorkspaceActivity, maintenance maintenance.Maintenance) (*TimeoutReconciler, error) {
29+
func NewTimeoutReconciler(c client.Client, recorder record.EventRecorder, cfg config.Configuration, maintenance maintenance.Maintenance) (*TimeoutReconciler, error) {
3030
if cfg.HeartbeatInterval == 0 {
3131
return nil, fmt.Errorf("invalid heartbeat interval, must not be 0")
3232
}
@@ -38,7 +38,6 @@ func NewTimeoutReconciler(c client.Client, recorder record.EventRecorder, cfg co
3838
return &TimeoutReconciler{
3939
Client: c,
4040
Config: cfg,
41-
activity: activity,
4241
reconcileInterval: reconcileInterval,
4342
recorder: recorder,
4443
maintenance: maintenance,
@@ -53,7 +52,6 @@ type TimeoutReconciler struct {
5352
client.Client
5453

5554
Config config.Configuration
56-
activity *wsactivity.WorkspaceActivity
5755
reconcileInterval time.Duration
5856
recorder record.EventRecorder
5957
maintenance maintenance.Maintenance
@@ -157,7 +155,7 @@ func (r *TimeoutReconciler) isWorkspaceTimedOut(ws *workspacev1.Workspace) (reas
157155
}
158156

159157
start := ws.ObjectMeta.CreationTimestamp.Time
160-
lastActivity := r.activity.GetLastActivity(ws)
158+
lastActivity := activity.Last(ws)
161159
isClosed := ws.IsConditionTrue(workspacev1.WorkspaceConditionClosed)
162160

163161
switch phase {

components/ws-manager-mk2/controllers/timeout_controller_test.go

Lines changed: 13 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"time"
99

1010
wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
11-
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
1211
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1312
"github.com/google/uuid"
1413
. "github.com/onsi/ginkgo/v2"
@@ -36,7 +35,7 @@ var _ = Describe("TimeoutController", func() {
3635
// Use a fake client instead of the envtest's k8s client, such that we can add objects
3736
// with custom CreationTimestamps and check timeout logic.
3837
fakeClient = fake.NewClientBuilder().WithStatusSubresource(&workspacev1.Workspace{}).WithScheme(k8sClient.Scheme()).Build()
39-
r, err = NewTimeoutReconciler(fakeClient, record.NewFakeRecorder(100), conf, activity.NewWorkspaceActivity(), &fakeMaintenance{enabled: false})
38+
r, err = NewTimeoutReconciler(fakeClient, record.NewFakeRecorder(100), conf, &fakeMaintenance{enabled: false})
4039
Expect(err).ToNot(HaveOccurred())
4140
})
4241

@@ -48,20 +47,21 @@ var _ = Describe("TimeoutController", func() {
4847
customMaxLifetime *time.Duration
4948
update func(ws *workspacev1.Workspace)
5049
updateStatus func(ws *workspacev1.Workspace)
51-
controllerRestart time.Time
5250
expectTimeout bool
5351
}
5452
DescribeTable("workspace timeouts",
5553
func(tc testCase) {
5654
By("creating a workspace")
5755
ws := newWorkspace(uuid.NewString(), "default")
5856
ws.CreationTimestamp = metav1.NewTime(now.Add(-tc.age))
59-
Expect(fakeClient.Create(ctx, ws)).To(Succeed())
6057

6158
if tc.lastActivityAgo != nil {
62-
r.activity.Store(ws.Name, now.Add(-*tc.lastActivityAgo))
59+
now := metav1.NewTime(now.Add(-*tc.lastActivityAgo))
60+
ws.Status.LastActivity = &now
6361
}
6462

63+
Expect(fakeClient.Create(ctx, ws)).To(Succeed())
64+
6565
updateObjWithRetries(fakeClient, ws, false, func(ws *workspacev1.Workspace) {
6666
if tc.customTimeout != nil {
6767
ws.Spec.Timeout.Time = &metav1.Duration{Duration: *tc.customTimeout}
@@ -80,14 +80,6 @@ var _ = Describe("TimeoutController", func() {
8080
}
8181
})
8282

83-
// Set controller (re)start time.
84-
if tc.controllerRestart.IsZero() {
85-
// Bit arbitrary, but default to the controller running for ~2 days.
86-
r.activity.ManagerStartedAt = now.Add(-48 * time.Hour)
87-
} else {
88-
r.activity.ManagerStartedAt = tc.controllerRestart
89-
}
90-
9183
// Run the timeout controller for this workspace.
9284
By("running the TimeoutController reconcile()")
9385
_, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}})
@@ -159,32 +151,11 @@ var _ = Describe("TimeoutController", func() {
159151
lastActivityAgo: pointer.Duration(1 * time.Minute),
160152
expectTimeout: true,
161153
}),
162-
Entry("shouldn't timeout after controller restart", testCase{
163-
phase: workspacev1.WorkspacePhaseRunning,
164-
updateStatus: func(ws *workspacev1.Workspace) {
165-
// Add FirstUserActivity condition from 5 hours ago.
166-
// From this condition the controller should deduce that the workspace
167-
// has had user activity, but since lastActivity is nil, it's been cleared on
168-
// a restart. The controller therefore should not timeout the workspace and
169-
// wait for new user activity. Or timeout once user activity doesn't come
170-
// eventually after the controller restart.
171-
ws.Status.Conditions = wsk8s.AddUniqueCondition(ws.Status.Conditions, metav1.Condition{
172-
Type: string(workspacev1.WorkspaceConditionFirstUserActivity),
173-
Status: metav1.ConditionTrue,
174-
LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Hour)),
175-
})
176-
},
177-
age: 5 * time.Hour,
178-
lastActivityAgo: nil, // No last activity recorded yet after controller restart.
179-
controllerRestart: now,
180-
expectTimeout: false,
181-
}),
182154
Entry("should timeout after controller restart if no FirstUserActivity", testCase{
183-
phase: workspacev1.WorkspacePhaseRunning,
184-
age: 5 * time.Hour,
185-
lastActivityAgo: nil, // No last activity recorded yet after controller restart.
186-
controllerRestart: now,
187-
expectTimeout: true,
155+
phase: workspacev1.WorkspacePhaseRunning,
156+
age: 5 * time.Hour,
157+
lastActivityAgo: nil, // No last activity recorded yet after controller restart.
158+
expectTimeout: true,
188159
}),
189160
Entry("should timeout eventually with no user activity after controller restart", testCase{
190161
phase: workspacev1.WorkspacePhaseRunning,
@@ -195,10 +166,9 @@ var _ = Describe("TimeoutController", func() {
195166
LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Hour)),
196167
})
197168
},
198-
age: 5 * time.Hour,
199-
lastActivityAgo: nil,
200-
controllerRestart: now.Add(-2 * time.Hour),
201-
expectTimeout: true,
169+
age: 5 * time.Hour,
170+
lastActivityAgo: nil,
171+
expectTimeout: true,
202172
}),
203173
)
204174
})
@@ -207,7 +177,7 @@ var _ = Describe("TimeoutController", func() {
207177
var r *TimeoutReconciler
208178
BeforeEach(func() {
209179
var err error
210-
r, err = NewTimeoutReconciler(k8sClient, record.NewFakeRecorder(100), newTestConfig(), activity.NewWorkspaceActivity(), &fakeMaintenance{enabled: false})
180+
r, err = NewTimeoutReconciler(k8sClient, record.NewFakeRecorder(100), newTestConfig(), &fakeMaintenance{enabled: false})
211181
Expect(err).ToNot(HaveOccurred())
212182
})
213183

0 commit comments

Comments
 (0)