Skip to content

Commit a9810d6

Browse files
authored
[ws-manager-mk2] Fix race where pod gets recreated in Stopped phase (#16622)
* [ws-manager-mk2] Fix race where pod gets recreated in Stopped phase * [ws-manager-mk2] Add pod creation logs * Change to Patch
1 parent 0f19043 commit a9810d6

File tree

3 files changed

+22
-13
lines changed

3 files changed

+22
-13
lines changed

components/ws-manager-mk2/controllers/create.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -708,9 +708,6 @@ func newStartWorkspaceContext(ctx context.Context, cfg *config.Configuration, ws
708708
span, _ := tracing.FromContext(ctx, "newStartWorkspaceContext")
709709
defer tracing.FinishSpan(span, &err)
710710

711-
// Can't read ws.Status yet at this point (to e.g. get the headless status),
712-
// as it very likely hasn't been set yet by the controller.
713-
isHeadless := ws.Spec.Type != workspacev1.WorkspaceTypeRegular
714711
return &startWorkspaceContext{
715712
Labels: map[string]string{
716713
"app": "gitpod",
@@ -720,13 +717,13 @@ func newStartWorkspaceContext(ctx context.Context, cfg *config.Configuration, ws
720717
wsk8s.OwnerLabel: ws.Spec.Ownership.Owner,
721718
wsk8s.TypeLabel: strings.ToLower(string(ws.Spec.Type)),
722719
instanceIDLabel: ws.Name,
723-
headlessLabel: strconv.FormatBool(isHeadless),
720+
headlessLabel: strconv.FormatBool(ws.IsHeadless()),
724721
},
725722
Config: cfg,
726723
Workspace: ws,
727724
IDEPort: 23000,
728725
SupervisorPort: 22999,
729-
Headless: isHeadless,
726+
Headless: ws.IsHeadless(),
730727
}, nil
731728
}
732729

components/ws-manager-mk2/controllers/workspace_controller.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,17 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
107107

108108
r.updateMetrics(ctx, &workspace)
109109

110-
result, err := r.actOnStatus(ctx, &workspace, workspacePods)
111-
if err != nil {
112-
return result, err
113-
}
114-
115110
err = r.Status().Update(ctx, &workspace)
116111
if err != nil {
117112
// log.WithValues("status", workspace).Error(err, "unable to update workspace status")
118113
return ctrl.Result{Requeue: true}, err
119114
}
120115

116+
result, err := r.actOnStatus(ctx, &workspace, workspacePods)
117+
if err != nil {
118+
return result, err
119+
}
120+
121121
if r.OnReconcile != nil {
122122
r.OnReconcile(ctx, &workspace)
123123
}
@@ -148,16 +148,28 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
148148
return ctrl.Result{}, err
149149
}
150150

151+
log.Info("creating workspace Pod for Workspace")
151152
err = r.Create(ctx, pod)
152153
if errors.IsAlreadyExists(err) {
153154
// pod exists, we're good
155+
log.Info("Workspace Pod already exists")
154156
} else if err != nil {
155157
log.Error(err, "unable to create Pod for Workspace", "pod", pod)
156158
return ctrl.Result{Requeue: true}, err
157159
} else {
158160
// TODO(cw): replicate the startup mechanism where pods can fail to be scheduled,
159161
// need to be deleted and re-created
162+
// Must increment and persist the pod starts, and ensure we retry on conflict.
163+
// If we fail to persist this value, it's possible that the Pod gets recreated
164+
// when the workspace stops, due to PodStarts still being 0 when the original Pod
165+
// disappears.
166+
// Use a Patch instead of an Update, to prevent conflicts.
167+
patch := client.MergeFrom(workspace.DeepCopy())
160168
workspace.Status.PodStarts++
169+
if err := r.Status().Patch(ctx, workspace, patch); err != nil {
170+
log.Error(err, "Failed to patch PodStarts in workspace status")
171+
return ctrl.Result{}, err
172+
}
161173
}
162174
r.metrics.rememberWorkspace(workspace, nil)
163175

operations/observability/mixins/workspace/dashboards/components/ws-manager-mk2.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3485,7 +3485,7 @@
34853485
}
34863486
}
34873487
],
3488-
"refresh": "5s",
3488+
"refresh": "30s",
34893489
"schemaVersion": 37,
34903490
"style": "dark",
34913491
"tags": [
@@ -3497,10 +3497,10 @@
34973497
"current": {
34983498
"selected": true,
34993499
"text": [
3500-
"ephemeral-wv"
3500+
"All"
35013501
],
35023502
"value": [
3503-
"ephemeral-wv"
3503+
"$__all"
35043504
]
35053505
},
35063506
"datasource": {

0 commit comments

Comments
 (0)