Skip to content

[ws-manager-mk2] Report content init/dispose failures #17015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions components/ws-daemon/pkg/controller/workspace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ func (wsc *WorkspaceController) handleWorkspaceInit(ctx context.Context, ws *wor

if failure != "" {
log.Error(initErr, "could not initialize workspace", "name", ws.Name)
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionFalse, "InitializationFailure", failure))
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionFalse, workspacev1.ReasonInitializationFailure, failure))
} else {
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, "InitializationSuccess", ""))
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
}

return wsc.Status().Update(ctx, ws)
Expand Down
7 changes: 7 additions & 0 deletions components/ws-manager-api/go/crd/v1/workspace_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ import (
const (
// GitpodFinalizerName is the name of the finalizer we use on workspaces and their pods.
GitpodFinalizerName = "gitpod.io/finalizer"

// ReasonInitializationSuccess is a Reason for the WorkspaceConditionContentReady condition,
// indicating that content init succeeded.
ReasonInitializationSuccess = "InitializationSuccess"
// ReasonInitializationFailure is a Reason for the WorkspaceConditionContentReady condition,
// indicating that content init failed. The condition's message will contain the failure details.
ReasonInitializationFailure = "InitializationFailure"
)

// WorkspaceSpec defines the desired state of Workspace
Expand Down
2 changes: 1 addition & 1 deletion components/ws-manager-mk2/controllers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func newMetricState(ws *workspacev1.Workspace) metricState {
// This is to prevent these from being re-recorded after the controller restarts and clears the metric state for
// each workspace.
recordedStartTime: ws.Status.Phase == workspacev1.WorkspacePhaseRunning,
recordedInitFailure: wsk8s.ConditionWithStatusAndReason(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, "InitializationFailure"),
recordedInitFailure: wsk8s.ConditionWithStatusAndReason(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, workspacev1.ReasonInitializationFailure),
recordedStartFailure: wsk8s.ConditionPresentAndTrue(ws.Status.Conditions, string(workspacev1.WorkspaceConditionFailed)),
recordedContentReady: wsk8s.ConditionPresentAndTrue(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady)),
recordedBackupFailed: wsk8s.ConditionPresentAndTrue(ws.Status.Conditions, string(workspacev1.WorkspaceConditionBackupFailure)),
Expand Down
13 changes: 13 additions & 0 deletions components/ws-manager-mk2/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,20 @@ func isDisposalFinished(ws *workspacev1.Workspace) bool {

// extractFailure returns a pod failure reason and possibly a phase. If phase is nil then
// one should extract the phase themselves. If the pod has not failed, this function returns "", nil.
// This failure is then stored in the Failed condition on the workspace.
func extractFailure(ws *workspacev1.Workspace, pod *corev1.Pod) (string, *workspacev1.WorkspacePhase) {
// Check for content init failure.
if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady)); c != nil {
if c.Status == metav1.ConditionFalse && c.Reason == workspacev1.ReasonInitializationFailure {
return c.Message, nil
}
}

// Check for backup failure.
if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionBackupFailure)); c != nil {
return c.Message, nil
}

status := pod.Status
if status.Phase == corev1.PodFailed && (status.Reason != "" || status.Message != "") {
// Don't force the phase to UNKNOWN here to leave a chance that we may detect the actual phase of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
return r.deleteWorkspacePod(ctx, pod, "timed out")

// if the content initialization failed, delete the pod
case wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, "InitializationFailure") && !isPodBeingDeleted(pod):
case wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, workspacev1.ReasonInitializationFailure) && !isPodBeingDeleted(pod):
return r.deleteWorkspacePod(ctx, pod, "init failed")

case isWorkspaceBeingDeleted(workspace) && !isPodBeingDeleted(pod):
Expand Down Expand Up @@ -299,7 +299,7 @@ func (r *WorkspaceReconciler) updateMetrics(ctx context.Context, workspace *work
return
}

if !lastState.recordedInitFailure && wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, "InitializationFailure") {
if !lastState.recordedInitFailure && wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, workspacev1.ReasonInitializationFailure) {
r.metrics.countTotalRestoreFailures(&log, workspace)
lastState.recordedInitFailure = true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ var _ = Describe("WorkspaceController", func() {
Type: string(workspacev1.WorkspaceConditionContentReady),
Status: metav1.ConditionFalse,
Message: "some failure",
Reason: "InitializationFailure",
Reason: workspacev1.ReasonInitializationFailure,
LastTransitionTime: metav1.Now(),
})
})
Expand Down Expand Up @@ -443,7 +443,7 @@ func markContentReady(ws *workspacev1.Workspace) {
ws.Status.Conditions = wsk8s.AddUniqueCondition(ws.Status.Conditions, metav1.Condition{
Type: string(workspacev1.WorkspaceConditionContentReady),
Status: metav1.ConditionTrue,
Reason: "InitializationSuccess",
Reason: workspacev1.ReasonInitializationSuccess,
LastTransitionTime: metav1.Now(),
})
})
Expand Down