Skip to content

[ws-manager-mk2] Emit events for workspaces and snapshots #16907

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion components/ws-daemon/pkg/controller/snapshot_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"context"
"fmt"

corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -25,14 +27,16 @@ type SnapshotReconciler struct {
maxConcurrentReconciles int
nodeName string
operations *WorkspaceOperations
recorder record.EventRecorder
}

func NewSnapshotController(c client.Client, nodeName string, maxConcurrentReconciles int, wso *WorkspaceOperations) *SnapshotReconciler {
func NewSnapshotController(c client.Client, recorder record.EventRecorder, nodeName string, maxConcurrentReconciles int, wso *WorkspaceOperations) *SnapshotReconciler {
return &SnapshotReconciler{
Client: c,
maxConcurrentReconciles: maxConcurrentReconciles,
nodeName: nodeName,
operations: wso,
recorder: recorder,
}
}

Expand Down Expand Up @@ -106,6 +110,10 @@ func (ssc *SnapshotReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}

snapshotErr = ssc.operations.TakeSnapshot(ctx, snapshot.Spec.WorkspaceID, snapshotName)
if snapshotErr != nil {
log.Error(snapshotErr, "could not take snapshot", "workspace", snapshot.Spec.WorkspaceID)
}

err = retry.RetryOnConflict(retryParams, func() error {
err := ssc.Client.Get(ctx, req.NamespacedName, &snapshot)
if err != nil {
Expand All @@ -124,5 +132,20 @@ func (ssc *SnapshotReconciler) Reconcile(ctx context.Context, req ctrl.Request)
log.Error(err, "could not set completion status for snapshot", "workspace", snapshot.Spec.WorkspaceID)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated to the PR, but would be nice to log the snapshotErr here if it fails to update the status

}

ssc.emitEvent(&snapshot, snapshotErr)
return ctrl.Result{}, err
}

func (ssc *SnapshotReconciler) emitEvent(s *workspacev1.Snapshot, failure error) {
eventType := corev1.EventTypeNormal
reason := "Succeeded"
message := ""

if failure != nil {
eventType = corev1.EventTypeWarning
reason = "Failed"
message = failure.Error()
}

ssc.recorder.Event(s, eventType, reason, message)
}
13 changes: 12 additions & 1 deletion components/ws-daemon/pkg/controller/workspace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -58,9 +59,10 @@ type WorkspaceController struct {
operations *WorkspaceOperations
metrics *workspaceMetrics
secretNamespace string
recorder record.EventRecorder
}

func NewWorkspaceController(c client.Client, nodeName, secretNamespace string, maxConcurrentReconciles int, ops *WorkspaceOperations, reg prometheus.Registerer) (*WorkspaceController, error) {
func NewWorkspaceController(c client.Client, recorder record.EventRecorder, nodeName, secretNamespace string, maxConcurrentReconciles int, ops *WorkspaceOperations, reg prometheus.Registerer) (*WorkspaceController, error) {
metrics := newWorkspaceMetrics()
reg.Register(metrics)

Expand All @@ -71,6 +73,7 @@ func NewWorkspaceController(c client.Client, nodeName, secretNamespace string, m
operations: ops,
metrics: metrics,
secretNamespace: secretNamespace,
recorder: recorder,
}, nil
}

Expand Down Expand Up @@ -182,6 +185,7 @@ func (wsc *WorkspaceController) handleWorkspaceInit(ctx context.Context, ws *wor
wsc.metrics.recordInitializeTime(time.Since(initStart).Seconds(), ws)
}

wsc.emitEvent(ws, "Content init", initErr)
return ctrl.Result{}, err
}

Expand Down Expand Up @@ -277,6 +281,7 @@ func (wsc *WorkspaceController) handleWorkspaceStop(ctx context.Context, ws *wor
wsc.metrics.recordFinalizeTime(time.Since(disposeStart).Seconds(), ws)
}

wsc.emitEvent(ws, "Backup", disposeErr)
return ctrl.Result{}, err
}

Expand All @@ -301,6 +306,12 @@ func (wsc *WorkspaceController) prepareInitializer(ctx context.Context, ws *work
return &init, nil
}

func (wsc *WorkspaceController) emitEvent(ws *workspacev1.Workspace, operation string, failure error) {
if failure != nil {
wsc.recorder.Eventf(ws, corev1.EventTypeWarning, "Failed", "%s failed: %s", operation, failure.Error())
}
}

func toWorkspaceGitStatus(status *csapi.GitStatus) *workspacev1.GitStatus {
if status == nil {
return nil
Expand Down
5 changes: 3 additions & 2 deletions components/ws-daemon/pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func NewDaemon(config Config) (*Daemon, error) {
}

wsctrl, err := controller.NewWorkspaceController(
mgr.GetClient(), nodename, config.Runtime.SecretsNamespace, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps, wrappedReg)
mgr.GetClient(), mgr.GetEventRecorderFor("workspace"), nodename, config.Runtime.SecretsNamespace, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps, wrappedReg)
if err != nil {
return nil, err
}
Expand All @@ -219,7 +219,8 @@ func NewDaemon(config Config) (*Daemon, error) {
return nil, err
}

ssctrl := controller.NewSnapshotController(mgr.GetClient(), nodename, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps)
ssctrl := controller.NewSnapshotController(
mgr.GetClient(), mgr.GetEventRecorderFor("snapshot"), nodename, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps)
err = ssctrl.SetupWithManager(mgr)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion components/ws-manager-api/go/crd/v1/snapshot_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type SnapshotStatus struct {
// +kubebuilder:validation:Optional
URL string `json:"url,omitempty"`

// Completed indicates if the snapshot operation has completed either by taking the snapshot or through failure
// Completed indicates if the snapshot operation has completed either by taking the snapshot or due to failure
// +kubebuilder:validation:Required
Completed bool `json:"completed"`
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ spec:
properties:
completed:
description: Completed indicates if the snapshot operation has completed
either by taking the snapshot or through failure
either by taking the snapshot or due to failure
type: boolean
error:
description: Erorr is the error observed during snapshot creation
Expand Down
4 changes: 3 additions & 1 deletion components/ws-manager-mk2/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const (
containerUnknownExitCode = 255
)

func updateWorkspaceStatus(ctx context.Context, workspace *workspacev1.Workspace, pods corev1.PodList, cfg *config.Configuration) error {
func (r *WorkspaceReconciler) updateWorkspaceStatus(ctx context.Context, workspace *workspacev1.Workspace, pods corev1.PodList, cfg *config.Configuration) error {
log := log.FromContext(ctx)

switch len(pods.Items) {
Expand Down Expand Up @@ -104,6 +104,8 @@ func updateWorkspaceStatus(ctx context.Context, workspace *workspacev1.Workspace
LastTransitionTime: metav1.Now(),
Message: failure,
})

r.Recorder.Event(workspace, corev1.EventTypeWarning, "Failed", failure)
}

switch {
Expand Down
2 changes: 1 addition & 1 deletion components/ws-manager-mk2/controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

conf := newTestConfig()
wsReconciler, err := NewWorkspaceReconciler(k8sManager.GetClient(), k8sManager.GetScheme(), &conf, metrics.Registry, &fakeMaintenance{enabled: false})
wsReconciler, err := NewWorkspaceReconciler(k8sManager.GetClient(), k8sManager.GetScheme(), k8sManager.GetEventRecorderFor("workspace"), &conf, metrics.Registry, &fakeMaintenance{enabled: false})
wsMetrics = wsReconciler.metrics
Expect(err).ToNot(HaveOccurred())
Expect(wsReconciler.SetupWithManager(k8sManager)).To(Succeed())
Expand Down
26 changes: 24 additions & 2 deletions components/ws-manager-mk2/controllers/workspace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand All @@ -37,12 +38,13 @@ const (
maintenanceRequeue = 1 * time.Minute
)

func NewWorkspaceReconciler(c client.Client, scheme *runtime.Scheme, cfg *config.Configuration, reg prometheus.Registerer, maintenance maintenance.Maintenance) (*WorkspaceReconciler, error) {
func NewWorkspaceReconciler(c client.Client, scheme *runtime.Scheme, recorder record.EventRecorder, cfg *config.Configuration, reg prometheus.Registerer, maintenance maintenance.Maintenance) (*WorkspaceReconciler, error) {
reconciler := &WorkspaceReconciler{
Client: c,
Scheme: scheme,
Config: cfg,
maintenance: maintenance,
Recorder: recorder,
}

metrics, err := newControllerMetrics(reconciler)
Expand All @@ -63,6 +65,7 @@ type WorkspaceReconciler struct {
Config *config.Configuration
metrics *controllerMetrics
maintenance maintenance.Maintenance
Recorder record.EventRecorder
OnReconcile func(ctx context.Context, ws *workspacev1.Workspace)
}

Expand Down Expand Up @@ -114,12 +117,14 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, err
}

err = updateWorkspaceStatus(ctx, &workspace, workspacePods, r.Config)
oldStatus := workspace.Status.DeepCopy()
err = r.updateWorkspaceStatus(ctx, &workspace, workspacePods, r.Config)
if err != nil {
return ctrl.Result{}, err
}

r.updateMetrics(ctx, &workspace)
r.emitPhaseEvents(ctx, &workspace, oldStatus)

log.V(1).Info("updated workspace status", "status", workspace.Status)
err = r.Status().Update(ctx, &workspace)
Expand Down Expand Up @@ -185,6 +190,8 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
log.Error(err, "Failed to patch PodStarts in workspace status")
return ctrl.Result{}, err
}

r.Recorder.Event(workspace, corev1.EventTypeNormal, "Creating", "")
}
r.metrics.rememberWorkspace(workspace, nil)

Expand All @@ -203,6 +210,7 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp

// Workspace might have already been in a deleting state,
// but not guaranteed, so try deleting anyway.
r.Recorder.Event(workspace, corev1.EventTypeNormal, "Deleting", "")
err := r.Client.Delete(ctx, workspace)
return ctrl.Result{}, client.IgnoreNotFound(err)
}
Expand Down Expand Up @@ -340,6 +348,20 @@ func (r *WorkspaceReconciler) updateMetrics(ctx context.Context, workspace *work
r.metrics.rememberWorkspace(workspace, &lastState)
}

func (r *WorkspaceReconciler) emitPhaseEvents(ctx context.Context, ws *workspacev1.Workspace, old *workspacev1.WorkspaceStatus) {
if ws.Status.Phase == workspacev1.WorkspacePhaseInitializing && old.Phase != workspacev1.WorkspacePhaseInitializing {
r.Recorder.Event(ws, corev1.EventTypeNormal, "Initializing", "")
}

if ws.Status.Phase == workspacev1.WorkspacePhaseRunning && old.Phase != workspacev1.WorkspacePhaseRunning {
r.Recorder.Event(ws, corev1.EventTypeNormal, "Running", "")
}

if ws.Status.Phase == workspacev1.WorkspacePhaseStopping && old.Phase != workspacev1.WorkspacePhaseStopping {
r.Recorder.Event(ws, corev1.EventTypeNormal, "Stopping", "")
}
}

func (r *WorkspaceReconciler) deleteWorkspacePod(ctx context.Context, pod *corev1.Pod, reason string) (ctrl.Result, error) {
log := log.FromContext(ctx).WithValues("workspace", pod.Name, "reason", reason)
log.V(1).Info("deleting workspace pod")
Expand Down
13 changes: 7 additions & 6 deletions components/ws-manager-mk2/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,14 @@ func main() {
os.Exit(1)
}

maintenance, err := controllers.NewMaintenanceReconciler(mgr.GetClient())
maintenanceReconciler, err := controllers.NewMaintenanceReconciler(mgr.GetClient())
if err != nil {
setupLog.Error(err, "unable to create maintenance controller", "controller", "Maintenance")
os.Exit(1)
}

reconciler, err := controllers.NewWorkspaceReconciler(mgr.GetClient(), mgr.GetScheme(), &cfg.Manager, metrics.Registry, maintenance)
workspaceReconciler, err := controllers.NewWorkspaceReconciler(
mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("workspace"), &cfg.Manager, metrics.Registry, maintenanceReconciler)
if err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Workspace")
os.Exit(1)
Expand All @@ -132,22 +133,22 @@ func main() {
os.Exit(1)
}

wsmanService, err := setupGRPCService(cfg, mgr.GetClient(), activity, maintenance)
wsmanService, err := setupGRPCService(cfg, mgr.GetClient(), activity, maintenanceReconciler)
if err != nil {
setupLog.Error(err, "unable to start manager service")
os.Exit(1)
}

reconciler.OnReconcile = wsmanService.OnWorkspaceReconcile
if err = reconciler.SetupWithManager(mgr); err != nil {
workspaceReconciler.OnReconcile = wsmanService.OnWorkspaceReconcile
if err = workspaceReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to setup workspace controller with manager", "controller", "Workspace")
os.Exit(1)
}
if err = timeoutReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to setup timeout controller with manager", "controller", "Timeout")
os.Exit(1)
}
if err = maintenance.SetupWithManager(mgr); err != nil {
if err = maintenanceReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to setup maintenance controller with manager", "controller", "Maintenance")
os.Exit(1)
}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions install/installer/cmd/testdata/render/aws-setup/output.golden

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions install/installer/cmd/testdata/render/gcp-setup/output.golden

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading