Skip to content

Commit 116a5b9

Browse files
authored
[ws-manager-mk2] Support workspace snapshots (#16471)
* [ws-manager-mk2] Scaffold snapshot CRD * [ws-mangaer-mk2] Generate snapshot type * [ws-manger-mk2] Provide snapshot endpoint * [ws-daemon] Handle snapshots * [ws-manager-mk2] Harmonize imports * [ws-manger-mk2] Set owner reference * [installer] Install snapshot CRD * [ws-manager-api] Fix descriptions * [ws-manager-mk2] Review comments - Name controller - Return early in case of error - Reduce scope of permissions
1 parent 733c37b commit 116a5b9

File tree

39 files changed

+1044
-79
lines changed

39 files changed

+1044
-79
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2+
// Licensed under the GNU Affero General Public License (AGPL).
3+
// See License-AGPL.txt in the project root for license information.
4+
5+
package controller
6+
7+
import (
8+
"context"
9+
"fmt"
10+
11+
"k8s.io/client-go/util/retry"
12+
ctrl "sigs.k8s.io/controller-runtime"
13+
"sigs.k8s.io/controller-runtime/pkg/client"
14+
"sigs.k8s.io/controller-runtime/pkg/event"
15+
"sigs.k8s.io/controller-runtime/pkg/log"
16+
"sigs.k8s.io/controller-runtime/pkg/predicate"
17+
18+
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
19+
)
20+
21+
// SnapshotReconciler reconciles a Snapshot object
22+
type SnapshotReconciler struct {
23+
client.Client
24+
nodeName string
25+
operations *WorkspaceOperations
26+
}
27+
28+
func NewSnapshotController(c client.Client, nodeName string, wso *WorkspaceOperations) *SnapshotReconciler {
29+
return &SnapshotReconciler{
30+
Client: c,
31+
nodeName: nodeName,
32+
operations: wso,
33+
}
34+
}
35+
36+
// SetupWithManager sets up the controller with the Manager.
37+
func (r *SnapshotReconciler) SetupWithManager(mgr ctrl.Manager) error {
38+
return ctrl.NewControllerManagedBy(mgr).
39+
Named("snapshot").
40+
For(&workspacev1.Snapshot{}).
41+
WithEventFilter(snapshotEventFilter(r.nodeName)).
42+
Complete(r)
43+
}
44+
45+
func snapshotEventFilter(nodeName string) predicate.Predicate {
46+
return predicate.Funcs{
47+
CreateFunc: func(e event.CreateEvent) bool {
48+
if ss, ok := e.Object.(*workspacev1.Snapshot); ok {
49+
return ss.Spec.NodeName == nodeName
50+
}
51+
return false
52+
},
53+
UpdateFunc: func(ue event.UpdateEvent) bool {
54+
return false
55+
},
56+
DeleteFunc: func(de event.DeleteEvent) bool {
57+
return false
58+
},
59+
}
60+
}
61+
62+
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=snapshots,verbs=get;list;watch;create;update;patch;delete
63+
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=snapshots/status,verbs=get;update;patch
64+
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=snapshots/finalizers,verbs=update
65+
66+
// Reconcile is part of the main kubernetes reconciliation loop which aims to
67+
// move the current state of the cluster closer to the desired state.
68+
// For more details, check Reconcile and its Result here:
69+
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
70+
func (ssc *SnapshotReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
71+
log := log.FromContext(ctx)
72+
73+
var snapshot workspacev1.Snapshot
74+
if err := ssc.Client.Get(ctx, req.NamespacedName, &snapshot); err != nil {
75+
return ctrl.Result{}, client.IgnoreNotFound(err)
76+
}
77+
78+
if snapshot.Status.Completed {
79+
return ctrl.Result{}, nil
80+
}
81+
82+
snapshotURL, snapshotName, snapshotErr := ssc.operations.SnapshotIDs(snapshot.Spec.WorkspaceID)
83+
if snapshotErr != nil {
84+
return ctrl.Result{}, snapshotErr
85+
}
86+
87+
err := retry.RetryOnConflict(retryParams, func() error {
88+
err := ssc.Client.Get(ctx, req.NamespacedName, &snapshot)
89+
if err != nil {
90+
return err
91+
}
92+
93+
snapshot.Status.URL = snapshotURL
94+
return ssc.Client.Status().Update(ctx, &snapshot)
95+
})
96+
97+
if err != nil {
98+
log.Error(err, "could not set snapshot url", "workspace", snapshot.Spec.WorkspaceID)
99+
return ctrl.Result{}, err
100+
}
101+
102+
snapshotErr = ssc.operations.TakeSnapshot(ctx, snapshot.Spec.WorkspaceID, snapshotName)
103+
err = retry.RetryOnConflict(retryParams, func() error {
104+
err := ssc.Client.Get(ctx, req.NamespacedName, &snapshot)
105+
if err != nil {
106+
return err
107+
}
108+
109+
snapshot.Status.Completed = true
110+
if snapshotErr != nil {
111+
snapshot.Status.Error = fmt.Errorf("could not take snapshot: %w", snapshotErr).Error()
112+
}
113+
114+
return ssc.Status().Update(ctx, &snapshot)
115+
})
116+
117+
if err != nil {
118+
log.Error(err, "could not set completion status for snapshot", "workspace", snapshot.Spec.WorkspaceID)
119+
}
120+
121+
return ctrl.Result{}, err
122+
}

components/ws-daemon/pkg/controller/controller.go renamed to components/ws-daemon/pkg/controller/workspace_controller.go

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ import (
1515
csapi "github.com/gitpod-io/gitpod/content-service/api"
1616
"github.com/gitpod-io/gitpod/ws-daemon/pkg/container"
1717
"github.com/gitpod-io/gitpod/ws-daemon/pkg/content"
18-
"github.com/gitpod-io/gitpod/ws-daemon/pkg/internal/session"
1918
"github.com/gitpod-io/gitpod/ws-daemon/pkg/iws"
20-
"github.com/gitpod-io/gitpod/ws-daemon/pkg/quota"
2119
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
2220
"github.com/opentracing/opentracing-go"
2321
"github.com/prometheus/client_golang/prometheus"
@@ -52,40 +50,18 @@ type WorkspaceControllerOpts struct {
5250
type WorkspaceController struct {
5351
client.Client
5452
NodeName string
55-
opts *WorkspaceControllerOpts
56-
operations WorkspaceOperations
53+
operations *WorkspaceOperations
5754
metrics *workspaceMetrics
5855
}
5956

60-
func NewWorkspaceController(c client.Client, opts WorkspaceControllerOpts) (*WorkspaceController, error) {
61-
xfs, err := quota.NewXFS(opts.ContentConfig.WorkingArea)
62-
if err != nil {
63-
return nil, err
64-
}
65-
store, err := session.NewStore(context.Background(), opts.ContentConfig.WorkingArea, content.WorkspaceLifecycleHooks(
66-
opts.ContentConfig,
67-
func(instanceID string) bool { return true },
68-
&iws.Uidmapper{Config: opts.UIDMapperConfig, Runtime: opts.ContainerRuntime},
69-
xfs,
70-
opts.CGroupMountPoint,
71-
))
72-
if err != nil {
73-
return nil, err
74-
}
75-
57+
func NewWorkspaceController(c client.Client, nodeName string, ops *WorkspaceOperations, reg prometheus.Registerer) (*WorkspaceController, error) {
7658
metrics := newWorkspaceMetrics()
77-
opts.MetricsRegistry.Register(metrics)
78-
79-
ops, err := NewWorkspaceOperations(opts.ContentConfig, store, opts.MetricsRegistry)
80-
if err != nil {
81-
return nil, err
82-
}
59+
reg.Register(metrics)
8360

8461
return &WorkspaceController{
8562
Client: c,
86-
NodeName: opts.NodeName,
87-
opts: &opts,
88-
operations: *ops,
63+
NodeName: nodeName,
64+
operations: ops,
8965
metrics: metrics,
9066
}, nil
9167
}

components/ws-daemon/pkg/controller/workspace_operations.go

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ func (wso *WorkspaceOperations) DisposeWorkspace(ctx context.Context, opts Dispo
200200
}
201201
}
202202

203-
err = wso.uploadWorkspaceContent(ctx, sess)
203+
err = wso.uploadWorkspaceContent(ctx, sess, storage.DefaultBackup)
204204
if err != nil {
205205
return false, nil, xerrors.Errorf("final backup failed for workspace %s", opts.Meta.InstanceId)
206206
}
@@ -227,6 +227,50 @@ func (wso *WorkspaceOperations) DisposeWorkspace(ctx context.Context, opts Dispo
227227
return false, repo, nil
228228
}
229229

230+
func (wso *WorkspaceOperations) SnapshotIDs(workspaceID string) (snapshotUrl, snapshotName string, err error) {
231+
sess := wso.store.Get(workspaceID)
232+
if sess == nil {
233+
return "", "", fmt.Errorf("cannot find workspace %s during SnapshotName", workspaceID)
234+
}
235+
236+
baseName := fmt.Sprintf("snapshot-%d", time.Now().UnixNano())
237+
snapshotName = baseName + ".tar"
238+
239+
rs, ok := sess.NonPersistentAttrs[session.AttrRemoteStorage].(storage.DirectAccess)
240+
if rs == nil || !ok {
241+
return "", "", fmt.Errorf("no remote storage configured")
242+
}
243+
244+
return rs.Qualify(snapshotName), snapshotName, nil
245+
}
246+
247+
func (wso *WorkspaceOperations) TakeSnapshot(ctx context.Context, workspaceID, snapshotName string) (err error) {
248+
//nolint:ineffassign
249+
span, ctx := opentracing.StartSpanFromContext(ctx, "TakeSnapshot")
250+
span.SetTag("workspace", workspaceID)
251+
defer tracing.FinishSpan(span, &err)
252+
253+
if workspaceID == "" {
254+
return fmt.Errorf("workspaceID is required")
255+
}
256+
257+
sess := wso.store.Get(workspaceID)
258+
if sess == nil {
259+
return fmt.Errorf("cannot find workspace %s during DisposeWorkspace", workspaceID)
260+
}
261+
262+
if sess.RemoteStorageDisabled {
263+
return fmt.Errorf("workspace has no remote storage")
264+
}
265+
266+
err = wso.uploadWorkspaceContent(ctx, sess, snapshotName)
267+
if err != nil {
268+
return fmt.Errorf("snapshot failed for workspace %s", workspaceID)
269+
}
270+
271+
return nil
272+
}
273+
230274
func (wso *WorkspaceOperations) uploadWorkspaceLogs(ctx context.Context, opts DisposeOptions) (err error) {
231275
// currently we're only uploading prebuild log files
232276
logFiles, err := logs.ListPrebuildLogFiles(ctx, opts.WorkspaceLocation)
@@ -271,8 +315,7 @@ func (wso *WorkspaceOperations) uploadWorkspaceLogs(ctx context.Context, opts Di
271315
return err
272316
}
273317

274-
func (wso *WorkspaceOperations) uploadWorkspaceContent(ctx context.Context, sess *session.Workspace) error {
275-
var backupName = storage.DefaultBackup
318+
func (wso *WorkspaceOperations) uploadWorkspaceContent(ctx context.Context, sess *session.Workspace, backupName string) error {
276319
// Avoid too many simultaneous backups in order to avoid excessive memory utilization.
277320
var timedOut bool
278321
waitStart := time.Now()

components/ws-daemon/pkg/daemon/daemon.go

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@ import (
3535
"github.com/gitpod-io/gitpod/ws-daemon/pkg/diskguard"
3636
"github.com/gitpod-io/gitpod/ws-daemon/pkg/dispatch"
3737
"github.com/gitpod-io/gitpod/ws-daemon/pkg/hosts"
38+
"github.com/gitpod-io/gitpod/ws-daemon/pkg/internal/session"
3839
"github.com/gitpod-io/gitpod/ws-daemon/pkg/iws"
3940
"github.com/gitpod-io/gitpod/ws-daemon/pkg/netlimit"
41+
"github.com/gitpod-io/gitpod/ws-daemon/pkg/quota"
4042
)
4143

4244
var (
@@ -185,21 +187,41 @@ func NewDaemon(config Config) (*Daemon, error) {
185187
contentCfg.WorkingArea += config.WorkspaceController.WorkingAreaSuffix
186188
contentCfg.WorkingAreaNode += config.WorkspaceController.WorkingAreaSuffix
187189

188-
wsctrl, err := controller.NewWorkspaceController(mgr.GetClient(), controller.WorkspaceControllerOpts{
189-
NodeName: nodename,
190-
ContentConfig: contentCfg,
191-
UIDMapperConfig: config.Uidmapper,
192-
ContainerRuntime: containerRuntime,
193-
CGroupMountPoint: config.CPULimit.CGroupBasePath,
194-
MetricsRegistry: wrappedReg,
195-
})
190+
xfs, err := quota.NewXFS(contentCfg.WorkingArea)
191+
if err != nil {
192+
return nil, err
193+
}
194+
195+
store, err := session.NewStore(context.Background(), contentCfg.WorkingArea, content.WorkspaceLifecycleHooks(
196+
contentCfg,
197+
func(instanceID string) bool { return true },
198+
&iws.Uidmapper{Config: config.Uidmapper, Runtime: containerRuntime},
199+
xfs,
200+
config.CPULimit.CGroupBasePath,
201+
))
202+
if err != nil {
203+
return nil, err
204+
}
205+
206+
workspaceOps, err := controller.NewWorkspaceOperations(contentCfg, store, wrappedReg)
207+
if err != nil {
208+
return nil, err
209+
}
210+
211+
wsctrl, err := controller.NewWorkspaceController(mgr.GetClient(), nodename, workspaceOps, wrappedReg)
196212
if err != nil {
197213
return nil, err
198214
}
199215
err = wsctrl.SetupWithManager(mgr)
200216
if err != nil {
201217
return nil, err
202218
}
219+
220+
ssctrl := controller.NewSnapshotController(mgr.GetClient(), nodename, workspaceOps)
221+
err = ssctrl.SetupWithManager(mgr)
222+
if err != nil {
223+
return nil, err
224+
}
203225
}
204226

205227
dsptch, err := dispatch.NewDispatch(containerRuntime, clientset, config.Runtime.KubernetesNamespace, nodename, listener...)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2+
// Licensed under the GNU Affero General Public License (AGPL).
3+
// See License-AGPL.txt in the project root for license information.
4+
5+
package v1
6+
7+
import (
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
)
10+
11+
// SnapshotSpec defines the desired state of the snapshot
12+
type SnapshotSpec struct {
13+
// +kubebuilder:validation:Required
14+
NodeName string `json:"nodeName"`
15+
16+
// +kubebuilder:validation:Required
17+
WorkspaceID string `json:"workspaceID"`
18+
}
19+
20+
// SnapshotStatus defines the observed state of the snapshot
21+
type SnapshotStatus struct {
22+
// // +kubebuilder:validation:Optional
23+
// Conditions []metav1.Condition `json:"conditions"`
24+
25+
// Erorr is the error observed during snapshot creation if any
26+
// +kubebuilder:validation:Optional
27+
Error string `json:"error,omitempty"`
28+
29+
// URL contains the url of the snapshot
30+
// +kubebuilder:validation:Optional
31+
URL string `json:"url,omitempty"`
32+
33+
// Completed indicates if the snapshot operation has completed either by taking the snapshot or through failure
34+
// +kubebuilder:validation:Required
35+
Completed bool `json:"completed"`
36+
}
37+
38+
//+kubebuilder:object:root=true
39+
//+kubebuilder:subresource:status
40+
//+kubebuilder:resource:shortName=snapshot
41+
// Custom print columns on the Custom Resource Definition. These are the columns
42+
// showing up when doing e.g. `kubectl get snapshots`.
43+
// Columns with priority > 0 will only show up with `-o wide`.
44+
//+kubebuilder:printcolumn:name="Workspace",type="string",JSONPath=".spec.workspaceID"
45+
//+kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url",priority=10
46+
//+kubebuilder:printcolumn:name="Completed",type="boolean",JSONPath=".status.completed"
47+
48+
// Snapshot is the Schema for the snapshot API
49+
type Snapshot struct {
50+
metav1.TypeMeta `json:",inline"`
51+
metav1.ObjectMeta `json:"metadata,omitempty"`
52+
53+
Spec SnapshotSpec `json:"spec,omitempty"`
54+
Status SnapshotStatus `json:"status,omitempty"`
55+
}
56+
57+
//+kubebuilder:object:root=true
58+
59+
// SnapshotList contains a list of Snapshots
60+
type SnapshotList struct {
61+
metav1.TypeMeta `json:",inline"`
62+
metav1.ListMeta `json:"metadata,omitempty"`
63+
Items []Snapshot `json:"items"`
64+
}
65+
66+
func init() {
67+
SchemeBuilder.Register(&Snapshot{}, &SnapshotList{})
68+
}

0 commit comments

Comments
 (0)