From 57c44ea0f5aab17ba98052dff868c75dce2d9198 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 20 Jan 2026 17:37:41 +0100 Subject: [PATCH 01/26] add create and delete snapshot capability --- driver/controller.go | 303 +++++++++++++++++++++++++++++++++++++++--- driver/driver_test.go | 118 +++++++++++++++- go.mod | 7 +- go.sum | 10 +- 4 files changed, 410 insertions(+), 28 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 5d3600f9..6030b8b5 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -24,13 +24,14 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/cloudscale-ch/cloudscale-go-sdk/v6" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/sirupsen/logrus" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - + "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/apimachinery/pkg/util/sets" ) @@ -85,6 +86,15 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("volume capabilities cannot be satisified: %s", strings.Join(violations, "; "))) } + if req.GetVolumeContentSource() != nil { + if sourceSnapshot := req.GetVolumeContentSource().GetSnapshot(); sourceSnapshot != nil { + return d.createVolumeFromSnapshot(ctx, req, sourceSnapshot) + } + if sourceVolume := req.GetVolumeContentSource().GetVolume(); sourceVolume != nil { + return nil, status.Error(codes.Unimplemented, "volume cloning is not yet supported") + } + } + if req.AccessibilityRequirements != nil { for _, t := range req.AccessibilityRequirements.Requisite { zone, ok := t.Segments[topologyZonePrefix] @@ -173,7 +183,7 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return &csi.CreateVolumeResponse{Volume: &csiVolume}, nil } - volumeReq := &cloudscale.VolumeRequest{ + volumeReq := &cloudscale.VolumeCreateRequest{ Name: volumeName, SizeGB: sizeGB, Type: storageType, @@ -193,6 +203,170 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return resp, nil } +// createVolumeFromSnapshot handles volume creation from an existing snapshot +func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVolumeRequest, sourceSnapshot *csi.VolumeContentSource_SnapshotSource) (*csi.CreateVolumeResponse, error) { + sourceSnapshotID := sourceSnapshot.GetSnapshotId() + if sourceSnapshotID == "" { + return nil, status.Error(codes.InvalidArgument, "snapshotID must be provided in volume content source") + } + + volumeName := req.Name + + ll := d.log.WithFields(logrus.Fields{ + "volume_name": volumeName, + "source_snapshot_id": sourceSnapshotID, + "method": "create_volume_from_snapshot", + }) + ll.Info("create volume from snapshot called") + + // Verify snapshot exists and get its properties, must return NotFound when snapshot does not exist. 
+ snapshot, err := d.cloudscaleClient.VolumeSnapshots.Get(ctx, sourceSnapshotID) + if err != nil { + errorResponse, ok := err.(*cloudscale.ErrorResponse) + if ok { + if errorResponse.StatusCode == http.StatusNotFound { + return nil, status.Errorf(codes.NotFound, "source snapshot %s not found", sourceSnapshotID) + } + } + return nil, status.Errorf(codes.Internal, "failed to get source snapshot: %v", err) + } + + ll = ll.WithFields(logrus.Fields{ + "snapshot_size_gb": snapshot.SizeGB, + "snapshot_volume_type": snapshot.Volume.Type, + "snapshot_zone": snapshot.Zone, + }) + + // Validate capacity requirements + // CSI spec: restored volume must be at least as large as the snapshot + // Cloudscale only supports the same size as the snapshot + if req.CapacityRange != nil { + requiredBytes := req.CapacityRange.GetRequiredBytes() + if requiredBytes > 0 { + requiredGB := int(requiredBytes / GB) + if requiredGB < snapshot.SizeGB { + return nil, status.Errorf(codes.InvalidArgument, + "requested volume size (%d GB) is smaller than snapshot size (%d GB)", + requiredGB, snapshot.SizeGB) + } + if requiredGB > snapshot.SizeGB { + return nil, status.Errorf(codes.InvalidArgument, + "cloudscale.ch API does not support creating volumes larger than snapshot size during restore. "+ + "Create volume from snapshot first, then expand it using ControllerExpandVolume. "+ + "Requested: %d GB, Snapshot: %d GB", requiredGB, snapshot.SizeGB) + } + } + + // Validate limit if specified + limitBytes := req.CapacityRange.GetLimitBytes() + if limitBytes > 0 && int64(snapshot.SizeGB)*GB > limitBytes { + return nil, status.Errorf(codes.OutOfRange, + "snapshot size (%d GB) exceeds capacity limit (%d bytes)", + snapshot.SizeGB, limitBytes) + } + } + + // cloudscale does create the volume in the same zone as the snapshot. 
+ if req.AccessibilityRequirements != nil { + for _, t := range req.AccessibilityRequirements.Requisite { + zone, ok := t.Segments[topologyZonePrefix] + if !ok { + continue + } + if zone != snapshot.Zone.Slug { + return nil, status.Errorf(codes.InvalidArgument, + "requested zone %s does not match snapshot zone %s", zone, snapshot.Zone) + } + } + } + + // cloudscale does not support to change storage type, so we warn if parameters are specified that will be ignored + if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" && storageType != snapshot.Volume.Type { + ll.WithFields(logrus.Fields{ + "requested_type": storageType, + "snapshot_volume_type": snapshot.Volume.Type, + }).Warn("storage type parameter ignored when creating from snapshot") + } + + luksEncrypted := "false" + if req.Parameters[LuksEncryptedAttribute] == "true" { + if violations := validateLuksCapabilities(req.VolumeCapabilities); len(violations) > 0 { + return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("volume capabilities cannot be satisified: %s", strings.Join(violations, "; "))) + } + luksEncrypted = "true" + } + + // Check if volume already exists + volumes, err := d.cloudscaleClient.Volumes.List(ctx, cloudscale.WithNameFilter(volumeName)) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + csiVolume := csi.Volume{ + CapacityBytes: int64(snapshot.SizeGB) * GB, + AccessibleTopology: []*csi.Topology{ + { + Segments: map[string]string{ + topologyZonePrefix: d.zone, + }, + }, + }, + VolumeContext: map[string]string{ + PublishInfoVolumeName: volumeName, + LuksEncryptedAttribute: luksEncrypted, + }, + ContentSource: req.GetVolumeContentSource(), + } + + if luksEncrypted == "true" { + csiVolume.VolumeContext[LuksCipherAttribute] = req.Parameters[LuksCipherAttribute] + csiVolume.VolumeContext[LuksKeySizeAttribute] = req.Parameters[LuksKeySizeAttribute] + } + + // Volume already exists - validate it matches request + if len(volumes) != 0 { + if len(volumes) > 1 { + return nil, fmt.Errorf("fatal issue: duplicate volume %q exists", volumeName) + } + vol := volumes[0] + + if vol.SizeGB != snapshot.SizeGB { + return nil, status.Errorf(codes.AlreadyExists, + "volume %q already exists with size %d GB, but snapshot requires %d GB", + volumeName, vol.SizeGB, snapshot.SizeGB) + } + + if vol.Zone != snapshot.Zone { + return nil, status.Errorf(codes.AlreadyExists, + "volume %q already exists in zone %s, but snapshot is in zone %s", + volumeName, vol.Zone, snapshot.Zone) + } + + ll.Info("volume from snapshot already exists") + csiVolume.VolumeId = vol.UUID + return &csi.CreateVolumeResponse{Volume: &csiVolume}, nil + } + + // Create volume from snapshot + volumeReq := &cloudscale.VolumeCreateRequest{ + Name: volumeName, + VolumeSnapshotUUID: sourceSnapshotID, + // Size, Type, Zone are inherited from snapshot - do NOT set them + } + + ll.WithField("volume_req", volumeReq).Info("creating volume from snapshot") + vol, err := d.cloudscaleClient.Volumes.Create(ctx, volumeReq) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to create volume from snapshot: %v", err) + } + + csiVolume.VolumeId = vol.UUID + resp := &csi.CreateVolumeResponse{Volume: &csiVolume} + + ll.WithField("response", resp).Info("volume created from snapshot") + return resp, nil +} + // DeleteVolume deletes the given volume. The function is idempotent. 
func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) { if req.VolumeId == "" { @@ -255,7 +429,7 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle }) ll.Info("controller publish volume called") - attachRequest := &cloudscale.VolumeRequest{ + attachRequest := &cloudscale.VolumeUpdateRequest{ ServerUUIDs: &[]string{req.NodeId}, } err := d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, attachRequest) @@ -329,7 +503,7 @@ func (d *Driver) ControllerUnpublishVolume(ctx context.Context, req *csi.Control ll.Info("Volume is attached to node given in request or NodeID in request is not set.") - detachRequest := &cloudscale.VolumeRequest{ + detachRequest := &cloudscale.VolumeUpdateRequest{ ServerUUIDs: &[]string{}, } err = d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, detachRequest) @@ -451,9 +625,7 @@ func (d *Driver) ControllerGetCapabilities(ctx context.Context, req *csi.Control csi.ControllerServiceCapability_RPC_PUBLISH_UNPUBLISH_VOLUME, csi.ControllerServiceCapability_RPC_LIST_VOLUMES, csi.ControllerServiceCapability_RPC_EXPAND_VOLUME, - - // TODO(arslan): enable once snapshotting is supported - // csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT, + csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT, // csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS, // TODO: check if this can be implemented @@ -476,20 +648,113 @@ func (d *Driver) ControllerGetCapabilities(ctx context.Context, req *csi.Control // CreateSnapshot will be called by the CO to create a new snapshot from a // source volume on behalf of a user. func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) { - d.log.WithFields(logrus.Fields{ - "req": req, - "method": "create_snapshot", - }).Warn("create snapshot is not implemented") - return nil, status.Error(codes.Unimplemented, "") + if req.Name == "" { + return nil, status.Error(codes.InvalidArgument, "CreateSnapshotRequest Name must be provided") + } + + if req.SourceVolumeId == "" { + return nil, status.Error(codes.InvalidArgument, "CreateSnapshotRequest Source Volume Id must be provided") + } + + ll := d.log.WithFields(logrus.Fields{ + "source_volume_id": req.SourceVolumeId, + "name": req.Name, + "method": "create_snapshot", + }) + + ll.Info("find existing volume snapshots with same name") + snapshots, err := d.cloudscaleClient.VolumeSnapshots.List(ctx, cloudscale.WithNameFilter(req.Name)) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + for _, snapshot := range snapshots { + if snapshot.Volume.UUID == req.SourceVolumeId { + creationTime := timestamppb.Now() + if snapshot.CreatedAt != "" { + if t, err := time.Parse(time.RFC3339, snapshot.CreatedAt); err == nil { + creationTime = timestamppb.New(t) + } + } + + return &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SnapshotId: snapshot.UUID, + SourceVolumeId: snapshot.Volume.UUID, + ReadyToUse: snapshot.Status == "available", + SizeBytes: int64(snapshot.SizeGB * GB), + CreationTime: creationTime, + }, + }, nil + } + return nil, status.Error(codes.AlreadyExists, "snapshot with this name already exists for another volume.") + } + + volumeSnapshotCreateRequest := &cloudscale.VolumeSnapshotCreateRequest{ + Name: req.Name, + SourceVolume: req.SourceVolumeId, + // todo: tags? 
+ } + + ll.WithField("volume_snapshot_create_request", volumeSnapshotCreateRequest).Info("creating volume snapshot") + snapshot, err := d.cloudscaleClient.VolumeSnapshots.Create(ctx, volumeSnapshotCreateRequest) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + creationTime := timestamppb.Now() + if snapshot.CreatedAt != "" { + if t, err := time.Parse(time.RFC3339, snapshot.CreatedAt); err == nil { + creationTime = timestamppb.New(t) + } + } + + resp := &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SnapshotId: snapshot.UUID, + SourceVolumeId: snapshot.Volume.UUID, + ReadyToUse: snapshot.Status == "available", // check status + SizeBytes: int64(snapshot.SizeGB * GB), + CreationTime: creationTime, + }, + } + + ll.WithField("response", resp).Info("volume snapshot created") + return resp, nil } -// DeleteSnapshost will be called by the CO to delete a snapshot. +// DeleteSnapshot will be called by the CO to delete a snapshot. func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) { - d.log.WithFields(logrus.Fields{ - "req": req, - "method": "delete_snapshot", - }).Warn("delete snapshot is not implemented") - return nil, status.Error(codes.Unimplemented, "") + if req.SnapshotId == "" { + return nil, status.Error(codes.InvalidArgument, "DeleteSnapshot Snapshot ID must be provided") + } + + ll := d.log.WithFields(logrus.Fields{ + "snapshot_id": req.SnapshotId, + "method": "delete_snapshot", + }) + ll.Info("delete snapshot called") + + // todo: think through long running delete jobs + err := d.cloudscaleClient.VolumeSnapshots.Delete(ctx, req.SnapshotId) + if err != nil { + errorResponse, ok := err.(*cloudscale.ErrorResponse) + if ok { + if errorResponse.StatusCode == http.StatusNotFound { + // To make it idempotent, the volume might already have been + // deleted, so a 404 is ok. 
+ ll.WithFields(logrus.Fields{ + "error": err, + "resp": errorResponse, + }).Warn("assuming snapshot is already deleted") + return &csi.DeleteSnapshotResponse{}, nil + } + } + return nil, err + } + + ll.Info("snapshot is deleted") + return &csi.DeleteSnapshotResponse{}, nil } // ListSnapshots returns the information about all snapshots on the storage @@ -538,7 +803,7 @@ func (d *Driver) ControllerExpandVolume(ctx context.Context, req *csi.Controller return &csi.ControllerExpandVolumeResponse{CapacityBytes: int64(volume.SizeGB) * GB, NodeExpansionRequired: true}, nil } - volumeReq := &cloudscale.VolumeRequest{ + volumeReq := &cloudscale.VolumeUpdateRequest{ SizeGB: resizeGigaBytes, } err = d.cloudscaleClient.Volumes.Update(ctx, volume.UUID, volumeReq) diff --git a/driver/driver_test.go b/driver/driver_test.go index e0cd7034..460f1d8e 100644 --- a/driver/driver_test.go +++ b/driver/driver_test.go @@ -106,6 +106,11 @@ func NewFakeClient(initialServers map[string]*cloudscale.Server) *cloudscale.Cli volumes: make(map[string]*cloudscale.Volume), } + fakeClient.VolumeSnapshots = &FakeVolumeSnapshotServiceOperations{ + fakeClient: fakeClient, + snapshots: make(map[string]*cloudscale.VolumeSnapshot), + } + return fakeClient } @@ -184,8 +189,13 @@ type FakeVolumeServiceOperations struct { volumes map[string]*cloudscale.Volume } -func (f FakeVolumeServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeRequest) (*cloudscale.Volume, error) { +func (f FakeVolumeServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeCreateRequest) (*cloudscale.Volume, error) { id := randString(32) + + // todo: CSI-test pass without this, but we could implement: + // - check if volumeSnapshot is present. Return error if volumeSnapshot does not exist + // - create volume with inferred values form snapshot. 
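// A hypothetical sketch of what such a check could look like (not part of this
// change; it only reuses the fake client's existing VolumeSnapshots.Get and the
// VolumeSnapshotUUID/SizeGB fields of VolumeCreateRequest used elsewhere in this patch):
//
//	if createRequest.VolumeSnapshotUUID != "" {
//		snap, err := f.fakeClient.VolumeSnapshots.Get(ctx, createRequest.VolumeSnapshotUUID)
//		if err != nil {
//			return nil, err // snapshot does not exist in the fake store
//		}
//		createRequest.SizeGB = snap.SizeGB // inherit the size from the snapshot
//	}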
+ vol := &cloudscale.Volume{ UUID: id, Name: createRequest.Name, @@ -254,7 +264,7 @@ func extractParams(modifiers []cloudscale.ListRequestModifier) url.Values { return params } -func (f FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string, updateRequest *cloudscale.VolumeRequest) error { +func (f FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string, updateRequest *cloudscale.VolumeUpdateRequest) error { vol, ok := f.volumes[volumeID] if ok != true { return generateNotFoundError() @@ -306,6 +316,23 @@ func getVolumesPerServer(f FakeVolumeServiceOperations, serverUUID string) int { } func (f FakeVolumeServiceOperations) Delete(ctx context.Context, volumeID string) error { + + // prevent deletion if snapshots exist + snapshots, err := f.fakeClient.VolumeSnapshots.List(context.Background()) + + if err != nil { + return err + } + + for _, snapshot := range snapshots { + if snapshot.Volume.UUID == volumeID { + return &cloudscale.ErrorResponse{ + StatusCode: 409, + Message: map[string]string{"detail": "volume has snapshots"}, + } + } + } + delete(f.volumes, volumeID) return nil } @@ -363,6 +390,93 @@ func (f *FakeVolumeServiceOperations) WaitFor(ctx context.Context, id string, co panic("implement me") } +type FakeVolumeSnapshotServiceOperations struct { + fakeClient *cloudscale.Client + snapshots map[string]*cloudscale.VolumeSnapshot +} + +func (f FakeVolumeSnapshotServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeSnapshotCreateRequest) (*cloudscale.VolumeSnapshot, error) { + + vol, err := f.fakeClient.Volumes.Get(ctx, createRequest.SourceVolume) + if err != nil { + return nil, err + } + + id := randString(32) + snap := &cloudscale.VolumeSnapshot{ + UUID: id, + Name: createRequest.Name, + SizeGB: vol.SizeGB, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + Status: "available", + Volume: cloudscale.VolumeStub{ + UUID: createRequest.SourceVolume, + }, + } + + f.snapshots[id] = snap + return snap, nil +} + +func (f *FakeVolumeSnapshotServiceOperations) Get( + ctx context.Context, + snapshotID string, +) (*cloudscale.VolumeSnapshot, error) { + + snap, ok := f.snapshots[snapshotID] + if !ok { + return nil, generateNotFoundError() + } + return snap, nil +} + +func (f *FakeVolumeSnapshotServiceOperations) List( + ctx context.Context, + modifiers ...cloudscale.ListRequestModifier, +) ([]cloudscale.VolumeSnapshot, error) { + var snapshots []cloudscale.VolumeSnapshot + + for _, snapshot := range f.snapshots { + snapshots = append(snapshots, *snapshot) + } + + if len(modifiers) == 0 { + return snapshots, nil + } + if len(modifiers) > 1 { + panic("implement me (support for more than one modifier)") + } + + params := extractParams(modifiers) + + if filterName := params.Get("name"); filterName != "" { + filtered := make([]cloudscale.VolumeSnapshot, 0, 1) + for _, snapshot := range snapshots { + if snapshot.Name == filterName { + filtered = append(filtered, snapshot) + } + } + return filtered, nil + } + + panic("implement me (support for unknown param)") +} + +func (f FakeVolumeSnapshotServiceOperations) Update(ctx context.Context, resourceID string, updateRequest *cloudscale.VolumeSnapshotUpdateRequest) error { + panic("implement me") +} + +func (f *FakeVolumeSnapshotServiceOperations) Delete( + ctx context.Context, + snapshotID string, +) error { + delete(f.snapshots, snapshotID) + return nil +} +func (f FakeVolumeSnapshotServiceOperations) WaitFor(ctx context.Context, resourceID string, condition func(resource 
*cloudscale.VolumeSnapshot) (bool, error), opts ...backoff.RetryOption) (*cloudscale.VolumeSnapshot, error) { + panic("implement me") +} + func generateNotFoundError() *cloudscale.ErrorResponse { return &cloudscale.ErrorResponse{ StatusCode: 404, diff --git a/go.mod b/go.mod index e554e514..76f3fe4d 100644 --- a/go.mod +++ b/go.mod @@ -2,16 +2,18 @@ module github.com/cloudscale-ch/csi-cloudscale require ( github.com/cenkalti/backoff/v5 v5.0.3 - github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1 + github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32 github.com/container-storage-interface/spec v1.12.0 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 github.com/kubernetes-csi/csi-test/v5 v5.4.0 + github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.11.1 golang.org/x/oauth2 v0.34.0 golang.org/x/sys v0.39.0 google.golang.org/grpc v1.77.0 + google.golang.org/protobuf v1.36.10 k8s.io/api v0.28.15 k8s.io/apimachinery v0.28.15 k8s.io/client-go v0.28.15 @@ -21,7 +23,7 @@ require ( require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful/v3 v3.9.0 // indirect + github.com/emicklei/go-restful/v3 v3.10.1 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect @@ -52,7 +54,6 @@ require ( golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.37.0 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect - google.golang.org/protobuf v1.36.10 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index efe880fb..27445127 100644 --- a/go.sum +++ b/go.sum @@ -2,16 +2,16 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= -github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1 h1:2P+TKwtB50hogQ2neIPX+7ARNMy7vaDU9bkMGEhOz3k= -github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1/go.mod h1:NLC7XW7HqG0HggDaOBCvmf7WplTDaAqTF9u08yh6k0E= +github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32 h1:XUwopev0HXEmCVUrmuXHmDadux857+WSPWSDzj1zrhs= +github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32/go.mod h1:NLC7XW7HqG0HggDaOBCvmf7WplTDaAqTF9u08yh6k0E= github.com/container-storage-interface/spec v1.12.0 h1:zrFOEqpR5AghNaaDG4qyedwPBqU2fU0dWjLQMP/azK0= github.com/container-storage-interface/spec v1.12.0/go.mod h1:txsm+MA2B2WDa5kW69jNbqPnvTtfvZma7T/zsAZ9qX8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.9.0 h1:XwGDlfxEnQZzuopoqxwSEllNcCOM9DhhFyhFIIGKwxE= -github.com/emicklei/go-restful/v3 v3.9.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.10.1 
h1:rc42Y5YTp7Am7CS630D7JmhRjq4UlEUuEKfrDac4bSQ= +github.com/emicklei/go-restful/v3 v3.10.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -59,6 +59,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kubernetes-csi/csi-test/v5 v5.4.0 h1:u5DgYNIreSNO2+u4Nq2Wpl+bbakRSjNyxZHmDTAqnYA= github.com/kubernetes-csi/csi-test/v5 v5.4.0/go.mod h1:anAJKFUb/SdHhIHECgSKxC5LSiLzib+1I6mrWF5Hve8= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0 h1:qS4r4ljINLWKJ9m9Ge3Q3sGZ/eIoDVDT2RhAdQFHb1k= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0/go.mod h1:oGXx2XTEzs9ikW2V6IC1dD8trgjRsS/Mvc2JRiC618Y= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= From fdd653e6f81eb3c997c4072a55ecd40063390c65 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 20 Jan 2026 18:21:41 +0100 Subject: [PATCH 02/26] add csi-snapshotter sidecar --- README.md | 20 +++++++++++++++++++ charts/csi-cloudscale/templates/rbac.yaml | 20 ++++++++++++++++++- .../csi-cloudscale/templates/statefulset.yaml | 12 +++++++++++ charts/csi-cloudscale/values.yaml | 12 +++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ab57dcd..6b78b1f0 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,26 @@ The current version is: **`v3.5.6`**. ## Installing to Kubernetes +Follow these steps to deploy the cloudscale.ch CSI driver to your Kubernetes cluster. + +### Prerequisites for Snapshot Support + +To use CSI snapshots with this driver, your cluster must have the VolumeSnapshot CRDs and the snapshot controller installed. + +Note: Some Kubernetes distributions already include these CRDs and controllers. You only need to apply them manually if your cluster does not provide them. 
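If you are unsure whether your cluster already provides them, a quick check along these lines (any equivalent kubectl query works) shows whether the CRDs and a snapshot controller are present:
```
# List the VolumeSnapshot CRDs, if any are installed
kubectl get crd | grep snapshot.storage.k8s.io

# Look for a running snapshot controller (the namespace varies by distribution)
kubectl get pods --all-namespaces | grep snapshot-controller
```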
+ +Install the snapshot resources: +``` +# Create the necessary CRDs +kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml + +# Install snapshot controller with RBAC +kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml +``` + ### Kubernetes Compatibility The following table describes the required cloudscale.ch driver version per diff --git a/charts/csi-cloudscale/templates/rbac.yaml b/charts/csi-cloudscale/templates/rbac.yaml index 7013c80a..81b7a2c6 100644 --- a/charts/csi-cloudscale/templates/rbac.yaml +++ b/charts/csi-cloudscale/templates/rbac.yaml @@ -21,7 +21,25 @@ rules: verbs: ["get", "list"] - apiGroups: ["snapshot.storage.k8s.io"] resources: ["volumesnapshotcontents"] - verbs: ["get", "list"] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [ "groupsnapshot.storage.k8s.io" ] # todo: are we sure about this? 
snapshot groups are not supported + resources: [ "volumegroupsnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [ "groupsnapshot.storage.k8s.io" ] + resources: [ "volumegroupsnapshotcontents" ] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "groupsnapshot.storage.k8s.io" ] + resources: [ "volumegroupsnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "coordination.k8s.io" ] + resources: [ "leases" ] + verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] - apiGroups: [ "storage.k8s.io" ] resources: [ "csinodes" ] verbs: [ "get", "list", "watch" ] diff --git a/charts/csi-cloudscale/templates/statefulset.yaml b/charts/csi-cloudscale/templates/statefulset.yaml index 36733885..779b236c 100644 --- a/charts/csi-cloudscale/templates/statefulset.yaml +++ b/charts/csi-cloudscale/templates/statefulset.yaml @@ -72,6 +72,18 @@ spec: volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-snapshotter + image: "{{ .Values.snapshotter.image.registry }}/{{ .Values.snapshotter.image.repository }}:{{ .Values.snapshotter.image.tag }}" + args: + - "--csi-address=$(CSI_ENDPOINT)" + - "--leader-election=true" + - "--v=5" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ - name: csi-cloudscale-plugin image: "{{ .Values.controller.image.registry }}/{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag }}" args : diff --git a/charts/csi-cloudscale/values.yaml b/charts/csi-cloudscale/values.yaml index 2ea7ba73..dc77db02 100644 --- a/charts/csi-cloudscale/values.yaml +++ b/charts/csi-cloudscale/values.yaml @@ -79,6 +79,18 @@ resizer: # cpu: 100m # memory: 128Mi + + +snapshotter: + image: + registry: registry.k8s.io + repository: sig-storage/csi-snapshotter + tag: v8.4.0 + pullPolicy: IfNotPresent + logLevelVerbosity: "5" + resources: {} + + controller: replicas: 1 image: From 226e6043488f4b9ee49b08b42583ccbd104578ce Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 20 Jan 2026 18:58:33 +0100 Subject: [PATCH 03/26] fix for existing snapshots on other volumes --- driver/controller.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/driver/controller.go b/driver/controller.go index dc50fc73..e8f9cc0d 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -687,7 +687,11 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ }, }, nil } - return nil, status.Error(codes.AlreadyExists, "snapshot with this name already exists for another volume.") + + // Snapshot name exists but for a different volume + if snapshot.Volume.UUID != req.SourceVolumeId { + return nil, status.Error(codes.AlreadyExists, "snapshot with this name already exists for another volume") + } } volumeSnapshotCreateRequest := &cloudscale.VolumeSnapshotCreateRequest{ From a731ebe04624d61829ca34f8fcca8af24e8349c0 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Wed, 21 Jan 2026 19:55:30 +0100 Subject: [PATCH 04/26] add integraiton test covering creation and deletion of snapshot --- test/kubernetes/integration_test.go | 272 ++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 74bbdf49..6b1aed0c 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -22,15 +22,20 @@ import ( 
"github.com/cloudscale-ch/csi-cloudscale/driver" "github.com/stretchr/testify/assert" "golang.org/x/oauth2" + "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" + snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" appsv1 "k8s.io/api/apps/v1" "k8s.io/api/core/v1" kubeerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/selection" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" @@ -158,6 +163,76 @@ func TestPod_Single_SSD_Volume(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } +func TestPod_Single_SSD_Volume_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + volume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, volume.SizeGB) + assert.Equal(t, "ssd", volume.Type) + + // verify that our disk is not luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", disk.Luks) + assert.Equal(t, "Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, 5*driver.GB, disk.FilesystemSize) + + // create a snapshot of the volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // delete the snapshot before deleting the volume + deleteKubernetesVolumeSnapshot(t, snapshot.Name) + waitCloudscaleVolumeSnapshotDeleted(t, *snapshotContent.Status.SnapshotHandle) + + // delete the pod and the pvcs and wait until the volume was deleted from + // the cloudscale.ch account; this check is necessary to test that the + // csi-plugin properly deletes the volume from cloudscale.ch + cleanup(t, podDescriptor) + waitCloudscaleVolumeDeleted(t, 
pvc.Spec.VolumeName) +} + func TestPod_Single_SSD_Raw_Volume(t *testing.T) { podDescriptor := TestPodDescriptor{ Kind: "Pod", @@ -1452,3 +1527,200 @@ func generateMetricEntry(line string) MetricEntry { } return MetricEntry{split[0], "", split[1]} } + +// makeKubernetesVolumeSnapshot creates a VolumeSnapshot for the given PVC +func makeKubernetesVolumeSnapshot(t *testing.T, snapshotName string, pvcName string) *snapshotv1.VolumeSnapshot { + className := "cloudscale-snapshots" + + snapshot := &snapshotv1.VolumeSnapshot{ + TypeMeta: metav1.TypeMeta{ + Kind: "VolumeSnapshot", + APIVersion: "snapshot.storage.k8s.io/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Namespace: namespace, + }, + Spec: snapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: &className, + Source: snapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &pvcName, + }, + }, + } + + t.Logf("Creating volume snapshot %v", snapshotName) + snapshotClient := getDynamicSnapshotClient(t) + + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(snapshot) + if err != nil { + t.Fatal(err) + } + + unstructuredSnapshot := &unstructured.Unstructured{Object: obj} + + gvr := schema.GroupVersionResource{ + Group: "snapshot.storage.k8s.io", + Version: "v1", + Resource: "volumesnapshots", + } + + created, err := snapshotClient.Resource(gvr).Namespace(namespace).Create( + context.Background(), + unstructuredSnapshot, + metav1.CreateOptions{}, + ) + if err != nil { + t.Fatal(err) + } + + var result snapshotv1.VolumeSnapshot + err = runtime.DefaultUnstructuredConverter.FromUnstructured(created.Object, &result) + if err != nil { + t.Fatal(err) + } + + return &result +} + +// deleteKubernetesVolumeSnapshot deletes the VolumeSnapshot with the given name +func deleteKubernetesVolumeSnapshot(t *testing.T, snapshotName string) { + t.Logf("Deleting volume snapshot %v", snapshotName) + snapshotClient := getDynamicSnapshotClient(t) + + gvr := schema.GroupVersionResource{ + Group: "snapshot.storage.k8s.io", + Version: "v1", + Resource: "volumesnapshots", + } + + err := snapshotClient.Resource(gvr).Namespace(namespace).Delete( + context.Background(), + snapshotName, + metav1.DeleteOptions{}, + ) + assert.NoError(t, err) +} + +// waitForVolumeSnapshot waits for the VolumeSnapshot to be ready +func waitForVolumeSnapshot(t *testing.T, client kubernetes.Interface, name string) { + start := time.Now() + + t.Logf("Waiting for volume snapshot %q to be ready ...\n", name) + + for { + snapshot := getVolumeSnapshot(t, client, name) + + if snapshot.Status != nil && snapshot.Status.ReadyToUse != nil && *snapshot.Status.ReadyToUse { + t.Logf("Volume snapshot %q is ready\n", name) + return + } + + if time.Now().UnixNano()-start.UnixNano() > (5 * time.Minute).Nanoseconds() { + t.Fatalf("timeout exceeded while waiting for volume snapshot %v to be ready", name) + return + } + + t.Logf("Volume snapshot %q not ready yet; waiting...", name) + time.Sleep(5 * time.Second) + } +} + +// getVolumeSnapshot retrieves the VolumeSnapshot with the given name +func getVolumeSnapshot(t *testing.T, client kubernetes.Interface, name string) *snapshotv1.VolumeSnapshot { + snapshotClient := getDynamicSnapshotClient(t) + + gvr := schema.GroupVersionResource{ + Group: "snapshot.storage.k8s.io", + Version: "v1", + Resource: "volumesnapshots", + } + + unstructuredSnapshot, err := snapshotClient.Resource(gvr).Namespace(namespace).Get( + context.Background(), + name, + metav1.GetOptions{}, + ) + assert.NoError(t, err) + + var snapshot 
snapshotv1.VolumeSnapshot + err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredSnapshot.Object, &snapshot) + assert.NoError(t, err) + + return &snapshot +} + +func getCloudscaleVolumeSnapshot(t *testing.T, snapshotHandle string) *cloudscale.VolumeSnapshot { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + snapshot, err := cloudscaleClient.VolumeSnapshots.Get(ctx, snapshotHandle) + if err != nil { + t.Fatalf("Could not find snapshot with handle %v: %v", snapshotHandle, err) + } + + return snapshot +} + +// waitCloudscaleVolumeSnapshotDeleted waits until the snapshot with the given handle was deleted +func waitCloudscaleVolumeSnapshotDeleted(t *testing.T, snapshotHandle string) { + start := time.Now() + + for { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + _, err := cloudscaleClient.VolumeSnapshots.Get(ctx, snapshotHandle) + cancel() + + if err != nil { + if cloudscaleErr, ok := err.(*cloudscale.ErrorResponse); ok { + if cloudscaleErr.StatusCode == http.StatusNotFound { + t.Logf("snapshot %v is deleted on cloudscale", snapshotHandle) + return + } + } + // Some other error - log but continue waiting + t.Logf("error checking snapshot %v: %v", snapshotHandle, err) + } + + if time.Since(start) > 5*time.Minute { + t.Errorf("timeout exceeded while waiting for snapshot %v to be deleted from cloudscale", snapshotHandle) + return + } + + t.Logf("snapshot %v not deleted on cloudscale yet; awaiting deletion", snapshotHandle) + time.Sleep(5 * time.Second) + } +} + +// getVolumeSnapshotContent retrieves the VolumeSnapshotContent for a VolumeSnapshot +func getVolumeSnapshotContent(t *testing.T, contentName string) *snapshotv1.VolumeSnapshotContent { + snapshotClient := getDynamicSnapshotClient(t) + + gvr := schema.GroupVersionResource{ + Group: "snapshot.storage.k8s.io", + Version: "v1", + Resource: "volumesnapshotcontents", + } + + unstructuredContent, err := snapshotClient.Resource(gvr).Get( + context.Background(), + contentName, + metav1.GetOptions{}, + ) + assert.NoError(t, err) + + var content snapshotv1.VolumeSnapshotContent + err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredContent.Object, &content) + assert.NoError(t, err) + + return &content +} + +// getDynamicSnapshotClient returns a dynamic client for working with VolumeSnapshots +func getDynamicSnapshotClient(t *testing.T) dynamic.Interface { + dynamicClient, err := dynamic.NewForConfig(config) + if err != nil { + t.Fatal(err) + } + return dynamicClient +} From aa6bedb671595ca0220ddc7f4eae5b59c77c9861 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Wed, 21 Jan 2026 21:09:49 +0100 Subject: [PATCH 05/26] add integration test creating a new volume from a snapshot --- driver/controller.go | 13 +++ test/kubernetes/integration_test.go | 154 ++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/driver/controller.go b/driver/controller.go index e8f9cc0d..8f664eb5 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -383,6 +383,11 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) if err != nil { errorResponse, ok := err.(*cloudscale.ErrorResponse) if ok { + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Warn("cloudscale API returned error during volume deletion") + if errorResponse.StatusCode == http.StatusNotFound { // To make it idempotent, the volume might already have been // deleted, so a 404 is ok. 
@@ -392,6 +397,14 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) }).Warn("assuming volume is already deleted") return &csi.DeleteVolumeResponse{}, nil } + + // Check if the error message indicates snapshots exist + if strings.Contains(err.Error(), "Snapshots exist") || + strings.Contains(err.Error(), "snapshot") { + ll.Warn("volume has snapshots, cannot delete yet") + return nil, status.Error(codes.FailedPrecondition, + "volume has existing snapshots that must be deleted first") + } } return nil, err } diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 6b1aed0c..f1bbaee1 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -233,6 +233,114 @@ func TestPod_Single_SSD_Volume_Snapshot(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } +func TestPod_Create_Volume_From_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc-original", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + originalVolume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, originalVolume.SizeGB) + assert.Equal(t, "ssd", originalVolume.Type) + + // verify that our disk is not luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", disk.Luks) + assert.Equal(t, "Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, 5*driver.GB, disk.FilesystemSize) + + // store the original filesystem UUID to verify it's preserved after restore + originalFilesystemUUID := disk.FilesystemUUID + + // create a snapshot of the volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + // verify the snapshot exists in cloudscale.ch API + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // create a new pod with a pvc restored from the snapshot + restoredPodDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc-restored", + SizeGB: 5, + 
StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + restoredPod := makeKubernetesPod(t, restoredPodDescriptor) + restoredPVCs := makeKubernetesPVCsFromSnapshot(t, restoredPodDescriptor, snapshot.Name) + assert.Equal(t, 1, len(restoredPVCs)) + + // wait for the restored pod to be running and verify that the pvc is bound + waitForPod(t, client, restoredPod.Name) + restoredPVC := getPVC(t, client, restoredPVCs[0].Name) + assert.Equal(t, v1.ClaimBound, restoredPVC.Status.Phase) + + // load the restored volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + restoredVolume := getCloudscaleVolume(t, restoredPVC.Spec.VolumeName) + assert.Equal(t, 5, restoredVolume.SizeGB) + assert.Equal(t, "ssd", restoredVolume.Type) + + // verify that the restored disk has the same properties as the original + restoredDisk, err := getVolumeInfo(t, restoredPod, restoredPVC.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", restoredDisk.Luks) + assert.Equal(t, "Filesystem", restoredDisk.PVCVolumeMode) + assert.Equal(t, "ext4", restoredDisk.Filesystem) + assert.Equal(t, 5*driver.GB, restoredDisk.DeviceSize) + assert.Equal(t, 5*driver.GB, restoredDisk.FilesystemSize) + + // verify that the filesystem UUID is preserved (data was restored, not recreated) + assert.Equal(t, originalFilesystemUUID, restoredDisk.FilesystemUUID) + + // finally cleanup the restored pod and pvc + cleanup(t, restoredPodDescriptor) + waitCloudscaleVolumeDeleted(t, restoredPVC.Spec.VolumeName) +} + func TestPod_Single_SSD_Raw_Volume(t *testing.T) { podDescriptor := TestPodDescriptor{ Kind: "Pod", @@ -1716,6 +1824,52 @@ func getVolumeSnapshotContent(t *testing.T, contentName string) *snapshotv1.Volu return &content } +// creates kubernetes pvcs from the given TestPodDescriptor, restoring from a snapshot +func makeKubernetesPVCsFromSnapshot(t *testing.T, pod TestPodDescriptor, snapshotName string) []*v1.PersistentVolumeClaim { + pvcs := make([]*v1.PersistentVolumeClaim, 0) + + for _, volume := range pod.Volumes { + volMode := v1.PersistentVolumeFilesystem + if volume.Block { + volMode = v1.PersistentVolumeBlock + } + + apiGroup := "snapshot.storage.k8s.io" + pvcs = append(pvcs, &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: volume.ClaimName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse(fmt.Sprintf("%vGi", volume.SizeGB)), + }, + }, + StorageClassName: strPtr(volume.StorageClass), + DataSource: &v1.TypedLocalObjectReference{ + APIGroup: &apiGroup, + Kind: "VolumeSnapshot", + Name: snapshotName, + }, + }, + }) + } + + t.Log("Creating pvc from snapshot") + for _, pvc := range pvcs { + _, err := client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), pvc, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + } + + return pvcs +} + // getDynamicSnapshotClient returns a dynamic client for working with VolumeSnapshots func getDynamicSnapshotClient(t *testing.T) dynamic.Interface { dynamicClient, err := dynamic.NewForConfig(config) From eaa650c98f42a139dacbd11daf10ede29ce95ada Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 18:15:29 +0100 Subject: [PATCH 06/26] improve error handling in CreateSnapshot if snapshot does not exist --- driver/controller.go | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 
deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 8f664eb5..4bd17bff 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -678,7 +678,14 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ ll.Info("find existing volume snapshots with same name") snapshots, err := d.cloudscaleClient.VolumeSnapshots.List(ctx, cloudscale.WithNameFilter(req.Name)) if err != nil { - return nil, status.Error(codes.Internal, err.Error()) + errorResponse, ok := err.(*cloudscale.ErrorResponse) + if ok { + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Warn("cloudscale API returned error during snapshot list") + } + return nil, status.Errorf(codes.Internal, "failed to list snapshots: %v", err) } for _, snapshot := range snapshots { @@ -710,13 +717,24 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ volumeSnapshotCreateRequest := &cloudscale.VolumeSnapshotCreateRequest{ Name: req.Name, SourceVolume: req.SourceVolumeId, - // todo: tags? + // todo: Tags are not currently supported in snapshot creation } ll.WithField("volume_snapshot_create_request", volumeSnapshotCreateRequest).Info("creating volume snapshot") snapshot, err := d.cloudscaleClient.VolumeSnapshots.Create(ctx, volumeSnapshotCreateRequest) if err != nil { - return nil, status.Error(codes.Internal, err.Error()) + errorResponse, ok := err.(*cloudscale.ErrorResponse) + if ok { + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Warn("cloudscale API returned error during snapshot creation") + + if errorResponse.StatusCode == http.StatusNotFound { + return nil, status.Errorf(codes.NotFound, "source volume %s not found: %v", req.SourceVolumeId, err) + } + } + return nil, status.Errorf(codes.Internal, "failed to create snapshot: %v", err) } creationTime := timestamppb.Now() @@ -752,7 +770,9 @@ func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequ }) ll.Info("delete snapshot called") - // todo: think through long running delete jobs + // Note: Snapshot deletion is asynchronous via the cloudscale API. + // The HTTP request returns success immediately, but the snapshot enters "deleting" state. + // Cloudscale handles the deletion asynchronously. The operation is idempotent. 
err := d.cloudscaleClient.VolumeSnapshots.Delete(ctx, req.SnapshotId) if err != nil { errorResponse, ok := err.(*cloudscale.ErrorResponse) From a70fa501d572187381d71f356ae9d5504e75f0b0 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 18:19:58 +0100 Subject: [PATCH 07/26] improve error message formating Co-authored-by: Michael Weibel <307427+mweibel@users.noreply.github.com> --- driver/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/controller.go b/driver/controller.go index 4bd17bff..2380c6af 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -228,7 +228,7 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo return nil, status.Errorf(codes.NotFound, "source snapshot %s not found", sourceSnapshotID) } } - return nil, status.Errorf(codes.Internal, "failed to get source snapshot: %v", err) + return nil, status.Errorf(codes.Internal, "failed to get source snapshot: %w", err) } ll = ll.WithFields(logrus.Fields{ From bb3f5583e95f317c784d67c17ceb8ff173935cbd Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 18:26:23 +0100 Subject: [PATCH 08/26] fix error formatting --- driver/controller.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/controller.go b/driver/controller.go index 2380c6af..e29c0a28 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -228,7 +228,8 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo return nil, status.Errorf(codes.NotFound, "source snapshot %s not found", sourceSnapshotID) } } - return nil, status.Errorf(codes.Internal, "failed to get source snapshot: %w", err) + wrapped := fmt.Errorf("failed to get source snapshot: %w", err) + return nil, status.Error(codes.Internal, wrapped.Error()) } ll = ll.WithFields(logrus.Fields{ From 68317ca18e376e432049ee72c47b04493b530db4 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 20:45:34 +0100 Subject: [PATCH 09/26] add integration test for luks volume --- test/kubernetes/integration_test.go | 122 ++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index f1bbaee1..ee0a1006 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -341,6 +341,128 @@ func TestPod_Create_Volume_From_Snapshot(t *testing.T) { waitCloudscaleVolumeDeleted(t, restoredPVC.Spec.VolumeName) } +func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-luks-pvc-original", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd-luks", + LuksKey: "secret", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + originalVolume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, originalVolume.SizeGB) + assert.Equal(t, "ssd", originalVolume.Type) + + // verify that our disk is luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, 
pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, "LUKS1", disk.Luks) + assert.Equal(t, "Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "aes-xts-plain64", disk.Cipher) + assert.Equal(t, 512, disk.Keysize) + assert.Equal(t, 5*driver.GB-luksOverhead, disk.FilesystemSize) + + // store the original filesystem UUID to verify it's preserved after restore + originalFilesystemUUID := disk.FilesystemUUID + + // create a snapshot of the LUKS volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + // verify the snapshot exists in cloudscale.ch API + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // create a new pod with a pvc restored from the snapshot with LUKS parameters + restoredPodDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-luks-pvc-restored", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd-luks", + LuksKey: "secret", + }, + }, + } + + restoredPod := makeKubernetesPod(t, restoredPodDescriptor) + restoredPVCs := makeKubernetesPVCsFromSnapshot(t, restoredPodDescriptor, snapshot.Name) + assert.Equal(t, 1, len(restoredPVCs)) + + // wait for the restored pod to be running and verify that the pvc is bound + waitForPod(t, client, restoredPod.Name) + restoredPVC := getPVC(t, client, restoredPVCs[0].Name) + assert.Equal(t, v1.ClaimBound, restoredPVC.Status.Phase) + + // load the restored volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + restoredVolume := getCloudscaleVolume(t, restoredPVC.Spec.VolumeName) + assert.Equal(t, 5, restoredVolume.SizeGB) + assert.Equal(t, "ssd", restoredVolume.Type) + + // verify that the restored disk has LUKS encryption preserved + restoredDisk, err := getVolumeInfo(t, restoredPod, restoredPVC.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "LUKS1", restoredDisk.Luks) + assert.Equal(t, "Filesystem", restoredDisk.PVCVolumeMode) + assert.Equal(t, "ext4", restoredDisk.Filesystem) + assert.Equal(t, 5*driver.GB, restoredDisk.DeviceSize) + assert.Equal(t, 5*driver.GB-luksOverhead, restoredDisk.FilesystemSize) + assert.Equal(t, "aes-xts-plain64", restoredDisk.Cipher) + assert.Equal(t, 512, restoredDisk.Keysize) + + // verify that the filesystem UUID is preserved (data was restored, not recreated) + assert.Equal(t, originalFilesystemUUID, restoredDisk.FilesystemUUID) + + // delete the snapshot before deleting the volumes + deleteKubernetesVolumeSnapshot(t, snapshot.Name) + waitCloudscaleVolumeSnapshotDeleted(t, *snapshotContent.Status.SnapshotHandle) + + // finally cleanup the restored pod and pvc + cleanup(t, 
restoredPodDescriptor) + waitCloudscaleVolumeDeleted(t, restoredPVC.Spec.VolumeName) + + // cleanup the original pod and pvc + cleanup(t, podDescriptor) + waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) +} + func TestPod_Single_SSD_Raw_Volume(t *testing.T) { podDescriptor := TestPodDescriptor{ Kind: "Pod", From 000c1d9e61bee464a968ce7ff7de0cbc52deca76 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 21:15:29 +0100 Subject: [PATCH 10/26] add integration test for PVCs with wrong size --- test/kubernetes/integration_test.go | 142 ++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index ee0a1006..c0d5a570 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -463,6 +463,148 @@ func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } + +func TestPod_Snapshot_Size_Validation(t *testing.T) { + // Test that snapshot size validation works correctly + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-snapshot-size-pvc", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // Create volume + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + volume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, volume.SizeGB) + + // Create snapshot + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.True(t, *snapshot.Status.ReadyToUse) + + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + snapshotHandle := *snapshotContent.Status.SnapshotHandle + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, snapshotHandle) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + + // Attempt to restore with smaller size (should fail) + // Create PVC directly without pod (since it won't bind) + smallerPVCName := "csi-pod-snapshot-size-pvc-smaller" + volMode := v1.PersistentVolumeFilesystem + apiGroup := "snapshot.storage.k8s.io" + smallerPVC := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: smallerPVCName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("3Gi"), // Smaller than snapshot size (5GB) + }, + }, + StorageClassName: strPtr("cloudscale-volume-ssd"), + DataSource: &v1.TypedLocalObjectReference{ + APIGroup: &apiGroup, + Kind: "VolumeSnapshot", + Name: snapshot.Name, + }, + }, + } + + t.Log("Creating PVC from snapshot with smaller size (should fail)") + _, err := client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), smallerPVC, metav1.CreateOptions{}) + assert.NoError(t, err) + + // Wait a bit for the PVC to be processed + time.Sleep(10 * time.Second) + + // Check that PVC is not bound (should fail) + smallerPVC = getPVC(t, client, smallerPVCName) + assert.NotEqual(t, v1.ClaimBound, smallerPVC.Status.Phase, "PVC with smaller size should not be bound") + assert.Equal(t, 
v1.ClaimPending, smallerPVC.Status.Phase, "PVC should be in Pending state due to size validation failure") + + // Verify no volume was created + if smallerPVC.Spec.VolumeName != "" { + t.Logf("Warning: Volume was created despite size validation failure: %s", smallerPVC.Spec.VolumeName) + } + + // Cleanup failed PVC + err = client.CoreV1().PersistentVolumeClaims(namespace).Delete(context.Background(), smallerPVCName, metav1.DeleteOptions{}) + assert.NoError(t, err) + + // Attempt to restore with larger size (should fail) + // Create PVC directly without pod (since it won't bind) + largerPVCName := "csi-pod-snapshot-size-pvc-larger" + largerPVC := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: largerPVCName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("10Gi"), // Larger than snapshot size (5GB) + }, + }, + StorageClassName: strPtr("cloudscale-volume-ssd"), + DataSource: &v1.TypedLocalObjectReference{ + APIGroup: &apiGroup, + Kind: "VolumeSnapshot", + Name: snapshot.Name, + }, + }, + } + + t.Log("Creating PVC from snapshot with larger size (should fail)") + _, err = client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), largerPVC, metav1.CreateOptions{}) + assert.NoError(t, err) + + // Wait a bit for the PVC to be processed + time.Sleep(10 * time.Second) + + // Check that PVC is not bound (should fail) + largerPVC = getPVC(t, client, largerPVCName) + assert.NotEqual(t, v1.ClaimBound, largerPVC.Status.Phase, "PVC with larger size should not be bound") + assert.Equal(t, v1.ClaimPending, largerPVC.Status.Phase, "PVC should be in Pending state due to size validation failure") + + // Verify no volume was created + if largerPVC.Spec.VolumeName != "" { + t.Logf("Warning: Volume was created despite size validation failure: %s", largerPVC.Spec.VolumeName) + } + + // Cleanup failed PVC + err = client.CoreV1().PersistentVolumeClaims(namespace).Delete(context.Background(), largerPVCName, metav1.DeleteOptions{}) + assert.NoError(t, err) + + // Cleanup original resources + deleteKubernetesVolumeSnapshot(t, snapshot.Name) + waitCloudscaleVolumeSnapshotDeleted(t, snapshotHandle) + cleanup(t, podDescriptor) + waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) +} + func TestPod_Single_SSD_Raw_Volume(t *testing.T) { podDescriptor := TestPodDescriptor{ Kind: "Pod", From 576bd5ae15d7ed4427700f49c2e0865fb07989c2 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 22:45:32 +0100 Subject: [PATCH 11/26] add examples, including luks --- .../luks-volumesnapshot.yaml | 11 ++++ .../restored-luks-pod.yaml | 17 +++++ .../restored-luks-pvc.yaml | 21 +++++++ .../restored-luks-secret.yaml | 10 +++ .../kubernetes/volume-snapshots/README.md | 62 +++++++++++++++++++ .../volume-snapshots/original-pod.yaml | 17 +++++ .../volume-snapshots/original-pvc.yaml | 12 ++++ .../volume-snapshots/restored-pod.yaml | 17 +++++ .../volume-snapshots/restored-pvc.yaml | 17 +++++ .../volume-snapshots/volumesnapshot.yaml | 10 +++ .../volume-snapshots/volumesnapshotclass.yaml | 9 +++ 11 files changed, 203 insertions(+) create mode 100644 examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml create mode 100644 examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml create mode 100644 examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml create mode 
100644 examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml create mode 100644 examples/kubernetes/volume-snapshots/README.md create mode 100644 examples/kubernetes/volume-snapshots/original-pod.yaml create mode 100644 examples/kubernetes/volume-snapshots/original-pvc.yaml create mode 100644 examples/kubernetes/volume-snapshots/restored-pod.yaml create mode 100644 examples/kubernetes/volume-snapshots/restored-pvc.yaml create mode 100644 examples/kubernetes/volume-snapshots/volumesnapshot.yaml create mode 100644 examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml diff --git a/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml b/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml new file mode 100644 index 00000000..490faa55 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml @@ -0,0 +1,11 @@ +# VolumeSnapshot creates a snapshot of a LUKS-encrypted volume +# Make sure the VolumeSnapshotClass is created first (see ../volume-snapshots/volumesnapshotclass.yaml) +# The snapshot preserves the LUKS encryption state +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: my-luks-snapshot +spec: + volumeSnapshotClassName: cloudscale-snapshots + source: + persistentVolumeClaimName: csi-pod-pvc-luks diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml new file mode 100644 index 00000000..62bdda2a --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the restored LUKS volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-restored-luks-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-restored-luks-volume diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml new file mode 100644 index 00000000..1cc586c1 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml @@ -0,0 +1,21 @@ +# PersistentVolumeClaim restored from a LUKS snapshot +# IMPORTANT: When restoring from a LUKS snapshot, you MUST: +# 1. Use a LUKS storage class (cloudscale-volume-ssd-luks or cloudscale-volume-bulk-luks) +# 2. Provide a LUKS secret with the pattern: ${pvc-name}-luks-key +# 3. Use the SAME LUKS key as the original volume +# 4. 
Match the snapshot size exactly (1Gi in this example) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-restored-luks-volume +spec: + accessModes: + - ReadWriteOnce + storageClassName: cloudscale-volume-ssd-luks + resources: + requests: + storage: 1Gi + dataSource: + name: my-luks-snapshot + kind: VolumeSnapshot + apiGroup: snapshot.storage.k8s.io diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml new file mode 100644 index 00000000..4e2ee1b7 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml @@ -0,0 +1,10 @@ +# Secret containing the LUKS key for the restored volume +# IMPORTANT: This must use the same LUKS key as the original volume +# The secret name must follow the pattern: ${pvc-name}-luks-key +# In this case: my-restored-luks-volume-luks-key +apiVersion: v1 +kind: Secret +metadata: + name: my-restored-luks-volume-luks-key +stringData: + luksKey: "hDEKFgEZgmpuppShPG7HailSFBsy8MzlvlhALvqk0+2jTrcKrFmtttoF5IGlLVoLt/jpaWnk/kcl7JxnsZ3xQjEcYumv4WkwOv77x+c2C/kyyldTNRaCaVHG9fW9n6oicoWzsyUWcmu0d+JOorGZ792lsS9Q5gXlCg5BD2x1MoVVr8hTQArFfUX6NuHF1o0v/EGHU0A5O5wiNnqpdDjf9r56rPt0H290Nr6Y5Ijb5RTIoJFT5ww5XocrvLlR/GiXRYgzeISfbfyIr8FpfRKmjPTZdLBSXPMMdHJNcPIlRG+DfnBaTKkIFwiWXjxXZss71IKibEM7Qfjwka0KFyufwA==" diff --git a/examples/kubernetes/volume-snapshots/README.md b/examples/kubernetes/volume-snapshots/README.md new file mode 100644 index 00000000..302729ad --- /dev/null +++ b/examples/kubernetes/volume-snapshots/README.md @@ -0,0 +1,62 @@ +# Volume Snapshots Example + +This example demonstrates how to create and restore volumes from snapshots using the cloudscale.ch CSI driver. + +## Prerequisites + +Before using snapshots, ensure your cluster has the VolumeSnapshot CRDs and snapshot controller installed. +See the [main README](../../README.md#prerequisites-for-snapshot-support) for installation instructions. + +## Workflow + +1. **Create VolumeSnapshotClass** (one-time setup, required before creating snapshots): + ```bash + kubectl apply -f volumesnapshotclass.yaml + ``` + + **Note:** VolumeSnapshotClass is currently not deployed automatically with the driver. You must create it manually. + This may change in future releases where it will be deployed automatically (similar to StorageClass). + +2. **Create original volume and pod** (optional, for testing): + ```bash + kubectl apply -f original-pvc.yaml + kubectl apply -f original-pod.yaml + ``` + +3. **Create snapshot**: + ```bash + kubectl apply -f volumesnapshot.yaml + ``` + +4. **Create restored volume and pod**: + ```bash + kubectl apply -f restored-pvc.yaml + kubectl apply -f restored-pod.yaml + ``` + +## Verification + +Check snapshot status: +```bash +kubectl get volumesnapshot +kubectl describe volumesnapshot/my-snapshot +``` + +Check restored volume: +```bash +kubectl get pvc +kubectl get pod +``` + +**LUKS volumes**: For LUKS-encrypted volumes, see the [LUKS snapshot example](../luks-encrypted-volumes/). 
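+
+As an optional end-to-end check, you can confirm that data from the snapshot really ends up in the restored volume. The commands below are only a sketch and assume the example pod names `my-app` and `my-restored-app` and the `/data` mount path from the manifests in this directory; write a marker file into the original volume before creating the snapshot (step 3), then read it back from the restored pod:
+```bash
+# run before applying volumesnapshot.yaml
+kubectl exec my-app -- sh -c 'echo snapshot-marker > /data/marker.txt'
+# run once the restored pod is running
+kubectl exec my-restored-app -- cat /data/marker.txt
+```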
+ +## Cleanup + +```bash +kubectl delete -f restored-pod.yaml +kubectl delete -f restored-pvc.yaml +kubectl delete -f volumesnapshot.yaml +kubectl delete -f original-pod.yaml +kubectl delete -f original-pvc.yaml +# Note: VolumeSnapshotClass is typically not deleted as it's a cluster resource +``` diff --git a/examples/kubernetes/volume-snapshots/original-pod.yaml b/examples/kubernetes/volume-snapshots/original-pod.yaml new file mode 100644 index 00000000..b0b48808 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/original-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the original volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-volume diff --git a/examples/kubernetes/volume-snapshots/original-pvc.yaml b/examples/kubernetes/volume-snapshots/original-pvc.yaml new file mode 100644 index 00000000..24a198e5 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/original-pvc.yaml @@ -0,0 +1,12 @@ +# Original PersistentVolumeClaim that will be snapshotted +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-volume +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: cloudscale-volume-ssd diff --git a/examples/kubernetes/volume-snapshots/restored-pod.yaml b/examples/kubernetes/volume-snapshots/restored-pod.yaml new file mode 100644 index 00000000..4bdd38aa --- /dev/null +++ b/examples/kubernetes/volume-snapshots/restored-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the restored volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-restored-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-restored-volume diff --git a/examples/kubernetes/volume-snapshots/restored-pvc.yaml b/examples/kubernetes/volume-snapshots/restored-pvc.yaml new file mode 100644 index 00000000..5250f894 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/restored-pvc.yaml @@ -0,0 +1,17 @@ +# PersistentVolumeClaim restored from the snapshot +# Note: The restored volume must have the same size as the snapshot (5Gi in this example) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-restored-volume +spec: + accessModes: + - ReadWriteOnce + storageClassName: cloudscale-volume-ssd + resources: + requests: + storage: 5Gi + dataSource: + name: my-snapshot + kind: VolumeSnapshot + apiGroup: snapshot.storage.k8s.io diff --git a/examples/kubernetes/volume-snapshots/volumesnapshot.yaml b/examples/kubernetes/volume-snapshots/volumesnapshot.yaml new file mode 100644 index 00000000..dade8aca --- /dev/null +++ b/examples/kubernetes/volume-snapshots/volumesnapshot.yaml @@ -0,0 +1,10 @@ +# VolumeSnapshot creates a snapshot of the original volume +# Make sure the VolumeSnapshotClass is created first (volumesnapshotclass.yaml) +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: my-snapshot +spec: + volumeSnapshotClassName: cloudscale-snapshots + source: + persistentVolumeClaimName: my-volume diff --git a/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml b/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml new file mode 100644 
index 00000000..f05b880c --- /dev/null +++ b/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml @@ -0,0 +1,9 @@ +# VolumeSnapshotClass defines how snapshots should be created for the cloudscale.ch CSI driver. +# This is a cluster-level resource that needs to be created once before using snapshots. +# Note: This may be deployed automatically with the driver in future releases. +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: cloudscale-snapshots +driver: csi.cloudscale.ch +deletionPolicy: Delete From ec91d70ab8b07300d422b38859ab7a7f7ccc196e Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 22 Jan 2026 22:53:45 +0100 Subject: [PATCH 12/26] remove volume group snapshot permissions --- charts/csi-cloudscale/templates/rbac.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/charts/csi-cloudscale/templates/rbac.yaml b/charts/csi-cloudscale/templates/rbac.yaml index 79059b70..e0ecec16 100644 --- a/charts/csi-cloudscale/templates/rbac.yaml +++ b/charts/csi-cloudscale/templates/rbac.yaml @@ -28,15 +28,6 @@ rules: - apiGroups: [ "snapshot.storage.k8s.io" ] resources: [ "volumesnapshotclasses" ] verbs: [ "get", "list", "watch" ] - - apiGroups: [ "groupsnapshot.storage.k8s.io" ] # todo: are we sure about this? snapshot groups are not supported - resources: [ "volumegroupsnapshotclasses" ] - verbs: [ "get", "list", "watch" ] - - apiGroups: [ "groupsnapshot.storage.k8s.io" ] - resources: [ "volumegroupsnapshotcontents" ] - verbs: [ "get", "list", "watch", "update", "patch" ] - - apiGroups: [ "groupsnapshot.storage.k8s.io" ] - resources: [ "volumegroupsnapshotcontents/status" ] - verbs: [ "update", "patch" ] - apiGroups: [ "coordination.k8s.io" ] resources: [ "leases" ] verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] From 6180be83160f85b99bc818efb181db4cbcf0acc1 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Mon, 26 Jan 2026 15:02:25 +0100 Subject: [PATCH 13/26] add propper permission role and bindings --- charts/csi-cloudscale/templates/rbac.yaml | 46 +++++++++++++++++------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/charts/csi-cloudscale/templates/rbac.yaml b/charts/csi-cloudscale/templates/rbac.yaml index e0ecec16..aa6cb9a0 100644 --- a/charts/csi-cloudscale/templates/rbac.yaml +++ b/charts/csi-cloudscale/templates/rbac.yaml @@ -16,18 +16,6 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["list", "watch", "create", "update", "patch"] - - apiGroups: ["snapshot.storage.k8s.io"] - resources: ["volumesnapshots"] - verbs: [ "get", "list", "watch", "update" ] - - apiGroups: ["snapshot.storage.k8s.io"] - resources: ["volumesnapshotcontents"] - verbs: [ "get", "list", "watch", "update", "patch" ] - - apiGroups: [ "snapshot.storage.k8s.io" ] - resources: [ "volumesnapshotcontents/status" ] - verbs: [ "update", "patch" ] - - apiGroups: [ "snapshot.storage.k8s.io" ] - resources: [ "volumesnapshotclasses" ] - verbs: [ "get", "list", "watch" ] - apiGroups: [ "coordination.k8s.io" ] resources: [ "leases" ] verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] @@ -61,6 +49,27 @@ rules: --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "csi-cloudscale.driver-name" . 
}}-snapshotter-role +rules: + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: [ "get", "list", "watch", "update" ] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ include "csi-cloudscale.driver-name" . }}-resizer-role rules: @@ -108,6 +117,19 @@ roleRef: --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "csi-cloudscale.driver-name" . }}-snapshotter-binding +subjects: + - kind: ServiceAccount + name: {{ include "csi-cloudscale.controller-service-account-name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "csi-cloudscale.driver-name" . }}-snapshotter-role + apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ include "csi-cloudscale.driver-name" . }}-resizer-binding subjects: From 1186d233638e04f68bf8ab1d292b6e26b915b87d Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Mon, 26 Jan 2026 15:14:38 +0100 Subject: [PATCH 14/26] explain reason for DynamicSnapshotClient --- test/kubernetes/integration_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index c0d5a570..622845ce 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -2134,7 +2134,12 @@ func makeKubernetesPVCsFromSnapshot(t *testing.T, pod TestPodDescriptor, snapsho return pvcs } -// getDynamicSnapshotClient returns a dynamic client for working with VolumeSnapshots +// getDynamicSnapshotClient returns a dynamic client for working with VolumeSnapshots. +// VolumeSnapshot is a Custom Resource Definition (CRD), not a built-in Kubernetes resource. +// Unlike built-in resources (Pods, PVCs, etc.) which have typed clientsets, CRDs require +// a dynamic client that works with unstructured objects. The external-snapshotter client +// package provides the types (snapshotv1.VolumeSnapshot) but not a full typed clientset, +// so we use the dynamic client with GroupVersionResource to interact with the API. 
func getDynamicSnapshotClient(t *testing.T) dynamic.Interface { dynamicClient, err := dynamic.NewForConfig(config) if err != nil { From c3e163c43b3973ae98c190ead3113c6e0dffc904 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Mon, 26 Jan 2026 15:41:07 +0100 Subject: [PATCH 15/26] get rid of dynamicClient, use typed clientset from external-snapshotter --- test/kubernetes/integration_test.go | 99 +++++------------------------ 1 file changed, 16 insertions(+), 83 deletions(-) diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 622845ce..0f2eb7d3 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -22,20 +22,17 @@ import ( "github.com/cloudscale-ch/csi-cloudscale/driver" "github.com/stretchr/testify/assert" "golang.org/x/oauth2" - "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" + snapshotclientset "github.com/kubernetes-csi/external-snapshotter/client/v6/clientset/versioned" appsv1 "k8s.io/api/apps/v1" "k8s.io/api/core/v1" kubeerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/selection" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" @@ -81,6 +78,7 @@ type DiskInfo struct { var ( client kubernetes.Interface + snapshotClient snapshotclientset.Interface config *rest.Config cloudscaleClient *cloudscale.Client ) @@ -463,7 +461,6 @@ func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } - func TestPod_Snapshot_Size_Validation(t *testing.T) { // Test that snapshot size validation works correctly podDescriptor := TestPodDescriptor{ @@ -501,7 +498,6 @@ func TestPod_Snapshot_Size_Validation(t *testing.T) { cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, snapshotHandle) assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) - // Attempt to restore with smaller size (should fail) // Create PVC directly without pod (since it won't bind) smallerPVCName := "csi-pod-snapshot-size-pvc-smaller" @@ -1170,6 +1166,12 @@ func setup() error { return err } + // create the snapshot clientset for working with VolumeSnapshot CRDs + snapshotClient, err = snapshotclientset.NewForConfig(config) + if err != nil { + return err + } + // create test namespace _, err = client.CoreV1().Namespaces().Create( context.Background(), @@ -1922,51 +1924,22 @@ func makeKubernetesVolumeSnapshot(t *testing.T, snapshotName string, pvcName str } t.Logf("Creating volume snapshot %v", snapshotName) - snapshotClient := getDynamicSnapshotClient(t) - - obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(snapshot) - if err != nil { - t.Fatal(err) - } - - unstructuredSnapshot := &unstructured.Unstructured{Object: obj} - - gvr := schema.GroupVersionResource{ - Group: "snapshot.storage.k8s.io", - Version: "v1", - Resource: "volumesnapshots", - } - - created, err := snapshotClient.Resource(gvr).Namespace(namespace).Create( + created, err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Create( context.Background(), - unstructuredSnapshot, + snapshot, metav1.CreateOptions{}, ) if err != nil { t.Fatal(err) } - var result snapshotv1.VolumeSnapshot - err = 
runtime.DefaultUnstructuredConverter.FromUnstructured(created.Object, &result) - if err != nil { - t.Fatal(err) - } - - return &result + return created } // deleteKubernetesVolumeSnapshot deletes the VolumeSnapshot with the given name func deleteKubernetesVolumeSnapshot(t *testing.T, snapshotName string) { t.Logf("Deleting volume snapshot %v", snapshotName) - snapshotClient := getDynamicSnapshotClient(t) - - gvr := schema.GroupVersionResource{ - Group: "snapshot.storage.k8s.io", - Version: "v1", - Resource: "volumesnapshots", - } - - err := snapshotClient.Resource(gvr).Namespace(namespace).Delete( + err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Delete( context.Background(), snapshotName, metav1.DeleteOptions{}, @@ -2000,26 +1973,13 @@ func waitForVolumeSnapshot(t *testing.T, client kubernetes.Interface, name strin // getVolumeSnapshot retrieves the VolumeSnapshot with the given name func getVolumeSnapshot(t *testing.T, client kubernetes.Interface, name string) *snapshotv1.VolumeSnapshot { - snapshotClient := getDynamicSnapshotClient(t) - - gvr := schema.GroupVersionResource{ - Group: "snapshot.storage.k8s.io", - Version: "v1", - Resource: "volumesnapshots", - } - - unstructuredSnapshot, err := snapshotClient.Resource(gvr).Namespace(namespace).Get( + snapshot, err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Get( context.Background(), name, metav1.GetOptions{}, ) assert.NoError(t, err) - - var snapshot snapshotv1.VolumeSnapshot - err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredSnapshot.Object, &snapshot) - assert.NoError(t, err) - - return &snapshot + return snapshot } func getCloudscaleVolumeSnapshot(t *testing.T, snapshotHandle string) *cloudscale.VolumeSnapshot { @@ -2066,26 +2026,13 @@ func waitCloudscaleVolumeSnapshotDeleted(t *testing.T, snapshotHandle string) { // getVolumeSnapshotContent retrieves the VolumeSnapshotContent for a VolumeSnapshot func getVolumeSnapshotContent(t *testing.T, contentName string) *snapshotv1.VolumeSnapshotContent { - snapshotClient := getDynamicSnapshotClient(t) - - gvr := schema.GroupVersionResource{ - Group: "snapshot.storage.k8s.io", - Version: "v1", - Resource: "volumesnapshotcontents", - } - - unstructuredContent, err := snapshotClient.Resource(gvr).Get( + content, err := snapshotClient.SnapshotV1().VolumeSnapshotContents().Get( context.Background(), contentName, metav1.GetOptions{}, ) assert.NoError(t, err) - - var content snapshotv1.VolumeSnapshotContent - err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredContent.Object, &content) - assert.NoError(t, err) - - return &content + return content } // creates kubernetes pvcs from the given TestPodDescriptor, restoring from a snapshot @@ -2133,17 +2080,3 @@ func makeKubernetesPVCsFromSnapshot(t *testing.T, pod TestPodDescriptor, snapsho return pvcs } - -// getDynamicSnapshotClient returns a dynamic client for working with VolumeSnapshots. -// VolumeSnapshot is a Custom Resource Definition (CRD), not a built-in Kubernetes resource. -// Unlike built-in resources (Pods, PVCs, etc.) which have typed clientsets, CRDs require -// a dynamic client that works with unstructured objects. The external-snapshotter client -// package provides the types (snapshotv1.VolumeSnapshot) but not a full typed clientset, -// so we use the dynamic client with GroupVersionResource to interact with the API. 
-func getDynamicSnapshotClient(t *testing.T) dynamic.Interface { - dynamicClient, err := dynamic.NewForConfig(config) - if err != nil { - t.Fatal(err) - } - return dynamicClient -} From 73a757f90a27acf8b117130751fd2344d5b058d3 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Mon, 26 Jan 2026 18:22:55 +0100 Subject: [PATCH 16/26] use errors.As to prevent issues with wraped errors --- driver/controller.go | 29 +++++++++++++++-------------- test/kubernetes/integration_test.go | 6 ++++-- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index e29c0a28..ebc3528c 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -19,6 +19,7 @@ package driver import ( "context" + "errors" "fmt" "net/http" "regexp" @@ -222,8 +223,8 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo // Verify snapshot exists and get its properties, must return NotFound when snapshot does not exist. snapshot, err := d.cloudscaleClient.VolumeSnapshots.Get(ctx, sourceSnapshotID) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { return nil, status.Errorf(codes.NotFound, "source snapshot %s not found", sourceSnapshotID) } @@ -382,8 +383,8 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) err := d.cloudscaleClient.Volumes.Delete(ctx, req.VolumeId) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { ll.WithFields(logrus.Fields{ "status_code": errorResponse.StatusCode, "error": err, @@ -486,8 +487,8 @@ func (d *Driver) ControllerUnpublishVolume(ctx context.Context, req *csi.Control // check if volume exist before trying to detach it volume, err := d.cloudscaleClient.Volumes.Get(ctx, req.VolumeId) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { ll.Info("assuming volume is detached because it does not exist") return &csi.ControllerUnpublishVolumeResponse{}, nil @@ -679,8 +680,8 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ ll.Info("find existing volume snapshots with same name") snapshots, err := d.cloudscaleClient.VolumeSnapshots.List(ctx, cloudscale.WithNameFilter(req.Name)) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { ll.WithFields(logrus.Fields{ "status_code": errorResponse.StatusCode, "error": err, @@ -724,8 +725,8 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ ll.WithField("volume_snapshot_create_request", volumeSnapshotCreateRequest).Info("creating volume snapshot") snapshot, err := d.cloudscaleClient.VolumeSnapshots.Create(ctx, volumeSnapshotCreateRequest) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { ll.WithFields(logrus.Fields{ "status_code": errorResponse.StatusCode, "error": err, @@ -776,8 +777,8 @@ func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequ // Cloudscale handles the deletion asynchronously. 
The operation is idempotent. err := d.cloudscaleClient.VolumeSnapshots.Delete(ctx, req.SnapshotId) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { // To make it idempotent, the volume might already have been // deleted, so a 404 is ok. @@ -981,8 +982,8 @@ func validateLuksCapabilities(caps []*csi.VolumeCapability) []string { } func reraiseNotFound(err error, log *logrus.Entry, operation string) error { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { lt := log.WithFields(logrus.Fields{ "error": err, "errorResponse": errorResponse, diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 0f2eb7d3..0af09f2c 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -1645,7 +1645,8 @@ func waitCloudscaleVolumeDeleted(t *testing.T, volumeName string) { return } if err != nil { - if cloudscaleErr, ok := err.(*cloudscale.ErrorResponse); ok { + var cloudscaleErr *cloudscale.ErrorResponse + if errors.As(err, &cloudscaleErr) { if cloudscaleErr.StatusCode == http.StatusNotFound { t.Logf("volume %v is deleted on cloudscale", volumeName) return @@ -2004,7 +2005,8 @@ func waitCloudscaleVolumeSnapshotDeleted(t *testing.T, snapshotHandle string) { cancel() if err != nil { - if cloudscaleErr, ok := err.(*cloudscale.ErrorResponse); ok { + var cloudscaleErr *cloudscale.ErrorResponse + if errors.As(err, &cloudscaleErr) { if cloudscaleErr.StatusCode == http.StatusNotFound { t.Logf("snapshot %v is deleted on cloudscale", snapshotHandle) return From d376c0b1b751d36fd2feeffba29c814349d68804 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 27 Jan 2026 11:39:32 +0100 Subject: [PATCH 17/26] throw InvalidArgument instead of warning for storageType missmatch --- driver/controller.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index ebc3528c..44e062f1 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -282,12 +282,13 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo } } - // cloudscale does not support to change storage type, so we warn if parameters are specified that will be ignored + // cloudscale does not support changing storage type when restoring from snapshot. + // The restored volume must have the same storage type as the snapshot. if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" && storageType != snapshot.Volume.Type { - ll.WithFields(logrus.Fields{ - "requested_type": storageType, - "snapshot_volume_type": snapshot.Volume.Type, - }).Warn("storage type parameter ignored when creating from snapshot") + return nil, status.Errorf(codes.InvalidArgument, + "requested storage type %s does not match snapshot storage type %s. 
"+ + "Storage type cannot be changed when creating a volume from a snapshot", + storageType, snapshot.Volume.Type) } luksEncrypted := "false" From 760a5c78284195de0aed71b5c2eb8452945c17db Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 27 Jan 2026 13:11:40 +0100 Subject: [PATCH 18/26] improve error handling of existing snapshots and log levels --- driver/controller.go | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 44e062f1..1389d86c 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -283,7 +283,7 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo } // cloudscale does not support changing storage type when restoring from snapshot. - // The restored volume must have the same storage type as the snapshot. + // The restored volume must have the same storage type as the source volume of the snapshot. if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" && storageType != snapshot.Volume.Type { return nil, status.Errorf(codes.InvalidArgument, "requested storage type %s does not match snapshot storage type %s. "+ @@ -386,27 +386,30 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) if err != nil { var errorResponse *cloudscale.ErrorResponse if errors.As(err, &errorResponse) { - ll.WithFields(logrus.Fields{ - "status_code": errorResponse.StatusCode, - "error": err, - }).Warn("cloudscale API returned error during volume deletion") - if errorResponse.StatusCode == http.StatusNotFound { // To make it idempotent, the volume might already have been // deleted, so a 404 is ok. ll.WithFields(logrus.Fields{ "error": err, "resp": errorResponse, - }).Warn("assuming volume is already deleted") + }).Debug("assuming volume is already deleted") return &csi.DeleteVolumeResponse{}, nil } - // Check if the error message indicates snapshots exist - if strings.Contains(err.Error(), "Snapshots exist") || - strings.Contains(err.Error(), "snapshot") { - ll.Warn("volume has snapshots, cannot delete yet") - return nil, status.Error(codes.FailedPrecondition, - "volume has existing snapshots that must be deleted first") + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Debug("cloudscale API returned error during volume deletion") + + // Check if the error indicates snapshots exist (HTTP 400 with specific error message) + // The API returns HTTP 400 with: {"detail": "Snapshots exist for this volume. The snapshot must be deleted before the volume can be deleted."} + if errorResponse.StatusCode == http.StatusBadRequest && + strings.Contains(err.Error(), "Snapshots exist for this volume. The snapshot must be deleted before the volume can be deleted.") { + ll.WithFields(logrus.Fields{ + "error": err, + "resp": errorResponse, + }).Warn("volume has snapshots, cannot delete yet") + return nil, status.Error(codes.FailedPrecondition, "volume has existing snapshots that must be deleted first") } } return nil, err @@ -781,12 +784,12 @@ func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequ var errorResponse *cloudscale.ErrorResponse if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { - // To make it idempotent, the volume might already have been + // To make it idempotent, the snapshot might already have been // deleted, so a 404 is ok. 
ll.WithFields(logrus.Fields{ "error": err, "resp": errorResponse, - }).Warn("assuming snapshot is already deleted") + }).Debug("assuming snapshot is already deleted") return &csi.DeleteSnapshotResponse{}, nil } } From 40920bef3df2e7a1410865f9e9e1147c0e69f1f5 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 27 Jan 2026 13:27:05 +0100 Subject: [PATCH 19/26] simplify createdAt parsing --- driver/controller.go | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 1389d86c..22800a93 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -401,8 +401,7 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) "error": err, }).Debug("cloudscale API returned error during volume deletion") - // Check if the error indicates snapshots exist (HTTP 400 with specific error message) - // The API returns HTTP 400 with: {"detail": "Snapshots exist for this volume. The snapshot must be deleted before the volume can be deleted."} + // Check if the error indicates snapshots exist (HTTP 400 with error message "Snapshots exist for this volume") if errorResponse.StatusCode == http.StatusBadRequest && strings.Contains(err.Error(), "Snapshots exist for this volume. The snapshot must be deleted before the volume can be deleted.") { ll.WithFields(logrus.Fields{ @@ -696,12 +695,11 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ for _, snapshot := range snapshots { if snapshot.Volume.UUID == req.SourceVolumeId { - creationTime := timestamppb.Now() - if snapshot.CreatedAt != "" { - if t, err := time.Parse(time.RFC3339, snapshot.CreatedAt); err == nil { - creationTime = timestamppb.New(t) - } + t, err := time.Parse(time.RFC3339, snapshot.CreatedAt) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to parse snapshot CreatedAt timestamp %q: %v", snapshot.CreatedAt, err) } + creationTime := timestamppb.New(t) return &csi.CreateSnapshotResponse{ Snapshot: &csi.Snapshot{ @@ -743,12 +741,11 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ return nil, status.Errorf(codes.Internal, "failed to create snapshot: %v", err) } - creationTime := timestamppb.Now() - if snapshot.CreatedAt != "" { - if t, err := time.Parse(time.RFC3339, snapshot.CreatedAt); err == nil { - creationTime = timestamppb.New(t) - } + t, err := time.Parse(time.RFC3339, snapshot.CreatedAt) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to parse snapshot CreatedAt timestamp %q: %v", snapshot.CreatedAt, err) } + creationTime := timestamppb.New(t) resp := &csi.CreateSnapshotResponse{ Snapshot: &csi.Snapshot{ From f044acff8b63f9d3c7c0b776a33f1a1845b46619 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 27 Jan 2026 17:18:18 +0100 Subject: [PATCH 20/26] shorten compared error message --- driver/controller.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 22800a93..56f2e3da 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -402,8 +402,7 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) }).Debug("cloudscale API returned error during volume deletion") // Check if the error indicates snapshots exist (HTTP 400 with error message "Snapshots exist for this volume") - if errorResponse.StatusCode == http.StatusBadRequest && - strings.Contains(err.Error(), "Snapshots exist for this volume. 
The snapshot must be deleted before the volume can be deleted.") { + if errorResponse.StatusCode == http.StatusBadRequest && strings.Contains(strings.ToLower(err.Error()), strings.ToLower("Snapshots exist for this volume")) { ll.WithFields(logrus.Fields{ "error": err, "resp": errorResponse, From 567d5611ad9dec7b89a1cd80551a241ef109f636 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Tue, 27 Jan 2026 18:20:13 +0100 Subject: [PATCH 21/26] replace custom CRD and snapshot controller installation with kustomize --- README.md | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a332a65a..78d93e8e 100644 --- a/README.md +++ b/README.md @@ -77,16 +77,10 @@ To use CSI snapshots with this driver, your cluster must have the VolumeSnapshot Note: Some Kubernetes distributions already include these CRDs and controllers. You only need to apply them manually if your cluster does not provide them. -Install the snapshot resources: +Install the snapshot resources using kustomize (recommended): ``` -# Create the necessary CRDs -kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml -kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml -kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml - -# Install snapshot controller with RBAC -kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml -kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.4.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml +kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/client/config/crd?ref=v8.4.0 +kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/deploy/kubernetes/snapshot-controller?ref=v8.4.0 ``` ### Kubernetes Compatibility From c8de2779f1f6e77765f0f912876bec301a543ea3 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 29 Jan 2026 10:11:00 +0100 Subject: [PATCH 22/26] ignoring storage type parameter, only add debug information --- driver/controller.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/driver/controller.go b/driver/controller.go index 56f2e3da..6559a941 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -283,12 +283,10 @@ func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVo } // cloudscale does not support changing storage type when restoring from snapshot. - // The restored volume must have the same storage type as the source volume of the snapshot. - if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" && storageType != snapshot.Volume.Type { - return nil, status.Errorf(codes.InvalidArgument, - "requested storage type %s does not match snapshot storage type %s. "+ - "Storage type cannot be changed when creating a volume from a snapshot", - storageType, snapshot.Volume.Type) + // The restored volume type is inherited from the source volume of the snapshot. + if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" { + ll.WithField("requested_type", storageType). 
+ Debug("ignoring storage type parameter when restoring from snapshot") } luksEncrypted := "false" From da0b9497372fa07faeb52f469abd5dc6a876aea4 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 29 Jan 2026 10:11:59 +0100 Subject: [PATCH 23/26] setup instructions for volumesnapshotclass.yaml --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 78d93e8e..c228338c 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,8 @@ Install the snapshot resources using kustomize (recommended): ``` kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/client/config/crd?ref=v8.4.0 kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/deploy/kubernetes/snapshot-controller?ref=v8.4.0 +# setup volumesnapshotclass in your cluster +kubectl apply -f examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml ``` ### Kubernetes Compatibility From 9436917031138cb48ce3bfc565052365062e6139 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 29 Jan 2026 10:13:01 +0100 Subject: [PATCH 24/26] remove leader-election config, as we run it on single replica --- charts/csi-cloudscale/templates/statefulset.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/charts/csi-cloudscale/templates/statefulset.yaml b/charts/csi-cloudscale/templates/statefulset.yaml index 6f315449..c2a0c27b 100644 --- a/charts/csi-cloudscale/templates/statefulset.yaml +++ b/charts/csi-cloudscale/templates/statefulset.yaml @@ -76,7 +76,6 @@ spec: image: "{{ .Values.snapshotter.image.registry }}/{{ .Values.snapshotter.image.repository }}:{{ .Values.snapshotter.image.tag }}" args: - "--csi-address=$(CSI_ENDPOINT)" - - "--leader-election=true" - "--v=5" env: - name: CSI_ENDPOINT From 23d371c9168a64bb7580676e0e284be3155b2569 Mon Sep 17 00:00:00 2001 From: Julian Bigler Date: Thu, 29 Jan 2026 12:25:59 +0100 Subject: [PATCH 25/26] add documentation for luks examples --- .../luks-encrypted-volumes/README.md | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 examples/kubernetes/luks-encrypted-volumes/README.md diff --git a/examples/kubernetes/luks-encrypted-volumes/README.md b/examples/kubernetes/luks-encrypted-volumes/README.md new file mode 100644 index 00000000..3d7ef3b0 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/README.md @@ -0,0 +1,135 @@ +# LUKS Encrypted Volumes Example + +This example demonstrates how to create and restore LUKS-encrypted volumes from snapshots using the cloudscale.ch CSI driver. + +## Prerequisites + +1. **Snapshot CRDs installed**: See the [main README](../../README.md#prerequisites-for-snapshot-support) +2. **VolumeSnapshotClass created**: See the [volume-snapshots example](../volume-snapshots/) +3. **LUKS storage classes available**: `cloudscale-volume-ssd-luks` or `cloudscale-volume-bulk-luks` + +## Workflow + +### 1. Create Original LUKS Volume + +```bash +# Create the LUKS secret (contains the encryption key) +kubectl apply -f luks-secret.yaml + +# Create the PVC (this will create a LUKS-encrypted volume) +kubectl apply -f luks-pvc.yaml + +# Optional: Create a pod to use the volume +kubectl apply -f luks-pod.yaml +``` + +**Note:** The pod will remain in `ContainerCreating` state until: +- The PVC is bound (volume provisioned) +- The LUKS volume is decrypted and mounted on the node +- This can take 30-60 seconds depending on volume size + +### 2. 
Create Snapshot + +```bash +kubectl apply -f luks-volumesnapshot.yaml +``` + +Wait for the snapshot to be ready: +```bash +kubectl get volumesnapshot my-luks-snapshot +# Wait until READYTOUSE is true +``` + +### 3. Restore from Snapshot + +```bash +# Create the LUKS secret for the restored volume +# IMPORTANT: Use the SAME key as the original volume +kubectl apply -f restored-luks-secret.yaml + +# Create the restored PVC (from snapshot) +kubectl apply -f restored-luks-pvc.yaml + +# Optional: Create a pod to use the restored volume +kubectl apply -f restored-luks-pod.yaml +``` + +**Note:** Restored pods will also remain in `ContainerCreating` until: +- The volume is created from the snapshot +- The PVC is bound +- The LUKS volume is decrypted and mounted +- This can take 1-2 minutes for snapshot restore + +## Verification + +Check PVC status: +```bash +kubectl get pvc +# Wait until STATUS is Bound +``` + +Check pod status: +```bash +kubectl get pod +# Pods will be in ContainerCreating until PVCs are bound and volumes are mounted +``` + +Check pod events if stuck: +```bash +kubectl describe pod my-csi-app-luks +kubectl describe pod my-restored-luks-app +``` + +## Important Notes + +1. **LUKS Key Matching**: The restored volume MUST use the same LUKS key as the original volume. The key is stored in the secret. + +2. **Secret Naming**: The secret name must follow the pattern `${pvc-name}-luks-key`: + - Original PVC `csi-pod-pvc-luks` → Secret `csi-pod-pvc-luks-luks-key` + - Restored PVC `my-restored-luks-volume` → Secret `my-restored-luks-volume-luks-key` + +3. **Storage Class**: Both original and restored volumes must use a LUKS storage class (`cloudscale-volume-ssd-luks` or `cloudscale-volume-bulk-luks`). + +4. **Size Matching**: The restored volume size must match the snapshot size exactly (1Gi in this example). + +5. **ContainerCreating State**: It's **expected** for pods to remain in `ContainerCreating` state for 30-120 seconds while: + - Volumes are being provisioned/restored + - LUKS volumes are being decrypted + - Filesystems are being mounted + +## Troubleshooting + +If pods remain stuck in `ContainerCreating` for more than 5 minutes: + +1. Check PVC status: + ```bash + kubectl get pvc + kubectl describe pvc + ``` + +2. Check for events: + ```bash + kubectl get events --sort-by='.lastTimestamp' + ``` + +3. Verify secrets exist: + ```bash + kubectl get secret -luks-key + ``` + +4. 
Check node logs for LUKS errors:
+   ```bash
+   kubectl logs -n kube-system -l app=csi-cloudscale-node
+   ```
+
+## Cleanup
+
+```bash
+kubectl delete -f restored-luks-pod.yaml
+kubectl delete -f restored-luks-pvc.yaml
+kubectl delete -f restored-luks-secret.yaml
+kubectl delete -f luks-volumesnapshot.yaml
+kubectl delete -f luks-pod.yaml
+kubectl delete -f luks-pvc.yaml
+kubectl delete -f luks-secret.yaml
+```

From 64768a122e820e62ecfeeabc5f90b8a2d464f776 Mon Sep 17 00:00:00 2001
From: Julian Bigler
Date: Thu, 29 Jan 2026 12:33:03 +0100
Subject: [PATCH 26/26] fail integration tests with clear error if CRDs or VolumeSnapshotClass is missing

---
 test/kubernetes/integration_test.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go
index 0af09f2c..dcde0b03 100644
--- a/test/kubernetes/integration_test.go
+++ b/test/kubernetes/integration_test.go
@@ -27,7 +27,7 @@ import (
 	snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1"
 	snapshotclientset "github.com/kubernetes-csi/external-snapshotter/client/v6/clientset/versioned"
 	appsv1 "k8s.io/api/apps/v1"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	kubeerrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -1907,6 +1907,22 @@ func generateMetricEntry(line string) MetricEntry {
 func makeKubernetesVolumeSnapshot(t *testing.T, snapshotName string, pvcName string) *snapshotv1.VolumeSnapshot {
 	className := "cloudscale-snapshots"
 
+	// Verify that the VolumeSnapshotClass exists before creating the VolumeSnapshot
+	// This helps catch configuration issues early (e.g., CRDs not installed)
+	_, err := snapshotClient.SnapshotV1().VolumeSnapshotClasses().Get(
+		context.Background(),
+		className,
+		metav1.GetOptions{},
+	)
+	if err != nil {
+		if kubeerrors.IsNotFound(err) {
+			t.Fatalf("VolumeSnapshotClass %q not found. "+
+				"This usually means the snapshot CRDs are not installed. "+
+				"See the README for setup instructions and ensure the VolumeSnapshotClass resource exists. Error: %v", className, err)
+		}
+		t.Fatalf("Failed to get VolumeSnapshotClass %q: %v", className, err)
+	}
+
 	snapshot := &snapshotv1.VolumeSnapshot{
 		TypeMeta: metav1.TypeMeta{
 			Kind:       "VolumeSnapshot",