diff --git a/README.md b/README.md index b17da5ae..c228338c 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,22 @@ The current version is: **`v3.6.0`**. ## Installing to Kubernetes +Follow these steps to deploy the cloudscale.ch CSI driver to your Kubernetes cluster. + +### Prerequisites for Snapshot Support + +To use CSI snapshots with this driver, your cluster must have the VolumeSnapshot CRDs and the snapshot controller installed. + +Note: Some Kubernetes distributions already include these CRDs and controllers. You only need to apply them manually if your cluster does not provide them. + +Install the snapshot resources using kustomize (recommended): +``` +kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/client/config/crd?ref=v8.4.0 +kubectl apply -k https://github.com/kubernetes-csi/external-snapshotter/deploy/kubernetes/snapshot-controller?ref=v8.4.0 +# setup volumesnapshotclass in your cluster +kubectl apply -f examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml +``` + ### Kubernetes Compatibility The following table describes the required cloudscale.ch driver version per diff --git a/charts/csi-cloudscale/templates/rbac.yaml b/charts/csi-cloudscale/templates/rbac.yaml index edd394c8..aa6cb9a0 100644 --- a/charts/csi-cloudscale/templates/rbac.yaml +++ b/charts/csi-cloudscale/templates/rbac.yaml @@ -16,12 +16,9 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["list", "watch", "create", "update", "patch"] - - apiGroups: ["snapshot.storage.k8s.io"] - resources: ["volumesnapshots"] - verbs: [ "get", "list", "watch", "update" ] - - apiGroups: ["snapshot.storage.k8s.io"] - resources: ["volumesnapshotcontents"] - verbs: ["get", "list"] + - apiGroups: [ "coordination.k8s.io" ] + resources: [ "leases" ] + verbs: [ "get", "list", "watch", "create", "update", "patch", "delete" ] - apiGroups: [ "storage.k8s.io" ] resources: [ "csinodes" ] verbs: [ "get", "list", "watch" ] @@ -52,6 +49,27 @@ rules: --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "csi-cloudscale.driver-name" . }}-snapshotter-role +rules: + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: [ "get", "list", "watch", "update" ] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotcontents/status" ] + verbs: [ "update", "patch" ] + - apiGroups: [ "snapshot.storage.k8s.io" ] + resources: [ "volumesnapshotclasses" ] + verbs: [ "get", "list", "watch" ] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ include "csi-cloudscale.driver-name" . }}-resizer-role rules: @@ -99,6 +117,19 @@ roleRef: --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "csi-cloudscale.driver-name" . }}-snapshotter-binding +subjects: + - kind: ServiceAccount + name: {{ include "csi-cloudscale.controller-service-account-name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "csi-cloudscale.driver-name" . }}-snapshotter-role + apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ include "csi-cloudscale.driver-name" . 
}}-resizer-binding subjects: diff --git a/charts/csi-cloudscale/templates/statefulset.yaml b/charts/csi-cloudscale/templates/statefulset.yaml index fd095b03..c2a0c27b 100644 --- a/charts/csi-cloudscale/templates/statefulset.yaml +++ b/charts/csi-cloudscale/templates/statefulset.yaml @@ -72,6 +72,17 @@ spec: volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ + - name: csi-snapshotter + image: "{{ .Values.snapshotter.image.registry }}/{{ .Values.snapshotter.image.repository }}:{{ .Values.snapshotter.image.tag }}" + args: + - "--csi-address=$(CSI_ENDPOINT)" + - "--v=5" + env: + - name: CSI_ENDPOINT + value: unix:///var/lib/csi/sockets/pluginproxy/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /var/lib/csi/sockets/pluginproxy/ - name: csi-cloudscale-plugin image: "{{ .Values.controller.image.registry }}/{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag }}" args : diff --git a/charts/csi-cloudscale/values.yaml b/charts/csi-cloudscale/values.yaml index 0778a405..9d0a05b1 100644 --- a/charts/csi-cloudscale/values.yaml +++ b/charts/csi-cloudscale/values.yaml @@ -79,6 +79,18 @@ resizer: # cpu: 100m # memory: 128Mi + + +snapshotter: + image: + registry: registry.k8s.io + repository: sig-storage/csi-snapshotter + tag: v8.4.0 + pullPolicy: IfNotPresent + logLevelVerbosity: "5" + resources: {} + + controller: replicas: 1 image: diff --git a/driver/controller.go b/driver/controller.go index 09df2c45..6559a941 100644 --- a/driver/controller.go +++ b/driver/controller.go @@ -19,18 +19,20 @@ package driver import ( "context" + "errors" "fmt" "net/http" "regexp" "strconv" "strings" + "time" "github.com/cloudscale-ch/cloudscale-go-sdk/v6" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/sirupsen/logrus" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - + "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/apimachinery/pkg/util/sets" ) @@ -85,6 +87,15 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("volume capabilities cannot be satisified: %s", strings.Join(violations, "; "))) } + if req.GetVolumeContentSource() != nil { + if sourceSnapshot := req.GetVolumeContentSource().GetSnapshot(); sourceSnapshot != nil { + return d.createVolumeFromSnapshot(ctx, req, sourceSnapshot) + } + if sourceVolume := req.GetVolumeContentSource().GetVolume(); sourceVolume != nil { + return nil, status.Error(codes.Unimplemented, "volume cloning is not yet supported") + } + } + if req.AccessibilityRequirements != nil { for _, t := range req.AccessibilityRequirements.Requisite { zone, ok := t.Segments[topologyZonePrefix] @@ -173,7 +184,7 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return &csi.CreateVolumeResponse{Volume: &csiVolume}, nil } - volumeReq := &cloudscale.VolumeRequest{ + volumeReq := &cloudscale.VolumeCreateRequest{ Name: volumeName, SizeGB: sizeGB, Type: storageType, @@ -193,6 +204,170 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) return resp, nil } +// createVolumeFromSnapshot handles volume creation from an existing snapshot +func (d *Driver) createVolumeFromSnapshot(ctx context.Context, req *csi.CreateVolumeRequest, sourceSnapshot *csi.VolumeContentSource_SnapshotSource) (*csi.CreateVolumeResponse, error) { + sourceSnapshotID := sourceSnapshot.GetSnapshotId() + if sourceSnapshotID == "" { + return nil, status.Error(codes.InvalidArgument, 
"snapshotID must be provided in volume content source") + } + + volumeName := req.Name + + ll := d.log.WithFields(logrus.Fields{ + "volume_name": volumeName, + "source_snapshot_id": sourceSnapshotID, + "method": "create_volume_from_snapshot", + }) + ll.Info("create volume from snapshot called") + + // Verify snapshot exists and get its properties, must return NotFound when snapshot does not exist. + snapshot, err := d.cloudscaleClient.VolumeSnapshots.Get(ctx, sourceSnapshotID) + if err != nil { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { + if errorResponse.StatusCode == http.StatusNotFound { + return nil, status.Errorf(codes.NotFound, "source snapshot %s not found", sourceSnapshotID) + } + } + wrapped := fmt.Errorf("failed to get source snapshot: %w", err) + return nil, status.Error(codes.Internal, wrapped.Error()) + } + + ll = ll.WithFields(logrus.Fields{ + "snapshot_size_gb": snapshot.SizeGB, + "snapshot_volume_type": snapshot.Volume.Type, + "snapshot_zone": snapshot.Zone, + }) + + // Validate capacity requirements + // CSI spec: restored volume must be at least as large as the snapshot + // Cloudscale only supports the same size as the snapshot + if req.CapacityRange != nil { + requiredBytes := req.CapacityRange.GetRequiredBytes() + if requiredBytes > 0 { + requiredGB := int(requiredBytes / GB) + if requiredGB < snapshot.SizeGB { + return nil, status.Errorf(codes.InvalidArgument, + "requested volume size (%d GB) is smaller than snapshot size (%d GB)", + requiredGB, snapshot.SizeGB) + } + if requiredGB > snapshot.SizeGB { + return nil, status.Errorf(codes.InvalidArgument, + "cloudscale.ch API does not support creating volumes larger than snapshot size during restore. "+ + "Create volume from snapshot first, then expand it using ControllerExpandVolume. "+ + "Requested: %d GB, Snapshot: %d GB", requiredGB, snapshot.SizeGB) + } + } + + // Validate limit if specified + limitBytes := req.CapacityRange.GetLimitBytes() + if limitBytes > 0 && int64(snapshot.SizeGB)*GB > limitBytes { + return nil, status.Errorf(codes.OutOfRange, + "snapshot size (%d GB) exceeds capacity limit (%d bytes)", + snapshot.SizeGB, limitBytes) + } + } + + // cloudscale does create the volume in the same zone as the snapshot. + if req.AccessibilityRequirements != nil { + for _, t := range req.AccessibilityRequirements.Requisite { + zone, ok := t.Segments[topologyZonePrefix] + if !ok { + continue + } + if zone != snapshot.Zone.Slug { + return nil, status.Errorf(codes.InvalidArgument, + "requested zone %s does not match snapshot zone %s", zone, snapshot.Zone) + } + } + } + + // cloudscale does not support changing storage type when restoring from snapshot. + // The restored volume type is inherited from the source volume of the snapshot. + if storageType := req.Parameters[StorageTypeAttribute]; storageType != "" { + ll.WithField("requested_type", storageType). 
+ Debug("ignoring storage type parameter when restoring from snapshot") + } + + luksEncrypted := "false" + if req.Parameters[LuksEncryptedAttribute] == "true" { + if violations := validateLuksCapabilities(req.VolumeCapabilities); len(violations) > 0 { + return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("volume capabilities cannot be satisified: %s", strings.Join(violations, "; "))) + } + luksEncrypted = "true" + } + + // Check if volume already exists + volumes, err := d.cloudscaleClient.Volumes.List(ctx, cloudscale.WithNameFilter(volumeName)) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + csiVolume := csi.Volume{ + CapacityBytes: int64(snapshot.SizeGB) * GB, + AccessibleTopology: []*csi.Topology{ + { + Segments: map[string]string{ + topologyZonePrefix: d.zone, + }, + }, + }, + VolumeContext: map[string]string{ + PublishInfoVolumeName: volumeName, + LuksEncryptedAttribute: luksEncrypted, + }, + ContentSource: req.GetVolumeContentSource(), + } + + if luksEncrypted == "true" { + csiVolume.VolumeContext[LuksCipherAttribute] = req.Parameters[LuksCipherAttribute] + csiVolume.VolumeContext[LuksKeySizeAttribute] = req.Parameters[LuksKeySizeAttribute] + } + + // Volume already exists - validate it matches request + if len(volumes) != 0 { + if len(volumes) > 1 { + return nil, fmt.Errorf("fatal issue: duplicate volume %q exists", volumeName) + } + vol := volumes[0] + + if vol.SizeGB != snapshot.SizeGB { + return nil, status.Errorf(codes.AlreadyExists, + "volume %q already exists with size %d GB, but snapshot requires %d GB", + volumeName, vol.SizeGB, snapshot.SizeGB) + } + + if vol.Zone != snapshot.Zone { + return nil, status.Errorf(codes.AlreadyExists, + "volume %q already exists in zone %s, but snapshot is in zone %s", + volumeName, vol.Zone, snapshot.Zone) + } + + ll.Info("volume from snapshot already exists") + csiVolume.VolumeId = vol.UUID + return &csi.CreateVolumeResponse{Volume: &csiVolume}, nil + } + + // Create volume from snapshot + volumeReq := &cloudscale.VolumeCreateRequest{ + Name: volumeName, + VolumeSnapshotUUID: sourceSnapshotID, + // Size, Type, Zone are inherited from snapshot - do NOT set them + } + + ll.WithField("volume_req", volumeReq).Info("creating volume from snapshot") + vol, err := d.cloudscaleClient.Volumes.Create(ctx, volumeReq) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to create volume from snapshot: %v", err) + } + + csiVolume.VolumeId = vol.UUID + resp := &csi.CreateVolumeResponse{Volume: &csiVolume} + + ll.WithField("response", resp).Info("volume created from snapshot") + return resp, nil +} + // DeleteVolume deletes the given volume. The function is idempotent. func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) { if req.VolumeId == "" { @@ -207,17 +382,31 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) err := d.cloudscaleClient.Volumes.Delete(ctx, req.VolumeId) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { // To make it idempotent, the volume might already have been // deleted, so a 404 is ok. 
ll.WithFields(logrus.Fields{ "error": err, "resp": errorResponse, - }).Warn("assuming volume is already deleted") + }).Debug("assuming volume is already deleted") return &csi.DeleteVolumeResponse{}, nil } + + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Debug("cloudscale API returned error during volume deletion") + + // Check if the error indicates snapshots exist (HTTP 400 with error message "Snapshots exist for this volume") + if errorResponse.StatusCode == http.StatusBadRequest && strings.Contains(strings.ToLower(err.Error()), strings.ToLower("Snapshots exist for this volume")) { + ll.WithFields(logrus.Fields{ + "error": err, + "resp": errorResponse, + }).Warn("volume has snapshots, cannot delete yet") + return nil, status.Error(codes.FailedPrecondition, "volume has existing snapshots that must be deleted first") + } } return nil, err } @@ -255,7 +444,7 @@ func (d *Driver) ControllerPublishVolume(ctx context.Context, req *csi.Controlle }) ll.Info("controller publish volume called") - attachRequest := &cloudscale.VolumeRequest{ + attachRequest := &cloudscale.VolumeUpdateRequest{ ServerUUIDs: &[]string{req.NodeId}, } err := d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, attachRequest) @@ -298,8 +487,8 @@ func (d *Driver) ControllerUnpublishVolume(ctx context.Context, req *csi.Control // check if volume exist before trying to detach it volume, err := d.cloudscaleClient.Volumes.Get(ctx, req.VolumeId) if err != nil { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { if errorResponse.StatusCode == http.StatusNotFound { ll.Info("assuming volume is detached because it does not exist") return &csi.ControllerUnpublishVolumeResponse{}, nil @@ -329,7 +518,7 @@ func (d *Driver) ControllerUnpublishVolume(ctx context.Context, req *csi.Control ll.Info("Volume is attached to node given in request or NodeID in request is not set.") - detachRequest := &cloudscale.VolumeRequest{ + detachRequest := &cloudscale.VolumeUpdateRequest{ ServerUUIDs: &[]string{}, } err = d.cloudscaleClient.Volumes.Update(ctx, req.VolumeId, detachRequest) @@ -451,9 +640,7 @@ func (d *Driver) ControllerGetCapabilities(ctx context.Context, req *csi.Control csi.ControllerServiceCapability_RPC_PUBLISH_UNPUBLISH_VOLUME, csi.ControllerServiceCapability_RPC_LIST_VOLUMES, csi.ControllerServiceCapability_RPC_EXPAND_VOLUME, - - // TODO(arslan): enable once snapshotting is supported - // csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT, + csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT, // csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS, // TODO: check if this can be implemented @@ -476,20 +663,135 @@ func (d *Driver) ControllerGetCapabilities(ctx context.Context, req *csi.Control // CreateSnapshot will be called by the CO to create a new snapshot from a // source volume on behalf of a user. 
func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) { - d.log.WithFields(logrus.Fields{ - "req": req, - "method": "create_snapshot", - }).Warn("create snapshot is not implemented") - return nil, status.Error(codes.Unimplemented, "") + if req.Name == "" { + return nil, status.Error(codes.InvalidArgument, "CreateSnapshotRequest Name must be provided") + } + + if req.SourceVolumeId == "" { + return nil, status.Error(codes.InvalidArgument, "CreateSnapshotRequest Source Volume Id must be provided") + } + + ll := d.log.WithFields(logrus.Fields{ + "source_volume_id": req.SourceVolumeId, + "name": req.Name, + "method": "create_snapshot", + }) + + ll.Info("find existing volume snapshots with same name") + snapshots, err := d.cloudscaleClient.VolumeSnapshots.List(ctx, cloudscale.WithNameFilter(req.Name)) + if err != nil { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Warn("cloudscale API returned error during snapshot list") + } + return nil, status.Errorf(codes.Internal, "failed to list snapshots: %v", err) + } + + for _, snapshot := range snapshots { + if snapshot.Volume.UUID == req.SourceVolumeId { + t, err := time.Parse(time.RFC3339, snapshot.CreatedAt) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to parse snapshot CreatedAt timestamp %q: %v", snapshot.CreatedAt, err) + } + creationTime := timestamppb.New(t) + + return &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SnapshotId: snapshot.UUID, + SourceVolumeId: snapshot.Volume.UUID, + ReadyToUse: snapshot.Status == "available", + SizeBytes: int64(snapshot.SizeGB * GB), + CreationTime: creationTime, + }, + }, nil + } + + // Snapshot name exists but for a different volume + if snapshot.Volume.UUID != req.SourceVolumeId { + return nil, status.Error(codes.AlreadyExists, "snapshot with this name already exists for another volume") + } + } + + volumeSnapshotCreateRequest := &cloudscale.VolumeSnapshotCreateRequest{ + Name: req.Name, + SourceVolume: req.SourceVolumeId, + // todo: Tags are not currently supported in snapshot creation + } + + ll.WithField("volume_snapshot_create_request", volumeSnapshotCreateRequest).Info("creating volume snapshot") + snapshot, err := d.cloudscaleClient.VolumeSnapshots.Create(ctx, volumeSnapshotCreateRequest) + if err != nil { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { + ll.WithFields(logrus.Fields{ + "status_code": errorResponse.StatusCode, + "error": err, + }).Warn("cloudscale API returned error during snapshot creation") + + if errorResponse.StatusCode == http.StatusNotFound { + return nil, status.Errorf(codes.NotFound, "source volume %s not found: %v", req.SourceVolumeId, err) + } + } + return nil, status.Errorf(codes.Internal, "failed to create snapshot: %v", err) + } + + t, err := time.Parse(time.RFC3339, snapshot.CreatedAt) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to parse snapshot CreatedAt timestamp %q: %v", snapshot.CreatedAt, err) + } + creationTime := timestamppb.New(t) + + resp := &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SnapshotId: snapshot.UUID, + SourceVolumeId: snapshot.Volume.UUID, + ReadyToUse: snapshot.Status == "available", // check status + SizeBytes: int64(snapshot.SizeGB * GB), + CreationTime: creationTime, + }, + } + + ll.WithField("response", resp).Info("volume snapshot 
created") + return resp, nil } -// DeleteSnapshost will be called by the CO to delete a snapshot. +// DeleteSnapshot will be called by the CO to delete a snapshot. func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) { - d.log.WithFields(logrus.Fields{ - "req": req, - "method": "delete_snapshot", - }).Warn("delete snapshot is not implemented") - return nil, status.Error(codes.Unimplemented, "") + if req.SnapshotId == "" { + return nil, status.Error(codes.InvalidArgument, "DeleteSnapshot Snapshot ID must be provided") + } + + ll := d.log.WithFields(logrus.Fields{ + "snapshot_id": req.SnapshotId, + "method": "delete_snapshot", + }) + ll.Info("delete snapshot called") + + // Note: Snapshot deletion is asynchronous via the cloudscale API. + // The HTTP request returns success immediately, but the snapshot enters "deleting" state. + // Cloudscale handles the deletion asynchronously. The operation is idempotent. + err := d.cloudscaleClient.VolumeSnapshots.Delete(ctx, req.SnapshotId) + if err != nil { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { + if errorResponse.StatusCode == http.StatusNotFound { + // To make it idempotent, the snapshot might already have been + // deleted, so a 404 is ok. + ll.WithFields(logrus.Fields{ + "error": err, + "resp": errorResponse, + }).Debug("assuming snapshot is already deleted") + return &csi.DeleteSnapshotResponse{}, nil + } + } + return nil, err + } + + ll.Info("snapshot is deleted") + return &csi.DeleteSnapshotResponse{}, nil } // ListSnapshots returns the information about all snapshots on the storage @@ -538,7 +840,7 @@ func (d *Driver) ControllerExpandVolume(ctx context.Context, req *csi.Controller return &csi.ControllerExpandVolumeResponse{CapacityBytes: int64(volume.SizeGB) * GB, NodeExpansionRequired: true}, nil } - volumeReq := &cloudscale.VolumeRequest{ + volumeReq := &cloudscale.VolumeUpdateRequest{ SizeGB: resizeGigaBytes, } err = d.cloudscaleClient.Volumes.Update(ctx, volume.UUID, volumeReq) @@ -678,8 +980,8 @@ func validateLuksCapabilities(caps []*csi.VolumeCapability) []string { } func reraiseNotFound(err error, log *logrus.Entry, operation string) error { - errorResponse, ok := err.(*cloudscale.ErrorResponse) - if ok { + var errorResponse *cloudscale.ErrorResponse + if errors.As(err, &errorResponse) { lt := log.WithFields(logrus.Fields{ "error": err, "errorResponse": errorResponse, diff --git a/driver/driver_test.go b/driver/driver_test.go index eaf5b848..488c884f 100644 --- a/driver/driver_test.go +++ b/driver/driver_test.go @@ -110,6 +110,11 @@ func NewFakeClient(initialServers map[string]*cloudscale.Server) *cloudscale.Cli volumes: make(map[string]*cloudscale.Volume), } + fakeClient.VolumeSnapshots = &FakeVolumeSnapshotServiceOperations{ + fakeClient: fakeClient, + snapshots: make(map[string]*cloudscale.VolumeSnapshot), + } + return fakeClient } @@ -197,8 +202,13 @@ type FakeVolumeServiceOperations struct { volumes map[string]*cloudscale.Volume } -func (f *FakeVolumeServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeRequest) (*cloudscale.Volume, error) { +func (f *FakeVolumeServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeCreateRequest) (*cloudscale.Volume, error) { id := randString(32) + + // todo: CSI-test pass without this, but we could implement: + // - check if volumeSnapshot is present. 
Return error if volumeSnapshot does not exist + // - create volume with inferred values form snapshot. + vol := &cloudscale.Volume{ UUID: id, Name: createRequest.Name, @@ -267,7 +277,7 @@ func extractParams(modifiers []cloudscale.ListRequestModifier) url.Values { return params } -func (f *FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string, updateRequest *cloudscale.VolumeRequest) error { +func (f *FakeVolumeServiceOperations) Update(ctx context.Context, volumeID string, updateRequest *cloudscale.VolumeUpdateRequest) error { vol, ok := f.volumes[volumeID] if ok != true { return generateNotFoundError() @@ -319,6 +329,22 @@ func getVolumesPerServer(f *FakeVolumeServiceOperations, serverUUID string) int } func (f *FakeVolumeServiceOperations) Delete(ctx context.Context, volumeID string) error { + + // prevent deletion if snapshots exist + snapshots, err := f.fakeClient.VolumeSnapshots.List(context.Background()) + + if err != nil { + return err + } + + for _, snapshot := range snapshots { + if snapshot.Volume.UUID == volumeID { + return &cloudscale.ErrorResponse{ + StatusCode: 409, + Message: map[string]string{"detail": "volume has snapshots"}, + } + } + } delete(f.volumes, volumeID) return nil } @@ -376,6 +402,93 @@ func (f *FakeVolumeServiceOperations) WaitFor(ctx context.Context, id string, co panic("implement me") } +type FakeVolumeSnapshotServiceOperations struct { + fakeClient *cloudscale.Client + snapshots map[string]*cloudscale.VolumeSnapshot +} + +func (f FakeVolumeSnapshotServiceOperations) Create(ctx context.Context, createRequest *cloudscale.VolumeSnapshotCreateRequest) (*cloudscale.VolumeSnapshot, error) { + + vol, err := f.fakeClient.Volumes.Get(ctx, createRequest.SourceVolume) + if err != nil { + return nil, err + } + + id := randString(32) + snap := &cloudscale.VolumeSnapshot{ + UUID: id, + Name: createRequest.Name, + SizeGB: vol.SizeGB, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + Status: "available", + Volume: cloudscale.VolumeStub{ + UUID: createRequest.SourceVolume, + }, + } + + f.snapshots[id] = snap + return snap, nil +} + +func (f *FakeVolumeSnapshotServiceOperations) Get( + ctx context.Context, + snapshotID string, +) (*cloudscale.VolumeSnapshot, error) { + + snap, ok := f.snapshots[snapshotID] + if !ok { + return nil, generateNotFoundError() + } + return snap, nil +} + +func (f *FakeVolumeSnapshotServiceOperations) List( + ctx context.Context, + modifiers ...cloudscale.ListRequestModifier, +) ([]cloudscale.VolumeSnapshot, error) { + var snapshots []cloudscale.VolumeSnapshot + + for _, snapshot := range f.snapshots { + snapshots = append(snapshots, *snapshot) + } + + if len(modifiers) == 0 { + return snapshots, nil + } + if len(modifiers) > 1 { + panic("implement me (support for more than one modifier)") + } + + params := extractParams(modifiers) + + if filterName := params.Get("name"); filterName != "" { + filtered := make([]cloudscale.VolumeSnapshot, 0, 1) + for _, snapshot := range snapshots { + if snapshot.Name == filterName { + filtered = append(filtered, snapshot) + } + } + return filtered, nil + } + + panic("implement me (support for unknown param)") +} + +func (f FakeVolumeSnapshotServiceOperations) Update(ctx context.Context, resourceID string, updateRequest *cloudscale.VolumeSnapshotUpdateRequest) error { + panic("implement me") +} + +func (f *FakeVolumeSnapshotServiceOperations) Delete( + ctx context.Context, + snapshotID string, +) error { + delete(f.snapshots, snapshotID) + return nil +} +func (f 
FakeVolumeSnapshotServiceOperations) WaitFor(ctx context.Context, resourceID string, condition func(resource *cloudscale.VolumeSnapshot) (bool, error), opts ...backoff.RetryOption) (*cloudscale.VolumeSnapshot, error) { + panic("implement me") +} + func generateNotFoundError() *cloudscale.ErrorResponse { return &cloudscale.ErrorResponse{ StatusCode: 404, @@ -463,7 +576,7 @@ func TestNodeStageVolume_ConcurrentSameVolume(t *testing.T) { // Create the volume in the fake client first ctx := t.Context() - vol, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeRequest{ + vol, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeCreateRequest{ Name: "test-volume", SizeGB: 10, Type: "ssd", @@ -540,7 +653,7 @@ func TestNodeStageVolume_ConcurrentDifferentVolumes(t *testing.T) { ctx := t.Context() // Create two different volumes - vol1, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeRequest{ + vol1, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeCreateRequest{ Name: "test-volume-1", SizeGB: 10, Type: "ssd", @@ -549,7 +662,7 @@ func TestNodeStageVolume_ConcurrentDifferentVolumes(t *testing.T) { t.Fatalf("Failed to create volume 1: %v", err) } - vol2, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeRequest{ + vol2, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeCreateRequest{ Name: "test-volume-2", SizeGB: 10, Type: "ssd", @@ -628,7 +741,7 @@ func TestNodeOperations_CrossOperationLocking(t *testing.T) { ctx := t.Context() // Create a volume - vol, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeRequest{ + vol, err := driver.cloudscaleClient.Volumes.Create(ctx, &cloudscale.VolumeCreateRequest{ Name: "test-volume", SizeGB: 10, Type: "ssd", diff --git a/examples/kubernetes/luks-encrypted-volumes/README.md b/examples/kubernetes/luks-encrypted-volumes/README.md new file mode 100644 index 00000000..3d7ef3b0 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/README.md @@ -0,0 +1,135 @@ +# LUKS Encrypted Volumes Example + +This example demonstrates how to create and restore LUKS-encrypted volumes from snapshots using the cloudscale.ch CSI driver. + +## Prerequisites + +1. **Snapshot CRDs installed**: See the [main README](../../README.md#prerequisites-for-snapshot-support) +2. **VolumeSnapshotClass created**: See the [volume-snapshots example](../volume-snapshots/) +3. **LUKS storage classes available**: `cloudscale-volume-ssd-luks` or `cloudscale-volume-bulk-luks` + +## Workflow + +### 1. Create Original LUKS Volume + +```bash +# Create the LUKS secret (contains the encryption key) +kubectl apply -f luks-secret.yaml + +# Create the PVC (this will create a LUKS-encrypted volume) +kubectl apply -f luks-pvc.yaml + +# Optional: Create a pod to use the volume +kubectl apply -f luks-pod.yaml +``` + +**Note:** The pod will remain in `ContainerCreating` state until: +- The PVC is bound (volume provisioned) +- The LUKS volume is decrypted and mounted on the node +- This can take 30-60 seconds depending on volume size + +### 2. Create Snapshot + +```bash +kubectl apply -f luks-volumesnapshot.yaml +``` + +Wait for the snapshot to be ready: +```bash +kubectl get volumesnapshot my-luks-snapshot +# Wait until READYTOUSE is true +``` + +### 3. 
Restore from Snapshot
+
+```bash
+# Create the LUKS secret for the restored volume
+# IMPORTANT: Use the SAME key as the original volume
+kubectl apply -f restored-luks-secret.yaml
+
+# Create the restored PVC (from snapshot)
+kubectl apply -f restored-luks-pvc.yaml
+
+# Optional: Create a pod to use the restored volume
+kubectl apply -f restored-luks-pod.yaml
+```
+
+**Note:** Restored pods will also remain in `ContainerCreating` until:
+- The volume is created from the snapshot
+- The PVC is bound
+- The LUKS volume is decrypted and mounted
+- This can take 1-2 minutes for a snapshot restore
+
+## Verification
+
+Check PVC status:
+```bash
+kubectl get pvc
+# Wait until STATUS is Bound
+```
+
+Check pod status:
+```bash
+kubectl get pod
+# Pods will be in ContainerCreating until PVCs are bound and volumes are mounted
+```
+
+Check pod events if a pod is stuck:
+```bash
+kubectl describe pod my-csi-app-luks
+kubectl describe pod my-restored-luks-app
+```
+
+## Important Notes
+
+1. **LUKS Key Matching**: The restored volume MUST use the same LUKS key as the original volume. The key is stored in the secret.
+
+2. **Secret Naming**: The secret name must follow the pattern `${pvc-name}-luks-key`:
+   - Original PVC `csi-pod-pvc-luks` → Secret `csi-pod-pvc-luks-luks-key`
+   - Restored PVC `my-restored-luks-volume` → Secret `my-restored-luks-volume-luks-key`
+
+3. **Storage Class**: Both the original and the restored volume must use a LUKS storage class (`cloudscale-volume-ssd-luks` or `cloudscale-volume-bulk-luks`).
+
+4. **Size Matching**: The restored volume size must match the snapshot size exactly (1Gi in this example).
+
+5. **ContainerCreating State**: It is **expected** for pods to remain in the `ContainerCreating` state for 30-120 seconds while:
+   - Volumes are being provisioned/restored
+   - LUKS volumes are being decrypted
+   - Filesystems are being mounted
+
+## Troubleshooting
+
+If pods remain stuck in `ContainerCreating` for more than 5 minutes:
+
+1. Check PVC status:
+   ```bash
+   kubectl get pvc
+   kubectl describe pvc
+   ```
+
+2. Check for events:
+   ```bash
+   kubectl get events --sort-by='.lastTimestamp'
+   ```
+
+3. Verify that the LUKS secrets exist:
+   ```bash
+   kubectl get secret | grep luks-key
+   ```
+
+4. 
Check node logs for LUKS errors: + ```bash + kubectl logs -n kube-system -l app=csi-cloudscale-node + ``` + +## Cleanup + +```bash +kubectl delete -f restored-luks-pod.yaml +kubectl delete -f restored-luks-pvc.yaml +kubectl delete -f restored-luks-secret.yaml +kubectl delete -f luks-volumesnapshot.yaml +kubectl delete -f luks-pod.yaml +kubectl delete -f luks-pvc.yaml +kubectl delete -f luks-secret.yaml +``` diff --git a/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml b/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml new file mode 100644 index 00000000..490faa55 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/luks-volumesnapshot.yaml @@ -0,0 +1,11 @@ +# VolumeSnapshot creates a snapshot of a LUKS-encrypted volume +# Make sure the VolumeSnapshotClass is created first (see ../volume-snapshots/volumesnapshotclass.yaml) +# The snapshot preserves the LUKS encryption state +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: my-luks-snapshot +spec: + volumeSnapshotClassName: cloudscale-snapshots + source: + persistentVolumeClaimName: csi-pod-pvc-luks diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml new file mode 100644 index 00000000..62bdda2a --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the restored LUKS volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-restored-luks-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-restored-luks-volume diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml new file mode 100644 index 00000000..1cc586c1 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-pvc.yaml @@ -0,0 +1,21 @@ +# PersistentVolumeClaim restored from a LUKS snapshot +# IMPORTANT: When restoring from a LUKS snapshot, you MUST: +# 1. Use a LUKS storage class (cloudscale-volume-ssd-luks or cloudscale-volume-bulk-luks) +# 2. Provide a LUKS secret with the pattern: ${pvc-name}-luks-key +# 3. Use the SAME LUKS key as the original volume +# 4. 
Match the snapshot size exactly (1Gi in this example) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-restored-luks-volume +spec: + accessModes: + - ReadWriteOnce + storageClassName: cloudscale-volume-ssd-luks + resources: + requests: + storage: 1Gi + dataSource: + name: my-luks-snapshot + kind: VolumeSnapshot + apiGroup: snapshot.storage.k8s.io diff --git a/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml b/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml new file mode 100644 index 00000000..4e2ee1b7 --- /dev/null +++ b/examples/kubernetes/luks-encrypted-volumes/restored-luks-secret.yaml @@ -0,0 +1,10 @@ +# Secret containing the LUKS key for the restored volume +# IMPORTANT: This must use the same LUKS key as the original volume +# The secret name must follow the pattern: ${pvc-name}-luks-key +# In this case: my-restored-luks-volume-luks-key +apiVersion: v1 +kind: Secret +metadata: + name: my-restored-luks-volume-luks-key +stringData: + luksKey: "hDEKFgEZgmpuppShPG7HailSFBsy8MzlvlhALvqk0+2jTrcKrFmtttoF5IGlLVoLt/jpaWnk/kcl7JxnsZ3xQjEcYumv4WkwOv77x+c2C/kyyldTNRaCaVHG9fW9n6oicoWzsyUWcmu0d+JOorGZ792lsS9Q5gXlCg5BD2x1MoVVr8hTQArFfUX6NuHF1o0v/EGHU0A5O5wiNnqpdDjf9r56rPt0H290Nr6Y5Ijb5RTIoJFT5ww5XocrvLlR/GiXRYgzeISfbfyIr8FpfRKmjPTZdLBSXPMMdHJNcPIlRG+DfnBaTKkIFwiWXjxXZss71IKibEM7Qfjwka0KFyufwA==" diff --git a/examples/kubernetes/volume-snapshots/README.md b/examples/kubernetes/volume-snapshots/README.md new file mode 100644 index 00000000..302729ad --- /dev/null +++ b/examples/kubernetes/volume-snapshots/README.md @@ -0,0 +1,62 @@ +# Volume Snapshots Example + +This example demonstrates how to create and restore volumes from snapshots using the cloudscale.ch CSI driver. + +## Prerequisites + +Before using snapshots, ensure your cluster has the VolumeSnapshot CRDs and snapshot controller installed. +See the [main README](../../README.md#prerequisites-for-snapshot-support) for installation instructions. + +## Workflow + +1. **Create VolumeSnapshotClass** (one-time setup, required before creating snapshots): + ```bash + kubectl apply -f volumesnapshotclass.yaml + ``` + + **Note:** VolumeSnapshotClass is currently not deployed automatically with the driver. You must create it manually. + This may change in future releases where it will be deployed automatically (similar to StorageClass). + +2. **Create original volume and pod** (optional, for testing): + ```bash + kubectl apply -f original-pvc.yaml + kubectl apply -f original-pod.yaml + ``` + +3. **Create snapshot**: + ```bash + kubectl apply -f volumesnapshot.yaml + ``` + +4. **Create restored volume and pod**: + ```bash + kubectl apply -f restored-pvc.yaml + kubectl apply -f restored-pod.yaml + ``` + +## Verification + +Check snapshot status: +```bash +kubectl get volumesnapshot +kubectl describe volumesnapshot/my-snapshot +``` + +Check restored volume: +```bash +kubectl get pvc +kubectl get pod +``` + +**LUKS volumes**: For LUKS-encrypted volumes, see the [LUKS snapshot example](../luks-encrypted-volumes/). 
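+
+For example, to find the cluster-scoped `VolumeSnapshotContent` object that backs `my-snapshot` (its `status.snapshotHandle` is the UUID of the snapshot on the cloudscale.ch side):
+```bash
+kubectl get volumesnapshot my-snapshot -o jsonpath='{.status.boundVolumeSnapshotContentName}'
+kubectl get volumesnapshotcontent
+```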
+ +## Cleanup + +```bash +kubectl delete -f restored-pod.yaml +kubectl delete -f restored-pvc.yaml +kubectl delete -f volumesnapshot.yaml +kubectl delete -f original-pod.yaml +kubectl delete -f original-pvc.yaml +# Note: VolumeSnapshotClass is typically not deleted as it's a cluster resource +``` diff --git a/examples/kubernetes/volume-snapshots/original-pod.yaml b/examples/kubernetes/volume-snapshots/original-pod.yaml new file mode 100644 index 00000000..b0b48808 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/original-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the original volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-volume diff --git a/examples/kubernetes/volume-snapshots/original-pvc.yaml b/examples/kubernetes/volume-snapshots/original-pvc.yaml new file mode 100644 index 00000000..24a198e5 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/original-pvc.yaml @@ -0,0 +1,12 @@ +# Original PersistentVolumeClaim that will be snapshotted +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-volume +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: cloudscale-volume-ssd diff --git a/examples/kubernetes/volume-snapshots/restored-pod.yaml b/examples/kubernetes/volume-snapshots/restored-pod.yaml new file mode 100644 index 00000000..4bdd38aa --- /dev/null +++ b/examples/kubernetes/volume-snapshots/restored-pod.yaml @@ -0,0 +1,17 @@ +# Pod using the restored volume (optional, for testing) +kind: Pod +apiVersion: v1 +metadata: + name: my-restored-app +spec: + containers: + - name: my-frontend + image: busybox + volumeMounts: + - mountPath: "/data" + name: my-cloudscale-volume + command: [ "sleep", "1000000" ] + volumes: + - name: my-cloudscale-volume + persistentVolumeClaim: + claimName: my-restored-volume diff --git a/examples/kubernetes/volume-snapshots/restored-pvc.yaml b/examples/kubernetes/volume-snapshots/restored-pvc.yaml new file mode 100644 index 00000000..5250f894 --- /dev/null +++ b/examples/kubernetes/volume-snapshots/restored-pvc.yaml @@ -0,0 +1,17 @@ +# PersistentVolumeClaim restored from the snapshot +# Note: The restored volume must have the same size as the snapshot (5Gi in this example) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-restored-volume +spec: + accessModes: + - ReadWriteOnce + storageClassName: cloudscale-volume-ssd + resources: + requests: + storage: 5Gi + dataSource: + name: my-snapshot + kind: VolumeSnapshot + apiGroup: snapshot.storage.k8s.io diff --git a/examples/kubernetes/volume-snapshots/volumesnapshot.yaml b/examples/kubernetes/volume-snapshots/volumesnapshot.yaml new file mode 100644 index 00000000..dade8aca --- /dev/null +++ b/examples/kubernetes/volume-snapshots/volumesnapshot.yaml @@ -0,0 +1,10 @@ +# VolumeSnapshot creates a snapshot of the original volume +# Make sure the VolumeSnapshotClass is created first (volumesnapshotclass.yaml) +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: my-snapshot +spec: + volumeSnapshotClassName: cloudscale-snapshots + source: + persistentVolumeClaimName: my-volume diff --git a/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml b/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml new file mode 100644 
index 00000000..f05b880c --- /dev/null +++ b/examples/kubernetes/volume-snapshots/volumesnapshotclass.yaml @@ -0,0 +1,9 @@ +# VolumeSnapshotClass defines how snapshots should be created for the cloudscale.ch CSI driver. +# This is a cluster-level resource that needs to be created once before using snapshots. +# Note: This may be deployed automatically with the driver in future releases. +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: cloudscale-snapshots +driver: csi.cloudscale.ch +deletionPolicy: Delete diff --git a/go.mod b/go.mod index a65da1cf..3c541dd9 100644 --- a/go.mod +++ b/go.mod @@ -2,16 +2,18 @@ module github.com/cloudscale-ch/csi-cloudscale require ( github.com/cenkalti/backoff/v5 v5.0.3 - github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1 + github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32 github.com/container-storage-interface/spec v1.12.0 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 github.com/kubernetes-csi/csi-test/v5 v5.4.0 + github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.11.1 golang.org/x/oauth2 v0.34.0 golang.org/x/sys v0.39.0 google.golang.org/grpc v1.77.0 + google.golang.org/protobuf v1.36.10 k8s.io/api v0.28.15 k8s.io/apimachinery v0.28.15 k8s.io/client-go v0.28.15 @@ -21,7 +23,7 @@ require ( require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful/v3 v3.9.0 // indirect + github.com/emicklei/go-restful/v3 v3.10.1 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect @@ -52,7 +54,6 @@ require ( golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.37.0 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect - google.golang.org/protobuf v1.36.10 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index efe880fb..27445127 100644 --- a/go.sum +++ b/go.sum @@ -2,16 +2,16 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= -github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1 h1:2P+TKwtB50hogQ2neIPX+7ARNMy7vaDU9bkMGEhOz3k= -github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.1/go.mod h1:NLC7XW7HqG0HggDaOBCvmf7WplTDaAqTF9u08yh6k0E= +github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32 h1:XUwopev0HXEmCVUrmuXHmDadux857+WSPWSDzj1zrhs= +github.com/cloudscale-ch/cloudscale-go-sdk/v6 v6.0.2-0.20260113130452-d14a0cbe6a32/go.mod h1:NLC7XW7HqG0HggDaOBCvmf7WplTDaAqTF9u08yh6k0E= github.com/container-storage-interface/spec v1.12.0 h1:zrFOEqpR5AghNaaDG4qyedwPBqU2fU0dWjLQMP/azK0= github.com/container-storage-interface/spec v1.12.0/go.mod h1:txsm+MA2B2WDa5kW69jNbqPnvTtfvZma7T/zsAZ9qX8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.9.0 h1:XwGDlfxEnQZzuopoqxwSEllNcCOM9DhhFyhFIIGKwxE= -github.com/emicklei/go-restful/v3 v3.9.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.10.1 h1:rc42Y5YTp7Am7CS630D7JmhRjq4UlEUuEKfrDac4bSQ= +github.com/emicklei/go-restful/v3 v3.10.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -59,6 +59,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kubernetes-csi/csi-test/v5 v5.4.0 h1:u5DgYNIreSNO2+u4Nq2Wpl+bbakRSjNyxZHmDTAqnYA= github.com/kubernetes-csi/csi-test/v5 v5.4.0/go.mod h1:anAJKFUb/SdHhIHECgSKxC5LSiLzib+1I6mrWF5Hve8= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0 h1:qS4r4ljINLWKJ9m9Ge3Q3sGZ/eIoDVDT2RhAdQFHb1k= +github.com/kubernetes-csi/external-snapshotter/client/v6 v6.3.0/go.mod h1:oGXx2XTEzs9ikW2V6IC1dD8trgjRsS/Mvc2JRiC618Y= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= diff --git a/test/kubernetes/integration_test.go b/test/kubernetes/integration_test.go index 74bbdf49..dcde0b03 100644 --- a/test/kubernetes/integration_test.go +++ b/test/kubernetes/integration_test.go @@ -24,8 +24,10 @@ import ( "golang.org/x/oauth2" "k8s.io/client-go/rest" + snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" + snapshotclientset "github.com/kubernetes-csi/external-snapshotter/client/v6/clientset/versioned" appsv1 "k8s.io/api/apps/v1" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" kubeerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -76,6 +78,7 @@ type DiskInfo struct { var ( client kubernetes.Interface + snapshotClient snapshotclientset.Interface config *rest.Config cloudscaleClient *cloudscale.Client ) @@ -158,6 +161,446 @@ func TestPod_Single_SSD_Volume(t *testing.T) { waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) } +func TestPod_Single_SSD_Volume_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + volume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, volume.SizeGB) + assert.Equal(t, "ssd", volume.Type) + + // verify that our disk is not luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", 
disk.Luks) + assert.Equal(t, "Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, 5*driver.GB, disk.FilesystemSize) + + // create a snapshot of the volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // delete the snapshot before deleting the volume + deleteKubernetesVolumeSnapshot(t, snapshot.Name) + waitCloudscaleVolumeSnapshotDeleted(t, *snapshotContent.Status.SnapshotHandle) + + // delete the pod and the pvcs and wait until the volume was deleted from + // the cloudscale.ch account; this check is necessary to test that the + // csi-plugin properly deletes the volume from cloudscale.ch + cleanup(t, podDescriptor) + waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName) +} + +func TestPod_Create_Volume_From_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc-original", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + originalVolume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, originalVolume.SizeGB) + assert.Equal(t, "ssd", originalVolume.Type) + + // verify that our disk is not luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", disk.Luks) + assert.Equal(t, "Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, 5*driver.GB, disk.FilesystemSize) + + // store the original filesystem UUID to verify it's preserved after restore + originalFilesystemUUID := disk.FilesystemUUID + + // create a snapshot of the volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + // verify the snapshot exists in 
cloudscale.ch API + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // create a new pod with a pvc restored from the snapshot + restoredPodDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-pvc-restored", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + restoredPod := makeKubernetesPod(t, restoredPodDescriptor) + restoredPVCs := makeKubernetesPVCsFromSnapshot(t, restoredPodDescriptor, snapshot.Name) + assert.Equal(t, 1, len(restoredPVCs)) + + // wait for the restored pod to be running and verify that the pvc is bound + waitForPod(t, client, restoredPod.Name) + restoredPVC := getPVC(t, client, restoredPVCs[0].Name) + assert.Equal(t, v1.ClaimBound, restoredPVC.Status.Phase) + + // load the restored volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + restoredVolume := getCloudscaleVolume(t, restoredPVC.Spec.VolumeName) + assert.Equal(t, 5, restoredVolume.SizeGB) + assert.Equal(t, "ssd", restoredVolume.Type) + + // verify that the restored disk has the same properties as the original + restoredDisk, err := getVolumeInfo(t, restoredPod, restoredPVC.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "", restoredDisk.Luks) + assert.Equal(t, "Filesystem", restoredDisk.PVCVolumeMode) + assert.Equal(t, "ext4", restoredDisk.Filesystem) + assert.Equal(t, 5*driver.GB, restoredDisk.DeviceSize) + assert.Equal(t, 5*driver.GB, restoredDisk.FilesystemSize) + + // verify that the filesystem UUID is preserved (data was restored, not recreated) + assert.Equal(t, originalFilesystemUUID, restoredDisk.FilesystemUUID) + + // finally cleanup the restored pod and pvc + cleanup(t, restoredPodDescriptor) + waitCloudscaleVolumeDeleted(t, restoredPVC.Spec.VolumeName) +} + +func TestPod_Single_SSD_Luks_Volume_Snapshot(t *testing.T) { + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-luks-pvc-original", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd-luks", + LuksKey: "secret", + }, + }, + } + + // submit the pod and the pvc + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + assert.Equal(t, 1, len(pvcs)) + + // wait for the pod to be running and verify that the pvc is bound + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + // load the volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + originalVolume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, originalVolume.SizeGB) + assert.Equal(t, "ssd", originalVolume.Type) + + // verify that our disk is luks-encrypted, formatted with ext4 and 5 GB big + disk, err := getVolumeInfo(t, pod, pvc.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "ext4", disk.Filesystem) + assert.Equal(t, 5*driver.GB, disk.DeviceSize) + assert.Equal(t, "LUKS1", disk.Luks) + assert.Equal(t, 
"Filesystem", disk.PVCVolumeMode) + assert.Equal(t, "aes-xts-plain64", disk.Cipher) + assert.Equal(t, 512, disk.Keysize) + assert.Equal(t, 5*driver.GB-luksOverhead, disk.FilesystemSize) + + // store the original filesystem UUID to verify it's preserved after restore + originalFilesystemUUID := disk.FilesystemUUID + + // create a snapshot of the LUKS volume + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + + // wait for the snapshot to be ready + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.NotNil(t, snapshot.Status) + assert.NotNil(t, snapshot.Status.BoundVolumeSnapshotContentName) + assert.True(t, *snapshot.Status.ReadyToUse) + + // verify the snapshot exists in cloudscale.ch API + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + assert.NotNil(t, snapshotContent.Status) + assert.NotNil(t, snapshotContent.Status.SnapshotHandle) + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, *snapshotContent.Status.SnapshotHandle) + assert.NotNil(t, cloudscaleSnapshot) + assert.Equal(t, *snapshotContent.Status.SnapshotHandle, cloudscaleSnapshot.UUID) + assert.Equal(t, "available", cloudscaleSnapshot.Status) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // create a new pod with a pvc restored from the snapshot with LUKS parameters + restoredPodDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-ssd-luks-pvc-restored", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd-luks", + LuksKey: "secret", + }, + }, + } + + restoredPod := makeKubernetesPod(t, restoredPodDescriptor) + restoredPVCs := makeKubernetesPVCsFromSnapshot(t, restoredPodDescriptor, snapshot.Name) + assert.Equal(t, 1, len(restoredPVCs)) + + // wait for the restored pod to be running and verify that the pvc is bound + waitForPod(t, client, restoredPod.Name) + restoredPVC := getPVC(t, client, restoredPVCs[0].Name) + assert.Equal(t, v1.ClaimBound, restoredPVC.Status.Phase) + + // load the restored volume from the cloudscale.ch api and verify that it + // has the requested size and volume type + restoredVolume := getCloudscaleVolume(t, restoredPVC.Spec.VolumeName) + assert.Equal(t, 5, restoredVolume.SizeGB) + assert.Equal(t, "ssd", restoredVolume.Type) + + // verify that the restored disk has LUKS encryption preserved + restoredDisk, err := getVolumeInfo(t, restoredPod, restoredPVC.Spec.VolumeName) + assert.NoError(t, err) + assert.Equal(t, "LUKS1", restoredDisk.Luks) + assert.Equal(t, "Filesystem", restoredDisk.PVCVolumeMode) + assert.Equal(t, "ext4", restoredDisk.Filesystem) + assert.Equal(t, 5*driver.GB, restoredDisk.DeviceSize) + assert.Equal(t, 5*driver.GB-luksOverhead, restoredDisk.FilesystemSize) + assert.Equal(t, "aes-xts-plain64", restoredDisk.Cipher) + assert.Equal(t, 512, restoredDisk.Keysize) + + // verify that the filesystem UUID is preserved (data was restored, not recreated) + assert.Equal(t, originalFilesystemUUID, restoredDisk.FilesystemUUID) + + // delete the snapshot before deleting the volumes + deleteKubernetesVolumeSnapshot(t, snapshot.Name) + waitCloudscaleVolumeSnapshotDeleted(t, *snapshotContent.Status.SnapshotHandle) + + // finally cleanup the restored pod and pvc + cleanup(t, restoredPodDescriptor) + waitCloudscaleVolumeDeleted(t, restoredPVC.Spec.VolumeName) + + // cleanup the original pod and pvc + cleanup(t, podDescriptor) + waitCloudscaleVolumeDeleted(t, 
pvc.Spec.VolumeName) +} + +func TestPod_Snapshot_Size_Validation(t *testing.T) { + // Test that snapshot size validation works correctly + podDescriptor := TestPodDescriptor{ + Kind: "Pod", + Name: pseudoUuid(), + Volumes: []TestPodVolume{ + { + ClaimName: "csi-pod-snapshot-size-pvc", + SizeGB: 5, + StorageClass: "cloudscale-volume-ssd", + }, + }, + } + + // Create volume + pod := makeKubernetesPod(t, podDescriptor) + pvcs := makeKubernetesPVCs(t, podDescriptor) + waitForPod(t, client, pod.Name) + pvc := getPVC(t, client, pvcs[0].Name) + assert.Equal(t, v1.ClaimBound, pvc.Status.Phase) + + volume := getCloudscaleVolume(t, pvc.Spec.VolumeName) + assert.Equal(t, 5, volume.SizeGB) + + // Create snapshot + snapshotName := pseudoUuid() + snapshot := makeKubernetesVolumeSnapshot(t, snapshotName, pvc.Name) + waitForVolumeSnapshot(t, client, snapshot.Name) + snapshot = getVolumeSnapshot(t, client, snapshot.Name) + assert.True(t, *snapshot.Status.ReadyToUse) + + snapshotContent := getVolumeSnapshotContent(t, *snapshot.Status.BoundVolumeSnapshotContentName) + snapshotHandle := *snapshotContent.Status.SnapshotHandle + + cloudscaleSnapshot := getCloudscaleVolumeSnapshot(t, snapshotHandle) + assert.Equal(t, 5, cloudscaleSnapshot.SizeGB) + + // Attempt to restore with smaller size (should fail) + // Create PVC directly without pod (since it won't bind) + smallerPVCName := "csi-pod-snapshot-size-pvc-smaller" + volMode := v1.PersistentVolumeFilesystem + apiGroup := "snapshot.storage.k8s.io" + smallerPVC := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: smallerPVCName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("3Gi"), // Smaller than snapshot size (5GB) + }, + }, + StorageClassName: strPtr("cloudscale-volume-ssd"), + DataSource: &v1.TypedLocalObjectReference{ + APIGroup: &apiGroup, + Kind: "VolumeSnapshot", + Name: snapshot.Name, + }, + }, + } + + t.Log("Creating PVC from snapshot with smaller size (should fail)") + _, err := client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), smallerPVC, metav1.CreateOptions{}) + assert.NoError(t, err) + + // Wait a bit for the PVC to be processed + time.Sleep(10 * time.Second) + + // Check that PVC is not bound (should fail) + smallerPVC = getPVC(t, client, smallerPVCName) + assert.NotEqual(t, v1.ClaimBound, smallerPVC.Status.Phase, "PVC with smaller size should not be bound") + assert.Equal(t, v1.ClaimPending, smallerPVC.Status.Phase, "PVC should be in Pending state due to size validation failure") + + // Verify no volume was created + if smallerPVC.Spec.VolumeName != "" { + t.Logf("Warning: Volume was created despite size validation failure: %s", smallerPVC.Spec.VolumeName) + } + + // Cleanup failed PVC + err = client.CoreV1().PersistentVolumeClaims(namespace).Delete(context.Background(), smallerPVCName, metav1.DeleteOptions{}) + assert.NoError(t, err) + + // Attempt to restore with larger size (should fail) + // Create PVC directly without pod (since it won't bind) + largerPVCName := "csi-pod-snapshot-size-pvc-larger" + largerPVC := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: largerPVCName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + 
v1.ResourceStorage: resource.MustParse("10Gi"), // Larger than snapshot size (5GB)
+				},
+			},
+			StorageClassName: strPtr("cloudscale-volume-ssd"),
+			DataSource: &v1.TypedLocalObjectReference{
+				APIGroup: &apiGroup,
+				Kind: "VolumeSnapshot",
+				Name: snapshot.Name,
+			},
+		},
+	}
+
+	t.Log("Creating PVC from snapshot with larger size (should fail)")
+	_, err = client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), largerPVC, metav1.CreateOptions{})
+	assert.NoError(t, err)
+
+	// Wait a bit for the PVC to be processed
+	time.Sleep(10 * time.Second)
+
+	// Check that PVC is not bound (should fail)
+	largerPVC = getPVC(t, client, largerPVCName)
+	assert.NotEqual(t, v1.ClaimBound, largerPVC.Status.Phase, "PVC with larger size should not be bound")
+	assert.Equal(t, v1.ClaimPending, largerPVC.Status.Phase, "PVC should be in Pending state due to size validation failure")
+
+	// Verify no volume was created
+	if largerPVC.Spec.VolumeName != "" {
+		t.Logf("Warning: Volume was created despite size validation failure: %s", largerPVC.Spec.VolumeName)
+	}
+
+	// Cleanup failed PVC
+	err = client.CoreV1().PersistentVolumeClaims(namespace).Delete(context.Background(), largerPVCName, metav1.DeleteOptions{})
+	assert.NoError(t, err)
+
+	// Cleanup original resources
+	deleteKubernetesVolumeSnapshot(t, snapshot.Name)
+	waitCloudscaleVolumeSnapshotDeleted(t, snapshotHandle)
+	cleanup(t, podDescriptor)
+	waitCloudscaleVolumeDeleted(t, pvc.Spec.VolumeName)
+}
+
 func TestPod_Single_SSD_Raw_Volume(t *testing.T) {
 	podDescriptor := TestPodDescriptor{
 		Kind: "Pod",
@@ -723,6 +1166,12 @@ func setup() error {
 		return err
 	}
 
+	// create the snapshot clientset for working with VolumeSnapshot CRDs
+	snapshotClient, err = snapshotclientset.NewForConfig(config)
+	if err != nil {
+		return err
+	}
+
 	// create test namespace
 	_, err = client.CoreV1().Namespaces().Create(
 		context.Background(),
@@ -1196,7 +1645,8 @@ func waitCloudscaleVolumeDeleted(t *testing.T, volumeName string) {
 			return
 		}
 		if err != nil {
-			if cloudscaleErr, ok := err.(*cloudscale.ErrorResponse); ok {
+			var cloudscaleErr *cloudscale.ErrorResponse
+			if errors.As(err, &cloudscaleErr) {
 				if cloudscaleErr.StatusCode == http.StatusNotFound {
 					t.Logf("volume %v is deleted on cloudscale", volumeName)
 					return
@@ -1452,3 +1902,199 @@ func generateMetricEntry(line string) MetricEntry {
 	}
 	return MetricEntry{split[0], "", split[1]}
 }
+
+// makeKubernetesVolumeSnapshot creates a VolumeSnapshot for the given PVC
+func makeKubernetesVolumeSnapshot(t *testing.T, snapshotName string, pvcName string) *snapshotv1.VolumeSnapshot {
+	className := "cloudscale-snapshots"
+
+	// Verify that the VolumeSnapshotClass exists before creating the VolumeSnapshot
+	// This helps catch configuration issues early (e.g., CRDs not installed)
+	_, err := snapshotClient.SnapshotV1().VolumeSnapshotClasses().Get(
+		context.Background(),
+		className,
+		metav1.GetOptions{},
+	)
+	if err != nil {
+		if kubeerrors.IsNotFound(err) {
+			t.Fatalf("VolumeSnapshotClass %q not found. "+
+				"This usually means the snapshot CRDs are not installed. "+
+				"See the README for snapshot setup instructions and ensure the VolumeSnapshotClass resource exists. 
Error: %v", className, err) + } + t.Fatalf("Failed to get VolumeSnapshotClass %q: %v", className, err) + } + + snapshot := &snapshotv1.VolumeSnapshot{ + TypeMeta: metav1.TypeMeta{ + Kind: "VolumeSnapshot", + APIVersion: "snapshot.storage.k8s.io/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Namespace: namespace, + }, + Spec: snapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: &className, + Source: snapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &pvcName, + }, + }, + } + + t.Logf("Creating volume snapshot %v", snapshotName) + created, err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Create( + context.Background(), + snapshot, + metav1.CreateOptions{}, + ) + if err != nil { + t.Fatal(err) + } + + return created +} + +// deleteKubernetesVolumeSnapshot deletes the VolumeSnapshot with the given name +func deleteKubernetesVolumeSnapshot(t *testing.T, snapshotName string) { + t.Logf("Deleting volume snapshot %v", snapshotName) + err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Delete( + context.Background(), + snapshotName, + metav1.DeleteOptions{}, + ) + assert.NoError(t, err) +} + +// waitForVolumeSnapshot waits for the VolumeSnapshot to be ready +func waitForVolumeSnapshot(t *testing.T, client kubernetes.Interface, name string) { + start := time.Now() + + t.Logf("Waiting for volume snapshot %q to be ready ...\n", name) + + for { + snapshot := getVolumeSnapshot(t, client, name) + + if snapshot.Status != nil && snapshot.Status.ReadyToUse != nil && *snapshot.Status.ReadyToUse { + t.Logf("Volume snapshot %q is ready\n", name) + return + } + + if time.Now().UnixNano()-start.UnixNano() > (5 * time.Minute).Nanoseconds() { + t.Fatalf("timeout exceeded while waiting for volume snapshot %v to be ready", name) + return + } + + t.Logf("Volume snapshot %q not ready yet; waiting...", name) + time.Sleep(5 * time.Second) + } +} + +// getVolumeSnapshot retrieves the VolumeSnapshot with the given name +func getVolumeSnapshot(t *testing.T, client kubernetes.Interface, name string) *snapshotv1.VolumeSnapshot { + snapshot, err := snapshotClient.SnapshotV1().VolumeSnapshots(namespace).Get( + context.Background(), + name, + metav1.GetOptions{}, + ) + assert.NoError(t, err) + return snapshot +} + +func getCloudscaleVolumeSnapshot(t *testing.T, snapshotHandle string) *cloudscale.VolumeSnapshot { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + snapshot, err := cloudscaleClient.VolumeSnapshots.Get(ctx, snapshotHandle) + if err != nil { + t.Fatalf("Could not find snapshot with handle %v: %v", snapshotHandle, err) + } + + return snapshot +} + +// waitCloudscaleVolumeSnapshotDeleted waits until the snapshot with the given handle was deleted +func waitCloudscaleVolumeSnapshotDeleted(t *testing.T, snapshotHandle string) { + start := time.Now() + + for { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + _, err := cloudscaleClient.VolumeSnapshots.Get(ctx, snapshotHandle) + cancel() + + if err != nil { + var cloudscaleErr *cloudscale.ErrorResponse + if errors.As(err, &cloudscaleErr) { + if cloudscaleErr.StatusCode == http.StatusNotFound { + t.Logf("snapshot %v is deleted on cloudscale", snapshotHandle) + return + } + } + // Some other error - log but continue waiting + t.Logf("error checking snapshot %v: %v", snapshotHandle, err) + } + + if time.Since(start) > 5*time.Minute { + t.Errorf("timeout exceeded while waiting for snapshot %v to be deleted from cloudscale", snapshotHandle) + return 
+ } + + t.Logf("snapshot %v not deleted on cloudscale yet; awaiting deletion", snapshotHandle) + time.Sleep(5 * time.Second) + } +} + +// getVolumeSnapshotContent retrieves the VolumeSnapshotContent for a VolumeSnapshot +func getVolumeSnapshotContent(t *testing.T, contentName string) *snapshotv1.VolumeSnapshotContent { + content, err := snapshotClient.SnapshotV1().VolumeSnapshotContents().Get( + context.Background(), + contentName, + metav1.GetOptions{}, + ) + assert.NoError(t, err) + return content +} + +// creates kubernetes pvcs from the given TestPodDescriptor, restoring from a snapshot +func makeKubernetesPVCsFromSnapshot(t *testing.T, pod TestPodDescriptor, snapshotName string) []*v1.PersistentVolumeClaim { + pvcs := make([]*v1.PersistentVolumeClaim, 0) + + for _, volume := range pod.Volumes { + volMode := v1.PersistentVolumeFilesystem + if volume.Block { + volMode = v1.PersistentVolumeBlock + } + + apiGroup := "snapshot.storage.k8s.io" + pvcs = append(pvcs, &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: volume.ClaimName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + VolumeMode: &volMode, + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse(fmt.Sprintf("%vGi", volume.SizeGB)), + }, + }, + StorageClassName: strPtr(volume.StorageClass), + DataSource: &v1.TypedLocalObjectReference{ + APIGroup: &apiGroup, + Kind: "VolumeSnapshot", + Name: snapshotName, + }, + }, + }) + } + + t.Log("Creating pvc from snapshot") + for _, pvc := range pvcs { + _, err := client.CoreV1().PersistentVolumeClaims(namespace).Create(context.Background(), pvc, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + } + + return pvcs +}