From b44339ada0d0a4a5cfbe2935cb19519c68ec66d1 Mon Sep 17 00:00:00 2001 From: Rene Peinthor Date: Wed, 24 Jun 2026 15:24:23 +0200 Subject: [PATCH] linstor: fix encrypted volume snapshot backup and restore Encrypted Linstor volumes use a LUKS layer inside the DRBD stack, so the storage-layer snapshot device holds ciphertext while the DRBD device CloudStack restores to is the decrypted view. Backing up the raw snapshot and writing it back to the decrypted device corrupted the volume (different data, unbootable root). Back up encrypted snapshots from the decrypted DRBD device (forcing the temporary-resource path) and store them as a LUKS-encrypted qcow2 using the volume passphrase, so snapshots are not kept in clear text on secondary storage. On revert, decrypt the qcow2 and write plaintext to the DRBD device; the LUKS layer re-encrypts it. The qemu-img shrink is skipped for encrypted volumes (the DRBD device is already net-sized). Add an integration test (test_linstor_encrypted_snapshots.py) covering the encrypted-root snapshot revert round-trip, the rejection of create-volume-from-encrypted-snapshot by CloudStack core, and a best-effort check that the backed-up qcow2 is LUKS-encrypted at rest. 
--- plugins/storage/volume/linstor/CHANGELOG.md | 8 + .../LinstorBackupSnapshotCommandWrapper.java | 46 +- ...torRevertBackupSnapshotCommandWrapper.java | 28 +- .../LinstorPrimaryDataStoreDriverImpl.java | 14 +- test/integration/plugins/linstor/README.md | 18 + .../test_linstor_encrypted_snapshots.py | 444 ++++++++++++++++++ 6 files changed, 545 insertions(+), 13 deletions(-) create mode 100644 test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py diff --git a/plugins/storage/volume/linstor/CHANGELOG.md b/plugins/storage/volume/linstor/CHANGELOG.md index 1a3142e8c59b..89b09b74a6d7 100644 --- a/plugins/storage/volume/linstor/CHANGELOG.md +++ b/plugins/storage/volume/linstor/CHANGELOG.md @@ -24,6 +24,14 @@ All notable changes to Linstor CloudStack plugin will be documented in this file The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2026-06-24] + +### Fixed + +- Restore of encrypted volume snapshots: snapshots of encrypted volumes are now + stored as LUKS-encrypted qcow2 files and decrypted on revert (previously the + restored data was corrupted and the root device unbootable). 
+ ## [2026-01-17] ### Added diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java index fab4829da551..c111d320cb4e 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java @@ -18,6 +18,10 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import com.cloud.agent.api.to.DataStoreTO; import com.cloud.agent.api.to.NfsTO; @@ -31,9 +35,11 @@ import com.cloud.utils.script.Script; import org.apache.cloudstack.storage.command.CopyCmdAnswer; import org.apache.cloudstack.storage.to.SnapshotObjectTO; +import org.apache.cloudstack.utils.cryptsetup.KeyFile; import org.apache.cloudstack.utils.qemu.QemuImg; import org.apache.cloudstack.utils.qemu.QemuImgException; import org.apache.cloudstack.utils.qemu.QemuImgFile; +import org.apache.cloudstack.utils.qemu.QemuObject; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -83,6 +89,7 @@ private String convertImageToQCow2( final String srcPath, final SnapshotObjectTO dst, final KVMStoragePool secondaryPool, + final byte[] passphrase, int waitMilliSeconds ) throws LibvirtException, QemuImgException, IOException @@ -94,9 +101,22 @@ private String convertImageToQCow2( final QemuImgFile srcFile = new QemuImgFile(srcPath, QemuImg.PhysicalDiskFormat.RAW); final QemuImgFile dstFile = new QemuImgFile(dstPath, QemuImg.PhysicalDiskFormat.QCOW2); - // NOTE: the qemu img will also contain the drbd metadata at the end final QemuImg qemu = new 
QemuImg(waitMilliSeconds); - qemu.convert(srcFile, dstFile); + if (passphrase != null && passphrase.length > 0) { + // Encrypted volumes are backed up from their decrypted DRBD device, so the snapshot + // data here is plaintext. Encrypt the destination qcow2 with the volume's passphrase + // (LUKS), so the snapshot is not stored in clear text on secondary storage. + try (KeyFile keyFile = new KeyFile(passphrase)) { + final Map options = new HashMap<>(); + final List qemuObjects = new ArrayList<>(); + qemuObjects.add(QemuObject.prepareSecretForQemuImg(QemuImg.PhysicalDiskFormat.QCOW2, + QemuObject.EncryptFormat.LUKS, keyFile.toString(), "sec0", options)); + qemu.convert(srcFile, dstFile, options, qemuObjects, null, true); + } + } else { + // NOTE: the qemu img will also contain the drbd metadata at the end + qemu.convert(srcFile, dstFile); + } LOGGER.info("Backup snapshot '{}' to '{}'", srcPath, dstPath); return dstPath; } @@ -153,14 +173,21 @@ public CopyCmdAnswer execute(LinstorBackupSnapshotCommand cmd, LibvirtComputingR secondaryPool = storagePoolMgr.getStoragePoolByURI(dstDataStore.getUrl()); - String dstPath = convertImageToQCow2(srcPath, dst, secondaryPool, cmd.getWaitInMillSeconds()); + final byte[] passphrase = src.getVolume() != null ? src.getVolume().getPassphrase() : null; + final boolean encrypted = passphrase != null && passphrase.length > 0; - // resize to real volume size, cutting of drbd metadata - String result = qemuShrink(dstPath, src.getVolume().getSize(), cmd.getWaitInMillSeconds()); - if (result != null) { - return new CopyCmdAnswer("qemu-img shrink failed: " + result); + String dstPath = convertImageToQCow2(srcPath, dst, secondaryPool, passphrase, cmd.getWaitInMillSeconds()); + + if (!encrypted) { + // resize to real volume size, cutting of drbd metadata + // For encrypted volumes the source is the decrypted DRBD device (already net-sized, + // no drbd metadata to cut); shrinking an encrypted qcow2 would also need the secret. 
+ String result = qemuShrink(dstPath, src.getVolume().getSize(), cmd.getWaitInMillSeconds()); + if (result != null) { + return new CopyCmdAnswer("qemu-img shrink failed: " + result); + } + LOGGER.info("Backup shrunk " + dstPath + " to actual size " + src.getVolume().getSize()); } - LOGGER.info("Backup shrunk " + dstPath + " to actual size " + src.getVolume().getSize()); SnapshotObjectTO snapshot = setCorrectSnapshotSize(dst, dstPath); LOGGER.info("Actual file size for '{}' is {}", dstPath, snapshot.getPhysicalSize()); @@ -171,6 +198,9 @@ public CopyCmdAnswer execute(LinstorBackupSnapshotCommand cmd, LibvirtComputingR LOGGER.error(error); return new CopyCmdAnswer(cmd, e); } finally { + if (src.getVolume() != null) { + src.getVolume().clearPassphrase(); + } cleanupSecondaryPool(secondaryPool); if (zfsHidden) { zfsSnapdev(true, src.getPath()); diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java index 2d6df5f2296a..51d0ed88e340 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java @@ -17,6 +17,7 @@ package com.cloud.hypervisor.kvm.resource.wrapper; import java.io.File; +import java.util.Collections; import com.cloud.agent.api.to.DataStoreTO; import com.cloud.api.storage.LinstorRevertBackupSnapshotCommand; @@ -31,9 +32,12 @@ import org.apache.cloudstack.storage.datastore.util.LinstorUtil; import org.apache.cloudstack.storage.to.SnapshotObjectTO; import org.apache.cloudstack.storage.to.VolumeObjectTO; +import org.apache.cloudstack.utils.cryptsetup.KeyFile; +import org.apache.cloudstack.utils.qemu.QemuImageOptions; 
import org.apache.cloudstack.utils.qemu.QemuImg; import org.apache.cloudstack.utils.qemu.QemuImgException; import org.apache.cloudstack.utils.qemu.QemuImgFile; +import org.apache.cloudstack.utils.qemu.QemuObject; import org.joda.time.Duration; import org.libvirt.LibvirtException; @@ -43,8 +47,9 @@ public final class LinstorRevertBackupSnapshotCommandWrapper { private void convertQCow2ToRAW( - KVMStoragePool pool, final String srcPath, final String dstUuid, int waitMilliSeconds) - throws LibvirtException, QemuImgException + KVMStoragePool pool, final String srcPath, final String dstUuid, final byte[] passphrase, + int waitMilliSeconds) + throws LibvirtException, QemuImgException, java.io.IOException { final String dstPath = pool.getPhysicalDisk(dstUuid).getPath(); final QemuImgFile srcQemuFile = new QemuImgFile( @@ -60,7 +65,20 @@ private void convertQCow2ToRAW( } final QemuImg qemu = new QemuImg(waitMilliSeconds, zeroedDevice, true); final QemuImgFile dstFile = new QemuImgFile(dstPath, QemuImg.PhysicalDiskFormat.RAW); - qemu.convert(srcQemuFile, dstFile); + if (passphrase != null && passphrase.length > 0) { + // The backed-up qcow2 is LUKS-encrypted with the volume's passphrase. Decrypt it while + // writing plaintext to the (decrypted) DRBD device; the Linstor LUKS layer re-encrypts it, + // so no qemu encryption must be applied to the destination. 
+ try (KeyFile keyFile = new KeyFile(passphrase)) { + final QemuObject srcSecret = QemuObject.prepareSecretForQemuImg( + QemuImg.PhysicalDiskFormat.QCOW2, QemuObject.EncryptFormat.LUKS, keyFile.toString(), "sec0", null); + final QemuImageOptions srcImageOpts = new QemuImageOptions( + QemuImg.PhysicalDiskFormat.QCOW2, srcPath, "sec0"); + qemu.convert(srcQemuFile, dstFile, null, Collections.singletonList(srcSecret), srcImageOpts, null, false); + } + } else { + qemu.convert(srcQemuFile, dstFile); + } } @Override @@ -84,10 +102,13 @@ public CopyCmdAnswer execute(LinstorRevertBackupSnapshotCommand cmd, LibvirtComp secondaryPool = storagePoolMgr.getStoragePoolByURI( srcDataStore.getUrl() + File.separator + srcFile.getParent()); + // The destination volume is the (same) original volume, whose passphrase the backed-up + // qcow2 was encrypted with; use it to decrypt while restoring. convertQCow2ToRAW( linstorPool, secondaryPool.getLocalPath() + File.separator + srcFile.getName(), dst.getPath(), + dst.getPassphrase(), cmd.getWaitInMillSeconds()); final VolumeObjectTO dstVolume = new VolumeObjectTO(); @@ -99,6 +120,7 @@ public CopyCmdAnswer execute(LinstorRevertBackupSnapshotCommand cmd, LibvirtComp logger.error(error); return new CopyCmdAnswer(cmd, e); } finally { + dst.clearPassphrase(); LinstorBackupSnapshotCommandWrapper.cleanupSecondaryPool(secondaryPool); } } diff --git a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java index 3f06bee8ac83..7638e47a49e8 100644 --- a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java @@ -1088,12 +1088,22 @@ 
protected Answer copySnapshot(DataObject srcData, DataObject destData) { VirtualMachineManager.ExecuteInSequence.value()); cmd.setOptions(options); - Optional optEP = getDiskfullEP(api, pool, rscName); + // For encrypted volumes Linstor adds a LUKS layer (DRBD -> LUKS -> STORAGE). The storage + // layer snapshot device (getSnapshotPath) therefore only exposes the raw LUKS ciphertext, + // while restore writes onto the decrypted DRBD device (/dev/drbd/by-res/.../0). Backing up + // the ciphertext and writing it back to the decrypted layer corrupts the volume (and the + // shrink to the net volume size would even truncate the ciphertext). So for encrypted + // volumes we never read the storage snapshot directly: restore the snapshot into a temporary + // resource and back up its decrypted DRBD device instead, symmetric to the restore path. + final boolean encrypted = snapshotObject.getBaseVolume().getPassphraseId() != null; + Optional optEP = encrypted ? + Optional.empty() : getDiskfullEP(api, pool, rscName); Answer answer; if (optEP.isPresent()) { answer = optEP.get().sendMessage(cmd); } else { - logger.debug("No diskfull endpoint found to copy image, creating diskless endpoint"); + logger.debug("No diskfull endpoint used to copy image (encrypted={}), using temporary resource", + encrypted); answer = copyFromTemporaryResource(api, pool, rscName, snapshotName, snapshotObject, cmd); } return answer; diff --git a/test/integration/plugins/linstor/README.md b/test/integration/plugins/linstor/README.md index 4505d1b7d57c..4971c9506b5b 100644 --- a/test/integration/plugins/linstor/README.md +++ b/test/integration/plugins/linstor/README.md @@ -48,3 +48,21 @@ nosetests --with-marvin --marvin-config= /test/ ``` You can also run these tests out of the box with PyDev or PyCharm or whatever. 
+ +## Encrypted snapshot tests + +`test_linstor_encrypted_snapshots.py` covers the encrypted-volume snapshot round trip +(create encrypted root disk -> snapshot -> revert), that create-volume-from-snapshot is rejected +for encrypted volumes, and that the backed-up qcow2 on secondary storage is itself LUKS encrypted. + +Extra prerequisites: + +* At least one KVM host with volume-encryption support (`host.encryptionsupported == true`, i.e. + cryptsetup/qemu LUKS available). Tests self-skip if none is found. +* The Linstor resource group used (`acs-basic`) must be able to add a LUKS layer to its volumes. +* `lin.backup.snapshots` must be enabled (default) so snapshots are backed up to secondary storage; + the test sets it. With it disabled the qcow2 path is not exercised. + +``` +nosetests --with-marvin --marvin-config=<marvin-cfg-file> <cloudstack-dir>/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py --zone=<zone> --hypervisor=kvm +``` diff --git a/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py b/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py new file mode 100644 index 000000000000..5f440309bb34 --- /dev/null +++ b/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py @@ -0,0 +1,444 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json +import logging +import os +import random +import socket +import time + +# All tests inherit from cloudstackTestCase +from marvin.cloudstackTestCase import cloudstackTestCase + +# Import Integration Libraries +from marvin.cloudstackAPI import createVolume +from marvin.cloudstackException import CloudstackAPIException +from marvin.lib.base import Account, Configurations, Host, ServiceOffering, \ + Snapshot, StoragePool, User, VirtualMachine, Volume +from marvin.lib.common import get_domain, get_template, get_zone, list_hosts, list_virtual_machines, list_volumes +from marvin.lib.utils import cleanup_resources +from marvin.sshClient import SshClient +from nose.plugins.attrib import attr + +# Prerequisites: +# Only one zone / pod / cluster +# Only KVM hypervisor (Linstor only supports KVM) +# At least one KVM host with volume-encryption support (host.encryptionsupported == True), +# i.e. cryptsetup/qemu with LUKS available on the host. +# One Linstor storage pool whose resource-group can add a LUKS layer (encrypted volumes). +# 'lin.backup.snapshots' enabled (default true) so snapshots are backed up to secondary storage +# as qcow2 -- that is the path these tests are meant to exercise. With it disabled, snapshots +# stay on primary as Linstor system snapshots and a different (rollback) code path is used. +# +# What this exercises (the encrypted-snapshot round trip): +# * backup: decrypted DRBD device -> LUKS-encrypted qcow2 on secondary +# * revert: encrypted qcow2 -> decrypted, written to the DRBD device (Linstor re-encrypts) +# * create: createVolumeFromSnapshot on an encrypted snapshot is rejected by CloudStack core (guard test) +# +# Note on verification: Linstor encrypts inside the DRBD stack (LUKS layer), so the libvirt domain +# XML does NOT carry an <encryption> element like hypervisor-based encryption does. 
Correctness +# is therefore verified by a data round trip (write marker -> snapshot -> change -> restore -> read), +# and encryption-at-rest is verified by inspecting the backed-up qcow2 with 'qemu-img info'. + +MARKER_PATH = "/root/cs_enc_marker.txt" + + +class TestData: + account = "account" + computeOffering = "computeoffering" + diskName = "diskname" + domainId = "domainId" + hypervisor = "hypervisor" + provider = "provider" + scope = "scope" + storageTag = "linstor" + tags = "tags" + user = "user" + virtualMachine = "virtualmachine" + zoneId = "zoneId" + + def __init__(self, linstor_controller_url): + self.testdata = { + TestData.account: { + "email": "test-enc@test.com", + "firstname": "John", + "lastname": "Doe", + "username": "test-enc", + "password": "test" + }, + TestData.user: { + "email": "user-enc@test.com", + "firstname": "Jane", + "lastname": "Doe", + "username": "test-enc-user", + "password": "password" + }, + "primarystorage": { + "name": "LinstorEncPool-%d" % random.randint(0, 100000), + TestData.scope: "ZONE", + "url": linstor_controller_url, + TestData.provider: "Linstor", + TestData.tags: TestData.storageTag, + TestData.hypervisor: "KVM", + "details": { + "resourceGroup": "acs-basic" + } + }, + TestData.virtualMachine: { + "name": "TestEncVM", + "displayname": "Test Encrypted VM" + }, + # encryptroot=True is passed as a create kwarg, not in this dict + TestData.computeOffering: { + "name": "Linstor_Compute_Encrypted", + "displaytext": "Linstor_Compute_Encrypted", + "cpunumber": 1, + "cpuspeed": 500, + "memory": 512, + "storagetype": "shared", + TestData.tags: TestData.storageTag + }, + TestData.diskName: "restored-from-enc-snap", + TestData.zoneId: 1, + TestData.domainId: 1, + } + + +class ServiceReady: + @classmethod + def ready(cls, hostname, port): + try: + s = socket.create_connection((hostname, port), timeout=1) + s.close() + return True + except (ConnectionRefusedError, socket.timeout, OSError): + return False + + @classmethod + def 
wait(cls, hostname, port, wait_interval=5, timeout=120, service_name='ssh'): + starttime = int(round(time.time() * 1000)) + while not cls.ready(hostname, port): + if starttime + timeout * 1000 < int(round(time.time() * 1000)): + raise RuntimeError("{s} {h} cannot be reached.".format(s=service_name, h=hostname)) + time.sleep(wait_interval) + return True + + @classmethod + def wait_ssh_ready(cls, hostname, wait_interval=2, timeout=120): + return cls.wait(hostname, 22, wait_interval, timeout, "ssh") + + +class TestLinstorEncryptedSnapshots(cloudstackTestCase): + + @classmethod + def setUpClass(cls): + testclient = super(TestLinstorEncryptedSnapshots, cls).getClsTestClient() + + cls.apiClient = testclient.getApiClient() + cls.dbConnection = testclient.getDbConnection() + + cls._cleanup = [] + cls.skip_reason = None + + # Linstor is KVM-only, so the hypervisor type is not probed via getHypervisorInfo() (which is + # only populated when nosetests is invoked with --hypervisor). Instead we require an actual KVM + # host that supports volume encryption below. + + # The first host runs the Linstor controller (per the Linstor test prerequisites). + first_host = list_hosts(cls.apiClient)[0] + cls.testdata = TestData(first_host.ipaddress).testdata + + cls.zone = get_zone(cls.apiClient, zone_id=cls.testdata[TestData.zoneId]) + cls.domain = get_domain(cls.apiClient, cls.testdata[TestData.domainId]) + cls.template = get_template(cls.apiClient, cls.zone.id, hypervisor="KVM") + + # Host SSH credentials, only needed by test_03 to inspect the backed-up qcow2 on secondary + # storage. A full marvin config carries these under zones->pods->clusters->hosts, but a + # lightweight config may omit them; in that case fall back to HOST_SSH_USER / HOST_SSH_PASSWORD + # env vars. Never fail class setup over this - the other tests don't need host SSH. 
+ cls.hostConfig = None + try: + cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0] \ + .__dict__["hosts"][0].__dict__ + except (KeyError, IndexError, AttributeError, TypeError): + host_user = os.environ.get("HOST_SSH_USER") + host_pass = os.environ.get("HOST_SSH_PASSWORD") + if host_user and host_pass: + cls.hostConfig = {"username": host_user, "password": host_pass} + + if not cls._encryption_capable_host_exists(): + cls.skip_reason = "No KVM host with volume-encryption support found" + return + + # Ensure snapshots are backed up to secondary storage (the path under test). + Configurations.update(cls.apiClient, name="lin.backup.snapshots", value="true") + + primarystorage = cls.testdata["primarystorage"] + # Registering the pool makes the management server call the Linstor controller (to read the + # resource-group capacity). If the controller enforces authentication, that call needs an API + # token, supplied as the 'lin.auth.apitoken' add-pool detail. Provide it via LINSTOR_API_TOKEN + # so it is never hard-coded; leave it unset for an unauthenticated controller. + api_token = os.environ.get("LINSTOR_API_TOKEN") + if api_token: + primarystorage["details"]["lin.auth.apitoken"] = api_token + + try: + cls.primary_storage = StoragePool.create( + cls.apiClient, + primarystorage, + scope=primarystorage[TestData.scope], + zoneid=cls.zone.id, + provider=primarystorage[TestData.provider], + tags=primarystorage[TestData.tags], + hypervisor=primarystorage[TestData.hypervisor] + ) + except Exception as e: + cls.skip_reason = ( + "Could not register the Linstor primary storage pool (%s). If the Linstor controller " + "requires authentication, set the LINSTOR_API_TOKEN env var to a valid controller API " + "token before running these tests." % e) + return + + # Compute offering with encrypted root, pinned to the Linstor pool via the storage tag. 
+ cls.compute_offering_encrypted = ServiceOffering.create( + cls.apiClient, + cls.testdata[TestData.computeOffering], + encryptroot=True + ) + + cls.account = Account.create(cls.apiClient, cls.testdata[TestData.account], admin=1) + cls.user = User.create( + cls.apiClient, cls.testdata[TestData.user], + account=cls.account.name, domainid=cls.domain.id) + + cls._cleanup = [ + cls.compute_offering_encrypted, + cls.user, + cls.account, + ] + + @classmethod + def tearDownClass(cls): + try: + cleanup_resources(cls.apiClient, cls._cleanup) + if getattr(cls, "primary_storage", None) is not None: + cls.primary_storage.delete(cls.apiClient) + except Exception as e: + logging.debug("Exception in tearDownClass: %s" % e) + + def setUp(self): + if self.skip_reason: + self.skipTest(self.skip_reason) + self.cleanup = [] + + def tearDown(self): + cleanup_resources(self.apiClient, self.cleanup) + + # --------------------------------------------------------------------- # + # Tests + # --------------------------------------------------------------------- # + + @attr(tags=['basic'], required_hardware=True) + def test_01_revert_encrypted_root_snapshot(self): + """Snapshot an encrypted root volume, change it, revert, and verify the data and boot.""" + vm = self._deploy_encrypted_vm("TestEncVM-revert") + + # 1. write a marker into the encrypted root volume + self._write_marker(vm, "linstor-encrypted-v1") + + # 2. snapshot the (stopped) root volume -> encrypted qcow2 on secondary + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + # 3. change the data so a successful revert is detectable + self._start_vm(vm) + self._write_marker(vm, "linstor-encrypted-v2-CHANGED") + + # 4. revert the volume to the snapshot (requires the VM stopped) + vm.stop(self.apiClient) + Volume.revertToSnapshot(self.apiClient, snapshot.id) + + # 5. 
the VM must boot again and the original data must be back + self._start_vm(vm) + restored = self._read_marker(vm) + self.assertEqual( + "linstor-encrypted-v1", restored, + "Reverted encrypted root volume has wrong content (got %r) - decryption/round-trip broken" % restored + ) + + @attr(tags=['basic'], required_hardware=True) + def test_02_create_volume_from_encrypted_snapshot_is_rejected(self): + """Creating a new volume from an encrypted volume's snapshot must be rejected by CloudStack. + + CloudStack core (VolumeApiServiceImpl) unconditionally blocks this for any encrypted source + volume ("Cannot create new volumes from encrypted volume snapshots"), so the request must never + reach the storage layer. This is a guard test: if the limitation is ever lifted, decryption + support for the create-from-snapshot path (KVMStorageProcessor / LinstorStorageAdaptor) must be + added and this test updated accordingly. + """ + vm = self._deploy_encrypted_vm("TestEncVM-create") + + self._write_marker(vm, "linstor-encrypted-create-src") + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + cmd = createVolume.createVolumeCmd() + cmd.name = "%s-%d" % (self.testdata[TestData.diskName], random.randint(0, 100000)) + cmd.zoneid = self.zone.id + cmd.account = self.account.name + cmd.domainid = self.domain.id + cmd.snapshotid = snapshot.id + + try: + self.apiClient.createVolume(cmd) + self.fail("Creating a volume from an encrypted volume snapshot should have been rejected") + except CloudstackAPIException as e: + self.assertIn( + "encrypted volume snapshots", str(e), + "Unexpected error creating volume from encrypted snapshot: %s" % e + ) + + @attr(tags=['basic'], required_hardware=True) + def test_03_backed_up_snapshot_qcow2_is_encrypted(self): + """The qcow2 written to secondary storage for an encrypted volume must itself be LUKS encrypted.""" + if not self.hostConfig: + self.skipTest("No host SSH credentials available (set HOST_SSH_USER/HOST_SSH_PASSWORD or " + 
"provide them in the marvin config) - cannot inspect the secondary-storage qcow2") + vm = self._deploy_encrypted_vm("TestEncVM-atrest") + self._write_marker(vm, "linstor-encrypted-atrest") + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + info = self._qemu_img_info_of_backed_up_snapshot(snapshot) + if info is None: + self.skipTest("Could not locate the backed-up snapshot on secondary storage to inspect it") + + encrypted = bool(info.get("encrypted")) or "encrypt" in json.dumps(info.get("format-specific", {})) + self.assertTrue( + encrypted, + "Backed-up snapshot qcow2 is NOT encrypted at rest: %s" % json.dumps(info) + ) + + # --------------------------------------------------------------------- # + # Helpers + # --------------------------------------------------------------------- # + + def _deploy_encrypted_vm(self, name): + vm = VirtualMachine.create( + self.apiClient, + {"name": name, "displayname": name}, + accountid=self.account.name, + zoneid=self.zone.id, + serviceofferingid=self.compute_offering_encrypted.id, + templateid=self.template.id, + domainid=self.domain.id, + startvm=False, + mode='basic', + ) + self.cleanup.insert(0, vm) + self._start_vm(vm) + return vm + + def _snapshot_root_volume(self, vm): + root = list_volumes(self.apiClient, virtualmachineid=vm.id, type="ROOT", listall=True)[0] + snapshot = Snapshot.create( + self.apiClient, + volume_id=root.id, + account=self.account.name, + domainid=self.domain.id, + ) + self.assertIsNotNone(snapshot, "Could not create snapshot of encrypted root volume") + self.cleanup.insert(0, snapshot) + return snapshot + + def _vm_ssh(self, vm): + # The VM is deployed stopped, so its instance has no ssh_ip yet; the IP may also change across + # stop/start cycles. Always pass the current address from a fresh lookup. 
+ ipaddress = self._get_vm(vm.id).ipaddress + return vm.get_ssh_client(ipaddress=ipaddress, reconnect=True, retries=5) + + def _write_marker(self, vm, content): + ssh = self._vm_ssh(vm) + ssh.execute("echo '%s' > %s" % (content, MARKER_PATH)) + ssh.execute("sync") + + def _read_marker(self, vm): + ssh = self._vm_ssh(vm) + result = ssh.execute("cat %s" % MARKER_PATH) + return result[0].strip() if result else None + + @classmethod + def _encryption_capable_host_exists(cls): + hosts = Host.list(cls.apiClient, zoneid=cls.zone.id, type='Routing', hypervisor='KVM', state='Up') + return any(getattr(h, "encryptionsupported", False) for h in (hosts or [])) + + @classmethod + def _get_vm(cls, vm_id): + return list_virtual_machines(cls.apiClient, id=vm_id)[0] + + @classmethod + def _start_vm(cls, vm): + vm_for_check = cls._get_vm(vm.id) + if vm_for_check.state == VirtualMachine.STOPPED: + vm.start(cls.apiClient) + vm_for_check = cls._get_vm(vm.id) + ServiceReady.wait_ssh_ready(vm_for_check.ipaddress) + return vm_for_check + + def _host_ssh(self): + host = list_hosts(self.apiClient, type='Routing', hypervisor='KVM', state='Up')[0] + return SshClient( + host=host.ipaddress, port=22, + user=self.hostConfig['username'], passwd=self.hostConfig['password']) + + def _qemu_img_info_of_backed_up_snapshot(self, snapshot): + """Self-mount the secondary NFS export on a host and run 'qemu-img info' on the snapshot file.""" + # The backed-up snapshot's physical path on secondary storage isn't exposed via the API, so we + # read it from the DB. The DB may be unreachable from where the tests run (e.g. MariaDB bound to + # localhost on the management server); in that case return None so the test skips. 
+ try: + rows = self.dbConnection.execute( + "SELECT ss.install_path " + "FROM snapshot_store_ref ss JOIN snapshots s ON s.id = ss.snapshot_id " + "WHERE s.uuid = '%s' AND ss.store_role = 'Image'" % snapshot.id) + store = self.dbConnection.execute( + "SELECT url FROM image_store WHERE role = 'Image' AND removed IS NULL LIMIT 1") + except Exception as e: + logging.debug("DB lookup for snapshot install path failed: %s" % e) + return None + + if not rows or not rows[0][0] or not store or not store[0][0]: + return None + install_path = rows[0][0] + url = store[0][0] # e.g. nfs:/// + if not url.startswith("nfs://"): + return None + server, export = url[len("nfs://"):].split("/", 1) + + ssh = self._host_ssh() + mount_point = "/tmp/cs_sectest_%d" % random.randint(0, 100000) + try: + ssh.execute("mkdir -p %s" % mount_point) + ssh.execute("mount -t nfs -o ro %s:/%s %s" % (server, export, mount_point)) + out = ssh.execute("qemu-img info --output=json %s/%s" % (mount_point, install_path)) + return json.loads("".join(out)) if out else None + except Exception as e: + logging.debug("qemu-img info on secondary failed: %s" % e) + return None + finally: + ssh.execute("umount %s 2>/dev/null; rmdir %s 2>/dev/null" % (mount_point, mount_point))