diff --git a/plugins/storage/volume/linstor/CHANGELOG.md b/plugins/storage/volume/linstor/CHANGELOG.md index 1a3142e8c59b..89b09b74a6d7 100644 --- a/plugins/storage/volume/linstor/CHANGELOG.md +++ b/plugins/storage/volume/linstor/CHANGELOG.md @@ -24,6 +24,14 @@ All notable changes to Linstor CloudStack plugin will be documented in this file The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2026-06-24] + +### Fixed + +- Restore of encrypted volume snapshots: snapshots of encrypted volumes are now + stored as LUKS-encrypted qcow2 files and decrypted on revert (previously the + restored data was corrupted and the root device unbootable). + ## [2026-01-17] ### Added diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java index fab4829da551..c111d320cb4e 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java @@ -18,6 +18,10 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import com.cloud.agent.api.to.DataStoreTO; import com.cloud.agent.api.to.NfsTO; @@ -31,9 +35,11 @@ import com.cloud.utils.script.Script; import org.apache.cloudstack.storage.command.CopyCmdAnswer; import org.apache.cloudstack.storage.to.SnapshotObjectTO; +import org.apache.cloudstack.utils.cryptsetup.KeyFile; import org.apache.cloudstack.utils.qemu.QemuImg; import org.apache.cloudstack.utils.qemu.QemuImgException; import 
org.apache.cloudstack.utils.qemu.QemuImgFile; +import org.apache.cloudstack.utils.qemu.QemuObject; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -83,6 +89,7 @@ private String convertImageToQCow2( final String srcPath, final SnapshotObjectTO dst, final KVMStoragePool secondaryPool, + final byte[] passphrase, int waitMilliSeconds ) throws LibvirtException, QemuImgException, IOException @@ -94,9 +101,22 @@ private String convertImageToQCow2( final QemuImgFile srcFile = new QemuImgFile(srcPath, QemuImg.PhysicalDiskFormat.RAW); final QemuImgFile dstFile = new QemuImgFile(dstPath, QemuImg.PhysicalDiskFormat.QCOW2); - // NOTE: the qemu img will also contain the drbd metadata at the end final QemuImg qemu = new QemuImg(waitMilliSeconds); - qemu.convert(srcFile, dstFile); + if (passphrase != null && passphrase.length > 0) { + // Encrypted volumes are backed up from their decrypted DRBD device, so the snapshot + // data here is plaintext. Encrypt the destination qcow2 with the volume's passphrase + // (LUKS), so the snapshot is not stored in clear text on secondary storage. 
+ try (KeyFile keyFile = new KeyFile(passphrase)) { + final Map options = new HashMap<>(); + final List qemuObjects = new ArrayList<>(); + qemuObjects.add(QemuObject.prepareSecretForQemuImg(QemuImg.PhysicalDiskFormat.QCOW2, + QemuObject.EncryptFormat.LUKS, keyFile.toString(), "sec0", options)); + qemu.convert(srcFile, dstFile, options, qemuObjects, null, true); + } + } else { + // NOTE: the qemu img will also contain the drbd metadata at the end + qemu.convert(srcFile, dstFile); + } LOGGER.info("Backup snapshot '{}' to '{}'", srcPath, dstPath); return dstPath; } @@ -153,14 +173,21 @@ public CopyCmdAnswer execute(LinstorBackupSnapshotCommand cmd, LibvirtComputingR secondaryPool = storagePoolMgr.getStoragePoolByURI(dstDataStore.getUrl()); - String dstPath = convertImageToQCow2(srcPath, dst, secondaryPool, cmd.getWaitInMillSeconds()); + final byte[] passphrase = src.getVolume() != null ? src.getVolume().getPassphrase() : null; + final boolean encrypted = passphrase != null && passphrase.length > 0; - // resize to real volume size, cutting of drbd metadata - String result = qemuShrink(dstPath, src.getVolume().getSize(), cmd.getWaitInMillSeconds()); - if (result != null) { - return new CopyCmdAnswer("qemu-img shrink failed: " + result); + String dstPath = convertImageToQCow2(srcPath, dst, secondaryPool, passphrase, cmd.getWaitInMillSeconds()); + + if (!encrypted) { + // resize to real volume size, cutting off drbd metadata + // For encrypted volumes the source is the decrypted DRBD device (already net-sized, + // no drbd metadata to cut); shrinking an encrypted qcow2 would also need the secret. 
+ String result = qemuShrink(dstPath, src.getVolume().getSize(), cmd.getWaitInMillSeconds()); + if (result != null) { + return new CopyCmdAnswer("qemu-img shrink failed: " + result); + } + LOGGER.info("Backup shrunk " + dstPath + " to actual size " + src.getVolume().getSize()); } - LOGGER.info("Backup shrunk " + dstPath + " to actual size " + src.getVolume().getSize()); SnapshotObjectTO snapshot = setCorrectSnapshotSize(dst, dstPath); LOGGER.info("Actual file size for '{}' is {}", dstPath, snapshot.getPhysicalSize()); @@ -171,6 +198,9 @@ public CopyCmdAnswer execute(LinstorBackupSnapshotCommand cmd, LibvirtComputingR LOGGER.error(error); return new CopyCmdAnswer(cmd, e); } finally { + if (src.getVolume() != null) { + src.getVolume().clearPassphrase(); + } cleanupSecondaryPool(secondaryPool); if (zfsHidden) { zfsSnapdev(true, src.getPath()); diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java index 2d6df5f2296a..51d0ed88e340 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorRevertBackupSnapshotCommandWrapper.java @@ -17,6 +17,7 @@ package com.cloud.hypervisor.kvm.resource.wrapper; import java.io.File; +import java.util.Collections; import com.cloud.agent.api.to.DataStoreTO; import com.cloud.api.storage.LinstorRevertBackupSnapshotCommand; @@ -31,9 +32,12 @@ import org.apache.cloudstack.storage.datastore.util.LinstorUtil; import org.apache.cloudstack.storage.to.SnapshotObjectTO; import org.apache.cloudstack.storage.to.VolumeObjectTO; +import org.apache.cloudstack.utils.cryptsetup.KeyFile; +import org.apache.cloudstack.utils.qemu.QemuImageOptions; 
import org.apache.cloudstack.utils.qemu.QemuImg; import org.apache.cloudstack.utils.qemu.QemuImgException; import org.apache.cloudstack.utils.qemu.QemuImgFile; +import org.apache.cloudstack.utils.qemu.QemuObject; import org.joda.time.Duration; import org.libvirt.LibvirtException; @@ -43,8 +47,9 @@ public final class LinstorRevertBackupSnapshotCommandWrapper { private void convertQCow2ToRAW( - KVMStoragePool pool, final String srcPath, final String dstUuid, int waitMilliSeconds) - throws LibvirtException, QemuImgException + KVMStoragePool pool, final String srcPath, final String dstUuid, final byte[] passphrase, + int waitMilliSeconds) + throws LibvirtException, QemuImgException, java.io.IOException { final String dstPath = pool.getPhysicalDisk(dstUuid).getPath(); final QemuImgFile srcQemuFile = new QemuImgFile( @@ -60,7 +65,20 @@ private void convertQCow2ToRAW( } final QemuImg qemu = new QemuImg(waitMilliSeconds, zeroedDevice, true); final QemuImgFile dstFile = new QemuImgFile(dstPath, QemuImg.PhysicalDiskFormat.RAW); - qemu.convert(srcQemuFile, dstFile); + if (passphrase != null && passphrase.length > 0) { + // The backed-up qcow2 is LUKS-encrypted with the volume's passphrase. Decrypt it while + // writing plaintext to the (decrypted) DRBD device; the Linstor LUKS layer re-encrypts it, + // so no qemu encryption must be applied to the destination. 
+ try (KeyFile keyFile = new KeyFile(passphrase)) { + final QemuObject srcSecret = QemuObject.prepareSecretForQemuImg( + QemuImg.PhysicalDiskFormat.QCOW2, QemuObject.EncryptFormat.LUKS, keyFile.toString(), "sec0", null); + final QemuImageOptions srcImageOpts = new QemuImageOptions( + QemuImg.PhysicalDiskFormat.QCOW2, srcPath, "sec0"); + qemu.convert(srcQemuFile, dstFile, null, Collections.singletonList(srcSecret), srcImageOpts, null, false); + } + } else { + qemu.convert(srcQemuFile, dstFile); + } } @Override @@ -84,10 +102,13 @@ public CopyCmdAnswer execute(LinstorRevertBackupSnapshotCommand cmd, LibvirtComp secondaryPool = storagePoolMgr.getStoragePoolByURI( srcDataStore.getUrl() + File.separator + srcFile.getParent()); + // The destination volume is the (same) original volume, whose passphrase the backed-up + // qcow2 was encrypted with; use it to decrypt while restoring. convertQCow2ToRAW( linstorPool, secondaryPool.getLocalPath() + File.separator + srcFile.getName(), dst.getPath(), + dst.getPassphrase(), cmd.getWaitInMillSeconds()); final VolumeObjectTO dstVolume = new VolumeObjectTO(); @@ -99,6 +120,7 @@ public CopyCmdAnswer execute(LinstorRevertBackupSnapshotCommand cmd, LibvirtComp logger.error(error); return new CopyCmdAnswer(cmd, e); } finally { + dst.clearPassphrase(); LinstorBackupSnapshotCommandWrapper.cleanupSecondaryPool(secondaryPool); } } diff --git a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java index 3f06bee8ac83..7638e47a49e8 100644 --- a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java @@ -1088,12 +1088,22 @@ 
protected Answer copySnapshot(DataObject srcData, DataObject destData) { VirtualMachineManager.ExecuteInSequence.value()); cmd.setOptions(options); - Optional optEP = getDiskfullEP(api, pool, rscName); + // For encrypted volumes Linstor adds a LUKS layer (DRBD -> LUKS -> STORAGE). The storage + // layer snapshot device (getSnapshotPath) therefore only exposes the raw LUKS ciphertext, + // while restore writes onto the decrypted DRBD device (/dev/drbd/by-res/.../0). Backing up + // the ciphertext and writing it back to the decrypted layer corrupts the volume (and the + // shrink to the net volume size would even truncate the ciphertext). So for encrypted + // volumes we never read the storage snapshot directly: restore the snapshot into a temporary + // resource and back up its decrypted DRBD device instead, symmetric to the restore path. + final boolean encrypted = snapshotObject.getBaseVolume().getPassphraseId() != null; + Optional optEP = encrypted ? + Optional.empty() : getDiskfullEP(api, pool, rscName); Answer answer; if (optEP.isPresent()) { answer = optEP.get().sendMessage(cmd); } else { - logger.debug("No diskfull endpoint found to copy image, creating diskless endpoint"); + logger.debug("No diskfull endpoint used to copy image (encrypted={}), using temporary resource", + encrypted); answer = copyFromTemporaryResource(api, pool, rscName, snapshotName, snapshotObject, cmd); } return answer; diff --git a/test/integration/plugins/linstor/README.md b/test/integration/plugins/linstor/README.md index 4505d1b7d57c..4971c9506b5b 100644 --- a/test/integration/plugins/linstor/README.md +++ b/test/integration/plugins/linstor/README.md @@ -48,3 +48,21 @@ nosetests --with-marvin --marvin-config= /test/ ``` You can also run these tests out of the box with PyDev or PyCharm or whatever. 
+ +## Encrypted snapshot tests + +`test_linstor_encrypted_snapshots.py` covers the encrypted-volume snapshot round trip +(create encrypted root disk -> snapshot -> revert, plus the rejection of create-volume-from-snapshot) and that the +backed-up qcow2 on secondary storage is itself LUKS encrypted. + +Extra prerequisites: + +* At least one KVM host with volume-encryption support (`host.encryptionsupported == true`, i.e. + cryptsetup/qemu LUKS available). Tests self-skip if none is found. +* The Linstor resource group used (`acs-basic`) must be able to add a LUKS layer to its volumes. +* `lin.backup.snapshots` must be enabled (default) so snapshots are backed up to secondary storage; + the test sets it. With it disabled the qcow2 path is not exercised. + +``` +nosetests --with-marvin --marvin-config=<marvin-cfg-file> <cloudstack-dir>/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py --zone=<zone> --hypervisor=kvm +``` diff --git a/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py b/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py new file mode 100644 index 000000000000..5f440309bb34 --- /dev/null +++ b/test/integration/plugins/linstor/test_linstor_encrypted_snapshots.py @@ -0,0 +1,444 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json +import logging +import os +import random +import socket +import time + +# All tests inherit from cloudstackTestCase +from marvin.cloudstackTestCase import cloudstackTestCase + +# Import Integration Libraries +from marvin.cloudstackAPI import createVolume +from marvin.cloudstackException import CloudstackAPIException +from marvin.lib.base import Account, Configurations, Host, ServiceOffering, \ + Snapshot, StoragePool, User, VirtualMachine, Volume +from marvin.lib.common import get_domain, get_template, get_zone, list_hosts, list_virtual_machines, list_volumes +from marvin.lib.utils import cleanup_resources +from marvin.sshClient import SshClient +from nose.plugins.attrib import attr + +# Prerequisites: +# Only one zone / pod / cluster +# Only KVM hypervisor (Linstor only supports KVM) +# At least one KVM host with volume-encryption support (host.encryptionsupported == True), +# i.e. cryptsetup/qemu with LUKS available on the host. +# One Linstor storage pool whose resource-group can add a LUKS layer (encrypted volumes). +# 'lin.backup.snapshots' enabled (default true) so snapshots are backed up to secondary storage +# as qcow2 -- that is the path these tests are meant to exercise. With it disabled, snapshots +# stay on primary as Linstor system snapshots and a different (rollback) code path is used. +# +# What this exercises (the encrypted-snapshot round trip): +# * backup: decrypted DRBD device -> LUKS-encrypted qcow2 on secondary +# * revert: encrypted qcow2 -> decrypted, written to the DRBD device (Linstor re-encrypts) +# * create: createVolumeFromSnapshot on the encrypted snapshot must be rejected (guard test, see test_02) +# +# Note on verification: Linstor encrypts inside the DRBD stack (LUKS layer), so the libvirt domain +# XML does NOT carry <encryption> like hypervisor-based encryption does. 
Correctness +# is therefore verified by a data round trip (write marker -> snapshot -> change -> restore -> read), +# and encryption-at-rest is verified by inspecting the backed-up qcow2 with 'qemu-img info'. + +MARKER_PATH = "/root/cs_enc_marker.txt" + + +class TestData: + account = "account" + computeOffering = "computeoffering" + diskName = "diskname" + domainId = "domainId" + hypervisor = "hypervisor" + provider = "provider" + scope = "scope" + storageTag = "linstor" + tags = "tags" + user = "user" + virtualMachine = "virtualmachine" + zoneId = "zoneId" + + def __init__(self, linstor_controller_url): + self.testdata = { + TestData.account: { + "email": "test-enc@test.com", + "firstname": "John", + "lastname": "Doe", + "username": "test-enc", + "password": "test" + }, + TestData.user: { + "email": "user-enc@test.com", + "firstname": "Jane", + "lastname": "Doe", + "username": "test-enc-user", + "password": "password" + }, + "primarystorage": { + "name": "LinstorEncPool-%d" % random.randint(0, 100000), + TestData.scope: "ZONE", + "url": linstor_controller_url, + TestData.provider: "Linstor", + TestData.tags: TestData.storageTag, + TestData.hypervisor: "KVM", + "details": { + "resourceGroup": "acs-basic" + } + }, + TestData.virtualMachine: { + "name": "TestEncVM", + "displayname": "Test Encrypted VM" + }, + # encryptroot=True is passed as a create kwarg, not in this dict + TestData.computeOffering: { + "name": "Linstor_Compute_Encrypted", + "displaytext": "Linstor_Compute_Encrypted", + "cpunumber": 1, + "cpuspeed": 500, + "memory": 512, + "storagetype": "shared", + TestData.tags: TestData.storageTag + }, + TestData.diskName: "restored-from-enc-snap", + TestData.zoneId: 1, + TestData.domainId: 1, + } + + +class ServiceReady: + @classmethod + def ready(cls, hostname, port): + try: + s = socket.create_connection((hostname, port), timeout=1) + s.close() + return True + except (ConnectionRefusedError, socket.timeout, OSError): + return False + + @classmethod + def 
wait(cls, hostname, port, wait_interval=5, timeout=120, service_name='ssh'): + starttime = int(round(time.time() * 1000)) + while not cls.ready(hostname, port): + if starttime + timeout * 1000 < int(round(time.time() * 1000)): + raise RuntimeError("{s} {h} cannot be reached.".format(s=service_name, h=hostname)) + time.sleep(wait_interval) + return True + + @classmethod + def wait_ssh_ready(cls, hostname, wait_interval=2, timeout=120): + return cls.wait(hostname, 22, wait_interval, timeout, "ssh") + + +class TestLinstorEncryptedSnapshots(cloudstackTestCase): + + @classmethod + def setUpClass(cls): + testclient = super(TestLinstorEncryptedSnapshots, cls).getClsTestClient() + + cls.apiClient = testclient.getApiClient() + cls.dbConnection = testclient.getDbConnection() + + cls._cleanup = [] + cls.skip_reason = None + + # Linstor is KVM-only, so the hypervisor type is not probed via getHypervisorInfo() (which is + # only populated when nosetests is invoked with --hypervisor). Instead we require an actual KVM + # host that supports volume encryption below. + + # The first host runs the Linstor controller (per the Linstor test prerequisites). + first_host = list_hosts(cls.apiClient)[0] + cls.testdata = TestData(first_host.ipaddress).testdata + + cls.zone = get_zone(cls.apiClient, zone_id=cls.testdata[TestData.zoneId]) + cls.domain = get_domain(cls.apiClient, cls.testdata[TestData.domainId]) + cls.template = get_template(cls.apiClient, cls.zone.id, hypervisor="KVM") + + # Host SSH credentials, only needed by test_03 to inspect the backed-up qcow2 on secondary + # storage. A full marvin config carries these under zones->pods->clusters->hosts, but a + # lightweight config may omit them; in that case fall back to HOST_SSH_USER / HOST_SSH_PASSWORD + # env vars. Never fail class setup over this - the other tests don't need host SSH. 
+ cls.hostConfig = None + try: + cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0] \ + .__dict__["hosts"][0].__dict__ + except (KeyError, IndexError, AttributeError, TypeError): + host_user = os.environ.get("HOST_SSH_USER") + host_pass = os.environ.get("HOST_SSH_PASSWORD") + if host_user and host_pass: + cls.hostConfig = {"username": host_user, "password": host_pass} + + if not cls._encryption_capable_host_exists(): + cls.skip_reason = "No KVM host with volume-encryption support found" + return + + # Ensure snapshots are backed up to secondary storage (the path under test). + Configurations.update(cls.apiClient, name="lin.backup.snapshots", value="true") + + primarystorage = cls.testdata["primarystorage"] + # Registering the pool makes the management server call the Linstor controller (to read the + # resource-group capacity). If the controller enforces authentication, that call needs an API + # token, supplied as the 'lin.auth.apitoken' add-pool detail. Provide it via LINSTOR_API_TOKEN + # so it is never hard-coded; leave it unset for an unauthenticated controller. + api_token = os.environ.get("LINSTOR_API_TOKEN") + if api_token: + primarystorage["details"]["lin.auth.apitoken"] = api_token + + try: + cls.primary_storage = StoragePool.create( + cls.apiClient, + primarystorage, + scope=primarystorage[TestData.scope], + zoneid=cls.zone.id, + provider=primarystorage[TestData.provider], + tags=primarystorage[TestData.tags], + hypervisor=primarystorage[TestData.hypervisor] + ) + except Exception as e: + cls.skip_reason = ( + "Could not register the Linstor primary storage pool (%s). If the Linstor controller " + "requires authentication, set the LINSTOR_API_TOKEN env var to a valid controller API " + "token before running these tests." % e) + return + + # Compute offering with encrypted root, pinned to the Linstor pool via the storage tag. 
+ cls.compute_offering_encrypted = ServiceOffering.create( + cls.apiClient, + cls.testdata[TestData.computeOffering], + encryptroot=True + ) + + cls.account = Account.create(cls.apiClient, cls.testdata[TestData.account], admin=1) + cls.user = User.create( + cls.apiClient, cls.testdata[TestData.user], + account=cls.account.name, domainid=cls.domain.id) + + cls._cleanup = [ + cls.compute_offering_encrypted, + cls.user, + cls.account, + ] + + @classmethod + def tearDownClass(cls): + try: + cleanup_resources(cls.apiClient, cls._cleanup) + if getattr(cls, "primary_storage", None) is not None: + cls.primary_storage.delete(cls.apiClient) + except Exception as e: + logging.debug("Exception in tearDownClass: %s" % e) + + def setUp(self): + if self.skip_reason: + self.skipTest(self.skip_reason) + self.cleanup = [] + + def tearDown(self): + cleanup_resources(self.apiClient, self.cleanup) + + # --------------------------------------------------------------------- # + # Tests + # --------------------------------------------------------------------- # + + @attr(tags=['basic'], required_hardware=True) + def test_01_revert_encrypted_root_snapshot(self): + """Snapshot an encrypted root volume, change it, revert, and verify the data and boot.""" + vm = self._deploy_encrypted_vm("TestEncVM-revert") + + # 1. write a marker into the encrypted root volume + self._write_marker(vm, "linstor-encrypted-v1") + + # 2. snapshot the (stopped) root volume -> encrypted qcow2 on secondary + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + # 3. change the data so a successful revert is detectable + self._start_vm(vm) + self._write_marker(vm, "linstor-encrypted-v2-CHANGED") + + # 4. revert the volume to the snapshot (requires the VM stopped) + vm.stop(self.apiClient) + Volume.revertToSnapshot(self.apiClient, snapshot.id) + + # 5. 
the VM must boot again and the original data must be back + self._start_vm(vm) + restored = self._read_marker(vm) + self.assertEqual( + "linstor-encrypted-v1", restored, + "Reverted encrypted root volume has wrong content (got %r) - decryption/round-trip broken" % restored + ) + + @attr(tags=['basic'], required_hardware=True) + def test_02_create_volume_from_encrypted_snapshot_is_rejected(self): + """Creating a new volume from an encrypted volume's snapshot must be rejected by CloudStack. + + CloudStack core (VolumeApiServiceImpl) unconditionally blocks this for any encrypted source + volume ("Cannot create new volumes from encrypted volume snapshots"), so the request must never + reach the storage layer. This is a guard test: if the limitation is ever lifted, decryption + support for the create-from-snapshot path (KVMStorageProcessor / LinstorStorageAdaptor) must be + added and this test updated accordingly. + """ + vm = self._deploy_encrypted_vm("TestEncVM-create") + + self._write_marker(vm, "linstor-encrypted-create-src") + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + cmd = createVolume.createVolumeCmd() + cmd.name = "%s-%d" % (self.testdata[TestData.diskName], random.randint(0, 100000)) + cmd.zoneid = self.zone.id + cmd.account = self.account.name + cmd.domainid = self.domain.id + cmd.snapshotid = snapshot.id + + try: + self.apiClient.createVolume(cmd) + self.fail("Creating a volume from an encrypted volume snapshot should have been rejected") + except CloudstackAPIException as e: + self.assertIn( + "encrypted volume snapshots", str(e), + "Unexpected error creating volume from encrypted snapshot: %s" % e + ) + + @attr(tags=['basic'], required_hardware=True) + def test_03_backed_up_snapshot_qcow2_is_encrypted(self): + """The qcow2 written to secondary storage for an encrypted volume must itself be LUKS encrypted.""" + if not self.hostConfig: + self.skipTest("No host SSH credentials available (set HOST_SSH_USER/HOST_SSH_PASSWORD or " + 
"provide them in the marvin config) - cannot inspect the secondary-storage qcow2") + vm = self._deploy_encrypted_vm("TestEncVM-atrest") + self._write_marker(vm, "linstor-encrypted-atrest") + vm.stop(self.apiClient) + snapshot = self._snapshot_root_volume(vm) + + info = self._qemu_img_info_of_backed_up_snapshot(snapshot) + if info is None: + self.skipTest("Could not locate the backed-up snapshot on secondary storage to inspect it") + + encrypted = bool(info.get("encrypted")) or "encrypt" in json.dumps(info.get("format-specific", {})) + self.assertTrue( + encrypted, + "Backed-up snapshot qcow2 is NOT encrypted at rest: %s" % json.dumps(info) + ) + + # --------------------------------------------------------------------- # + # Helpers + # --------------------------------------------------------------------- # + + def _deploy_encrypted_vm(self, name): + vm = VirtualMachine.create( + self.apiClient, + {"name": name, "displayname": name}, + accountid=self.account.name, + zoneid=self.zone.id, + serviceofferingid=self.compute_offering_encrypted.id, + templateid=self.template.id, + domainid=self.domain.id, + startvm=False, + mode='basic', + ) + self.cleanup.insert(0, vm) + self._start_vm(vm) + return vm + + def _snapshot_root_volume(self, vm): + root = list_volumes(self.apiClient, virtualmachineid=vm.id, type="ROOT", listall=True)[0] + snapshot = Snapshot.create( + self.apiClient, + volume_id=root.id, + account=self.account.name, + domainid=self.domain.id, + ) + self.assertIsNotNone(snapshot, "Could not create snapshot of encrypted root volume") + self.cleanup.insert(0, snapshot) + return snapshot + + def _vm_ssh(self, vm): + # The VM is deployed stopped, so its instance has no ssh_ip yet; the IP may also change across + # stop/start cycles. Always pass the current address from a fresh lookup. 
+ ipaddress = self._get_vm(vm.id).ipaddress + return vm.get_ssh_client(ipaddress=ipaddress, reconnect=True, retries=5) + + def _write_marker(self, vm, content): + ssh = self._vm_ssh(vm) + ssh.execute("echo '%s' > %s" % (content, MARKER_PATH)) + ssh.execute("sync") + + def _read_marker(self, vm): + ssh = self._vm_ssh(vm) + result = ssh.execute("cat %s" % MARKER_PATH) + return result[0].strip() if result else None + + @classmethod + def _encryption_capable_host_exists(cls): + hosts = Host.list(cls.apiClient, zoneid=cls.zone.id, type='Routing', hypervisor='KVM', state='Up') + return any(getattr(h, "encryptionsupported", False) for h in (hosts or [])) + + @classmethod + def _get_vm(cls, vm_id): + return list_virtual_machines(cls.apiClient, id=vm_id)[0] + + @classmethod + def _start_vm(cls, vm): + vm_for_check = cls._get_vm(vm.id) + if vm_for_check.state == VirtualMachine.STOPPED: + vm.start(cls.apiClient) + vm_for_check = cls._get_vm(vm.id) + ServiceReady.wait_ssh_ready(vm_for_check.ipaddress) + return vm_for_check + + def _host_ssh(self): + host = list_hosts(self.apiClient, type='Routing', hypervisor='KVM', state='Up')[0] + return SshClient( + host=host.ipaddress, port=22, + user=self.hostConfig['username'], passwd=self.hostConfig['password']) + + def _qemu_img_info_of_backed_up_snapshot(self, snapshot): + """Self-mount the secondary NFS export on a host and run 'qemu-img info' on the snapshot file.""" + # The backed-up snapshot's physical path on secondary storage isn't exposed via the API, so we + # read it from the DB. The DB may be unreachable from where the tests run (e.g. MariaDB bound to + # localhost on the management server); in that case return None so the test skips. 
+ try: + rows = self.dbConnection.execute( + "SELECT ss.install_path " + "FROM snapshot_store_ref ss JOIN snapshots s ON s.id = ss.snapshot_id " + "WHERE s.uuid = '%s' AND ss.store_role = 'Image'" % snapshot.id) + store = self.dbConnection.execute( + "SELECT url FROM image_store WHERE role = 'Image' AND removed IS NULL LIMIT 1") + except Exception as e: + logging.debug("DB lookup for snapshot install path failed: %s" % e) + return None + + if not rows or not rows[0][0] or not store or not store[0][0]: + return None + install_path = rows[0][0] + url = store[0][0] # e.g. nfs://<server>/<export> + if not url.startswith("nfs://"): + return None + server, export = url[len("nfs://"):].split("/", 1) + + ssh = self._host_ssh() + mount_point = "/tmp/cs_sectest_%d" % random.randint(0, 100000) + try: + ssh.execute("mkdir -p %s" % mount_point) + ssh.execute("mount -t nfs -o ro %s:/%s %s" % (server, export, mount_point)) + out = ssh.execute("qemu-img info --output=json %s/%s" % (mount_point, install_path)) + return json.loads("".join(out)) if out else None + except Exception as e: + logging.debug("qemu-img info on secondary failed: %s" % e) + return None + finally: + ssh.execute("umount %s 2>/dev/null; rmdir %s 2>/dev/null" % (mount_point, mount_point))