From 7c95d6b24490dbcc2651fe14cda7e53cb9f34c6c Mon Sep 17 00:00:00 2001 From: "zhijian.liu" Date: Tue, 7 Apr 2026 13:59:24 +0800 Subject: [PATCH] [crypto]: Fix the issue where VMs fail to boot after deleting NKP and reimporting them. --- .../TpmEncryptedResourceKeyBackend.java | 2 + conf/springConfigXml/Kvm.xml | 2 + ...mTpmEncryptedResourceKeyRefJdbcRepair.java | 39 ++++++ .../org/zstack/kvm/tpm/KvmTpmExtensions.java | 117 +++++++++++++++++- 4 files changed, 155 insertions(+), 5 deletions(-) create mode 100644 plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmEncryptedResourceKeyRefJdbcRepair.java diff --git a/compute/src/main/java/org/zstack/compute/vm/devices/TpmEncryptedResourceKeyBackend.java b/compute/src/main/java/org/zstack/compute/vm/devices/TpmEncryptedResourceKeyBackend.java index 2992af5ec7a..368935fe6f1 100644 --- a/compute/src/main/java/org/zstack/compute/vm/devices/TpmEncryptedResourceKeyBackend.java +++ b/compute/src/main/java/org/zstack/compute/vm/devices/TpmEncryptedResourceKeyBackend.java @@ -5,6 +5,8 @@ /** * Responsible for handling the replication or reset of encryption resource keys * and other tasks in VM TPM cloning scenarios. + * Avoid inserting a second EncryptedResourceKeyRef row when wrapped material already exists + * ({@link #attachKeyProviderToTpm}); see KVM TPM extensions for orphan-ref cleanup on VM paths. */ public interface TpmEncryptedResourceKeyBackend { diff --git a/conf/springConfigXml/Kvm.xml b/conf/springConfigXml/Kvm.xml index 13625bb3ae2..e7056bb4210 100755 --- a/conf/springConfigXml/Kvm.xml +++ b/conf/springConfigXml/Kvm.xml @@ -252,6 +252,8 @@ + + diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmEncryptedResourceKeyRefJdbcRepair.java b/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmEncryptedResourceKeyRefJdbcRepair.java new file mode 100644 index 00000000000..99e2e356af0 --- /dev/null +++ b/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmEncryptedResourceKeyRefJdbcRepair.java @@ -0,0 +1,39 @@ +package org.zstack.kvm.tpm; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.transaction.annotation.Propagation; +import org.springframework.transaction.annotation.Transactional; +import org.zstack.core.db.DatabaseFacade; +import org.zstack.header.tpm.entity.TpmVO; + +import javax.persistence.Query; + +/** + * Per-TPM EncryptedResourceKeyRef native repair in a separate bean so {@code REQUIRES_NEW} applies without self-proxy. + */ +public class KvmTpmEncryptedResourceKeyRefJdbcRepair { + + @Autowired + private DatabaseFacade databaseFacade; + + @Transactional(propagation = Propagation.REQUIRES_NEW) + public int deleteOrphanPlaceholderTpmKeyRefRows(String tpmUuid) { + Query q = databaseFacade.getEntityManager().createNativeQuery( + "DELETE FROM EncryptedResourceKeyRefVO WHERE resourceType = :rt AND resourceUuid = :tu " + + "AND providerUuid IS NOT NULL AND kekRef IS NULL"); + q.setParameter("rt", TpmVO.class.getSimpleName()); + q.setParameter("tu", tpmUuid); + return q.executeUpdate(); + } + + @Transactional(propagation = Propagation.REQUIRES_NEW) + public int applyProviderUuidOnRowWithKek(String tpmUuid, String providerUuid) { + Query q = databaseFacade.getEntityManager().createNativeQuery( + "UPDATE EncryptedResourceKeyRefVO SET providerUuid = :pu, lastOpDate = CURRENT_TIMESTAMP(3) " + + "WHERE resourceType = :rt AND resourceUuid = :tu AND providerUuid IS NULL AND kekRef IS NOT NULL"); + q.setParameter("pu", providerUuid); + q.setParameter("rt", TpmVO.class.getSimpleName()); + q.setParameter("tu", tpmUuid); + return q.executeUpdate(); + } +} diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmExtensions.java b/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmExtensions.java index 5d98121f66a..66d6c3e447a 100644 --- a/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmExtensions.java +++ b/plugin/kvm/src/main/java/org/zstack/kvm/tpm/KvmTpmExtensions.java @@ -63,6 +63,8 @@ public class KvmTpmExtensions implements KVMStartVmExtensionPoint, private EncryptedResourceKeyManager resourceKeyManager; @Autowired private CloudBus bus; + @Autowired + private KvmTpmEncryptedResourceKeyRefJdbcRepair tpmKeyRefJdbcRepair; private final Object hostFileLock = new Object(); @@ -73,9 +75,14 @@ public void beforeStartVmOnKvm(KVMHostInventory host, VmInstanceSpec spec, KVMAg return; } + String tpmUuid = devicesSpec.getTpm().getTpmUuid(); + repairOrphanTpmKeyRefPlaceholders(tpmUuid); String keyProviderUuid = devicesSpec.getTpm().getKeyProviderUuid(); if (StringUtils.isBlank(keyProviderUuid)) { - keyProviderUuid = resourceKeyBackend.findKeyProviderUuidByTpm(devicesSpec.getTpm().getTpmUuid()); + keyProviderUuid = safeFindKeyProviderUuidByTpm(tpmUuid); + } + if (StringUtils.isBlank(keyProviderUuid)) { + keyProviderUuid = tryRebindKeyProviderByName(tpmUuid); } TpmTO tpm = new TpmTO(); @@ -133,14 +140,33 @@ public void preInstantiateVmResource(VmInstanceSpec spec, Completion completion) completion.success(); return; } - TpmSpec tpmSpec = devicesSpec.getTpm(); clearRollbackInfo(spec); final PrepareTpmResourceContext context = new PrepareTpmResourceContext(); context.tpmUuid = tpmSpec.getTpmUuid(); + repairOrphanTpmKeyRefPlaceholders(context.tpmUuid); context.backupFileUuid = tpmSpec.getBackupFileUuid(); // maybe null - context.providerUuid = resourceKeyBackend.findKeyProviderUuidByTpm(context.tpmUuid); - context.providerName = resourceKeyBackend.findKeyProviderNameByTpm(context.tpmUuid); + context.providerUuid = safeFindKeyProviderUuidByTpm(context.tpmUuid); + context.providerName = safeFindKeyProviderNameByTpm(context.tpmUuid); + + if (StringUtils.isBlank(context.providerUuid) && StringUtils.isNotBlank(tpmSpec.getKeyProviderUuid())) { + context.providerUuid = tpmSpec.getKeyProviderUuid(); + resourceKeyBackend.attachKeyProviderToTpm(context.tpmUuid, context.providerUuid); + context.providerName = safeFindKeyProviderNameByTpm(context.tpmUuid); + logger.info(String.format("auto repaired TPM key provider binding for tpm[uuid:%s], providerUuid:%s", + context.tpmUuid, context.providerUuid)); + } + + if (StringUtils.isBlank(context.providerUuid)) { + String reboundUuid = tryRebindKeyProviderByName(context.tpmUuid); + if (StringUtils.isNotBlank(reboundUuid)) { + context.providerUuid = reboundUuid; + context.providerName = safeFindKeyProviderNameByTpm(context.tpmUuid); + logger.info(String.format( + "rebound TPM to key provider by providerName after ref.providerUuid was cleared, tpm[uuid:%s], providerUuid:%s", + context.tpmUuid, context.providerUuid)); + } + } final SimpleFlowChain chain = new SimpleFlowChain(); chain.setName("prepare-tpm-resources-for-vm-" + spec.getVmInventory().getUuid()); @@ -182,7 +208,7 @@ public boolean skip(Map data) { @Override public void run(FlowTrigger trigger, Map data) { - if (StringUtils.isBlank(context.providerUuid) || StringUtils.isBlank(context.providerName)) { + if (StringUtils.isBlank(context.providerUuid)) { trigger.fail(operr("missing TPM resource key binding for tpm[uuid:%s], attachKeyProviderToTpm must run before create-dek", context.tpmUuid)); return; } @@ -323,4 +349,85 @@ private void clearRollbackInfo(VmInstanceSpec spec) { spec.getDevicesSpec().getTpm().setResourceKeyCreatedNew(false); spec.getDevicesSpec().getTpm().setResourceKeyProviderUuid(null); } + + /** + * When the key provider row was deleted, ref {@code providerUuid} may be nulled but {@code providerName} remains. + * Resolve uuid by name (unique per NKP) and update the existing row that still has {@code kekRef} — do not call + * {@code attachKeyProviderToTpm}, which can insert a second placeholder row (providerUuid set, {@code kekRef} null) + * and break unwrap / swtpm. + */ + private String tryRebindKeyProviderByName(String tpmUuid) { + String name = safeFindKeyProviderNameByTpm(tpmUuid); + if (StringUtils.isBlank(name)) { + return null; + } + String uuid = safeFindKeyProviderUuidByName(name, tpmUuid); + if (StringUtils.isBlank(uuid)) { + return null; + } + int updated = tpmKeyRefJdbcRepair.applyProviderUuidOnRowWithKek(tpmUuid, uuid); + if (updated > 0) { + logger.info(String.format( + "updated EncryptedResourceKeyRef.providerUuid in-place for tpm[uuid:%s], rows:%d", tpmUuid, updated)); + } + return uuid; + } + + private void repairOrphanTpmKeyRefPlaceholders(String tpmUuid) { + int deleted = tpmKeyRefJdbcRepair.deleteOrphanPlaceholderTpmKeyRefRows(tpmUuid); + if (deleted > 0) { + logger.info(String.format( + "removed %d EncryptedResourceKeyRef placeholder row(s) (providerUuid set, kekRef null) for tpm[uuid:%s]", + deleted, tpmUuid)); + } + } + + private String safeFindKeyProviderUuidByTpm(String tpmUuid) { + try { + return resourceKeyBackend.findKeyProviderUuidByTpm(tpmUuid); + } catch (RuntimeException e) { + if (isNonUniqueResultException(e)) { + logger.warn(String.format( + "multiple EncryptedResourceKeyRef rows for tpm[uuid:%s], fix duplicate refs in DB", tpmUuid), e); + return null; + } + throw e; + } + } + + private String safeFindKeyProviderNameByTpm(String tpmUuid) { + try { + return resourceKeyBackend.findKeyProviderNameByTpm(tpmUuid); + } catch (RuntimeException e) { + if (isNonUniqueResultException(e)) { + logger.warn(String.format( + "multiple EncryptedResourceKeyRef rows for tpm[uuid:%s], fix duplicate refs in DB", tpmUuid), e); + return null; + } + throw e; + } + } + + private String safeFindKeyProviderUuidByName(String providerName, String tpmUuid) { + try { + return resourceKeyBackend.findKeyProviderUuidByName(providerName); + } catch (RuntimeException e) { + if (isNonUniqueResultException(e)) { + logger.warn(String.format( + "multiple KeyProvider rows for name[%s] (providerName should be unique per NKP); tpm[uuid:%s]", + providerName, tpmUuid), e); + return null; + } + throw e; + } + } + + private static boolean isNonUniqueResultException(Throwable e) { + for (Throwable t = e; t != null; t = t.getCause()) { + if (t.getClass().getName().endsWith("NonUniqueResultException")) { + return true; + } + } + return false; + } }