From 39830658137a85ee5f83d4a617a640a3a9d8f66e Mon Sep 17 00:00:00 2001 From: "tao.gan" Date: Thu, 14 May 2026 18:22:57 +0800 Subject: [PATCH 1/2] [storage]: symmetric snapshot group disband on chain delete ungroupAfterDeleted now mirrors ungroupAfterDeleteSingleSnapshot: regardless of root/data volume type, a snapshot group VO is only disbanded after ALL of its refs reach snapshotDeleted=true. Previously the root-volume path immediately removed the group VO once the root chain finished, leaving data-volume refs as orphans pointing to a non-existent group. After this change the group lives until every ref is marked deleted, which is the symmetric behavior with the single-snapshot path and the precondition the integrity check (landed in a separate commit) relies on. Resolves: ZSV-10538 Change-Id: I77707a78706271697463676a756c617a796f6c61 --- .../snapshot/VolumeSnapshotTreeBase.java | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java b/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java index beb8b044d1a..8eed9233077 100755 --- a/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java @@ -2142,26 +2142,37 @@ protected Boolean scripts() { return cleanup; } - // The logic for cleaning up snapshot groups when deleting a snapshot chain + // The logic for cleaning up snapshot groups when deleting a snapshot chain. + // Symmetric with ungroupAfterDeleteSingleSnapshot: regardless of root/data volume type, + // a group is only disbanded after ALL its refs have snapshotDeleted=true. + // This avoids leaving orphan refs (root chain delete used to immediately drop the group VO, + // leaving data-volume refs pointing to a non-existent group). private void ungroupAfterDeleted(List snapshots) { List uuids = snapshots.stream().map(VolumeSnapshotInventory::getUuid).collect(Collectors.toList()); SQL.New(VolumeSnapshotGroupRefVO.class).in(VolumeSnapshotGroupRefVO_.volumeSnapshotUuid, uuids) .set(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).update(); - if (currentRoot.getVolumeType().equals(VolumeType.Root.toString())) { - List groupUuids = new ArrayList<>(); - for (VolumeSnapshotInventory snapshot : snapshots) { - String groupUuid = snapshot.getGroupUuid(); - if (groupUuid != null) { - logger.debug(String.format("root volume snapshot[uuid:%s, name:%s] has been deleted, " + - "ungroup snapshot group[uuid:%s]", snapshot.getUuid(), snapshot.getName(), groupUuid)); - groupUuids.add(groupUuid); - } + Set touchedGroupUuids = snapshots.stream() + .map(VolumeSnapshotInventory::getGroupUuid) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + + List groupsToDelete = new ArrayList<>(); + for (String groupUuid : touchedGroupUuids) { + long remaining = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuid) + .eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, false).count(); + if (remaining == 0) { + logger.debug(String.format("snapshot group[uuid:%s] all volume snapshots have been deleted, " + + "disbanding group", groupUuid)); + groupsToDelete.add(groupUuid); } + } - groupUuids.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid)); - cleanVmHostBackupFilesForGroup(groupUuids); - dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class); + if (!groupsToDelete.isEmpty()) { + groupsToDelete.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid)); + cleanVmHostBackupFilesForGroup(groupsToDelete); + dbf.removeByPrimaryKeys(groupsToDelete, VolumeSnapshotGroupVO.class); } } From 467e8000dc98ebbac4b60cc89cce31107ce87b5b Mon Sep 17 00:00:00 2001 From: "tao.gan" Date: Thu, 14 May 2026 18:23:09 +0800 Subject: [PATCH 2/2] [storage]: VM-scoped snapshot group integrity check + cascade Once symmetric disband is in place, an incomplete snapshot group (some refs deleted, some still alive on the VM) is sticky and must not be silently ignored by subsequent operations on the same VM. Block on the following entries when the VM has any incomplete group (the incomplete group itself is excluded from the check, so users always have a path to clear the debt): - APIDeleteVolumeSnapshotGroupMsg (other groups): blocked unless a new boolean force=true is passed. force is added to the API as the operator bypass; other entries deliberately do NOT expose force. - APICreateVolumeSnapshotGroupMsg: blocked. - APIAttachDataVolumeToVmMsg / APIDetachDataVolumeFromVmMsg: blocked. APIDestroyVmInstanceMsg is intentionally NOT blocked: the new VolumeSnapshotGroupCascadeExtension keys on VmInstanceVO and force-cleans every group VO (refs -> archived metadata -> host backup files -> group VO) when the VM is destroyed, so VM teardown is never wedged by leftover incomplete groups. Single-snapshot APIs are also exempt by design. Resolves: ZSV-10538 Change-Id: I717363667067726a796467787568756d6f7a706e --- conf/springConfigXml/volumeSnapshot.xml | 6 + .../APIDeleteVolumeSnapshotGroupMsg.java | 11 ++ .../group/VolumeSnapshotGroupBase.java | 14 ++ .../VolumeSnapshotGroupCascadeExtension.java | 153 ++++++++++++++++++ .../group/VolumeSnapshotGroupChecker.java | 44 +++++ .../storage/volume/VolumeApiInterceptor.java | 30 ++++ 6 files changed, 258 insertions(+) create mode 100644 storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java diff --git a/conf/springConfigXml/volumeSnapshot.xml b/conf/springConfigXml/volumeSnapshot.xml index f2ad0dc93a4..e2afe874754 100755 --- a/conf/springConfigXml/volumeSnapshot.xml +++ b/conf/springConfigXml/volumeSnapshot.xml @@ -41,6 +41,12 @@ + + + + + + diff --git a/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java b/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java index cb1f8dde454..2a534d29203 100644 --- a/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java +++ b/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java @@ -31,6 +31,9 @@ public class APIDeleteVolumeSnapshotGroupMsg extends APIDeleteMessage implements @APIParam(required = false, validValues = {"single", "chain", "auto"}) private String scope = "chain"; + @APIParam(required = false) + private boolean force = false; + @APINoSee private String vmUuid; @@ -58,6 +61,14 @@ public void setScope(String scope) { this.scope = scope; } + public boolean isForce() { + return force; + } + + public void setForce(boolean force) { + this.force = force; + } + public String getVmUuid() { return vmUuid; } diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java index fcb6907ca9e..c6e9e41de86 100644 --- a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java @@ -195,6 +195,20 @@ public String getName() { private void handleDelete(APIDeleteVolumeSnapshotGroupMsg msg, NoErrorCompletion completion) { APIDeleteVolumeSnapshotGroupEvent event = new APIDeleteVolumeSnapshotGroupEvent(msg.getId()); + + if (!msg.isForce()) { + List incomplete = VolumeSnapshotGroupChecker + .findIncompleteGroupsOnVm(self.getVmInstanceUuid(), self.getUuid()); + if (!incomplete.isEmpty()) { + event.setError(operr("VM[uuid:%s] has incomplete snapshot group(s) %s, " + + "please clean them up first (or pass force=true) before deleting other snapshot groups", + self.getVmInstanceUuid(), incomplete)); + bus.publish(event); + completion.done(); + return; + } + } + DeleteVolumeSnapshotGroupInnerMsg imsg = new DeleteVolumeSnapshotGroupInnerMsg(); imsg.setUuid(msg.getUuid()); imsg.setDeletionMode(msg.getDeletionMode()); diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java new file mode 100644 index 00000000000..786c4e1330d --- /dev/null +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java @@ -0,0 +1,153 @@ +package org.zstack.storage.snapshot.group; + +import org.springframework.beans.factory.annotation.Autowired; +import org.zstack.core.cascade.AbstractAsyncCascadeExtension; +import org.zstack.core.cascade.CascadeAction; +import org.zstack.core.cascade.CascadeConstant; +import org.zstack.core.db.DatabaseFacade; +import org.zstack.core.db.Q; +import org.zstack.core.db.SQL; +import org.zstack.header.core.Completion; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_; +import org.zstack.header.vm.VmDeletionStruct; +import org.zstack.header.vm.VmInstanceVO; +import org.zstack.header.vm.additions.VmHostBackupFileVO; +import org.zstack.header.vm.additions.VmHostBackupFileVO_; +import org.zstack.header.vm.additions.VmHostFileManager; +import org.zstack.header.vm.devices.VmInstanceResourceMetadataManager; +import org.zstack.utils.Utils; +import org.zstack.utils.logging.CLogger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Cascade extension keyed on VmInstance for cleaning up VolumeSnapshotGroup VOs + * when a VM is destroyed. + * + * Background: snapshot groups are VM-scoped. When a VM is destroyed, any remaining + * group VOs (whether complete or incomplete due to partial single-snapshot deletions) + * become orphaned. Without this cleanup, those rows would survive beyond the VM + * and pollute downstream queries. + * + * On DELETION_CHECK we do NOT block — VM destroy should proceed even with + * incomplete groups (per product decision); cleanup is automatic. + */ +public class VolumeSnapshotGroupCascadeExtension extends AbstractAsyncCascadeExtension { + private static final CLogger logger = Utils.getLogger(VolumeSnapshotGroupCascadeExtension.class); + + private static final String NAME = VolumeSnapshotGroupVO.class.getSimpleName(); + + @Autowired + private DatabaseFacade dbf; + @Autowired + private VmInstanceResourceMetadataManager vidm; + @Autowired + private VmHostFileManager vmHostFileManager; + + @Override + public void asyncCascade(CascadeAction action, Completion completion) { + if (action.isActionCode(CascadeConstant.DELETION_CLEANUP_CODE)) { + handleDeletionCleanup(action, completion); + } else if (action.isActionCode(CascadeConstant.DELETION_DELETE_CODE, + CascadeConstant.DELETION_FORCE_DELETE_CODE)) { + handleDeletion(action, completion); + } else { + completion.success(); + } + } + + private void handleDeletion(CascadeAction action, Completion completion) { + if (!VmInstanceVO.class.getSimpleName().equals(action.getParentIssuer())) { + completion.success(); + return; + } + + List vmUuids = vmUuidsFromAction(action); + if (vmUuids.isEmpty()) { + completion.success(); + return; + } + + List groupUuids = Q.New(VolumeSnapshotGroupVO.class) + .select(VolumeSnapshotGroupVO_.uuid) + .in(VolumeSnapshotGroupVO_.vmInstanceUuid, vmUuids) + .listValues(); + if (groupUuids.isEmpty()) { + completion.success(); + return; + } + + logger.debug(String.format("VM destroy cascade: force-removing %d snapshot group(s) %s for vm(s) %s " + + "(includes any incomplete groups from prior single-snapshot deletions)", + groupUuids.size(), groupUuids, vmUuids)); + + // 1. drop all refs first (FK-like constraint via business logic) + SQL.New(VolumeSnapshotGroupRefVO.class) + .in(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuids) + .delete(); + + // 2. clean associated metadata + backup files + groupUuids.forEach(vidm::deleteArchiveVmInstanceResourceMetadataGroup); + cleanVmHostBackupFilesForGroup(groupUuids); + + // 3. remove group VOs + dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class); + + completion.success(); + } + + private void cleanVmHostBackupFilesForGroup(List groupUuids) { + if (groupUuids.isEmpty()) { + return; + } + + List backupUuidList = Q.New(VmHostBackupFileVO.class) + .in(VmHostBackupFileVO_.resourceUuid, groupUuids) + .select(VmHostBackupFileVO_.uuid) + .listValues(); + + backupUuidList.forEach(vmHostFileManager::cleanVmHostBackupFile); + } + + private void handleDeletionCleanup(CascadeAction action, Completion completion) { + try { + dbf.eoCleanup(VolumeSnapshotGroupVO.class); + } catch (Throwable t) { + logger.warn("eoCleanup VolumeSnapshotGroupVO failed: " + t.getMessage()); + } finally { + completion.success(); + } + } + + private List vmUuidsFromAction(CascadeAction action) { + Object ctx = action.getParentIssuerContext(); + if (ctx == null) { + return Collections.emptyList(); + } + List uuids = new ArrayList<>(); + if (ctx instanceof List) { + for (Object o : (List) ctx) { + if (o instanceof VmDeletionStruct) { + uuids.add(((VmDeletionStruct) o).getInventory().getUuid()); + } + } + } + return uuids; + } + + @Override + public List getEdgeNames() { + return Arrays.asList(VmInstanceVO.class.getSimpleName()); + } + + @Override + public String getCascadeResourceName() { + return NAME; + } +} diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java index 9749984de47..3c3f9385953 100644 --- a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java @@ -3,6 +3,7 @@ import org.zstack.core.db.Q; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupAvailability; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_; import org.zstack.header.vo.ResourceVO; @@ -25,6 +26,49 @@ public static boolean isAvailable(String uuid) { return getAvailability(uuid).isAvailable(); } + /** + * Find all incomplete snapshot groups on a VM. + * An incomplete group is one where part of its refs have snapshotDeleted=true + * but at least one ref is still alive (snapshotDeleted=false). + * Such groups represent a "debt" that pollutes subsequent group/VM operations. + * + * @param vmInstanceUuid the VM to inspect + * @param excludeGroupUuid group uuid to exclude from the result (e.g. when the caller is + * itself trying to delete that group, do not flag it as a blocker); + * pass null to include all groups + * @return list of incomplete group uuids (excluding excludeGroupUuid); empty if none + */ + public static List findIncompleteGroupsOnVm(String vmInstanceUuid, String excludeGroupUuid) { + if (vmInstanceUuid == null) { + return Collections.emptyList(); + } + + List groupUuids = Q.New(VolumeSnapshotGroupVO.class) + .select(VolumeSnapshotGroupVO_.uuid) + .eq(VolumeSnapshotGroupVO_.vmInstanceUuid, vmInstanceUuid) + .listValues(); + + List incomplete = new ArrayList<>(); + for (Object o : groupUuids) { + String guuid = o.toString(); + if (guuid.equals(excludeGroupUuid)) { + continue; + } + long deletedRefs = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid) + .eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).count(); + if (deletedRefs == 0) { + continue; + } + long totalRefs = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid).count(); + if (deletedRefs < totalRefs) { + incomplete.add(guuid); + } + } + return incomplete; + } + public static List getAvailability(List uuids) { List results = new ArrayList<>(); List groups = Q.New(VolumeSnapshotGroupVO.class) diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java b/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java index 09e41c229f7..b956b750ce8 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java +++ b/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java @@ -44,6 +44,7 @@ import org.zstack.header.storage.snapshot.VolumeSnapshotVO; import org.zstack.header.storage.snapshot.VolumeSnapshotVO_; import org.zstack.header.storage.snapshot.group.MemorySnapshotValidatorExtensionPoint; +import org.zstack.storage.snapshot.group.VolumeSnapshotGroupChecker; import org.zstack.header.tag.SystemTagVO; import org.zstack.header.vm.APICreateVmInstanceMsg; import org.zstack.header.vm.DiskAO; @@ -213,6 +214,8 @@ private void validate(APICreateVolumeSnapshotGroupMsg msg) { throw new ApiMessageInterceptionException(argerr("volume[uuid:%s] is not root volume", msg.getRootVolumeUuid())); } + checkIncompleteSnapshotGroupsOnVm(vmvo.getUuid(), "create new snapshot group"); + if (msg.isWithMemory() && !(vmvo.getState().equals(VmInstanceState.Running) || (vmvo.getState().equals(VmInstanceState.Paused)))) { throw new ApiMessageInterceptionException(argerr("Can not take memory snapshot, vm current state[%s], but expect state are [%s, %s]", vmvo.getState().toString(), VmInstanceState.Running.toString(), VmInstanceState.Paused.toString())); @@ -316,9 +319,13 @@ private void validate(APIDetachDataVolumeFromVmMsg msg) { throw new ApiMessageInterceptionException(operr("the volume[uuid:%s, name:%s, type:%s] can't detach it", vol.getUuid(), vol.getName(), vol.getType())); } + + String vmUuid = msg.getVmUuid() != null ? msg.getVmUuid() : vol.getVmInstanceUuid(); + checkIncompleteSnapshotGroupsOnVm(vmUuid, "detach data volume"); } private void validate(APIAttachDataVolumeToVmMsg msg) { + checkIncompleteSnapshotGroupsOnVm(msg.getVmInstanceUuid(), "attach data volume"); new SQLBatch() { @Override protected void scripts() { @@ -691,6 +698,29 @@ public boolean start() { return true; } + /** + * Block VM-scoped operations when the VM has any incomplete snapshot group. + * An incomplete group is one whose refs are partially deleted (some snapshotDeleted=true, + * but at least one alive). Such groups must be cleaned up first to avoid pollution + * of subsequent group operations on this VM. + * + * Exempt operations: deleting an incomplete group itself (handled by + * {@code VolumeSnapshotGroupBase#handleDelete} which excludes self), single-snapshot + * deletion, and VM destroy (handled by VolumeSnapshotGroupCascadeExtension cleanup). + */ + private void checkIncompleteSnapshotGroupsOnVm(String vmUuid, String operationDesc) { + if (vmUuid == null) { + return; + } + List incomplete = VolumeSnapshotGroupChecker.findIncompleteGroupsOnVm(vmUuid, null); + if (!incomplete.isEmpty()) { + throw new ApiMessageInterceptionException(operr( + "VM[uuid:%s] has incomplete snapshot group(s) %s, " + + "please clean them up first before %s", + vmUuid, incomplete, operationDesc)); + } + } + @Override public boolean stop() { return true;