diff --git a/conf/springConfigXml/volumeSnapshot.xml b/conf/springConfigXml/volumeSnapshot.xml index f2ad0dc93a4..e2afe874754 100755 --- a/conf/springConfigXml/volumeSnapshot.xml +++ b/conf/springConfigXml/volumeSnapshot.xml @@ -41,6 +41,12 @@ + + + + + + diff --git a/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java b/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java index cb1f8dde454..2a534d29203 100644 --- a/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java +++ b/header/src/main/java/org/zstack/header/storage/snapshot/group/APIDeleteVolumeSnapshotGroupMsg.java @@ -31,6 +31,9 @@ public class APIDeleteVolumeSnapshotGroupMsg extends APIDeleteMessage implements @APIParam(required = false, validValues = {"single", "chain", "auto"}) private String scope = "chain"; + @APIParam(required = false) + private boolean force = false; + @APINoSee private String vmUuid; @@ -58,6 +61,14 @@ public void setScope(String scope) { this.scope = scope; } + public boolean isForce() { + return force; + } + + public void setForce(boolean force) { + this.force = force; + } + public String getVmUuid() { return vmUuid; } diff --git a/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java b/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java index beb8b044d1a..8eed9233077 100755 --- a/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java @@ -2142,26 +2142,37 @@ protected Boolean scripts() { return cleanup; } - // The logic for cleaning up snapshot groups when deleting a snapshot chain + // The logic for cleaning up snapshot groups when deleting a snapshot chain. + // Symmetric with ungroupAfterDeleteSingleSnapshot: regardless of root/data volume type, + // a group is only disbanded after ALL its refs have snapshotDeleted=true. + // This avoids leaving orphan refs (root chain delete used to immediately drop the group VO, + // leaving data-volume refs pointing to a non-existent group). private void ungroupAfterDeleted(List snapshots) { List uuids = snapshots.stream().map(VolumeSnapshotInventory::getUuid).collect(Collectors.toList()); SQL.New(VolumeSnapshotGroupRefVO.class).in(VolumeSnapshotGroupRefVO_.volumeSnapshotUuid, uuids) .set(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).update(); - if (currentRoot.getVolumeType().equals(VolumeType.Root.toString())) { - List groupUuids = new ArrayList<>(); - for (VolumeSnapshotInventory snapshot : snapshots) { - String groupUuid = snapshot.getGroupUuid(); - if (groupUuid != null) { - logger.debug(String.format("root volume snapshot[uuid:%s, name:%s] has been deleted, " + - "ungroup snapshot group[uuid:%s]", snapshot.getUuid(), snapshot.getName(), groupUuid)); - groupUuids.add(groupUuid); - } + Set touchedGroupUuids = snapshots.stream() + .map(VolumeSnapshotInventory::getGroupUuid) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + + List groupsToDelete = new ArrayList<>(); + for (String groupUuid : touchedGroupUuids) { + long remaining = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuid) + .eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, false).count(); + if (remaining == 0) { + logger.debug(String.format("snapshot group[uuid:%s] all volume snapshots have been deleted, " + + "disbanding group", groupUuid)); + groupsToDelete.add(groupUuid); } + } - groupUuids.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid)); - cleanVmHostBackupFilesForGroup(groupUuids); - dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class); + if (!groupsToDelete.isEmpty()) { + groupsToDelete.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid)); + cleanVmHostBackupFilesForGroup(groupsToDelete); + dbf.removeByPrimaryKeys(groupsToDelete, VolumeSnapshotGroupVO.class); } } diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java index fcb6907ca9e..c6e9e41de86 100644 --- a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupBase.java @@ -195,6 +195,20 @@ public String getName() { private void handleDelete(APIDeleteVolumeSnapshotGroupMsg msg, NoErrorCompletion completion) { APIDeleteVolumeSnapshotGroupEvent event = new APIDeleteVolumeSnapshotGroupEvent(msg.getId()); + + if (!msg.isForce()) { + List incomplete = VolumeSnapshotGroupChecker + .findIncompleteGroupsOnVm(self.getVmInstanceUuid(), self.getUuid()); + if (!incomplete.isEmpty()) { + event.setError(operr("VM[uuid:%s] has incomplete snapshot group(s) %s, " + + "please clean them up first (or pass force=true) before deleting other snapshot groups", + self.getVmInstanceUuid(), incomplete)); + bus.publish(event); + completion.done(); + return; + } + } + DeleteVolumeSnapshotGroupInnerMsg imsg = new DeleteVolumeSnapshotGroupInnerMsg(); imsg.setUuid(msg.getUuid()); imsg.setDeletionMode(msg.getDeletionMode()); diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java new file mode 100644 index 00000000000..786c4e1330d --- /dev/null +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupCascadeExtension.java @@ -0,0 +1,153 @@ +package org.zstack.storage.snapshot.group; + +import org.springframework.beans.factory.annotation.Autowired; +import org.zstack.core.cascade.AbstractAsyncCascadeExtension; +import org.zstack.core.cascade.CascadeAction; +import org.zstack.core.cascade.CascadeConstant; +import org.zstack.core.db.DatabaseFacade; +import org.zstack.core.db.Q; +import org.zstack.core.db.SQL; +import org.zstack.header.core.Completion; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_; +import org.zstack.header.vm.VmDeletionStruct; +import org.zstack.header.vm.VmInstanceVO; +import org.zstack.header.vm.additions.VmHostBackupFileVO; +import org.zstack.header.vm.additions.VmHostBackupFileVO_; +import org.zstack.header.vm.additions.VmHostFileManager; +import org.zstack.header.vm.devices.VmInstanceResourceMetadataManager; +import org.zstack.utils.Utils; +import org.zstack.utils.logging.CLogger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Cascade extension keyed on VmInstance for cleaning up VolumeSnapshotGroup VOs + * when a VM is destroyed. + * + * Background: snapshot groups are VM-scoped. When a VM is destroyed, any remaining + * group VOs (whether complete or incomplete due to partial single-snapshot deletions) + * become orphaned. Without this cleanup, those rows would survive beyond the VM + * and pollute downstream queries. + * + * On DELETION_CHECK we do NOT block — VM destroy should proceed even with + * incomplete groups (per product decision); cleanup is automatic. + */ +public class VolumeSnapshotGroupCascadeExtension extends AbstractAsyncCascadeExtension { + private static final CLogger logger = Utils.getLogger(VolumeSnapshotGroupCascadeExtension.class); + + private static final String NAME = VolumeSnapshotGroupVO.class.getSimpleName(); + + @Autowired + private DatabaseFacade dbf; + @Autowired + private VmInstanceResourceMetadataManager vidm; + @Autowired + private VmHostFileManager vmHostFileManager; + + @Override + public void asyncCascade(CascadeAction action, Completion completion) { + if (action.isActionCode(CascadeConstant.DELETION_CLEANUP_CODE)) { + handleDeletionCleanup(action, completion); + } else if (action.isActionCode(CascadeConstant.DELETION_DELETE_CODE, + CascadeConstant.DELETION_FORCE_DELETE_CODE)) { + handleDeletion(action, completion); + } else { + completion.success(); + } + } + + private void handleDeletion(CascadeAction action, Completion completion) { + if (!VmInstanceVO.class.getSimpleName().equals(action.getParentIssuer())) { + completion.success(); + return; + } + + List vmUuids = vmUuidsFromAction(action); + if (vmUuids.isEmpty()) { + completion.success(); + return; + } + + List groupUuids = Q.New(VolumeSnapshotGroupVO.class) + .select(VolumeSnapshotGroupVO_.uuid) + .in(VolumeSnapshotGroupVO_.vmInstanceUuid, vmUuids) + .listValues(); + if (groupUuids.isEmpty()) { + completion.success(); + return; + } + + logger.debug(String.format("VM destroy cascade: force-removing %d snapshot group(s) %s for vm(s) %s " + + "(includes any incomplete groups from prior single-snapshot deletions)", + groupUuids.size(), groupUuids, vmUuids)); + + // 1. drop all refs first (FK-like constraint via business logic) + SQL.New(VolumeSnapshotGroupRefVO.class) + .in(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuids) + .delete(); + + // 2. clean associated metadata + backup files + groupUuids.forEach(vidm::deleteArchiveVmInstanceResourceMetadataGroup); + cleanVmHostBackupFilesForGroup(groupUuids); + + // 3. remove group VOs + dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class); + + completion.success(); + } + + private void cleanVmHostBackupFilesForGroup(List groupUuids) { + if (groupUuids.isEmpty()) { + return; + } + + List backupUuidList = Q.New(VmHostBackupFileVO.class) + .in(VmHostBackupFileVO_.resourceUuid, groupUuids) + .select(VmHostBackupFileVO_.uuid) + .listValues(); + + backupUuidList.forEach(vmHostFileManager::cleanVmHostBackupFile); + } + + private void handleDeletionCleanup(CascadeAction action, Completion completion) { + try { + dbf.eoCleanup(VolumeSnapshotGroupVO.class); + } catch (Throwable t) { + logger.warn("eoCleanup VolumeSnapshotGroupVO failed: " + t.getMessage()); + } finally { + completion.success(); + } + } + + private List vmUuidsFromAction(CascadeAction action) { + Object ctx = action.getParentIssuerContext(); + if (ctx == null) { + return Collections.emptyList(); + } + List uuids = new ArrayList<>(); + if (ctx instanceof List) { + for (Object o : (List) ctx) { + if (o instanceof VmDeletionStruct) { + uuids.add(((VmDeletionStruct) o).getInventory().getUuid()); + } + } + } + return uuids; + } + + @Override + public List getEdgeNames() { + return Arrays.asList(VmInstanceVO.class.getSimpleName()); + } + + @Override + public String getCascadeResourceName() { + return NAME; + } +} diff --git a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java index 9749984de47..3c3f9385953 100644 --- a/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java +++ b/storage/src/main/java/org/zstack/storage/snapshot/group/VolumeSnapshotGroupChecker.java @@ -3,6 +3,7 @@ import org.zstack.core.db.Q; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupAvailability; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO; +import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO; import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_; import org.zstack.header.vo.ResourceVO; @@ -25,6 +26,49 @@ public static boolean isAvailable(String uuid) { return getAvailability(uuid).isAvailable(); } + /** + * Find all incomplete snapshot groups on a VM. + * An incomplete group is one where part of its refs have snapshotDeleted=true + * but at least one ref is still alive (snapshotDeleted=false). + * Such groups represent a "debt" that pollutes subsequent group/VM operations. + * + * @param vmInstanceUuid the VM to inspect + * @param excludeGroupUuid group uuid to exclude from the result (e.g. when the caller is + * itself trying to delete that group, do not flag it as a blocker); + * pass null to include all groups + * @return list of incomplete group uuids (excluding excludeGroupUuid); empty if none + */ + public static List findIncompleteGroupsOnVm(String vmInstanceUuid, String excludeGroupUuid) { + if (vmInstanceUuid == null) { + return Collections.emptyList(); + } + + List groupUuids = Q.New(VolumeSnapshotGroupVO.class) + .select(VolumeSnapshotGroupVO_.uuid) + .eq(VolumeSnapshotGroupVO_.vmInstanceUuid, vmInstanceUuid) + .listValues(); + + List incomplete = new ArrayList<>(); + for (Object o : groupUuids) { + String guuid = o.toString(); + if (guuid.equals(excludeGroupUuid)) { + continue; + } + long deletedRefs = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid) + .eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).count(); + if (deletedRefs == 0) { + continue; + } + long totalRefs = Q.New(VolumeSnapshotGroupRefVO.class) + .eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid).count(); + if (deletedRefs < totalRefs) { + incomplete.add(guuid); + } + } + return incomplete; + } + public static List getAvailability(List uuids) { List results = new ArrayList<>(); List groups = Q.New(VolumeSnapshotGroupVO.class) diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java b/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java index 09e41c229f7..b956b750ce8 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java +++ b/storage/src/main/java/org/zstack/storage/volume/VolumeApiInterceptor.java @@ -44,6 +44,7 @@ import org.zstack.header.storage.snapshot.VolumeSnapshotVO; import org.zstack.header.storage.snapshot.VolumeSnapshotVO_; import org.zstack.header.storage.snapshot.group.MemorySnapshotValidatorExtensionPoint; +import org.zstack.storage.snapshot.group.VolumeSnapshotGroupChecker; import org.zstack.header.tag.SystemTagVO; import org.zstack.header.vm.APICreateVmInstanceMsg; import org.zstack.header.vm.DiskAO; @@ -213,6 +214,8 @@ private void validate(APICreateVolumeSnapshotGroupMsg msg) { throw new ApiMessageInterceptionException(argerr("volume[uuid:%s] is not root volume", msg.getRootVolumeUuid())); } + checkIncompleteSnapshotGroupsOnVm(vmvo.getUuid(), "create new snapshot group"); + if (msg.isWithMemory() && !(vmvo.getState().equals(VmInstanceState.Running) || (vmvo.getState().equals(VmInstanceState.Paused)))) { throw new ApiMessageInterceptionException(argerr("Can not take memory snapshot, vm current state[%s], but expect state are [%s, %s]", vmvo.getState().toString(), VmInstanceState.Running.toString(), VmInstanceState.Paused.toString())); @@ -316,9 +319,13 @@ private void validate(APIDetachDataVolumeFromVmMsg msg) { throw new ApiMessageInterceptionException(operr("the volume[uuid:%s, name:%s, type:%s] can't detach it", vol.getUuid(), vol.getName(), vol.getType())); } + + String vmUuid = msg.getVmUuid() != null ? msg.getVmUuid() : vol.getVmInstanceUuid(); + checkIncompleteSnapshotGroupsOnVm(vmUuid, "detach data volume"); } private void validate(APIAttachDataVolumeToVmMsg msg) { + checkIncompleteSnapshotGroupsOnVm(msg.getVmInstanceUuid(), "attach data volume"); new SQLBatch() { @Override protected void scripts() { @@ -691,6 +698,29 @@ public boolean start() { return true; } + /** + * Block VM-scoped operations when the VM has any incomplete snapshot group. + * An incomplete group is one whose refs are partially deleted (some snapshotDeleted=true, + * but at least one alive). Such groups must be cleaned up first to avoid pollution + * of subsequent group operations on this VM. + * + * Exempt operations: deleting an incomplete group itself (handled by + * {@code VolumeSnapshotGroupBase#handleDelete} which excludes self), single-snapshot + * deletion, and VM destroy (handled by VolumeSnapshotGroupCascadeExtension cleanup). + */ + private void checkIncompleteSnapshotGroupsOnVm(String vmUuid, String operationDesc) { + if (vmUuid == null) { + return; + } + List incomplete = VolumeSnapshotGroupChecker.findIncompleteGroupsOnVm(vmUuid, null); + if (!incomplete.isEmpty()) { + throw new ApiMessageInterceptionException(operr( + "VM[uuid:%s] has incomplete snapshot group(s) %s, " + + "please clean them up first before %s", + vmUuid, incomplete, operationDesc)); + } + } + @Override public boolean stop() { return true;