Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions conf/springConfigXml/volumeSnapshot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
</zstack:plugin>
</bean>

<bean id="VolumeSnapshotGroupCascadeExtension" class="org.zstack.storage.snapshot.group.VolumeSnapshotGroupCascadeExtension">
<zstack:plugin>
<zstack:extension interface="org.zstack.core.cascade.CascadeExtensionPoint"/>
</zstack:plugin>
</bean>

<bean id="L3NetworkMemorySnapshotGroupReference" class="org.zstack.storage.snapshot.group.L3NetworkMemorySnapshotGroupReference">
<zstack:plugin>
<zstack:extension interface="org.zstack.storage.snapshot.group.MemorySnapshotGroupReferenceFactory" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public class APIDeleteVolumeSnapshotGroupMsg extends APIDeleteMessage implements
@APIParam(required = false, validValues = {"single", "chain", "auto"})
private String scope = "chain";

@APIParam(required = false)
private boolean force = false;

@APINoSee
private String vmUuid;

Expand Down Expand Up @@ -58,6 +61,14 @@ public void setScope(String scope) {
this.scope = scope;
}

public boolean isForce() {
return force;
}

public void setForce(boolean force) {
this.force = force;
}

public String getVmUuid() {
return vmUuid;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2142,26 +2142,37 @@ protected Boolean scripts() {
return cleanup;
}

// The logic for cleaning up snapshot groups when deleting a snapshot chain
// The logic for cleaning up snapshot groups when deleting a snapshot chain.
// Symmetric with ungroupAfterDeleteSingleSnapshot: regardless of root/data volume type,
// a group is only disbanded after ALL its refs have snapshotDeleted=true.
// This avoids leaving orphan refs (root chain delete used to immediately drop the group VO,
// leaving data-volume refs pointing to a non-existent group).
private void ungroupAfterDeleted(List<VolumeSnapshotInventory> snapshots) {
List<String> uuids = snapshots.stream().map(VolumeSnapshotInventory::getUuid).collect(Collectors.toList());
SQL.New(VolumeSnapshotGroupRefVO.class).in(VolumeSnapshotGroupRefVO_.volumeSnapshotUuid, uuids)
.set(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).update();
if (currentRoot.getVolumeType().equals(VolumeType.Root.toString())) {
List<String> groupUuids = new ArrayList<>();
for (VolumeSnapshotInventory snapshot : snapshots) {
String groupUuid = snapshot.getGroupUuid();
if (groupUuid != null) {
logger.debug(String.format("root volume snapshot[uuid:%s, name:%s] has been deleted, " +
"ungroup snapshot group[uuid:%s]", snapshot.getUuid(), snapshot.getName(), groupUuid));
groupUuids.add(groupUuid);
}

Set<String> touchedGroupUuids = snapshots.stream()
.map(VolumeSnapshotInventory::getGroupUuid)
.filter(Objects::nonNull)
.collect(Collectors.toSet());

List<String> groupsToDelete = new ArrayList<>();
for (String groupUuid : touchedGroupUuids) {
long remaining = Q.New(VolumeSnapshotGroupRefVO.class)
.eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuid)
.eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, false).count();
if (remaining == 0) {
logger.debug(String.format("snapshot group[uuid:%s] all volume snapshots have been deleted, " +
"disbanding group", groupUuid));
groupsToDelete.add(groupUuid);
}
}

groupUuids.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid));
cleanVmHostBackupFilesForGroup(groupUuids);
dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class);
if (!groupsToDelete.isEmpty()) {
groupsToDelete.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid));
cleanVmHostBackupFilesForGroup(groupsToDelete);
dbf.removeByPrimaryKeys(groupsToDelete, VolumeSnapshotGroupVO.class);
Comment on lines +2155 to +2175
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

这里会把同组的 Memory ref 永久算成“未删”,导致快照组无法解散。

Line 2162 开始的 remaining 统计会把整组里所有 snapshotDeleted=false 的 ref 都算进去,但这里前面只标记了 snapshots 本身的 ref。链路删除 root/data 快照时,同组的 memory snapshot 会在后面的 cleanUpMemorySnapshots() 里被直接硬删 VO/tree,却不会把对应的 VolumeSnapshotGroupRefVO.snapshotDeleted 置为 true。这样只要组里带有 memory ref,这里就会一直得到 remaining > 0,最终把该 VM 长期留在“不完整快照组”状态。建议把本次一并删除的 memory snapshot ref 也纳入标记,或者让 cleanUpMemorySnapshots() 复用同样的 group-ref 清理逻辑。

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In
`@storage/src/main/java/org/zstack/storage/snapshot/VolumeSnapshotTreeBase.java`
around lines 2155 - 2175, The code that computes remaining refs for a group uses
VolumeSnapshotGroupRefVO.snapshotDeleted to decide whether a group can be
disbanded, but memory snapshot VOs deleted by cleanUpMemorySnapshots() are
removed without setting their VolumeSnapshotGroupRefVO.snapshotDeleted=true, so
remaining will remain >0 and groups never disband; fix by ensuring memory
snapshots removed in cleanUpMemorySnapshots() also mark their corresponding
VolumeSnapshotGroupRefVO.snapshotDeleted=true (or add their group ref UUIDs to
the same snapshots-to-delete set before the remaining-count check), i.e. update
cleanUpMemorySnapshots() or the deletion flow to reuse the group-ref clearing
logic used here so VolumeSnapshotGroupRefVO entries reflect the deletion and
allow groupsToDelete to be computed correctly.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,20 @@ public String getName() {

private void handleDelete(APIDeleteVolumeSnapshotGroupMsg msg, NoErrorCompletion completion) {
APIDeleteVolumeSnapshotGroupEvent event = new APIDeleteVolumeSnapshotGroupEvent(msg.getId());

if (!msg.isForce()) {
List<String> incomplete = VolumeSnapshotGroupChecker
.findIncompleteGroupsOnVm(self.getVmInstanceUuid(), self.getUuid());
if (!incomplete.isEmpty()) {
event.setError(operr("VM[uuid:%s] has incomplete snapshot group(s) %s, " +
"please clean them up first (or pass force=true) before deleting other snapshot groups",
self.getVmInstanceUuid(), incomplete));
bus.publish(event);
completion.done();
return;
}
}

DeleteVolumeSnapshotGroupInnerMsg imsg = new DeleteVolumeSnapshotGroupInnerMsg();
imsg.setUuid(msg.getUuid());
imsg.setDeletionMode(msg.getDeletionMode());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package org.zstack.storage.snapshot.group;

import org.springframework.beans.factory.annotation.Autowired;
import org.zstack.core.cascade.AbstractAsyncCascadeExtension;
import org.zstack.core.cascade.CascadeAction;
import org.zstack.core.cascade.CascadeConstant;
import org.zstack.core.db.DatabaseFacade;
import org.zstack.core.db.Q;
import org.zstack.core.db.SQL;
import org.zstack.header.core.Completion;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
import org.zstack.header.vm.VmDeletionStruct;
import org.zstack.header.vm.VmInstanceVO;
import org.zstack.header.vm.additions.VmHostBackupFileVO;
import org.zstack.header.vm.additions.VmHostBackupFileVO_;
import org.zstack.header.vm.additions.VmHostFileManager;
import org.zstack.header.vm.devices.VmInstanceResourceMetadataManager;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
* Cascade extension keyed on VmInstance for cleaning up VolumeSnapshotGroup VOs
* when a VM is destroyed.
*
* Background: snapshot groups are VM-scoped. When a VM is destroyed, any remaining
* group VOs (whether complete or incomplete due to partial single-snapshot deletions)
* become orphaned. Without this cleanup, those rows would survive beyond the VM
* and pollute downstream queries.
*
* On DELETION_CHECK we do NOT block — VM destroy should proceed even with
* incomplete groups (per product decision); cleanup is automatic.
*/
public class VolumeSnapshotGroupCascadeExtension extends AbstractAsyncCascadeExtension {
private static final CLogger logger = Utils.getLogger(VolumeSnapshotGroupCascadeExtension.class);

private static final String NAME = VolumeSnapshotGroupVO.class.getSimpleName();

@Autowired
private DatabaseFacade dbf;
@Autowired
private VmInstanceResourceMetadataManager vidm;
@Autowired
private VmHostFileManager vmHostFileManager;

@Override
public void asyncCascade(CascadeAction action, Completion completion) {
if (action.isActionCode(CascadeConstant.DELETION_CLEANUP_CODE)) {
handleDeletionCleanup(action, completion);
} else if (action.isActionCode(CascadeConstant.DELETION_DELETE_CODE,
CascadeConstant.DELETION_FORCE_DELETE_CODE)) {
handleDeletion(action, completion);
} else {
completion.success();
}
}

private void handleDeletion(CascadeAction action, Completion completion) {
if (!VmInstanceVO.class.getSimpleName().equals(action.getParentIssuer())) {
completion.success();
return;
}

List<String> vmUuids = vmUuidsFromAction(action);
if (vmUuids.isEmpty()) {
completion.success();
return;
}

List<String> groupUuids = Q.New(VolumeSnapshotGroupVO.class)
.select(VolumeSnapshotGroupVO_.uuid)
.in(VolumeSnapshotGroupVO_.vmInstanceUuid, vmUuids)
.listValues();
if (groupUuids.isEmpty()) {
completion.success();
return;
}

logger.debug(String.format("VM destroy cascade: force-removing %d snapshot group(s) %s for vm(s) %s " +
"(includes any incomplete groups from prior single-snapshot deletions)",
groupUuids.size(), groupUuids, vmUuids));

// 1. drop all refs first (FK-like constraint via business logic)
SQL.New(VolumeSnapshotGroupRefVO.class)
.in(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuids)
.delete();

// 2. clean associated metadata + backup files
groupUuids.forEach(vidm::deleteArchiveVmInstanceResourceMetadataGroup);
cleanVmHostBackupFilesForGroup(groupUuids);

// 3. remove group VOs
dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class);

completion.success();
}

private void cleanVmHostBackupFilesForGroup(List<String> groupUuids) {
if (groupUuids.isEmpty()) {
return;
}

List<String> backupUuidList = Q.New(VmHostBackupFileVO.class)
.in(VmHostBackupFileVO_.resourceUuid, groupUuids)
.select(VmHostBackupFileVO_.uuid)
.listValues();

backupUuidList.forEach(vmHostFileManager::cleanVmHostBackupFile);
}

private void handleDeletionCleanup(CascadeAction action, Completion completion) {
try {
dbf.eoCleanup(VolumeSnapshotGroupVO.class);
} catch (Throwable t) {
logger.warn("eoCleanup VolumeSnapshotGroupVO failed: " + t.getMessage());
} finally {
completion.success();
}
}

private List<String> vmUuidsFromAction(CascadeAction action) {
Object ctx = action.getParentIssuerContext();
if (ctx == null) {
return Collections.emptyList();
}
List<String> uuids = new ArrayList<>();
if (ctx instanceof List) {
for (Object o : (List<?>) ctx) {
if (o instanceof VmDeletionStruct) {
uuids.add(((VmDeletionStruct) o).getInventory().getUuid());
}
}
}
return uuids;
}

@Override
public List<String> getEdgeNames() {
return Arrays.asList(VmInstanceVO.class.getSimpleName());
}

@Override
public String getCascadeResourceName() {
return NAME;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.zstack.core.db.Q;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupAvailability;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
import org.zstack.header.vo.ResourceVO;
Expand All @@ -25,6 +26,49 @@ public static boolean isAvailable(String uuid) {
return getAvailability(uuid).isAvailable();
}

/**
* Find all incomplete snapshot groups on a VM.
* An incomplete group is one where part of its refs have snapshotDeleted=true
* but at least one ref is still alive (snapshotDeleted=false).
* Such groups represent a "debt" that pollutes subsequent group/VM operations.
*
* @param vmInstanceUuid the VM to inspect
* @param excludeGroupUuid group uuid to exclude from the result (e.g. when the caller is
* itself trying to delete that group, do not flag it as a blocker);
* pass null to include all groups
* @return list of incomplete group uuids (excluding excludeGroupUuid); empty if none
*/
public static List<String> findIncompleteGroupsOnVm(String vmInstanceUuid, String excludeGroupUuid) {
if (vmInstanceUuid == null) {
return Collections.emptyList();
}

List<String> groupUuids = Q.New(VolumeSnapshotGroupVO.class)
.select(VolumeSnapshotGroupVO_.uuid)
.eq(VolumeSnapshotGroupVO_.vmInstanceUuid, vmInstanceUuid)
.listValues();

List<String> incomplete = new ArrayList<>();
for (Object o : groupUuids) {
String guuid = o.toString();
if (guuid.equals(excludeGroupUuid)) {
continue;
}
long deletedRefs = Q.New(VolumeSnapshotGroupRefVO.class)
.eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid)
.eq(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).count();
if (deletedRefs == 0) {
continue;
}
long totalRefs = Q.New(VolumeSnapshotGroupRefVO.class)
.eq(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, guuid).count();
if (deletedRefs < totalRefs) {
incomplete.add(guuid);
}
}
return incomplete;
}

public static List<VolumeSnapshotGroupAvailability> getAvailability(List<String> uuids) {
List<VolumeSnapshotGroupAvailability> results = new ArrayList<>();
List<VolumeSnapshotGroupVO> groups = Q.New(VolumeSnapshotGroupVO.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO_;
import org.zstack.header.storage.snapshot.group.MemorySnapshotValidatorExtensionPoint;
import org.zstack.storage.snapshot.group.VolumeSnapshotGroupChecker;
import org.zstack.header.tag.SystemTagVO;
import org.zstack.header.vm.APICreateVmInstanceMsg;
import org.zstack.header.vm.DiskAO;
Expand Down Expand Up @@ -213,6 +214,8 @@ private void validate(APICreateVolumeSnapshotGroupMsg msg) {
throw new ApiMessageInterceptionException(argerr("volume[uuid:%s] is not root volume", msg.getRootVolumeUuid()));
}

checkIncompleteSnapshotGroupsOnVm(vmvo.getUuid(), "create new snapshot group");

if (msg.isWithMemory() && !(vmvo.getState().equals(VmInstanceState.Running) || (vmvo.getState().equals(VmInstanceState.Paused)))) {
throw new ApiMessageInterceptionException(argerr("Can not take memory snapshot, vm current state[%s], but expect state are [%s, %s]",
vmvo.getState().toString(), VmInstanceState.Running.toString(), VmInstanceState.Paused.toString()));
Expand Down Expand Up @@ -316,9 +319,13 @@ private void validate(APIDetachDataVolumeFromVmMsg msg) {
throw new ApiMessageInterceptionException(operr("the volume[uuid:%s, name:%s, type:%s] can't detach it",
vol.getUuid(), vol.getName(), vol.getType()));
}

String vmUuid = msg.getVmUuid() != null ? msg.getVmUuid() : vol.getVmInstanceUuid();
checkIncompleteSnapshotGroupsOnVm(vmUuid, "detach data volume");
}

private void validate(APIAttachDataVolumeToVmMsg msg) {
checkIncompleteSnapshotGroupsOnVm(msg.getVmInstanceUuid(), "attach data volume");
new SQLBatch() {
@Override
protected void scripts() {
Expand Down Expand Up @@ -691,6 +698,29 @@ public boolean start() {
return true;
}

/**
* Block VM-scoped operations when the VM has any incomplete snapshot group.
* An incomplete group is one whose refs are partially deleted (some snapshotDeleted=true,
* but at least one alive). Such groups must be cleaned up first to avoid pollution
* of subsequent group operations on this VM.
*
* Exempt operations: deleting an incomplete group itself (handled by
* {@code VolumeSnapshotGroupBase#handleDelete} which excludes self), single-snapshot
* deletion, and VM destroy (handled by VolumeSnapshotGroupCascadeExtension cleanup).
*/
private void checkIncompleteSnapshotGroupsOnVm(String vmUuid, String operationDesc) {
if (vmUuid == null) {
return;
}
List<String> incomplete = VolumeSnapshotGroupChecker.findIncompleteGroupsOnVm(vmUuid, null);
if (!incomplete.isEmpty()) {
throw new ApiMessageInterceptionException(operr(
"VM[uuid:%s] has incomplete snapshot group(s) %s, " +
"please clean them up first before %s",
vmUuid, incomplete, operationDesc));
}
}

@Override
public boolean stop() {
return true;
Expand Down