From 81f5dd20ca09fb2dbb356e33dcdf65bd5b4ec0ad Mon Sep 17 00:00:00 2001 From: jmsperu Date: Tue, 17 Mar 2026 22:33:45 +0300 Subject: [PATCH 1/4] Allow parallel execution of backup and delete commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change executeInSequence() to return false for TakeBackupCommand and DeleteBackupCommand, allowing the KVM agent to process multiple backup/delete operations concurrently via its worker thread pool. Previously, all backup commands were serialized — a large VM backup (e.g. 100+ GB taking 2+ hours) would block all other backup and delete operations on the same host. Since each backup mounts its own temporary NFS directory and operates on independent VM disks, there is no shared state requiring serialization. Restore and PrepareForBackupRestoration commands remain sequential as they modify VM state that should not be concurrent. Co-Authored-By: Claude Opus 4.6 --- .../java/org/apache/cloudstack/backup/DeleteBackupCommand.java | 2 +- .../java/org/apache/cloudstack/backup/TakeBackupCommand.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java index 16c611af998e..7a37463f7da4 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java +++ b/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java @@ -71,6 +71,6 @@ public void setMountOptions(String mountOptions) { @Override public boolean executeInSequence() { - return true; + return false; } } diff --git a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java index 93855ea17211..7ad08dbafae9 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java +++ b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java @@ -89,6 +89,6 @@ public void setVolumePaths(List volumePaths) { @Override public boolean executeInSequence() { - return true; + return false; } } From b2cd0bf02e03821728e65f5828769313a8380683 Mon Sep 17 00:00:00 2001 From: jmsperu Date: Sat, 23 May 2026 00:52:27 +0300 Subject: [PATCH 2/4] fix(backup): restrict parallel execution to TakeBackup only Revert DeleteBackupCommand to executeInSequence()=true per @abh1sar's review. Backup creation is safe to parallelize across VMs because each VM writes to its own NAS path. Backup deletion is not safe to parallelize the same way: incremental chains share parent qcow2 files on the NAS, and concurrent deletions of sibling or descendant nodes can race on the underlying file references and the chain metadata. Until that's modeled explicitly, keep DeleteBackup serialized. TakeBackup parallelization stands. --- .../java/org/apache/cloudstack/backup/DeleteBackupCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java index 7a37463f7da4..16c611af998e 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java +++ b/core/src/main/java/org/apache/cloudstack/backup/DeleteBackupCommand.java @@ -71,6 +71,6 @@ public void setMountOptions(String mountOptions) { @Override public boolean executeInSequence() { - return false; + return true; } } From 4338c9cfa0cc675ac6ac505e250c42f2b03af999 Mon Sep 17 00:00:00 2001 From: jmsperu Date: Tue, 26 May 2026 21:22:10 +0300 Subject: [PATCH 3/4] ci: retrigger workflow (flaky/stale shards) From b166bfdec257c13ee7003d97608594df1f35566a Mon Sep 17 00:00:00 2001 From: jmsperu Date: Thu, 28 May 2026 16:09:01 +0300 Subject: [PATCH 4/4] docs(backup): comment on TakeBackupCommand.executeInSequence=false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Daan's review on PR #12847, document inline why we return false from executeInSequence — the same reasoning previously only given in the PR thread. The comment explains the design intent (parallel dispatch with DeleteBackupCommand) and the safety case (per-VM on-NAS paths + per-script NFS mount lifecycle). --- .../org/apache/cloudstack/backup/TakeBackupCommand.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java index 7ad08dbafae9..a5500044f86e 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java +++ b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java @@ -89,6 +89,12 @@ public void setVolumePaths(List volumePaths) { @Override public boolean executeInSequence() { + // Returning false lets the KVM agent dispatch this command in parallel with + // other backup commands (notably DeleteBackupCommand). A long-running snapshot + // copy must not block a concurrent delete on a different VM's backup. Safe + // because each backup operates on its own on-NAS path (named after the VM + // uuid + timestamp) and the NFS mount lifecycle is per-script-invocation, + // not shared — so there is no path-level contention between parallel runs. return false; } }