From 640211051b88171ebf103b51227c35034b9c54d6 Mon Sep 17 00:00:00 2001
From: Raja <raja@dreadnode.io>
Date: Wed, 3 Jun 2026 22:32:14 +0000
Subject: [PATCH] fix: flush OTEL spans between sequential studies and fix
 transform defaults
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs fixed:

1. OTEL span loss in multi-study workflows: When running N+1 transform
   comparisons, multi-attack campaigns, or category sweeps, only the
   first study's traces were reliably exported to the platform. The
   BatchSpanProcessor buffers spans and exports them from a background
   thread, so later studies' spans could still be in the buffer when
   dn.shutdown() raced against process teardown. Added an explicit
   force_flush() after each sequential assessment.run() call in all 3
   multi-study templates.

2. Unwanted baseline run: When the user specified transforms (e.g. 'run
   crescendo with Telugu'), the agent was forced to set
   compare_transforms=true, creating an N+1 study with an unrequested
   baseline run. Changed the default to compare_transforms=false, so
   transforms are now applied directly. The N+1 comparison only runs
   when the user explicitly asks to 'compare' or 'benchmark' transforms.

Bump version: 1.3.2 → 1.3.5
---
 .../agents/ai-red-teaming-agent.md            | 12 ++++++--
 capabilities/ai-red-teaming/capability.yaml   |  2 +-
 .../ai-red-teaming/scripts/attack_runner.py   | 30 +++++++++++++++++++
 3 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/capabilities/ai-red-teaming/agents/ai-red-teaming-agent.md b/capabilities/ai-red-teaming/agents/ai-red-teaming-agent.md
index b5ca529..c30e8b0 100644
--- a/capabilities/ai-red-teaming/agents/ai-red-teaming-agent.md
+++ b/capabilities/ai-red-teaming/agents/ai-red-teaming-agent.md
@@ -103,7 +103,7 @@ Complete requests that don't need clarification:
 5. Never report failure without first running diagnostic tools AND asking for clarification.
 
 **Parameter handling:**
-- When user specifies transforms (e.g. "using 3 transforms", "with base64, caesar, authority"), ALWAYS set `compare_transforms=true`. This produces N+1 runs (baseline + each transform). Set `compare_transforms=false` ONLY if the user explicitly says "bundle transforms" or "apply all together".
+- When the user specifies transforms (e.g. "with base64", "using Telugu"), set `compare_transforms=false` by default. The transforms are applied to the attack and no baseline run is added. Set `compare_transforms=true` only if the user explicitly asks to "compare transforms", "benchmark transforms", or "compare against baseline", or uses similar comparison language. `compare_transforms=true` creates N+1 runs (1 baseline without transforms + 1 per transform, each applied individually).
 - "max trials N", "N trials", "max_trials N", or "iterations N" → set `n_iterations=N`.
 - Pass the user's model name as `target_model` verbatim. The tool resolves aliases internally.
 
@@ -406,11 +406,17 @@ Use `generate_image_attack` when the user wants to attack a traditional ML model
 User: "run TAP on gpt-4o, goal: extract system prompt"
 → `generate_attack(attack_type="tap", goal="extract system prompt", target_model="gpt-4o")`
 
-### Transform Comparison
+### Transform (applied, no baseline)
+
+User: "run TAP with base64 and caesar transforms"
+→ `generate_attack(attack_type="tap", ..., transforms=["base64","caesar"], compare_transforms=false)`
+Transforms are applied to the attack. No baseline run.
+
+### Transform Comparison (explicit)
 
 User: "run TAP with base64, caesar, authority transforms, compare them"
 → `generate_attack(attack_type="tap", ..., transforms=["base64","caesar","authority"], compare_transforms=true)`
-This generates N+1 runs: 1 baseline + 1 per transform.
+This generates N+1 runs: 1 baseline + 1 per transform. Use this only when the user explicitly asks to compare transforms.
 
 ### Campaign (multiple attacks)
 
diff --git a/capabilities/ai-red-teaming/capability.yaml b/capabilities/ai-red-teaming/capability.yaml
index c35dd9a..5d9afb7 100644
--- a/capabilities/ai-red-teaming/capability.yaml
+++ b/capabilities/ai-red-teaming/capability.yaml
@@ -1,6 +1,6 @@
 schema: 1
 name: ai-red-teaming
-version: "1.3.2"
+version: "1.3.5"
 description: >
   Probe the security and safety of AI applications, agents, and foundation models.
   Orchestrates adversarial attack workflows to discover vulnerabilities in LLMs,
diff --git a/capabilities/ai-red-teaming/scripts/attack_runner.py b/capabilities/ai-red-teaming/scripts/attack_runner.py
index cd9cb73..3d414c3 100644
--- a/capabilities/ai-red-teaming/scripts/attack_runner.py
+++ b/capabilities/ai-red-teaming/scripts/attack_runner.py
@@ -3059,6 +3059,16 @@ async def main():
                 print(f"--- end {{label}} ---")
                 sys.stdout.flush()
 
+                # Flush OTEL spans between studies so each study's traces
+                # are exported to the platform before starting the next one.
+                try:
+                    from dreadnode.app.main import DEFAULT_INSTANCE
+                    _provider = DEFAULT_INSTANCE._logfire._tracer_provider
+                    if hasattr(_provider, 'force_flush'):
+                        _provider.force_flush(timeout_millis=10_000)
+                except Exception:
+                    pass
+
             except Exception as e:
                 print(f"\\nERROR in study '{{label}}': {{e}}")
                 traceback.print_exc()
@@ -3144,6 +3154,16 @@ async def main():
             await assessment.run(_{var}_study)
             print(f"{canon} completed successfully")
             sys.stdout.flush()
+
+            # Flush OTEL spans between attacks so each attack's traces
+            # are exported to the platform before starting the next one.
+            try:
+                from dreadnode.app.main import DEFAULT_INSTANCE
+                _provider = DEFAULT_INSTANCE._logfire._tracer_provider
+                if hasattr(_provider, 'force_flush'):
+                    _provider.force_flush(timeout_millis=10_000)
+            except Exception:
+                pass
         except Exception as e:
             print(f"\\nERROR in {canon}: {{e}}")
             traceback.print_exc()
@@ -3420,6 +3440,16 @@ async def main():
                         print(f"completed")
                         sys.stdout.flush()
 
+                        # Flush OTEL spans between goals so each goal's traces
+                        # are exported to the platform before starting the next one.
+                        try:
+                            from dreadnode.app.main import DEFAULT_INSTANCE
+                            _provider = DEFAULT_INSTANCE._logfire._tracer_provider
+                            if hasattr(_provider, 'force_flush'):
+                                _provider.force_flush(timeout_millis=10_000)
+                        except Exception:
+                            pass
+
                     except Exception as e:
                         print(f"ERROR: {{e}}")
                         traceback.print_exc()