From 28c686af32956958b11ef292b722eb017925df7a Mon Sep 17 00:00:00 2001
From: TheFactoriousDROID <andrew.taylor@factory.ai>
Date: Fri, 6 Mar 2026 14:20:07 -0800
Subject: [PATCH 1/3] Add rfspec plugin: multi-model spec generation via
 /rfspec command

Sends user prompt directly to Opus 4.6 (max), GPT-5.4 (xhigh), and
Gemini 3.1 Pro (high) in parallel -- each at its maximum reasoning tier.
Presents competing responses and lets the user pick or synthesize.

Includes a skill that teaches the agent the full workflow: when to invoke
rfspec, how to evaluate and compare results, how to synthesize, and where
to save the final spec.
---
 .factory-plugin/marketplace.json              |  6 ++
 README.md                                     |  8 ++
 plugins/rfspec/.factory-plugin/plugin.json    |  9 ++
 plugins/rfspec/README.md                      | 32 +++++++
 plugins/rfspec/commands/rfspec                |  2 +
 plugins/rfspec/skills/rfspec/SKILL.md         | 91 +++++++++++++++++++
 .../rfspec/references/evaluation-guide.md     | 38 ++++++++
 .../rfspec/references/troubleshooting.md      | 37 ++++++++
 plugins/rfspec/skills/rfspec/scripts/run.sh   | 89 ++++++++++++++++++
 9 files changed, 312 insertions(+)
 create mode 100644 plugins/rfspec/.factory-plugin/plugin.json
 create mode 100644 plugins/rfspec/README.md
 create mode 100755 plugins/rfspec/commands/rfspec
 create mode 100644 plugins/rfspec/skills/rfspec/SKILL.md
 create mode 100644 plugins/rfspec/skills/rfspec/references/evaluation-guide.md
 create mode 100644 plugins/rfspec/skills/rfspec/references/troubleshooting.md
 create mode 100755 plugins/rfspec/skills/rfspec/scripts/run.sh
diff --git a/.factory-plugin/marketplace.json b/.factory-plugin/marketplace.json
index e099ed2..96fa7a3 100644
--- a/.factory-plugin/marketplace.json
+++ b/.factory-plugin/marketplace.json
@@ -23,6 +23,12 @@
       "description": "Core Skills for essential functionalities and integrations",
       "source": "./plugins/core",
       "category": "core"
+    },
+    {
+      "name": "rfspec",
+      "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec",
+      "source": "./plugins/rfspec",
+      "category": "productivity"
     }
   ]
 }
diff --git a/README.md b/README.md
index 5a572d6..28024d0 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,14 @@ Skills for continuous learning and improvement.
 - `frontend-design` - Build web apps, websites, HTML pages with good design
 - `browser-navigation` - Browser automation with agent-browser
 
+### rfspec
+
+Fan out a prompt to multiple AI models in parallel and pick or synthesize the best result.
+
+**Skills:**
+
+- `rfspec` - Multi-model spec generation and synthesis workflow
+
 ## Plugin Structure
 
 Each plugin follows the Factory plugin format:
diff --git a/plugins/rfspec/.factory-plugin/plugin.json b/plugins/rfspec/.factory-plugin/plugin.json
new file mode 100644
index 0000000..4280e39
--- /dev/null
+++ b/plugins/rfspec/.factory-plugin/plugin.json
@@ -0,0 +1,9 @@
+{
+  "name": "rfspec",
+  "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec",
+  "version": "1.0.0",
+  "author": {
+    "name": "Andy Taylor",
+    "email": "andrew.taylor@factory.ai"
+  }
+}
diff --git a/plugins/rfspec/README.md b/plugins/rfspec/README.md
new file mode 100644
index 0000000..b892e20
--- /dev/null
+++ b/plugins/rfspec/README.md
@@ -0,0 +1,32 @@
+# rfspec
+
+Request for Spec -- fan out a prompt to multiple AI models in parallel and choose or synthesize the best result.
+
+## What it does
+
+`/rfspec` sends your prompt to three models simultaneously (Claude Opus 4.6, GPT-5.4, Gemini 3.1 Pro), each at its maximum reasoning tier. The results are presented side-by-side so you can pick the strongest one or synthesize a combination. No prescriptive system prompt is injected -- the models bring their own reasoning to your request.
+
+## Usage
+
+```
+/rfspec <describe what you want to build>
+```
+
+Example:
+
+```
+/rfspec add a dark mode toggle to the settings page with persistent user preference
+```
+
+The command will:
+
+1. Send the prompt to all three models in parallel via `droid exec`, each at its maximum reasoning tier (Opus: max, GPT-5.4: xhigh, Gemini: high)
+2. Collect and display each model's response (Options A, B, C)
+3. Ask you to pick one as-is or synthesize the best parts
+4. Save the chosen result to `specs/active/YYYY-MM-DD-<slug>.md`
+
+## Requirements
+
+- **droid CLI** -- must be installed and authenticated
+- **jq** -- for JSON parsing (`brew install jq` on macOS)
+- Access to at least one of the three models. Models that fail are skipped gracefully; the command only errors if all three fail.
diff --git a/plugins/rfspec/commands/rfspec b/plugins/rfspec/commands/rfspec
new file mode 100755
index 0000000..3528d67
--- /dev/null
+++ b/plugins/rfspec/commands/rfspec
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+exec "$(dirname "$0")/../skills/rfspec/scripts/run.sh" "$@"
diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md
new file mode 100644
index 0000000..094160e
--- /dev/null
+++ b/plugins/rfspec/skills/rfspec/SKILL.md
@@ -0,0 +1,91 @@
+---
+name: rfspec
+version: 1.2.0
+description: |
+  Multi-model spec generation and synthesis. Use when the user wants to:
+  - Get competing proposals from different AI models
+  - Compare approaches to a problem from different perspectives
+  - Synthesize the best parts of several proposals into one spec
+  Keywords: rfspec, competing specs, multi-model, compare approaches,
+  multiple perspectives, request for spec, fan out, model comparison.
+  NOT for: single-model generation, code review, or running tests.
+---
+
+# rfspec -- Request for Spec
+
+Fan out a prompt to multiple models, compare their responses, and help the user pick or synthesize the best result.
+
+## Quick Reference
+
+| Task | Action |
+|------|--------|
+| Generate competing specs | `/rfspec <prompt>` |
+| Pick one result | Select via AskUser after comparison |
+| Synthesize results | Combine strongest elements when user chooses synthesis |
+| Save final spec | Write to `specs/active/YYYY-MM-DD-<slug>.md` |
+
+## Workflow
+
+1. Run `/rfspec <user's prompt>` -- fires parallel model calls, returns labeled options (A, B, C).
+2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md).
+3. Present the choice to the user via AskUser.
+4. Save the selected or synthesized result.
+
+## Saving
+
+Save the final result (picked or synthesized) to:
+
+```
+specs/active/YYYY-MM-DD-<slug>.md
+```
+
+Where `<slug>` is a short kebab-case name derived from the topic.
+
+## Pitfalls
+
+- Don't summarize each option individually -- compare them against each other.
+- Don't concatenate when synthesizing -- resolve contradictions and produce a coherent document.
+- If all options are rejected, gather feedback and re-run with a refined prompt.
+
+## Verification
+
+After saving a spec:
+
+1. Confirm the file exists at the expected path.
+2. Verify it contains the selected or synthesized content.
+3. Report the saved path to the user.
+
+## Examples
+
+Example 1: User wants competing specs
+User says: "Get me specs from multiple models for adding a dark mode toggle"
+Actions:
+
+1. Run `/rfspec add a dark mode toggle to the settings page with persistent user preference`
+2. Read Options A, B, C
+3. Compare: "Option A uses CSS variables with a React context, Option B uses Tailwind's dark class with localStorage, Option C uses a theme provider with system preference detection."
+4. Present choice via AskUser
+Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md`
+
+Example 2: User wants synthesis
+User says: "rfspec this: refactor the auth module to use JWT"
+Actions:
+
+1. Run `/rfspec refactor the auth module to use JWT`
+2. Compare results, noting Option A has better token rotation but Option C has cleaner middleware
+3. User selects "Synthesize"
+4. Combine Option A's rotation logic with Option C's middleware structure
+Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md`
+
+Example 3: All options rejected
+User says: "None of these work, they all miss the caching layer"
+Actions:
+
+1. Ask what's missing -- user explains the Redis caching requirement
+2. Offer to re-run: `/rfspec refactor auth module to use JWT with Redis session caching`
+Result: New round of specs generated with caching addressed
+
+## References
+
+- [references/evaluation-guide.md](references/evaluation-guide.md) -- how to compare, synthesize, and handle rejection
+- [references/troubleshooting.md](references/troubleshooting.md) -- error codes and fixes
diff --git a/plugins/rfspec/skills/rfspec/references/evaluation-guide.md b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md
new file mode 100644
index 0000000..b5fce05
--- /dev/null
+++ b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md
@@ -0,0 +1,38 @@
+# Evaluation Guide
+
+How to compare and evaluate competing model responses.
+
+## Comparing Options
+
+When results come back from `/rfspec`:
+
+1. **Read all options** before responding. Understand each approach fully.
+2. **Identify meaningful differences** -- not cosmetic ones. Focus on:
+   - Architectural choices (patterns, libraries, data flow)
+   - Scope differences (what each included or excluded)
+   - Risk areas (where one flagged something the others missed)
+   - Concrete vs. vague (which named actual files, functions, steps)
+3. **Write a brief comparison** -- 2-4 sentences per option covering strengths and gaps. Compare, don't summarize.
+4. **Present the choice** using AskUser:
+   - Use Option A as-is
+   - Use Option B as-is
+   - Use Option C as-is
+   - Synthesize a refined version combining the best of all three
+   - None of these work
+
+## Synthesizing
+
+If the user picks synthesis:
+
+1. Start from the strongest option as the base.
+2. Pull in specific elements from the others -- name what you're taking and why.
+3. Resolve contradictions (don't concatenate).
+4. The final result should read as a single coherent document, not a patchwork.
+
+## Handling Rejection
+
+If the user rejects all options:
+
+1. Ask what's missing or wrong.
+2. Incorporate their feedback into a refined prompt.
+3. Offer to re-run `/rfspec` with the updated prompt.
diff --git a/plugins/rfspec/skills/rfspec/references/troubleshooting.md b/plugins/rfspec/skills/rfspec/references/troubleshooting.md
new file mode 100644
index 0000000..b67675c
--- /dev/null
+++ b/plugins/rfspec/skills/rfspec/references/troubleshooting.md
@@ -0,0 +1,37 @@
+# Troubleshooting
+
+## All three models failed
+
+```
+Error: All three models failed. Check that your droid CLI is authenticated...
+```
+
+**Cause:** droid CLI not authenticated or models unavailable.
+**Solution:** Run `droid` interactively to verify auth, then retry.
+
+## jq not installed
+
+```
+Error: jq is required but not installed.
+```
+
+**Cause:** jq not on PATH.
+**Solution:** `brew install jq` (macOS) or `apt-get install jq` (Linux).
+
+## One or two models failed
+
+```
+Note: The following models encountered errors: Opus 4.6
+```
+
+**Cause:** Specific model unavailable or rate-limited.
+**Solution:** This is handled gracefully -- compare the options that did return. No action needed unless the failed model was critical.
+
+## Command not found
+
+```
+/rfspec: command not found
+```
+
+**Cause:** Plugin not installed.
+**Solution:** Install via `/plugins` UI or `droid plugin install rfspec@factory-plugins --scope user`.
diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh
new file mode 100755
index 0000000..d912be3
--- /dev/null
+++ b/plugins/rfspec/skills/rfspec/scripts/run.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ── guard: dependencies ──────────────────────────────────────────────
+command -v jq  >/dev/null 2>&1 || { echo "Error: jq is required but not installed. Install it with: brew install jq"; exit 1; }
+command -v droid >/dev/null 2>&1 || { echo "Error: droid CLI is required but not found on PATH."; exit 1; }
+
+PROMPT="$*"
+
+if [ -z "$PROMPT" ]; then
+  echo "Usage: /rfspec <your prompt>"
+  echo ""
+  echo "Sends your prompt to three models in parallel (Opus, GPT, Gemini),"
+  echo "then lets you pick the best spec or synthesize a combination."
+  exit 1
+fi
+
+# ── prompt ────────────────────────────────────────────────────────────
+TMPDIR=$(mktemp -d)
+trap 'rm -rf "$TMPDIR"' EXIT
+
+echo "$PROMPT" > "$TMPDIR/prompt.md"
+
+# ── models (id, label, max reasoning) ────────────────────────────────
+MODEL_A="claude-opus-4-6";  LABEL_A="Opus 4.6";       RE_A="max"
+MODEL_B="gpt-5.4";          LABEL_B="GPT-5.4";        RE_B="xhigh"
+MODEL_C="gemini-3.1-pro-preview"; LABEL_C="Gemini 3.1 Pro"; RE_C="high"
+
+# ── fire all three in parallel ───────────────────────────────────────
+droid exec -m "$MODEL_A" -r "$RE_A" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" &
+PID_A=$!
+droid exec -m "$MODEL_B" -r "$RE_B" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" &
+PID_B=$!
+droid exec -m "$MODEL_C" -r "$RE_C" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" &
+PID_C=$!
+
+FAIL=""
+wait $PID_A 2>/dev/null || FAIL="${FAIL}${LABEL_A} "
+wait $PID_B 2>/dev/null || FAIL="${FAIL}${LABEL_B} "
+wait $PID_C 2>/dev/null || FAIL="${FAIL}${LABEL_C} "
+
+# ── extract results ──────────────────────────────────────────────────
+extract() {
+  local file="$1"
+  if [ -s "$file" ]; then
+    jq -r '.result // empty' "$file" 2>/dev/null || cat "$file"
+  fi
+}
+
+RESULT_A=$(extract "$TMPDIR/a.json")
+RESULT_B=$(extract "$TMPDIR/b.json")
+RESULT_C=$(extract "$TMPDIR/c.json")
+
+# ── present results ──────────────────────────────────────────────────
+echo "=== RFSPEC RESULTS ==="
+echo ""
+echo "User request: ${PROMPT}"
+echo ""
+
+[ -n "$RESULT_A" ] && printf '### Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A"
+[ -n "$RESULT_B" ] && printf '### Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B"
+[ -n "$RESULT_C" ] && printf '### Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C"
+
+if [ -n "$FAIL" ]; then
+  echo "Note: The following models encountered errors: ${FAIL}"
+  echo ""
+fi
+
+SUCCESS=0
+[ -n "$RESULT_A" ] && SUCCESS=$((SUCCESS + 1))
+[ -n "$RESULT_B" ] && SUCCESS=$((SUCCESS + 1))
+[ -n "$RESULT_C" ] && SUCCESS=$((SUCCESS + 1))
+
+if [ "$SUCCESS" -eq 0 ]; then
+  echo "Error: All three models failed. Check that your droid CLI is authenticated"
+  echo "and the models (${MODEL_A}, ${MODEL_B}, ${MODEL_C}) are available."
+  exit 1
+fi
+
+echo "=== AGENT INSTRUCTIONS ==="
+echo "Analyze the specs above. Provide a brief comparison of each model's"
+echo "strengths and weaknesses. Then use the AskUser tool to offer:"
+echo "- Use Option A (${LABEL_A}) as-is"
+echo "- Use Option B (${LABEL_B}) as-is"
+echo "- Use Option C (${LABEL_C}) as-is"
+echo "- Synthesize a refined spec combining the best of all three"
+echo "- No -- none of these work (explain why)"
+echo "If the user picks synthesis, combine the strongest elements and save"
+echo "to specs/active/YYYY-MM-DD-<slug>.md. If rejected, gather feedback."

From 6959152ca0f1d98c26dbb78398d45d673fd1a487 Mon Sep 17 00:00:00 2001
From: TheFactoriousDROID <andrew.taylor@factory.ai>
Date: Fri, 6 Mar 2026 16:05:51 -0800
Subject: [PATCH 2/3] fix: add --auto medium to droid exec and require
 ExitSpecMode before saving

Background droid exec calls failed for Opus because no --auto flag was set,
causing permission denial in non-interactive subprocesses. Also, the agent
instructions allowed saving specs directly without user review -- now requires
ExitSpecMode approval before writing to specs/active/.
---
 plugins/rfspec/skills/rfspec/SKILL.md       |  6 ++++--
 plugins/rfspec/skills/rfspec/scripts/run.sh | 14 +++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md
index 094160e..29b6cfd 100644
--- a/plugins/rfspec/skills/rfspec/SKILL.md
+++ b/plugins/rfspec/skills/rfspec/SKILL.md
@@ -29,11 +29,13 @@ Fan out a prompt to multiple models, compare their responses, and help the user
 1. Run `/rfspec <user's prompt>` -- fires parallel model calls, returns labeled options (A, B, C).
 2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md).
 3. Present the choice to the user via AskUser.
-4. Save the selected or synthesized result.
+4. Present the selected or synthesized spec via ExitSpecMode for user review.
+5. Save to `specs/active/` only after the user approves in spec mode.
 
 ## Saving
 
-Save the final result (picked or synthesized) to:
+**Do not save immediately.** After the user picks or synthesis is complete, present the
+final spec via ExitSpecMode for review. Only after approval, save to:
 
 ```
 specs/active/YYYY-MM-DD-<slug>.md
diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh
index d912be3..8bace58 100755
--- a/plugins/rfspec/skills/rfspec/scripts/run.sh
+++ b/plugins/rfspec/skills/rfspec/scripts/run.sh
@@ -27,11 +27,11 @@ MODEL_B="gpt-5.4";          LABEL_B="GPT-5.4";        RE_B="xhigh"
 MODEL_C="gemini-3.1-pro-preview"; LABEL_C="Gemini 3.1 Pro"; RE_C="high"
 
 # ── fire all three in parallel ───────────────────────────────────────
-droid exec -m "$MODEL_A" -r "$RE_A" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" &
+droid exec -m "$MODEL_A" -r "$RE_A" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" &
 PID_A=$!
-droid exec -m "$MODEL_B" -r "$RE_B" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" &
+droid exec -m "$MODEL_B" -r "$RE_B" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" &
 PID_B=$!
-droid exec -m "$MODEL_C" -r "$RE_C" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" &
+droid exec -m "$MODEL_C" -r "$RE_C" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" &
 PID_C=$!
 
 FAIL=""
@@ -85,5 +85,9 @@ echo "- Use Option B (${LABEL_B}) as-is"
 echo "- Use Option C (${LABEL_C}) as-is"
 echo "- Synthesize a refined spec combining the best of all three"
 echo "- No -- none of these work (explain why)"
-echo "If the user picks synthesis, combine the strongest elements and save"
-echo "to specs/active/YYYY-MM-DD-<slug>.md. If rejected, gather feedback."
+echo ""
+echo "CRITICAL: Do NOT save the spec directly. After the user picks an option"
+echo "or requests synthesis, use the ExitSpecMode tool to present the final"
+echo "spec content for review. Only save to specs/active/YYYY-MM-DD-<slug>.md"
+echo "AFTER the user approves the spec in spec mode. If rejected, gather"
+echo "feedback and revise."

From d007c4a90dce2cd9ed251c8bd48f0e4a9a4cf76d Mon Sep 17 00:00:00 2001
From: TheFactoriousDROID <andrew.taylor@factory.ai>
Date: Mon, 9 Mar 2026 20:11:55 -0700
Subject: [PATCH 3/3] fix(rfspec): persist results and support fire-and-forget
 polling

The run.sh script spawns three droid exec calls that take several minutes,
but the Execute tool times out at 60s. When that happens the temp dir
self-destructs and results are lost.

Changes:
- Write model outputs to persistent ~/.factory/rfspec/runs/<id>/ instead of
  a temp dir
- Print RFSPEC_RUN_DIR path immediately so the agent captures it before timeout
- Write a done sentinel (STATUS=complete|failed) for polling
- Update SKILL.md (v1.3.0) with fire-and-forget + poll workflow instructions
---
 plugins/rfspec/commands/rfspec              |  80 ++++++++++-
 plugins/rfspec/skills/rfspec/SKILL.md       | 100 +++++++++++---
 plugins/rfspec/skills/rfspec/scripts/run.sh | 142 ++++++++++++++------
 3 files changed, 259 insertions(+), 63 deletions(-)

diff --git a/plugins/rfspec/commands/rfspec b/plugins/rfspec/commands/rfspec
index 3528d67..56b158d 100755
--- a/plugins/rfspec/commands/rfspec
+++ b/plugins/rfspec/commands/rfspec
@@ -1,2 +1,80 @@
 #!/usr/bin/env bash
-exec "$(dirname "$0")/../skills/rfspec/scripts/run.sh" "$@"
+# Launch rfspec in background and return polling instructions immediately.
+# This avoids the Execute tool timeout killing the long-running model calls.
+
+SCRIPT_DIR="$(dirname "$0")"
+RUN_SH="${SCRIPT_DIR}/../skills/rfspec/scripts/run.sh"
+
+if [ $# -eq 0 ]; then
+  exec "$RUN_SH"
+fi
+
+# Run run.sh in the background with its output captured in a private log.
+# It prints RFSPEC_RUN_DIR=<path> early, which is all we need from it here.
+BGLOG=$(mktemp /tmp/rfspec-bg-XXXXXXXX)
+nohup "$RUN_SH" "$@" >"$BGLOG" 2>&1 &
+BG_PID=$!
+
+# Poll the log (up to ~10s) for the run dir; stop early if run.sh has exited.
+for _ in 1 2 3 4 5 6 7 8 9 10; do
+  RUN_DIR=$(grep -m1 '^RFSPEC_RUN_DIR=' "$BGLOG" 2>/dev/null | cut -d= -f2-)
+  if [ -n "$RUN_DIR" ] || ! kill -0 "$BG_PID" 2>/dev/null; then break; fi
+  sleep 1
+done
+
+echo "User prompt: $*"
+echo ""
+
+if [ -z "$RUN_DIR" ]; then
+  echo "rfspec launched (PID ${BG_PID}), but run dir not yet available."
+  echo "Check log: ${BGLOG}"
+else
+  echo "RFSPEC_RUN_DIR=${RUN_DIR}"
+fi
+
+echo ""
+echo "rfspec is running in background (PID ${BG_PID})."
+echo "Background log: ${BGLOG}"
+echo ""
+cat <<'WORKFLOW'
+=== RFSPEC WORKFLOW ===
+
+Three models (Opus, GPT-5.4, Gemini) are generating competing spec proposals.
+
+STEP 1: Tell the user the models are running and results will be ready
+in a few minutes.
+
+STEP 2: Poll for completion every 30-60 seconds:
+WORKFLOW
+echo "  cat ${RUN_DIR:-<run_dir>}/done 2>/dev/null || echo PENDING"
+cat <<'WORKFLOW'
+
+STEP 3: When done, read the results file:
+WORKFLOW
+echo "  Read: ${RUN_DIR:-<run_dir>}/results.md"
+cat <<'WORKFLOW'
+
+STEP 4: EVALUATE -- compare the three specs against each other:
+  - Architectural choices (patterns, libraries, data flow)
+  - Scope differences (what each included or excluded)
+  - Concrete vs. vague (which named actual files, functions, steps)
+  - Risk areas (where one flagged something the others missed)
+Write a 2-4 sentence comparison per option. Compare, don't summarize.
+
+STEP 5: PRESENT the choice using AskUser with these options:
+  - Use Option A as-is
+  - Use Option B as-is
+  - Use Option C as-is
+  - Synthesize a refined spec combining the best of all three
+  - None of these work
+
+STEP 6: FINALIZE based on user's choice:
+  - If user picks one option: present it via ExitSpecMode for review.
+  - If user picks synthesis: start from the strongest option as base,
+    pull specific elements from others (name what and why), resolve
+    contradictions. The result must be a single coherent document.
+  - If user rejects all: ask what's missing, refine prompt, re-run.
+
+  Only save to specs/active/YYYY-MM-DD-<slug>.md AFTER user approves
+  the spec in spec mode. Do NOT save without approval.
+WORKFLOW
diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md
index 29b6cfd..d6c6ef1 100644
--- a/plugins/rfspec/skills/rfspec/SKILL.md
+++ b/plugins/rfspec/skills/rfspec/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: rfspec
-version: 1.2.0
+version: 1.3.0
 description: |
   Multi-model spec generation and synthesis. Use when the user wants to:
   - Get competing proposals from different AI models
@@ -17,20 +17,63 @@ Fan out a prompt to multiple models, compare their responses, and help the user
 
 ## Quick Reference
 
-| Task | Action |
-|------|--------|
-| Generate competing specs | `/rfspec <prompt>` |
-| Pick one result | Select via AskUser after comparison |
-| Synthesize results | Combine strongest elements when user chooses synthesis |
-| Save final spec | Write to `specs/active/YYYY-MM-DD-<slug>.md` |
+| Task                     | Action                                                 |
+| ------------------------ | ------------------------------------------------------ |
+| Generate competing specs | `/rfspec <prompt>` (background)                        |
+| Poll for results         | Check `<run_dir>/done` sentinel                        |
+| Pick one result          | Select via AskUser after comparison                    |
+| Synthesize results       | Combine strongest elements when user chooses synthesis |
+| Save final spec          | Write to `specs/active/YYYY-MM-DD-<slug>.md`           |
 
 ## Workflow
 
-1. Run `/rfspec <user's prompt>` -- fires parallel model calls, returns labeled options (A, B, C).
-2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md).
-3. Present the choice to the user via AskUser.
-4. Present the selected or synthesized spec via ExitSpecMode for user review.
-5. Save to `specs/active/` only after the user approves in spec mode.
+The `/rfspec` command spawns three `droid exec` calls in parallel. These take
+several minutes, far exceeding the Execute tool timeout. You MUST use the
+fire-and-forget + poll pattern.
+
+### Step 1 -- Launch (background)
+
+Run the command with `fireAndForget=true`:
+
+```
+Execute: /rfspec <user's prompt>
+  fireAndForget: true
+```
+
+The script immediately prints `RFSPEC_RUN_DIR=<path>` to its log file.
+Read the log file (path printed by Execute) to capture the run directory.
+
+### Step 2 -- Poll for completion
+
+Tell the user the models are running and you will check back. Then poll:
+
+```
+Execute: cat <run_dir>/done 2>/dev/null || echo "PENDING"
+```
+
+Poll every 30-60 seconds. The sentinel contains `STATUS=complete` or
+`STATUS=failed`. While waiting, you can do other work or update the user on
+progress.
+
+### Step 3 -- Read results
+
+Once `done` exists, read the results:
+
+```
+Read: <run_dir>/results.md
+```
+
+This file contains all three model outputs as markdown sections (Option A, B, C).
+
+### Step 4 -- Evaluate and present
+
+Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md).
+Present the choice to the user via AskUser.
+
+### Step 5 -- Finalize
+
+Present the selected or synthesized spec via ExitSpecMode for user review.
+Save to `specs/active/` only after the user approves in spec mode.
 
 ## Saving
 
@@ -43,6 +86,19 @@ specs/active/YYYY-MM-DD-<slug>.md
 
 Where `<slug>` is a short kebab-case name derived from the topic.
 
+## Resuming from slash command
+
+If you are loading this skill after `/rfspec` already ran (the slash command told
+you to invoke `Skill: rfspec`), you already have the run directory. Pick up from
+Step 3:
+
+1. Read `<run_dir>/results.md` to get the model outputs.
+2. Follow Step 4 (evaluate and present) and Step 5 (finalize) above.
+
+The `results.md` file includes embedded agent instructions as a fallback, but
+prefer the full workflow in this document -- it covers the evaluation guide,
+saving rules, and rejection handling that the embedded version omits.
+
 ## Pitfalls
 
 - Don't summarize each option individually -- compare them against each other.
@@ -63,21 +119,23 @@ Example 1: User wants competing specs
 User says: "Get me specs from multiple models for adding a dark mode toggle"
 Actions:
 
-1. Run `/rfspec add a dark mode toggle to the settings page with persistent user preference`
-2. Read Options A, B, C
-3. Compare: "Option A uses CSS variables with a React context, Option B uses Tailwind's dark class with localStorage, Option C uses a theme provider with system preference detection."
-4. Present choice via AskUser
-Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md`
+1. Execute `/rfspec add a dark mode toggle ...` with `fireAndForget=true`
+2. Read the background log to get `RFSPEC_RUN_DIR`
+3. Tell user: "Models are running, I'll check back shortly."
+4. Poll `<run_dir>/done` until `STATUS=complete`
+5. Read `<run_dir>/results.md`, compare Options A, B, C
+6. Present choice via AskUser
+   Result: User picks Option B, approves it via ExitSpecMode, and it is saved to `specs/active/2026-03-06-dark-mode-toggle.md`
 
 Example 2: User wants synthesis
 User says: "rfspec this: refactor the auth module to use JWT"
 Actions:
 
-1. Run `/rfspec refactor the auth module to use JWT`
-2. Compare results, noting Option A has better token rotation but Option C has cleaner middleware
+1. Launch background, poll for completion
+2. Read results, compare -- Option A has better token rotation, Option C has cleaner middleware
 3. User selects "Synthesize"
 4. Combine Option A's rotation logic with Option C's middleware structure
-Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md`
+   Result: Synthesized spec approved via ExitSpecMode, then saved to `specs/active/2026-03-06-auth-jwt-refactor.md`
 
 Example 3: All options rejected
 User says: "None of these work, they all miss the caching layer"
@@ -85,7 +143,7 @@ Actions:
 
 1. Ask what's missing -- user explains the Redis caching requirement
 2. Offer to re-run: `/rfspec refactor auth module to use JWT with Redis session caching`
-Result: New round of specs generated with caching addressed
+   Result: New round of specs generated with caching addressed
 
 ## References
 
diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh
index 8bace58..f997356 100755
--- a/plugins/rfspec/skills/rfspec/scripts/run.sh
+++ b/plugins/rfspec/skills/rfspec/scripts/run.sh
@@ -2,8 +2,14 @@
 set -euo pipefail
 
 # ── guard: dependencies ──────────────────────────────────────────────
-command -v jq  >/dev/null 2>&1 || { echo "Error: jq is required but not installed. Install it with: brew install jq"; exit 1; }
-command -v droid >/dev/null 2>&1 || { echo "Error: droid CLI is required but not found on PATH."; exit 1; }
+command -v jq >/dev/null 2>&1 || {
+  echo "Error: jq is required but not installed. Install it with: brew install jq"
+  exit 1
+}
+command -v droid >/dev/null 2>&1 || {
+  echo "Error: droid CLI is required but not found on PATH."
+  exit 1
+}
 
 PROMPT="$*"
 
@@ -15,23 +21,59 @@ if [ -z "$PROMPT" ]; then
   exit 1
 fi
 
-# ── prompt ────────────────────────────────────────────────────────────
+# ── persistent output directory ──────────────────────────────────────
+# Results go to a stable path so the calling session can poll for them.
+# The temp dir is only used for the prompt file passed to droid exec.
+RFSPEC_HOME="${HOME}/.factory/rfspec/runs"
+RUN_ID="$(date +%Y%m%d-%H%M%S)-$$"
+OUTDIR="${RFSPEC_HOME}/${RUN_ID}"
+mkdir -p "$OUTDIR"
+
 TMPDIR=$(mktemp -d)
 trap 'rm -rf "$TMPDIR"' EXIT
 
-echo "$PROMPT" > "$TMPDIR/prompt.md"
+# Wrap the raw prompt with spec-generation framing so subagents produce
+# a structured spec proposal, not code or casual analysis.
+cat >"$TMPDIR/prompt.md" <<SPEC_FRAME
+You are generating a structured implementation spec. Do NOT write code.
+
+Produce a spec document with these sections:
+- **Objective**: What this achieves (1-2 sentences)
+- **Context**: Relevant background, constraints, prior art
+- **Plan**: Concrete implementation steps with file paths and approach
+- **Validation**: How to verify the implementation works
+- **Risks / Open Questions**: What could go wrong, what needs clarification
+
+Be specific -- name files, functions, libraries, and patterns. Avoid vague hand-waving.
+
+---
+
+${PROMPT}
+SPEC_FRAME
+cp "$TMPDIR/prompt.md" "$OUTDIR/prompt.md"
+
+# Print the output path IMMEDIATELY so the calling agent can capture it
+# even if the Execute call times out. Point callers at the done sentinel.
+echo "RFSPEC_RUN_DIR=${OUTDIR}"
+echo "Firing three model calls in parallel. Poll ${OUTDIR}/done for completion, then read ${OUTDIR}/results.md."
 
 # ── models (id, label, max reasoning) ────────────────────────────────
-MODEL_A="claude-opus-4-6";  LABEL_A="Opus 4.6";       RE_A="max"
-MODEL_B="gpt-5.4";          LABEL_B="GPT-5.4";        RE_B="xhigh"
-MODEL_C="gemini-3.1-pro-preview"; LABEL_C="Gemini 3.1 Pro"; RE_C="high"
+MODEL_A="claude-opus-4-6"
+LABEL_A="Opus 4.6"
+RE_A="max"
+MODEL_B="gpt-5.4"
+LABEL_B="GPT-5.4"
+RE_B="xhigh"
+MODEL_C="gemini-3.1-pro-preview"
+LABEL_C="Gemini 3.1 Pro"
+RE_C="high"
 
 # ── fire all three in parallel ───────────────────────────────────────
-droid exec -m "$MODEL_A" -r "$RE_A" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" &
+droid exec -m "$MODEL_A" -r "$RE_A" --auto high -f "$TMPDIR/prompt.md" -o json 2>"$OUTDIR/a.err" >"$OUTDIR/a.json" &
 PID_A=$!
-droid exec -m "$MODEL_B" -r "$RE_B" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" &
+droid exec -m "$MODEL_B" -r "$RE_B" --auto high -f "$TMPDIR/prompt.md" -o json 2>"$OUTDIR/b.err" >"$OUTDIR/b.json" &
 PID_B=$!
-droid exec -m "$MODEL_C" -r "$RE_C" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" &
+droid exec -m "$MODEL_C" -r "$RE_C" --auto high -f "$TMPDIR/prompt.md" -o json 2>"$OUTDIR/c.err" >"$OUTDIR/c.json" &
 PID_C=$!
 
 FAIL=""
@@ -47,47 +89,65 @@ extract() {
   fi
 }
 
-RESULT_A=$(extract "$TMPDIR/a.json")
-RESULT_B=$(extract "$TMPDIR/b.json")
-RESULT_C=$(extract "$TMPDIR/c.json")
-
-# ── present results ──────────────────────────────────────────────────
-echo "=== RFSPEC RESULTS ==="
-echo ""
-echo "User request: ${PROMPT}"
-echo ""
-
-[ -n "$RESULT_A" ] && printf '### Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A"
-[ -n "$RESULT_B" ] && printf '### Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B"
-[ -n "$RESULT_C" ] && printf '### Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C"
-
-if [ -n "$FAIL" ]; then
-  echo "Note: The following models encountered errors: ${FAIL}"
-  echo ""
-fi
+RESULT_A=$(extract "$OUTDIR/a.json") # extract (defined above) is handed the model's JSON output file from the run dir
+RESULT_B=$(extract "$OUTDIR/b.json")
+RESULT_C=$(extract "$OUTDIR/c.json")
 
+# ── write results to persistent file ─────────────────────────────────
 SUCCESS=0
 [ -n "$RESULT_A" ] && SUCCESS=$((SUCCESS + 1))
 [ -n "$RESULT_B" ] && SUCCESS=$((SUCCESS + 1))
 [ -n "$RESULT_C" ] && SUCCESS=$((SUCCESS + 1))
 
+{ # everything echoed/printed inside this group is redirected into results.md
+  echo "# rfspec results"
+  echo ""
+  echo "User request: ${PROMPT}"
+  echo ""
+
+  [ -n "$RESULT_A" ] && printf '## Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A" # an option with an empty result is omitted
+  [ -n "$RESULT_B" ] && printf '## Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B"
+  [ -n "$RESULT_C" ] && printf '## Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C"
+
+  if [ -n "$FAIL" ]; then # FAIL starts empty; presumably filled in by the wait logic, which is not shown in this hunk
+    echo "> **Note:** The following models encountered errors: ${FAIL}"
+    echo ""
+  fi
+
+  if [ "$SUCCESS" -gt 0 ]; then # agent instructions are emitted only when at least one result is non-empty
+    echo "---"
+    echo ""
+    echo "## Agent Instructions"
+    echo ""
+    echo "Analyze the specs above. Provide a brief comparison of each model's"
+    echo "strengths and weaknesses -- compare them against each other, not individually."
+    echo "Then use the AskUser tool to offer:"
+    echo "- Use Option A (${LABEL_A}) as-is"
+    echo "- Use Option B (${LABEL_B}) as-is"
+    echo "- Use Option C (${LABEL_C}) as-is"
+    echo "- Synthesize a refined spec combining the best of all three"
+    echo "- No -- none of these work (explain why)"
+    echo ""
+    echo "CRITICAL: Do NOT save the spec directly. After the user picks an option"
+    echo "or requests synthesis, use the ExitSpecMode tool to present the final"
+    echo "spec content for review. Only save to specs/active/YYYY-MM-DD-<slug>.md"
+    echo "AFTER the user approves the spec in spec mode. If rejected, gather"
+    echo "feedback and revise."
+  fi
+} >"$OUTDIR/results.md" # creates (or truncates) results.md in the run directory
+
+# ── also print to stdout (for cases where timeout is large enough) ───
+cat "$OUTDIR/results.md" # replay the file that was just written
+
 if [ "$SUCCESS" -eq 0 ]; then
+  echo ""
   echo "Error: All three models failed. Check that your droid CLI is authenticated"
   echo "and the models (${MODEL_A}, ${MODEL_B}, ${MODEL_C}) are available."
+  echo "STATUS=failed" >"$OUTDIR/done" # status file records the failed outcome before the non-zero exit
   exit 1
 fi
 
-echo "=== AGENT INSTRUCTIONS ==="
-echo "Analyze the specs above. Provide a brief comparison of each model's"
-echo "strengths and weaknesses. Then use the AskUser tool to offer:"
-echo "- Use Option A (${LABEL_A}) as-is"
-echo "- Use Option B (${LABEL_B}) as-is"
-echo "- Use Option C (${LABEL_C}) as-is"
-echo "- Synthesize a refined spec combining the best of all three"
-echo "- No -- none of these work (explain why)"
+# ── write completion sentinel ────────────────────────────────────────
+echo "STATUS=complete" >"$OUTDIR/done" # written after results.md, so its presence implies results.md is finished
 echo ""
-echo "CRITICAL: Do NOT save the spec directly. After the user picks an option"
-echo "or requests synthesis, use the ExitSpecMode tool to present the final"
-echo "spec content for review. Only save to specs/active/YYYY-MM-DD-<slug>.md"
-echo "AFTER the user approves the spec in spec mode. If rejected, gather"
-echo "feedback and revise."
+echo "Results written to: ${OUTDIR}/results.md"