From 28c686af32956958b11ef292b722eb017925df7a Mon Sep 17 00:00:00 2001 From: TheFactoriousDROID Date: Fri, 6 Mar 2026 14:20:07 -0800 Subject: [PATCH 1/3] Add rfspec plugin: multi-model spec generation via /rfspec command Sends user prompt directly to Opus 4.6 (max), GPT-5.4 (xhigh), and Gemini 3.1 Pro (high) in parallel -- each at its maximum reasoning tier. Presents competing responses and lets the user pick or synthesize. Includes a skill that teaches the agent the full workflow: when to invoke rfspec, how to evaluate and compare results, how to synthesize, and where to save the final spec. --- .factory-plugin/marketplace.json | 6 ++ README.md | 8 ++ plugins/rfspec/.factory-plugin/plugin.json | 9 ++ plugins/rfspec/README.md | 32 +++++++ plugins/rfspec/commands/rfspec | 2 + plugins/rfspec/skills/rfspec/SKILL.md | 91 +++++++++++++++++++ .../rfspec/references/evaluation-guide.md | 38 ++++++++ .../rfspec/references/troubleshooting.md | 37 ++++++++ plugins/rfspec/skills/rfspec/scripts/run.sh | 89 ++++++++++++++++++ 9 files changed, 312 insertions(+) create mode 100644 plugins/rfspec/.factory-plugin/plugin.json create mode 100644 plugins/rfspec/README.md create mode 100755 plugins/rfspec/commands/rfspec create mode 100644 plugins/rfspec/skills/rfspec/SKILL.md create mode 100644 plugins/rfspec/skills/rfspec/references/evaluation-guide.md create mode 100644 plugins/rfspec/skills/rfspec/references/troubleshooting.md create mode 100755 plugins/rfspec/skills/rfspec/scripts/run.sh diff --git a/.factory-plugin/marketplace.json b/.factory-plugin/marketplace.json index e099ed2..96fa7a3 100644 --- a/.factory-plugin/marketplace.json +++ b/.factory-plugin/marketplace.json @@ -23,6 +23,12 @@ "description": "Core Skills for essential functionalities and integrations", "source": "./plugins/core", "category": "core" + }, + { + "name": "rfspec", + "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec", + 
"source": "./plugins/rfspec", + "category": "productivity" } ] } diff --git a/README.md b/README.md index 5a572d6..28024d0 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,14 @@ Skills for continuous learning and improvement. - `frontend-design` - Build web apps, websites, HTML pages with good design - `browser-navigation` - Browser automation with agent-browser +### rfspec + +Fan out a prompt to multiple AI models in parallel and pick or synthesize the best result. + +**Skills:** + +- `rfspec` - Multi-model spec generation and synthesis workflow + ## Plugin Structure Each plugin follows the Factory plugin format: diff --git a/plugins/rfspec/.factory-plugin/plugin.json b/plugins/rfspec/.factory-plugin/plugin.json new file mode 100644 index 0000000..4280e39 --- /dev/null +++ b/plugins/rfspec/.factory-plugin/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "rfspec", + "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec", + "version": "1.0.0", + "author": { + "name": "Andy Taylor", + "email": "andrew.taylor@factory.ai" + } +} diff --git a/plugins/rfspec/README.md b/plugins/rfspec/README.md new file mode 100644 index 0000000..b892e20 --- /dev/null +++ b/plugins/rfspec/README.md @@ -0,0 +1,32 @@ +# rfspec + +Request for Spec -- fan out a prompt to multiple AI models in parallel and choose or synthesize the best result. + +## What it does + +`/rfspec` sends your prompt to three models simultaneously (Claude Opus 4.6, GPT-5.4, Gemini 3.1 Pro), each at its maximum reasoning tier. The results are presented side-by-side so you can pick the strongest one or synthesize a combination. No prescriptive system prompt is injected -- the models bring their own reasoning to your request. + +## Usage + +``` +/rfspec +``` + +Example: + +``` +/rfspec add a dark mode toggle to the settings page with persistent user preference +``` + +The command will: + +1. 
Send the prompt to all three models in parallel via `droid exec`, each at its maximum reasoning tier (Opus: max, GPT-5.4: xhigh, Gemini: high) +2. Collect and display each model's response (Options A, B, C) +3. Ask you to pick one as-is or synthesize the best parts +4. Save the chosen result to `specs/active/YYYY-MM-DD-<slug>.md` + +## Requirements + +- **droid CLI** -- must be installed and authenticated +- **jq** -- for JSON parsing (`brew install jq` on macOS) +- Access to at least one of the three models. Models that fail are skipped gracefully; the command only errors if all three fail. diff --git a/plugins/rfspec/commands/rfspec b/plugins/rfspec/commands/rfspec new file mode 100755 index 0000000..3528d67 --- /dev/null +++ b/plugins/rfspec/commands/rfspec @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +exec "$(dirname "$0")/../skills/rfspec/scripts/run.sh" "$@" diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md new file mode 100644 index 0000000..094160e --- /dev/null +++ b/plugins/rfspec/skills/rfspec/SKILL.md @@ -0,0 +1,91 @@ +--- +name: rfspec +version: 1.2.0 +description: | + Multi-model spec generation and synthesis. Use when the user wants to: + - Get competing proposals from different AI models + - Compare approaches to a problem from different perspectives + - Synthesize the best parts of several proposals into one spec + Keywords: rfspec, competing specs, multi-model, compare approaches, + multiple perspectives, request for spec, fan out, model comparison. + NOT for: single-model generation, code review, or running tests. +--- + +# rfspec -- Request for Spec + +Fan out a prompt to multiple models, compare their responses, and help the user pick or synthesize the best result.
+ +## Quick Reference + +| Task | Action | +|------|--------| +| Generate competing specs | `/rfspec ` | +| Pick one result | Select via AskUser after comparison | +| Synthesize results | Combine strongest elements when user chooses synthesis | +| Save final spec | Write to `specs/active/YYYY-MM-DD-.md` | + +## Workflow + +1. Run `/rfspec ` -- fires parallel model calls, returns labeled options (A, B, C). +2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md). +3. Present the choice to the user via AskUser. +4. Save the selected or synthesized result. + +## Saving + +Save the final result (picked or synthesized) to: + +``` +specs/active/YYYY-MM-DD-.md +``` + +Where `` is a short kebab-case name derived from the topic. + +## Pitfalls + +- Don't summarize each option individually -- compare them against each other. +- Don't concatenate when synthesizing -- resolve contradictions and produce a coherent document. +- If all options are rejected, gather feedback and re-run with a refined prompt. + +## Verification + +After saving a spec: + +1. Confirm the file exists at the expected path. +2. Verify it contains the selected or synthesized content. +3. Report the saved path to the user. + +## Examples + +Example 1: User wants competing specs +User says: "Get me specs from multiple models for adding a dark mode toggle" +Actions: + +1. Run `/rfspec add a dark mode toggle to the settings page with persistent user preference` +2. Read Options A, B, C +3. Compare: "Option A uses CSS variables with a React context, Option B uses Tailwind's dark class with localStorage, Option C uses a theme provider with system preference detection." +4. Present choice via AskUser +Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md` + +Example 2: User wants synthesis +User says: "rfspec this: refactor the auth module to use JWT" +Actions: + +1. Run `/rfspec refactor the auth module to use JWT` +2. 
Compare results, noting Option A has better token rotation but Option C has cleaner middleware +3. User selects "Synthesize" +4. Combine Option A's rotation logic with Option C's middleware structure +Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md` + +Example 3: All options rejected +User says: "None of these work, they all miss the caching layer" +Actions: + +1. Ask what's missing -- user explains the Redis caching requirement +2. Offer to re-run: `/rfspec refactor auth module to use JWT with Redis session caching` +Result: New round of specs generated with caching addressed + +## References + +- [references/evaluation-guide.md](references/evaluation-guide.md) -- how to compare, synthesize, and handle rejection +- [references/troubleshooting.md](references/troubleshooting.md) -- error codes and fixes diff --git a/plugins/rfspec/skills/rfspec/references/evaluation-guide.md b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md new file mode 100644 index 0000000..b5fce05 --- /dev/null +++ b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md @@ -0,0 +1,38 @@ +# Evaluation Guide + +How to compare and evaluate competing model responses. + +## Comparing Options + +When results come back from `/rfspec`: + +1. **Read all options** before responding. Understand each approach fully. +2. **Identify meaningful differences** -- not cosmetic ones. Focus on: + - Architectural choices (patterns, libraries, data flow) + - Scope differences (what each included or excluded) + - Risk areas (where one flagged something the others missed) + - Concrete vs. vague (which named actual files, functions, steps) +3. **Write a brief comparison** -- 2-4 sentences per option covering strengths and gaps. Compare, don't summarize. +4. 
**Present the choice** using AskUser: + - Use Option A as-is + - Use Option B as-is + - Use Option C as-is + - Synthesize a refined version combining the best of all three + - None of these work + +## Synthesizing + +If the user picks synthesis: + +1. Start from the strongest option as the base. +2. Pull in specific elements from the others -- name what you're taking and why. +3. Resolve contradictions (don't concatenate). +4. The final result should read as a single coherent document, not a patchwork. + +## Handling Rejection + +If the user rejects all options: + +1. Ask what's missing or wrong. +2. Incorporate their feedback into a refined prompt. +3. Offer to re-run `/rfspec` with the updated prompt. diff --git a/plugins/rfspec/skills/rfspec/references/troubleshooting.md b/plugins/rfspec/skills/rfspec/references/troubleshooting.md new file mode 100644 index 0000000..b67675c --- /dev/null +++ b/plugins/rfspec/skills/rfspec/references/troubleshooting.md @@ -0,0 +1,37 @@ +# Troubleshooting + +## All three models failed + +``` +Error: All three models failed. Check that your droid CLI is authenticated... +``` + +**Cause:** droid CLI not authenticated or models unavailable. +**Solution:** Run `droid` interactively to verify auth, then retry. + +## jq not installed + +``` +Error: jq is required but not installed. +``` + +**Cause:** jq not on PATH. +**Solution:** `brew install jq` (macOS) or `apt-get install jq` (Linux). + +## One or two models failed + +``` +Note: The following models encountered errors: Opus 4.6 +``` + +**Cause:** Specific model unavailable or rate-limited. +**Solution:** This is handled gracefully -- compare the options that did return. No action needed unless the failed model was critical. + +## Command not found + +``` +/rfspec: command not found +``` + +**Cause:** Plugin not installed. +**Solution:** Install via `/plugins` UI or `droid plugin install rfspec@factory-plugins --scope user`. 
diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh new file mode 100755 index 0000000..d912be3 --- /dev/null +++ b/plugins/rfspec/skills/rfspec/scripts/run.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ── guard: dependencies ────────────────────────────────────────────── +command -v jq >/dev/null 2>&1 || { echo "Error: jq is required but not installed. Install it with: brew install jq"; exit 1; } +command -v droid >/dev/null 2>&1 || { echo "Error: droid CLI is required but not found on PATH."; exit 1; } + +PROMPT="$*" + +if [ -z "$PROMPT" ]; then + echo "Usage: /rfspec " + echo "" + echo "Sends your prompt to three models in parallel (Opus, GPT, Gemini)," + echo "then lets you pick the best spec or synthesize a combination." + exit 1 +fi + +# ── prompt ──────────────────────────────────────────────────────────── +TMPDIR=$(mktemp -d) +trap 'rm -rf "$TMPDIR"' EXIT + +echo "$PROMPT" > "$TMPDIR/prompt.md" + +# ── models (id, label, max reasoning) ──────────────────────────────── +MODEL_A="claude-opus-4-6"; LABEL_A="Opus 4.6"; RE_A="max" +MODEL_B="gpt-5.4"; LABEL_B="GPT-5.4"; RE_B="xhigh" +MODEL_C="gemini-3.1-pro-preview"; LABEL_C="Gemini 3.1 Pro"; RE_C="high" + +# ── fire all three in parallel ─────────────────────────────────────── +droid exec -m "$MODEL_A" -r "$RE_A" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" & +PID_A=$! +droid exec -m "$MODEL_B" -r "$RE_B" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" & +PID_B=$! +droid exec -m "$MODEL_C" -r "$RE_C" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" & +PID_C=$! 
+ +FAIL="" +wait $PID_A 2>/dev/null || FAIL="${FAIL}${LABEL_A} " +wait $PID_B 2>/dev/null || FAIL="${FAIL}${LABEL_B} " +wait $PID_C 2>/dev/null || FAIL="${FAIL}${LABEL_C} " + +# ── extract results ────────────────────────────────────────────────── +extract() { + local file="$1" + if [ -s "$file" ]; then + jq -r '.result // empty' "$file" 2>/dev/null || cat "$file" + fi +} + +RESULT_A=$(extract "$TMPDIR/a.json") +RESULT_B=$(extract "$TMPDIR/b.json") +RESULT_C=$(extract "$TMPDIR/c.json") + +# ── present results ────────────────────────────────────────────────── +echo "=== RFSPEC RESULTS ===" +echo "" +echo "User request: ${PROMPT}" +echo "" + +[ -n "$RESULT_A" ] && printf '### Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A" +[ -n "$RESULT_B" ] && printf '### Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B" +[ -n "$RESULT_C" ] && printf '### Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C" + +if [ -n "$FAIL" ]; then + echo "Note: The following models encountered errors: ${FAIL}" + echo "" +fi + +SUCCESS=0 +[ -n "$RESULT_A" ] && SUCCESS=$((SUCCESS + 1)) +[ -n "$RESULT_B" ] && SUCCESS=$((SUCCESS + 1)) +[ -n "$RESULT_C" ] && SUCCESS=$((SUCCESS + 1)) + +if [ "$SUCCESS" -eq 0 ]; then + echo "Error: All three models failed. Check that your droid CLI is authenticated" + echo "and the models (${MODEL_A}, ${MODEL_B}, ${MODEL_C}) are available." + exit 1 +fi + +echo "=== AGENT INSTRUCTIONS ===" +echo "Analyze the specs above. Provide a brief comparison of each model's" +echo "strengths and weaknesses. Then use the AskUser tool to offer:" +echo "- Use Option A (${LABEL_A}) as-is" +echo "- Use Option B (${LABEL_B}) as-is" +echo "- Use Option C (${LABEL_C}) as-is" +echo "- Synthesize a refined spec combining the best of all three" +echo "- No -- none of these work (explain why)" +echo "If the user picks synthesis, combine the strongest elements and save" +echo "to specs/active/YYYY-MM-DD-.md. If rejected, gather feedback." 
From 6959152ca0f1d98c26dbb78398d45d673fd1a487 Mon Sep 17 00:00:00 2001 From: TheFactoriousDROID Date: Fri, 6 Mar 2026 16:05:51 -0800 Subject: [PATCH 2/3] fix: add --auto medium to droid exec and require ExitSpecMode before saving Background droid exec calls failed for Opus because no --auto flag was set, causing permission denial in non-interactive subprocesses. Also, the agent instructions allowed saving specs directly without user review -- now requires ExitSpecMode approval before writing to specs/active/. --- plugins/rfspec/skills/rfspec/SKILL.md | 6 ++++-- plugins/rfspec/skills/rfspec/scripts/run.sh | 14 +++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md index 094160e..29b6cfd 100644 --- a/plugins/rfspec/skills/rfspec/SKILL.md +++ b/plugins/rfspec/skills/rfspec/SKILL.md @@ -29,11 +29,13 @@ Fan out a prompt to multiple models, compare their responses, and help the user 1. Run `/rfspec ` -- fires parallel model calls, returns labeled options (A, B, C). 2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md). 3. Present the choice to the user via AskUser. -4. Save the selected or synthesized result. +4. Present the selected or synthesized spec via ExitSpecMode for user review. +5. Save to `specs/active/` only after the user approves in spec mode. ## Saving -Save the final result (picked or synthesized) to: +**Do not save immediately.** After the user picks or synthesis is complete, present the +final spec via ExitSpecMode for review. 
Only after approval, save to: ``` specs/active/YYYY-MM-DD-.md diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh index d912be3..8bace58 100755 --- a/plugins/rfspec/skills/rfspec/scripts/run.sh +++ b/plugins/rfspec/skills/rfspec/scripts/run.sh @@ -27,11 +27,11 @@ MODEL_B="gpt-5.4"; LABEL_B="GPT-5.4"; RE_B="xhigh" MODEL_C="gemini-3.1-pro-preview"; LABEL_C="Gemini 3.1 Pro"; RE_C="high" # ── fire all three in parallel ─────────────────────────────────────── -droid exec -m "$MODEL_A" -r "$RE_A" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" & +droid exec -m "$MODEL_A" -r "$RE_A" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/a.json" & PID_A=$! -droid exec -m "$MODEL_B" -r "$RE_B" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" & +droid exec -m "$MODEL_B" -r "$RE_B" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" & PID_B=$! -droid exec -m "$MODEL_C" -r "$RE_C" -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" & +droid exec -m "$MODEL_C" -r "$RE_C" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" & PID_C=$! FAIL="" @@ -85,5 +85,9 @@ echo "- Use Option B (${LABEL_B}) as-is" echo "- Use Option C (${LABEL_C}) as-is" echo "- Synthesize a refined spec combining the best of all three" echo "- No -- none of these work (explain why)" -echo "If the user picks synthesis, combine the strongest elements and save" -echo "to specs/active/YYYY-MM-DD-.md. If rejected, gather feedback." +echo "" +echo "CRITICAL: Do NOT save the spec directly. After the user picks an option" +echo "or requests synthesis, use the ExitSpecMode tool to present the final" +echo "spec content for review. Only save to specs/active/YYYY-MM-DD-.md" +echo "AFTER the user approves the spec in spec mode. If rejected, gather" +echo "feedback and revise." 
From d007c4a90dce2cd9ed251c8bd48f0e4a9a4cf76d Mon Sep 17 00:00:00 2001 From: TheFactoriousDROID Date: Mon, 9 Mar 2026 20:11:55 -0700 Subject: [PATCH 3/3] fix(rfspec): persist results and support fire-and-forget polling The run.sh script spawns three droid exec calls that take several minutes, but the Execute tool times out at 60s. When that happens the temp dir self-destructs and results are lost. Changes: - Write model outputs to persistent ~/.factory/rfspec/runs/<run-id>/ instead of a temp dir - Print RFSPEC_RUN_DIR path immediately so the agent captures it before timeout - Write a done sentinel (STATUS=complete|failed) for polling - Update SKILL.md (v1.3.0) with fire-and-forget + poll workflow instructions --- plugins/rfspec/commands/rfspec | 80 ++++++++++- plugins/rfspec/skills/rfspec/SKILL.md | 100 +++++++++++--- plugins/rfspec/skills/rfspec/scripts/run.sh | 142 ++++++++++++++------ 3 files changed, 259 insertions(+), 63 deletions(-) diff --git a/plugins/rfspec/commands/rfspec b/plugins/rfspec/commands/rfspec index 3528d67..56b158d 100755 --- a/plugins/rfspec/commands/rfspec +++ b/plugins/rfspec/commands/rfspec @@ -1,2 +1,80 @@ #!/usr/bin/env bash -exec "$(dirname "$0")/../skills/rfspec/scripts/run.sh" "$@" +# Launch rfspec in background and return polling instructions immediately. +# This avoids the Execute tool timeout killing the long-running model calls. + +SCRIPT_DIR="$(dirname "$0")" +RUN_SH="${SCRIPT_DIR}/../skills/rfspec/scripts/run.sh" + +if [ $# -eq 0 ]; then + exec "$RUN_SH" +fi + +# Run the script in background, capturing output to its own log. +# run.sh prints RFSPEC_RUN_DIR=<path> as its first line, so we wait +# just long enough to capture that, then return control to the agent. +BGLOG=$(mktemp /tmp/rfspec-bg-XXXXXXXX) +nohup "$RUN_SH" "$@" >"$BGLOG" 2>&1 & +BG_PID=$!
+ +# Wait briefly for run.sh to create the output dir and print the path +sleep 2 + +# Extract the run dir from the early output +RUN_DIR=$(grep -m1 'RFSPEC_RUN_DIR=' "$BGLOG" 2>/dev/null | cut -d= -f2-) + +echo "User prompt: $*" +echo "" + +if [ -z "$RUN_DIR" ]; then + echo "rfspec launched (PID ${BG_PID}), but run dir not yet available." + echo "Check log: ${BGLOG}" +else + echo "RFSPEC_RUN_DIR=${RUN_DIR}" +fi + +echo "" +echo "rfspec is running in background (PID ${BG_PID})." +echo "Background log: ${BGLOG}" +echo "" +cat <<'WORKFLOW' +=== RFSPEC WORKFLOW === + +Three models (Opus, GPT-5.4, Gemini) are generating competing spec proposals. + +STEP 1: Tell the user the models are running and results will be ready +in a few minutes. + +STEP 2: Poll for completion every 30-60 seconds: +WORKFLOW +echo " cat ${RUN_DIR:-}/done 2>/dev/null || echo PENDING" +cat <<'WORKFLOW' + +STEP 3: When done, read the results file: +WORKFLOW +echo " Read: ${RUN_DIR:-}/results.md" +cat <<'WORKFLOW' + +STEP 4: EVALUATE -- compare the three specs against each other: + - Architectural choices (patterns, libraries, data flow) + - Scope differences (what each included or excluded) + - Concrete vs. vague (which named actual files, functions, steps) + - Risk areas (where one flagged something the others missed) +Write a 2-4 sentence comparison per option. Compare, don't summarize. + +STEP 5: PRESENT the choice using AskUser with these options: + - Use Option A as-is + - Use Option B as-is + - Use Option C as-is + - Synthesize a refined spec combining the best of all three + - None of these work + +STEP 6: FINALIZE based on user's choice: + - If user picks one option: present it via ExitSpecMode for review. + - If user picks synthesis: start from the strongest option as base, + pull specific elements from others (name what and why), resolve + contradictions. The result must be a single coherent document. + - If user rejects all: ask what's missing, refine prompt, re-run. 
+ + Only save to specs/active/YYYY-MM-DD-.md AFTER user approves + the spec in spec mode. Do NOT save without approval. +WORKFLOW diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md index 29b6cfd..d6c6ef1 100644 --- a/plugins/rfspec/skills/rfspec/SKILL.md +++ b/plugins/rfspec/skills/rfspec/SKILL.md @@ -1,6 +1,6 @@ --- name: rfspec -version: 1.2.0 +version: 1.3.0 description: | Multi-model spec generation and synthesis. Use when the user wants to: - Get competing proposals from different AI models @@ -17,20 +17,63 @@ Fan out a prompt to multiple models, compare their responses, and help the user ## Quick Reference -| Task | Action | -|------|--------| -| Generate competing specs | `/rfspec ` | -| Pick one result | Select via AskUser after comparison | -| Synthesize results | Combine strongest elements when user chooses synthesis | -| Save final spec | Write to `specs/active/YYYY-MM-DD-.md` | +| Task | Action | +| ------------------------ | ------------------------------------------------------ | +| Generate competing specs | `/rfspec <prompt>` (background) | +| Poll for results | Check `<run-dir>/done` sentinel | +| Pick one result | Select via AskUser after comparison | +| Synthesize results | Combine strongest elements when user chooses synthesis | +| Save final spec | Write to `specs/active/YYYY-MM-DD-.md` | ## Workflow -1. Run `/rfspec ` -- fires parallel model calls, returns labeled options (A, B, C). -2. Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md). -3. Present the choice to the user via AskUser. -4. Present the selected or synthesized spec via ExitSpecMode for user review. -5. Save to `specs/active/` only after the user approves in spec mode. +The `/rfspec` command spawns three `droid exec` calls in parallel. These take +several minutes, far exceeding the Execute tool timeout. You MUST use the +fire-and-forget + poll pattern.
+ +### Step 1 -- Launch (background) + +Run the command with `fireAndForget=true`: + +``` +Execute: /rfspec <prompt> + fireAndForget: true +``` + +The script immediately prints `RFSPEC_RUN_DIR=<run-dir>` to its log file. +Read the log file (path printed by Execute) to capture the run directory. + +### Step 2 -- Poll for completion + +Tell the user the models are running and you will check back. Then poll: + +``` +Execute: cat <run-dir>/done 2>/dev/null || echo "PENDING" +``` + +Poll every 30-60 seconds. The sentinel contains `STATUS=complete` or +`STATUS=failed`. While waiting, you can do other work or update the user on +progress. + +### Step 3 -- Read results + +Once `done` exists, read the results: + +``` +Read: <run-dir>/results.md +``` + +This file contains the output of every model that succeeded, as markdown sections (Option A, B, C). + +### Step 4 -- Evaluate and present + +Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md). +Present the choice to the user via AskUser. + +### Step 5 -- Finalize + +Present the selected or synthesized spec via ExitSpecMode for user review. +Save to `specs/active/` only after the user approves in spec mode. ## Saving @@ -43,6 +86,19 @@ specs/active/YYYY-MM-DD-.md Where `` is a short kebab-case name derived from the topic. +## Resuming from slash command + +If you are loading this skill after `/rfspec` already ran (the slash command told +you to invoke `Skill: rfspec`), you already have the run directory. Pick up from +Step 3: + +1. Read `<run-dir>/results.md` to get the model outputs. +2. Follow Step 4 (evaluate and present) and Step 5 (finalize) above. + +The `results.md` file includes embedded agent instructions as a fallback, but +prefer the full workflow in this document -- it covers the evaluation guide, +saving rules, and rejection handling that the embedded version omits. + ## Pitfalls - Don't summarize each option individually -- compare them against each other.
@@ -63,21 +119,23 @@ Example 1: User wants competing specs User says: "Get me specs from multiple models for adding a dark mode toggle" Actions: -1. Run `/rfspec add a dark mode toggle to the settings page with persistent user preference` -2. Read Options A, B, C -3. Compare: "Option A uses CSS variables with a React context, Option B uses Tailwind's dark class with localStorage, Option C uses a theme provider with system preference detection." -4. Present choice via AskUser -Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md` +1. Execute `/rfspec add a dark mode toggle ...` with `fireAndForget=true` +2. Read the background log to get `RFSPEC_RUN_DIR` +3. Tell user: "Models are running, I'll check back shortly." +4. Poll `<run-dir>/done` until it reports `STATUS=complete` (stop and report the error if it reports `STATUS=failed`) +5. Read `<run-dir>/results.md`, compare Options A, B, C +6. Present choice via AskUser + Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md` Example 2: User wants synthesis User says: "rfspec this: refactor the auth module to use JWT" Actions: -1. Run `/rfspec refactor the auth module to use JWT` -2. Compare results, noting Option A has better token rotation but Option C has cleaner middleware +1. Launch background, poll for completion +2. Read results, compare -- Option A has better token rotation, Option C has cleaner middleware 3. User selects "Synthesize" 4. Combine Option A's rotation logic with Option C's middleware structure -Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md` + Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md` Example 3: All options rejected User says: "None of these work, they all miss the caching layer" @@ -85,7 +143,7 @@ Actions: 1. Ask what's missing -- user explains the Redis caching requirement 2.
Offer to re-run: `/rfspec refactor auth module to use JWT with Redis session caching` -Result: New round of specs generated with caching addressed + Result: New round of specs generated with caching addressed ## References diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh index 8bace58..f997356 100755 --- a/plugins/rfspec/skills/rfspec/scripts/run.sh +++ b/plugins/rfspec/skills/rfspec/scripts/run.sh @@ -2,8 +2,14 @@ set -euo pipefail # ── guard: dependencies ────────────────────────────────────────────── -command -v jq >/dev/null 2>&1 || { echo "Error: jq is required but not installed. Install it with: brew install jq"; exit 1; } -command -v droid >/dev/null 2>&1 || { echo "Error: droid CLI is required but not found on PATH."; exit 1; } +command -v jq >/dev/null 2>&1 || { + echo "Error: jq is required but not installed. Install it with: brew install jq" + exit 1 +} +command -v droid >/dev/null 2>&1 || { + echo "Error: droid CLI is required but not found on PATH." + exit 1 +} PROMPT="$*" @@ -15,23 +21,59 @@ if [ -z "$PROMPT" ]; then exit 1 fi -# ── prompt ──────────────────────────────────────────────────────────── +# ── persistent output directory ────────────────────────────────────── +# Results go to a stable path so the calling session can poll for them. +# The temp dir is only used for the prompt file passed to droid exec. +RFSPEC_HOME="${HOME}/.factory/rfspec/runs" +RUN_ID="$(date +%Y%m%d-%H%M%S)-$$" +OUTDIR="${RFSPEC_HOME}/${RUN_ID}" +mkdir -p "$OUTDIR" + TMPDIR=$(mktemp -d) trap 'rm -rf "$TMPDIR"' EXIT -echo "$PROMPT" > "$TMPDIR/prompt.md" +# Wrap the raw prompt with spec-generation framing so subagents produce +# a structured spec proposal, not code or casual analysis. +cat >"$TMPDIR/prompt.md" </dev/null > "$TMPDIR/a.json" & +droid exec -m "$MODEL_A" -r "$RE_A" --auto high -f "$TMPDIR/prompt.md" -o json 2>/dev/null >"$OUTDIR/a.json" & PID_A=$! 
-droid exec -m "$MODEL_B" -r "$RE_B" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/b.json" & +droid exec -m "$MODEL_B" -r "$RE_B" --auto high -f "$TMPDIR/prompt.md" -o json 2>/dev/null >"$OUTDIR/b.json" & PID_B=$! -droid exec -m "$MODEL_C" -r "$RE_C" --auto medium -f "$TMPDIR/prompt.md" -o json 2>/dev/null > "$TMPDIR/c.json" & +droid exec -m "$MODEL_C" -r "$RE_C" --auto high -f "$TMPDIR/prompt.md" -o json 2>/dev/null >"$OUTDIR/c.json" & PID_C=$! FAIL="" @@ -47,47 +89,65 @@ extract() { fi } -RESULT_A=$(extract "$TMPDIR/a.json") -RESULT_B=$(extract "$TMPDIR/b.json") -RESULT_C=$(extract "$TMPDIR/c.json") - -# ── present results ────────────────────────────────────────────────── -echo "=== RFSPEC RESULTS ===" -echo "" -echo "User request: ${PROMPT}" -echo "" - -[ -n "$RESULT_A" ] && printf '### Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A" -[ -n "$RESULT_B" ] && printf '### Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B" -[ -n "$RESULT_C" ] && printf '### Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C" - -if [ -n "$FAIL" ]; then - echo "Note: The following models encountered errors: ${FAIL}" - echo "" -fi +RESULT_A=$(extract "$OUTDIR/a.json") +RESULT_B=$(extract "$OUTDIR/b.json") +RESULT_C=$(extract "$OUTDIR/c.json") +# ── write results to persistent file ───────────────────────────────── SUCCESS=0 [ -n "$RESULT_A" ] && SUCCESS=$((SUCCESS + 1)) [ -n "$RESULT_B" ] && SUCCESS=$((SUCCESS + 1)) [ -n "$RESULT_C" ] && SUCCESS=$((SUCCESS + 1)) +{ + echo "# rfspec results" + echo "" + echo "User request: ${PROMPT}" + echo "" + + [ -n "$RESULT_A" ] && printf '## Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A" + [ -n "$RESULT_B" ] && printf '## Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B" + [ -n "$RESULT_C" ] && printf '## Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C" + + if [ -n "$FAIL" ]; then + echo "> **Note:** The following models encountered errors: ${FAIL}" + echo "" + fi + + if [ "$SUCCESS" -gt 0 ]; then + echo "---" + echo "" + echo 
"## Agent Instructions" + echo "" + echo "Analyze the specs above. Provide a brief comparison of each model's" + echo "strengths and weaknesses -- compare them against each other, not individually." + echo "Then use the AskUser tool to offer:" + echo "- Use Option A (${LABEL_A}) as-is" + echo "- Use Option B (${LABEL_B}) as-is" + echo "- Use Option C (${LABEL_C}) as-is" + echo "- Synthesize a refined spec combining the best of all three" + echo "- No -- none of these work (explain why)" + echo "" + echo "CRITICAL: Do NOT save the spec directly. After the user picks an option" + echo "or requests synthesis, use the ExitSpecMode tool to present the final" + echo "spec content for review. Only save to specs/active/YYYY-MM-DD-.md" + echo "AFTER the user approves the spec in spec mode. If rejected, gather" + echo "feedback and revise." + fi +} >"$OUTDIR/results.md" + +# ── also print to stdout (for cases where timeout is large enough) ─── +cat "$OUTDIR/results.md" + if [ "$SUCCESS" -eq 0 ]; then + echo "" echo "Error: All three models failed. Check that your droid CLI is authenticated" echo "and the models (${MODEL_A}, ${MODEL_B}, ${MODEL_C}) are available." + echo "STATUS=failed" >"$OUTDIR/done" exit 1 fi -echo "=== AGENT INSTRUCTIONS ===" -echo "Analyze the specs above. Provide a brief comparison of each model's" -echo "strengths and weaknesses. Then use the AskUser tool to offer:" -echo "- Use Option A (${LABEL_A}) as-is" -echo "- Use Option B (${LABEL_B}) as-is" -echo "- Use Option C (${LABEL_C}) as-is" -echo "- Synthesize a refined spec combining the best of all three" -echo "- No -- none of these work (explain why)" +# ── write completion sentinel ──────────────────────────────────────── +echo "STATUS=complete" >"$OUTDIR/done" echo "" -echo "CRITICAL: Do NOT save the spec directly. After the user picks an option" -echo "or requests synthesis, use the ExitSpecMode tool to present the final" -echo "spec content for review. 
Only save to specs/active/YYYY-MM-DD-.md" -echo "AFTER the user approves the spec in spec mode. If rejected, gather" -echo "feedback and revise." +echo "Results written to: ${OUTDIR}/results.md"