diff --git a/.factory-plugin/marketplace.json b/.factory-plugin/marketplace.json index e099ed2..96fa7a3 100644 --- a/.factory-plugin/marketplace.json +++ b/.factory-plugin/marketplace.json @@ -23,6 +23,12 @@ "description": "Core Skills for essential functionalities and integrations", "source": "./plugins/core", "category": "core" + }, + { + "name": "rfspec", + "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec", + "source": "./plugins/rfspec", + "category": "productivity" } ] } diff --git a/README.md b/README.md index 5a572d6..28024d0 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,14 @@ Skills for continuous learning and improvement. - `frontend-design` - Build web apps, websites, HTML pages with good design - `browser-navigation` - Browser automation with agent-browser +### rfspec + +Fan out a prompt to multiple AI models in parallel and pick or synthesize the best result. + +**Skills:** + +- `rfspec` - Multi-model spec generation and synthesis workflow + ## Plugin Structure Each plugin follows the Factory plugin format: diff --git a/plugins/rfspec/.factory-plugin/plugin.json b/plugins/rfspec/.factory-plugin/plugin.json new file mode 100644 index 0000000..4280e39 --- /dev/null +++ b/plugins/rfspec/.factory-plugin/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "rfspec", + "description": "Request for Spec: fan out a prompt to multiple AI models in parallel and pick or synthesize the best implementation spec", + "version": "1.0.0", + "author": { + "name": "Andy Taylor", + "email": "andrew.taylor@factory.ai" + } +} diff --git a/plugins/rfspec/README.md b/plugins/rfspec/README.md new file mode 100644 index 0000000..b892e20 --- /dev/null +++ b/plugins/rfspec/README.md @@ -0,0 +1,32 @@ +# rfspec + +Request for Spec -- fan out a prompt to multiple AI models in parallel and choose or synthesize the best result. 
+ +## What it does + +`/rfspec` sends your prompt to three models simultaneously (Claude Opus 4.6, GPT-5.4, Gemini 3.1 Pro), each at its maximum reasoning tier. The results are presented side-by-side so you can pick the strongest one or synthesize a combination. No prescriptive system prompt is injected -- the models bring their own reasoning to your request. + +## Usage + +``` +/rfspec +``` + +Example: + +``` +/rfspec add a dark mode toggle to the settings page with persistent user preference +``` + +The command will: + +1. Send the prompt to all three models in parallel via `droid exec`, each at its maximum reasoning tier (Opus: max, GPT-5.4: xhigh, Gemini: high) +2. Collect and display each model's response (Options A, B, C) +3. Ask you to pick one as-is or synthesize the best parts +4. Save the chosen result to `specs/active/YYYY-MM-DD-.md` + +## Requirements + +- **droid CLI** -- must be installed and authenticated +- **jq** -- for JSON parsing (`brew install jq` on macOS) +- Access to at least one of the three models. Models that fail are skipped gracefully; the command only errors if all three fail. diff --git a/plugins/rfspec/commands/rfspec b/plugins/rfspec/commands/rfspec new file mode 100755 index 0000000..56b158d --- /dev/null +++ b/plugins/rfspec/commands/rfspec @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# Launch rfspec in background and return polling instructions immediately. +# This avoids the Execute tool timeout killing the long-running model calls. + +SCRIPT_DIR="$(dirname "$0")" +RUN_SH="${SCRIPT_DIR}/../skills/rfspec/scripts/run.sh" + +if [ $# -eq 0 ]; then + exec "$RUN_SH" +fi + +# Run the script in background, capturing output to its own log. +# run.sh prints RFSPEC_RUN_DIR= as its first line, so we wait +# just long enough to capture that, then return control to the agent. +BGLOG=$(mktemp /tmp/rfspec-bg-XXXXXXXX) +nohup "$RUN_SH" "$@" >"$BGLOG" 2>&1 & +BG_PID=$! 
+ +# Wait briefly for run.sh to create the output dir and print the path +sleep 2 + +# Extract the run dir from the early output +RUN_DIR=$(grep -m1 'RFSPEC_RUN_DIR=' "$BGLOG" 2>/dev/null | cut -d= -f2-) + +echo "User prompt: $*" +echo "" + +if [ -z "$RUN_DIR" ]; then + echo "rfspec launched (PID ${BG_PID}), but run dir not yet available." + echo "Check log: ${BGLOG}" +else + echo "RFSPEC_RUN_DIR=${RUN_DIR}" +fi + +echo "" +echo "rfspec is running in background (PID ${BG_PID})." +echo "Background log: ${BGLOG}" +echo "" +cat <<'WORKFLOW' +=== RFSPEC WORKFLOW === + +Three models (Opus, GPT-5.4, Gemini) are generating competing spec proposals. + +STEP 1: Tell the user the models are running and results will be ready +in a few minutes. + +STEP 2: Poll for completion every 30-60 seconds: +WORKFLOW +echo " cat ${RUN_DIR:-}/done 2>/dev/null || echo PENDING" +cat <<'WORKFLOW' + +STEP 3: When done, read the results file: +WORKFLOW +echo " Read: ${RUN_DIR:-}/results.md" +cat <<'WORKFLOW' + +STEP 4: EVALUATE -- compare the three specs against each other: + - Architectural choices (patterns, libraries, data flow) + - Scope differences (what each included or excluded) + - Concrete vs. vague (which named actual files, functions, steps) + - Risk areas (where one flagged something the others missed) +Write a 2-4 sentence comparison per option. Compare, don't summarize. + +STEP 5: PRESENT the choice using AskUser with these options: + - Use Option A as-is + - Use Option B as-is + - Use Option C as-is + - Synthesize a refined spec combining the best of all three + - None of these work + +STEP 6: FINALIZE based on user's choice: + - If user picks one option: present it via ExitSpecMode for review. + - If user picks synthesis: start from the strongest option as base, + pull specific elements from others (name what and why), resolve + contradictions. The result must be a single coherent document. + - If user rejects all: ask what's missing, refine prompt, re-run. 
+ + Only save to specs/active/YYYY-MM-DD-.md AFTER user approves + the spec in spec mode. Do NOT save without approval. +WORKFLOW diff --git a/plugins/rfspec/skills/rfspec/SKILL.md b/plugins/rfspec/skills/rfspec/SKILL.md new file mode 100644 index 0000000..d6c6ef1 --- /dev/null +++ b/plugins/rfspec/skills/rfspec/SKILL.md @@ -0,0 +1,151 @@ +--- +name: rfspec +version: 1.3.0 +description: | + Multi-model spec generation and synthesis. Use when the user wants to: + - Get competing proposals from different AI models + - Compare approaches to a problem from different perspectives + - Synthesize the best parts of several proposals into one spec + Keywords: rfspec, competing specs, multi-model, compare approaches, + multiple perspectives, request for spec, fan out, model comparison. + NOT for: single-model generation, code review, or running tests. +--- + +# rfspec -- Request for Spec + +Fan out a prompt to multiple models, compare their responses, and help the user pick or synthesize the best result. + +## Quick Reference + +| Task | Action | +| ------------------------ | ------------------------------------------------------ | +| Generate competing specs | `/rfspec ` (background) | +| Poll for results | Check `/done` sentinel | +| Pick one result | Select via AskUser after comparison | +| Synthesize results | Combine strongest elements when user chooses synthesis | +| Save final spec | Write to `specs/active/YYYY-MM-DD-.md` | + +## Workflow + +The `/rfspec` command spawns three `droid exec` calls in parallel. These take +several minutes, far exceeding the Execute tool timeout. You MUST use the +fire-and-forget + poll pattern. + +### Step 1 -- Launch (background) + +Run the command with `fireAndForget=true`: + +``` +Execute: /rfspec + fireAndForget: true +``` + +The script immediately prints `RFSPEC_RUN_DIR=` to its log file. +Read the log file (path printed by Execute) to capture the run directory. 
+ +### Step 2 -- Poll for completion + +Tell the user the models are running and you will check back. Then poll: + +``` +Execute: cat <run_dir>/done 2>/dev/null || echo "PENDING" +``` + +Poll every 30-60 seconds. The sentinel contains `STATUS=complete` or +`STATUS=failed`. While waiting, you can do other work or let the user know +progress. + +### Step 3 -- Read results + +Once `done` exists, read the results: + +``` +Read: <run_dir>/results.md +``` + +This file contains the model outputs as markdown sections (Option A, B, C); models that failed are omitted. + +### Step 4 -- Evaluate and present + +Evaluate the results -- see [references/evaluation-guide.md](references/evaluation-guide.md). +Present the choice to the user via AskUser. + +### Step 5 -- Finalize + +Present the selected or synthesized spec via ExitSpecMode for user review. +Save to `specs/active/` only after the user approves in spec mode. + +## Saving + +**Do not save immediately.** After the user picks or synthesis is complete, present the +final spec via ExitSpecMode for review. Only after approval, save to: + +``` +specs/active/YYYY-MM-DD-<slug>.md +``` + +Where `<slug>` is a short kebab-case name derived from the topic. + +## Resuming from slash command + +If you are loading this skill after `/rfspec` already ran (the slash command told +you to invoke `Skill: rfspec`), you already have the run directory. Pick up from +Step 3: + +1. Read `<run_dir>/results.md` to get the model outputs. +2. Follow Step 4 (evaluate and present) and Step 5 (finalize) above. + +The `results.md` file includes embedded agent instructions as a fallback, but +prefer the full workflow in this document -- it covers the evaluation guide, +saving rules, and rejection handling that the embedded version omits. + +## Pitfalls + +- Don't summarize each option individually -- compare them against each other. +- Don't concatenate when synthesizing -- resolve contradictions and produce a coherent document. +- If all options are rejected, gather feedback and re-run with a refined prompt. 
+ +## Verification + +After saving a spec: + +1. Confirm the file exists at the expected path. +2. Verify it contains the selected or synthesized content. +3. Report the saved path to the user. + +## Examples + +Example 1: User wants competing specs +User says: "Get me specs from multiple models for adding a dark mode toggle" +Actions: + +1. Execute `/rfspec add a dark mode toggle ...` with `fireAndForget=true` +2. Read the background log to get `RFSPEC_RUN_DIR` +3. Tell user: "Models are running, I'll check back shortly." +4. Poll `<run_dir>/done` until `STATUS=complete` +5. Read `<run_dir>/results.md`, compare Options A, B, C +6. Present choice via AskUser + Result: User picks Option B, saved to `specs/active/2026-03-06-dark-mode-toggle.md` + +Example 2: User wants synthesis +User says: "rfspec this: refactor the auth module to use JWT" +Actions: + +1. Launch background, poll for completion +2. Read results, compare -- Option A has better token rotation, Option C has cleaner middleware +3. User selects "Synthesize" +4. Combine Option A's rotation logic with Option C's middleware structure + Result: Synthesized spec saved to `specs/active/2026-03-06-auth-jwt-refactor.md` + +Example 3: All options rejected +User says: "None of these work, they all miss the caching layer" +Actions: + +1. Ask what's missing -- user explains the Redis caching requirement +2. 
Offer to re-run: `/rfspec refactor auth module to use JWT with Redis session caching` + Result: New round of specs generated with caching addressed + +## References + +- [references/evaluation-guide.md](references/evaluation-guide.md) -- how to compare, synthesize, and handle rejection +- [references/troubleshooting.md](references/troubleshooting.md) -- error codes and fixes diff --git a/plugins/rfspec/skills/rfspec/references/evaluation-guide.md b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md new file mode 100644 index 0000000..b5fce05 --- /dev/null +++ b/plugins/rfspec/skills/rfspec/references/evaluation-guide.md @@ -0,0 +1,38 @@ +# Evaluation Guide + +How to compare and evaluate competing model responses. + +## Comparing Options + +When results come back from `/rfspec`: + +1. **Read all options** before responding. Understand each approach fully. +2. **Identify meaningful differences** -- not cosmetic ones. Focus on: + - Architectural choices (patterns, libraries, data flow) + - Scope differences (what each included or excluded) + - Risk areas (where one flagged something the others missed) + - Concrete vs. vague (which named actual files, functions, steps) +3. **Write a brief comparison** -- 2-4 sentences per option covering strengths and gaps. Compare, don't summarize. +4. **Present the choice** using AskUser: + - Use Option A as-is + - Use Option B as-is + - Use Option C as-is + - Synthesize a refined version combining the best of all three + - None of these work + +## Synthesizing + +If the user picks synthesis: + +1. Start from the strongest option as the base. +2. Pull in specific elements from the others -- name what you're taking and why. +3. Resolve contradictions (don't concatenate). +4. The final result should read as a single coherent document, not a patchwork. + +## Handling Rejection + +If the user rejects all options: + +1. Ask what's missing or wrong. +2. Incorporate their feedback into a refined prompt. +3. 
Offer to re-run `/rfspec` with the updated prompt. diff --git a/plugins/rfspec/skills/rfspec/references/troubleshooting.md b/plugins/rfspec/skills/rfspec/references/troubleshooting.md new file mode 100644 index 0000000..b67675c --- /dev/null +++ b/plugins/rfspec/skills/rfspec/references/troubleshooting.md @@ -0,0 +1,37 @@ +# Troubleshooting + +## All three models failed + +``` +Error: All three models failed. Check that your droid CLI is authenticated... +``` + +**Cause:** droid CLI not authenticated or models unavailable. +**Solution:** Run `droid` interactively to verify auth, then retry. + +## jq not installed + +``` +Error: jq is required but not installed. +``` + +**Cause:** jq not on PATH. +**Solution:** `brew install jq` (macOS) or `apt-get install jq` (Linux). + +## One or two models failed + +``` +Note: The following models encountered errors: Opus 4.6 +``` + +**Cause:** Specific model unavailable or rate-limited. +**Solution:** This is handled gracefully -- compare the options that did return. No action needed unless the failed model was critical. + +## Command not found + +``` +/rfspec: command not found +``` + +**Cause:** Plugin not installed. +**Solution:** Install via `/plugins` UI or `droid plugin install rfspec@factory-plugins --scope user`. diff --git a/plugins/rfspec/skills/rfspec/scripts/run.sh b/plugins/rfspec/skills/rfspec/scripts/run.sh new file mode 100755 index 0000000..f997356 --- /dev/null +++ b/plugins/rfspec/skills/rfspec/scripts/run.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ── guard: dependencies ────────────────────────────────────────────── +command -v jq >/dev/null 2>&1 || { + echo "Error: jq is required but not installed. Install it with: brew install jq" + exit 1 +} +command -v droid >/dev/null 2>&1 || { + echo "Error: droid CLI is required but not found on PATH." 
+ exit 1 +} + +PROMPT="$*" + +if [ -z "$PROMPT" ]; then + echo "Usage: /rfspec " + echo "" + echo "Sends your prompt to three models in parallel (Opus, GPT, Gemini)," + echo "then lets you pick the best spec or synthesize a combination." + exit 1 +fi + +# ── persistent output directory ────────────────────────────────────── +# Results go to a stable path so the calling session can poll for them. +# The temp dir is only used for the prompt file passed to droid exec. +RFSPEC_HOME="${HOME}/.factory/rfspec/runs" +RUN_ID="$(date +%Y%m%d-%H%M%S)-$$" +OUTDIR="${RFSPEC_HOME}/${RUN_ID}" +mkdir -p "$OUTDIR" + +TMPDIR=$(mktemp -d) +trap 'rm -rf "$TMPDIR"' EXIT + +# Wrap the raw prompt with spec-generation framing so subagents produce +# a structured spec proposal, not code or casual analysis. +cat >"$TMPDIR/prompt.md" </dev/null >"$OUTDIR/a.json" & +PID_A=$! +droid exec -m "$MODEL_B" -r "$RE_B" --auto high -f "$TMPDIR/prompt.md" -o json 2>/dev/null >"$OUTDIR/b.json" & +PID_B=$! +droid exec -m "$MODEL_C" -r "$RE_C" --auto high -f "$TMPDIR/prompt.md" -o json 2>/dev/null >"$OUTDIR/c.json" & +PID_C=$! 
# ── collect exit statuses ────────────────────────────────────────────
# FAIL accumulates the labels of models that did not yield a usable spec.
# Waiting on each PID individually surfaces that job's exit status; the
# `||` branch keeps one failing model from aborting the whole run.
FAIL=""
wait "$PID_A" 2>/dev/null || FAIL="${FAIL}${LABEL_A} "
wait "$PID_B" 2>/dev/null || FAIL="${FAIL}${LABEL_B} "
wait "$PID_C" 2>/dev/null || FAIL="${FAIL}${LABEL_C} "

# ── helpers ──────────────────────────────────────────────────────────
# Print the `.result` field of one model's JSON output file.
# Arguments: $1 - path to the JSON file
# Outputs:   the result text on stdout; nothing when the file is missing,
#            empty, or has no `.result`; the raw file content when jq
#            cannot process it.
extract() {
  local file="$1"
  if [ -s "$file" ]; then
    jq -r '.result // empty' "$file" 2>/dev/null || cat "$file"
  fi
}

# Record a model as failed when it produced no result text, even if its
# process exited 0, so the gap is explained in results.md.
# Globals:   FAIL (read, appended to)
# Arguments: $1 - model label, $2 - extracted result text
note_empty() {
  local label="$1" result="$2"
  if [ -z "$result" ]; then
    case "$FAIL" in
      *"${label} "*) ;; # already recorded by the wait checks above
      *) FAIL="${FAIL}${label} " ;;
    esac
  fi
}

# Publish the completion sentinel with write-then-rename so a poller that
# cats the file never observes it created-but-still-empty.
# Arguments: $1 - status word written after "STATUS="
write_sentinel() {
  local status="$1"
  printf 'STATUS=%s\n' "$status" >"$OUTDIR/done.tmp"
  mv -f -- "$OUTDIR/done.tmp" "$OUTDIR/done"
}

# ── extract results ──────────────────────────────────────────────────
RESULT_A=$(extract "$OUTDIR/a.json")
RESULT_B=$(extract "$OUTDIR/b.json")
RESULT_C=$(extract "$OUTDIR/c.json")

note_empty "$LABEL_A" "$RESULT_A"
note_empty "$LABEL_B" "$RESULT_B"
note_empty "$LABEL_C" "$RESULT_C"

# ── write results to persistent file ─────────────────────────────────
SUCCESS=0
if [ -n "$RESULT_A" ]; then SUCCESS=$((SUCCESS + 1)); fi
if [ -n "$RESULT_B" ]; then SUCCESS=$((SUCCESS + 1)); fi
if [ -n "$RESULT_C" ]; then SUCCESS=$((SUCCESS + 1)); fi

{
  echo "# rfspec results"
  echo ""
  echo "User request: ${PROMPT}"
  echo ""

  if [ -n "$RESULT_A" ]; then
    printf '## Option A -- %s\n\n%s\n\n' "$LABEL_A" "$RESULT_A"
  fi
  if [ -n "$RESULT_B" ]; then
    printf '## Option B -- %s\n\n%s\n\n' "$LABEL_B" "$RESULT_B"
  fi
  if [ -n "$RESULT_C" ]; then
    printf '## Option C -- %s\n\n%s\n\n' "$LABEL_C" "$RESULT_C"
  fi

  if [ -n "$FAIL" ]; then
    echo "> **Note:** The following models encountered errors: ${FAIL}"
    echo ""
  fi

  if [ "$SUCCESS" -gt 0 ]; then
    echo "---"
    echo ""
    echo "## Agent Instructions"
    echo ""
    echo "Analyze the specs above. Provide a brief comparison of each model's"
    echo "strengths and weaknesses -- compare them against each other, not individually."
    echo "Then use the AskUser tool to offer:"
    # Offer only the options that actually have a section above; a choice
    # pointing at a missing section cannot be honoured.
    if [ -n "$RESULT_A" ]; then echo "- Use Option A (${LABEL_A}) as-is"; fi
    if [ -n "$RESULT_B" ]; then echo "- Use Option B (${LABEL_B}) as-is"; fi
    if [ -n "$RESULT_C" ]; then echo "- Use Option C (${LABEL_C}) as-is"; fi
    # Synthesis needs at least two specs to combine.
    if [ "$SUCCESS" -eq 3 ]; then
      echo "- Synthesize a refined spec combining the best of all three"
    elif [ "$SUCCESS" -gt 1 ]; then
      echo "- Synthesize a refined spec combining the best of the available options"
    fi
    echo "- No -- none of these work (explain why)"
    echo ""
    echo "CRITICAL: Do NOT save the spec directly. After the user picks an option"
    echo "or requests synthesis, use the ExitSpecMode tool to present the final"
    echo "spec content for review. Only save to specs/active/YYYY-MM-DD-.md"
    echo "AFTER the user approves the spec in spec mode. If rejected, gather"
    echo "feedback and revise."
  fi
} >"$OUTDIR/results.md"

# ── also print to stdout (for cases where timeout is large enough) ───
cat "$OUTDIR/results.md"

if [ "$SUCCESS" -eq 0 ]; then
  echo ""
  echo "Error: All three models failed. Check that your droid CLI is authenticated"
  echo "and the models (${MODEL_A}, ${MODEL_B}, ${MODEL_C}) are available."
  write_sentinel failed
  exit 1
fi

# ── write completion sentinel ────────────────────────────────────────
write_sentinel complete
echo ""
echo "Results written to: ${OUTDIR}/results.md"