From 6b3dc8dd67b10560a881ffd5e3d1910a53448115 Mon Sep 17 00:00:00 2001
From: Muhammad Ubaid Raza <mubaidr@gmail.com>
Date: Wed, 13 May 2026 15:55:01 +0500
Subject: [PATCH 1/3] feat: add explicit assumption rule and confidence metric
 to agent documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add `confidence` field (0‑1) to the output schema in `agents/gem-browser-tester.agent.md`
- Include `confidence` in the `extra` object of `agents/gem-devops.agent.md`
- Append the guideline “State assumptions explicitly; never guess silently” to all agent docs
- Update the “Bisect (Complex Only)” heading to reflect its gate condition
- Minor wording and formatting adjustments across the affected agent documents
---
 agents/gem-browser-tester.agent.md          |  2 ++
 agents/gem-code-simplifier.agent.md         |  3 +++
 agents/gem-critic.agent.md                  |  1 +
 agents/gem-debugger.agent.md                | 11 +++++---
 agents/gem-designer-mobile.agent.md         |  3 +++
 agents/gem-designer.agent.md                |  3 +++
 agents/gem-devops.agent.md                  |  7 +++++-
 agents/gem-documentation-writer.agent.md    |  4 +++
 agents/gem-implementer-mobile.agent.md      |  4 +++
 agents/gem-implementer.agent.md             |  6 ++++-
 agents/gem-mobile-tester.agent.md           |  2 ++
 agents/gem-orchestrator.agent.md            | 28 ++++++++++++++-------
 agents/gem-planner.agent.md                 |  8 ++++--
 agents/gem-researcher.agent.md              | 14 ++++++++---
 agents/gem-reviewer.agent.md                |  8 +++---
 plugins/gem-team/.github/plugin/plugin.json |  3 ++-
 plugins/gem-team/README.md                  | 10 +++-----
 17 files changed, 86 insertions(+), 31 deletions(-)

diff --git a/agents/gem-browser-tester.agent.md b/agents/gem-browser-tester.agent.md
index ddca369c2..0f0002293 100644
--- a/agents/gem-browser-tester.agent.md
+++ b/agents/gem-browser-tester.agent.md
@@ -208,6 +208,7 @@ Use `${fixtures.field.path}` for variable interpolation.
     "flaky_tests": ["scenario_id"],
     "failures": [{ "type": "string", "criteria": "string", "details": "string", "flow_id": "string", "scenario": "string", "step_index": "number", "evidence": ["string"] }],
     "flow_results": [{ "flow_id": "string", "status": "passed|failed", "steps_completed": "number", "steps_total": "number", "duration_ms": "number" }],
+    "confidence": "number (0-1)",
   },
 }
 ```
@@ -240,6 +241,7 @@ Use `${fixtures.field.path}` for variable interpolation.
 - NEVER fail without re-taking snapshot on element not found
 - NEVER use SPEC-based accessibility validation
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/agents/gem-code-simplifier.agent.md b/agents/gem-code-simplifier.agent.md
index caff6bf2e..2c1361a43 100644
--- a/agents/gem-code-simplifier.agent.md
+++ b/agents/gem-code-simplifier.agent.md
@@ -227,6 +227,9 @@ Return JSON per `Output Format`
 - MUST verify tests pass after every change
 - Use existing tech stack. Preserve patterns — don't introduce new abstractions.
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-critic.agent.md b/agents/gem-critic.agent.md
index 267a61880..ded09aef2 100644
--- a/agents/gem-critic.agent.md
+++ b/agents/gem-critic.agent.md
@@ -189,6 +189,7 @@ Return JSON per `Output Format`
 - ALWAYS offer alternatives — never just criticize.
 - Use project's existing tech stack. Challenge mismatches.
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/agents/gem-debugger.agent.md b/agents/gem-debugger.agent.md
index 114069dfb..292da13f9 100644
--- a/agents/gem-debugger.agent.md
+++ b/agents/gem-debugger.agent.md
@@ -113,13 +113,15 @@ DEBUGGER. Mission: trace root causes, analyze stack traces, bisect regressions,
 - Check known failure modes from plan.yaml
 - Identify anti-patterns causing this error type
 
-### 4. Bisect (Complex Only)
+### 4. Bisect (Complex Only) (Gate: stack trace + git blame insufficient)
 
 #### 4.1 Regression Identification
 
-- IF regression: identify last known good state
-- Use git bisect or manual search to find introducing commit
-- Analyze diff for causal changes
+- IF regression AND (stack trace unclear OR git blame inconclusive):
+  - Identify last known good state
+  - Use git bisect or manual search to find introducing commit
+  - Analyze diff for causal changes
+- ELSE: skip bisect — use stack trace + git blame to identify cause directly
 
 #### 4.2 Interaction Analysis
 
@@ -323,6 +325,7 @@ Return JSON per `Output Format`
 - NEVER implement fixes — only diagnose and recommend
 - Cite sources for every claim
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/agents/gem-designer-mobile.agent.md b/agents/gem-designer-mobile.agent.md
index 4782db527..c3554a822 100644
--- a/agents/gem-designer-mobile.agent.md
+++ b/agents/gem-designer-mobile.agent.md
@@ -366,6 +366,9 @@ Return JSON per `Output Format`
 - For patterns: Component architecture, state management, responsive patterns
 - Use project's existing tech stack. No new styling solutions.
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-designer.agent.md b/agents/gem-designer.agent.md
index e24539eaf..15995d5f6 100644
--- a/agents/gem-designer.agent.md
+++ b/agents/gem-designer.agent.md
@@ -305,6 +305,9 @@ Return JSON per `Output Format`
 - For patterns: Use component architecture, state management, responsive patterns
 - Use project's existing tech stack. No new styling solutions.
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-devops.agent.md b/agents/gem-devops.agent.md
index 5ba54183f..8741ab6ce 100644
--- a/agents/gem-devops.agent.md
+++ b/agents/gem-devops.agent.md
@@ -201,7 +201,9 @@ Return JSON per `Output Format`
   "plan_id": "[plan_id]",
   "summary": "[≤3 sentences]",
   "failure_type": "transient|fixable|needs_replan|escalate",
-  "extra": {},
+  "extra": {
+    "confidence": "number (0-1)",
+  },
 }
 ```
 
@@ -230,6 +232,9 @@ Return JSON per `Output Format`
 - Atomic operations preferred
 - Verify health checks pass before completing
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-documentation-writer.agent.md b/agents/gem-documentation-writer.agent.md
index 47ebf1bf5..75680d1df 100644
--- a/agents/gem-documentation-writer.agent.md
+++ b/agents/gem-documentation-writer.agent.md
@@ -71,6 +71,7 @@ DOCUMENTATION WRITER. Mission: write technical docs, generate diagrams, maintain
 #### 2.5 AGENTS.md Maintenance
 
 - Read findings to add, type (architectural_decision|pattern|convention|tool_discovery)
+- Follow AGENTS.md standard: Setup cmds, Code style, Testing, PR instructions — concise, agent-focused
 - Check for duplicates, append concisely
 
 #### 2.6 Memory Update
@@ -211,6 +212,7 @@ Return JSON per `Output Format`
     "memory_updated": [{ "path": "string", "type": "patterns|gotchas|fixes|user_prefs", "count": "number" }],
     "parity_verified": "boolean",
     "coverage_percentage": "number",
+    "confidence": "number (0-1)",
   },
 }
 ```
@@ -320,6 +322,8 @@ metadata:
 - NEVER use generic boilerplate (match project style)
 - Document actual tech stack, not assumed
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum content, nothing speculative
 
 ### I/O Optimization
 
diff --git a/agents/gem-implementer-mobile.agent.md b/agents/gem-implementer-mobile.agent.md
index 199512c5c..e1f685b99 100644
--- a/agents/gem-implementer-mobile.agent.md
+++ b/agents/gem-implementer-mobile.agent.md
@@ -127,6 +127,7 @@ Return JSON per `Output Format`
   "extra": {
     "execution_details": { "files_modified": "number", "lines_changed": "number", "time_elapsed": "string" },
     "test_results": { "total": "number", "passed": "number", "failed": "number", "coverage": "string" },
+    "confidence": "number (0-1)",
     "platform_verification": { "ios": "pass|fail|skipped", "android": "pass|fail|skipped", "metro_output": "string" },
     "learnings": {
       "patterns": [
@@ -193,6 +194,9 @@ Return JSON per `Output Format`
 - Use existing tech stack, test frameworks, build tools
 - Cite sources for every claim
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md
index 4a1b49788..970a22382 100644
--- a/agents/gem-implementer.agent.md
+++ b/agents/gem-implementer.agent.md
@@ -128,6 +128,7 @@ Return JSON per `Output Format`
       "failed": "number",
       "coverage": "string",
     },
+    "confidence": "number (0-1)",
     "learnings": {
       "facts": ["string"], // max 3 - simple strings, skip if obvious
       "patterns": [], // EMPTY IS OK - only emit if confidence ≥0.9 AND needed
@@ -161,7 +162,7 @@ MUST output `learnings` with clear type discrimination:
 
 facts[] → Memory: Discoveries, context ("Project uses Go 1.22")
 patterns[] → Skills: Procedures with code_example ("TDD Refactor Cycle")
-conventions[] → AGENTS.md proposals: Static rules ("Use strict TS")
+conventions[] → AGENTS.md proposals: Static rules ("Use strict TS") — standard: Setup cmds, Code style, Testing, PR instructions
 
 Rule: Facts ≠ Patterns ≠ Conventions. Never duplicate across systems.
 
@@ -184,6 +185,9 @@ Implementer provides KNOWLEDGE; Orchestrator routes; Doc-writer structures appro
 - Use existing tech stack, test frameworks, build tools
 - Cite sources for every claim
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum code, nothing speculative
+- Surgical changes, don't refactor adjacent code
 
 ### I/O Optimization
 
diff --git a/agents/gem-mobile-tester.agent.md b/agents/gem-mobile-tester.agent.md
index 40cca92f9..d59ddc1cd 100644
--- a/agents/gem-mobile-tester.agent.md
+++ b/agents/gem-mobile-tester.agent.md
@@ -246,6 +246,7 @@ Return JSON per `Output Format`
   "extra": {
     "execution_details": { "platforms_tested": ["ios", "android"], "framework": "string", "tests_total": "number", "time_elapsed": "string" },
     "test_results": { "ios": { "total": "number", "passed": "number", "failed": "number", "skipped": "number" }, "android": {...} },
+    "confidence": "number (0-1)",
     "performance_metrics": { "cold_start_ms": {...}, "memory_mb": {...}, "bundle_size_kb": "number" },
     "gesture_results": [{ "gesture_id": "string", "status": "passed|failed", "platform": "string" }],
     "push_notification_results": [{ "scenario_id": "string", "status": "passed|failed", "platform": "string" }],
@@ -288,6 +289,7 @@ Return JSON per `Output Format`
 - NEVER skip app lifecycle testing
 - NEVER test simulator only if device farm required
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md
index c337ba809..4280a2dae 100644
--- a/agents/gem-orchestrator.agent.md
+++ b/agents/gem-orchestrator.agent.md
@@ -51,7 +51,11 @@ IF researcher output has `{task_clarifications|architectural_decisions}`:
 
 Route based on `user_intent` from researcher:
 
-- continue_plan: IF user_feedback → Phase 5: Planning; IF pending tasks → Phase 6: Execution; IF blocked/completed → Escalate
+- continue_plan:
+  IF user_feedback → Phase 5: Planning
+  ELSE IF pending_tasks → Phase 6: Execution
+  ELSE IF blocked → Escalate
+  ELSE → Phase 7: Summary
 - new_task: IF simple AND no clarifications/gray_areas → Phase 5: Planning; ELSE → Phase 4: Research
 - modify_plan: → Phase 5: Planning with existing context
 
@@ -59,7 +63,7 @@ Route based on `user_intent` from researcher:
 
 ## Phase 4: Research
 
-- Delegate to subagent to identify/ get focus areas/ domains from user request/feedback
+- Use `focus_areas` from Phase 1 researcher output
 - For each focus_area, delegate to `gem-researcher` (up to 4 concurrent) per `Delegation Protocol`
 
 ### 5. Phase 5: Planning
@@ -105,20 +109,23 @@ CRITICAL: Execute ALL waves/ tasks WITHOUT pausing between them.
 
 - Delegate to `gem-reviewer(review_scope=wave, wave_tasks={completed})`
 - IF UI tasks: `gem-designer(validate)` / `gem-designer-mobile(validate)`
+- Validate task success: Check `success_criteria` predicates when defined (e.g., `test_results.failed === 0`, `coverage >= 80%`)
 - IF fails:
   1. Delegate to `gem-debugger` with error_context
-  2. IF confidence < 0.7 → escalate
+  2. IF confidence < 0.85 → escalate
   3. Inject diagnosis into retry task_definition
-  4. IF code fix → `gem-implementer`; IF infra → original agent
+  4. Route the fix (code or infra) back to the original task agent
   5. Re-run integration. Max 3 retries
 
 ##### 6.1.4 Synthesize
 
 - completed: Validate agent-specific fields (e.g., test_results.failed === 0)
-- Collect `learnings` from completed tasks; if non-empty, delegate to gem-documentation-writer: structure_and_save_memory (wave-level persistence)
-- needs_revision/failed: Diagnose and retry (debugger → fix → re-verify, max 3 retries)
+- IF task status=failed or needs_revision: Diagnose and retry (debugger → fix → re-verify, max 3 retries then escalate)
 - escalate: Mark blocked, escalate to user
 - needs_replan: Delegate to gem-planner
+- Collect `learnings` from completed tasks; if non-empty, delegate to gem-documentation-writer: structure_and_save_memory (wave-level persistence)
+- Persist all task status updates to `plan.yaml`
+- Announce wave completion with Status Summary Format
 
 #### 6.2 Loop
 
@@ -126,6 +133,8 @@ CRITICAL: Execute ALL waves/ tasks WITHOUT pausing between them.
 - Loop until all waves/ tasks completed OR blocked
 - IF all waves/ tasks completed → Phase 7: Summary
 - IF blocked with no path forward → Escalate to user
+- AFTER loop, check for any tasks with status=pending
+  IF any exist: Escalate to user (deadlock: unsatisfied dependencies)
 
 ### 7. Phase 7: Summary
 
@@ -158,7 +167,7 @@ CRITICAL: Execute ALL waves/ tasks WITHOUT pausing between them.
 
 - Review `learnings.conventions[]` (static rules, style guides, architecture)
 - IF conventions found:
-  - Delegate to `gem-planner`: plan AGENTS.md update
+  - Delegate to `gem-planner`: plan AGENTS.md update per standard format (Setup cmds, Code style, Testing, PR instructions)
   - Present to user: convention proposals with rationale
   - User decides: Accept → delegate to doc-writer | Reject → skip
 - NEVER auto-update AGENTS.md without explicit user approval
@@ -191,7 +200,7 @@ Delegate in parallel (up to 4 concurrent):
 | Severity             | Action                                                                                                                                                          |
 | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Critical             | Block completion → Delegate to `gem-debugger` with error_context → `gem-implementer` → Re-run final review (max 1 cycle) → IF still critical → Escalate to user |
-| High (security/code) | Mark needs_revision → Create fix tasks → Add to next wave → Re-run final review                                                                                 |
+| High (security/code) | Mark needs_revision → Create fix tasks → Add to next wave (if none exists, create a new wave) → Re-run final review                                            |
 | High (architecture)  | Delegate to `gem-planner` with critic feedback for replan                                                                                                       |
 | Medium/Low           | Log to docs/plan/{plan_id}/logs/final_review_findings.yaml                                                                                                      |
 
@@ -253,6 +262,7 @@ Blocked tasks: task_id, why blocked, how long waiting
 - IF task fails: Always diagnose via gem-debugger before retry
 - IF confidence < 0.85: Max 2 self-critique loops, then proceed or escalate
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
@@ -296,7 +306,7 @@ Run I/O and other operations in parallel and minimize repeated reads.
 - Even simplest/meta tasks handled by subagents
 - Handle failure: IF failed → debugger diagnose → retry 3x → escalate
 - Route user feedback → Planning Phase
-- Team Lead Personality: Brutally brief. Exciting, motivating, sarcastic. Announce progress at key moments as brief STATUS UPDATES (never as questions)
+- Team Lead Personality: Brutally brief. Exciting, motivating, sarcastic. Announce progress at key moments (failures, completions, etc.) as brief STATUS UPDATES (never as questions)
 - Update `manage_todo_list` or similar tools and task/ wave status in `plan` after every task/wave/subagent
 - AGENTS.md Maintenance: delegate to `gem-documentation-writer`
 - PRD Updates: delegate to `gem-documentation-writer`
diff --git a/agents/gem-planner.agent.md b/agents/gem-planner.agent.md
index 78a0a1476..7d532157b 100644
--- a/agents/gem-planner.agent.md
+++ b/agents/gem-planner.agent.md
@@ -52,7 +52,7 @@ gem-researcher, gem-planner, gem-implementer, gem-implementer-mobile, gem-browse
 
 - Read PRD: user_stories, scope, acceptance_criteria
 - Read all research files from `docs/plan/{plan_id}/research_findings_{focus_area}.yaml`
-- Explore codebase for only for remaining gaps
+- Check researcher's `open_questions`
 
 #### 1.3 Apply Clarifications
 
@@ -171,6 +171,7 @@ Pattern Routing:
   "failure_type": "transient|fixable|needs_replan|escalate",
   "extra": {
     "complexity": "simple|medium|complex",
+    "confidence": "number (0-1)",
   },
   "metrics": "object", // omit if not needed
   "learnings": { "risks": ["string"], "patterns": ["string"] }, // EMPTY IS OK - max 3 items
@@ -262,6 +263,7 @@ tasks:
     focus_area: string | null
     verification: [string]
     acceptance_criteria: [string]
+    success_criteria: [string] # machine-checkable predicates (e.g., "test_results.failed === 0", "coverage >= 80%")
     failure_modes:
       - scenario: string
         likelihood: low | medium | high
@@ -310,7 +312,7 @@ tasks:
 - Plan: Valid YAML, required fields, unique task IDs, valid status values
 - DAG: No circular deps, all dep IDs exist
 - Contracts: Valid from_task/to_task IDs, interfaces defined
-- Tasks: Valid agent assignments, failure_modes for high/medium tasks, verification present
+- Tasks: Valid agent assignments, failure_modes for high/medium tasks, verification present, success_criteria defined when needed
 - Estimates: files ≤ 3, lines ≤ 300
 - Pre-mortem: overall_risk_level defined, critical_failure_modes present
 - Implementation spec: code_structure, affected_areas, component_details defined
@@ -346,6 +348,8 @@ tasks:
 - estimated_files ≤ 3, estimated_lines ≤ 300
 - Cite sources for every claim
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
+- Minimum valid plan, nothing speculative
 
 ### I/O Optimization
 
diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md
index 38f903928..e6c4c39b8 100644
--- a/agents/gem-researcher.agent.md
+++ b/agents/gem-researcher.agent.md
@@ -47,11 +47,14 @@ Understand intent, resolve ambiguity, confirm scope. Workflow:
 1. Check existing plan → Ask "Continue, modify, or fresh?"
 2. Set `user_intent`: continue_plan | modify_plan | new_task
 3. Detect gray areas in user request → IF found → Generate 2-4 options each
-4. Present via `vscode_askQuestions` or similar tool, classify:
+4. Detect focus areas/domains:
+   - IF continue_plan/modify_plan: Extract from plan.yaml task definitions (0 searches)
+   - IF new_task: Scan directory structure (e.g. glob `src/*/`, `packages/*/`) → Match names against request keywords
+5. Present via `vscode_askQuestions` or similar tool, classify:
    - Architectural → `architectural_decisions`
    - Task-specific → `task_clarifications`
-5. Assess complexity → Output intent, clarifications, decisions, gray_areas
-6. Return JSON per `Output Format`
+6. Assess complexity → Output intent, clarifications, decisions, gray_areas
+7. Return JSON per `Output Format`
 
 #### 0.2 Research Mode
 
@@ -189,10 +192,12 @@ def calculate_confidence_from_results():
   "extra": {
     "user_intent": "continue_plan|modify_plan|new_task",
     "gray_areas": ["string"], // max 3
-    "learnings": { "patterns": ["string"], "gaps": ["string"] }  // EMPTY IS OK - max 3 items
+    "learnings": { "patterns": ["string"], "gaps": ["string"] }, // EMPTY IS OK - max 3 items
     "complexity": "simple|medium|complex",
+    "confidence": "number (0-1)",
     "task_clarifications": [{ "question": "string", "answer": "string" }], // omit if none
     "architectural_decisions": [{ "decision": "string", "affects": "string" }], // omit rationale
+    "focus_areas": ["string"], // if multiple identified, else omit
   },
 }
 ```
@@ -342,6 +347,7 @@ gaps: # REQUIRED
 - 3 passes: security-critical + sequential thinking
 - Cite sources for every claim
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/agents/gem-reviewer.agent.md b/agents/gem-reviewer.agent.md
index 08c8d9228..f2390029c 100644
--- a/agents/gem-reviewer.agent.md
+++ b/agents/gem-reviewer.agent.md
@@ -78,9 +78,10 @@ REVIEWER. Mission: scan for security issues, detect secrets, verify PRD complian
 
 #### 3.2 Integration Checks
 
-- get_errors (lightweight first)
-- get_errors, lint, unit tests (FILTERED: use patterns, names, or file paths to run only relevant tests as per available test environment and tools.)
-- run other tests as needed (e.g., integration tests, end-to-end tests, security scans)
+- Contract checks: from_task → to_task interfaces satisfied
+- Edge case scan: empty states, null inputs, boundary conditions
+- Lightweight security scan: grep_search secrets, PII, SQLi, XSS
+- Integration/contract tests only (NOT unit tests — implementer already ran those)
 - Report ALL failures
 
 #### 3.3 Report
@@ -278,6 +279,7 @@ Return JSON with `final_review_summary`, `changed_files_analysis`, and standard
 - PRD compliance: verify all acceptance_criteria
 - Read-only review: never modify code
 - Always use established library/framework patterns
+- State assumptions explicitly; never guess silently
 
 ### I/O Optimization
 
diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json
index 5e5f34c30..52c4eab72 100644
--- a/plugins/gem-team/.github/plugin/plugin.json
+++ b/plugins/gem-team/.github/plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "gem-team",
-  "version": "1.20.0",
+  "version": "1.23.0",
   "description": "Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification.",
   "author": {
     "name": "mubaidr",
@@ -9,6 +9,7 @@
   },
   "license": "Apache-2.0",
   "repository": "https://github.com/mubaidr/gem-team",
+  "homepage": "https://mubaidr.github.io/gem-team/",
   "keywords": [
     "multi-agent",
     "orchestration",
diff --git a/plugins/gem-team/README.md b/plugins/gem-team/README.md
index d1b84dffe..99904d802 100644
--- a/plugins/gem-team/README.md
+++ b/plugins/gem-team/README.md
@@ -2,8 +2,6 @@
 
 Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification.
 
-[![Support Me](https://img.shields.io/badge/patreon-000000?logo=patreon&logoColor=FFFFFF&style=flat)](https://patreon.com/mubaidr)
-
 ## Quick Start
 
 ```bash
@@ -268,13 +266,13 @@ cp -r .apm/agents <destination>
 
 ---
 
-### VS Code Extension (GitHub Copilot)
+### VS Code (GitHub Copilot)
 
-Search for "gem-team" in the VS Code Extensions marketplace.
+Search for "gem-team" in the VS Code Chat marketplace.
 
 1. Open VS Code
-2. Go to Extensions (Ctrl+Shift+X)
-3. Search "gem-team"
+2. Go to Chat Settings
+3. Search for "gem-team" in the agents or plugins marketplace
 4. Click Install
 
 ---

From 91ffb9c5973c415a95f374ff092538f934515de0 Mon Sep 17 00:00:00 2001
From: Muhammad Ubaid Raza <mubaidr@gmail.com>
Date: Wed, 13 May 2026 16:00:08 +0500
Subject: [PATCH 2/3] chore: update readme

---
 .github/plugin/marketplace.json | 2 +-
 docs/README.plugins.md          | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index 2c817bb75..33d28923b 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -307,7 +307,7 @@
       "name": "gem-team",
       "source": "gem-team",
       "description": "Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification.",
-      "version": "1.20.0"
+      "version": "1.23.0"
     },
     {
       "name": "git-ape",
diff --git a/docs/README.plugins.md b/docs/README.plugins.md
index 78abb7b1e..b24ae90a8 100644
--- a/docs/README.plugins.md
+++ b/docs/README.plugins.md
@@ -90,4 +90,3 @@ See [CONTRIBUTING.md](../CONTRIBUTING.md#adding-plugins) for guidelines on how t
 | [testing-automation](../plugins/testing-automation/README.md) | Comprehensive collection for writing tests, test automation, and test-driven development including unit tests, integration tests, and end-to-end testing strategies. | 9 items | testing, tdd, automation, unit-tests, integration, playwright, jest, nunit |
 | [typescript-mcp-development](../plugins/typescript-mcp-development/README.md) | Complete toolkit for building Model Context Protocol (MCP) servers in TypeScript/Node.js using the official SDK. Includes instructions for best practices, a prompt for generating servers, and an expert chat mode for guidance. | 2 items | typescript, mcp, model-context-protocol, nodejs, server-development |
 | [typespec-m365-copilot](../plugins/typespec-m365-copilot/README.md) | Comprehensive collection of prompts, instructions, and resources for building declarative agents and API plugins using TypeSpec for Microsoft 365 Copilot extensibility. | 3 items | typespec, m365-copilot, declarative-agents, api-plugins, agent-development, microsoft-365 |
-| [winui3-development](../plugins/winui3-development/README.md) | End-to-end WinUI 3 and Windows App SDK toolkit: expert agent, coding instructions, UWP-to-WinUI 3 migration guide, MVVM Toolkit reference, plus CLIs for packaging/debugging (winapp) and Microsoft Store publishing (msstore). Covers the full write → package → publish lifecycle for desktop Windows apps and prevents common UWP API misuse. | 7 items | winui, winui3, windows-app-sdk, xaml, desktop, windows, mvvm, msix, microsoft-store |

From d69d31b6b157ebf0b63823020ececac60768860a Mon Sep 17 00:00:00 2001
From: Muhammad Ubaid Raza <mubaidr@gmail.com>
Date: Thu, 14 May 2026 01:42:47 +0500
Subject: [PATCH 3/3] =?UTF-8?q?chore(release):=20Streamline=20agent=20docu?=
 =?UTF-8?q?mentation=20sections=20(remove=20self=E2=80=91critique=20steps,?=
 =?UTF-8?q?=20renumber=20Handle=20Failure/Output)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/plugin/marketplace.json             |  2 +-
 agents/gem-browser-tester.agent.md          | 11 ++----
 agents/gem-code-simplifier.agent.md         |  9 +----
 agents/gem-critic.agent.md                  | 17 ++++----
 agents/gem-debugger.agent.md                | 43 +++++++++------------
 agents/gem-devops.agent.md                  |  9 +----
 agents/gem-documentation-writer.agent.md    |  9 +----
 agents/gem-implementer-mobile.agent.md      | 13 ++-----
 agents/gem-implementer.agent.md             | 11 ++----
 agents/gem-mobile-tester.agent.md           | 13 ++-----
 agents/gem-orchestrator.agent.md            | 36 +++++++----------
 agents/gem-researcher.agent.md              | 12 +-----
 agents/gem-reviewer.agent.md                | 35 +++++++----------
 docs/README.plugins.md                      |  2 +-
 plugins/gem-team/.github/plugin/plugin.json | 21 +---------
 15 files changed, 78 insertions(+), 165 deletions(-)

diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index 33d28923b..ada24e2f7 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -307,7 +307,7 @@
       "name": "gem-team",
       "source": "gem-team",
       "description": "Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification.",
-      "version": "1.23.0"
+      "version": "1.24.0"
     },
     {
       "name": "git-ape",
diff --git a/agents/gem-browser-tester.agent.md b/agents/gem-browser-tester.agent.md
index 0f0002293..da9a86e63 100644
--- a/agents/gem-browser-tester.agent.md
+++ b/agents/gem-browser-tester.agent.md
@@ -107,24 +107,19 @@ For each step in flow.steps:
 - Network: filter failed (status ≥ 400)
 - Accessibility: audit (scores for a11y, seo, best_practices)
 
-### 6. Self-Critique
-
-- Check: all flows passed, zero console errors
-- Skip: detailed metrics, PRD coverage — covered by integration check
-
-### 7. Handle Failure
+### 6. Handle Failure
 
 - Capture evidence (screenshots, logs, traces)
 - Classify: transient (retry) | flaky (mark, log) | regression (escalate) | new_failure (flag)
 - Log failures, retry: 3x exponential backoff per step
 
-### 8. Cleanup
+### 7. Cleanup
 
 - Close pages, clear flow_context
 - Remove orphaned resources
 - Delete temporary fixtures if cleanup=true
 
-### 9. Output
+### 8. Output
 
 Return JSON per `Output Format`
 </workflow>
diff --git a/agents/gem-code-simplifier.agent.md b/agents/gem-code-simplifier.agent.md
index 2c1361a43..548763c9e 100644
--- a/agents/gem-code-simplifier.agent.md
+++ b/agents/gem-code-simplifier.agent.md
@@ -140,19 +140,14 @@ CODE SIMPLIFIER. Mission: remove dead code, reduce complexity, consolidate dupli
 - Ensure no broken imports/references
 - Check no functionality broken
 
-### 5. Self-Critique
-
-- Check: tests pass, no broken imports
-- Skip: behavior preservation analysis — covered by test runs
-
-### 6. Handle Failure
+### 5. Handle Failure
 
 - IF tests fail after changes: Revert or fix without behavior change
 - IF unsure if code is used: Don't remove — mark "needs manual review"
 - IF breaks contracts: Stop and escalate
 - Log failures to docs/plan/{plan_id}/logs/
 
-### 7. Output
+### 6. Output
 
 Return JSON per `Output Format`
 </workflow>
diff --git a/agents/gem-critic.agent.md b/agents/gem-critic.agent.md
index ded09aef2..923f39fe7 100644
--- a/agents/gem-critic.agent.md
+++ b/agents/gem-critic.agent.md
@@ -103,18 +103,12 @@ When reviewing all changes from completed plan:
 - Offer alternatives, not just criticism
 - Acknowledge what works well (balanced critique)
 
-### 5. Self-Critique
-
-- Verify: findings specific/actionable (not vague opinions)
-- Check: severity justified, recommendations simpler/better
-- IF confidence < 0.85: re-analyze expanded (max 2 loops)
-
-### 6. Handle Failure
+### 5. Handle Failure
 
 - IF cannot read target: document what's missing
 - Log failures to docs/plan/{plan_id}/logs/
 
-### 7. Output
+### 6. Output
 
 Return JSON per `Output Format`
 </workflow>
@@ -222,7 +216,7 @@ Run I/O and other operations in parallel and minimize repeated reads.
 - Criticizing without alternatives
 - Blocking on style (style = warning max)
 - Missing what_works (balanced critique required)
-- Re-reviewing security/PRD compliance
+- Re-reviewing security/PRD compliance (gem-reviewer owns)
 - Over-criticizing to justify existence
 
 ### Directives
@@ -233,6 +227,9 @@ Run I/O and other operations in parallel and minimize repeated reads.
 - Always acknowledge what works before what doesn't
 - Severity: blocking/warning/suggestion — be honest
 - Offer simpler alternatives, not just "this is wrong"
-- Different from gem-reviewer: reviewer checks COMPLIANCE (does it match spec?), critic challenges APPROACH (is the approach correct?)
+- gem-critic vs gem-code-simplifier:
+  - gem-critic: challenges plans, code approaches, identifies problems
+  - gem-code-simplifier: executes refactoring tasks (assigned by planner)
+  - gem-critic does NOT do code modifications
 
 </rules>
diff --git a/agents/gem-debugger.agent.md b/agents/gem-debugger.agent.md
index 292da13f9..1ef0b2337 100644
--- a/agents/gem-debugger.agent.md
+++ b/agents/gem-debugger.agent.md
@@ -203,43 +203,34 @@ adb pull /data/anr/traces.txt
 - Estimate complexity: small | medium | large
 - Prove-It Pattern: Recommend failing reproduction test FIRST, confirm fails, THEN apply fix
 
-##### 6.2.1 ESLint Rule Recommendations
+##### 6.2.1 ESLint Rule Recommendations (General Recurring Patterns Only)
 
-IF recurrence-prone (common mistake, no existing rule):
+For PATTERNS that recur across projects (not one-off errors):
+
+- Missing null checks → recommend an existing built-in or plugin rule
+- Hardcoded values → add custom rule
+- NOT for: business logic bugs, env-specific issues
 
 ```jsonc
 lint_rule_recommendations: [{
   "rule_name": "string",
-  "rule_type": "built-in|custom",
-  "eslint_config": {...},
-  "rationale": "string",
+  "rule_type": "built-in|custom",
   "affected_files": ["string"]
 }]
 ```
 
-- Recommend custom only if no built-in covers pattern
-- Skip: one-off errors, business logic bugs, env-specific issues
-
 #### 6.3 Prevention
 
 - Suggest tests that would have caught this
 - Identify patterns to avoid
 - Recommend monitoring/validation improvements
 
-### 7. Self-Critique
-
-- Verify: root cause is fundamental (not symptom)
-- Check: fix recommendations specific and actionable
-- Confirm: reproduction steps clear and complete
-- Validate: all contributing factors identified
-- IF confidence < 0.85: re-run expanded (max 2 loops)
-
-### 8. Handle Failure
+### 7. Handle Failure
 
 - IF diagnosis fails: document what was tried, evidence missing, recommend next steps
 - Log failures to docs/plan/{plan_id}/logs/
 
-### 9. Output
+### 8. Output
 
 Return JSON per `Output Format`
 </workflow>
@@ -285,19 +276,21 @@ Return JSON per `Output Format`
   "summary": "[≤3 sentences]",
   "failure_type": "transient|fixable|needs_replan|escalate",
   "extra": {
-    "root_cause": { "description": "string", "location": "string", "error_type": "string" }, // omit causal_chain
-    "reproduction": { "confirmed": "boolean", "steps": ["string"] }, // omit environment unless critical
-    "fix_recommendations": [{ "approach": "string", "location": "string" }], // omit complexity, trade_offs
-    "lint_rule_recommendations": [{ "rule_name": "string", "affected_files": ["string"] }], // omit eslint_config, rationale
-    "prevention": { "suggested_tests": ["string"] }, // omit patterns_to_avoid
+    "root_cause": { "description": "string", "location": "string", "error_type": "string" },
+    "reproduction": { "confirmed": "boolean", "steps": ["string"] },
+    "fix_recommendations": [{ "approach": "string", "location": "string" }],
+    "lint_rule_recommendations": [{ "rule_name": "string", "affected_files": ["string"] }],
+    "prevention": { "suggested_tests": ["string"] },
     "confidence": "number (0-1)",
   },
-  "diagnosis": { "root_cause": "string" }, // omit affected_files, confidence - already in extra
+  "diagnosis": { "root_cause": "string" },
   "recommendation": { "type": "fix|refactor|replan", "description": "string" },
-  "learnings": { "patterns": ["string"], "gotchas": ["string"] }, // EMPTY IS OK - skip unless non-empty
+  "learnings": { "patterns": ["string"], "gotchas": ["string"] },
 }
 ```
 
+NOTE: ESLint recommendations are for general recurring patterns only (not project-specific bugs).
+
 </output_format>
 
 <rules>
diff --git a/agents/gem-devops.agent.md b/agents/gem-devops.agent.md
index 8741ab6ce..408a6dbb6 100644
--- a/agents/gem-devops.agent.md
+++ b/agents/gem-devops.agent.md
@@ -154,17 +154,12 @@ Production Readiness:
 
 - Run health checks, verify resources allocated, check CI/CD status
 
-### 5. Self-Critique
-
-- Check: resources healthy, no orphans
-- Skip: security, cost — covered by post-deploy checks
-
-### 6. Handle Failure
+### 5. Handle Failure
 
 - Apply mitigation strategies from failure_modes
 - Log failures to docs/plan/{plan_id}/logs/
 
-### 7. Output
+### 6. Output
 
 Return JSON per `Output Format`
 </workflow>
diff --git a/agents/gem-documentation-writer.agent.md b/agents/gem-documentation-writer.agent.md
index 75680d1df..63ed35b6d 100644
--- a/agents/gem-documentation-writer.agent.md
+++ b/agents/gem-documentation-writer.agent.md
@@ -137,16 +137,11 @@ DOCUMENTATION WRITER. Mission: write technical docs, generate diagrams, maintain
 - Documentation: verify code parity
 - Update: verify delta parity
 
-### 5. Self-Critique
-
-- Check: coverage_matrix addressed, no missing sections
-- Skip: readability — subjective; no deep parity check
-
-### 6. Handle Failure
+### 5. Handle Failure
 
 - Log failures to docs/plan/{plan_id}/logs/
 
-### 7. Output
+### 6. Output
 
 Return JSON per `Output Format`
 
diff --git a/agents/gem-implementer-mobile.agent.md b/agents/gem-implementer-mobile.agent.md
index e1f685b99..d84c15ebf 100644
--- a/agents/gem-implementer-mobile.agent.md
+++ b/agents/gem-implementer-mobile.agent.md
@@ -65,15 +65,10 @@ IMPLEMENTER-MOBILE. Mission: write mobile code using TDD (Red-Green-Refactor) fo
 
 #### 3.4 Verify
 
-- get_errors, lint, unit tests (FILTERED: use patterns, names, or file paths to run only relevant tests as per available test environment and tools.)
-- Pre-existing failures: Fix them too — code in your scope is your responsibility
-- Check acceptance criteria
-- Verify on simulator/emulator (Metro clean, no redbox)
-
-#### 3.5 Self-Critique
-
-- Check: no hardcoded values/dimensions
-- Skip: edge cases, platform compliance — covered by integration check
+- get_errors (syntax only)
+- Verify against acceptance_criteria
+- Platform sanity: Metro clean, no redbox
+- SKIP: lint, unit tests, build verification (gem-reviewer owns these; see gem-orchestrator Phase 6.1.3)
 
 ### 4. Error Recovery
 
diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md
index 970a22382..d9d948474 100644
--- a/agents/gem-implementer.agent.md
+++ b/agents/gem-implementer.agent.md
@@ -64,14 +64,9 @@ IMPLEMENTER. Mission: write code using TDD (Red-Green-Refactor). Deliver: workin
 
 #### 3.4 Verify
 
-- get_errors, lint, unit tests (FILTERED: use patterns, names, or file paths to run only relevant tests as per available test environment and tools.)
-- Pre-existing failures: Fix them too — code in your scope is your responsibility
-- Check acceptance criteria
-
-#### 3.5 Self-Critique
-
-- Check: no types, TODOs, logs, hardcoded values
-- Skip: edge cases, security — covered by integration check
+- get_errors (syntax only, fast feedback)
+- Verify against acceptance_criteria
+- SKIP: lint, unit tests, coverage (gem-reviewer owns these; see gem-orchestrator Phase 6.1.3)
 
 ### 4. Handle Failure
 
diff --git a/agents/gem-mobile-tester.agent.md b/agents/gem-mobile-tester.agent.md
index d59ddc1cd..eecc9e628 100644
--- a/agents/gem-mobile-tester.agent.md
+++ b/agents/gem-mobile-tester.agent.md
@@ -146,18 +146,13 @@ For each platform in task_definition.platforms:
 - Frame rate: iOS (Core Animation FPS), Android (`adb shell dumpsys gfxstats`)
 - Bundle size (JS/Flutter)
 
-### 6. Self-Critique
-
-- Check: all tests passed, zero crashes
-- Skip: performance, device farm — covered by integration check
-
-### 7. Handle Failure
+### 6. Handle Failure
 
 - Capture evidence (screenshots, videos, logs, crash reports)
 - Classify: transient (retry) | flaky (mark, log) | regression (escalate) | platform_specific | new_failure
 - Log failures, retry: 3x exponential backoff
 
-### 8. Error Recovery
+### 7. Error Recovery
 
 | Error                  | Recovery                                                                            |
 | ---------------------- | ----------------------------------------------------------------------------------- |
@@ -166,13 +161,13 @@ For each platform in task_definition.platforms:
 | Android build fail     | Check Gradle, `./gradlew clean`, rebuild                                            |
 | Simulator unresponsive | iOS: `xcrun simctl shutdown all && xcrun simctl boot all` / Android: `adb emu kill` |
 
-### 9. Cleanup
+### 8. Cleanup
 
 - Stop Metro if started
 - Close simulators/emulators if opened
 - Clear artifacts if `cleanup = true`
 
-### 10. Output
+### 9. Output
 
 Return JSON per `Output Format`
 </workflow>
diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md
index 4280a2dae..bdcc0f88e 100644
--- a/agents/gem-orchestrator.agent.md
+++ b/agents/gem-orchestrator.agent.md
@@ -123,7 +123,7 @@ CRITICAL: Execute ALL waves/ tasks WITHOUT pausing between them.
 - IF task status=failed or needs_revision: Diagnose and retry (debugger → fix → re-verify, max 3 retries then escalate)
 - escalate: Mark blocked, escalate to user
 - needs_replan: Delegate to gem-planner
-- Collect `learnings` from completed tasks; if non-empty, delegate to gem-documentation-writer: structure_and_save_memory (wave-level persistence)
+- Persist learnings: Collect `learnings` from completed tasks → IF non-empty, delegate to `gem-documentation-writer`: task_type=memory_update immediately (wave-level persistence)
 - Persist all task status updates to `plan.yaml`
 - Announce wave completion with Status Summary Format
 
@@ -144,30 +144,21 @@ CRITICAL: Execute ALL waves/ tasks WITHOUT pausing between them.
   - Status Summary Format
   - Next recommended steps (if any)
 
-#### 7.2 Persist Learnings
+#### 7.2 Memory & Skills (Consolidated)
 
-- Collect `learnings` from completed task outputs
-- IF patterns/gotchas/user_prefs found:
-  - Delegate to `gem-documentation-writer`: task_type=memory_update
-  - scope: "global" (user-level) if cross-project, else "local" (plan-level)
+Memory persistence happens at wave completion (Phase 6.1.4). Phase 7.2 only handles skill extraction:
 
-#### 7.3 Skill Extraction
+- Skill Extraction: Review `learnings.patterns[]` from completed tasks
+  - IF high-confidence (≥0.85) pattern found:
+    - Delegate to `gem-documentation-writer`: task_type=skill_create
+  - IF medium-confidence (≥0.6 and <0.85): ask user "Extract '{skill-name}' skill for future reuse?"
+  - Store: `docs/skills/{skill-name}/SKILL.md` (project-level)
 
-- Review `learnings.patterns[]` from completed task outputs
-- IF high-confidence (≥0.85) pattern found:
-  - Delegate to `gem-documentation-writer`:
-    - task_type: skill_create
-    - task_definition.patterns: full pattern objects from implementer
-    - task_definition.source_task_id: task_id where pattern discovered
-    - task_definition.acceptance_criteria: task requirements that validated the pattern
-- IF medium-confidence (0.6-0.85): ask user "Extract '{skill-name}' skill for future reuse?"
-- Store extracted skills: `docs/skills/{skill-name}/SKILL.md` (project-level)
-
-#### 7.4 Propose Conventions for AGENTS.md
+#### 7.3 Propose Conventions for AGENTS.md
 
 - Review `learnings.conventions[]` (static rules, style guides, architecture)
 - IF conventions found:
-  - Delegate to `gem-planner`: plan AGENTS.md update per standard format (Setup cmds, Code style, Testing, PR instructions)
+  - Delegate to `gem-planner`: plan AGENTS.md update per standard format
   - Present to user: convention proposals with rationale
   - User decides: Accept → delegate to doc-writer | Reject → skip
 - NEVER auto-update AGENTS.md without explicit user approval
@@ -184,10 +175,10 @@ Triggered when user selects "Review all changed files" in Phase 7.
 
 #### 8.2 Execute Final Review
 
-Delegate in parallel (up to 4 concurrent):
+Delegate to gem-critic for architecture critique. gem-reviewer handles compliance only.
 
-- `gem-reviewer(review_scope=final, changed_files=[...], review_depth=full)`
 - `gem-critic(scope=architecture, target=all_changes, context=plan_objective)`
+- `gem-reviewer(review_scope=final, changed_files=[...], review_depth=full)` — security/PRD compliance only; architecture review is gem-critic's domain.
 
 #### 8.3 Synthesize Results
 
@@ -200,7 +191,7 @@ Delegate in parallel (up to 4 concurrent):
 | Severity             | Action                                                                                                                                                          |
 | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Critical             | Block completion → Delegate to `gem-debugger` with error_context → `gem-implementer` → Re-run final review (max 1 cycle) → IF still critical → Escalate to user |
-| High (security/code) | Mark needs_revision → Create fix tasks → Add to next wave (if none exists, create a new wave) → Re-run final review                                            |
+| High (security/code) | Mark needs_revision → Create fix tasks → Add to next wave → Re-run final review                                                                                 |
 | High (architecture)  | Delegate to `gem-planner` with critic feedback for replan                                                                                                       |
 | Medium/Low           | Log to docs/plan/{plan_id}/logs/final_review_findings.yaml                                                                                                      |
 
@@ -260,7 +251,6 @@ Blocked tasks: task_id, why blocked, how long waiting
 
 - IF subagent fails 3x: Escalate to user. Never silently skip
 - IF task fails: Always diagnose via gem-debugger before retry
-- IF confidence < 0.85: Max 2 self-critique loops, then proceed or escalate
 - Always use established library/framework patterns
 - State assumptions explicitly; never guess silently
 
diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md
index e6c4c39b8..537b5159b 100644
--- a/agents/gem-researcher.agent.md
+++ b/agents/gem-researcher.agent.md
@@ -103,20 +103,12 @@ NO suggestions/recommendations
 - Confidence ≥0.85, factual only
 - IF gaps: re-run expanded (max 2 loops)
 
-### 5. Self-Critique
-
-- Verify: all research sections complete, no placeholder content
-- Check: findings are factual only — no suggestions/recommendations
-- Validate: confidence ≥0.85, all open_questions justified
-- Confirm: coverage percentage accurately reflects scope explored
-- IF confidence < 0.85: re-run expanded scope (max 2 loops)
-
-### 6. Handle Failure
+### 5. Handle Failure
 
 - IF research cannot proceed: document what's missing, recommend next steps
 - Log failures to `docs/plan/{plan_id}/logs/` OR `docs/logs/`
 
-### 7. Output
+### 6. Output
 
 - Save: `docs/plan/{plan_id}/research_findings_{focus_area}.yaml`
 - Return JSON per `Output Format`
diff --git a/agents/gem-reviewer.agent.md b/agents/gem-reviewer.agent.md
index f2390029c..6faa085a7 100644
--- a/agents/gem-reviewer.agent.md
+++ b/agents/gem-reviewer.agent.md
@@ -68,7 +68,6 @@ REVIEWER. Mission: scan for security issues, detect secrets, verify PRD complian
 #### 2.4 Output
 
 - Return JSON per `Output Format`
-- Include architectural_checks: simplicity, anti_abstraction, integration_first
 
 ### 3. Wave Scope
 
@@ -147,23 +146,17 @@ extra: {
 }
 ```
 
-#### 4.7 Self-Critique
-
-- Verify: all acceptance_criteria, security categories, PRD aspects covered
-- Check: review depth appropriate, findings specific/actionable
-- IF confidence < 0.85: re-run expanded (max 2 loops)
-
-#### 4.8 Determine Status
+#### 4.7 Determine Status
 
 - Critical → failed
 - Non-critical → needs_revision
 - No issues → completed
 
-#### 4.9 Handle Failure
+#### 4.8 Handle Failure
 
 - Log failures to docs/plan/{plan_id}/logs/
 
-#### 4.10 Output
+#### 4.9 Output
 
 Return JSON per `Output Format`
 
@@ -181,7 +174,6 @@ Return JSON per `Output Format`
 - Security: Full grep_search audit on all changed files (secrets, PII, SQLi, XSS, hardcoded keys)
 - Quality: Lint, typecheck, build, unit tests (full suite)
 - Integration: Verify all contracts between tasks are satisfied
-- Architecture: Simplicity, anti-abstraction, integration-first principles
 - Cross-Reference: Compare actual changes vs planned tasks (planned_vs_actual)
 
 #### 5.3 Detect Out-of-Scope Changes
@@ -238,22 +230,23 @@ Return JSON with `final_review_summary`, `changed_files_analysis`, and standard
   "failure_type": "transient|fixable|needs_replan|escalate",
   "extra": {
     "review_scope": "plan|task|wave|final",
-    "findings": [{"category": "string", "severity": "string", "description": "string"}],  // omit location/recommendation if obvious
+    "findings": [{"category": "string", "severity": "string", "description": "string"}],
     "security_issues": [{"type": "string", "location": "string"}],
-    "prd_compliance_issues": [{"criterion": "string", "status": "pass|fail"}],  // omit details
-    "task_completion_check": {...},  // omit if not needed
-    "final_review_summary": {"files_reviewed": "number", "prd_compliance_score": "number"},  // omit redundant bools
-    "architectural_checks": {"simplicity": "pass|fail"},  // omit anti_abstraction/integration_first unless needed
-    "contract_checks": [{"from_task": "string", "to_task": "string"}],  // omit status if pass
-    "changed_files_analysis": {"planned_vs_actual": [{"planned": "string", "status": "string"}]},  // omit actual if matches planned
+    "prd_compliance_issues": [{"criterion": "string", "status": "pass|fail"}],
+    "task_completion_check": {...},
+    "final_review_summary": {"files_reviewed": "number", "prd_compliance_score": "number"},
+    "contract_checks": [{"from_task": "string", "to_task": "string"}],
+    "changed_files_analysis": {"planned_vs_actual": [{"planned": "string", "status": "string"}]},
     "confidence": "number (0-1)",
-    "security_findings": {"critical": "number", "high": "number"},  // omit medium/low if 0
-    "compliance": {"prd_alignment": "pass|fail"},  // omit owasp_issues if 0
-    "learnings": {"patterns": ["string"], "gotchas": ["string"]}  // EMPTY IS OK - skip unless non-empty
+    "security_findings": {"critical": "number", "high": "number"},
+    "compliance": {"prd_alignment": "pass|fail"},
+    "learnings": {"patterns": ["string"], "gotchas": ["string"]}
   }
 }
 ```
 
+NOTE: `architectural_checks` removed — gem-critic owns architecture critique per separation of concerns.
+
 </output_format>
 
 <rules>
diff --git a/docs/README.plugins.md b/docs/README.plugins.md
index b24ae90a8..5cd4e7f07 100644
--- a/docs/README.plugins.md
+++ b/docs/README.plugins.md
@@ -48,7 +48,7 @@ See [CONTRIBUTING.md](../CONTRIBUTING.md#adding-plugins) for guidelines on how t
 | [fastah-ip-geo-tools](../plugins/fastah-ip-geo-tools/README.md) | This plugin is for network operations engineers who wish to tune and publish IP geolocation feeds in RFC 8805 format. It consists of an AI Skill and an associated MCP server that geocodes geolocation place names to real cities for accuracy. | 1 items | geofeed, ip-geolocation, rfc-8805, rfc-9632, network-operations, isp, cloud, hosting, ixp |
 | [flowstudio-power-automate](../plugins/flowstudio-power-automate/README.md) | Give your AI agent full visibility into Power Automate cloud flows via the FlowStudio MCP server. Connect, debug, build, monitor health, and govern flows at scale — action-level inputs and outputs, not just status codes. | 5 items | power-automate, power-platform, flowstudio, mcp, model-context-protocol, cloud-flows, workflow-automation, monitoring, governance |
 | [frontend-web-dev](../plugins/frontend-web-dev/README.md) | Essential prompts, instructions, and chat modes for modern frontend web development including React, Angular, Vue, TypeScript, and CSS frameworks. | 4 items | frontend, web, react, typescript, javascript, css, html, angular, vue |
-| [gem-team](../plugins/gem-team/README.md) | Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification. | 15 items | multi-agent, orchestration, tdd, testing, e2e, devops, security-audit, code-review, prd, mobile |
+| [gem-team](../plugins/gem-team/README.md) | Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification. | 0 items | multi-agent, orchestration, tdd, testing, e2e, devops, security-audit, code-review, prd, mobile |
 | [go-mcp-development](../plugins/go-mcp-development/README.md) | Complete toolkit for building Model Context Protocol (MCP) servers in Go using the official github.com/modelcontextprotocol/go-sdk. Includes instructions for best practices, a prompt for generating servers, and an expert chat mode for guidance. | 2 items | go, golang, mcp, model-context-protocol, server-development, sdk |
 | [java-development](../plugins/java-development/README.md) | Comprehensive collection of prompts and instructions for Java development including Spring Boot, Quarkus, testing, documentation, and best practices. | 4 items | java, springboot, quarkus, jpa, junit, javadoc |
 | [java-mcp-development](../plugins/java-mcp-development/README.md) | Complete toolkit for building Model Context Protocol servers in Java using the official MCP Java SDK with reactive streams and Spring Boot integration. | 2 items | java, mcp, model-context-protocol, server-development, sdk, reactive-streams, spring-boot, reactor |
diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json
index 52c4eab72..9f89547ef 100644
--- a/plugins/gem-team/.github/plugin/plugin.json
+++ b/plugins/gem-team/.github/plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "gem-team",
-  "version": "1.23.0",
+  "version": "1.24.0",
   "description": "Self-Learning Multi-agent orchestration harness for spec-driven development and automated verification.",
   "author": {
     "name": "mubaidr",
@@ -9,7 +9,7 @@
   },
   "license": "Apache-2.0",
   "repository": "https://github.com/mubaidr/gem-team",
-  "homepage": "https://mubaidr.github.io/gem-team/",
+  "homepage": "https://github.com/mubaidr/gem-team",
   "keywords": [
     "multi-agent",
     "orchestration",
@@ -21,22 +21,5 @@
     "code-review",
     "prd",
     "mobile"
-  ],
-  "agents": [
-    "./agents/gem-browser-tester.md",
-    "./agents/gem-code-simplifier.md",
-    "./agents/gem-critic.md",
-    "./agents/gem-debugger.md",
-    "./agents/gem-designer-mobile.md",
-    "./agents/gem-designer.md",
-    "./agents/gem-devops.md",
-    "./agents/gem-documentation-writer.md",
-    "./agents/gem-implementer-mobile.md",
-    "./agents/gem-implementer.md",
-    "./agents/gem-mobile-tester.md",
-    "./agents/gem-orchestrator.md",
-    "./agents/gem-planner.md",
-    "./agents/gem-researcher.md",
-    "./agents/gem-reviewer.md"
   ]
 }