diff --git a/.claude/agents/refactoring-specialist.md b/.claude/agents/refactoring-specialist.md index aa206f3..f131b0d 100644 --- a/.claude/agents/refactoring-specialist.md +++ b/.claude/agents/refactoring-specialist.md @@ -21,6 +21,15 @@ You are a Refactoring Specialist for Python projects. You perform read-only anal | **Interface Segregation** | Large interfaces forcing implementations of unused methods, "god" base classes | | **Dependency Inversion** | High-level modules importing low-level modules directly, no abstraction boundaries | +### Python-Specific SOLID Checks + +- **Mutable default arguments** (`def f(x=[])`) -- shared state across calls, use `None` + assignment +- **ABC/Protocol misuse** -- prefer `typing.Protocol` for structural subtyping over `abc.ABC` when callers only need a subset of methods (Interface Segregation) +- **Missing dependency injection** -- classes that instantiate their own dependencies internally instead of accepting them via `__init__` (Dependency Inversion) +- **God classes** -- classes with 10+ public methods or mixed concerns (data access + business logic + formatting) +- **`@property` overuse** -- properties hiding expensive computation or side effects; prefer explicit methods when the operation is not trivially cheap +- **Circular imports** -- modules importing each other signals entangled responsibilities (Single Responsibility) + ## Code Smells to Detect ### Size Smells diff --git a/.claude/commands/cove-isolated.md b/.claude/commands/cove-isolated.md new file mode 100644 index 0000000..9502c55 --- /dev/null +++ b/.claude/commands/cove-isolated.md @@ -0,0 +1,68 @@ +--- +allowed-tools: Read, Glob, Grep, Bash, Agent +description: Isolated Chain-of-Verification (CoVe) -- verification step runs in a separate agent to prevent confirmation bias. +--- + + + +# Isolated Chain-of-Verification (CoVe) + +Apply the 4-step CoVe process with **isolated verification** -- Step 3 runs in a separate agent that cannot see the baseline response, preventing confirmation bias. + +## Step 1: Generate Baseline Response + +Answer the user's question fully, as you normally would. Write out your complete response under a heading: + +```markdown +## Baseline Response +[your full answer here] +``` + +## Step 2: Plan Verification Questions + +Review your baseline response and generate a numbered list of fact-check questions. Focus on claims that could be wrong -- file paths, function signatures, API behavior, version numbers, configuration syntax, behavioral assertions. + +```markdown +## Verification Questions +1. [Is the file path X correct?] +2. [Does function Y actually accept parameter Z?] +3. [Is it true that library A supports feature B?] +... +``` + +Generate 3-8 questions depending on response complexity. + +## Step 3: Isolated Verification (Agent) + +Launch a general-purpose Agent to answer the verification questions **independently**. The agent must NOT see your baseline response -- only the verification questions. This prevents confirmation bias. + +Provide the agent with: +- The numbered list of verification questions from Step 2 +- Instructions to use Read, Grep, Glob, and Bash to find evidence +- Instructions to answer each question with CONFIRMED or INCORRECT plus evidence + +Example agent prompt: +``` +Answer each of these fact-check questions by investigating the codebase. For each question, respond with CONFIRMED or INCORRECT and cite your evidence. + +Questions: +1. [question 1] +2. [question 2] +... +``` + +## Step 4: Generate Final Verified Response + +Review the agent's verification results and revise your baseline response, incorporating all corrections. If no errors were found, state that the baseline was verified and present it as final. + +```markdown +## Verified Response +[corrected answer, incorporating all verification results] +``` + +If any corrections were made, add a brief summary: + +```markdown +## Corrections Made +- [what changed and why] +``` diff --git a/.claude/commands/cove.md b/.claude/commands/cove.md new file mode 100644 index 0000000..41c16be --- /dev/null +++ b/.claude/commands/cove.md @@ -0,0 +1,60 @@ +--- +allowed-tools: Read, Glob, Grep, Bash +description: Chain-of-Verification (CoVe) prompting for high-stakes accuracy. Generates a response, self-verifies with fact-check questions, then revises. +--- + + + +# Chain-of-Verification (CoVe) + +Apply the 4-step CoVe process to reduce hallucinations and factual errors in your response to the user's question. + +## Step 1: Generate Baseline Response + +Answer the user's question fully, as you normally would. Write out your complete response under a heading: + +```markdown +## Baseline Response +[your full answer here] +``` + +## Step 2: Plan Verification Questions + +Review your baseline response and generate a numbered list of fact-check questions. Focus on claims that could be wrong -- file paths, function signatures, API behavior, version numbers, configuration syntax, behavioral assertions. + +```markdown +## Verification Questions +1. [Is the file path X correct?] +2. [Does function Y actually accept parameter Z?] +3. [Is it true that library A supports feature B?] +... +``` + +Generate 3-8 questions depending on response complexity. + +## Step 3: Answer Verifications + +Answer each verification question independently. Use tools (Read, Grep, Glob, Bash) to check facts against the actual codebase, documentation, or runtime behavior. Do not rely on your baseline response -- verify from source. + +```markdown +## Verification Results +1. [CONFIRMED / INCORRECT] -- [evidence] +2. [CONFIRMED / INCORRECT] -- [evidence] +... +``` + +## Step 4: Generate Final Verified Response + +Revise your baseline response, incorporating all corrections from Step 3. If no errors were found, state that the baseline was verified and present it as final. + +```markdown +## Verified Response +[corrected answer, incorporating all verification results] +``` + +If any corrections were made, add a brief summary: + +```markdown +## Corrections Made +- [what changed and why] +``` diff --git a/.github/workflows/template-sync.yml b/.github/workflows/template-sync.yml new file mode 100644 index 0000000..7c58e71 --- /dev/null +++ b/.github/workflows/template-sync.yml @@ -0,0 +1,169 @@ +# Inspired by serpro69/claude-starter-kit template-sync approach +name: Template Sync + +on: + workflow_dispatch: + inputs: + dry_run: + description: "Show changes without creating a PR" + type: boolean + default: false + template_repo: + description: "Upstream template repository (owner/repo)" + type: string + default: "stranma/claude-code-python-template" + template_branch: + description: "Upstream template branch" + type: string + default: "master" + schedule: + - cron: "0 9 * * 1" # Weekly on Monday at 09:00 UTC + +permissions: + contents: write + pull-requests: write + +jobs: + sync: + name: Sync from upstream template + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Determine template repo + id: config + run: | + REPO="${{ inputs.template_repo || 'stranma/claude-code-python-template' }}" + BRANCH="${{ inputs.template_branch || 'master' }}" + echo "repo=${REPO}" >> "$GITHUB_OUTPUT" + echo "branch=${BRANCH}" >> "$GITHUB_OUTPUT" + echo "Syncing from ${REPO}@${BRANCH}" + + - name: Add upstream remote and fetch + run: | + git remote add upstream "https://github.com/${{ steps.config.outputs.repo }}.git" || true + git fetch upstream "${{ steps.config.outputs.branch }}" + + - name: Compute template diff + id: diff + env: + UPSTREAM_BRANCH: ${{ steps.config.outputs.branch }} + run: | + # Paths managed by the template (synced from upstream) + # Defined once here; reused in the apply step via GITHUB_OUTPUT + TEMPLATE_PATHS=".claude/agents/ .claude/commands/ .claude/hooks/ .claude/rules/ .claude/skills/ .devcontainer/ .github/workflows/ docs/DEVELOPMENT_PROCESS.md" + echo "template_paths=${TEMPLATE_PATHS}" >> "$GITHUB_OUTPUT" + + # Get changed files between local and upstream + CHANGED=$(git diff --name-only HEAD "upstream/${UPSTREAM_BRANCH}" -- ${TEMPLATE_PATHS} 2>/dev/null || true) + + if [ -z "$CHANGED" ]; then + echo "No template changes found" + echo "has_changes=false" >> "$GITHUB_OUTPUT" + else + echo "Template changes detected:" + echo "$CHANGED" + echo "has_changes=true" >> "$GITHUB_OUTPUT" + # Store diff summary for PR body + DIFF_STAT=$(git diff --stat HEAD "upstream/${UPSTREAM_BRANCH}" -- ${TEMPLATE_PATHS} 2>/dev/null || true) + { + echo "diff_stat<> "$GITHUB_OUTPUT" + fi + + - name: Show diff (dry run) + if: steps.diff.outputs.has_changes == 'true' && (inputs.dry_run == true || inputs.dry_run == 'true') + run: | + echo "=== DRY RUN: Changes that would be synced ===" + echo "${{ steps.diff.outputs.diff_stat }}" + + - name: Apply template changes + id: apply + if: steps.diff.outputs.has_changes == 'true' && inputs.dry_run != true && inputs.dry_run != 'true' + env: + UPSTREAM_BRANCH: ${{ steps.config.outputs.branch }} + TEMPLATE_PATHS: ${{ steps.diff.outputs.template_paths }} + run: | + SYNC_BRANCH="template-sync/$(date +%Y%m%d)" + + # Check if branch already exists + if git rev-parse --verify "refs/heads/${SYNC_BRANCH}" > /dev/null 2>&1; then + echo "Sync branch ${SYNC_BRANCH} already exists, updating" + git checkout "${SYNC_BRANCH}" + else + git checkout -b "${SYNC_BRANCH}" + fi + + # Checkout template-managed files from upstream + for path in ${TEMPLATE_PATHS}; do + git checkout "upstream/${UPSTREAM_BRANCH}" -- "${path}" 2>/dev/null || true + done + + # Stage and commit + git add -A + if git diff --cached --quiet; then + echo "No changes to commit after checkout" + echo "changes_applied=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "changes_applied=true" >> "$GITHUB_OUTPUT" + + git commit -m "chore: sync template from upstream + + Source: ${{ steps.config.outputs.repo }}@${{ steps.config.outputs.branch }}" + + git push -u origin "${SYNC_BRANCH}" + + echo "sync_branch=${SYNC_BRANCH}" >> "$GITHUB_ENV" + + - name: Create pull request + if: steps.apply.outputs.changes_applied == 'true' && inputs.dry_run != true && inputs.dry_run != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # Check for existing PR from this branch + EXISTING_PR=$(gh pr list --head "${{ env.sync_branch }}" --json number --jq '.[0].number' 2>/dev/null || true) + + if [ -n "$EXISTING_PR" ]; then + echo "PR #${EXISTING_PR} already exists for this sync branch" + exit 0 + fi + + gh pr create \ + --title "chore: sync upstream template changes" \ + --body "$(cat <<'EOF' + ## Template Sync + + Automated sync of template-managed files from upstream. + + **Source:** ${{ steps.config.outputs.repo }}@${{ steps.config.outputs.branch }} + + ### Changed files + ``` + ${{ steps.diff.outputs.diff_stat }} + ``` + + ### What to review + - Check if any synced files conflict with project-specific customizations + - Template-managed paths: `.claude/`, `.devcontainer/`, `.github/workflows/`, `docs/DEVELOPMENT_PROCESS.md` + - Project-specific files (`apps/`, `libs/`, `tests/`, `pyproject.toml`, `README.md`) are NOT touched + + ### How to resolve conflicts + If a synced file conflicts with local changes, edit the file on this branch to keep your customizations, then merge. + EOF + )" + + - name: Summary + if: steps.diff.outputs.has_changes == 'false' + run: echo "Already up to date with upstream template. No sync needed." diff --git a/README.md b/README.md index 76d66e1..a022e44 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,8 @@ actual scope at completion based on workspace signals. - `/design` -- crystallize brainstorming into a structured plan - `/done` -- validate, ship, and document in one command - `/catchup` -- restore context after session break or `/clear` +- `/cove` -- Chain-of-Verification for high-stakes accuracy (4-step self-verification) +- `/cove-isolated` -- CoVe with isolated verification agent (prevents confirmation bias) - `/security-audit` -- 6-phase security posture scan with A-F grading - `/edit-permissions` -- manage Claude Code permission rules @@ -233,7 +235,7 @@ my-project/ │ ├── settings.json │ ├── agents/ # 12 agents │ ├── skills/ # /sync, /design, /done, /edit-permissions -│ ├── commands/ # /catchup, /security-audit +│ ├── commands/ # /catchup, /cove, /cove-isolated, /security-audit │ ├── hooks/ # 5 hook scripts │ └── rules/ # 4 review rules ├── .devcontainer/ # VS Code devcontainer @@ -306,6 +308,8 @@ Monorepo structure inspired by [carderne/postmodern-mono](https://github.com/car - Claude Code methodology layer (CLAUDE.md, agents, skills, hooks) - Setup script for template initialization +Chain-of-Verification commands and template sync workflow inspired by [serpro69/claude-starter-kit](https://github.com/serpro69/claude-starter-kit), a language-agnostic Claude Code starter template with MCP server integrations. Python SOLID checklist items in the refactoring-specialist agent also draw from their structured code review approach. + ## License MIT diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fe25f96..778c291 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Chain-of-Verification (CoVe) commands (`/cove`, `/cove-isolated`) for high-stakes accuracy -- 4-step self-verification process based on Meta's CoVe paper, with an isolated variant that runs verification in a separate agent to prevent confirmation bias +- Template sync workflow (`.github/workflows/template-sync.yml`) for downstream projects to auto-sync upstream template improvements -- runs weekly or on manual trigger, creates PRs with changed template-managed files while preserving project-specific code +- Python-specific SOLID checklist in `refactoring-specialist` agent -- checks for mutable default arguments, ABC/Protocol misuse, missing dependency injection, god classes, `@property` overuse, and circular imports - Template integration CI pipeline (`template-integration.yml`) tests `setup_project.py` across 5 configurations (mono-default, mono-renamed, mono-extra-pkgs, single-package, mono-postgres) -- verifies each produces a valid project that installs, lints, type-checks, and passes tests - Reusable `scripts/test_template_integration.sh` for local template validation with the same 9-step verification as CI - Workflow skill `/sync` checks workspace readiness before starting work (git fetch, status, branch info, warnings) @@ -15,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Workflow skill `/done` auto-detects scope (Q/S/P) and runs the full validate-ship-document pipeline, including the former `/ship` checklist - Three graduated permission tiers (Assisted, Autonomous, Full Trust) for devcontainer environments -- container isolation (firewall, non-root, hooks) enables safely expanding Claude Code permissions, reducing unnecessary prompts from dozens per session to zero in Tier 2/3 while blocking tool installation, package publishing, and container escape vectors via curated deny lists and a policy-enforcement hook - 5 hook scripts in `.claude/hooks/` run automatically during Claude Code sessions -- 3 security hooks block destructive commands, secret leaks, and invisible Unicode attacks in real time; 2 productivity hooks auto-format Python files and auto-run associated tests after every edit -- 2 slash commands (`/catchup`, `/security-audit`) provide one-command context restoration after `/clear` and a 6-phase security posture scan with A-F grading +- 4 slash commands (`/catchup`, `/cove`, `/cove-isolated`, `/security-audit`) provide context restoration, chain-of-verification for accuracy, and a 6-phase security posture scan with A-F grading - 3 new specialized agents: `security-auditor` (OWASP-based vulnerability analysis, read-only), `refactoring-specialist` (SOLID/code smell detection, read-only), `output-evaluator` (LLM-as-Judge quality scoring for automated pipelines) - 4 review rules in `.claude/rules/` auto-loaded as project context -- cover architecture, code quality, performance, and test quality concerns that linters cannot catch - AI-powered PR review via GitHub Actions (`claude-code-review.yml`) using `anthropics/claude-code-action@v1` -- automatically reviews PRs with read-only tools on open/sync/ready_for_review diff --git a/docs/DEVELOPMENT_PROCESS.md b/docs/DEVELOPMENT_PROCESS.md index 39ddd7e..f6ff858 100644 --- a/docs/DEVELOPMENT_PROCESS.md +++ b/docs/DEVELOPMENT_PROCESS.md @@ -161,11 +161,13 @@ All hooks require `jq` for JSON parsing and degrade gracefully if jq is missing. ## Commands -2 slash commands in `.claude/commands/`: +4 slash commands in `.claude/commands/`: | Command | Purpose | |---------|---------| | `/catchup` | Context restoration after `/clear`. Reads IMPLEMENTATION_PLAN.md, CHANGELOG.md, git history; recommends next steps. | +| `/cove` | Chain-of-Verification (CoVe) for high-stakes accuracy. 4-step process: generate baseline, plan verifications, verify from source, produce corrected response. | +| `/cove-isolated` | Isolated CoVe variant. Verification step runs in a separate agent that cannot see the baseline response, preventing confirmation bias. | | `/security-audit` | 6-phase Python security scan (deps, secrets, code patterns, input validation, config, scoring). Outputs A-F grade. | --- diff --git a/tests/test_commands.py b/tests/test_commands.py index de00ff1..f11f430 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -8,6 +8,8 @@ ALL_COMMANDS = [ "catchup.md", + "cove.md", + "cove-isolated.md", "security-audit.md", ] @@ -92,6 +94,25 @@ def test_security_audit_checks_secrets(self) -> None: assert "secret" in content.lower(), "security-audit should scan for secrets" def test_security_audit_checks_code_patterns(self) -> None: + # Checks that security-audit references unsafe code patterns (static strings, not code usage) content = (COMMANDS_DIR / "security-audit.md").read_text(encoding="utf-8") for pattern in ["eval", "exec", "pickle", "subprocess"]: assert pattern in content, f"security-audit missing code pattern: {pattern}" + + def test_cove_has_four_steps(self) -> None: + content = (COMMANDS_DIR / "cove.md").read_text(encoding="utf-8") + for step in ["Step 1", "Step 2", "Step 3", "Step 4"]: + assert step in content, f"cove missing {step}" + + def test_cove_has_verification_questions(self) -> None: + content = (COMMANDS_DIR / "cove.md").read_text(encoding="utf-8") + assert "Verification" in content, "cove should mention verification" + + def test_cove_isolated_uses_agent(self) -> None: + content = (COMMANDS_DIR / "cove-isolated.md").read_text(encoding="utf-8") + assert "Agent" in content, "cove-isolated should use Agent tool for isolation" + + def test_cove_isolated_has_four_steps(self) -> None: + content = (COMMANDS_DIR / "cove-isolated.md").read_text(encoding="utf-8") + for step in ["Step 1", "Step 2", "Step 3", "Step 4"]: + assert step in content, f"cove-isolated missing {step}" diff --git a/tests/test_hooks.py b/tests/test_hooks.py index 544c486..99d09b9 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -1,6 +1,7 @@ """Tests for .claude/hooks/ -- validates hook scripts exist, are executable, and have correct structure.""" import stat +import subprocess from pathlib import Path import pytest @@ -50,8 +51,22 @@ class TestHookPermissions: @pytest.mark.parametrize("hook_name", ALL_HOOKS) def test_hook_is_executable(self, hook_name: str) -> None: hook_path = HOOKS_DIR / hook_name - mode = hook_path.stat().st_mode - assert mode & stat.S_IXUSR, f"{hook_name} is not executable (missing user execute bit)" + repo_root = Path(__file__).parent.parent + # Try git's tracked mode first (works on Windows where NTFS has no execute bit) + result = subprocess.run( + ["git", "ls-files", "-s", str(hook_path.relative_to(repo_root))], + capture_output=True, + text=True, + cwd=repo_root, + ) + if result.stdout: + assert result.stdout.startswith("100755"), ( + f"{hook_name} is not tracked as executable by git (expected mode 100755)" + ) + else: + # Not in a git repo (e.g. integration test copy) -- fall back to filesystem + mode = hook_path.stat().st_mode + assert mode & stat.S_IXUSR, f"{hook_name} is not executable (missing user execute bit)" @pytest.mark.parametrize("hook_name", ALL_HOOKS) def test_hook_is_readable(self, hook_name: str) -> None: diff --git a/tests/test_skills.py b/tests/test_skills.py index cebb31e..3dad401 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -114,10 +114,6 @@ def test_sync_shows_recent_commits(self) -> None: content = (SKILLS_DIR / "sync" / "SKILL.md").read_text(encoding="utf-8") assert "git log" in content, "sync should show recent commits" - def test_sync_does_not_classify(self) -> None: - content = (SKILLS_DIR / "sync" / "SKILL.md").read_text(encoding="utf-8") - assert "does not classify" in content.lower(), "sync should explicitly state it does not classify tasks" - # /design def test_design_reads_decisions(self) -> None: content = (SKILLS_DIR / "design" / "SKILL.md").read_text(encoding="utf-8")