diff --git a/.claude/agents/folder-refactor-advisor.md b/.claude/agents/folder-refactor-advisor.md
new file mode 100644
index 0000000..81ea834
--- /dev/null
+++ b/.claude/agents/folder-refactor-advisor.md
@@ -0,0 +1,43 @@
+---
+name: folder-refactor-advisor
+description: Use when the folder-size CI check (`.github/workflows/folder-size.yaml`) errors or warns, or when the user asks for help splitting a folder that has grown too many flat sibling files. Investigates the folder, interviews the user about how the area is expected to evolve, and proposes three concrete subfolder restructurings with pros/cons and a recommendation. Advisory only - does not move files.
+tools: Read, Glob, Grep, Bash, AskUserQuestion
+model: sonnet
+color: yellow
+---
+
+# Folder Refactor Advisor
+
+Advisory-only. You diagnose one flat folder and recommend how to split it.
+Never move, rename, create, or delete files.
+
+## Steps
+
+1. **Inventory** with `Glob`/`Read`/`Grep`: every immediate `.py`
+   child, its line count, one-line purpose, import graph (internal + external
+   callers), naming patterns, matching test layout. Skim, don't deep-read.
+2. **Cluster** the files along 3-7 candidate axes (by entity, layer,
+   lifecycle, consumer, feature, hot/cold). Expect 2-3 axes to overlap - the
+   interview disambiguates.
+3. **Interview** via `AskUserQuestion`, in batches of 2-3. Ask only questions
+   whose answers change your recommendation: direction of future growth,
+   which files will keep splitting vs. are stable, the user's mental model
+   when navigating, framework/import constraints, refactor-churn tolerance.
+4. **Propose exactly three options.** Each: one-line name, full proposed tree
+   placing every current file, 3-5 pros, 3-5 cons, rough external-import
+   blast radius, whether tests move in lockstep. Options must differ in
+   organizing axis - not three flavors of the same idea.
+5. **Recommend one** in a sentence, citing the deciding user answer verbatim.
+   Acknowledge the strongest counter-argument.
+6. **Hand off** the concrete file moves and import/barrel updates, in order.
+   Do not perform them.
+
+## Guardrails
+
+- One folder per invocation.
+- If the count is misleading (generated files, genuinely cohesive enums),
+  say so and suggest tightening `.github/workflows/folder-size.yaml`
+  exclusions instead of inventing a refactor.
+- Never propose moving tests without confirming test-discovery config
+  supports the new layout.
+- Tables and trees beat paragraphs.
diff --git a/.codex/agents/folder-refactor-advisor.toml b/.codex/agents/folder-refactor-advisor.toml
new file mode 100644
index 0000000..2999bbe
--- /dev/null
+++ b/.codex/agents/folder-refactor-advisor.toml
@@ -0,0 +1,44 @@
+name = "folder-refactor-advisor"
+description = "Use when the folder-size CI check (`.github/workflows/folder-size.yaml`) errors or warns, or when the user asks for help splitting a folder that has grown too many flat sibling files. Investigates the folder, interviews the user about how the area is expected to evolve, and proposes three concrete subfolder restructurings with pros/cons and a recommendation. Advisory only - does not move files."
+developer_instructions = """
+# Folder Refactor Advisor
+
+Advisory-only. You diagnose one flat folder and recommend how to split it.
+Never move, rename, create, or delete files.
+
+## Steps
+
+1. **Inventory** with `Glob`/`Read`/`Grep`: every immediate `.py`
+   child, its line count, one-line purpose, import graph (internal + external
+   callers), naming patterns, matching test layout. Skim, don't deep-read.
+2. **Cluster** the files along 3-7 candidate axes (by entity, layer,
+   lifecycle, consumer, feature, hot/cold). Expect 2-3 axes to overlap - the
+   interview disambiguates.
+3. **Interview** via `AskUserQuestion`, in batches of 2-3. Ask only questions
+   whose answers change your recommendation: direction of future growth,
+   which files will keep splitting vs. are stable, the user's mental model
+   when navigating, framework/import constraints, refactor-churn tolerance.
+4. **Propose exactly three options.** Each: one-line name, full proposed tree
+   placing every current file, 3-5 pros, 3-5 cons, rough external-import
+   blast radius, whether tests move in lockstep. Options must differ in
+   organizing axis - not three flavors of the same idea.
+5. **Recommend one** in a sentence, citing the deciding user answer verbatim.
+   Acknowledge the strongest counter-argument.
+6. **Hand off** the concrete file moves and import/barrel updates, in order.
+   Do not perform them.
+
+## Guardrails
+
+- One folder per invocation.
+- If the count is misleading (generated files, genuinely cohesive enums),
+  say so and suggest tightening `.github/workflows/folder-size.yaml`
+  exclusions instead of inventing a refactor.
+- Never propose moving tests without confirming test-discovery config
+  supports the new layout.
+- Tables and trees beat paragraphs.
+"""
+
+# Claude-only frontmatter (preserved for reference, not used by Codex):
+# tools = 'Read, Glob, Grep, Bash, AskUserQuestion'
+# model = 'sonnet'
+# color = 'yellow'
diff --git a/.github/workflows/folder-size.yaml b/.github/workflows/folder-size.yaml
new file mode 100644
index 0000000..0e0e697
--- /dev/null
+++ b/.github/workflows/folder-size.yaml
@@ -0,0 +1,37 @@
+name: Folder Size Check
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - '**.py'
+
+jobs:
+  check-folder-sizes:
+    name: Folder File Count Limit
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Check for oversized folders
+        # The base SHA reaches the script through the environment instead of
+        # being expanded into the script text. It is empty on workflow_dispatch.
+        env:
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        run: |
+          if [ -n "$BASE_SHA" ]; then
+            # Plain assignment: a failing git diff aborts the step (the default
+            # run shell is bash -e) instead of reading as "nothing changed".
+            changed=$(git -c core.quotePath=false diff --name-only --diff-filter=d "$BASE_SHA...HEAD" -- '*.py')
+            if [ -z "$changed" ]; then
+              echo "No source files changed."
+              exit 0
+            fi
+            mapfile -t files <<< "$changed"
+            scripts/check_folder_sizes.sh "${files[@]}"
+          else
+            scripts/check_folder_sizes.sh --all
+          fi
diff --git a/.github/workflows/large-files.yaml b/.github/workflows/large-files.yaml
new file mode 100644
index 0000000..0faa9a8
--- /dev/null
+++ b/.github/workflows/large-files.yaml
@@ -0,0 +1,37 @@
+name: Large File Check
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - '**.py'
+
+jobs:
+  check-file-sizes:
+    name: Source File Line Limit
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Check for large source files
+        # The base SHA reaches the script through the environment instead of
+        # being expanded into the script text. It is empty on workflow_dispatch.
+        env:
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        run: |
+          if [ -n "$BASE_SHA" ]; then
+            # Plain assignment: a failing git diff aborts the step (the default
+            # run shell is bash -e) instead of reading as "nothing changed".
+            changed=$(git -c core.quotePath=false diff --name-only --diff-filter=d "$BASE_SHA...HEAD" -- '*.py')
+            if [ -z "$changed" ]; then
+              echo "No source files changed."
+              exit 0
+            fi
+            mapfile -t files <<< "$changed"
+            scripts/check_large_files.sh "${files[@]}"
+          else
+            scripts/check_large_files.sh --all
+          fi
diff --git a/CLAUDE.md b/CLAUDE.md
index 11458cc..2ed2e22 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,5 +1,7 @@
 # CLAUDE.md
 
+**Before any other work in this repo, enable prek:** `uv tool install prek && prek install`. Hooks are defined in `prek.toml`.
+
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 
 ## Project Overview
@@ -133,6 +135,10 @@ Structure as: `init()` → `continue(id)` → `cleanup(id)`
 - Use descriptive IDs (runId, taskId)
 - Handle rate limits, timeouts, retries at system boundaries
 
+## Subagents
+
+- Folder-size CI failure → spawn subagent `.claude/agents/folder-refactor-advisor.md`.
+
 ## Git Workflow
 - **Protected Branch**: `main` is protected. Do not push directly to `main`. Use PRs.
 - **Merge Strategy**: Squash and merge.
diff --git a/prek.toml b/prek.toml
index 3bc1f30..31e6266 100644
--- a/prek.toml
+++ b/prek.toml
@@ -17,3 +17,21 @@ hooks = [
     { id = "ai-writing-check", name = "AI writing check", entry = "uv run python scripts/check_ai_writing.py", language = "system", pass_filenames = false, always_run = true },
     { id = "sync-agent-config", name = "sync Claude ↔ Codex skills & subagents; fail if drift was fixed", entry = "uv run scripts/sync_agent_config.py --check", language = "system", pass_filenames = false, always_run = true },
 ]
+
+# ── Source-size guardrails (mirror GitHub Actions) ────────────────
+[[repos]]
+repo = "local"
+
+[[repos.hooks]]
+id = "check-large-files"
+name = "fail if any source file exceeds the line-count error threshold"
+language = "system"
+entry = "scripts/check_large_files.sh"
+files = "\\.(py)$"
+
+[[repos.hooks]]
+id = "check-folder-sizes"
+name = "fail if any source folder exceeds the file-count error threshold"
+language = "system"
+entry = "scripts/check_folder_sizes.sh"
+files = "\\.(py)$"
diff --git a/scripts/check_folder_sizes.sh b/scripts/check_folder_sizes.sh
new file mode 100755
index 0000000..8046c67
--- /dev/null
+++ b/scripts/check_folder_sizes.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+#
+# Enforce a file-count limit on source folders (non-recursive). Shared by
+# .github/workflows/folder-size.yaml and prek.toml.
+#
+# Usage:
+#   check_folder_sizes.sh [file ...]   # check folders containing the given files
+#   check_folder_sizes.sh --all        # scan every folder in the tree
+#
+# Thresholds: warn at WARN files-per-folder, error at ERROR. Override via
+# FOLDER_WARN_THRESHOLD / FOLDER_ERROR_THRESHOLD env vars (namespaced so
+# they don't collide with check_large_files.sh).
+# Exit 1 on non-grandfathered errors, 0 on warnings-only or clean.
+#
+# If $GITHUB_STEP_SUMMARY is set, a markdown summary is appended to it.
+
+set -euo pipefail
+
+WARN_THRESHOLD="${FOLDER_WARN_THRESHOLD:-20}"
+ERROR_THRESHOLD="${FOLDER_ERROR_THRESHOLD:-35}"
+
+# Folders listed here are reported as warnings instead of errors when they
+# exceed ERROR_THRESHOLD.
+GRANDFATHERED=()
+
+# Folders whose path matches either pattern are never checked.
+EXCLUDE_PATH_RE='(^|/)(node_modules|__pycache__|\.venv|venv|visual-tests|e2e|tests|test|__tests__|\.git|dist|build)(/|$)'
+ALEMBIC_RE='(^|/)alembic[^/]*/versions(/|$)'
+
+# Succeed when folder $1 appears in GRANDFATHERED.
+is_grandfathered() {
+  local target="$1" entry
+  # The ${arr[@]+...} guard keeps the empty array from tripping `set -u` on
+  # bash releases older than 4.4.
+  for entry in ${GRANDFATHERED[@]+"${GRANDFATHERED[@]}"}; do
+    [ "$target" = "$entry" ] && return 0
+  done
+  return 1
+}
+
+# Succeed when folder $1 must not be checked: it is empty, it is "." or it
+# matches one of the exclusion patterns.
+should_skip() {
+  local f="$1"
+  [ -z "$f" ] && return 0
+  [ "$f" = "." ] && return 0
+  # Match in-process: no echo|grep forks, and names that look like echo
+  # options (for example "-n") are tested verbatim.
+  [[ $f =~ $EXCLUDE_PATH_RE ]] && return 0
+  [[ $f =~ $ALEMBIC_RE ]] && return 0
+  return 1
+}
+
+# Print the number of immediate .py files in folder $1, leaving out
+# test_*.py, conftest.py and vulture_whitelist.py.
+count_folder() {
+  find "$1" -mindepth 1 -maxdepth 1 -type f \
+    -name '*.py' \
+    -not -name 'test_*.py' \
+    -not -name 'conftest.py' \
+    -not -name 'vulture_whitelist.py' \
+    | wc -l
+}
+
+# Print every directory below the current one without the leading "./",
+# leaving out the contents of .git, __pycache__, .venv and venv.
+collect_all_folders() {
+  find . -type d \
+    -not -path './.git/*' \
+    -not -path '*/__pycache__/*' \
+    -not -path '*/.venv/*' \
+    -not -path '*/venv/*' \
+    | sed 's|^\./||'
+}
+
+folder_list=$(mktemp)
+trap 'rm -f "$folder_list"' EXIT
+
+if [ "${1:-}" = "--all" ]; then
+  collect_all_folders > "$folder_list"
+else
+  for f in "$@"; do
+    [ -z "$f" ] && continue
+    dirname "$f"
+  done | sort -u > "$folder_list"
+fi
+
+warnings=0
+errors=0
+warn_list=""
+error_list=""
+
+# Report rows end in a real newline and are printed with %s, so backslashes
+# in a folder name are never interpreted as escape sequences.
+while IFS= read -r folder; do
+  folder="${folder#./}"
+  should_skip "$folder" && continue
+  [ ! -d "$folder" ] && continue
+
+  count=$(count_folder "$folder")
+
+  if [ "$count" -gt "$ERROR_THRESHOLD" ]; then
+    if is_grandfathered "$folder"; then
+      warnings=$((warnings + 1))
+      warn_list="${warn_list}| \`${folder}/\` | ${count} | :warning: exceeds ${ERROR_THRESHOLD} (grandfathered) |"$'\n'
+    else
+      errors=$((errors + 1))
+      error_list="${error_list}| \`${folder}/\` | ${count} | :x: exceeds ${ERROR_THRESHOLD} |"$'\n'
+    fi
+  elif [ "$count" -gt "$WARN_THRESHOLD" ]; then
+    warnings=$((warnings + 1))
+    warn_list="${warn_list}| \`${folder}/\` | ${count} | :warning: exceeds ${WARN_THRESHOLD} |"$'\n'
+  fi
+done < "$folder_list"
+
+if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && { [ "$errors" -gt 0 ] || [ "$warnings" -gt 0 ]; }; then
+  {
+    echo "## Folder Size Report"
+    echo ""
+    echo "| Folder | Files | Status |"
+    echo "|--------|-------|--------|"
+    [ "$errors" -gt 0 ] && printf '%s' "$error_list"
+    [ "$warnings" -gt 0 ] && printf '%s' "$warn_list"
+    echo ""
+    echo "**Thresholds:** warn at ${WARN_THRESHOLD} files, error at ${ERROR_THRESHOLD} files. Counts immediate \`.py\` children only - subfolders are the fix, not the problem."
+  } >> "$GITHUB_STEP_SUMMARY"
+fi
+
+# Print the report rows in $1, aligned with column(1) when it is installed.
+format_list() {
+  if command -v column >/dev/null 2>&1; then
+    printf '%s' "$1" | column -t -s '|'
+  else
+    printf '%s' "$1"
+  fi
+}
+
+if [ "$errors" -gt 0 ]; then
+  echo "::error::${errors} folder(s) exceed the ${ERROR_THRESHOLD}-file error threshold" >&2
+  format_list "$error_list" >&2
+fi
+if [ "$warnings" -gt 0 ]; then
+  echo "::warning::${warnings} folder(s) exceed the ${WARN_THRESHOLD}-file warning threshold" >&2
+  format_list "$warn_list" >&2
+fi
+if [ "$errors" -eq 0 ] && [ "$warnings" -eq 0 ]; then
+  echo "All folders are within the ${WARN_THRESHOLD}-file limit."
+fi
+
+[ "$errors" -gt 0 ] && exit 1
+exit 0
diff --git a/scripts/check_large_files.sh b/scripts/check_large_files.sh
new file mode 100755
index 0000000..8275d9d
--- /dev/null
+++ b/scripts/check_large_files.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+#
+# Enforce a line-count limit on source files. Shared by
+# .github/workflows/large-files.yaml and prek.toml.
+#
+# Usage:
+#   check_large_files.sh [file ...]   # check the given files
+#   check_large_files.sh --all        # scan the whole tree
+#
+# Thresholds: warn at WARN lines, error at ERROR lines. Override via
+# LARGE_FILE_WARN_THRESHOLD / LARGE_FILE_ERROR_THRESHOLD env vars
+# (namespaced so they don't collide with check_folder_sizes.sh).
+# Exit 1 on errors, 0 on warnings-only or clean.
+#
+# If $GITHUB_STEP_SUMMARY is set, a markdown summary is appended to it.
+
+set -euo pipefail
+
+WARN_THRESHOLD="${LARGE_FILE_WARN_THRESHOLD:-500}"
+ERROR_THRESHOLD="${LARGE_FILE_ERROR_THRESHOLD:-800}"
+
+# Files whose path (or base name, for EXCLUDE_NAME_RE) matches are never checked.
+EXCLUDE_PATH_RE='(^|/)(node_modules|__pycache__|\.venv|venv|visual-tests|e2e|tests|test|__tests__|\.git)(/|$)'
+ALEMBIC_RE='(^|/)alembic[^/]*/versions(/|$)'
+EXCLUDE_NAME_RE='(^test_[^/]+\.py|^conftest\.py|^vulture_whitelist\.py)$'
+
+# Succeed when $1 ends in .py.
+is_source_file() {
+  case "$1" in
+    *.py) return 0 ;;
+    *) return 1 ;;
+  esac
+}
+
+# Succeed when file $1 is exempt: its path matches EXCLUDE_PATH_RE or
+# ALEMBIC_RE, or its base name (${f##*/}) matches EXCLUDE_NAME_RE.
+is_excluded() {
+  local f="$1"
+  # Match in-process: no echo|grep or basename forks, and names that look
+  # like echo options are tested verbatim.
+  [[ $f =~ $EXCLUDE_PATH_RE ]] && return 0
+  [[ $f =~ $ALEMBIC_RE ]] && return 0
+  [[ ${f##*/} =~ $EXCLUDE_NAME_RE ]] && return 0
+  return 1
+}
+
+# Print every .py file below the current directory without the leading
+# "./", leaving out the contents of .git, __pycache__, .venv and venv.
+collect_all() {
+  find . -type f -name '*.py' \
+    -not -path './.git/*' \
+    -not -path '*/__pycache__/*' \
+    -not -path '*/.venv/*' \
+    -not -path '*/venv/*' \
+    | sed 's|^\./||'
+}
+
+files=()
+if [ "${1:-}" = "--all" ]; then
+  # A read loop instead of mapfile, which does not exist before bash 4.
+  while IFS= read -r entry; do
+    files+=("$entry")
+  done < <(collect_all)
+else
+  files=("$@")
+fi
+
+warnings=0
+errors=0
+warn_list=""
+error_list=""
+
+# Report rows end in a real newline and are printed with %s, so backslashes
+# in a file name are never interpreted as escape sequences. The
+# ${files[@]+...} guard keeps an empty list from tripping `set -u` on bash
+# releases older than 4.4.
+for file in ${files[@]+"${files[@]}"}; do
+  [ -z "$file" ] && continue
+  [ ! -f "$file" ] && continue
+  is_source_file "$file" || continue
+  is_excluded "$file" && continue
+
+  lines=$(wc -l < "$file")
+  if [ "$lines" -gt "$ERROR_THRESHOLD" ]; then
+    errors=$((errors + 1))
+    error_list="${error_list}| \`${file}\` | ${lines} | :x: exceeds ${ERROR_THRESHOLD} |"$'\n'
+  elif [ "$lines" -gt "$WARN_THRESHOLD" ]; then
+    warnings=$((warnings + 1))
+    warn_list="${warn_list}| \`${file}\` | ${lines} | :warning: exceeds ${WARN_THRESHOLD} |"$'\n'
+  fi
+done
+
+if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && { [ "$errors" -gt 0 ] || [ "$warnings" -gt 0 ]; }; then
+  {
+    echo "## Large File Report"
+    echo ""
+    echo "| File | Lines | Status |"
+    echo "|------|-------|--------|"
+    [ "$errors" -gt 0 ] && printf '%s' "$error_list"
+    [ "$warnings" -gt 0 ] && printf '%s' "$warn_list"
+    echo ""
+    echo "**Thresholds:** warn at ${WARN_THRESHOLD} lines, error at ${ERROR_THRESHOLD} lines"
+  } >> "$GITHUB_STEP_SUMMARY"
+fi
+
+# Print the report rows in $1, aligned with column(1) when it is installed.
+format_list() {
+  if command -v column >/dev/null 2>&1; then
+    printf '%s' "$1" | column -t -s '|'
+  else
+    printf '%s' "$1"
+  fi
+}
+
+if [ "$errors" -gt 0 ]; then
+  echo "::error::${errors} file(s) exceed the ${ERROR_THRESHOLD}-line error threshold" >&2
+  format_list "$error_list" >&2
+fi
+if [ "$warnings" -gt 0 ]; then
+  echo "::warning::${warnings} file(s) exceed the ${WARN_THRESHOLD}-line warning threshold" >&2
+  format_list "$warn_list" >&2
+fi
+if [ "$errors" -eq 0 ] && [ "$warnings" -eq 0 ]; then
+  echo "All source files are within the ${WARN_THRESHOLD}-line limit."
+fi
+
+[ "$errors" -gt 0 ] && exit 1
+exit 0