diff --git a/agent/src/config.py b/agent/src/config.py
index 04fa162d..78e683ea 100644
--- a/agent/src/config.py
+++ b/agent/src/config.py
@@ -337,6 +337,8 @@ def build_config(
initial_approval_gate_count: int = 0,
approval_gate_cap: int | None = None,
attachments: list[dict] | None = None,
+ self_review_enabled: bool = False,
+ self_review_max_turns: int = 5,
) -> TaskConfig:
"""Build and validate configuration from explicit parameters.
@@ -407,6 +409,8 @@ def build_config(
initial_approval_gate_count=initial_approval_gate_count,
approval_gate_cap=approval_gate_cap,
attachments=validated_attachments,
+ self_review_enabled=self_review_enabled,
+ self_review_max_turns=self_review_max_turns,
)
@@ -431,6 +435,9 @@ def get_config() -> TaskConfig:
# an unreachable ``traces//`` key.
trace=os.environ.get("TRACE", "").lower() in ("1", "true", "yes"),
user_id=os.environ.get("USER_ID", ""),
+ self_review_enabled=os.environ.get("SELF_REVIEW_ENABLED", "").lower()
+ in ("1", "true", "yes"),
+ self_review_max_turns=int(os.environ.get("SELF_REVIEW_MAX_TURNS", "5")),
)
except ValueError as e:
print(f"ERROR: {e}", file=sys.stderr)
diff --git a/agent/src/models.py b/agent/src/models.py
index 007b94bc..2d0ddabd 100644
--- a/agent/src/models.py
+++ b/agent/src/models.py
@@ -186,6 +186,9 @@ class TaskConfig(BaseModel):
# Attachments from the orchestrator payload (Phase 3). Validated as
# AttachmentConfig models. Empty list for tasks without attachments.
attachments: list[AttachmentConfig] = Field(default_factory=list)
+ # Self-review: optional LLM diff critique before PR creation.
+ self_review_enabled: bool = False
+ self_review_max_turns: int = 5 # Cap on turns allocated to self-review
@model_validator(mode="after")
def _validate_trace_requires_user_id(self) -> Self:
diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py
index e476a11c..783d12c7 100644
--- a/agent/src/pipeline.py
+++ b/agent/src/pipeline.py
@@ -23,12 +23,14 @@
_extract_agent_notes,
ensure_committed,
ensure_pr,
+ post_self_review_comment,
verify_build,
verify_lint,
)
from progress_writer import _ProgressWriter
from prompt_builder import build_system_prompt, discover_project_config
from runner import run_agent
+from self_review import run_self_review
from shell import log, log_error_cw
from system_prompt import SYSTEM_PROMPT
from telemetry import (
@@ -279,6 +281,8 @@ def run_task(
trace: bool = False,
user_id: str = "",
attachments: list[dict] | None = None,
+ self_review_enabled: bool = False,
+ self_review_max_turns: int = 5,
) -> dict:
"""Run the full agent pipeline and return a serialized result dict.
@@ -318,6 +322,8 @@ def run_task(
initial_approval_gate_count=initial_approval_gate_count,
approval_gate_cap=approval_gate_cap,
attachments=attachments,
+ self_review_enabled=self_review_enabled,
+ self_review_max_turns=self_review_max_turns,
)
# Inject Cedar policies into config for the PolicyEngine in runner.py
@@ -623,6 +629,22 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
"turns_attempted": agent_result.num_turns or agent_result.turns,
}
+ # Self-review phase: LLM critiques its own diff before PR creation.
+ # Runs between cancel-check and post-hooks. Fail-open: errors here
+ # never block PR creation.
+ with task_span("task.self_review"):
+ review_result = run_self_review(
+ config, setup, agent_result, trajectory, progress
+ )
+ if review_result is not None:
+ # Accumulate turns and cost from the review phase
+ agent_result.turns += review_result.turns
+ agent_result.num_turns += review_result.num_turns or review_result.turns
+ if review_result.cost_usd is not None:
+ agent_result.cost_usd = (
+ (agent_result.cost_usd or 0.0) + review_result.cost_usd
+ )
+
# Post-hooks (agent_result is guaranteed set by the try/except above)
with task_span("task.post_hooks") as post_span:
# Safety net: commit any uncommitted tracked changes (skip for read-only tasks)
@@ -643,6 +665,10 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
if pr_url:
progress.write_agent_milestone("pr_created", pr_url)
+ # Post self-review summary as PR comment (if self-review ran and produced findings)
+ if pr_url and review_result is not None:
+ post_self_review_comment(setup.repo_dir, pr_url, config)
+
# Memory write — capture task episode and repo learnings
memory_written = False
effective_memory_id = memory_id or os.environ.get("MEMORY_ID", "")
diff --git a/agent/src/post_hooks.py b/agent/src/post_hooks.py
index e66ae9bb..5cbcce6f 100644
--- a/agent/src/post_hooks.py
+++ b/agent/src/post_hooks.py
@@ -327,6 +327,68 @@ def ensure_pr(
return None
+def post_self_review_comment(repo_dir: str, pr_url: str, config: TaskConfig) -> bool:
+ """Post the self-review summary as a PR comment.
+
+ Reads the summary file written by the self-review agent, formats it as a
+ comment, and posts it via `gh pr comment`. Fail-open: exceptions are logged
+ but never propagated.
+
+ Returns True if a comment was posted, False otherwise.
+ """
+ from self_review import read_self_review_summary
+
+ try:
+ summary = read_self_review_summary(repo_dir)
+ except Exception as e:
+ log("WARN", f"post_self_review_comment: failed to read summary: {type(e).__name__}: {e}")
+ return False
+
+ if not summary:
+ log("POST", "post_self_review_comment: no summary file found — skipping")
+ return False
+
+ # Extract PR number from URL (e.g. https://github.com/owner/repo/pull/123)
+ match = re.search(r"/pull/(\d+)", pr_url)
+ if not match:
+ log("WARN", f"post_self_review_comment: could not extract PR number from {pr_url}")
+ return False
+ pr_number = match.group(1)
+
+ comment_body = f"## \U0001f50d Self-Review Summary\n\n{summary}"
+
+ try:
+ result = subprocess.run(
+ [
+ "gh",
+ "pr",
+ "comment",
+ pr_number,
+ "--repo",
+ config.repo_url,
+ "--body",
+ comment_body,
+ ],
+ cwd=repo_dir,
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+ if result.returncode == 0:
+ log("POST", f"Self-review summary posted as comment on PR #{pr_number}")
+ return True
+ stderr = result.stderr.strip()[:200] if result.stderr else ""
+ log(
+ "WARN",
+ f"post_self_review_comment: gh pr comment failed "
+ f"(rc={result.returncode}): {stderr}",
+ )
+ return False
+ except (subprocess.TimeoutExpired, OSError) as e:
+ log("WARN", f"post_self_review_comment: {type(e).__name__}: {e}")
+ return False
+
+
def _extract_agent_notes(repo_dir: str, branch: str, config: TaskConfig) -> str | None:
"""Extract the "## Agent notes" section from the PR body.
diff --git a/agent/src/prompts/__init__.py b/agent/src/prompts/__init__.py
index a864e013..2dd586a5 100644
--- a/agent/src/prompts/__init__.py
+++ b/agent/src/prompts/__init__.py
@@ -4,6 +4,7 @@
from .new_task import NEW_TASK_WORKFLOW
from .pr_iteration import PR_ITERATION_WORKFLOW
from .pr_review import PR_REVIEW_WORKFLOW
+from .self_review import SELF_REVIEW_PROMPT as SELF_REVIEW_PROMPT
_PROMPTS = {
"new_task": BASE_PROMPT.replace("{workflow}", NEW_TASK_WORKFLOW),
diff --git a/agent/src/prompts/self_review.py b/agent/src/prompts/self_review.py
new file mode 100644
index 00000000..25cf47a2
--- /dev/null
+++ b/agent/src/prompts/self_review.py
@@ -0,0 +1,61 @@
+"""Self-review prompt template for pre-PR diff critique."""
+
+SELF_REVIEW_PROMPT = """\
+You are reviewing your own work before it becomes a pull request. Below is the \
+cumulative diff of all changes on this branch compared to the base branch.
+
+
+{diff}
+
+
+## Task context
+
+{task_description}
+
+## Review checklist
+
+Examine the diff carefully for:
+
+1. **Correctness** — Logic errors, off-by-one mistakes, missing edge cases, \
+incorrect assumptions about data shapes or API contracts.
+2. **Bugs** — Null/undefined dereferences, unhandled error paths, resource leaks, \
+race conditions.
+3. **Security** — Injection vulnerabilities (SQL, command, XSS), hardcoded secrets, \
+insecure defaults, OWASP Top 10 issues.
+4. **Style & consistency** — Naming conventions, code style violations relative to \
+the surrounding codebase, unnecessary complexity.
+5. **Test gaps** — Important behaviour that is untested, assertions that don't \
+verify the right thing, missing edge-case coverage.
+
+## Instructions
+
+- If you find issues, fix them directly: edit the files, run the build/tests to \
+verify your fixes, and commit the changes.
+- If no issues are found, stop immediately — do not make changes for the sake of \
+making changes.
+- Do NOT refactor code that was not part of the original diff unless it has a \
+concrete bug or security issue.
+- Keep fixes minimal and focused. Each fix should be a separate commit with a \
+clear message.
+
+## Summary output
+
+After completing your review (whether you made fixes or not), write a file \
+`.self-review-summary.md` in the repository root with your findings in this format:
+
+```markdown
+### Self-Review Summary
+
+**Findings:**
+**Fixes applied:**
+
+#### Issues found
+
+- : —
+```
+
+If no issues were found, write the file with: "No issues found — code looks good."
+
+This file is a pipeline artifact and will be deleted automatically — it will NOT \
+appear in the pull request.
+"""
diff --git a/agent/src/self_review.py b/agent/src/self_review.py
new file mode 100644
index 00000000..bb53e3b7
--- /dev/null
+++ b/agent/src/self_review.py
@@ -0,0 +1,239 @@
+"""Self-review orchestration: LLM critiques its own diff before PR creation."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import os
+import subprocess
+from typing import TYPE_CHECKING
+
+from prompts.self_review import SELF_REVIEW_PROMPT
+from shell import log
+
+if TYPE_CHECKING:
+ from models import AgentResult, RepoSetup, TaskConfig
+ from progress_writer import _ProgressWriter
+ from telemetry import _TrajectoryWriter
+
+# Diff truncation limit (characters). Large diffs are cut at hunk boundaries.
+_MAX_DIFF_CHARS = 60_000
+
+# Minimal system prompt for the self-review agent invocation.
+_REVIEW_SYSTEM_PROMPT = """\
+You are a code reviewer working inside the repository {repo_url} on branch {branch_name}.
+Your working directory is {repo_dir}.
+
+You have full access to the filesystem and can run commands. Fix any issues you \
+find directly — edit files, run the build, and commit fixes. Keep changes minimal \
+and focused.
+
+Do NOT open a pull request or push. Just fix issues and commit locally.
+"""
+
+
+def _get_diff(repo_dir: str, default_branch: str) -> str:
+ """Generate the cumulative diff of the branch vs origin/{default_branch}."""
+ try:
+ result = subprocess.run(
+ ["git", "diff", f"origin/{default_branch}...HEAD"],
+ cwd=repo_dir,
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+ if result.returncode != 0:
+ log("WARN", f"self_review: git diff failed (exit {result.returncode})")
+ return ""
+ return result.stdout
+ except (subprocess.TimeoutExpired, OSError) as e:
+ log("WARN", f"self_review: git diff error: {type(e).__name__}: {e}")
+ return ""
+
+
+def _truncate_diff(diff: str, max_chars: int = _MAX_DIFF_CHARS) -> str:
+ """Truncate diff at a hunk boundary if it exceeds max_chars.
+
+ Cuts at the last complete hunk (line starting with '@@') that fits
+ within the limit, appending a truncation notice.
+ """
+ if len(diff) <= max_chars:
+ return diff
+
+ # Find the last hunk header that starts before max_chars
+ truncated = diff[:max_chars]
+ last_hunk = truncated.rfind("\n@@")
+ if last_hunk > 0:
+ # Cut just before this hunk header
+ truncated = truncated[:last_hunk]
+ else:
+ # No hunk boundary found — hard-cut at max_chars
+ last_newline = truncated.rfind("\n")
+ if last_newline > 0:
+ truncated = truncated[:last_newline]
+
+ total_lines = diff.count("\n")
+ kept_lines = truncated.count("\n")
+ truncated += (
+ f"\n\n... [diff truncated: showing ~{kept_lines} of ~{total_lines} lines; "
+ f"{len(diff) - len(truncated)} chars omitted] ..."
+ )
+ return truncated
+
+
+def _build_review_system_prompt(config: TaskConfig, setup: RepoSetup) -> str:
+ """Build a minimal system prompt for the self-review agent."""
+ return _REVIEW_SYSTEM_PROMPT.format(
+ repo_url=config.repo_url,
+ branch_name=setup.branch,
+ repo_dir=setup.repo_dir,
+ )
+
+
+def run_self_review(
+ config: TaskConfig,
+ setup: RepoSetup,
+ agent_result: AgentResult,
+ trajectory: _TrajectoryWriter,
+ progress: _ProgressWriter,
+) -> AgentResult | None:
+ """Run the self-review phase: LLM critiques its own diff and fixes issues.
+
+ Returns the AgentResult from the review phase, or None if skipped.
+ Fail-open: errors are logged but never block the pipeline.
+ """
+ # Skip condition: feature disabled
+ if not config.self_review_enabled:
+ log("TASK", "self_review: disabled (self_review_enabled=False)")
+ return None
+
+ # Skip condition: pr_review is read-only (no diff to review)
+ if config.task_type == "pr_review":
+ log("TASK", "self_review: skipped for pr_review task type")
+ return None
+
+ # Compute remaining turns
+ used_turns = agent_result.turns or 0
+ remaining_turns = config.max_turns - used_turns
+ review_turns = min(remaining_turns, config.self_review_max_turns)
+ if review_turns <= 0:
+ log("TASK", f"self_review: no remaining turns (used={used_turns}, max={config.max_turns})")
+ return None
+
+ # Compute remaining budget
+ review_budget: float | None = None
+ if config.max_budget_usd is not None:
+ used_cost = agent_result.cost_usd or 0.0
+ remaining_budget = config.max_budget_usd - used_cost
+ if remaining_budget <= 0:
+ log(
+ "TASK",
+ f"self_review: no remaining budget "
+ f"(used=${used_cost:.2f}, max=${config.max_budget_usd:.2f})",
+ )
+ return None
+ review_budget = remaining_budget
+
+ # Get the diff
+ diff = _get_diff(setup.repo_dir, setup.default_branch)
+ if not diff.strip():
+ log("TASK", "self_review: no diff found — skipping")
+ return None
+
+ # Truncate if needed
+ diff = _truncate_diff(diff)
+
+ # Build the review prompt
+ task_desc = config.task_description or f"Issue #{config.issue_number}"
+ user_prompt = SELF_REVIEW_PROMPT.format(diff=diff, task_description=task_desc)
+ system_prompt = _build_review_system_prompt(config, setup)
+
+ # Build a modified config for the review run
+ review_config = config.model_copy(
+ update={
+ "max_turns": review_turns,
+ "max_budget_usd": review_budget,
+ }
+ )
+
+ log(
+ "TASK",
+ f"self_review: starting (turns={review_turns}, "
+ f"budget={'$' + f'{review_budget:.2f}' if review_budget else 'unlimited'}, "
+ f"diff_chars={len(diff)})",
+ )
+ progress.write_agent_milestone(
+ "self_review_started",
+ f"turns={review_turns} diff_chars={len(diff)}",
+ )
+
+ try:
+ from runner import run_agent
+
+ review_result = asyncio.run(
+ run_agent(
+ user_prompt,
+ system_prompt,
+ review_config,
+ cwd=setup.repo_dir,
+ trajectory=trajectory,
+ )
+ )
+ except Exception as e:
+ # Fail-open: self-review errors never block the pipeline
+ log("WARN", f"self_review: agent execution failed: {type(e).__name__}: {e}")
+ progress.write_agent_milestone(
+ "self_review_complete",
+ f"status=error error={type(e).__name__}: {e}",
+ )
+ return None
+
+ log(
+ "TASK",
+ f"self_review: complete (status={review_result.status}, "
+ f"turns={review_result.turns}, cost=${review_result.cost_usd or 0:.4f})",
+ )
+ progress.write_agent_milestone(
+ "self_review_complete",
+ f"status={review_result.status} turns={review_result.turns}",
+ )
+ return review_result
+
+
+_SUMMARY_FILENAME = ".self-review-summary.md"
+
+
+def read_self_review_summary(repo_dir: str) -> str | None:
+ """Read and delete the self-review summary file.
+
+ The self-review agent writes `.self-review-summary.md` in the repo root.
+ This function reads the content, removes the file (so it never appears in
+ the PR), and returns the content. Returns None if the file doesn't exist.
+ """
+ summary_path = os.path.join(repo_dir, _SUMMARY_FILENAME)
+ if not os.path.isfile(summary_path):
+ return None
+
+ try:
+ with open(summary_path) as f:
+ content = f.read()
+ except OSError as e:
+ log("WARN", f"self_review: failed to read summary file: {type(e).__name__}: {e}")
+ return None
+
+ # Remove the file so it doesn't end up in the PR
+ try:
+ os.remove(summary_path)
+ except OSError as e:
+ log("WARN", f"self_review: failed to delete summary file: {type(e).__name__}: {e}")
+
+ # If the file was staged by the agent, unstage it
+ with contextlib.suppress(subprocess.TimeoutExpired, OSError):
+ subprocess.run(
+ ["git", "rm", "--cached", "--ignore-unmatch", "-f", _SUMMARY_FILENAME],
+ cwd=repo_dir,
+ capture_output=True,
+ timeout=30,
+ )
+
+ return content.strip() if content.strip() else None
diff --git a/agent/src/server.py b/agent/src/server.py
index b01df8da..55c3779b 100644
--- a/agent/src/server.py
+++ b/agent/src/server.py
@@ -386,6 +386,8 @@ def _run_task_background(
user_id: str = "",
workload_access_token: str = "",
attachments: list[dict] | None = None,
+ self_review_enabled: bool = False,
+ self_review_max_turns: int = 5,
) -> None:
"""Run the agent task in a background thread."""
global _background_pipeline_failed
@@ -469,6 +471,8 @@ def _run_task_background(
trace=trace,
user_id=user_id,
attachments=attachments,
+ self_review_enabled=self_review_enabled,
+ self_review_max_turns=self_review_max_turns,
)
_background_pipeline_failed = False
except Exception as e:
@@ -557,6 +561,9 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
channel_source = inp.get("channel_source", "") or ""
channel_metadata = inp.get("channel_metadata") or {}
attachments = inp.get("attachments") or []
+ # Self-review (opt-in): LLM critiques its own diff before PR creation.
+ self_review_enabled = inp.get("self_review_enabled") is True
+ self_review_max_turns = int(inp.get("self_review_max_turns", 5))
# ``trace`` is strictly opt-in (design §10.1). Accept only real
# booleans from the orchestrator — a string "false" would otherwise
# flip the flag on.
@@ -630,6 +637,8 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
"user_id": user_id,
"workload_access_token": workload_access_token,
"attachments": attachments,
+ "self_review_enabled": self_review_enabled,
+ "self_review_max_turns": self_review_max_turns,
}
diff --git a/agent/tests/test_self_review.py b/agent/tests/test_self_review.py
new file mode 100644
index 00000000..e2a817f1
--- /dev/null
+++ b/agent/tests/test_self_review.py
@@ -0,0 +1,484 @@
+"""Unit tests for self_review.py — self-review orchestration module."""
+
+import os
+from unittest.mock import MagicMock, patch
+
+from models import AgentResult, RepoSetup, TaskConfig
+from self_review import (
+ _MAX_DIFF_CHARS,
+ _SUMMARY_FILENAME,
+ _get_diff,
+ _truncate_diff,
+ read_self_review_summary,
+ run_self_review,
+)
+
+
+def _make_config(**overrides) -> TaskConfig:
+ """Create a minimal TaskConfig for testing."""
+ defaults = {
+ "repo_url": "owner/repo",
+ "github_token": "ghp_test123",
+ "aws_region": "us-east-1",
+ "task_description": "Fix the bug",
+ "max_turns": 10,
+ "self_review_enabled": True,
+ "self_review_max_turns": 5,
+ "task_type": "new_task",
+ }
+ defaults.update(overrides)
+ return TaskConfig(**defaults)
+
+
+def _make_setup(**overrides) -> RepoSetup:
+ """Create a minimal RepoSetup for testing."""
+ defaults = {
+ "repo_dir": "/workspace/repo",
+ "branch": "feat/123-fix",
+ "default_branch": "main",
+ }
+ defaults.update(overrides)
+ return RepoSetup(**defaults)
+
+
+def _make_agent_result(**overrides) -> AgentResult:
+ """Create a minimal AgentResult for testing."""
+ defaults = {
+ "status": "success",
+ "turns": 5,
+ "num_turns": 5,
+ "cost_usd": 0.50,
+ }
+ defaults.update(overrides)
+ return AgentResult(**defaults)
+
+
+class TestSkipConditions:
+ """Test all conditions that cause self-review to be skipped."""
+
+ def test_skip_when_disabled(self):
+ config = _make_config(self_review_enabled=False)
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ def test_skip_for_pr_review_task_type(self):
+ config = _make_config(
+ task_type="pr_review",
+ pr_number="42",
+ task_description="",
+ issue_number="",
+ )
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ def test_skip_when_no_remaining_turns(self):
+ config = _make_config(max_turns=5, self_review_max_turns=5)
+ setup = _make_setup()
+ agent_result = _make_agent_result(turns=5)
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ def test_skip_when_no_remaining_budget(self):
+ config = _make_config(max_budget_usd=1.0)
+ setup = _make_setup()
+ agent_result = _make_agent_result(cost_usd=1.0)
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ @patch("self_review._get_diff", return_value="")
+ def test_skip_when_empty_diff(self, mock_diff):
+ config = _make_config()
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ @patch("self_review._get_diff", return_value=" \n \n ")
+ def test_skip_when_whitespace_only_diff(self, mock_diff):
+ config = _make_config()
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+
+class TestGetDiff:
+ """Test _get_diff helper."""
+
+ @patch("self_review.subprocess.run")
+ def test_returns_diff_output(self, mock_run):
+ mock_run.return_value = MagicMock(returncode=0, stdout="diff --git a/f\n+line\n")
+ result = _get_diff("/repo", "main")
+ assert result == "diff --git a/f\n+line\n"
+ mock_run.assert_called_once_with(
+ ["git", "diff", "origin/main...HEAD"],
+ cwd="/repo",
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+
+ @patch("self_review.subprocess.run")
+ def test_returns_empty_on_failure(self, mock_run):
+ mock_run.return_value = MagicMock(returncode=1, stdout="")
+ result = _get_diff("/repo", "main")
+ assert result == ""
+
+ @patch("self_review.subprocess.run")
+ def test_returns_empty_on_timeout(self, mock_run):
+ import subprocess
+
+ mock_run.side_effect = subprocess.TimeoutExpired(cmd="git", timeout=60)
+ result = _get_diff("/repo", "main")
+ assert result == ""
+
+ @patch("self_review.subprocess.run")
+ def test_uses_custom_default_branch(self, mock_run):
+ mock_run.return_value = MagicMock(returncode=0, stdout="some diff")
+ _get_diff("/repo", "develop")
+ mock_run.assert_called_once_with(
+ ["git", "diff", "origin/develop...HEAD"],
+ cwd="/repo",
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+
+
+class TestTruncateDiff:
+ """Test _truncate_diff helper."""
+
+ def test_no_truncation_needed(self):
+ short_diff = "diff --git a/file.py\n@@ -1,3 +1,4 @@\n+new line\n"
+ result = _truncate_diff(short_diff)
+ assert result == short_diff
+
+ def test_truncates_at_hunk_boundary(self):
+ # Build a diff that exceeds the limit with multiple hunks
+ hunk1 = "diff --git a/file.py\n@@ -1,3 +1,4 @@\n" + "+line\n" * 100
+ hunk2 = "\n@@ -100,3 +101,4 @@\n" + "+more\n" * 100
+ big_diff = hunk1 + hunk2
+ result = _truncate_diff(big_diff, max_chars=len(hunk1) + 10)
+ # Should truncate before hunk2's @@ marker
+ assert "@@ -100,3" not in result
+ assert "truncated" in result
+
+ def test_hard_cut_when_no_hunk_boundary(self):
+ # Single hunk that exceeds max
+ big_diff = "+x\n" * 30000
+ result = _truncate_diff(big_diff, max_chars=100)
+ assert len(result) < len(big_diff)
+ assert "truncated" in result
+
+ def test_exact_limit_no_truncation(self):
+ diff = "a" * _MAX_DIFF_CHARS
+ result = _truncate_diff(diff)
+ assert result == diff
+
+ def test_one_over_limit_truncates(self):
+ diff = "a" * (_MAX_DIFF_CHARS + 1)
+ result = _truncate_diff(diff)
+ assert "truncated" in result
+
+
+class TestBudgetAndTurnComputation:
+ """Test that budget and turn limits are computed correctly."""
+
+ @patch("self_review._get_diff", return_value="diff --git a/f\n+line\n")
+ @patch("self_review.asyncio.run")
+ def test_review_turns_capped_at_self_review_max_turns(self, mock_asyncio_run, mock_diff):
+ mock_asyncio_run.return_value = AgentResult(status="success", turns=2, num_turns=2)
+ config = _make_config(max_turns=100, self_review_max_turns=3)
+ setup = _make_setup()
+ agent_result = _make_agent_result(turns=5)
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ run_self_review(config, setup, agent_result, trajectory, progress)
+
+ # The review config passed to run_agent should have max_turns=3
+ call_args = mock_asyncio_run.call_args
+ coro = call_args[0][0]
+ # Close the coroutine to avoid warnings
+ coro.close()
+
+ @patch("self_review._get_diff", return_value="diff --git a/f\n+line\n")
+ @patch("self_review.asyncio.run")
+ def test_review_turns_uses_remaining_when_less_than_cap(self, mock_asyncio_run, mock_diff):
+ mock_asyncio_run.return_value = AgentResult(status="success", turns=1, num_turns=1)
+ config = _make_config(max_turns=8, self_review_max_turns=5)
+ setup = _make_setup()
+ agent_result = _make_agent_result(turns=6) # Only 2 remaining
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ run_self_review(config, setup, agent_result, trajectory, progress)
+
+ # Should use min(2, 5) = 2 turns
+ call_args = mock_asyncio_run.call_args
+ coro = call_args[0][0]
+ coro.close()
+
+
+class TestHappyPath:
+ """Test the full happy path with mocked run_agent."""
+
+ @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+ @patch("self_review.asyncio.run")
+ def test_returns_review_result(self, mock_asyncio_run, mock_diff):
+ review_agent_result = AgentResult(
+ status="success", turns=2, num_turns=2, cost_usd=0.10
+ )
+ mock_asyncio_run.return_value = review_agent_result
+
+ config = _make_config()
+ setup = _make_setup()
+ agent_result = _make_agent_result(turns=5)
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+
+ assert result is not None
+ assert result.status == "success"
+ assert result.turns == 2
+ assert result.cost_usd == 0.10
+
+ @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+ @patch("self_review.asyncio.run")
+ def test_writes_progress_milestones(self, mock_asyncio_run, mock_diff):
+ mock_asyncio_run.return_value = AgentResult(
+ status="success", turns=1, num_turns=1, cost_usd=0.05
+ )
+ config = _make_config()
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ run_self_review(config, setup, agent_result, trajectory, progress)
+
+ # Should write both started and complete milestones
+ milestone_calls = progress.write_agent_milestone.call_args_list
+ assert len(milestone_calls) == 2
+ assert milestone_calls[0][0][0] == "self_review_started"
+ assert milestone_calls[1][0][0] == "self_review_complete"
+
+ @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+ @patch("self_review.asyncio.run")
+ def test_fail_open_on_agent_error(self, mock_asyncio_run, mock_diff):
+ mock_asyncio_run.side_effect = RuntimeError("SDK crashed")
+
+ config = _make_config()
+ setup = _make_setup()
+ agent_result = _make_agent_result()
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ # Should not raise
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is None
+
+ # Should still write milestones
+ milestone_calls = progress.write_agent_milestone.call_args_list
+ assert milestone_calls[0][0][0] == "self_review_started"
+ assert milestone_calls[1][0][0] == "self_review_complete"
+ assert "error" in milestone_calls[1][0][1]
+
+ @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+ @patch("self_review.asyncio.run")
+ def test_works_with_unlimited_budget(self, mock_asyncio_run, mock_diff):
+ mock_asyncio_run.return_value = AgentResult(
+ status="success", turns=1, num_turns=1, cost_usd=0.03
+ )
+ config = _make_config(max_budget_usd=None)
+ setup = _make_setup()
+ agent_result = _make_agent_result(cost_usd=None)
+ trajectory = MagicMock()
+ progress = MagicMock()
+
+ result = run_self_review(config, setup, agent_result, trajectory, progress)
+ assert result is not None
+ assert result.status == "success"
+
+
+class TestReadSelfReviewSummary:
+ """Tests for read_self_review_summary — reads and cleans up the summary file."""
+
+ def test_returns_content_when_file_exists(self, tmp_path):
+ summary_content = (
+ "### Self-Review Summary\n\n"
+ "**Findings:** 2\n"
+ "**Fixes applied:** 1\n\n"
+ "#### Issues found\n\n"
+ "- Security: hardcoded token — fixed\n"
+ "- Style: inconsistent naming — not fixed (cosmetic)\n"
+ )
+ (tmp_path / _SUMMARY_FILENAME).write_text(summary_content)
+
+ result = read_self_review_summary(str(tmp_path))
+
+ assert result == summary_content.strip()
+
+ def test_returns_none_when_file_missing(self, tmp_path):
+ result = read_self_review_summary(str(tmp_path))
+ assert result is None
+
+ def test_deletes_file_after_reading(self, tmp_path):
+ (tmp_path / _SUMMARY_FILENAME).write_text("No issues found — code looks good.")
+
+ read_self_review_summary(str(tmp_path))
+
+ assert not (tmp_path / _SUMMARY_FILENAME).exists()
+
+ def test_returns_none_for_empty_file(self, tmp_path):
+ (tmp_path / _SUMMARY_FILENAME).write_text(" \n\n ")
+
+ result = read_self_review_summary(str(tmp_path))
+
+ assert result is None
+
+ def test_returns_none_for_whitespace_only(self, tmp_path):
+ (tmp_path / _SUMMARY_FILENAME).write_text("\t\n ")
+
+ result = read_self_review_summary(str(tmp_path))
+
+ assert result is None
+
+ @patch("self_review.subprocess.run")
+ def test_runs_git_rm_cached_for_cleanup(self, mock_run, tmp_path):
+ (tmp_path / _SUMMARY_FILENAME).write_text("Some findings")
+ mock_run.return_value = MagicMock(returncode=0)
+
+ read_self_review_summary(str(tmp_path))
+
+ mock_run.assert_called_once_with(
+ ["git", "rm", "--cached", "--ignore-unmatch", "-f", _SUMMARY_FILENAME],
+ cwd=str(tmp_path),
+ capture_output=True,
+ timeout=30,
+ )
+
+ @patch("self_review.subprocess.run", side_effect=OSError("git not found"))
+ def test_git_rm_failure_does_not_block(self, mock_run, tmp_path):
+ (tmp_path / _SUMMARY_FILENAME).write_text("Findings here")
+
+ # Should not raise even if git rm fails
+ result = read_self_review_summary(str(tmp_path))
+
+ assert result == "Findings here"
+
+
+class TestPostSelfReviewComment:
+ """Tests for post_hooks.post_self_review_comment."""
+
+ def test_posts_comment_on_success(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1\n**Fixes applied:** 1")
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/pull/42"
+
+ with patch("post_hooks.subprocess.run") as mock_run:
+ mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
+ result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+ assert result is True
+ call_args = mock_run.call_args[0][0]
+ assert call_args[0:3] == ["gh", "pr", "comment"]
+ assert "42" in call_args
+ assert "owner/repo" in call_args
+
+ def test_returns_false_when_no_summary(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/pull/42"
+
+ result = post_self_review_comment(str(tmp_path), pr_url, config)
+ assert result is False
+
+ def test_returns_false_on_invalid_pr_url(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ (tmp_path / _SUMMARY_FILENAME).write_text("Some findings")
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/issues/42"
+
+ result = post_self_review_comment(str(tmp_path), pr_url, config)
+ assert result is False
+
+ def test_returns_false_on_gh_failure(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1")
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/pull/99"
+
+ with patch("post_hooks.subprocess.run") as mock_run:
+ mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="not found")
+ result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+ assert result is False
+
+ def test_fail_open_on_exception(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1")
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/pull/5"
+
+ with patch("post_hooks.subprocess.run", side_effect=OSError("network error")):
+ result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+ assert result is False
+
+ def test_comment_body_includes_header(self, tmp_path):
+ from post_hooks import post_self_review_comment
+
+ (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 0\nNo issues.")
+
+ config = MagicMock()
+ config.repo_url = "owner/repo"
+ pr_url = "https://github.com/owner/repo/pull/7"
+
+ with patch("post_hooks.subprocess.run") as mock_run:
+ mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
+ post_self_review_comment(str(tmp_path), pr_url, config)
+
+ call_args = mock_run.call_args[0][0]
+ body_idx = call_args.index("--body") + 1
+ body = call_args[body_idx]
+ assert "Self-Review Summary" in body
+ assert "**Findings:** 0" in body