From d8cb85184734a08dce254126d857c83f1086b1dc Mon Sep 17 00:00:00 2001 From: Shuveb Hussain Date: Sat, 16 May 2026 20:29:26 -0500 Subject: [PATCH 1/2] Ralph two-mechanism sandbox + security-review hardening Adds the Ralph orchestrator's two-mechanism, hard-fail sandbox (macOS sandbox-exec / Linux bwrap; no unsandboxed mode) and applies the fixes from a multi-agent security review of the change. Sandbox feature: - sandbox/ package: models, detect, exceptions, wrapper, adapters (sandbox_exec + bwrap), packaged SBPL profile (claude.sb). - Detection hard-fails before any agent subprocess; AgentRunner always pairs the OS sandbox with --dangerously-skip-permissions. Security-review fixes: - C1: post-construction SandboxError now propagates out of the orchestrator (re-raise before the broad except) and hard-aborts via CLI/TUI wiring instead of being downgraded to a per-feature failure. - C2: SBPL /dev grant narrowed from a blanket (subpath "/dev") to a curated device-node allow-list; comment corrected to a bounded trade-off. Verified to compile under real sandbox-exec. - H1: line-anchored denial-signature matching ([Sandbox] prefix stripped); truncated stderr-drain / unattributed non-zero exit now logged instead of silent. - I2: fail-loud SandboxPermissions/SandboxConfig validation (no writable filesystem-root / bare $HOME; project_dir agreement). - I3: subprocess cwd pinned to the sandbox's realpath'd project_dir. - I4: TUI fail-closed paths now covered by tests. - I5: stale docker refs and drift-prone _DEFAULT_BASE_URL removed; CLAUDE.md synced. - Hardening: import-time dispatch-totality assertion; bwrap --die-with-parent/--chdir; control-char sanitization of surfaced sandbox stderr; observability/log-severity parity. Tests: 1000 unit tests pass (incl. real sandbox-exec SBPL compile on macOS). No new ruff/mypy violations introduced. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 10 + src/mfbt/commands/ralph/__init__.py | 40 +- src/mfbt/commands/ralph/agent.py | 431 +++++++++-- src/mfbt/commands/ralph/display.py | 17 + src/mfbt/commands/ralph/orchestrator.py | 86 ++- src/mfbt/commands/ralph/profiles/__init__.py | 9 + src/mfbt/commands/ralph/profiles/claude.sb | 77 ++ src/mfbt/commands/ralph/ralph_widgets.py | 15 + src/mfbt/commands/ralph/sandbox/__init__.py | 45 ++ .../ralph/sandbox/adapters/__init__.py | 90 +++ .../commands/ralph/sandbox/adapters/bwrap.py | 149 ++++ .../ralph/sandbox/adapters/sandbox_exec.py | 195 +++++ src/mfbt/commands/ralph/sandbox/detect.py | 270 +++++++ src/mfbt/commands/ralph/sandbox/exceptions.py | 122 ++++ src/mfbt/commands/ralph/sandbox/models.py | 296 ++++++++ src/mfbt/commands/ralph/sandbox/wrapper.py | 282 ++++++++ src/mfbt/commands/ralph/tui_display.py | 44 +- src/mfbt/commands/ralph/types.py | 5 + src/mfbt/tui/app.py | 67 +- src/mfbt/tui/app.tcss | 28 + src/mfbt/tui/screens/confirm_modal.py | 81 +++ src/mfbt/tui/screens/feature_list.py | 98 ++- tests/unit/ralph/sandbox/__init__.py | 0 tests/unit/ralph/sandbox/adapters/__init__.py | 1 + tests/unit/ralph/sandbox/adapters/conftest.py | 40 ++ .../unit/ralph/sandbox/adapters/test_bwrap.py | 178 +++++ .../ralph/sandbox/adapters/test_dispatch.py | 59 ++ .../sandbox/adapters/test_sandbox_exec.py | 357 ++++++++++ tests/unit/ralph/sandbox/test_detect.py | 295 ++++++++ tests/unit/ralph/sandbox/test_exceptions.py | 93 +++ tests/unit/ralph/sandbox/test_models.py | 280 ++++++++ tests/unit/ralph/sandbox/test_wrapper.py | 373 ++++++++++ tests/unit/ralph/test_agent.py | 668 +++++++++++++++++- tests/unit/ralph/test_cli.py | 112 ++- tests/unit/ralph/test_display.py | 15 + tests/unit/ralph/test_orchestrator.py | 226 +++++- tests/unit/ralph/test_tui_display.py | 62 ++ tests/unit/tui/test_app.py | 108 +++ tests/unit/tui/test_confirm_modal.py | 60 ++ tests/unit/tui/test_ralph_panel.py | 142 ++++ 40 files changed, 5391 insertions(+), 135 deletions(-) create mode 100644 src/mfbt/commands/ralph/profiles/__init__.py create mode 100644 src/mfbt/commands/ralph/profiles/claude.sb create mode 100644 src/mfbt/commands/ralph/sandbox/__init__.py create mode 100644 src/mfbt/commands/ralph/sandbox/adapters/__init__.py create mode 100644 src/mfbt/commands/ralph/sandbox/adapters/bwrap.py create mode 100644 src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py create mode 100644 src/mfbt/commands/ralph/sandbox/detect.py create mode 100644 src/mfbt/commands/ralph/sandbox/exceptions.py create mode 100644 src/mfbt/commands/ralph/sandbox/models.py create mode 100644 src/mfbt/commands/ralph/sandbox/wrapper.py create mode 100644 src/mfbt/tui/screens/confirm_modal.py create mode 100644 tests/unit/ralph/sandbox/__init__.py create mode 100644 tests/unit/ralph/sandbox/adapters/__init__.py create mode 100644 tests/unit/ralph/sandbox/adapters/conftest.py create mode 100644 tests/unit/ralph/sandbox/adapters/test_bwrap.py create mode 100644 tests/unit/ralph/sandbox/adapters/test_dispatch.py create mode 100644 tests/unit/ralph/sandbox/adapters/test_sandbox_exec.py create mode 100644 tests/unit/ralph/sandbox/test_detect.py create mode 100644 tests/unit/ralph/sandbox/test_exceptions.py create mode 100644 tests/unit/ralph/sandbox/test_models.py create mode 100644 tests/unit/ralph/sandbox/test_wrapper.py create mode 100644 tests/unit/tui/test_confirm_modal.py diff --git a/CLAUDE.md b/CLAUDE.md index c68baa8..a7c17af 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -106,5 +106,15 @@ This project uses the **mfbt MCP server**, which exposes a virtual filesystem (V - **Ralph in TUI:** Integrated into main TUI (`r` key), not a standalone app. Ralph runs *in place* inside the unified `FeatureListPanel` (`tui/screens/feature_list.py`) in `#main-content` — there is no separate Ralph panel — with `PreflightModal` for agent checks. Standalone `tui_app.py` and the old `ralph_panel.py` are deleted. - **Ralph subcommand:** `src/mfbt/commands/ralph/` — orchestrator, display (console), tui_display (Textual adapter), ralph_widgets (TUI widgets), agent runner, progress (API), prompt builder, types. - **Display duck typing:** `RalphOrchestrator.display` is typed as `Any` — both `RalphDisplay` and `RalphTUIDisplay` are structurally compatible (same display protocol). +- **Ralph sandboxing (two-mechanism, hard-fail — current design):** Exactly **two** sandbox mechanisms, each a **hard requirement** on its platform: `sandbox-exec` on macOS, `bwrap` (bubblewrap) on Linux. There is **no** unsandboxed mode, no `docker`/`firejail`, no config override, no `--sandbox`/`--yes` flag, no consent/feedback layer. If the platform's mechanism is unavailable (or the platform is neither macOS nor Linux), Ralph **aborts before any agent subprocess** with an actionable message. + - **`sandbox/` package:** `models.py` (`SandboxMechanism`={`SANDBOX_EXEC`,`BWRAP`} only; `SandboxConfig`, `SandboxPermissions`, `resolve_paths`), `detect.py`, `exceptions.py`, `wrapper.py`, `adapters/` (`sandbox_exec.py`, `bwrap.py`, `__init__.py`). No `models` `SandboxResult`/`SandboxType`/`CandidateOutcome`/`SandboxDetectionReport`; no `feedback.py`; no `docker.py`/`firejail.py`. `models.py` imports the sibling leaf `exceptions` (acyclic, still no external deps / no import-time side effects) and does **fail-loud, filesystem-free `__post_init__` validation**: `SandboxPermissions` rejects a writable filesystem-root or bare `$HOME` (incl. `project_dir`) → `SandboxSetupError`; `SandboxConfig` rejects `project_dir != permissions.project_dir` (the bound `permissions` is stored as-is, no silent replace). `SandboxConfig.base_url` is **inert** (no adapter consumes it; never a confinement input). + - **Detection:** one function `detect_sandbox(which_fn=shutil.which, system_fn=platform.system, *, run_fn=subprocess.run) -> SandboxMechanism`. macOS→`SANDBOX_EXEC` (binary-presence check), Linux→`BWRAP` (functional liveness probe `bwrap --ro-bind / / true`), else→raise `SandboxDetectionError`. No `detect_sandbox_with_report`, no `explicit=`, no config loader. `detect.py` does **not** import `mfbt.config` (cycle severed; `config.validate_config` has no sandbox block; no `_VALID_SANDBOX_TYPES`/`_CONFIG_OVERRIDE_KEY`). + - **Adapters:** each is pure `build_prefix(config: SandboxConfig) -> list[str]` + `classify_exit(rc, sandbox_stderr) -> SandboxError|None`. `adapters/__init__.py` `get_adapter`/`get_classifier` are strict 2-entry dispatch (unknown mechanism → `SandboxError`, no identity/no-op). SBPL asset `commands/ralph/profiles/claude.sb` (MCLI-126 write-confine: `(allow default)` + global write-deny + per-writable re-allow + a **curated `/dev` device-node allow-list** — `/dev/null|zero|random|urandom|tty|dtracehelper|ptmx`, `^/dev/ttys[0-9]+$`, `(subpath "/dev/fd")` — **not** a blanket `(subpath "/dev")`; kept a single line so the comment-breakout test's balanced-sexpr invariant holds; network open) loaded via `importlib.resources`; hatchling auto-bundles non-`.py` package files. `classify_exit` re-attributes a non-zero exit to the sandbox **only** on the sandbox's own **line-anchored** stderr signature (`[Sandbox] ` wrapper prefix stripped first; sandbox_exec: `deny(\d+)` regex anywhere on a line OR a line starting `sandbox-exec:`; bwrap: a line starting `bwrap:`); else `None` = Claude's own exit, returned as `AgentResult`. + - **`SandboxWrapper`:** `wrap_command(cmd)` canonical (`wrap(cmd)` alias); `prepare()` is a **no-arg no-op** lifecycle hook; `cleanup()` removes the sandbox-exec SBPL temp profile in `run()`'s `finally`. `mechanism=None`→`detect_sandbox()` opt-in (orchestrator passes a concrete mechanism; bad value→`SandboxError`). Sandbox stderr: pure `format_sandbox_stderr` (`[Sandbox] ` prefix) + `note_stderr_line` (always attributes) / `get_sandbox_stderr`; sandbox tool + `claude` share ONE stderr fd by design (attribution by `[Sandbox] ` marker, not OS-fd — do **not** split processes). `AgentResult.sandbox_stderr` defaulted. + - **`AgentRunner`:** `__init__(*, project_dir=None, base_url=None)` (no `assume_yes`/`presented`). `sandbox_wrapper is None`→auto-detect once; injected wrapper used as-is. The no-wrapper default `base_url` is sourced from `mfbt.config.DEFAULT_CONFIG["base_url"]` (single source — no hand-synced `_DEFAULT_BASE_URL` constant). `build_command` **always** inserts `--dangerously-skip-permissions` at argv[1] (a real OS sandbox is always the boundary; headless `claude -p`/`stdin=DEVNULL` can't answer tool prompts). The subprocess `Popen` is launched with **`cwd=` pinned to the sandbox's own realpath'd `project_dir`** (single source = the wrapper's `SandboxConfig`; resolved exactly as the adapters resolve it) so the run location is always inside the confined/bound area, never the inherited parent CWD — `cwd` is part of the frozen Popen signature. After the timed `stderr` join, `run()` logs (WARNING) if the drain was truncated, and on a non-zero exit not attributed to a denial logs INFO (WARNING if truncated) so a possibly-missed denial is never fully silent. `run()` = a `try/except SandboxError` setup phase (build→`prepare()`→`wrap_command`) + the subprocess `try/…/finally cleanup()` phase; ordered `except` clauses (base `SandboxError` last) each `_emit_sandbox_error(exc); raise` (re-raise, **not** `SystemExit`). Popen `OSError` EPERM/EACCES→`SandboxDeniedError`. No `_handle_none_sandbox_fallback`/`acknowledge_unsandboxed`/`_emit_sandbox_note`/`_NONE_SANDBOX_WARNING`. + - **Exceptions:** `SandboxError(message, *, mechanism=None)` (`__str__`=`"[] "`); subclasses `SandboxDetectionError`, `SandboxSetupError`, `SandboxDeniedError`, `SandboxCompatibilityError` (reserved, defensively handled, never raised by current paths). All 5 re-exported from `sandbox/__init__`. + - **Hard-fail wiring (TWO chokepoints — construction AND run):** (1) construction: `RalphOrchestrator.__init__` calls `detect_sandbox()`, logs+re-raises `SandboxDetectionError`. (2) post-construction: `orchestrator._implement_feature` has an `except SandboxError: logger.error(...); raise` **before** its broad `except Exception` so a `SandboxSetupError`/`SandboxDeniedError` from `agent.run()` is **never** downgraded to a per-feature `FAILED` (which would loop on a broken sandbox and mask a denial) — it propagates out of `run()`. CLI `commands/ralph/__init__.py`: `except SandboxDetectionError`→`display.error`+`typer.Exit(1)`; `display.sandbox_status(...)` before `orchestrator.run()`; `except SandboxError`→`display.error("sandbox failure — Ralph aborted: …")`+`typer.Exit(1)`; `summary.failed>0`→`typer.Exit(1)`. TUI `feature_list.py::_run_orchestrator`: `except SandboxDetectionError` (now `logger.error(exc_info=True)` — parity with CLI) wrapping the `RalphOrchestrator(...)` construction → notify(error) + `_return_to_browsing`; a separate `except SandboxError` around `run()` → notify("Sandbox failure — Ralph aborted: …", error) + `_return_to_browsing` (both fail closed visibly). `RalphConfig` has no `assume_yes`/`sandbox`; the pre-run `typer.confirm` prompt is gone; `--status` only conflicts with `--quiet`. `RalphHeader._sandbox_label()` always shows the mechanism (no UNSANDBOXED branch). `RalphDisplay`/`RalphTUIDisplay` keep `sandbox_status` (stdout/log, quiet-suppressed, enum `.value` verbatim); `sandbox_warning` removed. + - **Debug:** module-scope `_debug_enabled = False` in `wrapper.py` AND `detect.py` (literal `# TODO: decision needed — wire to -- and `, intentionally unwired; tests `monkeypatch.setattr(, "_debug_enabled", True)`). `[sandbox-debug] ` via `wrapper.format_sandbox_debug`/`emit_debug_sandbox_stderr()`/`detect._probe_liveness`. + - **Test gotchas:** `__init__`'s `logger` param shadows the module logger — use `logging.getLogger(__name__)`. `test_orchestrator.py` autouse fixture patches `orchestrator.detect_sandbox` (NOT `_with_report`) returning a bare `SandboxMechanism`. `test_agent.py` autouse fixture patches `agent.detect_sandbox` + `agent.SandboxWrapper`; `_stub_wrapper` defaults to `SANDBOX_EXEC` (a real sandbox) and sets `classify_exit.return_value=None`, so `build_command` includes `--dangerously-skip-permissions` at index 1 — assert presence, never absence; mock wrappers need `wrap_command.side_effect` (not `.wrap`). `test_detect.py::test_import_has_no_side_effects` must keep the parent-package `detect` attr save/restore in `finally` (full-suite-order hermeticity). Real-wrapper integration tests in `test_agent.py` exercise BWRAP/SANDBOX_EXEC through `run()` mocking only `Popen`/`which`. - **Key files:** `auth_flow.py` (shared OAuth), `coding_agents.py` (agent pre-flight checks), `tui/screens/phase_list.py`, `tui/screens/preflight_modal.py`, `tui/screens/feature_list.py` (unified browse + in-place Ralph panel), `commands/ralph/ralph_widgets.py`. - **TUI shortcuts:** `r` = Ralph, `ctrl+r` = Refresh, `d` = Describe, `enter` = Open/Detail, `esc` = Back, `?` = Help, `q` = Quit. diff --git a/src/mfbt/commands/ralph/__init__.py b/src/mfbt/commands/ralph/__init__.py index 62a60cd..19d464f 100644 --- a/src/mfbt/commands/ralph/__init__.py +++ b/src/mfbt/commands/ralph/__init__.py @@ -119,12 +119,6 @@ def ralph( "-q", help="Suppress live output; only show errors and final summary", ), - yes: bool = typer.Option( - False, - "--yes", - "-y", - help="Skip confirmation prompt and start immediately", - ), instructions: Optional[str] = typer.Option( None, "--instructions", @@ -148,9 +142,9 @@ def ralph( ) from mfbt.commands.ralph.types import AgentType, OrchestrationMode, PhaseInfo, RalphConfig - if status and (quiet or yes): + if status and quiet: console.print( - "[red]Error:[/red] --status is mutually exclusive with --quiet and --yes." + "[red]Error:[/red] --status is mutually exclusive with --quiet." ) raise typer.Exit(code=1) @@ -244,24 +238,44 @@ def ralph( if status: raise typer.Exit(code=0) - if not yes: - typer.confirm("Continue?", abort=True) - from mfbt.commands.ralph.log_capture import RalphLogger from mfbt.config import get_mfbt_dir log_base_dir = get_mfbt_dir() / "logs" ralph_logger = RalphLogger(log_base_dir, agent_type) - orchestrator = RalphOrchestrator( - ralph_config, client, display, logger=ralph_logger + from mfbt.commands.ralph.sandbox import ( + SandboxDetectionError, + SandboxError, ) + try: + orchestrator = RalphOrchestrator( + ralph_config, client, display, logger=ralph_logger + ) + except SandboxDetectionError as exc: + display.error(str(exc)) + raise typer.Exit(code=1) from exc + + # Surface the active sandbox (quiet-suppressed) before the run + # loop. Detection already hard-failed above if no sandbox is + # available, so this is purely informational. + if not quiet: + display.sandbox_status(orchestrator.sandbox_mechanism.value) + try: summary = orchestrator.run() except AgentNotFoundError as exc: display.error(str(exc)) raise typer.Exit(code=1) from exc + except SandboxError as exc: + # A post-construction sandbox failure (setup/denial) is a hard + # abort, not a per-feature failure: the orchestrator re-raises + # it out of run() rather than continuing the loop. Surface the + # attributed message and exit non-zero, same as the + # construction-time SandboxDetectionError above. + display.error(f"sandbox failure — Ralph aborted: {exc}") + raise typer.Exit(code=1) from exc exit_code = 1 if summary.failed > 0 else 0 raise typer.Exit(code=exit_code) diff --git a/src/mfbt/commands/ralph/agent.py b/src/mfbt/commands/ralph/agent.py index 977b1ee..d576809 100644 --- a/src/mfbt/commands/ralph/agent.py +++ b/src/mfbt/commands/ralph/agent.py @@ -2,14 +2,35 @@ from __future__ import annotations +import errno import json as _json import logging +import os +import re import shutil import subprocess +import sys import threading import time from collections.abc import Callable - +from pathlib import Path +from typing import Any + +from mfbt.commands.ralph.sandbox.detect import detect_sandbox +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxCompatibilityError, + SandboxDeniedError, + SandboxError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.models import ( + SandboxConfig, + SandboxPermissions, +) +from mfbt.commands.ralph.sandbox.wrapper import ( + SandboxWrapper, + format_sandbox_stderr, +) from mfbt.commands.ralph.types import AgentResult, AgentType logger = logging.getLogger("mfbt.commands.ralph.agent") @@ -19,6 +40,24 @@ class AgentNotFoundError(Exception): """Raised when the coding agent binary is not on PATH.""" +# Strip ANSI/OSC escape sequences and C0/DEL control bytes (keeping \t and +# \n) from subprocess-derived text before it is written to the operator's +# terminal. The sandbox tool and the agent it wraps share one stderr fd, so +# captured "sandbox" stderr can contain arbitrary bytes the agent (or a tool +# it spawned) emitted; replaying raw escape sequences on the error path could +# drive the operator's terminal (cursor control, title/clipboard via OSC). +_TERMINAL_CONTROL_RE = re.compile( + r"\x1b\[[0-9;?]*[ -/]*[@-~]" # CSI ... final byte + r"|\x1b[@-_]" # other 2-byte ESC (incl. OSC/ST introducer) + r"|[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]" # C0 controls + DEL, minus \t \n +) + + +def _sanitize_terminal_text(text: str) -> str: + """Neutralize terminal control sequences in untrusted subprocess text.""" + return _TERMINAL_CONTROL_RE.sub("", text) + + # --------------------------------------------------------------------------- # stream-json parsing helpers # --------------------------------------------------------------------------- @@ -120,22 +159,73 @@ def __init__( agent_type: AgentType, max_turns: int | None = None, special_instructions: str | None = None, + sandbox_wrapper: Any | None = None, + *, + project_dir: Path | None = None, + base_url: str | None = None, ) -> None: self._agent_type = agent_type self._max_turns = max_turns self._special_instructions = special_instructions + # Sandboxing seam (MCLI-138). The orchestrator does its own up-front + # detection and injects a fully-built ``SandboxWrapper`` (duck-typed: + # ``prepare()`` / ``wrap_command(cmd)`` / ``cleanup()`` / + # ``note_stderr_line(line)`` / ``get_sandbox_stderr()`` / ``mechanism``) + # -- that wrapper is used as-is, and ``detect_sandbox()`` is NOT called + # again (no double detection). When no wrapper is injected (standalone + # / tests), detect exactly once here and build the wrapper from the + # runner's project dir + base URL, so ``self._sandbox_wrapper`` is + # always populated and ``run()`` has a single, consistent path. + if sandbox_wrapper is None: + pdir = project_dir or Path.cwd() + mechanism = detect_sandbox() + if base_url is None: + # Single source of truth for the default backend URL -- no + # hand-synced duplicate constant. This is the standalone / + # no-wrapper-injected path only (the orchestrator always + # injects a wrapper built from the resolved config). + # ``base_url`` is inert in the sandbox layer regardless + # (no adapter consumes ``SandboxConfig.base_url``); it is + # resolved here only to keep the data contract fully + # specified. + from mfbt.config import DEFAULT_CONFIG + + base_url = DEFAULT_CONFIG["base_url"] + sandbox_wrapper = SandboxWrapper( + mechanism, + SandboxConfig( + project_dir=pdir, + base_url=base_url, + permissions=SandboxPermissions.for_project(pdir), + ), + ) + self._sandbox_wrapper = sandbox_wrapper self._process: subprocess.Popen | None = None + # Live stderr sink for the current run(): when set (the orchestrator + # always passes one), typed sandbox-error lines are mirrored to it so + # they are visible in the TUI agent-output log, not only on the real + # process stderr the Textual UI has taken over. + self._on_stderr: Callable[[str], None] | None = None def build_command(self, prompt: str) -> list[str]: - """Build the CLI command list for the agent.""" + """Build the CLI command list for the agent. + + ``--dangerously-skip-permissions`` is always added: a real OS sandbox + (``sandbox-exec`` / ``bwrap``) is guaranteed to be active -- detection + hard-fails before a run otherwise -- and *is* the safety boundary. The + agent runs headless (``-p``, ``stdin=DEVNULL``) with no channel to + answer an interactive tool-approval prompt, so without the flag Claude + Code silently makes no edits. The blast radius is bounded by the + sandbox. + """ if self._agent_type == AgentType.CLAUDE: cmd = [ "claude", - "--dangerously-skip-permissions", "-p", prompt, "--output-format", "stream-json", "--verbose", ] + cmd.insert(1, "--dangerously-skip-permissions") if self._max_turns is not None: cmd.extend(["--max-turns", str(self._max_turns)]) append_prompt = self._APPEND_SYSTEM_PROMPT @@ -163,80 +253,295 @@ def _validate_binary(self) -> None: f"Install it or ensure it is in your PATH." ) + def _emit_sandbox_error(self, exc: SandboxError) -> None: + """Write the typed, attribution-bearing CLI message to stderr (MCLI-152). + + One distinct ``[ralph] …`` line per :class:`SandboxError` subclass + (the ``SandboxCompatibilityError`` message additionally suggests a + remedy). The exception's ``str`` already carries the ``[]`` + tag (MCLI-149) and, for adapter-classified runtime failures, the + captured sandbox stderr (MCLI-150); the separately-accumulated sandbox + stderr is *also* appended here so the cause is actionable without + consulting logs and is surfaced on *every* SandboxError regardless of + debug mode (MCLI-151 rule 3). Writes to stderr only — never stdout, + never raises, never exits: the caller re-raises so the orchestrator's + failed-feature path yields the non-zero process exit. The same text is + also mirrored to the live ``on_stderr`` sink (when the current + ``run()`` was given one) so it is visible in the TUI agent-output log, + not only on the real process stderr the Textual UI has taken over. + """ + if isinstance(exc, SandboxSetupError): + line = f"[ralph] sandbox setup failed: {exc}" + elif isinstance(exc, SandboxDeniedError): + line = f"[ralph] sandbox denied access: {exc}" + elif isinstance(exc, SandboxCompatibilityError): + line = ( + f"[ralph] sandbox compatibility check failed: {exc}\n" + "[ralph] the platform sandbox cannot run the agent here — " + "only sandbox-exec (macOS) and bwrap (Linux) are supported; " + "ensure the required tool is installed and functional" + ) + else: + line = f"[ralph] sandbox error: {exc}" + sys.stderr.write(line + "\n") + # Captured sandbox stderr is subprocess-derived (shared fd with the + # agent): strip terminal control sequences before replaying it to the + # operator's terminal / TUI sink on this error path. + captured = _sanitize_terminal_text( + self._sandbox_wrapper.get_sandbox_stderr() + ) + if captured: + sys.stderr.write( + captured if captured.endswith("\n") else captured + "\n" + ) + sys.stderr.flush() + # Mirror to the live sink (TUI log) when one is wired. Never let a + # broken sink mask the original sandbox failure being re-raised. + if self._on_stderr is not None: + try: + self._on_stderr(line + "\n") + if captured: + self._on_stderr( + captured if captured.endswith("\n") + else captured + "\n" + ) + except Exception: + logger.debug( + "on_stderr sink raised while emitting sandbox error", + exc_info=True, + ) + def run( self, prompt: str, on_output: Callable[[str], None] | None = None, + on_stderr: Callable[[str], None] | None = None, ) -> AgentResult: """Run the agent with *prompt* and wait for it to complete. *on_output* is called with human-readable display text extracted - from the stream-json JSONL output. + from the stream-json JSONL output. *on_stderr* is called live with + each ``[Sandbox] ``-prefixed sandbox-layer stderr line so it surfaces + in real time (a sandbox is always active). """ self._validate_binary() - cmd = self.build_command(prompt) + # Mirror typed sandbox-error lines to this sink too (TUI visibility). + self._on_stderr = on_stderr + # ── Sandbox setup phase (MCLI-142 / MCLI-152) ───────────────────── + # Command building + adapter prep/wrap form one failure domain, kept + # structurally separate from the subprocess phase below. Each + # SandboxError subclass has its own except branch (most specific + # siblings first, the SandboxError base last) so the operator sees a + # distinct, attributed ``[ralph] …`` line naming the failure type and + # mechanism (MCLI-152). Claude Code subprocess errors are handled + # wholly by the separate phase below and are never conflated with this. + try: + cmd = self.build_command(prompt) + # prepare() is a no-op for both supported mechanisms; kept as a + # stable hook. wrap_command() prepends the sandbox command prefix + # before any subprocess is spawned. + self._sandbox_wrapper.prepare() + cmd = self._sandbox_wrapper.wrap_command(cmd) + except SandboxSetupError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxDeniedError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxCompatibilityError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxError as exc: + self._emit_sandbox_error(exc) + raise logger.debug("Spawning agent: %s", " ".join(cmd[:3]) + " ...") - start = time.monotonic() - self._process = subprocess.Popen( - cmd, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - - stdout_lines: list[str] = [] - stderr_lines: list[str] = [] - - def _read_stdout( - stream, - buf: list[str], - callback: Callable[[str], None] | None, - ) -> None: - assert stream is not None - for raw_line in iter(stream.readline, b""): - line = raw_line.decode("utf-8", errors="replace") - buf.append(line) - if callback is not None: - display_text = _extract_display_text(line) - if display_text: - callback(display_text + "\n") - stream.close() - - def _read_stderr( - stream, - buf: list[str], - ) -> None: - assert stream is not None - for raw_line in iter(stream.readline, b""): - buf.append(raw_line.decode("utf-8", errors="replace")) - stream.close() - - stdout_thread = threading.Thread( - target=_read_stdout, - args=(self._process.stdout, stdout_lines, on_output), - daemon=True, - ) - stderr_thread = threading.Thread( - target=_read_stderr, - args=(self._process.stderr, stderr_lines), - daemon=True, + # ── Subprocess phase ────────────────────────────────────────────── + # Launch the agent with its working directory pinned to the SANDBOX's + # own project dir (the single source of truth -- the SandboxConfig the + # wrapper was built from), realpath-resolved exactly as the adapters + # resolve it (SBPL renders %%PROJECT_DIR%% via os.path.realpath; bwrap + # binds realpath'd writable paths). Without an explicit cwd= the child + # would inherit whatever CWD the parent happens to have, which need + # not be inside the confined/bound area while + # --dangerously-skip-permissions is unconditionally on -- the agent + # could run relative to a directory the sandbox does not cover. This + # ties the run location to the confinement instead of to process + # state and removes the second independent Path.cwd() read. + sandbox_project_dir = os.path.realpath( + os.path.expanduser( + str(self._sandbox_wrapper.config.project_dir) + ) ) - stdout_thread.start() - stderr_thread.start() - - self._process.wait() - stdout_thread.join(timeout=5) - stderr_thread.join(timeout=5) - - elapsed = time.monotonic() - start + start = time.monotonic() + try: + # NOTE: this Popen signature is intentionally frozen. The + # stream-json reader downstream depends on stdout/stderr being + # real OS pipes (``PIPE``), stdin being closed (``DEVNULL`` -- + # the agent is non-interactive), and NO ``shell=True`` (the + # sandbox-prefixed argv must exec directly, never via a shell). + # ``cwd`` is pinned to the sandbox's realpath'd project dir (see + # above) so the run location always lies inside the confined area + # rather than wherever the parent's CWD happened to be; it is part + # of the frozen signature, not a tunable. Sandbox-wrapper + # compatibility with stream-json is a known per-adapter risk: a + # wrapper that line-buffers, re-blocks, or allocates a PTY on + # stdout/stderr can stall or corrupt JSONL parsing. + # ``sandbox-exec`` is the most likely future offender; ``bwrap`` + # passes the fds through untouched. Change the wrapped argv if + # needed, but do NOT change these kwargs. + try: + self._process = subprocess.Popen( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=sandbox_project_dir, + ) + except OSError as exc: + # EPERM/EACCES while spawning the *sandbox-prefixed* argv is the + # sandbox tool refusing to launch (e.g. sandbox-exec denying + # exec), not Claude Code failing -- map it to a typed runtime + # denial (MCLI-150). A sandbox is always active here, so any + # other OSError propagates byte-for-byte as before. + if exc.errno in (errno.EPERM, errno.EACCES): + raise SandboxDeniedError( + f"could not launch the sandboxed process: {exc}", + mechanism=self._sandbox_wrapper.mechanism.value, + ) from exc + raise + + stdout_lines: list[str] = [] + stderr_lines: list[str] = [] + + def _read_stdout( + stream, + buf: list[str], + callback: Callable[[str], None] | None, + ) -> None: + assert stream is not None + for raw_line in iter(stream.readline, b""): + line = raw_line.decode("utf-8", errors="replace") + buf.append(line) + if callback is not None: + display_text = _extract_display_text(line) + if display_text: + callback(display_text + "\n") + stream.close() + + def _read_stderr( + stream, + buf: list[str], + ) -> None: + assert stream is not None + wrapper = self._sandbox_wrapper + for raw_line in iter(stream.readline, b""): + line = raw_line.decode("utf-8", errors="replace") + # Lossless: the full stream is always kept in ``stderr``. + buf.append(line) + # A classification/callback bug must never kill the drain + # (data-loss guard) — the raw line is already buffered. + try: + attributed = wrapper.note_stderr_line(line) + if attributed and on_stderr is not None: + on_stderr(format_sandbox_stderr(line)) + except Exception: + logger.debug( + "sandbox stderr attribution error", + exc_info=True, + ) + stream.close() + + stdout_thread = threading.Thread( + target=_read_stdout, + args=(self._process.stdout, stdout_lines, on_output), + daemon=True, + ) + stderr_thread = threading.Thread( + target=_read_stderr, + args=(self._process.stderr, stderr_lines), + daemon=True, + ) + stdout_thread.start() + stderr_thread.start() + + self._process.wait() + stdout_thread.join(timeout=5) + stderr_thread.join(timeout=5) + # If the stderr drain did not finish, the captured sandbox stderr + # may be incomplete -- a real sandbox-denial signature could be + # missing, so classify_exit() below would misattribute the exit to + # Claude. That would otherwise be entirely silent; surface it (H1). + stderr_truncated = stderr_thread.is_alive() + if stderr_truncated: + logger.warning( + "sandbox stderr drain did not complete within 5s; " + "captured sandbox stderr may be incomplete and " + "sandbox-denial attribution for this run unreliable" + ) - return AgentResult( - exit_code=self._process.returncode, - elapsed_seconds=elapsed, - stdout="".join(stdout_lines), - stderr="".join(stderr_lines), - ) + elapsed = time.monotonic() - start + + sandbox_stderr = self._sandbox_wrapper.get_sandbox_stderr() + returncode = self._process.returncode + # Re-attribute a non-zero exit to the sandbox layer when the + # mechanism's adapter classifier recognises it (MCLI-150). A + # ``None`` result means the exit is Claude Code's own status -- + # it stays the separately-handled ``AgentResult`` path below, so + # Claude application errors never enter a SandboxError branch. + if returncode != 0: + sandbox_exc = self._sandbox_wrapper.classify_exit(returncode) + if sandbox_exc is not None: + raise sandbox_exc + # Non-zero exit NOT attributed to a sandbox denial (no + # signature in the captured stderr). Usually correct -- Claude + # exits non-zero routinely -- so this is INFO. But if the + # stderr drain was truncated the "no signature" verdict is + # unreliable: escalate to WARNING so a possibly-missed denial + # is visible in logs rather than fully silent (H1). + emit = logger.warning if stderr_truncated else logger.info + emit( + "agent exited %d under sandbox %s; not attributed to a " + "sandbox denial%s", + returncode, + self._sandbox_wrapper.mechanism.value, + " — stderr drain truncated, attribution uncertain" + if stderr_truncated + else "", + ) + # MCLI-151: no sandbox error -> forward captured sandbox stderr to + # the user only in debug mode; otherwise silently discard it + # (it is always lossless in AgentResult.stderr regardless). + self._sandbox_wrapper.emit_debug_sandbox_stderr() + return AgentResult( + exit_code=returncode, + elapsed_seconds=elapsed, + stdout="".join(stdout_lines), + stderr="".join(stderr_lines), + sandbox_stderr=sandbox_stderr, + ) + # Subprocess-phase sandbox failures (Popen EPERM/EACCES, post-exit + # adapter classification -- MCLI-150) get the same typed, attributed + # surfacing as the setup phase (MCLI-152), via its own per-subclass + # branches. A non-sandbox ``OSError`` (e.g. errno-less "spawn failed") + # is *not* a SandboxError and propagates untouched, so Claude Code + # process errors stay separately handled. + except SandboxSetupError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxDeniedError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxCompatibilityError as exc: + self._emit_sandbox_error(exc) + raise + except SandboxError as exc: + self._emit_sandbox_error(exc) + raise + finally: + # self._sandbox_wrapper is always populated (see __init__); + # cleanup() is a no-op for mechanisms with no temp artifacts. + self._sandbox_wrapper.cleanup() def terminate(self) -> None: """Terminate the running agent subprocess (SIGTERM, then SIGKILL).""" diff --git a/src/mfbt/commands/ralph/display.py b/src/mfbt/commands/ralph/display.py index 4b7940f..60efc10 100644 --- a/src/mfbt/commands/ralph/display.py +++ b/src/mfbt/commands/ralph/display.py @@ -323,3 +323,20 @@ def warn(self, message: str) -> None: self._console.print( f"[bold cyan]\\[ralph][/bold cyan] [yellow]{message}[/yellow]" ) + + # -- Sandbox status (MCLI-145) ------------------------------------------- + + def sandbox_status(self, mechanism_value: str) -> None: + """Print the active-sandbox startup status line (MCLI-145). + + Informational; goes to stdout in the standard ``[ralph]`` style and is + suppressed by ``--quiet``. A sandbox is always active (detection + hard-fails otherwise). ``mechanism_value`` is the ``SandboxMechanism`` + enum value verbatim. + """ + if self._quiet: + return + self._console.print( + f"[bold cyan]\\[ralph][/bold cyan] 🔒 Sandbox: " + f"[bold]{mechanism_value}[/bold]" + ) diff --git a/src/mfbt/commands/ralph/orchestrator.py b/src/mfbt/commands/ralph/orchestrator.py index 0ffae31..f53da03 100644 --- a/src/mfbt/commands/ralph/orchestrator.py +++ b/src/mfbt/commands/ralph/orchestrator.py @@ -5,6 +5,7 @@ import logging import threading import time +from pathlib import Path from typing import TYPE_CHECKING from mfbt.commands.ralph.agent import AgentRunner @@ -16,6 +17,17 @@ verify_feature_completed, ) from mfbt.commands.ralph.prompt import build_agent_prompt +from mfbt.commands.ralph.sandbox.detect import detect_sandbox +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxDetectionError, + SandboxError, +) +from mfbt.commands.ralph.sandbox.models import ( + SandboxConfig, + SandboxMechanism, + SandboxPermissions, +) +from mfbt.commands.ralph.sandbox.wrapper import SandboxWrapper from mfbt.commands.ralph.types import ( FeatureOutcome, FeatureResult, @@ -50,8 +62,43 @@ def __init__( self._client = client self._display = display self._logger = logger + # Detect the platform's required sandbox up front. macOS needs + # sandbox-exec, Linux needs bwrap; if neither is available (or the + # platform is unsupported) detect_sandbox() raises + # SandboxDetectionError here -- before any agent subprocess can start. + # There is no unsandboxed fallback. The detected mechanism is bound to + # a SandboxConfig in a SandboxWrapper and injected into AgentRunner, + # which prepends the mechanism's command prefix to the Claude argv at + # run() time -- sandboxing is live. + # NB: the ``logger`` parameter (a RalphLogger | None) shadows the + # module logger inside __init__, so reference the stdlib module logger + # explicitly here. + module_log = logging.getLogger(__name__) + try: + self._sandbox_mechanism = detect_sandbox() + except SandboxDetectionError: + module_log.error( + "no usable sandbox; aborting before any agent run", + exc_info=True, + ) + raise + module_log.info( + "sandbox mechanism: %s", self._sandbox_mechanism.value + ) + project_dir = Path.cwd() + sandbox_config = SandboxConfig( + project_dir=project_dir, + base_url=config.base_url, + permissions=SandboxPermissions.for_project(project_dir), + ) + self._sandbox_wrapper = SandboxWrapper( + self._sandbox_mechanism, sandbox_config + ) self._agent = AgentRunner( - config.coding_agent, config.max_turns, config.special_instructions + config.coding_agent, + config.max_turns, + config.special_instructions, + sandbox_wrapper=self._sandbox_wrapper, ) self._results: list[FeatureResult] = [] self._session_start: float = 0.0 @@ -70,6 +117,11 @@ def stop(self) -> None: def stop_requested(self) -> bool: return self._stop_event.is_set() + @property + def sandbox_mechanism(self) -> SandboxMechanism: + """The sandbox mechanism resolved at construction (MCLI-145).""" + return self._sandbox_mechanism + def _build_output_callback(self) -> Callable[[str], None] | None: """Build a combined callback for display + logger with error isolation.""" display_cb = self._display.agent_output if not self._config.quiet else None @@ -223,8 +275,38 @@ def _implement_feature( agent_result = None try: - agent_result = self._agent.run(prompt, on_output=output_callback) + agent_result = self._agent.run( + prompt, + on_output=output_callback, + # Sandbox stderr is ``[Sandbox] ``-prefixed, so the same + # combined display+logger sink surfaces it live and + # visually distinct from Claude's output. + on_stderr=output_callback, + ) last_exit_code = agent_result.exit_code + if self.stop_requested: + # stop() terminated the agent mid-run (e.g. user quit). + # Skip the needless completion-verify network call and + # the misleading "Retrying…" path. + break + except SandboxError: + # A sandbox failure that occurs *after* construction + # (SandboxSetupError writing the SBPL profile, an EPERM at + # launch, or an adapter-classified runtime denial) is fatal to + # the WHOLE run, not a per-feature failure. The two-mechanism + # design is hard-fail: we must NOT downgrade it to a soft + # FAILED result and advance to the next feature (which would + # silently re-run against the same broken sandbox and mask a + # real denial as an ordinary agent error). Re-raise so it + # propagates out of run() to the CLI/TUI sandbox-abort wiring, + # exactly like the construction-time SandboxDetectionError. + logger.error( + "sandbox failure during agent run for %s; aborting " + "the session (hard-fail)", + feature.feature_key, + exc_info=True, + ) + raise except Exception as exc: last_error = str(exc) logger.exception("Agent run failed for %s", feature.feature_key) diff --git a/src/mfbt/commands/ralph/profiles/__init__.py b/src/mfbt/commands/ralph/profiles/__init__.py new file mode 100644 index 0000000..618d093 --- /dev/null +++ b/src/mfbt/commands/ralph/profiles/__init__.py @@ -0,0 +1,9 @@ +"""Packaged sandbox profile assets. + +This package exists so static policy files (currently the ``claude.sb`` SBPL +profile consumed by the ``sandbox-exec`` adapter) can be loaded as package +resources via :func:`importlib.resources.files` regardless of how the project +is installed (editable, wheel, zipapp). It contains no importable code. +""" + +from __future__ import annotations diff --git a/src/mfbt/commands/ralph/profiles/claude.sb b/src/mfbt/commands/ralph/profiles/claude.sb new file mode 100644 index 0000000..5d03c63 --- /dev/null +++ b/src/mfbt/commands/ralph/profiles/claude.sb @@ -0,0 +1,77 @@ +(version 1) +(allow default) + +;; ====================================================================== +;; claude.sb — sandbox-exec (SBPL) profile for Claude Code under Ralph +;; ====================================================================== +;; +;; Purpose +;; Confine the Claude Code subprocess that Ralph spawns so it may only +;; *write* inside an explicit allow-list of paths. Reads, process exec, +;; and ALL network access stay permitted: the agent must reach the mfbt +;; MCP server on localhost and fetch npm/pip packages at tool time, so +;; network is intentionally NOT restricted (MCLI-126). +;; +;; Policy shape (MCLI-126 "write-confine" model) +;; (version 1) SBPL dialect version (required first form). +;; (allow default) permissive base — reads/exec/network allowed. +;; (deny file-write* /) revoke ALL filesystem writes globally... +;; (allow file-write* ...) ...then re-grant writes for allow-listed +;; subpaths only, substituted at invocation time. +;; +;; Placeholders (substituted by the sandbox_exec adapter before use). The +;; delimited tokens are written WITHOUT their percent delimiters in this +;; comment on purpose: render_profile() does a plain string replace, so a +;; literal token here would also be substituted and -- because the writable +;; rules are multi-line -- would break out of this comment and inject bare +;; tokens into the policy (the "unbound variable" class of failure). +;; PROJECT_DIR absolute path of the project working directory. +;; WRITABLE_PATHS expands to one +;; (allow file-write* (subpath "")) +;; line per resolved, existence-filtered writable path +;; (project dir, ~/.claude, temp dir, npm/pip caches, +;; claude binary dir, ...). +;; +;; Extending +;; Grant the agent write access to a new location by adding it to +;; SandboxPermissions.writable_paths (see sandbox/models.py) — do NOT +;; hand-edit generated rules here. This file owns only the policy *shape*; +;; the concrete path list is owned by the permissions model so both +;; adapters (sandbox-exec and bwrap) stay consistent. +;; ---------------------------------------------------------------------- + +;; --- MCLI-126: globally confine writes, then re-grant the allow-list --- +(deny file-write* (subpath "/")) + +;; Device nodes. The global deny above also revokes writes to character +;; devices, which breaks ubiquitous build-tool operations: redirecting to +;; /dev/null, RNG, pty allocation, Node's /dev/dtracehelper probe, and +;; writing to the inherited stdout/stderr (macOS exposes these as +;; /dev/fd/N; /dev/stdout etc. are symlinks the kernel resolves to /dev/fd). +;; +;; This is a CURATED allow-list, NOT a blanket `(subpath "/dev")`. A blanket +;; /dev grant also covers every /dev/fd/N entry, so a process holding a +;; writable descriptor to a file *outside* the project allow-list could write +;; through its /dev/fd alias and side-step the confinement. Re-allowing only +;; the specific nodes below is an accepted, bounded trade-off — keep this list +;; minimal and DO NOT widen it back to `(subpath "/dev")`. `/dev/fd` is still +;; granted (process substitution and /dev/stdout redirection need it), but +;; under this spawn model close_fds is on and only stdin/stdout/stderr are +;; inherited, so a child can only alias descriptors it could already open +;; under the policy — narrowing it away from the rest of /dev removes the +;; broad device surface without breaking those tools. NB: when changing this +;; list, validate against a real `sandbox-exec` run — an over-tightened list +;; fails tools at run time, not at policy-load time. +;; (single line so every active SBPL form stays a balanced one-line +;; s-expression — the comment-breakout regression test depends on that.) +(allow file-write* (literal "/dev/null") (literal "/dev/zero") (literal "/dev/random") (literal "/dev/urandom") (literal "/dev/tty") (literal "/dev/dtracehelper") (literal "/dev/ptmx") (regex #"^/dev/ttys[0-9]+$") (subpath "/dev/fd")) + +;; Project working directory (explicit; also present in the writable list). +(allow file-write* (subpath "%%PROJECT_DIR%%")) + +;; Re-granted writable paths (resolved + existence-filtered at use time): +%%WRITABLE_PATHS%% + +;; NOTE: deliberately NO (deny network*) / (allow network*) rules. Network +;; stays fully open via (allow default) (MCLI-126): Ralph's agent talks to +;; the mfbt MCP server over localhost and downloads packages at tool time. diff --git a/src/mfbt/commands/ralph/ralph_widgets.py b/src/mfbt/commands/ralph/ralph_widgets.py index 76c01c0..d3b72b8 100644 --- a/src/mfbt/commands/ralph/ralph_widgets.py +++ b/src/mfbt/commands/ralph/ralph_widgets.py @@ -122,6 +122,17 @@ class RalphHeader(Widget): pending: reactive[int] = reactive(0) progress_pct: reactive[float] = reactive(0.0) session_state: reactive[str] = reactive("running") + # Active sandbox mechanism value ("sandbox_exec" or "bwrap"). Always shown + # in the header so the operator can see at a glance that the agent is + # confined, for the entire duration of a Ralph run. A sandbox is always + # active (detection hard-fails otherwise). + sandbox: reactive[str] = reactive("") + + def _sandbox_label(self) -> str: + """Header segment for the active sandbox mechanism (always shown).""" + if not self.sandbox: + return " | [dim]Sandbox: \u2026[/dim]" + return f" | [cyan]\U0001f512 Sandbox: [bold]{self.sandbox}[/bold][/cyan]" def render(self) -> str: state_icon = { @@ -139,6 +150,7 @@ def render(self) -> str: f" \u2014 {phase_label}" f" | Agent: [bold]{self.agent_name}[/bold]" f" | {self.mode}" + f"{self._sandbox_label()}" ) line2 = ( f" Progress: {self.completed}/{self.total_features}" @@ -177,6 +189,9 @@ def watch_progress_pct(self, value: float) -> None: def watch_session_state(self, value: str) -> None: self._trigger_update() + def watch_sandbox(self, value: str) -> None: + self._trigger_update() + class FeatureProgressTable(DataTable): """Unified table showing all features with SPN, priority, status, time, and attempts. diff --git a/src/mfbt/commands/ralph/sandbox/__init__.py b/src/mfbt/commands/ralph/sandbox/__init__.py new file mode 100644 index 0000000..ebce50f --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/__init__.py @@ -0,0 +1,45 @@ +"""Ralph sandboxing sub-package. + +Public data models and the detection-layer exception hierarchy are re-exported +here so the detector, adapters, ``SandboxWrapper``, and the CLI/orchestrator +layer can import them from a stable package path. ``wrapper`` and ``adapters`` +are fully implemented and wired end-to-end; they are intentionally *not* +re-exported here so that merely importing this package never pulls in their +(subprocess/filesystem-touching) dependencies. + +This ``__init__`` is intentionally minimal -- it only re-exports the plain data +models (:mod:`mfbt.commands.ralph.sandbox.models`) and the exception types +(:mod:`mfbt.commands.ralph.sandbox.exceptions`), both of which have no +import-time side effects. ``detect_sandbox`` itself is intentionally *not* +re-exported here: import it from +:mod:`mfbt.commands.ralph.sandbox.detect` directly so merely importing the +package never pulls in the detection function's dependencies. Importing the +package triggers no tool detection or subprocess activity and introduces no +circular imports. +""" + +from __future__ import annotations + +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxCompatibilityError, + SandboxDeniedError, + SandboxDetectionError, + SandboxError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.models import ( + SandboxConfig, + SandboxMechanism, + SandboxPermissions, +) + +__all__ = [ + "SandboxCompatibilityError", + "SandboxConfig", + "SandboxDeniedError", + "SandboxDetectionError", + "SandboxError", + "SandboxMechanism", + "SandboxPermissions", + "SandboxSetupError", +] diff --git a/src/mfbt/commands/ralph/sandbox/adapters/__init__.py b/src/mfbt/commands/ralph/sandbox/adapters/__init__.py new file mode 100644 index 0000000..c6e7628 --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/adapters/__init__.py @@ -0,0 +1,90 @@ +"""Sandbox adapter dispatch (MCLI-129). + +Single import point for the two per-mechanism adapters. :func:`get_adapter` +maps a :class:`SandboxMechanism` to its ``build_prefix``-style callable so +:class:`SandboxWrapper` dispatches through one stable interface instead of +importing individual adapter modules. There is no identity / no-op adapter: +the enum is closed to ``SANDBOX_EXEC`` and ``BWRAP``, both of which always +contribute a real command prefix. An unknown mechanism is a programming error +and raises :class:`SandboxError`. + +Import graph is strictly acyclic: ``models`` → adapter modules → this +package → ``wrapper``. Adapter modules import only from ``..models`` / +``..exceptions``, never from this package or higher-level modules. +""" + +from __future__ import annotations + +from collections.abc import Callable + +from mfbt.commands.ralph.sandbox.adapters import bwrap, sandbox_exec +from mfbt.commands.ralph.sandbox.exceptions import SandboxError +from mfbt.commands.ralph.sandbox.models import SandboxConfig, SandboxMechanism + +__all__ = ["AdapterFn", "ClassifierFn", "get_adapter", "get_classifier"] + +#: Signature shared by every adapter's ``build_prefix``. +AdapterFn = Callable[[SandboxConfig], list[str]] + +#: Signature shared by every adapter's ``classify_exit`` (MCLI-150). Given the +#: wrapped process's exit code and the captured sandbox-layer stderr, return +#: the typed :class:`SandboxError` the failure maps to, or ``None`` when the +#: exit is *not* attributable to the sandbox (i.e. it is Claude Code's own +#: exit and must stay separately handled). +ClassifierFn = Callable[[int, str], "SandboxError | None"] + + +_DISPATCH: dict[SandboxMechanism, AdapterFn] = { + SandboxMechanism.SANDBOX_EXEC: sandbox_exec.build_prefix, + SandboxMechanism.BWRAP: bwrap.build_prefix, +} + + +def get_adapter(mechanism: SandboxMechanism) -> AdapterFn: + """Return the ``build_prefix`` callable for *mechanism*. + + The dispatch is total over the (two-member) enum; an unknown value is a + programming error and raises :class:`SandboxError` rather than silently + degrading to bare execution. + """ + try: + return _DISPATCH[mechanism] + except KeyError as exc: + raise SandboxError(f"no adapter for mechanism {mechanism!r}") from exc + + +_CLASSIFIER_DISPATCH: dict[SandboxMechanism, ClassifierFn] = { + SandboxMechanism.SANDBOX_EXEC: sandbox_exec.classify_exit, + SandboxMechanism.BWRAP: bwrap.classify_exit, +} + + +# Dispatch totality is a security invariant (an unhandled mechanism must be +# impossible, not lazily discovered at run time). Make it executable: adding a +# SandboxMechanism member without wiring BOTH dispatch tables fails at import, +# not on the first run that needs the missing adapter. +_ALL = set(SandboxMechanism) +if set(_DISPATCH) != _ALL or set(_CLASSIFIER_DISPATCH) != _ALL: + missing = ( + _ALL - set(_DISPATCH), + _ALL - set(_CLASSIFIER_DISPATCH), + ) + raise SandboxError( # pragma: no cover - import-time guard + "sandbox adapter dispatch is not total over SandboxMechanism " + f"(missing adapter={missing[0]}, missing classifier={missing[1]}); " + "every mechanism must have both a build_prefix and a classify_exit." + ) + + +def get_classifier(mechanism: SandboxMechanism) -> ClassifierFn: + """Return the ``classify_exit`` callable for *mechanism* (MCLI-150). + + Mirrors :func:`get_adapter`: total over the enum, raising + :class:`SandboxError` on an unknown value. + """ + try: + return _CLASSIFIER_DISPATCH[mechanism] + except KeyError as exc: + raise SandboxError( + f"no exit classifier for mechanism {mechanism!r}" + ) from exc diff --git a/src/mfbt/commands/ralph/sandbox/adapters/bwrap.py b/src/mfbt/commands/ralph/sandbox/adapters/bwrap.py new file mode 100644 index 0000000..7b65582 --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/adapters/bwrap.py @@ -0,0 +1,149 @@ +"""``bwrap`` (Bubblewrap) adapter — Linux namespace sandbox (MCLI-127/132). + +Translates a :class:`SandboxConfig` into a ``bwrap`` command prefix: + +* a fixed set of system paths → ``--ro-bind`` (read-only), each skipped + silently when absent on the host (MCLI-132), plus the synthetic + ``--dev /dev`` and ``--proc /proc`` mounts; +* every resolved writable path → ``--bind `` (read-write); +* ``--die-with-parent`` so the sandboxed process tree cannot outlive (or be + reparented away from) the ``mfbt`` process that owns its lifecycle; +* ``--chdir `` (realpath, matching the rw bind) so the agent's + working directory is always inside the namespace, never an unbound host + path inherited from the parent; +* **no** ``--unshare-net``: network is intentionally left unrestricted so + Claude can reach the mfbt MCP server on localhost and download npm/pip + packages. + +``--unshare-pid`` / ``--clearenv`` are deliberately *not* used: the primary +boundary here is write-confinement, and both have a real chance of breaking +the agent or its tools (env-dependent toolchains; processes that inspect the +host pid space). They are a possible future hardening step, not an oversight. + +``AgentRunner`` prepends the returned list to the Claude argv, so the final +invocation is ``bwrap claude -p ...`` (bwrap treats the first +non-option token as the command to run inside the namespace). +""" + +from __future__ import annotations + +import os +from collections.abc import Callable + +from mfbt.commands.ralph.sandbox.exceptions import SandboxDeniedError, SandboxError +from mfbt.commands.ralph.sandbox.models import SandboxConfig + +_MECHANISM = "bwrap" + +# Read-only system bind mounts required for the Claude binary and its runtime +# (dynamic linker, shared libraries, system config such as resolv.conf / CA +# certificates). Paths absent on the host — e.g. /lib64 or /sbin on +# merged-/usr distros — are skipped at build time rather than causing bwrap +# to fail with a missing-bind-source error (MCLI-132). +_RO_SYSTEM_PATHS: tuple[str, ...] = ( + "/usr", + "/etc", + "/lib", + "/lib64", + "/bin", + "/sbin", +) + + +def _system_mounts(path_exists: Callable[[str], bool]) -> list[str]: + """Read-only + synthetic system mounts (MCLI-132). + + ``--ro-bind`` entries whose source path is absent on the host are + skipped silently. ``--dev`` / ``--proc`` are synthetic (bwrap creates + them inside the namespace) and therefore emitted unconditionally. + """ + args: list[str] = [] + for path in _RO_SYSTEM_PATHS: + if path_exists(path): + args += ["--ro-bind", path, path] + args += ["--dev", "/dev", "--proc", "/proc"] + return args + + +def _build_bwrap_args( + config: SandboxConfig, *, path_exists: Callable[[str], bool] +) -> list[str]: + """Assemble the bwrap argument list (injectable existence check). + + System mounts come first, then the read-write project binds, matching + the ordering the rest of the sandbox subsystem expects before the + wrapped command is appended by the caller. + """ + # --die-with-parent: the sandboxed tree must not outlive / be reparented + # away from the owning mfbt process (lifecycle containment for an + # autonomous agent running with --dangerously-skip-permissions). + args: list[str] = [ + "bwrap", "--die-with-parent", *_system_mounts(path_exists) + ] + # Writable project paths as read-write bind mounts. Absolute paths only: + # a relative path is ambiguous inside the new mount namespace. + for path in config.permissions.resolved_writable_paths(): + abs_path = str(path.absolute()) + args += ["--bind", abs_path, abs_path] + # Run inside the project dir, resolved exactly as the binds are + # (realpath), so the agent's CWD is always a path that exists in the + # namespace rather than an unbound host path inherited from the parent. + project_dir = os.path.realpath(os.path.expanduser(str(config.project_dir))) + args += ["--chdir", project_dir] + # NOTE: deliberately NO --unshare-net. Network stays unrestricted so the + # agent can reach the mfbt MCP server on localhost and fetch packages. + return args + + +def build_prefix(config: SandboxConfig) -> list[str]: + """Return the ``bwrap`` command prefix for *config* (see module docstring).""" + return _build_bwrap_args(config, path_exists=os.path.exists) + + +# ``bwrap`` prefixes its OWN setup-failure diagnostics with ``bwrap: `` (its +# argv0) at the START of the line before exec'ing the wrapped command. That +# line-leading prefix -- not the exit code -- is the reliable origin signal: +# Claude Code also commonly exits 1, so keying on the code alone +# misattributes Claude errors as sandbox denials. The match is anchored to +# the start of the line (not a whole-buffer substring) so a stray mention of +# ``bwrap:`` in Claude's interleaved output is not misread as a denial. +# ``SandboxWrapper`` prepends ``[Sandbox] `` to every captured line, so that +# marker is stripped before the start-of-line test (callers/tests may also +# pass the raw, unprefixed stderr -- both are handled). +_BWRAP_ERROR_MARKER = "bwrap:" +_SANDBOX_WRAPPER_LINE_PREFIX = "[Sandbox] " + + +def _stderr_has_bwrap_signature(sandbox_stderr: str) -> bool: + """True iff some line begins with ``bwrap``'s own ``bwrap:`` diagnostic.""" + for raw in sandbox_stderr.splitlines(): + line = raw + if line.startswith(_SANDBOX_WRAPPER_LINE_PREFIX): + line = line[len(_SANDBOX_WRAPPER_LINE_PREFIX):] + if line.strip().lower().startswith(_BWRAP_ERROR_MARKER): + return True + return False + + +def classify_exit(returncode: int, sandbox_stderr: str) -> SandboxError | None: + """Map a wrapped-process exit to a typed sandbox error (MCLI-150 / I5). + + ``bwrap`` propagates the wrapped child's exit code unchanged, so a + non-zero exit is *by default* Claude Code's own status. It is + re-attributed to the sandbox **only** when the captured stderr carries + ``bwrap``'s own line-leading ``bwrap:`` failure prefix (a denied bind + source, or restricted user namespaces). Without that signature a non-zero + exit -- including the very common Claude exit ``1`` -- is **not** a + sandbox failure (``None``), so the real Claude error is never masked + behind a wrong "the sandbox denied this" message (I5). A clean ``0`` is + never a failure. + """ + if returncode == 0: + return None + if not _stderr_has_bwrap_signature(sandbox_stderr): + return None + detail = sandbox_stderr.strip() or ( + "bwrap failed while setting up the sandbox namespace " + "(a bind mount was denied or user namespaces are restricted)" + ) + return SandboxDeniedError(detail, mechanism=_MECHANISM) diff --git a/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py b/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py new file mode 100644 index 0000000..d5dcd6f --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py @@ -0,0 +1,195 @@ +"""``sandbox-exec`` adapter — macOS SBPL write-confinement (MCLI-126/131). + +Builds the command prefix that runs Claude Code under macOS ``sandbox-exec`` +with a generated SBPL profile. The profile *template* is the packaged +``profiles/claude.sb`` asset (MCLI-131) — never an ad-hoc inline string. This +adapter loads it, substitutes the runtime path placeholders, writes the +rendered profile to a throwaway temp file (the source tree is never touched), +and returns ``['sandbox-exec', '-f', ]`` for ``AgentRunner`` to +prepend to the Claude argv. +""" + +from __future__ import annotations + +import os +import re +import tempfile +from importlib.resources import files + +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxDeniedError, + SandboxError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.models import SandboxConfig + +_MECHANISM = "sandbox_exec" +_PROFILE_PACKAGE = "mfbt.commands.ralph.profiles" +_PROFILE_NAME = "claude.sb" +_PROJECT_DIR_TOKEN = "%%PROJECT_DIR%%" +_WRITABLE_PATHS_TOKEN = "%%WRITABLE_PATHS%%" + + +def _sbpl_escape(path: str) -> str: + """Escape *path* for an SBPL ``(subpath "...")`` double-quoted literal. + + Backslash and double-quote are the characters that would otherwise let a + path break out of the literal, so they are the only ones escaped. + """ + return path.replace("\\", "\\\\").replace('"', '\\"') + + +def _load_profile_template() -> str: + """Read the packaged ``claude.sb`` SBPL template (MCLI-131).""" + resource = files(_PROFILE_PACKAGE).joinpath(_PROFILE_NAME) + return resource.read_text(encoding="utf-8") + + +def render_profile(config: SandboxConfig) -> str: + """Render the SBPL profile for *config* by substituting placeholders. + + ``%%PROJECT_DIR%%`` becomes the absolute project directory and + ``%%WRITABLE_PATHS%%`` becomes one ``(allow file-write* (subpath "..."))`` + rule per resolved (expanded + existence-filtered) writable path. No + network rules are emitted: network stays open via ``(allow default)``. + + Each placeholder must occur **exactly once** in the template. A plain + ``str.replace`` substitutes *every* occurrence, so a second (e.g. a + literal token left in a ``;;`` doc-comment) would also be replaced; since + ``%%WRITABLE_PATHS%%`` expands to multi-line content it would break out of + the comment and inject bare tokens into the policy, producing a + ``sandbox-exec: unbound variable`` failure at run time. This is enforced + here (fail loud as :class:`SandboxSetupError`) rather than emitting a + profile that only ``sandbox-exec`` rejects later. + """ + template = _load_profile_template() + for token in (_PROJECT_DIR_TOKEN, _WRITABLE_PATHS_TOKEN): + count = template.count(token) + if count != 1: + raise SandboxSetupError( + f"SBPL template {_PROFILE_NAME!r} must contain {token!r} " + f"exactly once, found {count} (a stray copy in a comment " + f"would corrupt the generated policy)", + mechanism=_MECHANISM, + ) + # realpath (not just expanduser): macOS sandbox-exec matches SBPL + # subpaths against the kernel-resolved path, so a project under a + # symlinked path (e.g. /tmp/... -> /private/tmp/...) would otherwise + # never match its own write-allow rule. Mirrors resolve_paths(). + project_dir = _sbpl_escape( + os.path.realpath(os.path.expanduser(str(config.project_dir))) + ) + write_rules = "\n".join( + f'(allow file-write* (subpath "{_sbpl_escape(str(path))}"))' + for path in config.permissions.resolved_writable_paths() + ) + return template.replace(_PROJECT_DIR_TOKEN, project_dir).replace( + _WRITABLE_PATHS_TOKEN, write_rules + ) + + +def build_prefix(config: SandboxConfig) -> list[str]: + """Return the ``sandbox-exec`` command prefix for *config*. + + Renders the packaged SBPL template, writes it to a temp file with + ``delete=False`` (so it survives until ``sandbox-exec`` opens it; + ``SandboxWrapper`` removes it after the agent exits), and returns the + three-element prefix ``['sandbox-exec', '-f', ]``. + """ + profile = render_profile(config) + # Writing the rendered SBPL profile to a temp file is sandbox *setup*: a + # failure here (temp dir unwritable, disk full, permission denied) means + # the sandbox could not be initialized → SandboxSetupError, distinct from + # a runtime denial (MCLI-150). + try: + with tempfile.NamedTemporaryFile( + mode="w", + suffix=".sb", + prefix="mfbt-sandbox-", + delete=False, + encoding="utf-8", + ) as handle: + handle.write(profile) + # ``delete=False`` keeps the file on disk after the context exits + # so ``sandbox-exec`` can open it by path; + # ``SandboxWrapper.cleanup()`` removes it after the agent exits. + profile_path = handle.name + except OSError as exc: + raise SandboxSetupError( + f"could not write the sandbox-exec SBPL profile to a temp file: " + f"{exc}", + mechanism=_MECHANISM, + ) from exc + return ["sandbox-exec", "-f", profile_path] + + +# Signatures that identify output as ``sandbox-exec``'s OWN denial / failure +# diagnostics rather than Claude Code's stderr (the two share one +# process-tree fd, so Claude output is in this same buffer). To keep a stray +# mention of these tokens in Claude's own output from being misread as a +# denial, the match is line-aware and as specific as each signature allows: +# +# * macOS kernel sandbox-violation lines always carry the ``deny(N)`` token +# with a numeric id (e.g. ``... claude(123) deny(1) file-write* ...``); the +# token is mid-line (kernel-prefixed), so it is matched by the precise +# ``deny\(\)`` regex rather than the bare substring ``deny(``. +# * ``sandbox-exec``'s own startup failures are emitted with a +# ``sandbox-exec:`` prefix at the START of the line. +# +# ``SandboxWrapper`` prepends ``[Sandbox] `` to every captured line, so that +# marker is stripped before the start-of-line test (callers/tests may also +# pass the raw, unprefixed stderr — both are handled). +_SANDBOX_WRAPPER_LINE_PREFIX = "[Sandbox] " +_DENY_TOKEN_RE = re.compile(r"deny\(\d+\)") +_SANDBOX_EXEC_FAILURE_PREFIX = "sandbox-exec:" + + +def _strip_wrapper_prefix(line: str) -> str: + """Drop the ``SandboxWrapper`` ``[Sandbox] `` marker if present.""" + if line.startswith(_SANDBOX_WRAPPER_LINE_PREFIX): + return line[len(_SANDBOX_WRAPPER_LINE_PREFIX):] + return line + + +def _stderr_has_sandbox_signature(sandbox_stderr: str) -> bool: + """True iff some line is ``sandbox-exec``'s own denial/failure diagnostic. + + Line-aware (not a whole-buffer substring scan): the ``deny(N)`` kernel + token is matched anywhere on a line (it is kernel-prefixed), while the + ``sandbox-exec:`` startup-failure marker must begin the line, so a stray + mention in Claude's interleaved stdout/stderr is far less likely to be + misclassified as a sandbox denial. + """ + for raw in sandbox_stderr.splitlines(): + line = _strip_wrapper_prefix(raw).strip().lower() + if not line: + continue + if _DENY_TOKEN_RE.search(line): + return True + if line.startswith(_SANDBOX_EXEC_FAILURE_PREFIX): + return True + return False + + +def classify_exit(returncode: int, sandbox_stderr: str) -> SandboxError | None: + """Map a wrapped-process exit to a typed sandbox error (MCLI-150 / I5). + + ``sandbox-exec`` returns the wrapped command's exit status on a clean + run, so a non-zero exit is *by default* Claude Code's own status. It is + re-attributed to the sandbox **only** when the captured stderr carries + ``sandbox-exec``'s own denial signature (a ``deny(N)`` kernel-violation + line, or a line beginning ``sandbox-exec:``) -- otherwise treating every + non-zero exit as a denial would mask the real Claude error behind a wrong + "the sandbox blocked this" message (I5). ``None`` ⇒ not a sandbox failure + (Claude's own exit, kept on the ordinary ``AgentResult`` path); a clean + ``0`` is never a failure. + """ + if returncode == 0: + return None + if not _stderr_has_sandbox_signature(sandbox_stderr): + return None + detail = sandbox_stderr.strip() or ( + f"the sandbox-exec policy denied an operation " + f"(wrapped process exited {returncode})" + ) + return SandboxDeniedError(detail, mechanism=_MECHANISM) diff --git a/src/mfbt/commands/ralph/sandbox/detect.py b/src/mfbt/commands/ralph/sandbox/detect.py new file mode 100644 index 0000000..8c766dc --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/detect.py @@ -0,0 +1,270 @@ +"""Sandbox mechanism detection. + +Pure, dependency-injectable selection of the platform's *required* sandbox +mechanism. There are exactly two: + +* macOS (``Darwin``) -> ``sandbox-exec`` (ships with the OS; binary-presence + check). +* Linux -> ``bwrap`` (bubblewrap; a functional rootless-namespace liveness + probe, not mere binary presence). + +Each is a HARD REQUIREMENT. :func:`detect_sandbox` either returns the one +mechanism for the current platform or raises +:class:`~mfbt.commands.ralph.sandbox.exceptions.SandboxDetectionError` -- a +hard failure that must abort before any agent subprocess starts. There is no +unsandboxed mode, no cross-platform fallback, no config-file override, and no +priority chain. + +Design rules: + +* **No import-time side effects.** No tool detection, subprocess, filesystem, + or logging output is triggered by importing this module. Every external + probe runs only inside a function body and only through an *injected* + callable (``which_fn``/``system_fn``/``run_fn``), so the whole module is + unit-testable without real binaries or installed tools. This module does not + import :mod:`mfbt.config` (the old config-override path is gone), so there is + no detect<->config import cycle. +* **Platform gating** is strict: macOS resolves only ``sandbox-exec``; Linux + resolves only ``bwrap``; any other platform raises. + +Detection selects on binary presence (+ functional liveness for ``bwrap``); it +does **not** run the agent inside the sandbox to verify end-to-end +compatibility. A present-but-incompatible mechanism (e.g. ``bwrap`` with user +namespaces disabled) is detected as unavailable and raises here, or surfaces +its failure as a typed :class:`SandboxDeniedError` at run time. +""" + +from __future__ import annotations + +import logging +import platform +import shutil +import subprocess +import sys +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from enum import Enum +from typing import Any + +from mfbt.commands.ralph.sandbox.exceptions import SandboxDetectionError +from mfbt.commands.ralph.sandbox.models import SandboxMechanism + +logger = logging.getLogger("mfbt.commands.ralph.sandbox.detect") + +# --------------------------------------------------------------------------- +# Tunables +# +# Conservative bounded default so a probe can never block indefinitely. +# --------------------------------------------------------------------------- + +_LIVENESS_PROBE_TIMEOUT = 5 # seconds; bound for the bwrap functional probe + +# MCLI-151: debug mode is OFF until a flag/env-var name is decided. The probe +# debug code path below is reachable (unit tests monkeypatch this to True); +# only the *activation wiring* is deliberately deferred. +# TODO: decision needed — wire to -- and +_debug_enabled = False +_SANDBOX_DEBUG_PREFIX = "[sandbox-debug] " + + +def _debug_emit(text: str) -> None: + """Write *text* to ``sys.stderr``, every non-empty line debug-marked. + + Resolves ``sys.stderr`` at call time (capturable in tests) and is only + ever reached behind a ``_debug_enabled`` guard (MCLI-151). + """ + marked = "\n".join( + f"{_SANDBOX_DEBUG_PREFIX}{line}" if line.strip() else line + for line in text.split("\n") + ) + sys.stderr.write(marked) + sys.stderr.flush() + + +# Functional liveness probe arguments (MCLI-124) -- beyond mere binary +# presence. The canonical minimal rootless-namespace invocation: bind ``/`` +# read-only into a fresh namespace and run ``true``. Fails fast if user +# namespaces are restricted. +_BWRAP_PROBE_ARGS: tuple[str, ...] = ("--ro-bind", "/", "/", "true") + +_RunFn = Callable[..., "subprocess.CompletedProcess[Any]"] +_WhichFn = Callable[[str], "str | None"] +_SystemFn = Callable[[], str] + + +# --------------------------------------------------------------------------- +# MCLI-124: structured functional-liveness probe result +# --------------------------------------------------------------------------- + + +class LivenessStatus(Enum): + """Outcome of a functional liveness probe. + + ``PROBE_FAILED`` is deliberately distinct from ``NOT_FOUND`` so callers can + tell "the tool is installed but cannot run here" from "the tool is absent". + """ + + NOT_FOUND = "not_found" + PROBE_FAILED = "probe_failed" + AVAILABLE = "available" + + +@dataclass(frozen=True) +class LivenessProbeResult: + """Structured result of probing a binary for functional availability.""" + + status: LivenessStatus + exit_code: int | None = None + stderr: str | None = None + + @property + def is_available(self) -> bool: + """True only when the probe ran and exited 0.""" + return self.status is LivenessStatus.AVAILABLE + + +def _stderr_snippet(stderr: object, limit: int = 240) -> str | None: + """Best-effort short, log-safe rendering of probe stderr.""" + if not stderr: + return None + if isinstance(stderr, bytes): + text = stderr.decode("utf-8", "replace") + else: + text = str(stderr) + text = text.strip() + if not text: + return None + return text if len(text) <= limit else text[:limit] + "..." + + +# --------------------------------------------------------------------------- +# MCLI-124: bwrap functional liveness probe +# --------------------------------------------------------------------------- + + +def _probe_liveness( + binary: str, + probe_args: Iterable[str], + which_fn: _WhichFn, + run_fn: _RunFn, +) -> LivenessProbeResult: + """Probe *binary* for functional availability beyond binary presence. + + Returns ``NOT_FOUND`` without running any subprocess when the binary is + absent (zero added delay), ``AVAILABLE`` on a clean exit-0 probe, and + ``PROBE_FAILED`` (carrying the exit code / stderr snippet) when the binary + is present but the probe fails, times out, or cannot launch. + """ + if which_fn(binary) is None: + return LivenessProbeResult(LivenessStatus.NOT_FOUND) + + resolved_cmd = [binary, *probe_args] + # MCLI-151: in debug mode, surface the resolved probe command on stderr + # before it runs. Reachable; gated only by the (unwired) _debug_enabled. + if _debug_enabled: + _debug_emit("probe: " + " ".join(resolved_cmd) + "\n") + + try: + completed = run_fn( + resolved_cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + timeout=_LIVENESS_PROBE_TIMEOUT, + check=False, + ) + except (subprocess.TimeoutExpired, OSError) as exc: + logger.warning("%s present but probe could not run: %s", binary, exc) + if _debug_enabled: + _debug_emit(f"probe {binary} could not run: {exc}\n") + return LivenessProbeResult(LivenessStatus.PROBE_FAILED, stderr=str(exc)) + + returncode: int = completed.returncode + # MCLI-151: forward the probe's captured stderr in debug mode. + if _debug_enabled: + probe_err = _stderr_snippet(getattr(completed, "stderr", None)) + if probe_err: + _debug_emit(f"probe {binary} stderr: {probe_err}\n") + if returncode == 0: + logger.debug("%s functional liveness probe OK", binary) + return LivenessProbeResult(LivenessStatus.AVAILABLE, exit_code=0) + + snippet = _stderr_snippet(getattr(completed, "stderr", None)) + logger.warning( + "%s present but non-functional (exit=%d)%s", + binary, + returncode, + f": {snippet}" if snippet else "", + ) + return LivenessProbeResult( + LivenessStatus.PROBE_FAILED, exit_code=returncode, stderr=snippet + ) + + +# --------------------------------------------------------------------------- +# Public detection entry point +# --------------------------------------------------------------------------- + +_MACOS_MISSING = ( + "sandbox-exec is required to sandbox the agent on macOS but was not found " + "on PATH. It ships with macOS; ensure /usr/bin is on PATH. Ralph will not " + "run without a sandbox." +) +_LINUX_MISSING = ( + "bwrap (bubblewrap) is required to sandbox the agent on Linux but was not " + "found on PATH. Install it: 'apt install bubblewrap' (Debian/Ubuntu) or " + "'dnf install bubblewrap' (Fedora/RHEL). Ralph will not run without a " + "sandbox." +) + + +def detect_sandbox( + which_fn: _WhichFn = shutil.which, + system_fn: _SystemFn = platform.system, + *, + run_fn: _RunFn = subprocess.run, +) -> SandboxMechanism: + """Resolve the platform's required sandbox mechanism, or raise. + + * macOS (``system_fn() == "Darwin"``): returns + :attr:`SandboxMechanism.SANDBOX_EXEC` when ``sandbox-exec`` is on PATH; + otherwise raises :class:`SandboxDetectionError`. + * Linux (``system_fn() == "Linux"``): returns + :attr:`SandboxMechanism.BWRAP` when the ``bwrap`` functional liveness + probe succeeds; raises if ``bwrap`` is absent or present-but-broken + (e.g. user namespaces disabled). + * Any other platform: raises :class:`SandboxDetectionError`. + + Never returns ``None`` and never falls back to unsandboxed execution. The + raised error carries an actionable, install-hint message and must abort + the run before any agent subprocess starts. + """ + system = system_fn() + + if system == "Darwin": + if which_fn("sandbox-exec") is None: + raise SandboxDetectionError(_MACOS_MISSING) + return SandboxMechanism.SANDBOX_EXEC + + if system == "Linux": + probe = _probe_liveness("bwrap", _BWRAP_PROBE_ARGS, which_fn, run_fn) + if probe.status is LivenessStatus.AVAILABLE: + return SandboxMechanism.BWRAP + if probe.status is LivenessStatus.NOT_FOUND: + raise SandboxDetectionError(_LINUX_MISSING) + detail = ( + f" (probe exit={probe.exit_code})" + if probe.exit_code is not None + else "" + ) + snippet = f": {probe.stderr}" if probe.stderr else "" + raise SandboxDetectionError( + "bwrap (bubblewrap) is installed but not functional on this host" + f"{detail}{snippet}. User namespaces may be disabled. Ralph will " + "not run without a working sandbox." + ) + + raise SandboxDetectionError( + f"No supported sandbox mechanism for platform {system!r}. Ralph " + "requires sandbox-exec on macOS or bwrap (bubblewrap) on Linux and " + "refuses to run unconfined." + ) diff --git a/src/mfbt/commands/ralph/sandbox/exceptions.py b/src/mfbt/commands/ralph/sandbox/exceptions.py new file mode 100644 index 0000000..9941349 --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/exceptions.py @@ -0,0 +1,122 @@ +"""Sandbox typed exception hierarchy. + +Every sandbox-layer failure is one of these types so the adapters, +``SandboxWrapper``, and ``AgentRunner`` can both catch the common +:class:`SandboxError` base *and* branch on the specific failure mode: + +* :class:`SandboxDetectionError` -- the platform's required sandbox mechanism + (``sandbox-exec`` on macOS, ``bwrap`` on Linux) is unavailable, or the + platform is unsupported. A hard failure that must abort *before* any agent + subprocess starts. +* :class:`SandboxCompatibilityError` -- reserved taxonomy member for "a present + mechanism cannot actually run the agent". Not raised by the current + detection/run path (detection no longer probes ``claude`` inside the + sandbox); kept as a defensively-handled member of the hierarchy. +* :class:`SandboxSetupError` -- a resolved mechanism failed while preparing to + run the agent (e.g. writing the ``sandbox-exec`` SBPL profile). +* :class:`SandboxDeniedError` -- the sandbox blocked an operation at *runtime* + (an ``EPERM``/``EACCES`` at process launch, or a wrapper exit code that means + "the sandbox denied this"), as opposed to a Claude Code application error. + +Every instance carries an optional *mechanism* tag (``"bwrap"`` / +``"sandbox_exec"`` -- the :class:`SandboxMechanism` ``value``) so a message +can be attributed to its source without the caller having to thread the +mechanism separately. ``str(err)`` is ``"[] "`` +when a mechanism is set and just ``""`` otherwise -- so existing +``SandboxError("plain message")`` raise sites are unchanged. + +Pure Python, no external dependencies, no import-time side effects. +""" + +from __future__ import annotations + +__all__ = [ + "SandboxCompatibilityError", + "SandboxDeniedError", + "SandboxDetectionError", + "SandboxError", + "SandboxSetupError", +] + + +class SandboxError(RuntimeError): + """Base class for every sandbox-layer failure. + + Subclasses ``RuntimeError`` so existing broad ``except RuntimeError`` + handlers keep working, while callers that care specifically about the + sandbox layer can catch this base. + + *message* is the human-readable description. *mechanism* is the optional + :class:`~mfbt.commands.ralph.sandbox.models.SandboxMechanism` ``value`` + string (e.g. ``"bwrap"``) identifying which sandbox produced the failure; + it is **keyword-only** so every existing positional ``SandboxError("...")`` + call site (and its subclasses) stays valid and unattributed. + """ + + def __init__(self, message: str, *, mechanism: str | None = None) -> None: + super().__init__(message) + self.message = message + self.mechanism = mechanism + + def __str__(self) -> str: + """``"[] "`` when attributed, else ``""``. + + Keeping the bare-message form when ``mechanism`` is ``None`` means + callers that never set a mechanism see byte-for-byte the same string + as before this type carried attribution. + """ + if self.mechanism: + return f"[{self.mechanism}] {self.message}" + return self.message + + +class SandboxDetectionError(SandboxError): + """The platform's required sandbox mechanism is unavailable. + + Raised by :func:`mfbt.commands.ralph.sandbox.detect.detect_sandbox` when + ``sandbox-exec`` is missing on macOS, ``bwrap`` is absent or + non-functional on Linux, or the platform is neither. This is a *hard* + failure: there is no unsandboxed fallback, so the run must abort before + any agent subprocess starts. + """ + + +class SandboxCompatibilityError(SandboxError): + """A selected sandbox mechanism cannot actually execute the agent. + + Reserved taxonomy member. Detection deliberately does **not** run + ``claude`` inside each candidate, so no current code path raises this; a + present-but-incompatible mechanism instead surfaces as a + :class:`SandboxDeniedError` at run time. Kept (and still defensively + handled by ``AgentRunner``) so the failure taxonomy stays complete and a + future in-sandbox compatibility check has a typed home. + """ + + +class SandboxSetupError(SandboxError): + """A resolved sandbox mechanism failed while preparing to run the agent. + + Raised from the post-detection setup phase -- adapter prefix construction + (``wrap_command``; e.g. writing the ``sandbox-exec`` SBPL profile to a + temp file) -- as opposed to detection (:class:`SandboxDetectionError`) or + the compatibility probe (:class:`SandboxCompatibilityError`). + ``AgentRunner`` surfaces it as a sandbox-layer failure distinct from a + Claude Code subprocess error. + """ + + +class SandboxDeniedError(SandboxError): + """The sandbox blocked an operation at runtime. + + Raised when the sandbox -- not Claude Code -- is the source of the + failure once the wrapped process is (or is being) launched: an + ``EPERM``/``EACCES`` ``OSError`` while spawning the sandboxed command, or a + wrapper exit code an adapter maps to "the sandbox denied this" (e.g. an + exit from ``bwrap`` itself carrying its ``bwrap:`` failure signature, or a + non-zero ``sandbox-exec`` exit with a ``deny(...)`` line). Kept distinct + from + :class:`SandboxSetupError` so the operator can tell "could not set the + sandbox up" from "the sandbox refused something at run time", and from a + Claude Code application error (which is never wrapped in a + :class:`SandboxError`). + """ diff --git a/src/mfbt/commands/ralph/sandbox/models.py b/src/mfbt/commands/ralph/sandbox/models.py new file mode 100644 index 0000000..c0b2ae7 --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/models.py @@ -0,0 +1,296 @@ +"""Shared data models for the Ralph sandboxing layer. + +This module is the single source of truth for the sandboxing data contract. +Every later component -- the detector, the adapters, and ``SandboxWrapper`` -- +consumes the types defined here; none of them defines its own enum members or +path lists independently. + +The module is intentionally limited to the Python standard library plus the +sibling :mod:`mfbt.commands.ralph.sandbox.exceptions` module (itself +dependency-free, no import-time side effects, and a leaf of the import graph +so this import introduces no cycle). It is free of import-time side effects +(no tool detection, no subprocess calls, no filesystem access at import). +Path *resolution* and existence filtering still happen at *use* time via +:func:`resolve_paths`, not at construction time; the only construction-time +work is cheap, filesystem-free **structural** validation that fails loud on a +security-policy object that would void confinement (a writable filesystem +root / bare ``$HOME``, or a config whose ``project_dir`` disagrees with its +permissions') -- a type-valid-but-dangerous object must not be silently +accepted. +""" + +from __future__ import annotations + +import enum +import os +import shutil +import tempfile +from collections.abc import Iterable +from dataclasses import dataclass, field +from pathlib import Path + +from mfbt.commands.ralph.sandbox.exceptions import SandboxSetupError + +__all__ = [ + "SandboxConfig", + "SandboxMechanism", + "SandboxPermissions", + "resolve_paths", +] + + +class SandboxMechanism(enum.Enum): + """The two supported sandbox mechanisms. + + Exactly one applies per platform and each is a hard requirement: + ``sandbox-exec`` on macOS, ``bwrap`` (bubblewrap) on Linux. There is no + unsandboxed mode and no cross-platform fallback -- if the platform's + mechanism is unavailable, detection raises and Ralph refuses to run. This + enum is the single source of truth consumed by the detector, the adapter + dispatcher, and the startup status line. + """ + + SANDBOX_EXEC = "sandbox_exec" + BWRAP = "bwrap" + + +def resolve_paths(paths: Iterable[Path]) -> list[Path]: + """Resolve a path list for sandbox argument construction. + + For every entry: ``expanduser`` then ``realpath`` (canonicalize, fully + resolving symlinks), silently dropping paths that do not exist. This is + the use-time resolution step: models store paths as supplied (possibly + ``~``-relative); adapters call this immediately before building their + mechanism-specific arguments. + + Symlink canonicalization is **required for correctness on macOS**: + ``sandbox-exec`` evaluates SBPL ``(subpath ...)`` rules against the + kernel's *resolved* path, so a rule for ``/tmp`` or ``$TMPDIR`` + (``/var/folders/...``) never matches the real write target + (``/private/tmp`` / ``/private/var/folders/...``). Without this, Claude + Code's Bash tool -- which writes its shell snapshot / temp files into + ``$TMPDIR`` -- is silently write-denied while read-only tools still work. + It is also the right behavior for ``bwrap`` bind mounts (realpath of a + non-symlinked Linux ``/tmp`` is unchanged). + """ + resolved: list[Path] = [] + for path in paths: + real = Path(os.path.realpath(os.path.expanduser(str(path)))) + if real.exists(): + resolved.append(real) + return resolved + + +def _is_filesystem_root(path: Path) -> bool: + """True if *path* is a filesystem root (e.g. ``/`` or ``C:\\``). + + Pure and filesystem-free: a root is the unique path that is its own + parent. (Also catches the degenerate single-segment relative ``.``, + which is not a usable writable confinement target anyway.) + """ + return path == path.parent + + +def _assert_confining(project_dir: Path, writable_paths: Iterable[Path]) -> None: + """Fail loud if a writable entry would void the write-confinement. + + Granting write to a filesystem root or to the bare ``$HOME`` directory is + indistinguishable, at the type level, from "confine to nothing": both + adapters faithfully translate it into an allow-(almost)-everything policy + (an SBPL ``(subpath "/")`` / a ``--bind / /``). That is never a legitimate + sandbox permission set and is almost certainly a construction bug, so + reject it at construction time rather than emitting a policy that silently + does not confine. ``project_dir`` is checked too: it is re-allowed + verbatim by the SBPL profile and bound rw by bwrap, so a root/`$HOME` + project dir is equally catastrophic. Filesystem-free (no resolve/expand): + structural check only, matching this module's no-FS-at-construction rule. + """ + home = Path.home() + for path in (project_dir, *writable_paths): + if _is_filesystem_root(path): + raise SandboxSetupError( + f"refusing to build a sandbox that grants write access to a " + f"filesystem root ({str(path)!r}): that voids the project " + f"write-confinement. This is a construction bug — pass a " + f"specific project/cache directory, not a root." + ) + if path == home: + raise SandboxSetupError( + f"refusing to build a sandbox that grants write access to " + f"the entire home directory ({str(path)!r}): grant the " + f"specific subdirectories needed (e.g. ~/.claude, ~/.cache), " + f"not all of $HOME." + ) + + +def _claude_binary_dir() -> Path: + """Best-effort directory containing the ``claude`` binary. + + Resolved from ``PATH`` when available; otherwise falls back to the + conventional Claude Code install location under the user's home. The + fallback keeps the canonical default permission set complete even when the + binary is absent at construction time -- non-existent paths are dropped + later by :func:`resolve_paths`. + """ + found = shutil.which("claude") + if found is not None: + return Path(found).parent + return Path.home() / ".claude" / "local" + + +def _default_writable_paths() -> tuple[Path, ...]: + """Canonical writable path set for zero-argument construction (REQ-092).""" + home = Path.home() + return ( + Path.cwd(), + home / ".claude", + Path("/tmp"), + _claude_binary_dir(), + home / ".npm", + home / ".cache", + ) + + +def _default_readonly_paths() -> tuple[Path, ...]: + """Canonical read-only path set (REQ-093).""" + return (Path("/usr/lib"), Path("/usr/local/lib")) + + +@dataclass(frozen=True) +class SandboxPermissions: + """Platform-agnostic description of Claude Code's required access rights. + + This is the single source of truth every sandbox adapter reads to produce + mechanism-specific configuration (SBPL profiles, ``bwrap`` bind-mounts). + Adapters never define their own path lists. + + Paths are stored as supplied -- possibly ``~``-relative and possibly + non-existent. Call :meth:`resolved_writable_paths` / + :meth:`resolved_readonly_paths` (or :func:`resolve_paths`) at the point of + building sandbox arguments to expand and existence-filter them. + + The path lists are stored as ``tuple`` (and any iterable passed in is + coerced to one in :meth:`__post_init__`) so this *frozen* security-policy + object is genuinely deeply immutable: a holder cannot widen the sandbox's + writable surface by mutating ``perms.writable_paths`` in place. + """ + + project_dir: Path = field(default_factory=Path.cwd) + writable_paths: tuple[Path, ...] = field( + default_factory=_default_writable_paths + ) + readonly_paths: tuple[Path, ...] = field( + default_factory=_default_readonly_paths + ) + allow_network: bool = True + + def __post_init__(self) -> None: + """Coerce the path lists to tuples so immutability is enforced. + + Callers (and tests) routinely pass ``writable_paths=[...]``; without + this a mutable ``list`` would be stored on a ``frozen=True`` object, + contradicting the advertised immutability. ``object.__setattr__`` is + required because the dataclass is frozen. + """ + object.__setattr__(self, "writable_paths", tuple(self.writable_paths)) + object.__setattr__(self, "readonly_paths", tuple(self.readonly_paths)) + # Fail loud on a type-valid but confinement-voiding writable surface + # (filesystem root / bare $HOME, incl. project_dir). Structural and + # filesystem-free, so the no-FS-at-construction contract holds. + _assert_confining(self.project_dir, self.writable_paths) + + @property + def readable_paths(self) -> tuple[Path, ...]: + """Alias for :attr:`readonly_paths` (read-only access path list).""" + return self.readonly_paths + + @classmethod + def for_project(cls, project_dir: Path) -> SandboxPermissions: + """Build the standard permission set for ``project_dir`` (REQ-092/093). + + Respects ``NPM_CONFIG_CACHE`` and ``PIP_CACHE_DIR`` when set, falling + back to ``~/.npm`` and ``~/.cache/pip``. Uses + ``tempfile.gettempdir()`` for the platform temp directory **and** + ``/tmp`` (some tools hardcode it; canonicalized to ``/private/tmp`` + on macOS by :func:`resolve_paths`) and the resolved ``claude`` binary + directory. + + **Accepted trade-off:** the whole of ``~/.claude`` is writable. Claude + Code legitimately writes its own session/config/state there, so a + narrower grant breaks it; the cost is that a write-confined-but-still + ``--dangerously-skip-permissions`` agent can rewrite its own + config/hooks/memory (a persistence surface *outside* the project). + This is a deliberate, documented trade-off, not an oversight — narrow + it to a project-scoped subdir if/when Claude Code supports one. + + Structural validation (filesystem-root / bare ``$HOME``) runs in + ``__post_init__``; per-path existence filtering still happens at use + time via :func:`resolve_paths`. + """ + home = Path.home() + npm_cache = os.environ.get("NPM_CONFIG_CACHE") + pip_cache = os.environ.get("PIP_CACHE_DIR") + writable = ( + project_dir, + home / ".claude", + Path(tempfile.gettempdir()), + Path("/tmp"), + Path(npm_cache) if npm_cache else home / ".npm", + Path(pip_cache) if pip_cache else home / ".cache" / "pip", + _claude_binary_dir(), + ) + return cls( + project_dir=project_dir, + writable_paths=writable, + readonly_paths=_default_readonly_paths(), + allow_network=True, + ) + + def resolved_writable_paths(self) -> list[Path]: + """Writable paths expanded and filtered to those that exist.""" + return resolve_paths(self.writable_paths) + + def resolved_readonly_paths(self) -> list[Path]: + """Read-only paths expanded and filtered to those that exist.""" + return resolve_paths(self.readonly_paths) + + +@dataclass(frozen=True) +class SandboxConfig: + """Runtime context required by ``SandboxWrapper`` and every adapter. + + All three fields are mandatory and carry no defaults: a sandbox invocation + must always be fully specified by the caller. Omitting any field at + construction raises ``TypeError``. + + ``base_url`` is **currently inert**: no adapter consumes it (network is + left fully open by design — see the adapters/SBPL profile), so it does + *not* drive any network policy. It is retained as part of the + runtime-context contract for a future network-scoped sandbox; do not read + intent into its presence. (It must therefore never be the basis of a + confinement decision.) + + ``project_dir`` and ``permissions.project_dir`` must agree. The SBPL + adapter renders ``%%PROJECT_DIR%%`` from ``config.project_dir`` while the + write-allow rules come from ``permissions``; a disagreement means the + profile's project token and the writable rules describe different roots + (a latent confinement-correctness hole the type would otherwise permit). + A mismatch is a construction bug and raises :class:`SandboxSetupError` + rather than being silently accepted; the bound ``permissions`` object is + stored as-is (no silent replacement). + """ + + project_dir: Path + base_url: str + permissions: SandboxPermissions + + def __post_init__(self) -> None: + if self.permissions.project_dir != self.project_dir: + raise SandboxSetupError( + "SandboxConfig.project_dir " + f"({str(self.project_dir)!r}) disagrees with " + "permissions.project_dir " + f"({str(self.permissions.project_dir)!r}); they must name " + "the same directory or the SBPL %%PROJECT_DIR%% token and " + "the write-allow rules confine different roots." + ) diff --git a/src/mfbt/commands/ralph/sandbox/wrapper.py b/src/mfbt/commands/ralph/sandbox/wrapper.py new file mode 100644 index 0000000..00366ef --- /dev/null +++ b/src/mfbt/commands/ralph/sandbox/wrapper.py @@ -0,0 +1,282 @@ +"""``SandboxWrapper`` — applies a sandbox mechanism to the agent command. + +Binds a resolved :class:`SandboxMechanism` to a :class:`SandboxConfig` and +exposes the small surface ``AgentRunner`` needs: + +* :meth:`prepare` — one-time setup before the subprocess starts (a no-op for + both supported mechanisms; kept as a stable hook); +* :meth:`wrap_command` — prepend the mechanism's command prefix to the agent + argv (canonical name; :meth:`wrap` is a backward-compatible alias); +* :meth:`cleanup` — best-effort removal of temp artifacts (the + ``sandbox-exec`` SBPL profile) after the process exits; +* :meth:`note_stderr_line` / :meth:`get_sandbox_stderr` — capture sandbox + -process stderr distinctly from Claude Code's stderr. + +The bound mechanism is always a real sandbox (``sandbox-exec`` or ``bwrap``): +the constructor rejects anything else and detection hard-fails before a +wrapper is built, so there is no pass-through / unsandboxed path here. + +**Single-process-tree stderr attribution.** The sandbox tool +(``sandbox-exec`` / ``bwrap``) wraps ``claude`` as one process tree that +shares a single stderr file descriptor, so the sandbox layer's diagnostics +cannot be split from Claude's by OS fd. Streams are instead distinguished by +the ``[Sandbox] `` line marker that :func:`format_sandbox_stderr` applies. +Physically separating the two would require spawning the sandbox tool and +Claude as distinct processes and is intentionally not done. +""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path + +from mfbt.commands.ralph.sandbox.adapters import ( + AdapterFn, + get_adapter, + get_classifier, +) +from mfbt.commands.ralph.sandbox.exceptions import SandboxError +from mfbt.commands.ralph.sandbox.models import SandboxConfig, SandboxMechanism + +__all__ = [ + "SandboxWrapper", + "format_sandbox_debug", + "format_sandbox_stderr", +] + +logger = logging.getLogger("mfbt.commands.ralph.sandbox.wrapper") + +_SANDBOX_STDERR_PREFIX = "[Sandbox] " +_SANDBOX_DEBUG_PREFIX = "[sandbox-debug] " + +# MCLI-151: debug mode is OFF until a flag/env-var name is decided. The whole +# debug code path below is reachable (unit tests monkeypatch this to True); +# only the *activation wiring* is deliberately deferred. +# TODO: decision needed — wire to -- and +_debug_enabled = False + + +def format_sandbox_debug(raw: str) -> str: + """Prefix every non-empty line of *raw* with ``[sandbox-debug] ``. + + Mirrors :func:`format_sandbox_stderr` (blank lines untouched, trailing + newline and empty input preserved) but uses the debug marker so + debug-mode diagnostics are visually distinct from the ``[Sandbox] `` + attribution of real sandbox-layer stderr. Pure and side-effect-free. + """ + if not raw: + return raw + return "\n".join( + f"{_SANDBOX_DEBUG_PREFIX}{line}" if line.strip() else line + for line in raw.split("\n") + ) + + +def format_sandbox_stderr(raw: str) -> str: + """Prefix every non-empty line of *raw* with ``[Sandbox] ``. + + Empty lines are preserved verbatim (no prefix) so blank separators in the + sandbox tool's output stay visually blank; a trailing newline in *raw* is + preserved and an empty string returns unchanged. + + Pure and side-effect-free. This is the marker convention used to + distinguish sandbox-layer stderr from Claude Code's stderr when both + share a single process-tree file descriptor (see module docstring). + """ + if not raw: + return raw + return "\n".join( + f"{_SANDBOX_STDERR_PREFIX}{line}" if line.strip() else line + for line in raw.split("\n") + ) + + +class SandboxWrapper: + """Translate a :class:`SandboxConfig` into agent-command wrapping.""" + + def __init__( + self, + mechanism: SandboxMechanism | None, + config: SandboxConfig, + ) -> None: + """Bind *mechanism* and *config*; resolve (don't invoke) the adapter. + + Passing an explicit :class:`SandboxMechanism` bypasses all of + ``detect.py`` entirely — construction performs zero subprocess / + filesystem work. ``mechanism=None`` is a standalone-construction + convenience that defers to :func:`detect_sandbox`; this mirrors, and + does not replace, the orchestrator's own up-front detection. + + The adapter ``build_prefix`` callable is *selected and stored* here + (a pure dict lookup, no side effects); it is *invoked* — which may + touch the filesystem or spawn a subprocess — only in + :meth:`wrap_command`. An unrecognized mechanism raises + :class:`SandboxError`. + """ + if mechanism is None: + # Opt-in convenience: resolve like the orchestrator would. Local + # import keeps the sandbox package import side-effect-free and + # lets ``SandboxDetectionError`` propagate untyped-caught. + from mfbt.commands.ralph.sandbox.detect import detect_sandbox + + mechanism = detect_sandbox() + if not isinstance(mechanism, SandboxMechanism): + valid = ", ".join(m.value for m in SandboxMechanism) + msg = ( + f"unrecognized sandbox mechanism {mechanism!r}; " + f"valid values: {valid}" + ) + raise SandboxError(msg) + self._mechanism = mechanism + self._config = config + self._created_paths: list[Path] = [] + # Reference store only — get_adapter is a pure dict lookup; the + # callable is invoked (FS / subprocess) lazily in wrap_command(). + self._build_prefix: AdapterFn = get_adapter(mechanism) + self._sandbox_stderr_parts: list[str] = [] + + @property + def mechanism(self) -> SandboxMechanism: + """The resolved sandbox mechanism this wrapper applies.""" + return self._mechanism + + @property + def config(self) -> SandboxConfig: + """The :class:`SandboxConfig` bound to this wrapper.""" + return self._config + + def prepare(self) -> None: + """Run one-time setup before the agent subprocess starts. + + Neither supported mechanism (``sandbox-exec``, ``bwrap``) needs any + preparation, so this is a no-op. Kept as a stable lifecycle hook that + ``AgentRunner`` always calls. + """ + + def wrap_command(self, cmd: list[str]) -> list[str]: + """Return a new list: *cmd* prefixed with the sandbox invocation. + + Both supported adapters always contribute a non-empty prefix; the + input list is never mutated. This method never injects + ``--dangerously-skip-permissions`` or any other agent flag; that is + the ``AgentRunner`` layer's responsibility. + """ + prefix = self._build_prefix(self._config) + # MCLI-151: in debug mode, surface the *resolved* sandbox command + # prefix on stderr before the subprocess is spawned. Reachable code + # path; gated only by the (currently unwired) ``_debug_enabled``. + if _debug_enabled: + self._debug_write( + f"{self._mechanism.value} command prefix " + f"({len(prefix)} args):\n" + "\n".join(prefix) + "\n" + ) + # The sandbox-exec adapter writes a temp SBPL profile and passes it + # via ``-f ``; track that tmpfile so cleanup() removes it + # once the agent exits. Key off the ``-f`` convention (not the last + # element) so a future extra trailing flag can't make cleanup unlink + # the wrong path. + if self._mechanism is SandboxMechanism.SANDBOX_EXEC: + try: + profile_path = prefix[prefix.index("-f") + 1] + except (ValueError, IndexError): + profile_path = None + if profile_path is not None: + self._created_paths.append(Path(profile_path)) + return [*prefix, *cmd] + + @staticmethod + def _debug_write(text: str) -> None: + """Write *text* to ``sys.stderr``, every non-empty line debug-marked. + + Resolved at call time (``sys.stderr``, not a bound default) so tests + can capture it. Only ever invoked behind a ``_debug_enabled`` guard. + """ + sys.stderr.write(format_sandbox_debug(text)) + sys.stderr.flush() + + def emit_debug_sandbox_stderr(self) -> None: + """Forward captured sandbox stderr (the no-error half of MCLI-151). + + This method handles only the two non-error cases: + + * **debug ON** → write the accumulated sandbox-layer stderr to the + user's stderr, ``[sandbox-debug] ``-prefixed, *after* execution; + * **debug OFF, no error** → silently discard (a no-op). + + Note: the remaining MCLI-151 case -- *on ``SandboxError``, always + surface regardless of debug* -- is deliberately NOT handled here. It + is handled at the raise site: the MCLI-150 adapter classifiers embed + the captured sandbox stderr directly into the ``SandboxError`` + message, which ``AgentRunner`` surfaces (MCLI-152). Call only after + the stderr-reader thread has joined (same invariant as + :meth:`get_sandbox_stderr`). + """ + if not _debug_enabled: + return + captured = self.get_sandbox_stderr() + if not captured: + return + # Already ``[Sandbox] ``-prefixed by note_stderr_line; add the debug + # marker so its origin (debug forward vs. live surfacing) is clear. + self._debug_write(captured) + + def wrap(self, command: list[str]) -> list[str]: + """Backward-compatible alias for :meth:`wrap_command` (MCLI-134). + + ``wrap_command`` is the canonical name; ``wrap`` is retained for + existing duck-typed callers and tests. + """ + return self.wrap_command(command) + + def cleanup(self) -> None: + """Best-effort removal of temp artifacts created by :meth:`wrap_command`.""" + while self._created_paths: + path = self._created_paths.pop() + try: + path.unlink(missing_ok=True) + except OSError: + logger.debug( + "failed to remove sandbox temp file %s", + path, + exc_info=True, + ) + + def note_stderr_line(self, raw_line: str) -> bool: + """Capture *raw_line* as sandbox-layer stderr; always returns ``True``. + + The bound mechanism is always a real sandbox, so every line is + attributed to the sandbox layer: its ``[Sandbox] ``-prefixed form + (via :func:`format_sandbox_stderr`) is accumulated for + :meth:`get_sandbox_stderr`. + + Called only from ``AgentRunner``'s single stderr-reader thread; the + accumulated result is read via :meth:`get_sandbox_stderr` only after + that thread has joined (single-writer invariant — no lock needed). + """ + self._sandbox_stderr_parts.append(format_sandbox_stderr(raw_line)) + return True + + def get_sandbox_stderr(self) -> str: + """Return all captured sandbox stderr, ``[Sandbox] ``-prefixed. + + Empty when no stderr was captured. Read only after the stderr-reader + thread has joined (see :meth:`note_stderr_line`). + """ + return "".join(self._sandbox_stderr_parts) + + def classify_exit(self, returncode: int) -> SandboxError | None: + """Map a wrapped-process *returncode* to a typed sandbox error (MCLI-150). + + Delegates to the bound mechanism's adapter classifier (via + :func:`get_classifier`), passing the accumulated sandbox-layer stderr + so adapters that disambiguate by output (the sandbox tool's own + failure signature vs. Claude's exit) can do so. Returns ``None`` when + the exit is *not* + attributable to the sandbox -- it is Claude Code's own exit and must + stay separately handled by ``AgentRunner``. Call only after the + stderr-reader thread has joined (same single-writer invariant as + :meth:`get_sandbox_stderr`). + """ + classifier = get_classifier(self._mechanism) + return classifier(returncode, self.get_sandbox_stderr()) diff --git a/src/mfbt/commands/ralph/tui_display.py b/src/mfbt/commands/ralph/tui_display.py index 58404a1..5e56ae2 100644 --- a/src/mfbt/commands/ralph/tui_display.py +++ b/src/mfbt/commands/ralph/tui_display.py @@ -2,11 +2,14 @@ from __future__ import annotations +import logging from pathlib import Path from typing import TYPE_CHECKING, Any from rich.text import Text +logger = logging.getLogger("mfbt.commands.ralph.tui_display") + if TYPE_CHECKING: from mfbt.commands.ralph.types import ( FeatureResult, @@ -30,8 +33,20 @@ def __init__(self, container: Any) -> None: self._feature_num = 0 def _call_from_thread(self, callback: Any, *args: Any) -> None: - """Schedule a callback on the main thread via the container's app.""" - self._container.app.call_from_thread(callback, *args) + """Schedule a callback on the main thread via the container's app. + + After the user quits, the orchestrator worker thread may still be + mid-feature and emit display updates. The Textual app context is + gone by then, so accessing ``container.app`` / ``call_from_thread`` + raises NoActiveAppError (a RuntimeError subclass), and + ``call_from_thread`` also raises a plain RuntimeError once the event + loop has stopped. Drop the update instead of crashing the worker + thread with a spurious traceback. + """ + try: + self._container.app.call_from_thread(callback, *args) + except RuntimeError: + return # -- Status summary (no-op in TUI — TUI auto-continues) ---------------- @@ -368,3 +383,28 @@ def _update() -> None: pass self._call_from_thread(_update) + + # -- Sandbox status (MCLI-145) ------------------------------------------- + + def sandbox_status(self, mechanism_value: str) -> None: + """Show the active-sandbox status line in the agent-output log.""" + + def _update() -> None: + from mfbt.commands.ralph.ralph_widgets import AgentOutputLog + + try: + log = self._container.query_one("#agent-output", AgentOutputLog) + log.write( + Text.from_markup( + f"[cyan]\U0001f512 Sandbox: {mechanism_value}[/cyan]" + ) + ) + except Exception: + # The active-sandbox indicator is the user's only in-TUI + # confirmation they are protected; if it can't render, do not + # swallow it fully silently — leave a debug breadcrumb. + logger.debug( + "could not render sandbox status line", exc_info=True + ) + + self._call_from_thread(_update) diff --git a/src/mfbt/commands/ralph/types.py b/src/mfbt/commands/ralph/types.py index db64cc1..72092b6 100644 --- a/src/mfbt/commands/ralph/types.py +++ b/src/mfbt/commands/ralph/types.py @@ -113,6 +113,11 @@ class AgentResult: elapsed_seconds: float stdout: str stderr: str + # Sandbox-layer stderr, ``[Sandbox] ``-prefixed and attributed by marker + # convention (the sandbox tool and Claude share one process-tree fd, so + # this is a logical, not OS-fd, split). Empty when no sandbox is active. + # The full, lossless stream remains in ``stderr``. + sandbox_stderr: str = "" @dataclass(frozen=True) diff --git a/src/mfbt/tui/app.py b/src/mfbt/tui/app.py index d22ead8..af240a6 100644 --- a/src/mfbt/tui/app.py +++ b/src/mfbt/tui/app.py @@ -106,6 +106,10 @@ def __init__( # panel itself is found via _current_feature_panel() — there is no # separate cached reference to drift out of sync. self._ralph_return: bool = False + # True while the "leave an active Ralph run?" confirmation modal is + # open. The 'q' binding is priority=True so it fires even over the + # modal — this flag prevents stacking modals / double teardown. + self._exit_confirm_open: bool = False def compose(self) -> ComposeResult: yield K9sHeader(id="k9s-header") @@ -160,6 +164,22 @@ def _current_feature_panel(self) -> Any: return child return None + def _ralph_active_panel(self) -> Any: + """Return the feature panel iff Ralph is RUNNING or PAUSED there. + + Used to gate quit / back behind a confirmation modal while a run + is in progress. + """ + from mfbt.commands.ralph.ralph_widgets import RalphStage + + panel = self._current_feature_panel() + if panel is not None and panel.stage in ( + RalphStage.RUNNING, + RalphStage.PAUSED, + ): + return panel + return None + def _show_project_list(self) -> None: """Mount the project list panel.""" from mfbt.tui.screens.project_list import ProjectListPanel @@ -416,6 +436,26 @@ def _handle_describe(self, data: dict[str, Any] | None, resource_type: str) -> N def action_go_back(self) -> None: """Navigate back one level.""" + ralph_panel = self._ralph_active_panel() + if ralph_panel is not None: + # Leaving an active run needs explicit confirmation (this also + # supersedes panel.go_back()'s RUNNING-swallow / PAUSED-return). + if self._exit_confirm_open: + return + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + self._exit_confirm_open = True + + def _on_confirm(confirmed: bool | None) -> None: + self._exit_confirm_open = False + if confirmed: + ralph_panel.leave_ralph() + + self.push_screen( + ConfirmExitRalphModal(context="back"), callback=_on_confirm + ) + return + panel = self._current_feature_panel() if panel is not None: if panel.go_back(): @@ -810,13 +850,36 @@ def on_resize(self) -> None: else: self.remove_class("narrow") - async def action_quit(self) -> None: + def _stop_for_quit(self) -> None: + """Tear down the panel orchestrator + token monitor before exit.""" panel = self._current_feature_panel() if panel is not None: panel.stop() if self._token_monitor is not None: self._token_monitor.stop() - await super().action_quit() + + async def action_quit(self) -> None: + if self._ralph_active_panel() is None: + # Not in an active Ralph run — original immediate behavior. + self._stop_for_quit() + await super().action_quit() + return + if self._exit_confirm_open: + return # modal already up; ignore the repeat keypress + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + self._exit_confirm_open = True + + def _on_confirm(confirmed: bool | None) -> None: + self._exit_confirm_open = False + if confirmed: + self._stop_for_quit() + # Textual's App.action_quit is exactly self.exit(). + self.exit() + + self.push_screen( + ConfirmExitRalphModal(context="quit"), callback=_on_confirm + ) def action_show_help(self) -> None: from mfbt.commands.ralph.ralph_widgets import RalphStage diff --git a/src/mfbt/tui/app.tcss b/src/mfbt/tui/app.tcss index 067fe28..2645c70 100644 --- a/src/mfbt/tui/app.tcss +++ b/src/mfbt/tui/app.tcss @@ -385,6 +385,34 @@ AuthRequiredModal { margin: 0 1; } +/* ─── Confirm Exit Ralph Modal ─── */ + +ConfirmExitRalphModal { + align: center middle; +} + +#confirm-exit-container { + width: 55; + max-height: 50%; + background: $surface; + border: round $yellow; + padding: 1 2; +} + +#confirm-exit-title { + height: 1; + text-style: bold; +} + +#confirm-exit-buttons { + height: 3; + align: center middle; +} + +#confirm-exit-buttons Button { + margin: 0 1; +} + /* ─── Narrow terminal (<80 cols) ─── */ .narrow K9sHeader #header-logo { diff --git a/src/mfbt/tui/screens/confirm_modal.py b/src/mfbt/tui/screens/confirm_modal.py new file mode 100644 index 0000000..05fcea5 --- /dev/null +++ b/src/mfbt/tui/screens/confirm_modal.py @@ -0,0 +1,81 @@ +"""Confirmation modal shown before leaving an active Ralph run. + +Pressing ``q`` (quit) or ESC/back while Ralph is RUNNING or PAUSED would +otherwise tear down a long orchestration with a single stray keypress. +This modal gates that: the user must explicitly confirm. +""" + +from __future__ import annotations + +from typing import Any + +from textual.app import ComposeResult +from textual.binding import Binding +from textual.containers import Container, Horizontal +from textual.screen import ModalScreen +from textual.widgets import Button, Label, Static + +_COPY: dict[str, dict[str, str]] = { + "quit": { + "title": "Quit while Ralph is running?", + "body": ( + "Ralph is still working. Quitting will stop the in-progress " + "feature and exit the app." + ), + "confirm_label": "Quit", + }, + "back": { + "title": "Stop Ralph and go back?", + "body": ( + "Ralph is still working. Going back will stop the in-progress " + "feature and return you to the feature list." + ), + "confirm_label": "Stop & Go Back", + }, +} + + +class ConfirmExitRalphModal(ModalScreen[bool]): + """Yes/no gate before quitting or navigating away from a Ralph run. + + Dismisses ``True`` when the user confirms leaving, ``False`` otherwise + (Keep Running button or ESC). + """ + + BINDINGS = [ + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, context: str = "quit", **kwargs: Any) -> None: + super().__init__(**kwargs) + self._copy = _COPY.get(context, _COPY["quit"]) + + def compose(self) -> ComposeResult: + with Container(id="confirm-exit-container"): + yield Label( + f"[bold #f1fa8c]{self._copy['title']}[/bold #f1fa8c]", + id="confirm-exit-title", + ) + yield Label("") + yield Static(self._copy["body"]) + yield Label("") + with Horizontal(id="confirm-exit-buttons"): + yield Button( + self._copy["confirm_label"], + id="confirm-exit-confirm", + variant="error", + ) + yield Button( + "Keep Running", + id="confirm-exit-cancel", + variant="success", + ) + + def on_button_pressed(self, event: Button.Pressed) -> None: + if event.button.id == "confirm-exit-confirm": + self.dismiss(True) + elif event.button.id == "confirm-exit-cancel": + self.dismiss(False) + + def action_cancel(self) -> None: + self.dismiss(False) diff --git a/src/mfbt/tui/screens/feature_list.py b/src/mfbt/tui/screens/feature_list.py index 2d502ec..10beabf 100644 --- a/src/mfbt/tui/screens/feature_list.py +++ b/src/mfbt/tui/screens/feature_list.py @@ -402,6 +402,28 @@ def _return_to_browsing(self) -> None: # Reload to reflect features completed during the run. self.run_worker(self._load_features, thread=True) + def leave_ralph(self) -> None: + """Stop the run and return to the browsing list (user-confirmed). + + Public entry point for the app-level "leave Ralph?" confirmation + gate. Works from both RUNNING and PAUSED — ``_return_to_browsing`` + already stops the orchestrator, hides the chrome and reloads. + """ + self._return_to_browsing() + + def _safe_call_from_thread( + self, callback: Any, *args: Any, **kwargs: Any + ) -> None: + """``app.call_from_thread`` that tolerates a torn-down app. + + Used by ``_run_orchestrator``'s failure handlers: if the run raised + *because* the user quit, the app context is already gone and a bare + ``call_from_thread`` would raise NoActiveAppError (a RuntimeError), + re-logging a spurious traceback during shutdown. + """ + with suppress(RuntimeError): + self.app.call_from_thread(callback, *args, **kwargs) + # ---- Ralph: pause / kill / retry ---------------------------------- def stop(self) -> None: @@ -734,6 +756,10 @@ def _on_auth_required() -> bool: def _run_orchestrator(self) -> None: from mfbt.commands.ralph.log_capture import RalphLogger from mfbt.commands.ralph.orchestrator import RalphOrchestrator + from mfbt.commands.ralph.sandbox import ( + SandboxDetectionError, + SandboxError, + ) from mfbt.commands.ralph.tui_display import RalphTUIDisplay try: @@ -745,28 +771,70 @@ def _run_orchestrator(self) -> None: ) self._orchestrator_client = self._build_uncached_client() - self._orchestrator = RalphOrchestrator( - self._ralph_config, - self._orchestrator_client, - display, - logger=ralph_logger, + # Sandbox is a hard requirement: macOS needs sandbox-exec, Linux + # needs bwrap. Detection runs in the orchestrator constructor and + # raises SandboxDetectionError if none is available (or the + # platform is unsupported). The TUI worker cannot run unconfined, + # so fail closed *visibly* — a clear error notification and a + # return to browsing — instead of an opaque crash. + try: + self._orchestrator = RalphOrchestrator( + self._ralph_config, + self._orchestrator_client, + display, + logger=ralph_logger, + ) + except SandboxDetectionError as exc: + # "No usable sandbox, refusing to run" is the system's primary + # protection being unavailable — an error-class condition, not + # a warning. Logged at error with a traceback for parity with + # the CLI/orchestrator path (orchestrator.__init__). + logger.error("Ralph aborted: no usable sandbox (%s)", exc, + exc_info=True) + self._safe_call_from_thread( + self.app.notify, str(exc), severity="error" + ) + self._safe_call_from_thread(self._return_to_browsing) + return + + # Persist the resolved sandbox mechanism in the header and surface + # it in the agent-output log, visible for the entire run. + self.app.call_from_thread( + self._set_header_sandbox, + self._orchestrator.sandbox_mechanism.value, ) + display.sandbox_status(self._orchestrator.sandbox_mechanism.value) + self._orchestrator.run() except (TokenError, AuthenticationRequired): # Token expired mid-run (1hr access tokens, long runs). Surface # the auth modal and tear down instead of crashing the whole TUI # and orphaning the agent subprocess. logger.warning("Authentication required during Ralph run") - self.app.call_from_thread(self.app.show_auth_required_modal) - self.app.call_from_thread(self._return_to_browsing) + self._safe_call_from_thread(self.app.show_auth_required_modal) + self._safe_call_from_thread(self._return_to_browsing) + except SandboxError as exc: + # A sandbox failure surfaced *after* construction (setup/denial): + # the orchestrator re-raises it out of run() instead of continuing + # the feature loop (hard-fail). Fail closed visibly with an + # attributed message, same posture as the construction-time + # SandboxDetectionError branch above — never silently continue. + logger.error("Ralph aborted: sandbox failure (%s)", exc, + exc_info=True) + self._safe_call_from_thread( + self.app.notify, + f"Sandbox failure — Ralph aborted: {exc}", + severity="error", + ) + self._safe_call_from_thread(self._return_to_browsing) except Exception as exc: logger.exception("Ralph orchestration failed") - self.app.call_from_thread( + self._safe_call_from_thread( self.app.notify, f"Ralph run failed: {exc}", severity="error", ) - self.app.call_from_thread(self._return_to_browsing) + self._safe_call_from_thread(self._return_to_browsing) # ---- Hints / header helpers --------------------------------------- @@ -801,6 +869,18 @@ def _set_header_ralph(self, active: bool) -> None: header.ralph_active = active header.ralph_project_name = self.project_name if active else "" + def _set_header_sandbox(self, mechanism_value: str) -> None: + """Persist the active sandbox mechanism in the Ralph header. + + Called via ``call_from_thread`` from the orchestrator worker so the + mechanism (``sandbox_exec`` / ``bwrap``) is visible for the whole + run, not just as a one-shot log line. + """ + self._sandbox_mechanism_value = mechanism_value + with suppress(Exception): + header = self.query_one("#ralph-header", RalphHeader) + header.sandbox = mechanism_value + def update_special_instructions(self, text: str | None) -> None: """Update cached instructions (called from the app's modal callback).""" self._special_instructions = text diff --git a/tests/unit/ralph/sandbox/__init__.py b/tests/unit/ralph/sandbox/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/ralph/sandbox/adapters/__init__.py b/tests/unit/ralph/sandbox/adapters/__init__.py new file mode 100644 index 0000000..33df09d --- /dev/null +++ b/tests/unit/ralph/sandbox/adapters/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the sandbox adapter layer (MMCLI-032).""" diff --git a/tests/unit/ralph/sandbox/adapters/conftest.py b/tests/unit/ralph/sandbox/adapters/conftest.py new file mode 100644 index 0000000..0ab0ea9 --- /dev/null +++ b/tests/unit/ralph/sandbox/adapters/conftest.py @@ -0,0 +1,40 @@ +"""Shared fixtures for sandbox adapter tests.""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path + +import pytest + +from mfbt.commands.ralph.sandbox.models import SandboxConfig, SandboxPermissions + +MakeConfig = Callable[..., SandboxConfig] + + +@pytest.fixture +def make_config() -> MakeConfig: + """Factory building a SandboxConfig with an explicit writable allow-list. + + ``readonly_paths`` is emptied so tests are not coupled to host + ``/usr/lib`` existence; ``writable`` entries should be real (existing) + directories so they survive ``resolved_writable_paths()`` filtering. + """ + + def _make( + project_dir: Path, + writable: list[Path], + *, + base_url: str = "https://app.mfbt.ai", + ) -> SandboxConfig: + perms = SandboxPermissions( + project_dir=project_dir, + writable_paths=list(writable), + readonly_paths=[], + allow_network=True, + ) + return SandboxConfig( + project_dir=project_dir, base_url=base_url, permissions=perms + ) + + return _make diff --git a/tests/unit/ralph/sandbox/adapters/test_bwrap.py b/tests/unit/ralph/sandbox/adapters/test_bwrap.py new file mode 100644 index 0000000..c3f508c --- /dev/null +++ b/tests/unit/ralph/sandbox/adapters/test_bwrap.py @@ -0,0 +1,178 @@ +"""Unit tests for the bwrap adapter (MCLI-127 + MCLI-132).""" + +from __future__ import annotations + +import inspect +from collections.abc import Callable +from pathlib import Path + +from mfbt.commands.ralph.sandbox.adapters import bwrap +from mfbt.commands.ralph.sandbox.exceptions import SandboxDeniedError +from mfbt.commands.ralph.sandbox.models import SandboxConfig + +MakeConfig = Callable[..., SandboxConfig] + + +def _pairs(args: list[str], flag: str) -> list[tuple[str, str]]: + """All ``flag SRC DST`` triples reduced to ``(SRC, DST)`` pairs.""" + out: list[tuple[str, str]] = [] + for i, tok in enumerate(args): + if tok == flag and i + 2 < len(args): + out.append((args[i + 1], args[i + 2])) + return out + + +class TestBuildPrefix: + def test_first_element_is_bwrap( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + assert bwrap.build_prefix(make_config(tmp_path, [tmp_path]))[0] == "bwrap" + + def test_writable_paths_become_bind_pairs( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + d1 = tmp_path / "proj" + d2 = tmp_path / "cache" + d1.mkdir() + d2.mkdir() + args = bwrap.build_prefix(make_config(tmp_path, [d1, d2])) + binds = _pairs(args, "--bind") + assert (str(d1), str(d1)) in binds + assert (str(d2), str(d2)) in binds + + def test_dev_and_proc_present( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + args = bwrap.build_prefix(make_config(tmp_path, [tmp_path])) + # --dev / --proc take a single path argument (not a SRC DST pair). + assert args[args.index("--dev") + 1] == "/dev" + assert args[args.index("--proc") + 1] == "/proc" + + def test_no_unshare_net( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + args = bwrap.build_prefix(make_config(tmp_path, [tmp_path])) + assert "--unshare-net" not in args + + def test_die_with_parent_present( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # Lifecycle containment: the sandboxed tree must not outlive mfbt. + args = bwrap.build_prefix(make_config(tmp_path, [tmp_path])) + assert "--die-with-parent" in args + assert args[0] == "bwrap" + + def test_chdir_is_realpath_project_dir( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + import os + + proj = tmp_path / "proj" + proj.mkdir() + args = bwrap.build_prefix(make_config(proj, [proj])) + chdir = args[args.index("--chdir") + 1] + assert chdir == os.path.realpath(str(proj)) + + def test_signature_matches_contract(self) -> None: + params = list(inspect.signature(bwrap.build_prefix).parameters) + assert params == ["config"] + + +class TestSystemMounts: + def test_existing_system_paths_are_ro_bound( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + args = bwrap._build_bwrap_args( + make_config(tmp_path, [tmp_path]), + path_exists=lambda p: True, + ) + ro = _pairs(args, "--ro-bind") + for p in ("/usr", "/etc", "/lib", "/lib64", "/bin", "/sbin"): + assert (p, p) in ro + + def test_missing_optional_path_skipped_without_error( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + args = bwrap._build_bwrap_args( + make_config(tmp_path, [tmp_path]), + path_exists=lambda p: p != "/lib64", + ) + ro = _pairs(args, "--ro-bind") + assert ("/lib64", "/lib64") not in ro + assert ("/usr", "/usr") in ro + + def test_all_system_paths_absent_still_valid( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + args = bwrap._build_bwrap_args( + make_config(tmp_path, [tmp_path]), + path_exists=lambda p: False, + ) + assert "--ro-bind" not in args + # Synthetic mounts are unconditional. + assert "--dev" in args + assert "--proc" in args + + def test_system_mounts_precede_writable_binds( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + proj = tmp_path / "proj" + proj.mkdir() + args = bwrap._build_bwrap_args( + make_config(tmp_path, [proj]), + path_exists=lambda p: True, + ) + assert args.index("--ro-bind") < args.index("--bind") + + +class TestClassifyExit: + def test_exit_1_without_bwrap_signature_is_claude_exit(self) -> None: + # I5: exit 1 with no "bwrap:" prefix is Claude's own exit, NOT a + # sandbox denial -- classifying it as a denial would mask the real + # Claude error. + assert bwrap.classify_exit(1, "") is None + assert bwrap.classify_exit(1, "claude: some application error\n") is None + + def test_exit_1_uses_sandbox_stderr_when_present(self) -> None: + exc = bwrap.classify_exit(1, "bwrap: Creating new namespace failed\n") + assert isinstance(exc, SandboxDeniedError) + assert exc.mechanism == "bwrap" + assert "Creating new namespace failed" in str(exc) + + def test_any_nonzero_with_bwrap_signature_is_denied(self) -> None: + # The "bwrap:" origin prefix -- not the exit code -- drives it. + exc = bwrap.classify_exit(127, "bwrap: execvp claude: No such file\n") + assert isinstance(exc, SandboxDeniedError) + + def test_zero_exit_is_not_a_failure(self) -> None: + assert bwrap.classify_exit(0, "") is None + assert bwrap.classify_exit(0, "bwrap: noise") is None + + def test_signature_matched_through_wrapper_prefix(self) -> None: + # Production passes SandboxWrapper-prefixed stderr; the matcher must + # strip "[Sandbox] " before the start-of-line bwrap: test. + exc = bwrap.classify_exit( + 1, "[Sandbox] bwrap: Creating new namespace failed\n" + ) + assert isinstance(exc, SandboxDeniedError) + + def test_bwrap_mention_mid_line_is_not_a_denial(self) -> None: + # H1/I3 false-positive resistance: bwrap: must be at the START of a + # line. Claude mentioning "bwrap:" mid-sentence (or in echoed output) + # on a non-zero exit must NOT be reclassified as a denial — post-C1 + # that would hard-abort the whole run. + assert ( + bwrap.classify_exit(1, "see the bwrap: docs for details\n") + is None + ) + assert bwrap.classify_exit(1, "output was bwrap: noise\n") is None + + def test_other_nonzero_is_claude_exit_not_sandbox(self) -> None: + # bwrap propagates the child's exit code; without its own "bwrap:" + # diagnostic these are Claude Code's own exits. + assert bwrap.classify_exit(2, "anything") is None + assert bwrap.classify_exit(137, "killed") is None + + def test_signature_matches_contract(self) -> None: + params = list(inspect.signature(bwrap.classify_exit).parameters) + assert params == ["returncode", "sandbox_stderr"] diff --git a/tests/unit/ralph/sandbox/adapters/test_dispatch.py b/tests/unit/ralph/sandbox/adapters/test_dispatch.py new file mode 100644 index 0000000..fd1b852 --- /dev/null +++ b/tests/unit/ralph/sandbox/adapters/test_dispatch.py @@ -0,0 +1,59 @@ +"""Unit tests for the adapters dispatch helper (MCLI-129).""" + +from __future__ import annotations + +import pytest + +from mfbt.commands.ralph.sandbox.adapters import ( + bwrap, + get_adapter, + get_classifier, + sandbox_exec, +) +from mfbt.commands.ralph.sandbox.exceptions import SandboxError +from mfbt.commands.ralph.sandbox.models import SandboxMechanism + + +class TestGetAdapter: + def test_each_mechanism_maps_to_its_build_prefix(self) -> None: + assert get_adapter(SandboxMechanism.SANDBOX_EXEC) is ( + sandbox_exec.build_prefix + ) + assert get_adapter(SandboxMechanism.BWRAP) is bwrap.build_prefix + + def test_every_mechanism_is_reachable_and_callable(self) -> None: + for mechanism in SandboxMechanism: + assert callable(get_adapter(mechanism)) + + def test_unknown_mechanism_raises(self) -> None: + with pytest.raises(SandboxError, match="no adapter for mechanism"): + get_adapter("docker") # type: ignore[arg-type] + + +class TestGetClassifier: + def test_each_mechanism_maps_to_its_classify_exit(self) -> None: + assert get_classifier(SandboxMechanism.SANDBOX_EXEC) is ( + sandbox_exec.classify_exit + ) + assert get_classifier(SandboxMechanism.BWRAP) is bwrap.classify_exit + + def test_every_mechanism_is_reachable_and_callable(self) -> None: + for mechanism in SandboxMechanism: + assert callable(get_classifier(mechanism)) + + def test_unknown_mechanism_raises(self) -> None: + with pytest.raises( + SandboxError, match="no exit classifier for mechanism" + ): + get_classifier("none") # type: ignore[arg-type] + + +def test_package_exposes_only_public_dispatch_surface() -> None: + from mfbt.commands.ralph.sandbox import adapters + + assert set(adapters.__all__) == { + "AdapterFn", + "ClassifierFn", + "get_adapter", + "get_classifier", + } diff --git a/tests/unit/ralph/sandbox/adapters/test_sandbox_exec.py b/tests/unit/ralph/sandbox/adapters/test_sandbox_exec.py new file mode 100644 index 0000000..e070267 --- /dev/null +++ b/tests/unit/ralph/sandbox/adapters/test_sandbox_exec.py @@ -0,0 +1,357 @@ +"""Unit tests for the sandbox-exec adapter (MCLI-126 + MCLI-131). + +Covers SBPL generation from the packaged ``claude.sb`` template, placeholder +substitution, the ``['sandbox-exec', '-f', ]`` prefix shape, source-tree +safety, and (macOS only) that the rendered profile compiles. +""" + +from __future__ import annotations + +import inspect +import re +import shutil +import subprocess +import sys +import tempfile +from collections.abc import Callable +from importlib.resources import files +from pathlib import Path + +import pytest + +from mfbt.commands.ralph.sandbox.adapters import sandbox_exec +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxDeniedError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.models import SandboxConfig + +MakeConfig = Callable[..., SandboxConfig] + + +def _policy_lines(profile: str) -> list[str]: + """Non-comment, non-blank SBPL forms in order.""" + return [ + ln.strip() + for ln in profile.splitlines() + if ln.strip() and not ln.strip().startswith(";") + ] + + +class TestRenderProfile: + def test_starts_with_version_then_allow_default( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + cfg = make_config(tmp_path, [tmp_path]) + lines = _policy_lines(sandbox_exec.render_profile(cfg)) + assert lines[0] == "(version 1)" + assert lines[1] == "(allow default)" + + def test_global_write_deny_present( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + cfg = make_config(tmp_path, [tmp_path]) + assert ( + '(deny file-write* (subpath "/"))' + in sandbox_exec.render_profile(cfg) + ) + + def test_one_allow_per_writable_path( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + d1 = tmp_path / "a" + d2 = tmp_path / "b" + d1.mkdir() + d2.mkdir() + missing = tmp_path / "gone" # filtered out: does not exist + cfg = make_config(tmp_path, [d1, d2, missing]) + profile = sandbox_exec.render_profile(cfg) + assert f'(allow file-write* (subpath "{d1}"))' in profile + assert f'(allow file-write* (subpath "{d2}"))' in profile + assert str(missing) not in profile + + def test_project_dir_placeholder_substituted( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + cfg = make_config(tmp_path, [tmp_path]) + profile = sandbox_exec.render_profile(cfg) + assert "%%PROJECT_DIR%%" not in profile + assert "%%WRITABLE_PATHS%%" not in profile + assert f'(allow file-write* (subpath "{tmp_path}"))' in profile + + def test_no_network_rules( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # MCLI-126: no network-related SBPL *rules* (comments may explain + # why network is intentionally left unrestricted; writable paths may + # coincidentally contain the substring "network"). + cfg = make_config(tmp_path, [tmp_path]) + rule_lines = _policy_lines(sandbox_exec.render_profile(cfg)) + net_rule = re.compile(r"\((?:allow|deny)\s+network") + assert not any(net_rule.search(line) for line in rule_lines) + + def test_loaded_from_packaged_template_not_inline( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # The documented header only lives in the packaged .sb asset, so its + # presence proves the adapter rendered the file (MCLI-131), not an + # ad-hoc inline string. + cfg = make_config(tmp_path, [tmp_path]) + assert "write-confine" in sandbox_exec.render_profile(cfg) + + def test_quotes_in_path_are_escaped( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + weird = tmp_path / 'we"ird' + weird.mkdir() + profile = sandbox_exec.render_profile(make_config(tmp_path, [weird])) + assert '(subpath "' + str(weird).replace('"', '\\"') + '")' in profile + + def test_project_dir_subpath_is_canonicalized( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # A symlinked project dir must be emitted as its realpath, or the + # macOS sandbox-exec kernel never matches its own write-allow rule + # (the "bash tool sandbox issue" root cause). + real = tmp_path / "real_proj" + real.mkdir() + link = tmp_path / "link_proj" + link.symlink_to(real) + profile = sandbox_exec.render_profile(make_config(link, [real])) + assert f'(subpath "{real}")' in profile + assert f'(subpath "{link}")' not in profile + + def test_dev_grant_is_curated_not_blanket( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # Security: the device-node re-grant must be a curated allow-list, not + # a blanket (subpath "/dev") (which would also expose every /dev/fd/N + # alias and let a process side-step the project write-confinement). + profile = sandbox_exec.render_profile(make_config(tmp_path, [tmp_path])) + rule_lines = _policy_lines(profile) + # No standalone blanket /dev allow. + assert '(allow file-write* (subpath "/dev"))' not in rule_lines + # The curated nodes the build tooling actually needs are present. + for needed in ( + '(literal "/dev/null")', + '(literal "/dev/urandom")', + '(literal "/dev/dtracehelper")', + '(literal "/dev/ptmx")', + '(regex #"^/dev/ttys[0-9]+$")', + '(subpath "/dev/fd")', + ): + assert needed in profile, needed + + def test_global_deny_precedes_every_write_allow( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + # SBPL is last-match-wins: the global write-deny is only effective if + # it appears BEFORE every (allow file-write* ...) re-grant in the + # rendered output. A template edit that moved a token above the deny + # would silently produce an allow-all policy that still parses — pin + # the ordering so that regression fails loudly here. + d1 = tmp_path / "w1" + d1.mkdir() + rule_lines = _policy_lines( + sandbox_exec.render_profile(make_config(tmp_path, [d1])) + ) + deny = '(deny file-write* (subpath "/"))' + assert deny in rule_lines + deny_idx = rule_lines.index(deny) + allow_idxs = [ + i + for i, ln in enumerate(rule_lines) + if ln.startswith("(allow file-write*") + ] + assert allow_idxs, "expected at least one write-allow rule" + assert deny_idx < min(allow_idxs), ( + "global write-deny must precede every write-allow re-grant " + f"(deny at {deny_idx}, first allow at {min(allow_idxs)})" + ) + + +class TestBuildPrefix: + def test_returns_three_element_prefix( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + prefix = sandbox_exec.build_prefix(make_config(tmp_path, [tmp_path])) + assert len(prefix) == 3 + assert prefix[0] == "sandbox-exec" + assert prefix[1] == "-f" + assert Path(prefix[2]).exists() + + def test_temp_file_holds_rendered_profile( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + cfg = make_config(tmp_path, [tmp_path]) + prefix = sandbox_exec.build_prefix(cfg) + written = Path(prefix[2]).read_text(encoding="utf-8") + assert written == sandbox_exec.render_profile(cfg) + Path(prefix[2]).unlink() + + def test_does_not_modify_source_tree( + self, tmp_path: Path, make_config: MakeConfig + ) -> None: + template = ( + files("mfbt.commands.ralph.profiles") + .joinpath("claude.sb") + .read_text(encoding="utf-8") + ) + prefix = sandbox_exec.build_prefix(make_config(tmp_path, [tmp_path])) + # Temp file lives in the OS temp dir, not the package source tree. + assert str(Path(prefix[2]).parent).startswith( + str(Path(tempfile.gettempdir())) + ) + # The packaged template still has its placeholders intact. + assert "%%WRITABLE_PATHS%%" in template + Path(prefix[2]).unlink() + + def test_signature_matches_contract(self) -> None: + params = list(inspect.signature(sandbox_exec.build_prefix).parameters) + assert params == ["config"] + + def test_profile_write_failure_is_setup_error( + self, + tmp_path: Path, + make_config: MakeConfig, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + # Writing the SBPL profile is sandbox *setup*: an OSError there → + # SandboxSetupError (mechanism-tagged), not a runtime denial. + def _boom(*_a: object, **_k: object) -> object: + raise OSError("No space left on device") + + monkeypatch.setattr(tempfile, "NamedTemporaryFile", _boom) + with pytest.raises(SandboxSetupError) as exc: + sandbox_exec.build_prefix(make_config(tmp_path, [tmp_path])) + assert exc.value.mechanism == "sandbox_exec" + assert "SBPL profile" in str(exc.value) + + +class TestClassifyExit: + def test_nonzero_without_deny_signature_is_claude_exit(self) -> None: + # I5: a non-zero exit with no sandbox-exec denial signature is + # Claude's own exit, NOT a sandbox denial (masking it behind a wrong + # "sandbox blocked this" message would misdirect the user). + for code in (1, 2, 65, 126): + assert sandbox_exec.classify_exit(code, "") is None + assert ( + sandbox_exec.classify_exit(1, "claude: app error\n") is None + ) + + def test_deny_signature_is_sandbox_denied(self) -> None: + exc = sandbox_exec.classify_exit( + 1, "claude(123) deny(1) file-write* /etc/hosts\n" + ) + assert isinstance(exc, SandboxDeniedError) + assert exc.mechanism == "sandbox_exec" + assert "deny(1)" in str(exc) + + def test_sandbox_exec_startup_failure_is_denied(self) -> None: + exc = sandbox_exec.classify_exit( + 1, "sandbox-exec: failed to parse profile\n" + ) + assert isinstance(exc, SandboxDeniedError) + + def test_zero_exit_is_not_a_failure(self) -> None: + assert sandbox_exec.classify_exit(0, "irrelevant") is None + assert sandbox_exec.classify_exit(0, "deny(1) file-write*") is None + + def test_deny_signature_matched_through_wrapper_prefix(self) -> None: + # Production passes SandboxWrapper-prefixed stderr ("[Sandbox] ..."); + # the line-aware matcher must strip that marker before testing. + exc = sandbox_exec.classify_exit( + 1, "[Sandbox] claude(7) deny(2) file-write* /etc/x\n" + ) + assert isinstance(exc, SandboxDeniedError) + + def test_bare_deny_word_without_id_is_not_a_denial(self) -> None: + # H1/I3 false-positive resistance: Claude echoing the literal text + # "deny(" (e.g. printing source code) lacks the kernel deny() + # token, so a non-zero Claude exit must NOT be misread as a denial + # (which, post-C1, would hard-abort the whole run). + assert ( + sandbox_exec.classify_exit(1, 'print("deny(this)")\n') is None + ) + assert sandbox_exec.classify_exit(1, "would deny() it\n") is None + + def test_sandbox_exec_marker_only_at_line_start(self) -> None: + # The sandbox-exec: startup-failure marker counts only at the start + # of a line, so a mid-sentence mention in Claude output is ignored. + assert ( + sandbox_exec.classify_exit( + 1, "I ran sandbox-exec: and it was fine\n" + ) + is None + ) + + def test_signature_matches_contract(self) -> None: + params = list( + inspect.signature(sandbox_exec.classify_exit).parameters + ) + assert params == ["returncode", "sandbox_stderr"] + + +def test_render_profile_no_comment_breakout_multi_path( + tmp_path: Path, make_config: MakeConfig +) -> None: + """Regression: multi-line %%WRITABLE_PATHS%% must not break out of the + ``;;`` doc-comment and inject bare prose tokens (``unbound variable: + expands``). Platform-independent so it catches the bug everywhere. + """ + d1 = tmp_path / "a" + d2 = tmp_path / "b" + d1.mkdir() + d2.mkdir() + profile = sandbox_exec.render_profile(make_config(tmp_path, [d1, d2])) + assert "%%" not in profile + for line in profile.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith(";;"): + continue + # Every active (non-comment) line must be a balanced s-expression + # with no trailing bare tokens like the old " expands to one". + assert stripped.startswith("(") and stripped.endswith(")"), ( + f"non-sexpr line leaked into the SBPL policy: {line!r}" + ) + + +def test_render_profile_rejects_duplicated_token( + tmp_path: Path, make_config: MakeConfig, monkeypatch: pytest.MonkeyPatch +) -> None: + """A stray copy of a placeholder (e.g. left in a comment) fails loud.""" + good = sandbox_exec._load_profile_template() + monkeypatch.setattr( + sandbox_exec, + "_load_profile_template", + lambda: good + "\n;; stray %%WRITABLE_PATHS%% in a comment\n", + ) + with pytest.raises(SandboxSetupError, match="exactly once"): + sandbox_exec.render_profile(make_config(tmp_path, [tmp_path])) + + +@pytest.mark.skipif( + sys.platform != "darwin", reason="sandbox-exec is macOS-only" +) +def test_rendered_profile_compiles_on_macos( + tmp_path: Path, make_config: MakeConfig +) -> None: + if shutil.which("sandbox-exec") is None: # pragma: no cover + pytest.skip("sandbox-exec not available") + # ≥2 existing writable paths so write_rules is MULTI-LINE -- the exact + # condition that triggered the comment-breakout regression. + d1 = tmp_path / "a" + d2 = tmp_path / "b" + d1.mkdir() + d2.mkdir() + prefix = sandbox_exec.build_prefix(make_config(tmp_path, [tmp_path, d1, d2])) + # Running /usr/bin/true under the profile exercises the SBPL compiler: + # a malformed profile makes sandbox-exec exit non-zero before running it. + completed = subprocess.run( # noqa: S603 + [*prefix, "/usr/bin/true"], + capture_output=True, + timeout=15, + check=False, + ) + assert completed.returncode == 0, completed.stderr + Path(prefix[2]).unlink(missing_ok=True) diff --git a/tests/unit/ralph/sandbox/test_detect.py b/tests/unit/ralph/sandbox/test_detect.py new file mode 100644 index 0000000..017cae6 --- /dev/null +++ b/tests/unit/ralph/sandbox/test_detect.py @@ -0,0 +1,295 @@ +"""Unit tests for mfbt.commands.ralph.sandbox.detect. + +Every external probe is dependency-injected, so these tests run with no real +binaries or platform coupling. After the two-mechanism cleanup detection is +purely platform-driven and hard-fails (raises ``SandboxDetectionError``) when +the platform's required sandbox is unavailable -- there is no chain, no config +override, no explicit request, and no ``NONE`` fallback. +""" + +from __future__ import annotations + +import importlib +import inspect +import platform +import shutil +import subprocess +import sys +from collections.abc import Callable +from dataclasses import dataclass +from unittest.mock import patch + +import pytest + +from mfbt.commands.ralph.sandbox import detect as detect_mod +from mfbt.commands.ralph.sandbox.detect import ( + LivenessProbeResult, + LivenessStatus, + _probe_liveness, + detect_sandbox, +) +from mfbt.commands.ralph.sandbox.exceptions import SandboxDetectionError +from mfbt.commands.ralph.sandbox.models import SandboxMechanism + +# --------------------------------------------------------------------------- +# Injectable fakes +# --------------------------------------------------------------------------- + + +def which_for(*present: str) -> Callable[[str], str | None]: + """Build a fake ``which`` that resolves only the named binaries.""" + found = set(present) + + def _which(name: str) -> str | None: + return f"/usr/bin/{name}" if name in found else None + + return _which + + +@dataclass +class FakeCompleted: + returncode: int + stderr: object = b"" + + +def run_returning(code: int, stderr: object = b"") -> Callable[..., FakeCompleted]: + def _run(cmd: list[str], **kwargs: object) -> FakeCompleted: + return FakeCompleted(code, stderr) + + return _run + + +def run_raising(exc: BaseException) -> Callable[..., FakeCompleted]: + def _run(cmd: list[str], **kwargs: object) -> FakeCompleted: + raise exc + + return _run + + +def never_run(cmd: list[str], **kwargs: object) -> FakeCompleted: + raise AssertionError("subprocess must not be invoked in this path") + + +# =========================================================================== +# Foundation -- signature, defaults, no import-time side effects +# =========================================================================== + + +class TestModuleFoundation: + def test_signature_defaults_are_stdlib(self) -> None: + sig = inspect.signature(detect_sandbox) + assert sig.parameters["which_fn"].default is shutil.which + assert sig.parameters["system_fn"].default is platform.system + assert sig.parameters["run_fn"].default is subprocess.run + # The config-override path is gone: no config_loader / explicit params. + assert "config_loader" not in sig.parameters + assert "explicit" not in sig.parameters + assert sig.return_annotation == "SandboxMechanism" + + def test_detect_does_not_import_mfbt_config(self) -> None: + # The detect<->config import cycle is severed: detect.py must not pull + # in mfbt.config at all (the old config-override path is removed). + src = inspect.getsource(detect_mod) + assert "from mfbt.config import" not in src + assert "import mfbt.config" not in src + + def test_import_has_no_side_effects(self) -> None: + # Import a throwaway fresh copy with the stdlib probes patched. We must + # NOT reload the shared module object: reloading rebinds its globals, + # so functions already imported by this test file would resolve their + # module-level names against the new dict and break identity checks. + mod_name = "mfbt.commands.ralph.sandbox.detect" + parent_name, _, leaf = mod_name.rpartition(".") + parent = sys.modules[parent_name] + original = sys.modules[mod_name] + # importlib.import_module() rebinds the parent package's submodule + # attribute to the throwaway copy; restoring only sys.modules[mod_name] + # leaves sys.modules[parent].detect dangling at the fresh module, which + # poisons any later ``from ...detect import detect_sandbox`` (and the + # patches that target it). Save and restore the parent attr too. + original_parent_attr = getattr(parent, leaf) + del sys.modules[mod_name] + try: + with ( + patch("subprocess.run") as m_run, + patch("shutil.which") as m_which, + patch("platform.system") as m_system, + ): + importlib.import_module(mod_name) + m_run.assert_not_called() + m_which.assert_not_called() + m_system.assert_not_called() + finally: + sys.modules[mod_name] = original + setattr(parent, leaf, original_parent_attr) + + +# =========================================================================== +# Platform-driven detection (macOS -> sandbox-exec, Linux -> bwrap) +# =========================================================================== + + +class TestPlatformDetection: + def test_macos_returns_sandbox_exec_without_subprocess(self) -> None: + result = detect_sandbox( + which_for("sandbox-exec"), + lambda: "Darwin", + run_fn=never_run, + ) + assert result is SandboxMechanism.SANDBOX_EXEC + + def test_macos_missing_sandbox_exec_raises(self) -> None: + with pytest.raises( + SandboxDetectionError, match="sandbox-exec is required" + ): + detect_sandbox(which_for(), lambda: "Darwin", run_fn=never_run) + + def test_linux_returns_bwrap_when_probe_ok(self) -> None: + result = detect_sandbox( + which_for("bwrap"), + lambda: "Linux", + run_fn=run_returning(0), + ) + assert result is SandboxMechanism.BWRAP + + def test_linux_missing_bwrap_raises(self) -> None: + with pytest.raises( + SandboxDetectionError, match="bubblewrap.*not.*found" + ): + detect_sandbox(which_for(), lambda: "Linux", run_fn=never_run) + + def test_linux_bwrap_present_but_probe_fails_raises(self) -> None: + with pytest.raises( + SandboxDetectionError, match="installed but not functional" + ): + detect_sandbox( + which_for("bwrap"), + lambda: "Linux", + run_fn=run_returning(1, stderr=b"namespace denied"), + ) + + def test_macos_never_probes_bwrap(self) -> None: + # sandbox-exec missing on macOS must NOT fall through to bwrap/docker. + with pytest.raises(SandboxDetectionError): + detect_sandbox( + which_for("bwrap", "docker"), + lambda: "Darwin", + run_fn=never_run, + ) + + def test_unsupported_platform_raises(self) -> None: + with pytest.raises( + SandboxDetectionError, + match="No supported sandbox mechanism for platform", + ): + detect_sandbox( + which_for("docker"), + lambda: "Windows", + run_fn=never_run, + ) + + def test_no_subprocess_when_bwrap_binary_absent(self) -> None: + # Linux + no bwrap binary: must raise without ever running a probe. + with pytest.raises(SandboxDetectionError): + detect_sandbox(which_for(), lambda: "Linux", run_fn=never_run) + + +# =========================================================================== +# bwrap functional liveness probe (beyond binary presence) +# =========================================================================== + + +class TestFunctionalLivenessProbe: + def test_not_found_when_binary_absent_and_no_subprocess(self) -> None: + result = _probe_liveness("bwrap", ("--version",), which_for(), never_run) + assert result.status is LivenessStatus.NOT_FOUND + assert result.is_available is False + + def test_available_on_exit_zero(self) -> None: + result = _probe_liveness( + "bwrap", ("--ro-bind", "/", "/", "true"), + which_for("bwrap"), run_returning(0), + ) + assert result.status is LivenessStatus.AVAILABLE + assert result.is_available is True + assert result.exit_code == 0 + + def test_probe_failed_distinct_from_not_found(self) -> None: + result = _probe_liveness( + "bwrap", ("--ro-bind", "/", "/", "true"), + which_for("bwrap"), run_returning(1, stderr=b"namespace denied"), + ) + assert result.status is LivenessStatus.PROBE_FAILED + assert result.status is not LivenessStatus.NOT_FOUND + assert result.exit_code == 1 + assert result.stderr == "namespace denied" + + def test_probe_failed_on_timeout(self) -> None: + run = run_raising(subprocess.TimeoutExpired(cmd="bwrap", timeout=5)) + result = _probe_liveness("bwrap", ("x",), which_for("bwrap"), run) + assert result.status is LivenessStatus.PROBE_FAILED + + def test_probe_failed_on_os_error(self) -> None: + result = _probe_liveness( + "bwrap", ("x",), which_for("bwrap"), run_raising(OSError("boom")) + ) + assert result.status is LivenessStatus.PROBE_FAILED + + def test_result_is_frozen(self) -> None: + import dataclasses + + r = LivenessProbeResult(LivenessStatus.AVAILABLE) + with pytest.raises(dataclasses.FrozenInstanceError): + r.status = LivenessStatus.NOT_FOUND # type: ignore[misc] + + +class TestProbeDebugLogging: + """MCLI-151: probe command + stderr surfaced only when debug is on.""" + + def test_default_debug_disabled(self) -> None: + assert detect_mod._debug_enabled is False + + def test_logs_resolved_command_and_stderr_when_debug_on( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setattr(detect_mod, "_debug_enabled", True) + _probe_liveness( + "bwrap", ("--ro-bind", "/", "/", "true"), + which_for("bwrap"), run_returning(1, stderr=b"namespace denied"), + ) + err = capsys.readouterr().err + assert "[sandbox-debug] probe: bwrap --ro-bind / / true" in err + assert "[sandbox-debug] probe bwrap stderr: namespace denied" in err + + def test_silent_when_debug_off( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + _probe_liveness( + "bwrap", ("--version",), + which_for("bwrap"), run_returning(1, stderr=b"boom"), + ) + assert "[sandbox-debug]" not in capsys.readouterr().err + + def test_logs_when_probe_cannot_run( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setattr(detect_mod, "_debug_enabled", True) + _probe_liveness( + "bwrap", ("x",), which_for("bwrap"), + run_raising(OSError("cannot exec")), + ) + assert "could not run: cannot exec" in capsys.readouterr().err + + def test_no_subprocess_no_debug_output( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + # Binary absent -> returns before any probe; nothing to log. + monkeypatch.setattr(detect_mod, "_debug_enabled", True) + _probe_liveness("bwrap", ("--version",), which_for(), never_run) + assert capsys.readouterr().err == "" diff --git a/tests/unit/ralph/sandbox/test_exceptions.py b/tests/unit/ralph/sandbox/test_exceptions.py new file mode 100644 index 0000000..3b48875 --- /dev/null +++ b/tests/unit/ralph/sandbox/test_exceptions.py @@ -0,0 +1,93 @@ +"""Unit tests for mfbt.commands.ralph.sandbox.exceptions. + +Covers the MMCLI-031 detection-phase hierarchy (MCLI-119/123/125) plus the +MCLI-149 additions: mechanism attribution, ``__str__`` behavior, and the +runtime :class:`SandboxDeniedError`. +""" + +from __future__ import annotations + +import pytest + +from mfbt.commands.ralph.sandbox import ( + SandboxCompatibilityError, + SandboxDeniedError, + SandboxDetectionError, + SandboxError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox import exceptions as exc_module + +_SUBCLASSES = ( + SandboxDetectionError, + SandboxCompatibilityError, + SandboxSetupError, + SandboxDeniedError, +) + + +class TestExceptionHierarchy: + def test_base_is_runtime_error(self) -> None: + assert issubclass(SandboxError, RuntimeError) + + @pytest.mark.parametrize("cls", _SUBCLASSES) + def test_subclass_of_base_and_runtime_error(self, cls: type) -> None: + assert issubclass(cls, SandboxError) + assert issubclass(cls, RuntimeError) + + def test_distinct_types(self) -> None: + # No subclass is a subclass of any sibling. + for i, a in enumerate(_SUBCLASSES): + for j, b in enumerate(_SUBCLASSES): + if i != j: + assert not issubclass(a, b) + + @pytest.mark.parametrize("cls", _SUBCLASSES) + def test_catchable_via_common_base(self, cls: type) -> None: + with pytest.raises(SandboxError): + raise cls("boom") + + +class TestMechanismAttribution: + def test_mechanism_defaults_to_none(self) -> None: + err = SandboxError("plain") + assert err.mechanism is None + assert err.message == "plain" + + def test_str_without_mechanism_is_bare_message(self) -> None: + # Back-compat: unattributed errors stringify exactly as before. + err = SandboxDetectionError("bwrap not found on PATH") + assert str(err) == "bwrap not found on PATH" + + @pytest.mark.parametrize("cls", _SUBCLASSES) + def test_str_with_mechanism_is_prefixed(self, cls: type) -> None: + err = cls("denied", mechanism="bwrap") + assert err.mechanism == "bwrap" + assert str(err) == "[bwrap] denied" + + def test_mechanism_is_keyword_only(self) -> None: + with pytest.raises(TypeError): + SandboxError("msg", "bwrap") # type: ignore[misc] + + def test_message_round_trips(self) -> None: + err = SandboxDetectionError("bwrap not found on PATH") + assert str(err) == "bwrap not found on PATH" + + +class TestPublicExports: + def test_public_exports(self) -> None: + assert set(exc_module.__all__) == { + "SandboxCompatibilityError", + "SandboxDeniedError", + "SandboxDetectionError", + "SandboxError", + "SandboxSetupError", + } + + def test_importable_from_package(self) -> None: + # AC: ``from mfbt.commands.ralph.sandbox import SandboxDeniedError``. + from mfbt.commands.ralph.sandbox import ( + SandboxDeniedError as PkgDenied, + ) + + assert PkgDenied is SandboxDeniedError diff --git a/tests/unit/ralph/sandbox/test_models.py b/tests/unit/ralph/sandbox/test_models.py new file mode 100644 index 0000000..5c2d08d --- /dev/null +++ b/tests/unit/ralph/sandbox/test_models.py @@ -0,0 +1,280 @@ +"""Unit tests for mfbt.commands.ralph.sandbox.models. + +Covers the MMCLI-030 data-model acceptance criteria: +MCLI-112 (SandboxMechanism), MCLI-113 (SandboxPermissions defaults), +MCLI-114 (SandboxConfig), MCLI-115 (for_project factory + path resolution), +MCLI-117 (SandboxResult). +""" + +from __future__ import annotations + +import dataclasses +from pathlib import Path + +import pytest + +from mfbt.commands.ralph.sandbox import ( + SandboxConfig, + SandboxMechanism, + SandboxPermissions, +) +from mfbt.commands.ralph.sandbox import models as models_module +from mfbt.commands.ralph.sandbox.exceptions import SandboxSetupError +from mfbt.commands.ralph.sandbox.models import resolve_paths + + +class TestSandboxMechanism: + """Enum members, values, import paths after the two-mechanism cleanup.""" + + def test_exactly_two_members(self) -> None: + assert list(SandboxMechanism) == [ + SandboxMechanism.SANDBOX_EXEC, + SandboxMechanism.BWRAP, + ] + + def test_string_values(self) -> None: + assert SandboxMechanism.SANDBOX_EXEC.value == "sandbox_exec" + assert SandboxMechanism.BWRAP.value == "bwrap" + + def test_removed_mechanisms_no_longer_exist(self) -> None: + for gone in ("FIREJAIL", "DOCKER", "NONE"): + assert not hasattr(SandboxMechanism, gone) + + def test_both_import_paths_resolve_to_same_enum(self) -> None: + assert SandboxMechanism is models_module.SandboxMechanism + + +class TestSandboxPermissions: + """MCLI-113: zero-arg construction, defaults, types, immutability.""" + + def test_zero_arg_construction(self) -> None: + perms = SandboxPermissions() + assert isinstance(perms.project_dir, Path) + assert perms.allow_network is True + + def test_all_path_fields_are_path_instances(self) -> None: + perms = SandboxPermissions() + assert all(isinstance(p, Path) for p in perms.writable_paths) + assert all(isinstance(p, Path) for p in perms.readonly_paths) + + def test_six_default_writable_paths(self) -> None: + perms = SandboxPermissions() + assert len(perms.writable_paths) == 6 + names = {p.name for p in perms.writable_paths} + # ~/.claude, ~/.npm, ~/.cache and /tmp by name; cwd + claude bin dir + # are environment-dependent but still present in the list of six. + assert {".claude", ".npm", ".cache"}.issubset(names) + assert Path("/tmp") in perms.writable_paths + + def test_two_default_readonly_paths(self) -> None: + perms = SandboxPermissions() + assert perms.readonly_paths == (Path("/usr/lib"), Path("/usr/local/lib")) + + def test_path_lists_are_immutable_tuples(self) -> None: + # I3: the frozen security-policy object must be deeply immutable -- + # a holder cannot widen the writable surface via in-place mutation. + perms = SandboxPermissions(writable_paths=[Path("/a")]) + assert isinstance(perms.writable_paths, tuple) + assert isinstance(perms.readonly_paths, tuple) + with pytest.raises(AttributeError): + perms.writable_paths.append(Path("/etc")) # type: ignore[attr-defined] + + def test_readable_paths_aliases_readonly_paths(self) -> None: + perms = SandboxPermissions() + assert perms.readable_paths == perms.readonly_paths + + def test_frozen(self) -> None: + perms = SandboxPermissions() + assert dataclasses.is_dataclass(perms) + with pytest.raises(dataclasses.FrozenInstanceError): + perms.allow_network = False # type: ignore[misc] + + def test_independent_default_lists_per_instance(self) -> None: + a = SandboxPermissions() + b = SandboxPermissions() + assert a.writable_paths is not b.writable_paths + + def test_zero_arg_and_for_project_pass_the_confinement_guard( + self, tmp_path: Path + ) -> None: + # Regression: the canonical permission sets must NOT trip the + # root/$HOME guard (cwd, ~/.claude, /tmp, caches are all specific). + SandboxPermissions() + SandboxPermissions.for_project(tmp_path / "proj") + + def test_writable_filesystem_root_rejected(self) -> None: + # I2: a writable filesystem root is indistinguishable from "confine + # to nothing" — both adapters would translate it to allow-all. + with pytest.raises(SandboxSetupError, match="filesystem root"): + SandboxPermissions( + project_dir=Path("/proj"), writable_paths=[Path("/")] + ) + + def test_writable_bare_home_rejected(self) -> None: + with pytest.raises(SandboxSetupError, match="home directory"): + SandboxPermissions( + project_dir=Path("/proj"), + writable_paths=[Path.home()], + ) + + def test_project_dir_filesystem_root_rejected(self) -> None: + # project_dir is re-allowed verbatim by the SBPL profile / bound rw + # by bwrap, so a root project_dir is equally catastrophic. + with pytest.raises(SandboxSetupError, match="filesystem root"): + SandboxPermissions( + project_dir=Path("/"), writable_paths=[Path("/proj")] + ) + + +class TestSandboxPermissionsForProject: + """MCLI-115: for_project factory, env overrides, path resolution.""" + + def test_for_project_default_path_set( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + home = tmp_path / "home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + monkeypatch.delenv("NPM_CONFIG_CACHE", raising=False) + monkeypatch.delenv("PIP_CACHE_DIR", raising=False) + project = tmp_path / "project" + + perms = SandboxPermissions.for_project(project) + + assert perms.project_dir == project + assert project in perms.writable_paths + assert home / ".claude" in perms.writable_paths + assert home / ".npm" in perms.writable_paths + assert home / ".cache" / "pip" in perms.writable_paths + assert perms.readonly_paths == ( + Path("/usr/lib"), + Path("/usr/local/lib"), + ) + assert perms.allow_network is True + + def test_for_project_respects_cache_env_overrides( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + npm_cache = tmp_path / "npm-cache" + pip_cache = tmp_path / "pip-cache" + monkeypatch.setenv("NPM_CONFIG_CACHE", str(npm_cache)) + monkeypatch.setenv("PIP_CACHE_DIR", str(pip_cache)) + + perms = SandboxPermissions.for_project(tmp_path / "project") + + assert npm_cache in perms.writable_paths + assert pip_cache in perms.writable_paths + + def test_resolve_paths_expands_tilde_and_drops_missing( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + home = tmp_path / "home" + home.mkdir() + (home / "exists").mkdir() + monkeypatch.setenv("HOME", str(home)) + + resolved = resolve_paths([Path("~/exists"), Path("~/missing")]) + + assert resolved == [home / "exists"] + assert all("~" not in str(p) for p in resolved) + + def test_resolve_paths_canonicalizes_symlinks( + self, tmp_path: Path + ) -> None: + # Root cause of the macOS "bash tool sandbox issue": sandbox-exec + # matches SBPL subpaths against the kernel-resolved path, so a rule + # built from a symlinked path (e.g. /tmp -> /private/tmp, $TMPDIR -> + # /private/var/folders/...) never matches the real write target. + # resolve_paths() must emit the canonical (realpath) form. + real = tmp_path / "real_dir" + real.mkdir() + link = tmp_path / "link_dir" + link.symlink_to(real) + + resolved = resolve_paths([link]) + + assert resolved == [real] + assert resolved[0] == real.resolve() + + def test_resolved_writable_paths_filters_nonexistent( + self, tmp_path: Path + ) -> None: + present = tmp_path / "present" + present.mkdir() + perms = SandboxPermissions( + writable_paths=[present, tmp_path / "absent"], + ) + assert perms.resolved_writable_paths() == [present] + + def test_resolved_readonly_paths_filters_nonexistent( + self, tmp_path: Path + ) -> None: + present = tmp_path / "ro" + present.mkdir() + perms = SandboxPermissions( + readonly_paths=[present, tmp_path / "absent"], + ) + assert perms.resolved_readonly_paths() == [present] + + +class TestSandboxConfig: + """MCLI-114: three required fields, no defaults, field order, frozen.""" + + def _perms(self, project_dir: Path = Path("/p")) -> SandboxPermissions: + # project_dir must agree with the SandboxConfig it is paired with + # (enforced by SandboxConfig.__post_init__). + return SandboxPermissions(project_dir=project_dir) + + def test_construction_with_all_three_fields(self) -> None: + perms = self._perms(Path("/work/proj")) + cfg = SandboxConfig( + project_dir=Path("/work/proj"), + base_url="http://localhost:8000", + permissions=perms, + ) + assert cfg.project_dir == Path("/work/proj") + assert cfg.base_url == "http://localhost:8000" + # The bound permissions object is stored as-is (no silent replace). + assert cfg.permissions is perms + + def test_positional_field_order(self) -> None: + perms = self._perms(Path("/p")) + cfg = SandboxConfig(Path("/p"), "http://x", perms) + assert cfg.project_dir == Path("/p") + assert cfg.base_url == "http://x" + assert cfg.permissions is perms + + def test_project_dir_must_agree_with_permissions(self) -> None: + # I2: a config whose project_dir disagrees with its permissions' + # project_dir is a latent confinement-correctness hole (the SBPL + # %%PROJECT_DIR%% token and the write-allow rules would describe + # different roots) and must fail loud at construction. + with pytest.raises(SandboxSetupError, match="disagrees with"): + SandboxConfig( + project_dir=Path("/work/a"), + base_url="http://x", + permissions=SandboxPermissions(project_dir=Path("/work/b")), + ) + + def test_no_field_has_a_default(self) -> None: + fields = dataclasses.fields(SandboxConfig) + names = [f.name for f in fields] + assert names == ["project_dir", "base_url", "permissions"] + for f in fields: + assert f.default is dataclasses.MISSING + assert f.default_factory is dataclasses.MISSING # type: ignore[misc] + + def test_omitting_any_field_raises_type_error(self) -> None: + with pytest.raises(TypeError): + SandboxConfig() # type: ignore[call-arg] + with pytest.raises(TypeError): + SandboxConfig(project_dir=Path("/p")) # type: ignore[call-arg] + with pytest.raises(TypeError): + SandboxConfig( # type: ignore[call-arg] + project_dir=Path("/p"), base_url="http://x" + ) + + def test_frozen(self) -> None: + cfg = SandboxConfig(Path("/p"), "http://x", self._perms(Path("/p"))) + with pytest.raises(dataclasses.FrozenInstanceError): + cfg.base_url = "http://y" # type: ignore[misc] diff --git a/tests/unit/ralph/sandbox/test_wrapper.py b/tests/unit/ralph/sandbox/test_wrapper.py new file mode 100644 index 0000000..5fd664a --- /dev/null +++ b/tests/unit/ralph/sandbox/test_wrapper.py @@ -0,0 +1,373 @@ +"""Unit tests for SandboxWrapper (two-mechanism cleanup).""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest + +from mfbt.commands.ralph.sandbox import wrapper as wrapper_mod +from mfbt.commands.ralph.sandbox.adapters import bwrap, sandbox_exec +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxDeniedError, + SandboxDetectionError, + SandboxError, +) +from mfbt.commands.ralph.sandbox.models import ( + SandboxConfig, + SandboxMechanism, + SandboxPermissions, +) +from mfbt.commands.ralph.sandbox.wrapper import ( + SandboxWrapper, + format_sandbox_debug, + format_sandbox_stderr, +) + + +def _cfg(project_dir: Path) -> SandboxConfig: + perms = SandboxPermissions( + project_dir=project_dir, + writable_paths=[project_dir], + readonly_paths=[], + allow_network=True, + ) + return SandboxConfig( + project_dir=project_dir, + base_url="https://app.mfbt.ai", + permissions=perms, + ) + + +class TestWrap: + def test_bwrap_prefixes_command(self, tmp_path: Path) -> None: + cmd = ["claude", "-p", "x"] + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapped = wrapper.wrap(cmd) + assert wrapped[0] == "bwrap" + assert wrapped[-3:] == cmd + + def test_mechanism_property(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + assert wrapper.mechanism is SandboxMechanism.BWRAP + + +class TestPrepare: + def test_prepare_is_noop_no_args(self, tmp_path: Path) -> None: + # prepare() takes no arguments and does nothing for both mechanisms. + for mech in (SandboxMechanism.BWRAP, SandboxMechanism.SANDBOX_EXEC): + SandboxWrapper(mech, _cfg(tmp_path)).prepare() + + +class TestCleanup: + def test_cleanup_removes_sandbox_exec_temp_profile( + self, tmp_path: Path + ) -> None: + wrapper = SandboxWrapper( + SandboxMechanism.SANDBOX_EXEC, _cfg(tmp_path) + ) + wrapped = wrapper.wrap(["claude", "-p", "x"]) + profile = Path(wrapped[2]) # ['sandbox-exec', '-f', , ...] + assert profile.exists() + wrapper.cleanup() + assert not profile.exists() + + def test_cleanup_is_idempotent(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapper.wrap(["claude"]) + wrapper.cleanup() + wrapper.cleanup() # must not raise + + +class TestFormatSandboxStderr: + """MCLI-135: pure ``[Sandbox] `` line-prefixing helper.""" + + def test_non_empty_lines_prefixed(self) -> None: + assert format_sandbox_stderr("a\nb") == "[Sandbox] a\n[Sandbox] b" + + def test_empty_lines_preserved_unprefixed(self) -> None: + assert format_sandbox_stderr("a\n\nb") == "[Sandbox] a\n\n[Sandbox] b" + + def test_whitespace_only_line_treated_as_empty(self) -> None: + assert format_sandbox_stderr(" ") == " " + + def test_trailing_newline_preserved(self) -> None: + assert format_sandbox_stderr("oops\n") == "[Sandbox] oops\n" + + def test_empty_string_unchanged(self) -> None: + assert format_sandbox_stderr("") == "" + + def test_pure_no_side_effects(self) -> None: + raw = "x\ny\n" + first = format_sandbox_stderr(raw) + assert format_sandbox_stderr(raw) == first + assert raw == "x\ny\n" # input untouched + + +class TestInit: + """Constructor stores mechanism/config/adapter; guards.""" + + def test_stores_mechanism_and_config_properties( + self, tmp_path: Path + ) -> None: + cfg = _cfg(tmp_path) + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, cfg) + assert wrapper.mechanism is SandboxMechanism.BWRAP + assert wrapper.config is cfg + + @pytest.mark.parametrize( + ("mechanism", "expected"), + [ + (SandboxMechanism.SANDBOX_EXEC, sandbox_exec.build_prefix), + (SandboxMechanism.BWRAP, bwrap.build_prefix), + ], + ) + def test_stores_correct_adapter_callable( + self, mechanism: SandboxMechanism, expected, tmp_path: Path + ) -> None: + wrapper = SandboxWrapper(mechanism, _cfg(tmp_path)) + assert wrapper._build_prefix is expected + + @pytest.mark.parametrize("bogus", ["bwrap", 42, object()]) + def test_unrecognized_mechanism_raises_sandbox_error( + self, bogus, tmp_path: Path + ) -> None: + with pytest.raises(SandboxError, match="unrecognized sandbox"): + SandboxWrapper(bogus, _cfg(tmp_path)) + + def test_construction_has_zero_side_effects( + self, tmp_path: Path + ) -> None: + with ( + patch("subprocess.run") as m_run, + patch("subprocess.Popen") as m_popen, + patch("shutil.which") as m_which, + ): + SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + m_run.assert_not_called() + m_popen.assert_not_called() + m_which.assert_not_called() + + +class TestWrapCommand: + """MCLI-134: ``wrap_command`` is the canonical wrapping entry point.""" + + def test_bwrap_prefixes_command(self, tmp_path: Path) -> None: + cmd = ["claude", "-p", "x"] + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapped = wrapper.wrap_command(cmd) + assert wrapped[0] == "bwrap" + assert wrapped[-3:] == cmd + + def test_input_list_never_mutated(self, tmp_path: Path) -> None: + cmd = ["claude", "-p", "x"] + original = list(cmd) + SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)).wrap_command( + cmd + ) + assert cmd == original + + def test_never_injects_skip_permissions(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapped = wrapper.wrap_command(["claude", "-p", "x"]) + assert "--dangerously-skip-permissions" not in wrapped + + +class TestWrapAlias: + """MCLI-134: ``wrap`` is a thin backward-compatible alias.""" + + def test_wrap_equals_wrap_command(self, tmp_path: Path) -> None: + cmd = ["claude", "-p", "x"] + via_alias = SandboxWrapper( + SandboxMechanism.BWRAP, _cfg(tmp_path) + ).wrap(cmd) + via_canon = SandboxWrapper( + SandboxMechanism.BWRAP, _cfg(tmp_path) + ).wrap_command(cmd) + assert via_alias == via_canon + + +class TestSandboxStderrCapture: + """MCLI-135/137: capture + ``[Sandbox] `` attribution accessor.""" + + def test_accumulates_formatted(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + assert wrapper.note_stderr_line("bind failed\n") is True + assert wrapper.note_stderr_line("retrying\n") is True + assert wrapper.get_sandbox_stderr() == ( + "[Sandbox] bind failed\n[Sandbox] retrying\n" + ) + + def test_order_preserved_across_calls(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + for i in range(5): + wrapper.note_stderr_line(f"line {i}\n") + captured = wrapper.get_sandbox_stderr() + assert captured.index("line 0") < captured.index("line 4") + + def test_blank_line_passed_through(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapper.note_stderr_line("\n") + assert wrapper.get_sandbox_stderr() == "\n" + + +class TestDetectFallback: + """MCLI-136: ``mechanism=None`` defers to detect; explicit bypasses it.""" + + def test_none_resolves_via_detect_sandbox(self, tmp_path: Path) -> None: + with patch( + "mfbt.commands.ralph.sandbox.detect.detect_sandbox", + return_value=SandboxMechanism.BWRAP, + ) as m_detect: + wrapper = SandboxWrapper(None, _cfg(tmp_path)) + m_detect.assert_called_once_with() + assert wrapper.mechanism is SandboxMechanism.BWRAP + + def test_explicit_mechanism_does_not_call_detect( + self, tmp_path: Path + ) -> None: + with patch( + "mfbt.commands.ralph.sandbox.detect.detect_sandbox" + ) as m_detect: + SandboxWrapper(SandboxMechanism.SANDBOX_EXEC, _cfg(tmp_path)) + m_detect.assert_not_called() + + def test_detection_error_propagates(self, tmp_path: Path) -> None: + with ( + patch( + "mfbt.commands.ralph.sandbox.detect.detect_sandbox", + side_effect=SandboxDetectionError("bwrap not on PATH"), + ), + pytest.raises(SandboxDetectionError, match="bwrap not on PATH"), + ): + SandboxWrapper(None, _cfg(tmp_path)) + + +class TestBothAdaptersReachable: + """Every adapter reachable by enum injection, no real tools needed.""" + + @pytest.mark.parametrize( + ("mechanism", "expected"), + [ + (SandboxMechanism.SANDBOX_EXEC, sandbox_exec.build_prefix), + (SandboxMechanism.BWRAP, bwrap.build_prefix), + ], + ) + def test_reachable_without_sandbox_binaries( + self, mechanism: SandboxMechanism, expected, tmp_path: Path + ) -> None: + with patch("shutil.which", return_value=None): + wrapper = SandboxWrapper(mechanism, _cfg(tmp_path)) + assert wrapper.mechanism is mechanism + assert wrapper._build_prefix is expected + + +class TestClassifyExit: + """MCLI-150: wrapper delegates to the bound mechanism's classifier, + passing the accumulated sandbox stderr.""" + + def test_bwrap_failure_signature_classified_as_denied( + self, tmp_path: Path + ) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + wrapper.note_stderr_line("bwrap: Creating new namespace failed\n") + exc = wrapper.classify_exit(1) + assert isinstance(exc, SandboxDeniedError) + assert exc.mechanism == "bwrap" + + def test_bwrap_exit_1_without_signature_is_claude_exit( + self, tmp_path: Path + ) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + assert wrapper.classify_exit(1) is None + + def test_accumulated_sandbox_stderr_is_passed_through( + self, tmp_path: Path + ) -> None: + wrapper = SandboxWrapper(SandboxMechanism.SANDBOX_EXEC, _cfg(tmp_path)) + wrapper.note_stderr_line("claude(1) deny(1) file-write* /etc\n") + exc = wrapper.classify_exit(1) + assert isinstance(exc, SandboxDeniedError) + assert "deny(1) file-write*" in str(exc) + + def test_zero_exit_returns_none(self, tmp_path: Path) -> None: + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + assert wrapper.classify_exit(0) is None + + +class TestFormatSandboxDebug: + def test_non_empty_lines_prefixed(self) -> None: + assert format_sandbox_debug("a\nb") == ( + "[sandbox-debug] a\n[sandbox-debug] b" + ) + + def test_blank_lines_and_trailing_newline_preserved(self) -> None: + assert format_sandbox_debug("x\n\n") == "[sandbox-debug] x\n\n" + + def test_empty_string_unchanged(self) -> None: + assert format_sandbox_debug("") == "" + + +class TestDebugLogging: + """MCLI-151: gated by the (unwired) module ``_debug_enabled`` stand-in.""" + + def test_default_debug_disabled(self) -> None: + assert wrapper_mod._debug_enabled is False + + def test_wrap_command_logs_prefix_when_debug_on( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setattr(wrapper_mod, "_debug_enabled", True) + w = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + w.wrap_command(["claude", "-p", "hi"]) + err = capsys.readouterr().err + assert "[sandbox-debug] bwrap" in err + assert "[sandbox-debug] --bind" in err + + def test_wrap_command_silent_when_debug_off( + self, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + ) -> None: + w = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + w.wrap_command(["claude"]) + assert "[sandbox-debug]" not in capsys.readouterr().err + + def test_emit_debug_stderr_forwards_when_debug_on( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + w = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + w.note_stderr_line("bwrap: bind failed\n") + monkeypatch.setattr(wrapper_mod, "_debug_enabled", True) + w.emit_debug_sandbox_stderr() + out = capsys.readouterr().err + assert "[sandbox-debug]" in out + assert "bwrap: bind failed" in out + + def test_emit_debug_stderr_discards_when_debug_off( + self, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + ) -> None: + w = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + w.note_stderr_line("bwrap: bind failed\n") + w.emit_debug_sandbox_stderr() # debug OFF -> silent discard + assert capsys.readouterr().err == "" + + def test_emit_debug_stderr_noop_without_captured( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setattr(wrapper_mod, "_debug_enabled", True) + w = SandboxWrapper(SandboxMechanism.BWRAP, _cfg(tmp_path)) + w.emit_debug_sandbox_stderr() + assert capsys.readouterr().err == "" diff --git a/tests/unit/ralph/test_agent.py b/tests/unit/ralph/test_agent.py index d5d2b91..1c156e5 100644 --- a/tests/unit/ralph/test_agent.py +++ b/tests/unit/ralph/test_agent.py @@ -2,7 +2,10 @@ from __future__ import annotations +import errno import json +import os +from pathlib import Path from unittest.mock import MagicMock, patch import pytest @@ -13,21 +16,99 @@ _extract_display_text, _summarize_tool_use, ) +from mfbt.commands.ralph.sandbox.exceptions import ( + SandboxCompatibilityError, + SandboxDeniedError, + SandboxDetectionError, + SandboxError, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.models import ( + SandboxConfig, + SandboxMechanism, + SandboxPermissions, +) +from mfbt.commands.ralph.sandbox.wrapper import SandboxWrapper from mfbt.commands.ralph.types import AgentType +def _sandbox_cfg(project_dir: Path) -> SandboxConfig: + return SandboxConfig( + project_dir=project_dir, + base_url="https://app.mfbt.ai", + permissions=SandboxPermissions( + project_dir=project_dir, + writable_paths=[project_dir], + readonly_paths=[], + allow_network=True, + ), + ) + + +def _stub_wrapper( + mechanism: SandboxMechanism = SandboxMechanism.SANDBOX_EXEC, +) -> MagicMock: + """A duck-typed SandboxWrapper double. + + Defaults to a real sandbox (``SANDBOX_EXEC``), so ``build_command`` always + inserts ``--dangerously-skip-permissions`` at index 1. ``wrap_command`` is + identity (cmd[0] stays ``"claude"``), ``note_stderr_line`` returns + ``False`` (so the stub captures/surfaces nothing), and ``classify_exit`` + returns ``None`` (no sandbox-attributed failure — MCLI-150), keeping the + stderr and exit-code paths intact. + """ + w = MagicMock() + w.mechanism = mechanism + # run() pins the subprocess cwd to the sandbox's own project dir + # (realpath'd); give the stub a concrete, non-root, non-$HOME path so + # that resolution is deterministic. + w.config.project_dir = Path("/tmp/ralph-stub-project") + w.wrap_command.side_effect = lambda c: list(c) + w.note_stderr_line.return_value = False + w.get_sandbox_stderr.return_value = "" + w.classify_exit.return_value = None + return w + + +@pytest.fixture(autouse=True) +def _stub_sandbox_autodetect(): + """Keep ``AgentRunner(...)`` (no wrapper injected) construction hermetic. + + MCLI-138 made ``__init__`` call ``detect_sandbox()`` + build a real + ``SandboxWrapper`` when no wrapper is injected, which would probe real + binaries / read ``~/.mfbt/config.json``. Stub both symbols in the agent + namespace; tests that inject an explicit ``sandbox_wrapper=`` bypass this + path entirely, and the dedicated MCLI-138 tests re-patch explicitly. + Mirrors ``test_orchestrator.py``'s ``_stub_sandbox_detection``. + """ + with ( + patch( + "mfbt.commands.ralph.agent.detect_sandbox", + return_value=SandboxMechanism.SANDBOX_EXEC, + ), + patch( + "mfbt.commands.ralph.agent.SandboxWrapper", + side_effect=lambda mechanism, config: _stub_wrapper(mechanism), + ), + ): + yield + + class TestAgentRunnerBuildCommand: def test_basic_claude_command(self) -> None: + # C1: under a real sandbox (autouse stub = SANDBOX_EXEC) the bypass + # flag IS inserted at index 1 -- the OS sandbox is the safety + # boundary and headless claude (stdin=DEVNULL) cannot answer + # interactive tool approvals without it. runner = AgentRunner(AgentType.CLAUDE) cmd = runner.build_command("do something") - # Check the core flags (append-system-prompt tested separately) - assert cmd[:7] == [ - "claude", - "--dangerously-skip-permissions", + assert cmd[:2] == ["claude", "--dangerously-skip-permissions"] + for token in ( "-p", "do something", "--output-format", "stream-json", "--verbose", - ] + ): + assert token in cmd def test_with_max_turns(self) -> None: runner = AgentRunner(AgentType.CLAUDE, max_turns=10) @@ -46,6 +127,515 @@ def test_includes_append_system_prompt(self) -> None: assert "mark feature as in-progress" in prompt_value +class TestAgentRunnerSandboxInjection: + """MMCLI-032/034: SandboxWrapper injected/auto-detected, applied at run().""" + + def test_sandbox_wrapper_is_stored_when_injected(self) -> None: + sentinel = object() + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=sentinel) + assert runner._sandbox_wrapper is sentinel + + def test_build_command_stays_pure(self) -> None: + # Wrapping happens in run(), not build_command(): the base command + # is identical regardless of any injected (same-mechanism) wrapper. + plain = AgentRunner(AgentType.CLAUDE).build_command("p") + wrapped = AgentRunner( + AgentType.CLAUDE, sandbox_wrapper=_stub_wrapper() + ).build_command("p") + assert plain == wrapped + assert plain[0] == "claude" + + +class TestAgentRunnerSandboxAutodetect: + """MCLI-138: __init__ auto-detects exactly once when no wrapper given.""" + + @patch("mfbt.commands.ralph.agent.SandboxWrapper") + @patch("mfbt.commands.ralph.agent.detect_sandbox") + def test_autodetects_and_builds_wrapper_when_not_injected( + self, mock_detect: MagicMock, mock_wrapper_cls: MagicMock + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + built = object() + mock_wrapper_cls.return_value = built + + runner = AgentRunner(AgentType.CLAUDE) + + mock_detect.assert_called_once_with() + mock_wrapper_cls.assert_called_once() + pos, _ = mock_wrapper_cls.call_args + assert pos[0] is SandboxMechanism.BWRAP + cfg = pos[1] + assert isinstance(cfg, SandboxConfig) + assert cfg.base_url == "https://app.mfbt.ai" + assert runner._sandbox_wrapper is built + + @patch("mfbt.commands.ralph.agent.SandboxWrapper") + @patch("mfbt.commands.ralph.agent.detect_sandbox") + def test_explicit_wrapper_skips_detection( + self, mock_detect: MagicMock, mock_wrapper_cls: MagicMock + ) -> None: + sentinel = object() + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=sentinel) + mock_detect.assert_not_called() + mock_wrapper_cls.assert_not_called() + assert runner._sandbox_wrapper is sentinel + + @patch("mfbt.commands.ralph.agent.SandboxWrapper") + @patch("mfbt.commands.ralph.agent.detect_sandbox") + def test_project_dir_and_base_url_threaded_into_config( + self, mock_detect: MagicMock, mock_wrapper_cls: MagicMock, + tmp_path: Path, + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + AgentRunner( + AgentType.CLAUDE, + project_dir=tmp_path, + base_url="https://example.test", + ) + cfg = mock_wrapper_cls.call_args[0][1] + assert cfg.project_dir == tmp_path + assert cfg.base_url == "https://example.test" + + +class TestAgentRunnerSkipPermissionsGating: + """C1: --dangerously-skip-permissions is always added — a real sandbox is + always active (detection hard-fails otherwise) and is the boundary; + headless claude (stdin=DEVNULL) needs the flag to act.""" + + def test_real_sandbox_gets_flag(self) -> None: + runner = AgentRunner( + AgentType.CLAUDE, + sandbox_wrapper=_stub_wrapper(SandboxMechanism.BWRAP), + ) + cmd = runner.build_command("p") + assert cmd[:2] == ["claude", "--dangerously-skip-permissions"] + + def test_sandbox_exec_gets_flag(self) -> None: + runner = AgentRunner( + AgentType.CLAUDE, + sandbox_wrapper=_stub_wrapper(SandboxMechanism.SANDBOX_EXEC), + ) + cmd = runner.build_command("p") + assert cmd[:2] == ["claude", "--dangerously-skip-permissions"] + + +class TestAgentRunnerSandboxErrorHandling: + """MCLI-142/152: each SandboxError subclass surfaces as a distinct, + attributed ``[ralph] …`` stderr line and is re-raised (non-zero exit via + the orchestrator's failed-feature path).""" + + @pytest.mark.parametrize( + ("exc", "expected"), + [ + (SandboxError("base"), "[ralph] sandbox error: base"), + ( + SandboxSetupError("setup blew up", mechanism="docker"), + "[ralph] sandbox setup failed: [docker] setup blew up", + ), + ( + SandboxDetectionError("detect blew up"), + "[ralph] sandbox error: detect blew up", + ), + ( + SandboxCompatibilityError("incompatible", mechanism="bwrap"), + "[ralph] sandbox compatibility check failed: " + "[bwrap] incompatible", + ), + ( + SandboxDeniedError("nope", mechanism="sandbox_exec"), + "[ralph] sandbox denied access: [sandbox_exec] nope", + ), + ], + ) + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_setup_phase_error_is_typed_and_reraised( + self, mock_popen: MagicMock, _which: MagicMock, + exc: SandboxError, expected: str, + capsys: pytest.CaptureFixture[str], + ) -> None: + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.prepare.side_effect = exc + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(type(exc)): + runner.run("p") + out = capsys.readouterr() + assert expected in out.err + assert out.out == "" # stderr only, never stdout + mock_popen.assert_not_called() # subprocess never launched + + def test_compatibility_message_suggests_a_remedy( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + with patch("shutil.which", return_value="/usr/local/bin/claude"): + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.prepare.side_effect = SandboxCompatibilityError("x") + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(SandboxCompatibilityError): + runner.run("p") + err = capsys.readouterr().err + assert "the platform sandbox cannot run the agent here" in err + assert "sandbox-exec (macOS) and bwrap (Linux)" in err + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_subprocess_phase_classified_error_is_typed( + self, mock_popen: MagicMock, _which: MagicMock, + capsys: pytest.CaptureFixture[str], + ) -> None: + # A runtime denial classified post-exit gets the same [ralph] surface. + mock_popen.return_value = _fake_proc(1) + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.classify_exit.return_value = SandboxDeniedError( + "bind mount denied", mechanism="bwrap" + ) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(SandboxDeniedError): + runner.run("p") + out = capsys.readouterr() + assert ( + "[ralph] sandbox denied access: [bwrap] bind mount denied" + in out.err + ) + assert out.out == "" + wrapper.cleanup.assert_called_once_with() + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_captured_sandbox_stderr_is_appended( + self, mock_popen: MagicMock, _which: MagicMock, + capsys: pytest.CaptureFixture[str], + ) -> None: + # MCLI-151 rule 3: captured sandbox stderr surfaces on any error. + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.get_sandbox_stderr.return_value = "[Sandbox] denied /etc\n" + wrapper.prepare.side_effect = SandboxSetupError("boom") + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(SandboxSetupError): + runner.run("p") + assert "[Sandbox] denied /etc" in capsys.readouterr().err + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen", side_effect=OSError("spawn failed")) + def test_subprocess_oserror_not_treated_as_sandbox_error( + self, _popen: MagicMock, _which: MagicMock, + capsys: pytest.CaptureFixture[str], + ) -> None: + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(OSError, match="spawn failed"): + runner.run("p") + assert "[ralph] sandbox" not in capsys.readouterr().err + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_claude_nonzero_exit_never_enters_sandbox_branch( + self, mock_popen: MagicMock, _which: MagicMock, + capsys: pytest.CaptureFixture[str], + ) -> None: + # classify_exit -> None: Claude's own failure, returned as AgentResult, + # no [ralph] sandbox line, happy/Claude-error path unchanged. + mock_popen.return_value = _fake_proc(3) + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.classify_exit.return_value = None + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + result = runner.run("p") + assert result.exit_code == 3 + assert "[ralph] sandbox" not in capsys.readouterr().err + + +def _fake_proc(returncode: int) -> MagicMock: + """A Popen double whose streams are empty and that exits *returncode*.""" + proc = MagicMock() + proc.stdout.readline.side_effect = [b""] + proc.stderr.readline.side_effect = [b""] + proc.wait.return_value = returncode + proc.returncode = returncode + return proc + + +class TestAgentRunnerRuntimeClassification: + """MCLI-150: launch-time errno and post-exit classification map sandbox + failures to typed errors, while Claude's own exit stays an AgentResult.""" + + @pytest.mark.parametrize("err", [errno.EPERM, errno.EACCES]) + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_eperm_at_launch_under_sandbox_is_denied( + self, mock_popen: MagicMock, _which: MagicMock, err: int + ) -> None: + mock_popen.side_effect = OSError(err, "Operation not permitted") + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(SandboxDeniedError) as exc: + runner.run("p") + assert exc.value.mechanism == "bwrap" + wrapper.cleanup.assert_called_once_with() + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_non_errno_oserror_still_propagates( + self, mock_popen: MagicMock, _which: MagicMock + ) -> None: + mock_popen.side_effect = OSError("spawn failed") # errno is None + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(OSError, match="spawn failed") as exc: + runner.run("p") + assert not isinstance(exc.value, SandboxError) + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_classified_nonzero_exit_raises_typed_error( + self, mock_popen: MagicMock, _which: MagicMock + ) -> None: + mock_popen.return_value = _fake_proc(1) + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.classify_exit.return_value = SandboxDeniedError( + "bwrap denied a bind mount", mechanism="bwrap" + ) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(SandboxDeniedError): + runner.run("p") + wrapper.classify_exit.assert_called_once_with(1) + wrapper.cleanup.assert_called_once_with() + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_unclassified_nonzero_exit_returns_agent_result( + self, mock_popen: MagicMock, _which: MagicMock + ) -> None: + # classify_exit -> None means it is Claude's own non-zero exit: + # AgentRunner must return an AgentResult, not raise (happy/Claude + # error path unchanged, never a SandboxError branch). + mock_popen.return_value = _fake_proc(2) + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.classify_exit.return_value = None + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + result = runner.run("p") + assert result.exit_code == 2 + wrapper.classify_exit.assert_called_once_with(2) + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_zero_exit_never_classifies( + self, mock_popen: MagicMock, _which: MagicMock + ) -> None: + mock_popen.return_value = _fake_proc(0) + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + result = runner.run("p") + assert result.exit_code == 0 + wrapper.classify_exit.assert_not_called() + + +class TestAgentRunnerPopenSignature: + """MCLI-140: frozen Popen kwargs across adapter paths.""" + + @pytest.mark.parametrize( + "mechanism", + [SandboxMechanism.BWRAP, SandboxMechanism.SANDBOX_EXEC], + ) + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_popen_kwargs_frozen( + self, mock_popen: MagicMock, _which: MagicMock, + mechanism: SandboxMechanism, + ) -> None: + import subprocess as _sp + + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [b""] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + runner = AgentRunner( + AgentType.CLAUDE, + sandbox_wrapper=_stub_wrapper(mechanism), + ) + runner.run("p") + + _, kwargs = mock_popen.call_args + assert kwargs["stdin"] is _sp.DEVNULL + assert kwargs["stdout"] is _sp.PIPE + assert kwargs["stderr"] is _sp.PIPE + assert kwargs.get("shell", False) is not True + # I3: cwd is pinned to the sandbox's realpath'd project dir, never + # left to whatever the parent process CWD happens to be. + assert kwargs["cwd"] == os.path.realpath("/tmp/ralph-stub-project") + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_popen_cwd_is_sandbox_project_dir( + self, mock_popen: MagicMock, _which: MagicMock, tmp_path: Path + ) -> None: + # The agent must run *inside* the confined/bound project dir (the + # single source of truth = the SandboxConfig the wrapper was built + # from), realpath-resolved exactly as the adapters resolve it. + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [b""] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + wrapper = _stub_wrapper(SandboxMechanism.BWRAP) + wrapper.config.project_dir = tmp_path + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + runner.run("p") + + _, kwargs = mock_popen.call_args + assert kwargs["cwd"] == os.path.realpath(str(tmp_path)) + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_run_prepares_wraps_and_cleans_up( + self, mock_popen: MagicMock, mock_which: MagicMock + ) -> None: + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [b""] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + wrapper = MagicMock() + wrapper.wrap_command.side_effect = lambda c: [ + "bwrap", "--dev", "/dev", *c + ] + wrapper.note_stderr_line.return_value = False + wrapper.get_sandbox_stderr.return_value = "" + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + runner.run("p") + + # prepare() is a no-arg lifecycle hook (no-op for both mechanisms). + wrapper.prepare.assert_called_once_with() + # wrap_command() received the un-prefixed claude command... + assert wrapper.wrap_command.call_args[0][0][0] == "claude" + # ...and Popen was handed the wrapped command. + spawned = mock_popen.call_args[0][0] + assert spawned[0] == "bwrap" + assert "claude" in spawned + wrapper.cleanup.assert_called_once_with() + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen", side_effect=OSError("spawn failed")) + def test_run_cleans_up_even_when_spawn_fails( + self, mock_popen: MagicMock, mock_which: MagicMock + ) -> None: + wrapper = MagicMock() + wrapper.wrap_command.side_effect = lambda c: [ + "sandbox-exec", "-f", "/tmp/p.sb", *c + ] + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + with pytest.raises(OSError, match="spawn failed"): + runner.run("p") + wrapper.cleanup.assert_called_once_with() + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_sandbox_stderr_attributed_and_surfaced_live( + self, mock_popen: MagicMock, mock_which: MagicMock, tmp_path: Path + ) -> None: + """MCLI-135/137: live surface + capture, full stream lossless.""" + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [ + b"bwrap: bind failed\n", + b"retry\n", + b"", + ] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + wrapper = SandboxWrapper( + SandboxMechanism.BWRAP, _sandbox_cfg(tmp_path) + ) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + surfaced: list[str] = [] + result = runner.run("p", on_stderr=surfaced.append) + + assert surfaced == [ + "[Sandbox] bwrap: bind failed\n", + "[Sandbox] retry\n", + ] + assert result.sandbox_stderr == ( + "[Sandbox] bwrap: bind failed\n[Sandbox] retry\n" + ) + # Full stream stays lossless and unprefixed. + assert result.stderr == "bwrap: bind failed\nretry\n" + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_stub_wrapper_passthrough_leaves_stderr_unchanged( + self, mock_popen: MagicMock, mock_which: MagicMock + ) -> None: + """The MagicMock stub (note_stderr_line→False) captures nothing.""" + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [b"plain error\n", b""] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + runner = AgentRunner(AgentType.CLAUDE) # autouse stub: passthrough + surfaced: list[str] = [] + result = runner.run("p", on_stderr=surfaced.append) + + assert result.stderr == "plain error\n" + assert result.sandbox_stderr == "" + assert surfaced == [] + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_note_stderr_line_error_does_not_lose_raw( + self, mock_popen: MagicMock, mock_which: MagicMock + ) -> None: + """MCLI-137 data-loss guard: a classify bug never drops raw stderr.""" + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] + mock_proc.stderr.readline.side_effect = [b"boom\n", b""] + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + wrapper = MagicMock() + wrapper.wrap_command.side_effect = lambda c: [*c] + wrapper.note_stderr_line.side_effect = RuntimeError("classify bug") + wrapper.get_sandbox_stderr.return_value = "" + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + result = runner.run("p") + + assert result.stderr == "boom\n" + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_burst_and_asymmetric_close_no_loss( + self, mock_popen: MagicMock, mock_which: MagicMock, tmp_path: Path + ) -> None: + """MCLI-137: bursty stderr after stdout closes — no data lost/hung.""" + mock_proc = MagicMock() + mock_proc.stdout.readline.side_effect = [b""] # closes first + burst = [f"e{i}\n".encode() for i in range(50)] + [b""] + mock_proc.stderr.readline.side_effect = burst + mock_proc.wait.return_value = 0 + mock_proc.returncode = 0 + mock_popen.return_value = mock_proc + + wrapper = SandboxWrapper( + SandboxMechanism.BWRAP, _sandbox_cfg(tmp_path) + ) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + result = runner.run("p") + + for i in range(50): + assert f"e{i}\n" in result.stderr + assert result.sandbox_stderr.count("[Sandbox] ") == 50 + + class TestAgentRunnerValidateBinary: @patch("shutil.which", return_value=None) def test_claude_not_found(self, mock_which: MagicMock) -> None: @@ -272,3 +862,71 @@ def test_terminate_escalates_to_sigkill(self) -> None: runner.terminate() mock_proc.terminate.assert_called_once() mock_proc.kill.assert_called_once() + + +class TestAgentRunnerRealWrapperIntegration: + """End-to-end: a REAL SandboxWrapper flows through AgentRunner.run(). + + The rest of the suite injects a MagicMock wrapper; these exercise the + actual orchestrator→AgentRunner→wrapper→adapter→Popen wiring (only + ``subprocess.Popen`` + ``shutil.which`` are mocked) so a broken + ``wrap_command``/``prepare``/``classify_exit``/``cleanup`` contract or a + bad adapter prefix is actually caught (test-analyzer C1).""" + + @staticmethod + def _cfg(tmp_path: Path) -> SandboxConfig: + return SandboxConfig( + project_dir=tmp_path, + base_url="https://app.mfbt.ai", + permissions=SandboxPermissions( + project_dir=tmp_path, + writable_paths=[tmp_path], + readonly_paths=[], + ), + ) + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_bwrap_real_wrapper_prefixes_argv_and_adds_flag( + self, mock_popen: MagicMock, _which: MagicMock, tmp_path: Path + ) -> None: + mock_popen.return_value = _fake_proc(0) + wrapper = SandboxWrapper(SandboxMechanism.BWRAP, self._cfg(tmp_path)) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + + runner.run("p") + + argv = mock_popen.call_args[0][0] + assert argv[0] == "bwrap" + assert "claude" in argv + # C1: a real sandbox gets the bypass flag (headless claude needs it). + assert "--dangerously-skip-permissions" in argv + # The project dir is bound read-write. + assert "--bind" in argv + + @patch("shutil.which", return_value="/usr/local/bin/claude") + @patch("subprocess.Popen") + def test_sandbox_exec_temp_profile_created_then_cleaned( + self, mock_popen: MagicMock, _which: MagicMock, tmp_path: Path + ) -> None: + seen: dict[str, Path] = {} + + def _capture(cmd, **kwargs): # type: ignore[no-untyped-def] + # cmd == ["sandbox-exec", "-f", , "claude", ...] + profile = Path(cmd[2]) + assert profile.exists(), "SBPL profile must exist at spawn time" + seen["profile"] = profile + return _fake_proc(0) + + mock_popen.side_effect = _capture + wrapper = SandboxWrapper( + SandboxMechanism.SANDBOX_EXEC, self._cfg(tmp_path) + ) + runner = AgentRunner(AgentType.CLAUDE, sandbox_wrapper=wrapper) + + runner.run("p") + + argv = mock_popen.call_args[0][0] + assert argv[:2] == ["sandbox-exec", "-f"] + # cleanup() in run()'s finally must have removed the temp profile. + assert not seen["profile"].exists() diff --git a/tests/unit/ralph/test_cli.py b/tests/unit/ralph/test_cli.py index 5633502..399338a 100644 --- a/tests/unit/ralph/test_cli.py +++ b/tests/unit/ralph/test_cli.py @@ -48,6 +48,9 @@ def test_help_text(self) -> None: assert "--mode" in result.output assert "--phase" in result.output assert "--quiet" in result.output + # Removed in the two-mechanism cleanup. + assert "--sandbox" not in result.output + assert "--yes" not in result.output class TestRalphInvalidParams: @@ -137,6 +140,72 @@ def _patch_resolve_and_progress(mock_get_dir, mock_mfbt_dir): return patches +class TestRalphSandboxHardFail: + @patch("mfbt.config.get_mfbt_dir") + def test_unavailable_sandbox_exits_cleanly( + self, mock_get_dir: MagicMock, mock_mfbt_dir: Path + ) -> None: + from mfbt.commands.ralph.sandbox import SandboxDetectionError + + patches = _patch_resolve_and_progress(mock_get_dir, mock_mfbt_dir) + with ( + patches[0], + patches[1], + patches[2], + patch( + "mfbt.commands.ralph.orchestrator.RalphOrchestrator", + side_effect=SandboxDetectionError( + "bwrap (bubblewrap) is required to sandbox the agent on " + "Linux but was not found on PATH." + ), + ), + ): + result = runner.invoke(app, [ + "ralph", "--coding-agent", "claude", + "--project", _PROJECT_ID, + ]) + assert result.exit_code == 1 + assert "bwrap" in result.output + assert "not found on PATH" in result.output + # Clean message, not a raw traceback. + assert "Traceback" not in result.output + + @patch("mfbt.config.get_mfbt_dir") + def test_post_construction_sandbox_error_aborts_run( + self, mock_get_dir: MagicMock, mock_mfbt_dir: Path + ) -> None: + # C1: a sandbox failure raised from run() (setup/denial) must NOT be + # swallowed as a per-feature failure — it propagates out, the CLI + # surfaces an attributed message and exits non-zero (hard-fail). + from mfbt.commands.ralph.sandbox import SandboxSetupError + + patches = _patch_resolve_and_progress(mock_get_dir, mock_mfbt_dir) + with ( + patches[0], + patches[1], + patches[2], + patch( + "mfbt.commands.ralph.orchestrator.RalphOrchestrator" + ) as mock_orch_cls, + ): + mock_orch = MagicMock() + mock_orch.sandbox_mechanism.value = "sandbox_exec" + mock_orch.run.side_effect = SandboxSetupError( + "could not write the sandbox-exec SBPL profile", + mechanism="sandbox_exec", + ) + mock_orch_cls.return_value = mock_orch + result = runner.invoke(app, [ + "ralph", "--coding-agent", "claude", + "--project", _PROJECT_ID, + ]) + mock_orch.run.assert_called_once() + assert result.exit_code == 1 + assert "sandbox failure — Ralph aborted" in result.output + assert "sandbox_exec" in result.output + assert "Traceback" not in result.output + + class TestRalphStatus: @patch("mfbt.config.get_mfbt_dir") def test_status_shows_and_exits( @@ -164,22 +233,13 @@ def test_status_and_quiet_mutually_exclusive( assert result.exit_code == 1 assert "mutually exclusive" in result.output - @patch("mfbt.config.get_mfbt_dir") - def test_status_and_yes_mutually_exclusive( - self, mock_get_dir: MagicMock, mock_mfbt_dir: Path - ) -> None: - mock_get_dir.return_value = mock_mfbt_dir - result = runner.invoke( - app, - ["ralph", "--coding-agent", "claude", "--project", _PROJECT_ID, "--status", "--yes"], - ) - assert result.exit_code == 1 - assert "mutually exclusive" in result.output +class TestRalphNoConfirmation: + """The pre-run confirmation prompt was removed with --yes; the run + proceeds directly after the status summary.""" -class TestRalphYes: @patch("mfbt.config.get_mfbt_dir") - def test_yes_skips_confirmation( + def test_no_confirmation_prompt_before_run( self, mock_get_dir: MagicMock, mock_mfbt_dir: Path ) -> None: patches = _patch_resolve_and_progress(mock_get_dir, mock_mfbt_dir) @@ -197,30 +257,8 @@ def test_yes_skips_confirmation( mock_orch_cls.return_value = mock_orch result = runner.invoke( app, - ["ralph", "--coding-agent", "claude", "--project", _PROJECT_ID, "--yes"], + ["ralph", "--coding-agent", "claude", "--project", _PROJECT_ID], ) mock_confirm.assert_not_called() + mock_orch.run.assert_called_once() assert result.exit_code == 0 - - @patch("mfbt.config.get_mfbt_dir") - def test_default_prompts_for_confirmation( - self, mock_get_dir: MagicMock, mock_mfbt_dir: Path - ) -> None: - patches = _patch_resolve_and_progress(mock_get_dir, mock_mfbt_dir) - with ( - patches[0], - patches[1], - patches[2], - patch("typer.confirm") as mock_confirm, - patch( - "mfbt.commands.ralph.orchestrator.RalphOrchestrator" - ) as mock_orch_cls, - ): - mock_orch = MagicMock() - mock_orch.run.return_value = MagicMock(failed=0) - mock_orch_cls.return_value = mock_orch - result = runner.invoke( - app, - ["ralph", "--coding-agent", "claude", "--project", _PROJECT_ID], - ) - mock_confirm.assert_called_once() diff --git a/tests/unit/ralph/test_display.py b/tests/unit/ralph/test_display.py index eac0306..1cc7d1e 100644 --- a/tests/unit/ralph/test_display.py +++ b/tests/unit/ralph/test_display.py @@ -182,6 +182,21 @@ def test_error_always_shown(self) -> None: assert "something broke" in output +class TestSandboxStatusMethod: + """MCLI-145: sandbox_status startup line.""" + + def test_sandbox_status_to_stdout(self) -> None: + d, out, err = _make_display() + d.sandbox_status("sandbox_exec") + assert "Sandbox: sandbox_exec" in out.getvalue() + assert err.getvalue() == "" + + def test_sandbox_status_suppressed_when_quiet(self) -> None: + d, out, err = _make_display(quiet=True) + d.sandbox_status("bwrap") + assert out.getvalue() == "" + + class TestShowStatus: def _progress(self, **overrides) -> dict: defaults = dict( diff --git a/tests/unit/ralph/test_orchestrator.py b/tests/unit/ralph/test_orchestrator.py index 6615d2f..7061244 100644 --- a/tests/unit/ralph/test_orchestrator.py +++ b/tests/unit/ralph/test_orchestrator.py @@ -6,11 +6,18 @@ from pathlib import Path from unittest.mock import MagicMock, call, patch +import pytest from rich.console import Console from mfbt.commands.ralph.display import RalphDisplay from mfbt.commands.ralph.log_capture import RalphLogger from mfbt.commands.ralph.orchestrator import RalphOrchestrator +from mfbt.commands.ralph.sandbox import ( + SandboxDetectionError, + SandboxMechanism, + SandboxSetupError, +) +from mfbt.commands.ralph.sandbox.wrapper import SandboxWrapper from mfbt.commands.ralph.types import ( AgentResult, AgentType, @@ -94,6 +101,82 @@ def _agent_result(exit_code: int = 0) -> AgentResult: } +@pytest.fixture(autouse=True) +def _stub_sandbox_detection(): + """Keep orchestrator construction hermetic. + + RalphOrchestrator.__init__ calls detect_sandbox(), which would probe real + binaries. Stub it for every test here; the dedicated wiring tests below + re-patch this target explicitly. + """ + with patch( + "mfbt.commands.ralph.orchestrator.detect_sandbox", + return_value=SandboxMechanism.SANDBOX_EXEC, + ): + yield + + +class TestOrchestratorSandboxDetectionWiring: + """Sandbox detect + live SandboxWrapper wiring in __init__.""" + + @patch("mfbt.commands.ralph.orchestrator.detect_sandbox") + def test_detected_mechanism_stored_and_logged( + self, mock_detect: MagicMock, caplog: pytest.LogCaptureFixture + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + with caplog.at_level( + "INFO", logger="mfbt.commands.ralph.orchestrator" + ): + orch = RalphOrchestrator( + _make_config(), MagicMock(), _make_display() + ) + mock_detect.assert_called_once_with() + assert orch._sandbox_mechanism is SandboxMechanism.BWRAP + assert "sandbox mechanism: bwrap" in caplog.text + + @patch("mfbt.commands.ralph.orchestrator.detect_sandbox") + def test_sandbox_wrapper_built_from_detected_mechanism( + self, mock_detect: MagicMock + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + orch = RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + assert isinstance(orch._sandbox_wrapper, SandboxWrapper) + assert orch._sandbox_wrapper.mechanism is SandboxMechanism.BWRAP + + @patch("mfbt.commands.ralph.orchestrator.AgentRunner") + @patch("mfbt.commands.ralph.orchestrator.detect_sandbox") + def test_agent_runner_receives_sandbox_wrapper( + self, mock_detect: MagicMock, mock_agent: MagicMock + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + orch = RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + _, kwargs = mock_agent.call_args + assert kwargs["sandbox_wrapper"] is orch._sandbox_wrapper + assert isinstance(kwargs["sandbox_wrapper"], SandboxWrapper) + + @patch("mfbt.commands.ralph.orchestrator.detect_sandbox") + def test_unavailable_sandbox_aborts_construction( + self, mock_detect: MagicMock + ) -> None: + mock_detect.side_effect = SandboxDetectionError("bwrap not on PATH") + with pytest.raises(SandboxDetectionError, match="bwrap not on PATH"): + RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + + def test_construction_does_not_invoke_real_probes(self) -> None: + # The autouse stub is active: construction must not touch subprocess. + with patch("subprocess.run") as m_run: + RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + m_run.assert_not_called() + + @patch("mfbt.commands.ralph.orchestrator.detect_sandbox") + def test_sandbox_mechanism_accessor( + self, mock_detect: MagicMock + ) -> None: + mock_detect.return_value = SandboxMechanism.BWRAP + orch = RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + assert orch.sandbox_mechanism is SandboxMechanism.BWRAP + + class TestOrchestratorAllSucceed: @patch("mfbt.commands.ralph.orchestrator.AgentRunner") @patch("mfbt.commands.ralph.orchestrator.verify_feature_completed") @@ -204,6 +287,102 @@ def test_all_retries_fail( assert mock_runner.run.call_count == 3 # MAX_RETRY_ATTEMPTS +class TestOrchestratorSandboxFailureHardAborts: + """C1: a sandbox failure from agent.run() (setup/denial) is fatal to the + whole session — it must propagate out of run(), NOT be downgraded to a + per-feature FAILED result with the loop advancing to the next feature + (which would re-run against a broken sandbox and mask a real denial).""" + + @patch("mfbt.commands.ralph.orchestrator.AgentRunner") + @patch("mfbt.commands.ralph.orchestrator.verify_feature_completed") + @patch("mfbt.commands.ralph.orchestrator.resolve_feature_task") + @patch("mfbt.commands.ralph.orchestrator.get_next_from_list") + @patch("mfbt.commands.ralph.orchestrator.build_global_feature_list") + @patch("mfbt.commands.ralph.orchestrator.get_phase_progress") + def test_sandbox_error_propagates_and_stops_the_loop( + self, + mock_progress: MagicMock, + mock_build: MagicMock, + mock_next: MagicMock, + mock_resolve: MagicMock, + mock_verify: MagicMock, + mock_runner_cls: MagicMock, + ) -> None: + mock_progress.return_value = PROGRESS + info1 = _make_feature_info("MCLI-031") + info2 = _make_feature_info("MCLI-032") + mock_build.return_value = [info1, info2] + # Two features queued; if the SandboxError were swallowed the loop + # would advance to info2 and call run() a second time. + mock_next.side_effect = [info1, info2, None] + mock_resolve.return_value = _make_feature("MCLI-031") + + mock_runner = MagicMock() + mock_runner.run.side_effect = SandboxSetupError( + "could not write the SBPL profile", mechanism="sandbox_exec" + ) + mock_runner_cls.return_value = mock_runner + + orch = RalphOrchestrator(_make_config(), MagicMock(), _make_display()) + + with pytest.raises(SandboxSetupError, match="SBPL profile"): + orch.run() + + # Hard-fail: exactly one agent run attempt, no retry, no advance to + # the second feature. + assert mock_runner.run.call_count == 1 + mock_verify.assert_not_called() + + +class TestOrchestratorStopMidFeature: + """stop() mid-run (e.g. user quit) must wind down without a wasted + completion-verify call or a misleading retry display.""" + + @patch("mfbt.commands.ralph.orchestrator.AgentRunner") + @patch("mfbt.commands.ralph.orchestrator.verify_feature_completed") + @patch("mfbt.commands.ralph.orchestrator.resolve_feature_task") + @patch("mfbt.commands.ralph.orchestrator.get_next_from_list") + @patch("mfbt.commands.ralph.orchestrator.build_global_feature_list") + @patch("mfbt.commands.ralph.orchestrator.get_phase_progress") + def test_stop_after_agent_run_skips_verify_and_retry( + self, + mock_progress: MagicMock, + mock_build: MagicMock, + mock_next: MagicMock, + mock_resolve: MagicMock, + mock_verify: MagicMock, + mock_runner_cls: MagicMock, + ) -> None: + mock_progress.return_value = PROGRESS + info1 = _make_feature_info("MCLI-031") + mock_build.return_value = [info1] + mock_next.side_effect = [info1, None] + mock_resolve.return_value = _make_feature("MCLI-031") + + mock_runner = MagicMock() + mock_runner_cls.return_value = mock_runner + + config = _make_config() # no max_turns → retries would be allowed + display = MagicMock() + orch = RalphOrchestrator(config, MagicMock(), display) + + # Simulate the user quitting: stop() fires while the agent runs. + def _run_then_stopped(*_a, **_k): + orch.stop() + return _agent_result(exit_code=0) + + mock_runner.run.side_effect = _run_then_stopped + + summary = orch.run() + + # Broke out immediately after the agent returned: + mock_verify.assert_not_called() + display.feature_retry.assert_not_called() + assert mock_runner.run.call_count == 1 # no retry attempt + assert summary.failed == 1 + assert summary.succeeded == 0 + + class TestOrchestratorMaxTurnsFailFast: @patch("mfbt.commands.ralph.orchestrator.AgentRunner") @patch("mfbt.commands.ralph.orchestrator.verify_feature_completed") @@ -448,7 +627,7 @@ def test_combined_callback_calls_both( captured_callback = None - def capture_run(prompt, on_output=None): + def capture_run(prompt, on_output=None, on_stderr=None): nonlocal captured_callback captured_callback = on_output # Simulate agent writing output @@ -477,6 +656,49 @@ def capture_run(prompt, on_output=None): content = log_files[0].read_text() assert "hello" in content + @patch("mfbt.commands.ralph.orchestrator.AgentRunner") + @patch("mfbt.commands.ralph.orchestrator.verify_feature_completed") + @patch("mfbt.commands.ralph.orchestrator.resolve_feature_task") + @patch("mfbt.commands.ralph.orchestrator.get_next_from_list") + @patch("mfbt.commands.ralph.orchestrator.build_global_feature_list") + @patch("mfbt.commands.ralph.orchestrator.get_phase_progress") + def test_on_stderr_is_wired_to_combined_callback( + self, + mock_progress: MagicMock, + mock_build: MagicMock, + mock_next: MagicMock, + mock_resolve: MagicMock, + mock_verify: MagicMock, + mock_runner_cls: MagicMock, + ) -> None: + """MCLI-137 + live-surface: sandbox stderr reuses the output sink.""" + mock_progress.return_value = PROGRESS + info1 = _make_feature_info("MCLI-031") + mock_build.return_value = [info1] + mock_next.side_effect = [info1, None] + mock_resolve.return_value = _make_feature("MCLI-031") + mock_verify.return_value = True + + seen: dict[str, object] = {} + + def capture_run(prompt, on_output=None, on_stderr=None): + seen["on_output"] = on_output + seen["on_stderr"] = on_stderr + return _agent_result() + + mock_runner = MagicMock() + mock_runner.run.side_effect = capture_run + mock_runner_cls.return_value = mock_runner + + orch = RalphOrchestrator( + _make_config(), MagicMock(), _make_display() + ) + orch.run() + + assert seen["on_stderr"] is not None + # Same combined display+logger sink as stdout output. + assert seen["on_stderr"] is seen["on_output"] + class TestOrchestratorLoggerErrorIsolation: """Logger errors must not prevent display callback from working.""" @@ -503,7 +725,7 @@ def test_logger_error_does_not_break_display( mock_resolve.return_value = _make_feature("MCLI-031") mock_verify.return_value = True - def capture_run(prompt, on_output=None): + def capture_run(prompt, on_output=None, on_stderr=None): if on_output: on_output("test line\n") return _agent_result() diff --git a/tests/unit/ralph/test_tui_display.py b/tests/unit/ralph/test_tui_display.py index f599586..3b32d5c 100644 --- a/tests/unit/ralph/test_tui_display.py +++ b/tests/unit/ralph/test_tui_display.py @@ -214,6 +214,22 @@ def test_agent_output_ignores_empty_lines(self) -> None: app.app.call_from_thread.assert_not_called() +class TestTUIDisplaySandboxFeedback: + """MCLI-145 parity for the TUI display adapter.""" + + def test_sandbox_status_writes_to_log(self) -> None: + app = _make_mock_container() + log = MagicMock() + app.query_one.return_value = log + display = RalphTUIDisplay.__new__(RalphTUIDisplay) + display._container = app + display._feature_num = 0 + + display.sandbox_status("bwrap") + log.write.assert_called_once() + assert "bwrap" in str(log.write.call_args[0][0]) + + class TestTUIDisplayFeatureComplete: def test_feature_complete_success(self) -> None: app = _make_mock_container() @@ -396,3 +412,49 @@ def test_warn_writes_yellow(self) -> None: log.write.assert_called_once() written = str(log.write.call_args[0][0]) assert "low disk space" in written + + +class TestTUIDisplayCallFromThreadGuard: + """_call_from_thread must tolerate a torn-down Textual app. + + After the user quits, the orchestrator worker thread may still be + mid-feature and emit display updates. Accessing container.app / + call_from_thread then raises NoActiveAppError (a RuntimeError + subclass); the worker must not crash with a traceback. + """ + + def test_swallows_runtime_error_from_call_from_thread(self) -> None: + container = MagicMock() + container.app.call_from_thread.side_effect = RuntimeError("boom") + display = RalphTUIDisplay.__new__(RalphTUIDisplay) + display._container = container + display._feature_num = 0 + + # Must not raise. + display._call_from_thread(lambda: None) + + def test_swallows_no_active_app_error_from_app_access(self) -> None: + from textual._context import NoActiveAppError + + container = MagicMock() + # The .app property is what raises per the reported traceback. + type(container).app = property( + lambda self: (_ for _ in ()).throw(NoActiveAppError()) + ) + display = RalphTUIDisplay.__new__(RalphTUIDisplay) + display._container = container + display._feature_num = 0 + + # NoActiveAppError is a RuntimeError subclass — must not raise. + display._call_from_thread(lambda: None) + + def test_callback_invoked_when_app_alive(self) -> None: + container = _make_mock_container() + display = RalphTUIDisplay.__new__(RalphTUIDisplay) + display._container = container + display._feature_num = 0 + + sentinel = MagicMock() + display._call_from_thread(sentinel, "arg") + + sentinel.assert_called_once_with("arg") diff --git a/tests/unit/tui/test_app.py b/tests/unit/tui/test_app.py index ced5b7a..a83dcd3 100644 --- a/tests/unit/tui/test_app.py +++ b/tests/unit/tui/test_app.py @@ -86,3 +86,111 @@ async def test_screen_push_updates_nav_state() -> None: header = app.query_one("#k9s-header", K9sHeader) # Initial state should have "Projects" in the breadcrumb assert "Projects" in header.breadcrumb + + +class TestRalphExitConfirmation: + """`q` / back while Ralph is RUNNING/PAUSED must confirm first.""" + + @pytest.mark.asyncio + async def test_quit_while_ralph_active_prompts_and_guards_reentry( + self, + ) -> None: + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + app = _make_app() + async with app.run_test() as pilot: + app._ralph_active_panel = lambda: MagicMock() + app.exit = MagicMock() + + await app.action_quit() + await pilot.pause() + + assert isinstance(app.screen, ConfirmExitRalphModal) + assert app._exit_confirm_open is True + app.exit.assert_not_called() + + # Priority 'q' still fires over the modal — must not stack. + await app.action_quit() + await pilot.pause() + assert ( + sum( + isinstance(s, ConfirmExitRalphModal) + for s in app.screen_stack + ) + == 1 + ) + + @pytest.mark.asyncio + async def test_confirming_quit_tears_down_and_exits(self) -> None: + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + app = _make_app() + async with app.run_test() as pilot: + app._ralph_active_panel = lambda: MagicMock() + app._stop_for_quit = MagicMock() + app.exit = MagicMock() + + await app.action_quit() + await pilot.pause() + assert isinstance(app.screen, ConfirmExitRalphModal) + + await pilot.click("#confirm-exit-confirm") + await pilot.pause() + + app._stop_for_quit.assert_called_once() + app.exit.assert_called_once() + assert app._exit_confirm_open is False + + @pytest.mark.asyncio + async def test_cancelling_quit_keeps_app_running(self) -> None: + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + app = _make_app() + async with app.run_test() as pilot: + app._ralph_active_panel = lambda: MagicMock() + app._stop_for_quit = MagicMock() + app.exit = MagicMock() + + await app.action_quit() + await pilot.pause() + await pilot.press("escape") # modal's cancel binding + await pilot.pause() + + app.exit.assert_not_called() + app._stop_for_quit.assert_not_called() + assert app._exit_confirm_open is False + assert not isinstance(app.screen, ConfirmExitRalphModal) + + @pytest.mark.asyncio + async def test_back_while_ralph_active_leaves_on_confirm(self) -> None: + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + app = _make_app() + async with app.run_test() as pilot: + fake_panel = MagicMock() + app._ralph_active_panel = lambda: fake_panel + + app.action_go_back() + await pilot.pause() + assert isinstance(app.screen, ConfirmExitRalphModal) + + await pilot.click("#confirm-exit-confirm") + await pilot.pause() + + fake_panel.leave_ralph.assert_called_once() + assert app._exit_confirm_open is False + + @pytest.mark.asyncio + async def test_quit_without_ralph_does_not_prompt(self) -> None: + from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + app = _make_app() + async with app.run_test() as pilot: + app._ralph_active_panel = lambda: None + app.exit = MagicMock() + + await app.action_quit() + await pilot.pause() + + assert not isinstance(app.screen, ConfirmExitRalphModal) + app.exit.assert_called_once() diff --git a/tests/unit/tui/test_confirm_modal.py b/tests/unit/tui/test_confirm_modal.py new file mode 100644 index 0000000..fb8ec1a --- /dev/null +++ b/tests/unit/tui/test_confirm_modal.py @@ -0,0 +1,60 @@ +"""Tests for the ConfirmExitRalphModal (leave-an-active-Ralph-run gate).""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from mfbt.tui.screens.confirm_modal import ConfirmExitRalphModal + + +def _press(modal: ConfirmExitRalphModal, button_id: str) -> None: + modal.on_button_pressed( + SimpleNamespace(button=SimpleNamespace(id=button_id)) + ) + + +@pytest.mark.parametrize("context", ["quit", "back"]) +def test_confirm_button_dismisses_true(context: str) -> None: + modal = ConfirmExitRalphModal(context=context) + modal.dismiss = MagicMock() + + _press(modal, "confirm-exit-confirm") + + modal.dismiss.assert_called_once_with(True) + + +@pytest.mark.parametrize("context", ["quit", "back"]) +def test_cancel_button_dismisses_false(context: str) -> None: + modal = ConfirmExitRalphModal(context=context) + modal.dismiss = MagicMock() + + _press(modal, "confirm-exit-cancel") + + modal.dismiss.assert_called_once_with(False) + + +def test_escape_action_dismisses_false() -> None: + modal = ConfirmExitRalphModal(context="quit") + modal.dismiss = MagicMock() + + modal.action_cancel() + + modal.dismiss.assert_called_once_with(False) + + +def test_context_tailors_copy() -> None: + quit_modal = ConfirmExitRalphModal(context="quit") + back_modal = ConfirmExitRalphModal(context="back") + + assert "Quit" in quit_modal._copy["title"] + assert quit_modal._copy["confirm_label"] == "Quit" + assert "go back" in back_modal._copy["title"].lower() + assert back_modal._copy["confirm_label"] == "Stop & Go Back" + + +def test_unknown_context_falls_back_to_quit() -> None: + modal = ConfirmExitRalphModal(context="bogus") + assert modal._copy["confirm_label"] == "Quit" diff --git a/tests/unit/tui/test_ralph_panel.py b/tests/unit/tui/test_ralph_panel.py index 55269bc..af2f468 100644 --- a/tests/unit/tui/test_ralph_panel.py +++ b/tests/unit/tui/test_ralph_panel.py @@ -108,6 +108,36 @@ def test_go_back_from_browsing_returns_false(self, tmp_path: Path) -> None: assert panel._stage == RalphStage.BROWSING +class TestLeaveRalph: + """leave_ralph() is the user-confirmed exit from the app-level gate.""" + + def test_leave_ralph_from_running_tears_down(self, tmp_path: Path) -> None: + panel = _make_panel(tmp_path) + panel._stage = RalphStage.RUNNING + mock_orch = MagicMock() + mock_client = MagicMock() + panel._orchestrator = mock_orch + panel._orchestrator_client = mock_client + + panel.leave_ralph() + + mock_orch.stop.assert_called_once() + mock_client.close.assert_called_once() + assert panel._stage == RalphStage.BROWSING + panel.run_worker.assert_called_once() + + def test_leave_ralph_from_paused_tears_down(self, tmp_path: Path) -> None: + panel = _make_panel(tmp_path) + panel._stage = RalphStage.PAUSED + mock_orch = MagicMock() + panel._orchestrator = mock_orch + + panel.leave_ralph() + + mock_orch.stop.assert_called_once() + assert panel._stage == RalphStage.BROWSING + + class TestStartRalph: def test_start_without_config_notifies(self, tmp_path: Path) -> None: panel = _make_panel(tmp_path) @@ -402,3 +432,115 @@ def test_exception_restores_paused_and_notifies( # Failure must not crash the worker — it restores PAUSED + notifies. assert panel._stage == RalphStage.PAUSED app.notify.assert_called_once() + + +class TestRunOrchestratorFailsClosed: + """The TUI worker must fail *closed and visibly* on any sandbox failure. + + This is the TUI half of the two-mechanism hard-fail contract — the CLI + half is covered by test_cli.py. Without these, a refactor that broke the + ``except SandboxDetectionError`` / ``except SandboxError`` branches would + let the TUI run unconfined or crash opaquely with no failing test. + """ + + def _prep(self, tmp_path: Path): # type: ignore[no-untyped-def] + panel = _make_panel(tmp_path) + panel._ralph_config = MagicMock() + panel._build_uncached_client = MagicMock() + panel._return_to_browsing = MagicMock() + panel._set_header_sandbox = MagicMock() + return panel + + @contextmanager + def _patches(self): # type: ignore[no-untyped-def] + with patch( + "mfbt.commands.ralph.log_capture.RalphLogger" + ), patch( + "mfbt.commands.ralph.tui_display.RalphTUIDisplay" + ), patch( + "mfbt.commands.ralph.orchestrator.RalphOrchestrator" + ) as orch_cls: + yield orch_cls + + def test_detection_error_at_construction_fails_closed( + self, tmp_path: Path + ) -> None: + from mfbt.commands.ralph.sandbox import SandboxDetectionError + + panel = self._prep(tmp_path) + app = _sync_app() + with self._patches() as orch_cls, _attach_app(panel, app): + orch_cls.side_effect = SandboxDetectionError( + "bwrap (bubblewrap) is required but was not found" + ) + panel._run_orchestrator() + + # Failed closed: error notification + back to browsing, and the run + # loop was NEVER reached (construction raised). + app.notify.assert_called_once() + _, kwargs = app.notify.call_args + assert kwargs.get("severity") == "error" + panel._return_to_browsing.assert_called_once() + orch_cls.return_value.run.assert_not_called() + + def test_post_construction_sandbox_error_fails_closed( + self, tmp_path: Path + ) -> None: + from mfbt.commands.ralph.sandbox import SandboxSetupError + + panel = self._prep(tmp_path) + app = _sync_app() + with self._patches() as orch_cls, _attach_app(panel, app): + # Construction succeeds; the failure surfaces from run() — the + # orchestrator re-raises it (C1) instead of looping on. + orch_cls.return_value.run.side_effect = SandboxSetupError( + "could not write the SBPL profile", mechanism="sandbox_exec" + ) + panel._run_orchestrator() + + orch_cls.return_value.run.assert_called_once() + app.notify.assert_called_once() + args, kwargs = app.notify.call_args + assert kwargs.get("severity") == "error" + assert "Sandbox failure" in args[0] + panel._return_to_browsing.assert_called_once() + + +class TestRalphHeaderSandbox: + """The active sandbox mechanism is always shown in the Ralph header.""" + + def _header(self): # type: ignore[no-untyped-def] + from mfbt.commands.ralph.ralph_widgets import RalphHeader + + h = RalphHeader() + h.agent_name = "claude" + h.mode = "feature-by-feature" + return h + + def test_active_sandbox_is_labelled(self) -> None: + h = self._header() + h.sandbox = "sandbox_exec" + line1 = h.render().split("\n")[0] + assert "Sandbox:" in line1 and "sandbox_exec" in line1 + assert "UNSANDBOXED" not in line1 + + def test_bwrap_is_labelled(self) -> None: + h = self._header() + h.sandbox = "bwrap" + line1 = h.render().split("\n")[0] + assert "Sandbox:" in line1 and "bwrap" in line1 + assert "UNSANDBOXED" not in line1 + + def test_unset_shows_pending_placeholder(self) -> None: + # Always-present segment, even before detection resolves. + assert "Sandbox" in self._header().render().split("\n")[0] + + def test_set_header_sandbox_updates_widget_and_cache( + self, tmp_path: Path + ) -> None: + panel = _make_panel(tmp_path) + panel._set_header_sandbox("bwrap") + assert panel._sandbox_mechanism_value == "bwrap" + # query_one("#ralph-header", RalphHeader).sandbox was assigned. + header = panel.query_one.return_value + assert header.sandbox == "bwrap" From 53a97a52c7d6d9b6e19439dac824fbe086ce2cf7 Mon Sep 17 00:00:00 2001 From: Shuveb Hussain Date: Sat, 16 May 2026 20:47:44 -0500 Subject: [PATCH 2/2] Fix 4 sandbox/TUI review issues + clear pre-existing lint - feature_list.py: route the post-construction sandbox-header update through _safe_call_from_thread so a quit during the bwrap liveness probe no longer raises NoActiveAppError into the outer handler. - feature_list.py: drop the dead _sandbox_mechanism_value write (no read site); RalphHeader.sandbox is the sole persistence. - models.py: _assert_confining now expanduser-expands paths before the root/$HOME structural checks so a tilde-relative Path("~") is rejected at construction instead of expanding to $HOME in the adapters (env-var read only, no-FS-at-construction contract preserved). - sandbox_exec.py: document the accepted deny(N) shared-fd false-positive trade-off explicitly (no behavior change). - tests: tilde-confinement regression coverage; update header test for the removed cache attr; reflow an over-length dict literal (E501). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ralph/sandbox/adapters/sandbox_exec.py | 16 ++++++++++++++++ src/mfbt/commands/ralph/sandbox/models.py | 14 ++++++++++---- src/mfbt/tui/screens/feature_list.py | 10 +++++++--- tests/unit/ralph/sandbox/test_models.py | 16 ++++++++++++++++ tests/unit/tui/test_ralph_panel.py | 13 +++++++++---- 5 files changed, 58 insertions(+), 11 deletions(-) diff --git a/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py b/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py index d5dcd6f..0b0ab07 100644 --- a/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py +++ b/src/mfbt/commands/ralph/sandbox/adapters/sandbox_exec.py @@ -139,6 +139,22 @@ def build_prefix(config: SandboxConfig) -> list[str]: # ``SandboxWrapper`` prepends ``[Sandbox] `` to every captured line, so that # marker is stripped before the start-of-line test (callers/tests may also # pass the raw, unprefixed stderr — both are handled). +# +# ACCEPTED TRADE-OFF (kept visible deliberately): because the two processes +# share one stderr fd *by design* (attribution is by the ``[Sandbox] `` +# marker, not an OS fd — splitting them is explicitly rejected upstream), +# Claude's own output that literally contains a ``deny()`` token +# (e.g. a tool printing access-control results, or test output) on a +# non-zero exit *will* be misclassified as a ``SandboxDeniedError``. This +# is accepted because the inverse error is strictly worse: dropping the +# ``deny(N)`` signal would let a real kernel denial read as Claude's own +# exit, looping the orchestrator on a permanently-broken sandbox and +# masking the denial. The misclassification is also non-destructive — see +# ``classify_exit``: the full captured stderr is carried verbatim in the +# error detail, so the operator still sees Claude's real output. The +# numeric-id requirement (``deny\(\d+\)``, not bare ``deny(``) plus the +# line-awareness above are the only narrowing available without fd +# splitting; this residual window is known and not a bug. _SANDBOX_WRAPPER_LINE_PREFIX = "[Sandbox] " _DENY_TOKEN_RE = re.compile(r"deny\(\d+\)") _SANDBOX_EXEC_FAILURE_PREFIX = "sandbox-exec:" diff --git a/src/mfbt/commands/ralph/sandbox/models.py b/src/mfbt/commands/ralph/sandbox/models.py index c0b2ae7..4304de7 100644 --- a/src/mfbt/commands/ralph/sandbox/models.py +++ b/src/mfbt/commands/ralph/sandbox/models.py @@ -102,19 +102,25 @@ def _assert_confining(project_dir: Path, writable_paths: Iterable[Path]) -> None reject it at construction time rather than emitting a policy that silently does not confine. ``project_dir`` is checked too: it is re-allowed verbatim by the SBPL profile and bound rw by bwrap, so a root/`$HOME` - project dir is equally catastrophic. Filesystem-free (no resolve/expand): - structural check only, matching this module's no-FS-at-construction rule. + project dir is equally catastrophic. Filesystem-free: ``expanduser`` is an + env-var read only (no ``realpath``/no FS I/O), so this still honours this + module's no-FS-at-construction rule. The expansion is required, not + cosmetic: both adapters call ``os.path.expanduser`` on the stored paths at + use time, so a tilde-relative ``Path("~")`` ultimately expands to ``$HOME`` + — without expanding here first it would equal neither a root nor + ``Path.home()`` and silently pass the structural guard it is meant to trip. """ home = Path.home() for path in (project_dir, *writable_paths): - if _is_filesystem_root(path): + expanded = Path(os.path.expanduser(str(path))) + if _is_filesystem_root(expanded): raise SandboxSetupError( f"refusing to build a sandbox that grants write access to a " f"filesystem root ({str(path)!r}): that voids the project " f"write-confinement. This is a construction bug — pass a " f"specific project/cache directory, not a root." ) - if path == home: + if expanded == home: raise SandboxSetupError( f"refusing to build a sandbox that grants write access to " f"the entire home directory ({str(path)!r}): grant the " diff --git a/src/mfbt/tui/screens/feature_list.py b/src/mfbt/tui/screens/feature_list.py index 10beabf..7b4d133 100644 --- a/src/mfbt/tui/screens/feature_list.py +++ b/src/mfbt/tui/screens/feature_list.py @@ -798,8 +798,13 @@ def _run_orchestrator(self) -> None: return # Persist the resolved sandbox mechanism in the header and surface - # it in the agent-output log, visible for the entire run. - self.app.call_from_thread( + # it in the agent-output log, visible for the entire run. The + # bwrap liveness probe inside construction can take up to ~5 s; if + # the user quits in that window the app context is already gone, + # so use the RuntimeError-tolerant helper like every other + # cross-thread call here (a bare call_from_thread would raise + # NoActiveAppError and mislog a spurious orchestration failure). + self._safe_call_from_thread( self._set_header_sandbox, self._orchestrator.sandbox_mechanism.value, ) @@ -876,7 +881,6 @@ def _set_header_sandbox(self, mechanism_value: str) -> None: mechanism (``sandbox_exec`` / ``bwrap``) is visible for the whole run, not just as a one-shot log line. """ - self._sandbox_mechanism_value = mechanism_value with suppress(Exception): header = self.query_one("#ralph-header", RalphHeader) header.sandbox = mechanism_value diff --git a/tests/unit/ralph/sandbox/test_models.py b/tests/unit/ralph/sandbox/test_models.py index 5c2d08d..dca04c8 100644 --- a/tests/unit/ralph/sandbox/test_models.py +++ b/tests/unit/ralph/sandbox/test_models.py @@ -125,6 +125,22 @@ def test_project_dir_filesystem_root_rejected(self) -> None: project_dir=Path("/"), writable_paths=[Path("/proj")] ) + def test_writable_tilde_home_rejected(self) -> None: + # Regression (Issue 3): Path("~") != Path.home(), but both adapters + # expanduser the stored paths at use time, so a bare "~" ultimately + # grants write to all of $HOME. The structural guard must expand it. + with pytest.raises(SandboxSetupError, match="home directory"): + SandboxPermissions( + project_dir=Path("/proj"), + writable_paths=[Path("~")], + ) + + def test_project_dir_tilde_home_rejected(self) -> None: + with pytest.raises(SandboxSetupError, match="home directory"): + SandboxPermissions( + project_dir=Path("~"), writable_paths=[Path("/proj")] + ) + class TestSandboxPermissionsForProject: """MCLI-115: for_project factory, env overrides, path resolution.""" diff --git a/tests/unit/tui/test_ralph_panel.py b/tests/unit/tui/test_ralph_panel.py index af2f468..87ad49f 100644 --- a/tests/unit/tui/test_ralph_panel.py +++ b/tests/unit/tui/test_ralph_panel.py @@ -158,7 +158,11 @@ def test_start_with_no_actionable_features_notifies( panel = _make_panel(tmp_path) panel._ralph_config = MagicMock() panel._pending_features = [ - {"completion_status": "completed", "has_spec": True, "has_prompt_plan": True}, + { + "completion_status": "completed", + "has_spec": True, + "has_prompt_plan": True, + }, ] mock_app = MagicMock() @@ -535,12 +539,13 @@ def test_unset_shows_pending_placeholder(self) -> None: # Always-present segment, even before detection resolves. assert "Sandbox" in self._header().render().split("\n")[0] - def test_set_header_sandbox_updates_widget_and_cache( + def test_set_header_sandbox_updates_widget( self, tmp_path: Path ) -> None: panel = _make_panel(tmp_path) panel._set_header_sandbox("bwrap") - assert panel._sandbox_mechanism_value == "bwrap" - # query_one("#ralph-header", RalphHeader).sandbox was assigned. + # query_one("#ralph-header", RalphHeader).sandbox was assigned. The + # header widget is the sole persistence — there is no separate + # panel-level cache attribute. header = panel.query_one.return_value assert header.sandbox == "bwrap"