From f8d31fe94331cde5aa0e3c1dbe372423a5e00e54 Mon Sep 17 00:00:00 2001 From: c <37263590+Aphroq@users.noreply.github.com> Date: Sat, 2 May 2026 14:23:37 +0000 Subject: [PATCH] fix: reject external symlink targets during hydrate --- .../extensions/sandbox/blaxel/sandbox.py | 5 +- .../extensions/sandbox/cloudflare/sandbox.py | 5 +- .../extensions/sandbox/daytona/sandbox.py | 5 +- src/agents/extensions/sandbox/e2b/sandbox.py | 5 +- .../extensions/sandbox/modal/sandbox.py | 1 + .../extensions/sandbox/runloop/sandbox.py | 5 +- .../extensions/sandbox/vercel/sandbox.py | 14 ++++- src/agents/sandbox/sandboxes/docker.py | 5 +- src/agents/sandbox/sandboxes/unix_local.py | 6 +- src/agents/sandbox/util/tar_utils.py | 59 ++++++++++++++++++- tests/extensions/sandbox/test_vercel.py | 32 ++++++++++ tests/sandbox/test_extract.py | 27 +++++++++ tests/sandbox/test_tar_utils.py | 34 +++++++++++ 13 files changed, 191 insertions(+), 12 deletions(-) diff --git a/src/agents/extensions/sandbox/blaxel/sandbox.py b/src/agents/extensions/sandbox/blaxel/sandbox.py index e87cb38389..ce5d4f77ad 100644 --- a/src/agents/extensions/sandbox/blaxel/sandbox.py +++ b/src/agents/extensions/sandbox/blaxel/sandbox.py @@ -600,7 +600,10 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: raise WorkspaceWriteTypeError(path=Path(tar_path), actual_type=type(payload).__name__) try: - validate_tar_bytes(bytes(payload)) + validate_tar_bytes( + bytes(payload), + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=root, diff --git a/src/agents/extensions/sandbox/cloudflare/sandbox.py b/src/agents/extensions/sandbox/cloudflare/sandbox.py index 0454323ea2..59faadd7e0 100644 --- a/src/agents/extensions/sandbox/cloudflare/sandbox.py +++ b/src/agents/extensions/sandbox/cloudflare/sandbox.py @@ -1161,7 +1161,10 @@ async def _hydrate_workspace_via_http(self, data: io.IOBase) -> None: raise WorkspaceArchiveWriteError(path=root, context={"reason": "non_bytes_payload"}) try: - validate_tar_bytes(bytes(raw)) + validate_tar_bytes( + bytes(raw), + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=root, diff --git a/src/agents/extensions/sandbox/daytona/sandbox.py b/src/agents/extensions/sandbox/daytona/sandbox.py index 541e11009c..d1335df377 100644 --- a/src/agents/extensions/sandbox/daytona/sandbox.py +++ b/src/agents/extensions/sandbox/daytona/sandbox.py @@ -1001,7 +1001,10 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: raise WorkspaceWriteTypeError(path=Path(tar_path), actual_type=type(payload).__name__) try: - validate_tar_bytes(bytes(payload)) + validate_tar_bytes( + bytes(payload), + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=root, diff --git a/src/agents/extensions/sandbox/e2b/sandbox.py b/src/agents/extensions/sandbox/e2b/sandbox.py index aedf4c0471..6586ee1550 100644 --- a/src/agents/extensions/sandbox/e2b/sandbox.py +++ b/src/agents/extensions/sandbox/e2b/sandbox.py @@ -1496,7 +1496,10 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: ) from e try: - validate_tar_bytes(bytes(raw)) + validate_tar_bytes( + bytes(raw), + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=error_root, diff --git a/src/agents/extensions/sandbox/modal/sandbox.py b/src/agents/extensions/sandbox/modal/sandbox.py index a83e0f2895..1611c5c296 100644 --- a/src/agents/extensions/sandbox/modal/sandbox.py +++ b/src/agents/extensions/sandbox/modal/sandbox.py @@ -1717,6 +1717,7 @@ async def _hydrate_workspace_via_tar(self, data: io.IOBase) -> None: validate_tar_bytes( bytes(raw), skip_rel_paths=self.state.manifest.ephemeral_persistence_paths(), + allow_external_symlink_targets=False, ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( diff --git a/src/agents/extensions/sandbox/runloop/sandbox.py b/src/agents/extensions/sandbox/runloop/sandbox.py index 4b1d99c2a2..31004017f2 100644 --- a/src/agents/extensions/sandbox/runloop/sandbox.py +++ b/src/agents/extensions/sandbox/runloop/sandbox.py @@ -1282,7 +1282,10 @@ async def _hydrate_workspace_via_tar(self, payload: bytes) -> None: archive_path = root / f".sandbox-runloop-hydrate-{self.state.session_id.hex}.tar" try: - validate_tar_bytes(payload) + validate_tar_bytes( + payload, + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=root, diff --git a/src/agents/extensions/sandbox/vercel/sandbox.py b/src/agents/extensions/sandbox/vercel/sandbox.py index c0041bd79b..92513077ab 100644 --- a/src/agents/extensions/sandbox/vercel/sandbox.py +++ b/src/agents/extensions/sandbox/vercel/sandbox.py @@ -285,10 +285,18 @@ async def _validate_path_access(self, path: Path | str, *, for_write: bool = Fal def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]: return (RESOLVE_WORKSPACE_PATH_HELPER,) - def _validate_tar_bytes(self, raw: bytes) -> None: + def _validate_tar_bytes( + self, + raw: bytes, + *, + allow_external_symlink_targets: bool = True, + ) -> None: try: with tarfile.open(fileobj=io.BytesIO(raw), mode="r:*") as tar: - validate_tarfile(tar) + validate_tarfile( + tar, + allow_external_symlink_targets=allow_external_symlink_targets, + ) except UnsafeTarMemberError as exc: raise ValueError(str(exc)) from exc except (tarfile.TarError, OSError) as exc: @@ -608,7 +616,7 @@ async def _hydrate_workspace_internal(self, raw: bytes) -> None: ) tar_command = ("tar", "xf", archive_path.as_posix(), "-C", root.as_posix()) try: - self._validate_tar_bytes(raw) + self._validate_tar_bytes(raw, allow_external_symlink_targets=False) await self.mkdir(root, parents=True) await self._write_files_with_retry([{"path": archive_path.as_posix(), "content": raw}]) result = await self.exec(*tar_command, shell=False) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index 13eee0bc6d..2148840566 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -1243,7 +1243,10 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: try: archive.seek(0) with tarfile.open(fileobj=archive, mode="r:*") as tar: - validate_tarfile(tar) + validate_tarfile( + tar, + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=error_root, diff --git a/src/agents/sandbox/sandboxes/unix_local.py b/src/agents/sandbox/sandboxes/unix_local.py index a5b5a3fb85..6bedcb5fa0 100644 --- a/src/agents/sandbox/sandboxes/unix_local.py +++ b/src/agents/sandbox/sandboxes/unix_local.py @@ -1037,7 +1037,11 @@ async def hydrate_workspace(self, data: io.IOBase) -> None: try: root.mkdir(parents=True, exist_ok=True) with tarfile.open(fileobj=data, mode="r:*") as tar: - safe_extract_tarfile(tar, root=root) + safe_extract_tarfile( + tar, + root=root, + allow_external_symlink_targets=False, + ) except UnsafeTarMemberError as e: raise WorkspaceArchiveWriteError( path=root, context={"reason": e.reason, "member": e.member}, cause=e diff --git a/src/agents/sandbox/util/tar_utils.py b/src/agents/sandbox/util/tar_utils.py index cd1ee33258..5a7167177d 100644 --- a/src/agents/sandbox/util/tar_utils.py +++ b/src/agents/sandbox/util/tar_utils.py @@ -35,6 +35,45 @@ def _raise_if_windows_member_path(member_name: str) -> None: raise UnsafeTarMemberError(member=member_name, reason="windows path separator") +def _normalize_posix_path_without_root(path: PurePosixPath) -> tuple[str, ...] | None: + normalized: list[str] = [] + for part in path.parts: + if part in ("", ".", "/"): + continue + if part == "..": + if not normalized: + return None + normalized.pop() + continue + normalized.append(part) + return tuple(normalized) + + +def _validate_symlink_target( + member: tarfile.TarInfo, + *, + rel_path: Path, + allow_external_symlink_targets: bool, +) -> None: + if not member.issym() or allow_external_symlink_targets: + return + + target = PurePosixPath(member.linkname) + if target.is_absolute(): + raise UnsafeTarMemberError( + member=member.name, + reason=f"absolute symlink target not allowed: {member.linkname}", + ) + + member_parent = PurePosixPath(rel_path.as_posix()).parent + normalized = _normalize_posix_path_without_root(member_parent / target) + if normalized is None: + raise UnsafeTarMemberError( + member=member.name, + reason=f"symlink target escapes archive root: {member.linkname}", + ) + + def safe_tar_member_rel_path( member: tarfile.TarInfo, *, @@ -199,6 +238,7 @@ def validate_tarfile( skip_rel_paths: Iterable[str | Path] = (), root_name: str | None = None, allow_symlinks: bool = True, + allow_external_symlink_targets: bool = True, ) -> None: """Validate a workspace tar before handing it to a local or remote extractor. @@ -235,6 +275,11 @@ def validate_tarfile( members_by_rel_path[rel_path] = member if member.issym(): + _validate_symlink_target( + member, + rel_path=rel_path, + allow_external_symlink_targets=allow_external_symlink_targets, + ) if rel_path in rejected_symlink_rel_paths: raise UnsafeTarMemberError( member=member.name, @@ -266,6 +311,7 @@ def validate_tar_bytes( reject_symlink_rel_paths: Iterable[str | Path] = (), skip_rel_paths: Iterable[str | Path] = (), root_name: str | None = None, + allow_external_symlink_targets: bool = True, ) -> None: """Validate raw workspace tar bytes with the shared safe tar policy.""" @@ -276,6 +322,7 @@ def validate_tar_bytes( reject_symlink_rel_paths=reject_symlink_rel_paths, skip_rel_paths=skip_rel_paths, root_name=root_name, + allow_external_symlink_targets=allow_external_symlink_targets, ) except UnsafeTarMemberError: raise @@ -283,7 +330,12 @@ def validate_tar_bytes( raise UnsafeTarMemberError(member="", reason="invalid tar stream") from e -def safe_extract_tarfile(tar: tarfile.TarFile, *, root: Path) -> None: +def safe_extract_tarfile( + tar: tarfile.TarFile, + *, + root: Path, + allow_external_symlink_targets: bool = True, +) -> None: """ Safely extract a tar archive into `root`. @@ -302,7 +354,10 @@ def safe_extract_tarfile(tar: tarfile.TarFile, *, root: Path) -> None: root_resolved = root.resolve() members = tar.getmembers() - validate_tarfile(tar) + validate_tarfile( + tar, + allow_external_symlink_targets=allow_external_symlink_targets, + ) def _prepare_replaceable_leaf(*, dest: Path, rel_path: Path, name: str) -> None: _ensure_no_symlink_parents(root=root_resolved, dest=dest, check_leaf=False) diff --git a/tests/extensions/sandbox/test_vercel.py b/tests/extensions/sandbox/test_vercel.py index bdc3bf4739..306acf9527 100644 --- a/tests/extensions/sandbox/test_vercel.py +++ b/tests/extensions/sandbox/test_vercel.py @@ -1084,6 +1084,38 @@ async def test_vercel_hydrate_workspace_rejects_unsafe_tar_before_upload( ) +@pytest.mark.asyncio +async def test_vercel_hydrate_workspace_rejects_external_symlink_target_before_upload( + monkeypatch: pytest.MonkeyPatch, +) -> None: + vercel_module = _load_vercel_module(monkeypatch) + state = vercel_module.VercelSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000105", + manifest=Manifest(root="/workspace"), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id="sandbox-hydrate-external-link", + workspace_persistence="tar", + ) + sandbox = _FakeAsyncSandbox(sandbox_id="sandbox-hydrate-external-link") + session = vercel_module.VercelSandboxSession.from_state(state, sandbox=sandbox) + + unsafe_buf = io.BytesIO() + with tarfile.open(fileobj=unsafe_buf, mode="w") as archive: + info = tarfile.TarInfo(name="leak") + info.type = tarfile.SYMTYPE + info.linkname = "/etc/passwd" + archive.addfile(info) + + with pytest.raises(vercel_module.WorkspaceArchiveWriteError) as exc_info: + await session.hydrate_workspace(io.BytesIO(unsafe_buf.getvalue())) + + assert "absolute symlink target not allowed" in str(exc_info.value.__cause__) + assert sandbox.write_files_calls == [] + assert not any( + call for call in sandbox.run_command_calls if call[0] == "tar" and call[1][0] == "xf" + ) + + @pytest.mark.asyncio async def test_vercel_hydrate_workspace_raises_archive_error_on_nonzero_tar_exec( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/sandbox/test_extract.py b/tests/sandbox/test_extract.py index 1a058d951c..d9ac7f42cb 100644 --- a/tests/sandbox/test_extract.py +++ b/tests/sandbox/test_extract.py @@ -322,6 +322,33 @@ async def test_extract_zip_rejects_symlinked_parent_paths(tmp_path: Path) -> Non await session.shutdown() +@pytest.mark.asyncio +async def test_unix_local_hydrate_workspace_rejects_external_symlink_targets( + tmp_path: Path, +) -> None: + session = _build_session(tmp_path) + await session.start() + try: + archive = io.BytesIO() + with tarfile.open(fileobj=archive, mode="w") as tar: + info = tarfile.TarInfo(name="leak") + info.type = tarfile.SYMTYPE + info.linkname = "/etc/passwd" + tar.addfile(info) + archive.seek(0) + + with pytest.raises(WorkspaceArchiveWriteError) as exc_info: + await session.hydrate_workspace(archive) + + assert exc_info.value.context["member"] == "leak" + assert ( + exc_info.value.context["reason"] == "absolute symlink target not allowed: /etc/passwd" + ) + assert not (Path(session.state.manifest.root) / "leak").exists() + finally: + await session.shutdown() + + @pytest.mark.asyncio async def test_extract_tar_rejects_windows_drive_member_paths(tmp_path: Path) -> None: await _assert_extract_rejects_member( diff --git a/tests/sandbox/test_tar_utils.py b/tests/sandbox/test_tar_utils.py index 63d3fa57bd..3c8ceec9fa 100644 --- a/tests/sandbox/test_tar_utils.py +++ b/tests/sandbox/test_tar_utils.py @@ -141,6 +141,26 @@ def test_validate_tar_bytes_rejects_member_under_non_directory_member() -> None: validate_tar_bytes(raw) +def test_validate_tar_bytes_rejects_absolute_symlink_target_in_strict_mode() -> None: + raw = _tar_bytes(_symlink("leak", "/etc/passwd")) + + with pytest.raises(UnsafeTarMemberError, match="absolute symlink target not allowed"): + validate_tar_bytes(raw, allow_external_symlink_targets=False) + + +def test_validate_tar_bytes_rejects_parent_escape_symlink_target_in_strict_mode() -> None: + raw = _tar_bytes(_dir("nested"), _symlink("nested/leak", "../../etc/passwd")) + + with pytest.raises(UnsafeTarMemberError, match="symlink target escapes archive root"): + validate_tar_bytes(raw, allow_external_symlink_targets=False) + + +def test_validate_tar_bytes_allows_internal_symlink_target_in_strict_mode() -> None: + raw = _tar_bytes(_dir("nested"), _symlink("nested/python", "../bin/python3")) + + validate_tar_bytes(raw, allow_external_symlink_targets=False) + + def test_strip_tar_member_prefix_returns_workspace_relative_archive() -> None: raw = _tar_bytes( _dir("workspace"), @@ -185,6 +205,20 @@ def test_safe_extract_tarfile_can_rehydrate_existing_leaf_symlink(tmp_path: Path assert os.readlink(tmp_path / "link.txt") == "target-v2.txt" +def test_safe_extract_tarfile_rejects_external_symlink_target_in_strict_mode( + tmp_path: Path, +) -> None: + raw = _tar_bytes(_symlink("link.txt", "/etc/passwd")) + + with tarfile.open(fileobj=io.BytesIO(raw), mode="r:*") as tar: + with pytest.raises(UnsafeTarMemberError, match="absolute symlink target not allowed"): + safe_extract_tarfile( + tar, + root=tmp_path, + allow_external_symlink_targets=False, + ) + + def test_safe_extract_tarfile_can_replace_existing_leaf_file_with_symlink( tmp_path: Path, ) -> None: