diff --git a/agentrun/integration/builtin/sandbox.py b/agentrun/integration/builtin/sandbox.py
index 24bce64..fabd1b0 100644
--- a/agentrun/integration/builtin/sandbox.py
+++ b/agentrun/integration/builtin/sandbox.py
@@ -388,17 +388,43 @@ def inner(sb: Sandbox):
         name="read_file",
         description=(
             "Read the content of a file at the specified path in the sandbox."
-            " Returns the text content. Suitable for reading code files,"
-            " configs, logs, etc."
+            " Returns the plain text content by default."
+            " Set encode_base64=True to get the content as a base64-encoded"
+            " string instead, which is useful for binary files."
+            " Suitable for reading code files, configs, logs, binary files,"
+            " etc."
         ),
     )
-    def read_file(self, path: str) -> Dict[str, Any]:
-        """读取文件内容 / Read file content"""
+    def read_file(
+        self, path: str, encode_base64: bool = False
+    ) -> Dict[str, Any]:
+        """Read a file from the sandbox and return its content.
+
+        Args:
+            path: Path of the file to read inside the sandbox.
+            encode_base64: When True, return the content base64-encoded
+                instead of unchanged. Defaults to False.
+
+        Returns:
+            A dict with the keys "path", "content" and "encoding", where
+            "encoding" is "base64" if encode_base64 is True, else "raw".
+        """
 
         def inner(sb: Sandbox):
             assert isinstance(sb, CodeInterpreterSandbox)
             content = sb.file.read(path=path)
-            return {"path": path, "content": content}
+            if encode_base64:
+                # Imported locally so this method does not rely on a
+                # module-level import of base64.
+                import base64
+
+                payload = content
+                if isinstance(payload, str):
+                    # Text is converted to UTF-8 bytes before encoding.
+                    payload = payload.encode("utf-8")
+                encoded = base64.b64encode(payload).decode("ascii")
+                return {"path": path, "content": encoded, "encoding": "base64"}
+            return {"path": path, "content": content, "encoding": "raw"}
 
         return self._run_in_sandbox(inner)
 
diff --git a/tests/unittests/integration/test_code_interpreter_toolset_read_file.py b/tests/unittests/integration/test_code_interpreter_toolset_read_file.py
new file mode 100644
index 0000000..9921ac2
--- /dev/null
+++ b/tests/unittests/integration/test_code_interpreter_toolset_read_file.py
@@ -0,0 +1,136 @@
+"""Unit tests for the CodeInterpreterToolSet ``read_file`` tool.
+
+Covers the default raw output and the base64 output selected with the
+``encode_base64`` parameter.
+"""
+
+import base64
+import threading
+from typing import Union
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from agentrun.integration.builtin.sandbox import CodeInterpreterToolSet
+from agentrun.sandbox.code_interpreter_sandbox import CodeInterpreterSandbox
+
+
+@pytest.fixture
+def toolset():
+    """Create a CodeInterpreterToolSet instance while bypassing __init__."""
+    with patch.object(CodeInterpreterToolSet, "__init__", lambda self: None):
+        ts = CodeInterpreterToolSet()
+    ts.sandbox = None
+    ts.sandbox_id = ""
+    ts._lock = threading.Lock()
+    ts.template_name = "test-tpl"
+    ts.template_type = MagicMock()
+    ts.sandbox_idle_timeout_seconds = 600
+    ts.config = None
+    ts.oss_mount_config = None
+    ts.nas_config = None
+    ts.polar_fs_config = None
+    return ts
+
+
+def _make_mock_sandbox(file_content: Union[str, bytes]):
+    """Build a mock sandbox whose ``file.read`` returns ``file_content``."""
+    mock_sb = MagicMock(spec=CodeInterpreterSandbox)
+    mock_sb.file.read.return_value = file_content
+    return mock_sb
+
+
+def _read_file(toolset, mock_sb, **kwargs):
+    """Call ``read_file`` with ``_run_in_sandbox`` routed to ``mock_sb``."""
+    with patch.object(
+        toolset, "_run_in_sandbox", side_effect=lambda fn: fn(mock_sb)
+    ):
+        return toolset.read_file(**kwargs)
+
+
+class TestReadFileRawDefault:
+    """read_file returns the content unchanged unless base64 is requested."""
+
+    def test_returns_plain_content_by_default(self, toolset):
+        """Without encode_base64 the content is returned unchanged."""
+        file_content = "hello world"
+        mock_sb = _make_mock_sandbox(file_content)
+
+        result = _read_file(toolset, mock_sb, path="/tmp/test.txt")
+
+        assert result["content"] == file_content
+        assert result["encoding"] == "raw"
+        assert result["path"] == "/tmp/test.txt"
+        # The requested path must be forwarded to the sandbox file API.
+        mock_sb.file.read.assert_called_once_with(path="/tmp/test.txt")
+
+    def test_encode_base64_false_same_as_default(self, toolset):
+        """encode_base64=False behaves exactly like the default call."""
+        file_content = "some content"
+
+        result_explicit = _read_file(
+            toolset,
+            _make_mock_sandbox(file_content),
+            path="/tmp/f.txt",
+            encode_base64=False,
+        )
+        result_default = _read_file(
+            toolset, _make_mock_sandbox(file_content), path="/tmp/f.txt"
+        )
+
+        assert result_explicit == result_default
+        assert result_explicit["encoding"] == "raw"
+
+
+class TestReadFileBase64Param:
+    """read_file returns base64-encoded content when encode_base64=True."""
+
+    def test_returns_base64_encoded_content(self, toolset):
+        """Text content comes back as the base64 of its UTF-8 bytes."""
+        mock_sb = _make_mock_sandbox("hello world")
+
+        result = _read_file(
+            toolset, mock_sb, path="/tmp/test.txt", encode_base64=True
+        )
+
+        expected_b64 = base64.b64encode(b"hello world").decode("ascii")
+        assert result["content"] == expected_b64
+        assert result["encoding"] == "base64"
+        assert result["path"] == "/tmp/test.txt"
+        mock_sb.file.read.assert_called_once_with(path="/tmp/test.txt")
+
+    def test_base64_roundtrip(self, toolset):
+        """Decoding the base64 output yields the original text."""
+        file_content = "中文内容 line1\nline2"
+        mock_sb = _make_mock_sandbox(file_content)
+
+        result = _read_file(
+            toolset, mock_sb, path="/tmp/utf8.txt", encode_base64=True
+        )
+
+        decoded = base64.b64decode(result["content"]).decode("utf-8")
+        assert decoded == file_content
+
+    def test_bytes_content_also_base64_encoded(self, toolset):
+        """Bytes returned by the sandbox are base64-encoded directly."""
+        file_bytes = b"\x00\x01\x02\x03"
+        mock_sb = _make_mock_sandbox(file_bytes)
+
+        result = _read_file(
+            toolset, mock_sb, path="/tmp/binary.bin", encode_base64=True
+        )
+
+        expected_b64 = base64.b64encode(file_bytes).decode("ascii")
+        assert result["content"] == expected_b64
+        assert result["encoding"] == "base64"
+
+    def test_empty_content_base64_encoded(self, toolset):
+        """Empty content yields an empty base64 string."""
+        mock_sb = _make_mock_sandbox("")
+
+        result = _read_file(
+            toolset, mock_sb, path="/tmp/empty.txt", encode_base64=True
+        )
+
+        assert result["content"] == ""
+        assert result["encoding"] == "base64"