diff --git a/crates/execution/assets/runners/python-runner.mjs b/crates/execution/assets/runners/python-runner.mjs index df22aac0..f28f821d 100644 --- a/crates/execution/assets/runners/python-runner.mjs +++ b/crates/execution/assets/runners/python-runner.mjs @@ -14,6 +14,10 @@ const PYODIDE_PACKAGE_CACHE_DIR_ENV = 'AGENTOS_PYODIDE_PACKAGE_CACHE_DIR'; const PYODIDE_PACKAGE_CACHE_GUEST_ROOT = '/__agentos_pyodide_cache'; const PYTHON_CODE_ENV = 'AGENTOS_PYTHON_CODE'; const PYTHON_FILE_ENV = 'AGENTOS_PYTHON_FILE'; +const PYTHON_ARGV_ENV = 'AGENTOS_PYTHON_ARGV'; +const PYTHON_MODULE_ENV = 'AGENTOS_PYTHON_MODULE'; +const PYTHON_STDIN_PROGRAM_ENV = 'AGENTOS_PYTHON_STDIN_PROGRAM'; +const PYTHON_INTERACTIVE_ENV = 'AGENTOS_PYTHON_INTERACTIVE'; const PYTHON_PREWARM_ONLY_ENV = 'AGENTOS_PYTHON_PREWARM_ONLY'; const PYTHON_WARMUP_DEBUG_ENV = 'AGENTOS_PYTHON_WARMUP_DEBUG'; const PYTHON_WARMUP_METRICS_PREFIX = '__AGENTOS_PYTHON_WARMUP_METRICS__:'; @@ -583,6 +587,15 @@ function createPythonBridgeRpcBridge() { async fsMkdir(path, options = {}) { this.fsMkdirSync(path, options); }, + fsUnlinkSync(path) { + requestSync('fsUnlink', { path }); + }, + fsRmdirSync(path) { + requestSync('fsRmdir', { path }); + }, + fsRenameSync(path, destination) { + requestSync('fsRename', { path, destination }); + }, httpRequestSync(url, method = 'GET', headersJson = '{}', bodyBase64 = null) { let headers; try { @@ -759,6 +772,15 @@ function createPythonFdRpcBridge() { async fsMkdir(path, options = {}) { this.fsMkdirSync(path, options); }, + fsUnlinkSync(path) { + requestSync('fsUnlink', { path }); + }, + fsRmdirSync(path) { + requestSync('fsRmdir', { path }); + }, + fsRenameSync(path, destination) { + requestSync('fsRename', { path, destination }); + }, httpRequestSync(url, method = 'GET', headersJson = '{}', bodyBase64 = null) { let headers; try { @@ -1698,14 +1720,30 @@ function installPythonWorkspaceFs(pyodide, bridge) { } return node; }, - rename() { - throw new FS.ErrnoError(ERRNO_CODES.ENOSYS); + 
rename(oldNode, newDir, newName) { + const source = nodeGuestPath(oldNode); + const destination = joinGuestPath(nodeGuestPath(newDir), newName); + withFsErrors(() => bridge.fsRenameSync(source, destination)); + // `nodeGuestPath` reads the stored path, so retarget the node before it + // moves in the in-memory tree; children re-derive on the next sync. + oldNode.agentOSGuestPath = destination; + memfsDirNodeOps.rename(oldNode, newDir, newName); }, - unlink() { - throw new FS.ErrnoError(ERRNO_CODES.ENOSYS); + unlink(parent, name) { + withFsErrors(() => + bridge.fsUnlinkSync(joinGuestPath(nodeGuestPath(parent), name)), + ); + if (parent.contents && Object.prototype.hasOwnProperty.call(parent.contents, name)) { + memfsDirNodeOps.unlink(parent, name); + } }, - rmdir() { - throw new FS.ErrnoError(ERRNO_CODES.ENOSYS); + rmdir(parent, name) { + withFsErrors(() => + bridge.fsRmdirSync(joinGuestPath(nodeGuestPath(parent), name)), + ); + if (parent.contents && Object.prototype.hasOwnProperty.call(parent.contents, name)) { + memfsDirNodeOps.rmdir(parent, name); + } }, readdir(node) { syncDirectory(node); @@ -1716,30 +1754,84 @@ function installPythonWorkspaceFs(pyodide, bridge) { }, }; + const overlayBackend = { + mount(mount) { + const root = MEMFS.mount(mount); + root.agentOSGuestPath = mount.mountpoint; + root.agentOSDirty = false; + root.agentOSLoaded = true; + root.agentOSRemoteSize = 0; + root.node_ops = workspaceDirNodeOps; + root.stream_ops = workspaceDirStreamOps; + return root; + }, + }; + + function mountVfsAt(guestPath) { + try { + FS.mkdir(guestPath); + } catch (error) { + if (!(error instanceof FS.ErrnoError) || error.errno !== ERRNO_CODES.EEXIST) { + throw error; + } + } + FS.mount(overlayBackend, {}, guestPath); + } + + // Mount the kernel VFS over the VM's real top-level directories so Python sees + // the whole guest filesystem — `/tmp`, `/etc`, `/root`, `/usr`, … — exactly + // like the JS/WASM runtimes and `vm.readFile()`. 
Pyodide owns a handful of + // paths in its own in-isolate FS; keep those on MEMFS so the interpreter, its + // stdlib, and its devices keep working. + const RESERVED_ROOTS = new Set([ + 'lib', + 'dev', + 'proc', + 'home', + '__agentos_pyodide', + '__agentos_pyodide_cache', + ]); + let rootEntries = []; try { - FS.mkdir('/workspace'); + rootEntries = bridge.fsReaddirSync('/'); } catch (error) { - if (!(error instanceof FS.ErrnoError) || error.errno !== ERRNO_CODES.EEXIST) { - throw error; + // A nested Python child can't reach the kernel VFS (it gets a recoverable + // "unavailable" error and falls back to the in-isolate FS) — that case is + // expected and quiet. Any other failure means the top-level process lost the + // VM root, which is worth surfacing. + if (!/not available for nested child/.test(String(error?.message ?? error))) { + writeStream( + process.stderr, + `agentos: could not bridge the VM filesystem into Python (${formatError(error)}); only /workspace will be visible\n`, + ); } + rootEntries = []; + } + for (const name of rootEntries) { + if (RESERVED_ROOTS.has(name)) { + continue; + } + const childPath = `/${name}`; + let isDir = false; + try { + isDir = Boolean(bridge.fsStatSync(childPath)?.isDirectory); + } catch { + isDir = false; + } + if (!isDir) { + continue; + } + try { + mountVfsAt(childPath); + } catch { + // A path Pyodide owns or cannot shadow — skip it rather than abort boot. + } + } + // /workspace stays available for backward compatibility even if the VM root + // does not advertise it. 
+ if (!rootEntries.includes('workspace')) { + mountVfsAt('/workspace'); } - - FS.mount( - { - mount(mount) { - const root = MEMFS.mount(mount); - root.agentOSGuestPath = mount.mountpoint; - root.agentOSDirty = false; - root.agentOSLoaded = true; - root.agentOSRemoteSize = 0; - root.node_ops = workspaceDirNodeOps; - root.stream_ops = workspaceDirStreamOps; - return root; - }, - }, - {}, - '/workspace', - ); } async function readLockFileContents(indexPath, indexURL) { @@ -1815,6 +1907,180 @@ function installPythonStdin(pyodide) { }); } +function applyPythonArgv(pyodide) { + const argvJson = readRunnerEnv(PYTHON_ARGV_ENV); + if (argvJson == null) { + return; + } + let argv; + try { + argv = JSON.parse(argvJson); + } catch { + return; + } + if (!Array.isArray(argv)) { + return; + } + const normalized = argv.map((value) => String(value)); + pyodide.globals.set('__agentos_argv', pyodide.toPy(normalized)); + try { + pyodide.runPython('import sys as _agentos_sys_argv\n_agentos_sys_argv.argv = list(__agentos_argv)\ndel _agentos_sys_argv'); + } finally { + pyodide.globals.delete('__agentos_argv'); + } +} + +// Drains the guest stdin stream to EOF and returns it as text. Used for +// `python -` (and piped programs), where stdin IS the program body. +function readProgramFromStdin() { + const chunks = []; + const CHUNK = 65536; + if (bridgePythonStdinRead) { + while (true) { + const response = callBridgeSync(bridgePythonStdinRead, [CHUNK, 100]); + if (response === PYTHON_STDIN_DONE_SENTINEL) { + break; + } + const dataBase64 = typeof response === 'string' ? response : ''; + if (dataBase64.length === 0) { + continue; + } + chunks.push(Buffer.from(dataBase64, 'base64')); + } + } else if (bridgeKernelStdinRead) { + while (true) { + const response = callBridgeSync(bridgeKernelStdinRead, [CHUNK, 100]); + if (response?.done) { + break; + } + const dataBase64 = typeof response?.dataBase64 === 'string' ? 
response.dataBase64 : ''; + if (dataBase64.length === 0) { + continue; + } + chunks.push(Buffer.from(dataBase64, 'base64')); + } + } else { + const buffer = Buffer.alloc(CHUNK); + while (true) { + let bytesRead = 0; + try { + bytesRead = readSync(STDIN_FD, buffer, 0, buffer.length, null); + } catch { + break; + } + if (bytesRead === 0) { + break; + } + chunks.push(Buffer.from(buffer.subarray(0, bytesRead))); + } + } + return Buffer.concat(chunks).toString('utf8'); +} + +// A persistent, kernel-VFS-backed site-packages. The default Pyodide +// site-packages lives in the per-process in-isolate MEMFS, so anything installed +// there vanishes when the process exits. This directory lives on the VM +// filesystem (via the kernel VFS), so `pip install` survives across separate +// `python` invocations and is visible to other processes — just like a real +// `site-packages`. It is appended to `sys.path` on every boot. +const PYTHON_VFS_SITE_PACKAGES = '/root/.agentos/site-packages'; + +function installPythonVfsSitePackages(pyodide) { + if (typeof pyodide?.runPython !== 'function') { + return; + } + try { + pyodide.globals.set('__agentos_vfs_site', PYTHON_VFS_SITE_PACKAGES); + pyodide.runPython( + 'import os as _os, sys as _sys\n' + + 'try:\n' + + ' _os.makedirs(__agentos_vfs_site, exist_ok=True)\n' + + ' if __agentos_vfs_site not in _sys.path:\n' + + // Append (not prepend): the stdlib + bundled packages stay first, so + // hot imports resolve from the fast in-isolate FS and only genuinely + // pip-installed packages incur a VFS lookup, and a VFS package can't + // shadow the stdlib. + ' _sys.path.append(__agentos_vfs_site)\n' + + // Best-effort: if the VFS site-packages can't be created (e.g. a + // read-only `/root`), persistence is simply unavailable — pip still + // works in-process. Degrade quietly rather than spam stderr. 
+ 'except OSError:\n' + + ' pass\n' + + 'finally:\n' + + ' del _os, _sys', + ); + } catch (error) { + writeStream(process.stderr, `agentos: VFS site-packages setup failed: ${formatError(error)}\n`); + } finally { + try { + pyodide.globals.delete('__agentos_vfs_site'); + } catch {} + } +} + +// `pip` / `python -m pip`: emulate the common pip CLI via Pyodide's micropip, +// which fetches wheels through the runner's kernel-backed fetch (so egress is +// governed by the VM network policy, never an ambient host fetch). Installed +// packages are copied into the persistent VFS site-packages so they survive the +// per-process interpreter and can be imported by a later `python` invocation. +async function runPythonPip(pyodide) { + pyodide.globals.set('__agentos_vfs_site', PYTHON_VFS_SITE_PACKAGES); + try { + await pyodide.runPythonAsync(` +import os, shutil, site, sys +_agentos_pip_args = sys.argv[1:] +if _agentos_pip_args and _agentos_pip_args[0] == "install": + import micropip + _agentos_pip_pkgs = [a for a in _agentos_pip_args[1:] if not a.startswith("-")] + if not _agentos_pip_pkgs: + print("ERROR: You must give at least one requirement to install", file=sys.stderr) + sys.exit(1) + _agentos_sp = site.getsitepackages()[0] + _agentos_before = set(os.listdir(_agentos_sp)) if os.path.isdir(_agentos_sp) else set() + await micropip.install(_agentos_pip_pkgs) + # Persist whatever micropip extracted into the in-isolate site-packages into + # the VFS-backed site-packages so it survives this process. 
+ os.makedirs(__agentos_vfs_site, exist_ok=True) + _agentos_after = set(os.listdir(_agentos_sp)) if os.path.isdir(_agentos_sp) else set() + for _agentos_name in sorted(_agentos_after - _agentos_before): + _agentos_src = os.path.join(_agentos_sp, _agentos_name) + _agentos_dst = os.path.join(__agentos_vfs_site, _agentos_name) + if os.path.isdir(_agentos_src): + shutil.copytree(_agentos_src, _agentos_dst, dirs_exist_ok=True) + else: + shutil.copy2(_agentos_src, _agentos_dst) + print("Successfully installed " + " ".join(_agentos_pip_pkgs)) +elif _agentos_pip_args and _agentos_pip_args[0] in ("--version", "-V", "version"): + print("pip (agentOS micropip shim)") +elif _agentos_pip_args and _agentos_pip_args[0] == "list": + import micropip + for _agentos_pkg in sorted(micropip.list()): + print(_agentos_pkg) +else: + print("usage: pip install [ ...]", file=sys.stderr) + sys.exit(2) +`); + } finally { + try { + pyodide.globals.delete('__agentos_vfs_site'); + } catch {} + } +} + +// Minimal interactive REPL backed by the kernel stdin stream (sys.stdin via +// setStdin). Prompts use the standard PS1/PS2; EOF on stdin ends the session. +async function runPythonRepl(pyodide) { + await pyodide.runPythonAsync(` +import sys +from code import InteractiveConsole +if not hasattr(sys, "ps1"): + sys.ps1 = ">>> " +if not hasattr(sys, "ps2"): + sys.ps2 = "... 
" +InteractiveConsole(locals={"__name__": "__main__", "__doc__": None}).interact(banner="", exitmsg="") +`); +} + function resolvePythonSource(pyodide) { const filePath = readRunnerEnv(PYTHON_FILE_ENV); if (filePath != null) { @@ -1909,6 +2175,7 @@ try { installPythonStdin(pyodide); installPythonWorkspaceFs(pyodide, pythonVfsRpcBridge); + installPythonVfsSitePackages(pyodide); installPythonGuestLoaderHooks(); if (pyodide?._api?.config) { pyodide._api.config.packageBaseUrl = bundledPackageBaseUrl; @@ -1939,7 +2206,19 @@ try { installPythonGuestProcessHardening(); installPythonGuestImportBlocklist(pyodide); installPythonRuntimeEnv(pyodide); - const source = readRunnerEnv(PYTHON_FILE_ENV) != null ? 'file' : 'inline'; + applyPythonArgv(pyodide); + const moduleName = readRunnerEnv(PYTHON_MODULE_ENV); + const stdinProgram = readRunnerEnv(PYTHON_STDIN_PROGRAM_ENV) === '1'; + const interactive = readRunnerEnv(PYTHON_INTERACTIVE_ENV) === '1'; + const source = moduleName + ? `module:${moduleName}` + : stdinProgram + ? 'stdin' + : interactive + ? 'repl' + : readRunnerEnv(PYTHON_FILE_ENV) != null + ? 
'file' + : 'inline'; emitPythonStartupMetrics({ prewarmOnly: false, startupMs: realPerformance.now() - startupStarted, @@ -1948,8 +2227,24 @@ try { packageCount: preloadPackages.length, source, }); - const code = resolvePythonSource(pyodide); - await pyodide.runPythonAsync(code); + if (moduleName === 'pip') { + await runPythonPip(pyodide); + } else if (moduleName) { + pyodide.globals.set('__agentos_module', moduleName); + try { + await pyodide.runPythonAsync( + 'import runpy\nrunpy.run_module(__agentos_module, run_name="__main__", alter_sys=True)', + ); + } finally { + pyodide.globals.delete('__agentos_module'); + } + } else if (stdinProgram) { + await pyodide.runPythonAsync(readProgramFromStdin()); + } else if (interactive) { + await runPythonRepl(pyodide); + } else { + await pyodide.runPythonAsync(resolvePythonSource(pyodide)); + } } } catch (error) { writeStream(process.stderr, formatError(error)); diff --git a/crates/execution/src/python.rs b/crates/execution/src/python.rs index e08a6b9c..45fb6413 100644 --- a/crates/execution/src/python.rs +++ b/crates/execution/src/python.rs @@ -50,6 +50,9 @@ pub enum PythonVfsRpcMethod { Stat, ReadDir, Mkdir, + Unlink, + Rmdir, + Rename, HttpRequest, DnsLookup, SubprocessRun, @@ -63,6 +66,9 @@ impl PythonVfsRpcMethod { "fsStat" => Some(Self::Stat), "fsReaddir" => Some(Self::ReadDir), "fsMkdir" => Some(Self::Mkdir), + "fsUnlink" => Some(Self::Unlink), + "fsRmdir" => Some(Self::Rmdir), + "fsRename" => Some(Self::Rename), "httpRequest" => Some(Self::HttpRequest), "dnsLookup" => Some(Self::DnsLookup), "subprocessRun" => Some(Self::SubprocessRun), @@ -76,6 +82,8 @@ pub struct PythonVfsRpcRequest { pub id: u64, pub method: PythonVfsRpcMethod, pub path: String, + /// Second path for `Rename` (the destination); `None` for other methods. 
+ pub destination: Option, pub content_base64: Option, pub recursive: bool, pub url: Option, @@ -137,6 +145,8 @@ struct PythonVfsBridgeRequestWire { #[serde(default)] path: String, #[serde(default)] + destination: Option, + #[serde(default)] content_base64: Option, #[serde(default)] recursive: bool, @@ -1176,6 +1186,7 @@ fn parse_python_bridge_sync_rpc_request( id: request.id, method, path: wire.path, + destination: wire.destination, content_base64: wire.content_base64, recursive: wire.recursive, url: wire.url, diff --git a/crates/sidecar/src/execution.rs b/crates/sidecar/src/execution.rs index 48cf267b..8f0c2c69 100644 --- a/crates/sidecar/src/execution.rs +++ b/crates/sidecar/src/execution.rs @@ -3198,7 +3198,15 @@ where (ActiveExecution::Javascript(execution), env.clone()) } GuestRuntimeKind::Python => { - let python_file_path = python_file_entrypoint(&resolved.entrypoint); + // The `python` command path (marked by AGENTOS_PYTHON_ARGV) is + // explicit about file mode via AGENTOS_PYTHON_FILE, so a `-c` code + // string that happens to end in `.py` is never mistaken for a path. + // The low-level execute API keeps the `.py`-suffix heuristic. 
+ let python_file_path = if resolved.env.contains_key("AGENTOS_PYTHON_ARGV") { + resolved.env.get("AGENTOS_PYTHON_FILE").map(PathBuf::from) + } else { + python_file_entrypoint(&resolved.entrypoint) + }; let pyodide_dist_path = self .python_engine .bundled_pyodide_dist_path_for_vm(&vm_id) @@ -4664,7 +4672,10 @@ where | PythonVfsRpcMethod::Write | PythonVfsRpcMethod::Stat | PythonVfsRpcMethod::ReadDir - | PythonVfsRpcMethod::Mkdir => { + | PythonVfsRpcMethod::Mkdir + | PythonVfsRpcMethod::Unlink + | PythonVfsRpcMethod::Rmdir + | PythonVfsRpcMethod::Rename => { filesystem_handle_python_vfs_rpc_request(self, vm_id, process_id, request) } PythonVfsRpcMethod::HttpRequest => { @@ -5272,10 +5283,15 @@ where }); } - if command == PYTHON_COMMAND { - return Err(SidecarError::InvalidState(String::from( - "nested python child_process execution is not supported yet", - ))); + if is_python_runtime_command(&command) { + return resolve_python_command_execution( + vm, + &command, + &process_args, + env, + guest_cwd, + host_cwd, + ); } let guest_entrypoint = resolve_guest_command_entrypoint( @@ -5432,9 +5448,7 @@ where let kernel_command = match resolved.runtime { GuestRuntimeKind::JavaScript => JAVASCRIPT_COMMAND, GuestRuntimeKind::WebAssembly => WASM_COMMAND, - GuestRuntimeKind::Python => { - unreachable!("python child_process execution is rejected") - } + GuestRuntimeKind::Python => PYTHON_COMMAND, }; let kernel_handle = vm .kernel @@ -5551,7 +5565,75 @@ where ActiveExecution::Wasm(Box::new(execution)) } GuestRuntimeKind::Python => { - unreachable!("python child_process execution is rejected") + // Nested `python` child_process: set up the Pyodide context the + // same way the top-level execute path does, so a guest shell or + // node parent can spawn `python` exactly like `node`. 
+ let python_file_path = if execution_env.contains_key("AGENTOS_PYTHON_ARGV") { + execution_env.get("AGENTOS_PYTHON_FILE").map(PathBuf::from) + } else { + python_file_entrypoint(&resolved.entrypoint) + }; + let pyodide_dist_path = self + .python_engine + .bundled_pyodide_dist_path_for_vm(vm_id) + .map_err(python_error)?; + let pyodide_cache_path = pyodide_dist_path + .parent() + .and_then(Path::parent) + .unwrap_or(pyodide_dist_path.as_path()) + .join("pyodide-package-cache"); + add_runtime_guest_path_mapping( + &mut execution_env, + PYTHON_PYODIDE_GUEST_ROOT, + &pyodide_dist_path, + ); + add_runtime_guest_path_mapping( + &mut execution_env, + PYTHON_PYODIDE_CACHE_GUEST_ROOT, + &pyodide_cache_path, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_READ_PATHS", + &pyodide_dist_path, + true, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_READ_PATHS", + &pyodide_cache_path, + true, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_WRITE_PATHS", + &pyodide_cache_path, + false, + ); + let context = self + .python_engine + .create_context(CreatePythonContextRequest { + vm_id: vm_id.to_owned(), + pyodide_dist_path, + }); + let execution = self + .python_engine + .start_execution(StartPythonExecutionRequest { + vm_id: vm_id.to_owned(), + context_id: context.context_id, + code: resolved.entrypoint.clone(), + file_path: python_file_path, + env: execution_env, + cwd: resolved.host_cwd.clone(), + limits: python_execution_limits(vm), + guest_runtime: guest_runtime_identity( + vm, + Some(u64::from(kernel_pid)), + Some(u64::from(parent_kernel_pid)), + ), + }) + .map_err(python_error)?; + ActiveExecution::Python(execution) } }; let kernel_stdin_writer_fd = match javascript_child_process_stdin_mode(&request) { @@ -5832,9 +5914,7 @@ where let kernel_command = match resolved.runtime { GuestRuntimeKind::JavaScript => JAVASCRIPT_COMMAND, GuestRuntimeKind::WebAssembly => WASM_COMMAND, - 
GuestRuntimeKind::Python => { - unreachable!("python child_process execution is rejected") - } + GuestRuntimeKind::Python => PYTHON_COMMAND, }; let kernel_handle = vm .kernel @@ -5950,7 +6030,75 @@ where ActiveExecution::Wasm(Box::new(execution)) } GuestRuntimeKind::Python => { - unreachable!("python child_process execution is rejected") + // Nested `python` child_process: set up the Pyodide context the + // same way the top-level execute path does, so a guest shell or + // node parent can spawn `python` exactly like `node`. + let python_file_path = if execution_env.contains_key("AGENTOS_PYTHON_ARGV") { + execution_env.get("AGENTOS_PYTHON_FILE").map(PathBuf::from) + } else { + python_file_entrypoint(&resolved.entrypoint) + }; + let pyodide_dist_path = self + .python_engine + .bundled_pyodide_dist_path_for_vm(vm_id) + .map_err(python_error)?; + let pyodide_cache_path = pyodide_dist_path + .parent() + .and_then(Path::parent) + .unwrap_or(pyodide_dist_path.as_path()) + .join("pyodide-package-cache"); + add_runtime_guest_path_mapping( + &mut execution_env, + PYTHON_PYODIDE_GUEST_ROOT, + &pyodide_dist_path, + ); + add_runtime_guest_path_mapping( + &mut execution_env, + PYTHON_PYODIDE_CACHE_GUEST_ROOT, + &pyodide_cache_path, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_READ_PATHS", + &pyodide_dist_path, + true, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_READ_PATHS", + &pyodide_cache_path, + true, + ); + add_runtime_host_access_path( + &mut execution_env, + "AGENTOS_EXTRA_FS_WRITE_PATHS", + &pyodide_cache_path, + false, + ); + let context = self + .python_engine + .create_context(CreatePythonContextRequest { + vm_id: vm_id.to_owned(), + pyodide_dist_path, + }); + let execution = self + .python_engine + .start_execution(StartPythonExecutionRequest { + vm_id: vm_id.to_owned(), + context_id: context.context_id, + code: resolved.entrypoint.clone(), + file_path: python_file_path, + env: execution_env, + cwd: 
resolved.host_cwd.clone(), + limits: python_execution_limits(vm), + guest_runtime: guest_runtime_identity( + vm, + Some(u64::from(kernel_pid)), + Some(u64::from(parent_kernel_pid)), + ), + }) + .map_err(python_error)?; + ActiveExecution::Python(execution) } }; let kernel_stdin_writer_fd = match javascript_child_process_stdin_mode(&request) { @@ -6594,10 +6742,35 @@ where return Ok(event); } } - ActiveExecutionEvent::PythonVfsRpcRequest(_) => { - return Err(SidecarError::InvalidState(String::from( - "nested Python child_process execution is not supported yet", - ))); + ActiveExecutionEvent::PythonVfsRpcRequest(request) => { + // The kernel-VFS bridge is wired for top-level Python + // executions; a nested Python child (spawned by a JS/Python + // parent) cannot service VFS RPCs through this child-event + // path. Respond with a recoverable error instead of aborting + // the child, so its runner falls back to the in-isolate FS + // for the nested process — top-level Python keeps the full + // VFS root. + let Some(vm) = self.vms.get_mut(vm_id) else { + return Ok(Value::Null); + }; + let Some(parent) = + Self::descendant_parent_process_mut(vm, process_id, current_process_path) + else { + return Ok(Value::Null); + }; + let Some(child) = parent.child_processes.get_mut(child_process_id) else { + return Ok(Value::Null); + }; + // Best-effort: deliver the "unavailable" error so the child's + // pending VFS RPC resolves and its runner falls back to the + // in-isolate FS. If delivery fails the child has already gone + // away (broken pipe / no-longer-pending), so dropping the + // result is correct here — there is nothing left to hang. 
+ let _ = child.execution.respond_python_vfs_rpc_error( + request.id, + "ERR_AGENTOS_PYTHON_VFS_UNAVAILABLE", + "python VFS is not available for nested child processes", + ); } ActiveExecutionEvent::SignalState { signal, @@ -7575,6 +7748,10 @@ fn resolve_command_execution( }); } + if is_python_runtime_command(command) { + return resolve_python_command_execution(vm, command, &args, env, guest_cwd, host_cwd); + } + if is_node_runtime_command(command) { if let Some(cli) = resolve_host_node_cli_entrypoint(command) { env.insert( @@ -8522,6 +8699,142 @@ fn is_node_runtime_command(command: &str) -> bool { .is_some_and(|name| matches!(name, "node" | "npm" | "npx")) } +fn python_command_base_name(command: &str) -> &str { + Path::new(command) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or(command) +} + +/// `python` / `python3` (and `pip` / `pip3`, which map to `python -m pip`) are +/// served by the embedded Pyodide runtime, mirroring how `node` is served by the +/// embedded V8 runtime. +fn is_python_runtime_command(command: &str) -> bool { + matches!( + python_command_base_name(command), + "python" | "python3" | "pip" | "pip3" + ) +} + +/// Parse a `python` / `pip` command line into a Pyodide execution. Supports the +/// CPython program selectors `-c CODE`, `-m MODULE`, a `SCRIPT` path, `-` / +/// piped stdin programs, and a bare interpreter (interactive REPL). The chosen +/// mode plus `sys.argv` are forwarded to the runner as `AGENTOS_PYTHON_*` control +/// env, which the runner consumes and never exposes in the guest `os.environ`. 
+fn resolve_python_command_execution( + vm: &VmState, + command: &str, + args: &[String], + mut env: BTreeMap, + guest_cwd: String, + host_cwd: PathBuf, +) -> Result { + let base_name = python_command_base_name(command); + let is_pip = matches!(base_name, "pip" | "pip3"); + + let mut entrypoint = String::new(); + let mut argv: Vec = Vec::new(); + let mut module: Option = None; + let mut stdin_program = false; + let mut interactive = false; + let mut guest_entrypoint: Option = None; + + if is_pip { + module = Some(String::from("pip")); + argv.push(String::from("pip")); + argv.extend(args.iter().cloned()); + } else { + // Skip the value-less interpreter flags we can safely ignore so they do + // not get mistaken for a script path. + let mut idx = 0; + while let Some(flag) = args.get(idx) { + match flag.as_str() { + "-B" | "-E" | "-I" | "-O" | "-OO" | "-q" | "-s" | "-S" | "-u" | "-v" | "-b" + | "-d" | "-x" => idx += 1, + _ => break, + } + } + let rest = &args[idx..]; + match rest.first().map(String::as_str) { + Some("-c") => { + entrypoint = rest.get(1).cloned().ok_or_else(|| { + SidecarError::InvalidState(String::from("argument expected for the -c option")) + })?; + argv.push(String::from("-c")); + argv.extend(rest.iter().skip(2).cloned()); + } + Some("-m") => { + let name = rest.get(1).cloned().ok_or_else(|| { + SidecarError::InvalidState(String::from("argument expected for the -m option")) + })?; + module = Some(name); + argv.push(String::from("-m")); + argv.extend(rest.iter().skip(2).cloned()); + } + Some("-") => { + stdin_program = true; + argv.push(String::from("-")); + argv.extend(rest.iter().skip(1).cloned()); + } + Some(spec) if !spec.starts_with('-') => { + let resolved_guest = guest_entrypoint_for_specifier(&guest_cwd, spec) + .unwrap_or_else(|| spec.to_string()); + entrypoint = resolved_guest.clone(); + env.insert(String::from("AGENTOS_PYTHON_FILE"), resolved_guest.clone()); + guest_entrypoint = Some(resolved_guest); + argv.push(spec.to_string()); + 
argv.extend(rest.iter().skip(1).cloned()); + } + Some(other) => { + return Err(SidecarError::InvalidState(format!( + "unsupported python option: {other}" + ))); + } + None => { + interactive = true; + argv.push(String::new()); + } + } + } + + env.insert( + String::from("AGENTOS_PYTHON_ARGV"), + serde_json::to_string(&argv).unwrap_or_else(|_| String::from("[]")), + ); + if let Some(module) = &module { + env.insert(String::from("AGENTOS_PYTHON_MODULE"), module.clone()); + } + if stdin_program { + env.insert( + String::from("AGENTOS_PYTHON_STDIN_PROGRAM"), + String::from("1"), + ); + } + if interactive { + env.insert( + String::from("AGENTOS_PYTHON_INTERACTIVE"), + String::from("1"), + ); + } + + prepare_guest_runtime_env(vm, &mut env, &guest_cwd, &host_cwd, guest_entrypoint)?; + + Ok(ResolvedChildProcessExecution { + command: String::from(PYTHON_COMMAND), + process_args: std::iter::once(command.to_owned()) + .chain(args.iter().cloned()) + .collect(), + runtime: GuestRuntimeKind::Python, + entrypoint, + execution_args: args.to_vec(), + env, + guest_cwd, + host_cwd, + wasm_permission_tier: None, + tool_command: false, + }) +} + fn resolve_special_node_cli_invocation( args: &[String], env: &mut BTreeMap, diff --git a/crates/sidecar/src/filesystem.rs b/crates/sidecar/src/filesystem.rs index 9243ae50..3fe0361b 100644 --- a/crates/sidecar/src/filesystem.rs +++ b/crates/sidecar/src/filesystem.rs @@ -786,6 +786,38 @@ where .mkdir(&path, request.recursive) .map(|()| PythonVfsRpcResponsePayload::Empty) .map_err(kernel_error), + // Mirror the delete/rename into the host-side shadow too, the + // same way the wire `GuestFilesystemOperation` handlers do — + // otherwise a later shadow→kernel sync would resurrect the + // entry the guest just removed. 
+ PythonVfsRpcMethod::Unlink => { + match vm.kernel.remove_file(&path).map_err(kernel_error) { + Ok(()) => remove_guest_shadow_path(vm, &path) + .map(|()| PythonVfsRpcResponsePayload::Empty), + Err(error) => Err(error), + } + } + PythonVfsRpcMethod::Rmdir => { + match vm.kernel.remove_dir(&path).map_err(kernel_error) { + Ok(()) => remove_guest_shadow_path(vm, &path) + .map(|()| PythonVfsRpcResponsePayload::Empty), + Err(error) => Err(error), + } + } + PythonVfsRpcMethod::Rename => { + let destination = request.destination.as_deref().ok_or_else(|| { + SidecarError::InvalidState(format!( + "python VFS fsRename for {} requires destination", + path + )) + })?; + let destination = normalize_python_vfs_rpc_path(destination)?; + match vm.kernel.rename(&path, &destination).map_err(kernel_error) { + Ok(()) => rename_guest_shadow_path(vm, &path, &destination) + .map(|()| PythonVfsRpcResponsePayload::Empty), + Err(error) => Err(error), + } + } PythonVfsRpcMethod::HttpRequest | PythonVfsRpcMethod::DnsLookup | PythonVfsRpcMethod::SubprocessRun => { @@ -860,16 +892,13 @@ pub(crate) fn normalize_python_vfs_rpc_path(path: &str) -> Result Result, path: &str, diff --git a/crates/sidecar/tests/python.rs b/crates/sidecar/tests/python.rs index faed5853..ab05430d 100644 --- a/crates/sidecar/tests/python.rs +++ b/crates/sidecar/tests/python.rs @@ -206,7 +206,9 @@ fn spawn_static_file_server(root: PathBuf) -> (u16, thread::JoinHandle<()>) { .expect("set nonblocking listener"); let port = listener.local_addr().expect("listener address").port(); let handle = thread::spawn(move || { - let deadline = Instant::now() + Duration::from_secs(15); + // Generous windows so a slow/contended Pyodide boot (and micropip's + // index-then-wheel fetch gap) still lands inside the server's lifetime. 
+ let deadline = Instant::now() + Duration::from_secs(120); let mut served_any = false; let mut idle_since: Option = None; while Instant::now() < deadline { @@ -245,7 +247,7 @@ fn spawn_static_file_server(root: PathBuf) -> (u16, thread::JoinHandle<()>) { Err(error) if error.kind() == std::io::ErrorKind::WouldBlock => { if served_any { match idle_since { - Some(start) if start.elapsed() >= Duration::from_secs(5) => break, + Some(start) if start.elapsed() >= Duration::from_secs(20) => break, Some(_) => {} None => idle_since = Some(Instant::now()), } @@ -587,6 +589,42 @@ fn guest_read_file_utf8( response.content.expect("guest filesystem read content") } +fn guest_exists( + sidecar: &mut secure_exec_sidecar::NativeSidecar, + request_id: RequestId, + connection_id: &str, + session_id: &str, + vm_id: &str, + path: &str, +) -> bool { + let response = guest_filesystem_call( + sidecar, + request_id, + connection_id, + session_id, + vm_id, + GuestFilesystemCallRequest { + operation: GuestFilesystemOperation::Exists, + path: path.to_owned(), + destination_path: None, + target: None, + content: None, + encoding: None, + recursive: false, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, + len: None, + offset: None, + }, + ); + + assert_eq!(response.operation, GuestFilesystemOperation::Exists); + response.exists.expect("guest filesystem exists flag") +} + fn write_process_stdin( sidecar: &mut secure_exec_sidecar::NativeSidecar, request_id: RequestId, @@ -1158,6 +1196,148 @@ print(json.dumps({ assert_eq!(python_written, "from python"); } +#[test] +fn python_runtime_supports_file_delete_and_rename() { + assert_node_available(); + + let mut sidecar = new_sidecar("python-file-ops"); + let cwd = temp_dir("python-file-ops-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + 
GuestRuntimeKind::Python, + &cwd, + ); + + bootstrap_root_filesystem( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + vec![root_dir("/workspace")], + ); + // Seed via the kernel so delete/rename exercise host-backed VFS entries. + guest_write_file_utf8( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "/workspace/seed.txt", + "seed", + ); + + execute_inline_python( + &mut sidecar, + 6, + &connection_id, + &session_id, + &vm_id, + "proc-python-file-ops", + r#" +import json +import os + +results = {} + +# delete a file +os.remove("/workspace/seed.txt") +results["seed_exists"] = os.path.exists("/workspace/seed.txt") + +# create then remove a directory +os.mkdir("/workspace/subdir") +results["subdir_made"] = os.path.isdir("/workspace/subdir") +os.rmdir("/workspace/subdir") +results["subdir_exists"] = os.path.exists("/workspace/subdir") + +# rename a file +with open("/workspace/old.txt", "w", encoding="utf-8") as handle: + handle.write("renamed body") +os.rename("/workspace/old.txt", "/workspace/new.txt") +results["old_exists"] = os.path.exists("/workspace/old.txt") +with open("/workspace/new.txt", "r", encoding="utf-8") as handle: + results["new_body"] = handle.read() + +results["entries"] = sorted(os.listdir("/workspace")) +print(json.dumps(results)) +"#, + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-python-file-ops", + ); + + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert!(stderr.is_empty(), "unexpected stderr: {stderr}"); + + let parsed: Value = serde_json::from_str(stdout.trim()).expect("parse file-ops JSON"); + assert_eq!(parsed["seed_exists"], false, "seed.txt should be deleted"); + assert_eq!(parsed["subdir_made"], true); + assert_eq!(parsed["subdir_exists"], false, "subdir should be removed"); + assert_eq!( + parsed["old_exists"], false, + "old.txt should be renamed away" + ); + assert_eq!(parsed["new_body"], 
"renamed body"); + assert_eq!(parsed["entries"], serde_json::json!(["new.txt"])); + + // Cross-check the HOST kernel VFS reflects the deletes/rename. + assert!( + !guest_exists( + &mut sidecar, + 7, + &connection_id, + &session_id, + &vm_id, + "/workspace/seed.txt" + ), + "host VFS should not see deleted seed.txt" + ); + assert!( + !guest_exists( + &mut sidecar, + 8, + &connection_id, + &session_id, + &vm_id, + "/workspace/old.txt" + ), + "host VFS should not see renamed-away old.txt" + ); + assert!( + !guest_exists( + &mut sidecar, + 9, + &connection_id, + &session_id, + &vm_id, + "/workspace/subdir" + ), + "host VFS should not see removed subdir" + ); + let new_body = guest_read_file_utf8( + &mut sidecar, + 10, + &connection_id, + &session_id, + &vm_id, + "/workspace/new.txt", + ); + assert_eq!( + new_body, "renamed body", + "host VFS should see the renamed file" + ); +} + fn workspace_files_are_shared_between_javascript_and_python_runtimes() { assert_node_available(); @@ -2670,6 +2850,645 @@ print(json.dumps(result)) ); } +fn execute_python_cli( + sidecar: &mut secure_exec_sidecar::NativeSidecar, + request_id: RequestId, + connection_id: &str, + session_id: &str, + vm_id: &str, + process_id: &str, + command: &str, + args: &[&str], +) { + let result = sidecar + .dispatch_wire_blocking(wire_request( + request_id, + wire_vm(connection_id, session_id, vm_id), + RequestPayload::ExecuteRequest(ExecuteRequest { + process_id: process_id.to_owned(), + command: Some(command.to_owned()), + runtime: None, + entrypoint: None, + args: args.iter().map(|arg| (*arg).to_string()).collect(), + env: HashMap::new(), + cwd: None, + wasm_permission_tier: None, + }), + )) + .expect("start python CLI execution through wire"); + + match result.response.payload { + ResponsePayload::ProcessStartedResponse(response) => { + assert_eq!(response.process_id, process_id); + } + other => panic!("unexpected wire execute response: {other:?}"), + } +} + +fn execute_python_cli_with_env( + sidecar: 
&mut secure_exec_sidecar::NativeSidecar, + request_id: RequestId, + connection_id: &str, + session_id: &str, + vm_id: &str, + process_id: &str, + command: &str, + args: &[&str], + env: HashMap, +) { + let result = sidecar + .dispatch_wire_blocking(wire_request( + request_id, + wire_vm(connection_id, session_id, vm_id), + RequestPayload::ExecuteRequest(ExecuteRequest { + process_id: process_id.to_owned(), + command: Some(command.to_owned()), + runtime: None, + entrypoint: None, + args: args.iter().map(|arg| (*arg).to_string()).collect(), + env, + cwd: None, + wasm_permission_tier: None, + }), + )) + .expect("start python CLI execution through wire"); + + match result.response.payload { + ResponsePayload::ProcessStartedResponse(response) => { + assert_eq!(response.process_id, process_id); + } + other => panic!("unexpected wire execute response: {other:?}"), + } +} + +fn python_command_pip_installs_via_micropip() { + assert_node_available(); + + let (port, server) = spawn_static_file_server(pyodide_asset_dir()); + let mut sidecar = new_sidecar("python-cli-pip"); + let cwd = temp_dir("python-cli-pip-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let vm_id = create_vm_with_metadata_and_permissions( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + HashMap::from([( + String::from("env.AGENTOS_LOOPBACK_EXEMPT_PORTS"), + serde_json::to_string(&vec![port.to_string()]).expect("serialize exempt ports"), + )]), + wire_permissions_allow_all(), + ); + + execute_python_cli_with_env( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-pip", + "pip", + &[ + "install", + &format!("http://127.0.0.1:{port}/click-8.3.1-py3-none-any.whl"), + ], + HashMap::from([( + String::from("AGENTOS_PYODIDE_PACKAGE_BASE_URL"), + format!("http://127.0.0.1:{port}/"), + )]), + ); + + let (stdout, stderr, exit_code) = 
collect_process_output_with_timeout( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-pip", + Duration::from_secs(90), + ); + let _ = server.join(); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert!( + stdout.contains("Successfully installed"), + "stdout: {stdout}\nstderr: {stderr}" + ); +} + +fn python_command_runs_inline_code() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-inline"); + let cwd = temp_dir("python-cli-inline-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + ); + + execute_python_cli( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-c", + "python", + &["-c", "print(1 + 1)"], + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-c", + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert_eq!(stdout, "2\n", "stderr: {stderr}"); +} + +fn python_command_runs_script_with_argv() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-argv"); + let cwd = temp_dir("python-cli-argv-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let vm_id = create_vm_with_root_filesystem( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + RootFilesystemDescriptor { + mode: RootFilesystemMode::Ephemeral, + disable_default_base_layer: false, + lowers: Vec::new(), + bootstrap_entries: vec![ + root_dir("/workspace"), + root_file( + "/workspace/argv.py", + "import sys\nprint(\",\".join(sys.argv))\n", + Some(RootFilesystemEntryEncoding::Utf8), + ), + ], + }, + ); + + execute_python_cli( + &mut 
sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-argv", + "python", + &["/workspace/argv.py", "alpha", "beta"], + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-argv", + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert_eq!( + stdout, "/workspace/argv.py,alpha,beta\n", + "stderr: {stderr}" + ); +} + +fn python_command_runs_module_with_dash_m() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-module"); + let cwd = temp_dir("python-cli-module-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + ); + + execute_python_cli( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-m", + "python", + &["-m", "this"], + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-m", + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + // `python -m this` runs the stdlib `this` module as __main__, printing the Zen. 
+ assert!( + stdout.contains("Beautiful is better than ugly"), + "stdout: {stdout}\nstderr: {stderr}" + ); +} + +fn python_command_reads_program_from_stdin() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-stdin"); + let cwd = temp_dir("python-cli-stdin-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + ); + + execute_python_cli( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-stdin", + "python", + &["-"], + ); + write_process_stdin( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "proc-py-stdin", + "print('from stdin program')\n", + ); + close_process_stdin( + &mut sidecar, + 6, + &connection_id, + &session_id, + &vm_id, + "proc-py-stdin", + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-stdin", + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert_eq!(stdout, "from stdin program\n", "stderr: {stderr}"); +} + +fn python_command_runs_interactive_repl() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-repl"); + let cwd = temp_dir("python-cli-repl-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + ); + + execute_python_cli( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-py-repl", + "python", + &[], + ); + write_process_stdin( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "proc-py-repl", + "print(6 * 7)\n", + ); + close_process_stdin( + &mut sidecar, + 6, + &connection_id, + 
&session_id, + &vm_id, + "proc-py-repl", + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-py-repl", + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert!(stdout.contains("42"), "stdout: {stdout}\nstderr: {stderr}"); +} + +fn python_command_runs_as_nested_child_process() { + assert_node_available(); + let mut sidecar = new_sidecar("python-cli-nested"); + let workspace_host_dir = temp_dir("python-cli-nested-host"); + let cwd = workspace_host_dir.clone(); + let js_entry = workspace_host_dir.join("spawn.cjs"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python-nested"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::JavaScript, + &cwd, + ); + + write_fixture( + &js_entry, + r#" +const { spawnSync } = require('node:child_process'); +const result = spawnSync('python', ['-c', 'print(2 + 3)'], { encoding: 'utf8' }); +if (result.error) { + process.stderr.write(String(result.error)); +} +process.stdout.write('status=' + result.status + ';out=' + (result.stdout || '').trim()); +"#, + ); + + bootstrap_root_filesystem( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + vec![root_dir("/workspace")], + ); + let configure = sidecar + .dispatch_wire_blocking(wire_request( + 5, + wire_vm(&connection_id, &session_id, &vm_id), + RequestPayload::ConfigureVmRequest(ConfigureVmRequest { + mounts: vec![MountDescriptor { + guest_path: String::from("/workspace"), + read_only: false, + plugin: MountPluginDescriptor { + id: String::from("host_dir"), + config: json!({ + "hostPath": workspace_host_dir.to_string_lossy().into_owned(), + "readOnly": false, + }) + .to_string(), + }, + }], + software: Vec::new(), + permissions: None, + module_access_cwd: None, + instructions: Vec::new(), + projected_modules: Vec::new(), + 
command_permissions: HashMap::new(), + loopback_exempt_ports: Vec::new(), + }), + )) + .expect("configure host_dir workspace mount through wire"); + match configure.response.payload { + ResponsePayload::VmConfiguredResponse(response) => { + assert_eq!(response.applied_mounts, 1); + } + other => panic!("unexpected wire configure-vm response: {other:?}"), + } + + let js_fs_env = HashMap::from([ + ( + String::from("AGENTOS_GUEST_PATH_MAPPINGS"), + json!([{ + "guestPath": "/workspace", + "hostPath": workspace_host_dir.to_string_lossy().into_owned(), + }]) + .to_string(), + ), + ( + String::from("AGENTOS_EXTRA_FS_READ_PATHS"), + json!([workspace_host_dir.to_string_lossy().into_owned()]).to_string(), + ), + ]); + + execute_javascript_with_env( + &mut sidecar, + 6, + &connection_id, + &session_id, + &vm_id, + "proc-js-spawn", + &js_entry, + Vec::new(), + js_fs_env, + ); + + let (stdout, stderr, exit_code) = collect_process_output_with_timeout( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-js-spawn", + Duration::from_secs(60), + ); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert!( + stdout.contains("status=0;out=5"), + "stdout: {stdout}\nstderr: {stderr}" + ); +} + +fn python_reads_and_writes_arbitrary_vm_paths() { + assert_node_available(); + let mut sidecar = new_sidecar("python-rootfs-rw"); + let cwd = temp_dir("python-rootfs-rw-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let vm_id = create_vm_with_root_filesystem( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + RootFilesystemDescriptor { + mode: RootFilesystemMode::Ephemeral, + disable_default_base_layer: false, + lowers: Vec::new(), + bootstrap_entries: vec![root_file( + "/etc/agentos-test.txt", + "hello-from-etc\n", + Some(RootFilesystemEntryEncoding::Utf8), + )], + }, + ); + + // Read from /etc and write to /tmp — both 
outside the old /workspace window. + execute_inline_python( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-rw", + "data = open('/etc/agentos-test.txt').read()\nwith open('/tmp/py-out.txt', 'w') as handle:\n handle.write('written-by-python\\n')\nprint(data.strip())\n", + ); + let (stdout, stderr, exit_code) = + collect_process_output(&mut sidecar, &connection_id, &session_id, &vm_id, "proc-rw"); + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert_eq!(stdout, "hello-from-etc\n", "stderr: {stderr}"); + + // A SEPARATE Python process (fresh Pyodide FS) sees /tmp/py-out.txt — proving + // the write landed in the kernel VFS, not the per-process in-memory FS. + execute_inline_python( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "proc-reread", + "print(open('/tmp/py-out.txt').read().strip())\n", + ); + let (stdout2, stderr2, exit2) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-reread", + ); + assert_eq!(exit2, 0, "stdout: {stdout2}\nstderr: {stderr2}"); + assert_eq!(stdout2, "written-by-python\n", "stderr: {stderr2}"); +} + +fn python_pip_installs_persist_across_invocations() { + assert_node_available(); + let (port, server) = spawn_static_file_server(pyodide_asset_dir()); + let mut sidecar = new_sidecar("python-vfs-pip"); + let cwd = temp_dir("python-vfs-pip-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let vm_id = create_vm_with_metadata_and_permissions( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + HashMap::from([( + String::from("env.AGENTOS_LOOPBACK_EXEMPT_PORTS"), + serde_json::to_string(&vec![port.to_string()]).expect("serialize exempt ports"), + )]), + wire_permissions_allow_all(), + ); + + // Process 1: `pip install` a wheel — the shim copies it into the VFS-backed + // site-packages so it persists past 
this interpreter. + execute_python_cli_with_env( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + "proc-pip-install", + "pip", + &[ + "install", + &format!("http://127.0.0.1:{port}/click-8.3.1-py3-none-any.whl"), + ], + HashMap::from([( + String::from("AGENTOS_PYODIDE_PACKAGE_BASE_URL"), + format!("http://127.0.0.1:{port}/"), + )]), + ); + let (stdout1, stderr1, exit1) = collect_process_output_with_timeout( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-pip-install", + Duration::from_secs(90), + ); + assert_eq!(exit1, 0, "stdout: {stdout1}\nstderr: {stderr1}"); + assert!( + stdout1.contains("Successfully installed"), + "stdout: {stdout1}\nstderr: {stderr1}" + ); + + // Process 2: a FRESH Python interpreter imports the package from the VFS + // site-packages — proving the install persisted across invocations. + execute_python_cli( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "proc-pip-import", + "python", + &["-c", "import click; print(click.__version__)"], + ); + let (stdout2, stderr2, exit2) = collect_process_output_with_timeout( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-pip-import", + Duration::from_secs(60), + ); + let _ = server.join(); + assert_eq!(exit2, 0, "stdout: {stdout2}\nstderr: {stderr2}"); + assert_eq!( + stdout2.trim(), + "8.3.1", + "stdout: {stdout2}\nstderr: {stderr2}" + ); +} + +fn python_rootfs_suite() { + python_reads_and_writes_arbitrary_vm_paths(); + python_pip_installs_persist_across_invocations(); +} + +fn python_cli_suite() { + python_command_runs_inline_code(); + python_command_runs_script_with_argv(); + python_command_runs_module_with_dash_m(); + python_command_reads_program_from_stdin(); + python_command_runs_interactive_repl(); + python_command_runs_as_nested_child_process(); + python_command_pip_installs_via_micropip(); +} + #[test] fn python_suite() { // Multiple libtest cases in this V8/Pyodide-backed integration binary @@ -2698,4 +3517,6 @@ fn 
python_suite() { python_runtime_surfaces_network_permission_errors(); python_runtime_runs_node_subprocesses_through_sidecar_bridge(); python_runtime_surfaces_subprocess_permission_errors(); + python_cli_suite(); + python_rootfs_suite(); } diff --git a/crates/sidecar/tests/service.rs b/crates/sidecar/tests/service.rs index 7d56a560..ca640684 100644 --- a/crates/sidecar/tests/service.rs +++ b/crates/sidecar/tests/service.rs @@ -5586,6 +5586,7 @@ ykAheWCsAteSEWVc0w==\n\ id: 1, method: PythonVfsRpcMethod::Mkdir, path: String::from("/tmp/stale-python-rpc"), + destination: None, content_base64: None, recursive: false, url: None, @@ -5622,6 +5623,7 @@ ykAheWCsAteSEWVc0w==\n\ id: 2, method: PythonVfsRpcMethod::Mkdir, path: String::from("/tmp/stale-python-rpc"), + destination: None, content_base64: None, recursive: false, url: None, @@ -10113,6 +10115,7 @@ export async function loadPyodide() { id: 1, method: PythonVfsRpcMethod::Mkdir, path: String::from("/workspace"), + destination: None, content_base64: None, recursive: false, url: None, @@ -10139,6 +10142,7 @@ export async function loadPyodide() { id: 2, method: PythonVfsRpcMethod::Write, path: String::from("/workspace/note.txt"), + destination: None, content_base64: Some(String::from("aGVsbG8gZnJvbSBzaWRlY2FyIHJwYw==")), recursive: false, url: None, diff --git a/packages/core/src/sidecar-process.ts b/packages/core/src/sidecar-process.ts index a78efa90..ecae37bd 100644 --- a/packages/core/src/sidecar-process.ts +++ b/packages/core/src/sidecar-process.ts @@ -67,7 +67,7 @@ type OwnershipScope = LiveOwnershipScope; type GuestRuntimeKind = Extract< LiveGuestRuntimeKind, - "java_script" | "web_assembly" + "java_script" | "python" | "web_assembly" >; type WasmPermissionTier = LiveWasmPermissionTier; type RootFilesystemEntryEncoding = LiveRootFilesystemEntryEncoding;