diff --git a/python/copilot/_process.py b/python/copilot/_process.py new file mode 100644 index 000000000..bdf39a4d4 --- /dev/null +++ b/python/copilot/_process.py @@ -0,0 +1,74 @@ +"""Helpers for terminating SDK-spawned CLI process trees.""" + +from __future__ import annotations + +import os +import signal +import subprocess +import sys +import time + + +def popen_process_group_kwargs() -> dict[str, bool]: + """Return Popen kwargs that isolate spawned CLI servers in their own process group.""" + if sys.platform == "win32": + return {} + return {"start_new_session": True} + + +def _terminate_windows_process_tree(pid: int, *, force: bool) -> None: + args = ["taskkill", "/T", "/PID", str(pid)] + if force: + args.insert(1, "/F") + kwargs: dict = { + "capture_output": True, + "check": False, + } + if hasattr(subprocess, "CREATE_NO_WINDOW"): + kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW + subprocess.run(args, **kwargs) + + +def terminate_owned_cli_process( + process: subprocess.Popen | None, + *, + graceful: bool = True, + timeout: float = 5.0, +) -> bool: + """Terminate an SDK-owned CLI process and its descendants. + + Returns True when the process is no longer running. + """ + if process is None: + return True + + if process.poll() is not None: + return True + + pid = process.pid + if sys.platform == "win32": + _terminate_windows_process_tree(pid, force=not graceful) + else: + sig = signal.SIGTERM if graceful else signal.SIGKILL + try: + os.killpg(os.getpgid(pid), sig) + except ProcessLookupError: + return True + except OSError: + try: + if graceful: + process.terminate() + else: + process.kill() + except OSError: + return process.poll() is not None + + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if process.poll() is not None: + return True + time.sleep(0.05) + + if graceful: + return terminate_owned_cli_process(process, graceful=False, timeout=timeout) + return process.poll() is not None diff --git a/python/copilot/client.py b/python/copilot/client.py index c7d11d12b..d4ebb6630 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -33,6 +33,7 @@ from ._diagnostics import log_timing from ._jsonrpc import JsonRpcClient, JsonRpcError, ProcessExitedError +from ._process import popen_process_group_kwargs, terminate_owned_cli_process from ._mode import ( CopilotClientMode, ToolSet, @@ -1549,30 +1550,18 @@ async def stop(self) -> None: # never come. Once shutdown has completed (or failed) we terminate the # child immediately and only wait to reap it. if self._cli_process and not self._is_external_server: - poll = getattr(self._cli_process, "poll", None) - is_running = poll is None or poll() is None - if is_running: - self._cli_process.terminate() - try: - await asyncio.to_thread( - self._cli_process.wait, - timeout=_CLI_PROCESS_EXIT_TIMEOUT_SECONDS, + exited = await asyncio.to_thread( + terminate_owned_cli_process, + self._cli_process, + graceful=True, + timeout=_CLI_PROCESS_EXIT_TIMEOUT_SECONDS, + ) + if not exited: + errors.append( + StopError( + message="Timed out waiting for CLI process tree to exit after kill" ) - except subprocess.TimeoutExpired: - self._cli_process.kill() - try: - await asyncio.to_thread( - self._cli_process.wait, - timeout=_CLI_PROCESS_EXIT_TIMEOUT_SECONDS, - ) - except subprocess.TimeoutExpired as e: - errors.append( - StopError( - message=( - f"Timed out waiting for CLI process to exit after kill: {e}" - ) - ) - ) + ) if self._process is self._cli_process: self._process = None self._cli_process = None @@ -1618,7 +1607,12 @@ async def force_stop(self) -> None: if self._process is not None and self._process is not self._cli_process: self._process.terminate() if self._cli_process is not None: - self._cli_process.kill() + await asyncio.to_thread( + terminate_owned_cli_process, + self._cli_process, + graceful=False, + timeout=_CLI_PROCESS_EXIT_TIMEOUT_SECONDS, + ) self._process = None self._cli_process = None except Exception: @@ -3507,6 +3501,7 @@ async def _start_cli_server(self) -> None: cwd=cwd, env=env, creationflags=creationflags, + **popen_process_group_kwargs(), ) self._cli_process = self._process else: @@ -3520,6 +3515,7 @@ async def _start_cli_server(self) -> None: cwd=cwd, env=env, creationflags=creationflags, + **popen_process_group_kwargs(), ) self._cli_process = self._process log_timing( diff --git a/python/test_client.py b/python/test_client.py index f3f46c4d8..3fb3c487a 100644 --- a/python/test_client.py +++ b/python/test_client.py @@ -49,16 +49,12 @@ async def shutdown(self, *, timeout=None): client._cli_process = process client._is_external_server = False - await client.stop() + with patch("copilot.client.terminate_owned_cli_process", return_value=True) as terminate_tree: + await client.stop() assert calls == ["runtime.shutdown"] - # The runtime never self-exits after runtime.shutdown (it keeps its - # JSON-RPC server alive to send the response and leaves termination to - # the caller), so stop() terminates the owned process. The mocked - # process exits on terminate() (wait returns immediately), so we never - # escalate to kill(). - process.terminate.assert_called_once() - process.kill.assert_not_called() + terminate_tree.assert_called_once() + assert terminate_tree.call_args.kwargs["graceful"] is True @pytest.mark.asyncio async def test_force_stop_and_external_stop_do_not_request_runtime_shutdown(self): @@ -75,10 +71,12 @@ async def shutdown(self): force_client._cli_process = process force_client._is_external_server = False - await force_client.force_stop() + with patch("copilot.client.terminate_owned_cli_process", return_value=True) as terminate_tree: + await force_client.force_stop() assert calls == [] - process.kill.assert_called_once() + terminate_tree.assert_called_once() + assert terminate_tree.call_args.kwargs["graceful"] is False external_client = CopilotClient(connection=RuntimeConnection.for_uri("localhost:1234")) external_client._rpc = Mock(runtime=Runtime()) diff --git a/python/test_process.py b/python/test_process.py new file mode 100644 index 000000000..947bbec3c --- /dev/null +++ b/python/test_process.py @@ -0,0 +1,54 @@ +"""Unit tests for CLI process tree termination helpers.""" + +import signal +import sys +from unittest.mock import Mock, patch + +import pytest + +from copilot._process import popen_process_group_kwargs, terminate_owned_cli_process + + +class TestPopenProcessGroupKwargs: + def test_returns_start_new_session_on_posix(self): + with patch.object(sys, "platform", "linux"): + assert popen_process_group_kwargs() == {"start_new_session": True} + + def test_returns_empty_on_windows(self): + with patch.object(sys, "platform", "win32"): + assert popen_process_group_kwargs() == {} + + +class TestTerminateOwnedCliProcess: + def test_noop_for_none_process(self): + assert terminate_owned_cli_process(None) is True + + def test_returns_true_for_exited_process(self): + process = Mock() + process.poll.return_value = 0 + assert terminate_owned_cli_process(process) is True + + @patch("copilot._process.time.sleep", return_value=None) + @patch("copilot._process.os.killpg") + @patch("copilot._process.os.getpgid", return_value=42) + def test_posix_graceful_uses_killpg_sigterm(self, _getpgid, killpg, _sleep): + process = Mock() + process.poll.side_effect = [None, 0] + process.pid = 99 + + with patch.object(sys, "platform", "linux"): + assert terminate_owned_cli_process(process, graceful=True, timeout=1.0) is True + + killpg.assert_called_once_with(42, signal.SIGTERM) + + @patch("copilot._process.subprocess.run") + def test_windows_uses_taskkill_tree(self, run): + process = Mock() + process.poll.side_effect = [None, 0] + process.pid = 1234 + + with patch.object(sys, "platform", "win32"): + assert terminate_owned_cli_process(process, graceful=True, timeout=1.0) is True + + run.assert_called_once() + assert run.call_args.args[0][:3] == ["taskkill", "/T", "/PID"]