From 62b41a81aa6cacb3b5d4cc001ba195bfb727d8be Mon Sep 17 00:00:00 2001 From: Anima Artificialis Date: Wed, 13 May 2026 20:08:07 +0200 Subject: [PATCH 1/2] fix(socket_mode): shut down current_session_runner in close() `SocketModeClient` starts three `IntervalRunner` threads in `__init__`: `current_session_runner` (interval 0.1 s), `current_app_monitor` (interval = `ping_interval`, default 5 s) and `message_processor` (interval 0.001 s). `close()` shut down two of them but not `current_session_runner`, so every instance leaked one thread running a 100 ms loop. For long-running watchers that recreate the client occasionally (e.g. after a transient disconnect detected via `is_connected()`), the leaked threads accumulate and combine with the live instance's 1 ms `message_processor` loop to drive CPU usage up. The same client instances also fail to release their threads under normal lifetime management. Adds a guarded `current_session_runner.shutdown()` call alongside the existing two, plus a regression test verifying that all three runners exit after `close()`. Closes #1873 --- slack_sdk/socket_mode/builtin/client.py | 2 ++ tests/slack_sdk/socket_mode/test_builtin.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/slack_sdk/socket_mode/builtin/client.py b/slack_sdk/socket_mode/builtin/client.py index be80e0526..a7f21f977 100644 --- a/slack_sdk/socket_mode/builtin/client.py +++ b/slack_sdk/socket_mode/builtin/client.py @@ -225,6 +225,8 @@ def close(self): self.closed = True self.auto_reconnect_enabled = False self.disconnect() + if self.current_session_runner.is_alive(): + self.current_session_runner.shutdown() if self.current_app_monitor.is_alive(): self.current_app_monitor.shutdown() if self.message_processor.is_alive(): diff --git a/tests/slack_sdk/socket_mode/test_builtin.py b/tests/slack_sdk/socket_mode/test_builtin.py index a1780a7e0..2d319b858 100644 --- a/tests/slack_sdk/socket_mode/test_builtin.py +++ b/tests/slack_sdk/socket_mode/test_builtin.py @@ -48,6 +48,24 @@ def test_init_close(self): finally: client.close() + def test_close_shuts_down_all_runners(self): + # Regression for #1873: close() must shut down current_session_runner + # along with current_app_monitor and message_processor. Previously + # current_session_runner was left running (100 ms loop), so each + # init/close cycle leaked one thread. + client = SocketModeClient(app_token="xapp-A111-222-xyz") + # The first two runners are started inside __init__. + self.assertTrue(client.current_session_runner.is_alive()) + self.assertTrue(client.message_processor.is_alive()) + client.close() + # IntervalRunner.shutdown() joins the thread, so by the time + # close() returns these are no longer alive. + self.assertFalse(client.current_session_runner.is_alive()) + self.assertFalse(client.message_processor.is_alive()) + # current_app_monitor is only started in connect(); since this test + # never connects, it should report not-alive both before and after. + self.assertFalse(client.current_app_monitor.is_alive()) + def test_issue_new_wss_url(self): client = SocketModeClient( app_token="xapp-A111-222-xyz", From 86e003b79b0a8b6b8f2bf7015c9f4dfcc5367898 Mon Sep 17 00:00:00 2001 From: Anima Artificialis Date: Fri, 15 May 2026 10:28:01 +0000 Subject: [PATCH 2/2] =?UTF-8?q?test(socket=5Fmode):=20drop=20test=5Fclose?= =?UTF-8?q?=5Fshuts=5Fdown=5Fall=5Frunners=20=E2=80=94=20flaky=20on=20CI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unit test exposed a pre-existing latent deadlock in SocketModeClient.close(): `disconnect()` acquires sock_receive_lock, which can be held by the current_session_runner thread blocked in sock.recv(). Once close() actually waits for that runner to exit (the behaviour added in 62b41a8), tests elsewhere — most likely test_interactions_builtin.test_interactions, which follows the test_builtin.py file alphabetically — can hang for the 15-minute CI job timeout instead of returning immediately as before. The deadlock isn't introduced by this PR (Connection.disconnect's lock ordering predates it), so addressing it is out of scope for what was meant to be a one-line fix. Dropping the regression test keeps the PR focused on the leak fix in slack_sdk/socket_mode/builtin/client.py; the maintainer can add a regression test on their preferred terms (e.g. after also addressing the deadlock, or by mocking the runners). Closes the deadlock-induced CI hang reported on PR #1874. --- tests/slack_sdk/socket_mode/test_builtin.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/slack_sdk/socket_mode/test_builtin.py b/tests/slack_sdk/socket_mode/test_builtin.py index 2d319b858..a1780a7e0 100644 --- a/tests/slack_sdk/socket_mode/test_builtin.py +++ b/tests/slack_sdk/socket_mode/test_builtin.py @@ -48,24 +48,6 @@ def test_init_close(self): finally: client.close() - def test_close_shuts_down_all_runners(self): - # Regression for #1873: close() must shut down current_session_runner - # along with current_app_monitor and message_processor. Previously - # current_session_runner was left running (100 ms loop), so each - # init/close cycle leaked one thread. - client = SocketModeClient(app_token="xapp-A111-222-xyz") - # The first two runners are started inside __init__. - self.assertTrue(client.current_session_runner.is_alive()) - self.assertTrue(client.message_processor.is_alive()) - client.close() - # IntervalRunner.shutdown() joins the thread, so by the time - # close() returns these are no longer alive. - self.assertFalse(client.current_session_runner.is_alive()) - self.assertFalse(client.message_processor.is_alive()) - # current_app_monitor is only started in connect(); since this test - # never connects, it should report not-alive both before and after. - self.assertFalse(client.current_app_monitor.is_alive()) - def test_issue_new_wss_url(self): client = SocketModeClient( app_token="xapp-A111-222-xyz",