From f18df8657581a6d601787070d82c14b5f88f7db3 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Thu, 18 Jun 2026 16:09:35 +0200 Subject: [PATCH] Release load_regulation slots of in_use conns when a pool stops Stopping a pool while requests were in flight leaked the global per-host load_regulation slots of the checked-out connections, starving that host's concurrency cap node-wide so later requests timed out at checkout. Two causes: the pool did not trap exits, so stop_pool (supervisor:terminate_child) killed it without running terminate/2; and terminate/2 only stopped available/h2/h3 connections, never the in_use ones. Trap exits so terminate/2 runs on shutdown, and release each in_use connection's slot (and stop it) there. Add a regression test that checks the per-host count returns to baseline after a pool is stopped with a request still checked out. --- src/hackney_pool.erl | 21 ++++++++++++++++++++- test/hackney_pool_tests.erl | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/hackney_pool.erl b/src/hackney_pool.erl index e4d831d5..d96e07a0 100644 --- a/src/hackney_pool.erl +++ b/src/hackney_pool.erl @@ -453,6 +453,11 @@ start_link(Name, Options0) -> %%==================================================================== init([Name, Options]) -> + %% Trap exits so a supervisor shutdown (stop_pool -> terminate_child) runs + %% terminate/2 instead of killing the pool outright. terminate/2 releases the + %% load_regulation slots of in_use connections; without trapping exits it + %% would be skipped and those per-host slots would leak. + process_flag(trap_exit, true), MaxConn = case proplists:get_value(pool_size, Options) of undefined -> proplists:get_value(max_connections, Options, ?DEFAULT_MAX_CONNECTIONS); @@ -801,7 +806,7 @@ handle_info(_Info, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -terminate(_Reason, #state{available=Available, +terminate(_Reason, #state{available=Available, in_use=InUse, h2_connections=H2Conns, h3_connections=H3Conns, pid_monitors=PidMonitors}) -> %% Stop all available connections @@ -814,6 +819,20 @@ terminate(_Reason, #state{available=Available, Available ), + %% Release the load_regulation slot of every checked-out connection and stop + %% it. Without this, stopping a pool while requests are in flight orphans the + %% in_use conns: the pool's DOWN handler (which would release) is gone, so the + %% global per-host slots leak and that host's concurrency cap is starved + %% node-wide. Each Key carries the conn's host/port. + maps:foreach( + fun(Pid, Key) -> + {Host, Port} = key_host_port(Key), + hackney_load_regulation:release(Host, Port), + stop_conn(Pid) + end, + InUse + ), + %% Stop all HTTP/2 connections maps:foreach( fun(_Key, Pid) -> diff --git a/test/hackney_pool_tests.erl b/test/hackney_pool_tests.erl index 465c9c1c..0b0ac6cf 100644 --- a/test/hackney_pool_tests.erl +++ b/test/hackney_pool_tests.erl @@ -58,6 +58,8 @@ hackney_pool_integration_test_() -> {"prewarm creates connections", fun test_prewarm/0}, {"queue timeout", {timeout, 120, fun test_queue_timeout/0}}, {"checkout timeout", {timeout, 120, fun test_checkout_timeout/0}}, + {"stop_pool releases in_use load_regulation slots", + {timeout, 30, fun test_stop_pool_releases_inuse_slots/0}}, {"server close detected when idle (issue #544)", {timeout, 30, fun test_server_close_detected/0}}, {"bytes stranded while idle are buffered, not dropped; conn stays reusable", {timeout, 30, fun test_idle_data_consumed_not_dropped/0}}, @@ -781,6 +783,26 @@ test_checkout_timeout() -> ?assertEqual(checkout_timeout, Error), hackney_pool:stop_pool(pool_test_timeout). +%% Regression: stopping a pool while a request is still checked out must release +%% the connection's per-host load_regulation slot. Before the terminate/2 fix the +%% slot leaked, starving that host's concurrency cap node-wide (and flaking +%% test_checkout_timeout, which shares the host with the prior test). Delta-based +%% so it does not depend on the absolute global count. +test_stop_pool_releases_inuse_slots() -> + Host = "127.0.0.1", Port = ?PORT, + URL = <<"http://127.0.0.1:8123/pool">>, + Before = hackney_load_regulation:current(Host, Port), + ok = hackney_pool:start_pool(test_pool_stop_leak, [{pool_size, 1}]), + Opts = [{pool, test_pool_stop_leak}, {connect_timeout, 1000}, + {checkout_timeout, 5000}], + %% Streaming request: returns once the conn is checked out (in_use), holding + %% a load_regulation slot. Do not close it - stop the pool while in flight. + {ok, _Ref} = hackney:request(post, URL, [], stream, Opts), + ?assertEqual(Before + 1, hackney_load_regulation:current(Host, Port)), + hackney_pool:stop_pool(test_pool_stop_leak), + timer:sleep(200), + ?assertEqual(Before, hackney_load_regulation:current(Host, Port)). + %% Test for issue #544: Server closes idle connection, pool should detect it %% This tests that when a server closes a connection while it's idle in the pool, %% the connection process receives tcp_closed and terminates, removing itself from pool.