diff --git a/.github/workflows/debug-bugfix.yml b/.github/workflows/debug-bugfix.yml index 8631b52..ecd2c0a 100644 --- a/.github/workflows/debug-bugfix.yml +++ b/.github/workflows/debug-bugfix.yml @@ -80,7 +80,7 @@ jobs: - name: Install build dependencies (macOS) if: runner.os == 'macOS' run: | - brew install autoconf bison re2c libuv curl oniguruma openssl@3 || true + brew install autoconf bison re2c libuv curl oniguruma openssl@3 libiconv || true # bison is keg-only on macOS; the system bison (2.3) is too old. echo "$(brew --prefix bison)/bin" >> "$GITHUB_PATH" diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9c498..f49e221 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,44 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 squarely in PDO core overhead (PDOStatement object init, fetch wrapping). ### Fixed +- **#118 — curl `XFERINFOFUNCTION` / `PROGRESSFUNCTION` exception leak (macOS).** + When the user callback threw, `curl_xferinfo` / `curl_progress` in + `ext/curl/interface.c` left `EG(exception)` set and returned 0, so libcurl + kept driving the transfer; `zend_call_known_fcc` on subsequent ticks + short-circuited on the pending exception without clearing it. Eventually + the transfer ended and the dangling exception surfaced outside the + coroutine — on Linux it landed on a frame the awaiter unwound, but on + macOS the libuv/kqueue reentry path delivered it as **uncaught** at + engine top-level (`Fatal error: Uncaught RuntimeException`), failing + `tests/curl/035-progress_exception.phpt` and + `056-multi_progress_exception.phpt` on `MACOS_*_NTS`. + Two other async-aware curl callbacks (`curl_prereqfunction`, + `curl_debug`) already did the right thing; `xferinfo`/`progress` were + missed when that pattern was applied. 
+ Fix: in both callbacks, after `zend_call_known_fcc` returns, if + `EG(exception)` is set and `ch->async_event` exists, hand the exception + off to `curl_async_event_set_callback_exception()`, clear it, and + return 1 to abort the transfer — the captured exception is then + re-thrown into the awaiter through the normal `curl_async_event_t` + delivery path (`curl_async.c:1104`). +- **#118 — getaddrinfo event-struct leak on reactor shutdown (NTS).** The + `async_dns_addrinfo_t` (288 B) was never freed when a coroutine cancelled + a DNS resolution and the reactor shut down before libuv's threadpool + worker finished its blocking `getaddrinfo()` call. The dispose path + set `LIBUV_DNS_F_DISPOSE_PENDING` and relied on `on_addrinfo_event` + firing to reach the `pefree` branch — but our shutdown drain used + `UV_RUN_NOWAIT` (non-blocking peeks) and didn't wait for the threadpool + cancel-completion to surface via libuv's internal pipe; after + `uv_loop_close()` the callback could no longer fire. Note: per libuv + docs, `uv_cancel(uv_getaddrinfo_t*)` returns `UV_EBUSY` for an in-flight + request — we cannot preempt the worker, only wait for it. + Fix in `libuv_reactor_shutdown` (`ext/async/libuv_reactor.c`): two-phase + drain — iteration-bounded `UV_RUN_NOWAIT` passes for ready callbacks, then + iteration-bounded `UV_RUN_ONCE` passes (each may block until an event + arrives) for async cancel-completions from the threadpool. If a + worker is still wedged past the budget (e.g. DNS server not responding), + leave the loop open: `pefree`-ing pending structs would race with the + still-running worker (use-after-free, much worse than a leak); the OS + reclaims the memory at process exit. 
- **#118 — Tracing-JIT SEGV in `Async\Chaos` thread-pool fuzz tests (`FAST_CONCAT` deref of `0x1`).** Root cause was *not* in the async extension itself but in `ext/opcache/jit/zend_jit_ir.c` diff --git a/libuv_reactor.c b/libuv_reactor.c index 40a5324..dc841bd 100644 --- a/libuv_reactor.c +++ b/libuv_reactor.c @@ -420,16 +420,25 @@ bool libuv_reactor_shutdown(void) libuv_cleanup_signal_events(); libuv_cleanup_process_events(); - /* Drain pending callbacks before closing the loop. A single NOWAIT - * pass misses threadpool requests (getaddrinfo) cancelled during - * shutdown: their completion callback fires a few iterations later, - * so uv_loop_close() would hit EBUSY and the request struct leaks. - * Bounded busy-drain — cancelled requests always complete promptly. */ - for (int guard = 0; guard < 10000 && uv_loop_alive(UVLOOP) != 0; guard++) { + /* Sync drain: pick up ready callbacks. */ + for (int i = 0; i < 100 && uv_loop_alive(UVLOOP); i++) { uv_run(UVLOOP, UV_RUN_NOWAIT); } + /* Async drain: wait for threadpool cancel-completions (getaddrinfo, + * fs). uv_cancel can't preempt an in-flight worker, we must wait. */ + for (int i = 0; i < 500 && uv_loop_alive(UVLOOP); i++) { + uv_run(UVLOOP, UV_RUN_ONCE); + } + /* Worker still running past the budget — leave the loop open; + * pefree would race with the worker (UAF > leak; OS reclaims). */ + if (uv_loop_alive(UVLOOP)) { +#ifdef ZEND_DEBUG + fprintf(stderr, "async: libuv shutdown timeout; loop left open\n"); +#endif + } else { + uv_loop_close(UVLOOP); + } - uv_loop_close(UVLOOP); ASYNC_G(reactor_started) = false; zend_hash_destroy(&ASYNC_G(active_io_handles)); }