From 2704fdf7349f750e691c24b22537de535a994a76 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 11:59:40 -0500 Subject: [PATCH 1/5] chore(branch): create fix/mcp-stdio-buffering off main From b8b2dd356b8dd19d2e63764aaf4b084bb5ab79e9 Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 12:00:41 -0500 Subject: [PATCH 2/5] fix(mcp): resolve poll/getline FILE* buffering mismatch causing tools/list hang --- src/mcp/mcp.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index bdfdae8..7757dea 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -2375,8 +2375,19 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { for (;;) { /* Poll with idle timeout so we can evict unused stores between requests. - * MCP is request-response (one line at a time), so mixing poll() on the - * raw fd with getline() on the buffered FILE* is safe in practice. */ + * + * IMPORTANT: poll() operates on the raw fd, but getline() reads from a + * buffered FILE*. When a client sends multiple messages in rapid + * succession, the first getline() call may drain ALL kernel data into + * libc's internal FILE* buffer. Subsequent poll() calls then see an + * empty kernel fd and block for STORE_IDLE_TIMEOUT_S seconds even + * though the next messages are already in the FILE* buffer. + * + * Fix (Unix): use a two-phase approach — + * Phase 1: non-blocking poll (timeout=0) to check the kernel fd. + * Phase 2: if Phase 1 returns 0, peek the FILE* buffer via fgetc/ + * ungetc to detect data buffered by a prior getline() call. + * Phase 3: only if both phases confirm no data, do blocking poll. */ #ifdef _WIN32 /* Windows: WaitForSingleObject on stdin handle */ HANDLE hStdin = (HANDLE)_get_osfhandle(fd); @@ -2389,16 +2400,40 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { continue; } #else + /* Phase 1: non-blocking poll — catches data already in the kernel fd + * AND handles the case where a prior getline() drained the kernel fd + * into libc's FILE* buffer (raw fd appears empty but data is buffered). + * We always try a zero-timeout poll first; if it misses buffered data, + * phase 2 below catches it via an explicit FILE* peek. */ struct pollfd pfd = {.fd = fd, .events = POLLIN}; - int pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + int pr = poll(&pfd, 1, 0); /* non-blocking */ if (pr < 0) { break; /* error or signal */ } if (pr == 0) { - /* Timeout — evict idle store to free resources */ - cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); - continue; + /* Raw fd appears empty. Check whether libc has already buffered + * data from a previous over-read by peeking one byte via fgetc. + * If successful, push it back and proceed to getline without + * blocking. If not (EOF or EAGAIN), do the blocking poll. */ + int c = fgetc(in); + if (c == EOF) { + if (feof(in)) { + break; /* true EOF */ + } + /* No buffered data and fd is empty — do blocking poll */ + pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + if (pr < 0) { + break; + } + if (pr == 0) { + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } + } else { + /* Buffered data found — push back and fall through to getline */ + (void)ungetc(c, in); + } } #endif From 16552befdda96fecae88479aa4a402510cb4e59d Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 12:03:52 -0500 Subject: [PATCH 3/5] test(mcp): add rapid-init integration test for poll/getline buffering fix --- scripts/test_mcp_rapid_init.py | 78 ++++++++++++++++++++++++++++++++++ tests/test_mcp.c | 77 +++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100755 scripts/test_mcp_rapid_init.py diff --git a/scripts/test_mcp_rapid_init.py b/scripts/test_mcp_rapid_init.py new file mode 100755 index 0000000..d2c5f14 --- /dev/null +++ b/scripts/test_mcp_rapid_init.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +"""Integration test for the poll/getline FILE* buffering fix. + +Spawns the MCP server binary, sends initialize + notifications/initialized + +tools/list all at once (no delays), and asserts that the tools/list response +arrives within 5 seconds. + +Usage: + python3 scripts/test_mcp_rapid_init.py [/path/to/binary] + +Exit codes: + 0 - PASS + 1 - FAIL +""" + +import subprocess +import sys +import os + +TIMEOUT_S = 5 + +MESSAGES = ( + b'{"jsonrpc":"2.0","id":1,"method":"initialize",' + b'"params":{"protocolVersion":"2025-11-25","capabilities":{}}}\n' + b'{"jsonrpc":"2.0","method":"notifications/initialized"}\n' + b'{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}\n' +) + + +def main(): + if len(sys.argv) >= 2: + binary = sys.argv[1] + else: + # Default: look for build artifact relative to this script's directory + script_dir = os.path.dirname(os.path.abspath(__file__)) + repo_root = os.path.dirname(script_dir) + binary = os.path.join(repo_root, "build", "c", "codebase-memory-mcp") + + if not os.path.isfile(binary): + print(f"FAIL: binary not found at {binary}") + sys.exit(1) + + if not os.access(binary, os.X_OK): + print(f"FAIL: binary not executable: {binary}") + sys.exit(1) + + proc = subprocess.Popen( + [binary], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + + try: + # Write all 3 messages in one call and close stdin to signal EOF + stdout_data, _ = proc.communicate(input=MESSAGES, timeout=TIMEOUT_S) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + print( + f"FAIL: server did not respond within {TIMEOUT_S}s " + f"(poll/getline buffering bug not fixed)" + ) + sys.exit(1) + + output = stdout_data.decode("utf-8", errors="replace") + + if "tools" not in output: + print("FAIL: tools/list response not found in server output") + print(f"Server output was:\n{output!r}") + sys.exit(1) + + print("PASS") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/tests/test_mcp.c b/tests/test_mcp.c index a634127..eb9f346 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -1215,6 +1215,78 @@ TEST(snippet_include_neighbors_enabled) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * POLL/GETLINE FILE* BUFFERING FIX + * ══════════════════════════════════════════════════════════════════ */ + +#ifndef _WIN32 +#include +#include + +/* Signal handler used by alarm() to abort the test if it hangs */ +static void alarm_handler(int sig) { + (void)sig; + /* Writing to stderr is async-signal-safe */ + const char msg[] = "FAIL: mcp_server_run_rapid_messages timed out (>5s)\n"; + write(STDERR_FILENO, msg, sizeof(msg) - 1); + _exit(1); +} + +TEST(mcp_server_run_rapid_messages) { + /* Simulate a client sending initialize + notifications/initialized + + * tools/list all at once (no delays), which exercises the FILE* + * buffering fix: the first getline() over-reads kernel data into the + * libc buffer; without the fix, subsequent poll() calls block for 60s. + * + * We use alarm(5) to abort the test process if the server hangs. */ + int fds[2]; + ASSERT_EQ(pipe(fds), 0); + + /* Write all 3 messages to the write end in one shot */ + const char *msgs = + "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\"," + "\"params\":{\"protocolVersion\":\"2025-11-25\",\"capabilities\":{}}}\n" + "{\"jsonrpc\":\"2.0\",\"method\":\"notifications/initialized\"}\n" + "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\",\"params\":{}}\n"; + ssize_t written = write(fds[1], msgs, strlen(msgs)); + ASSERT_TRUE(written > 0); + close(fds[1]); /* EOF signals end of input to the server */ + + FILE *in_fp = fdopen(fds[0], "r"); + ASSERT_NOT_NULL(in_fp); + + FILE *out_fp = tmpfile(); + ASSERT_NOT_NULL(out_fp); + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + /* Install alarm to fail the test if cbm_mcp_server_run blocks */ + signal(SIGALRM, alarm_handler); + alarm(5); + + int rc = cbm_mcp_server_run(srv, in_fp, out_fp); + + alarm(0); /* cancel alarm */ + signal(SIGALRM, SIG_DFL); + + ASSERT_EQ(rc, 0); + + /* Verify that tools/list response is present in output */ + rewind(out_fp); + char buf[4096] = {0}; + size_t nread = fread(buf, 1, sizeof(buf) - 1, out_fp); + ASSERT_TRUE(nread > 0); + ASSERT_NOT_NULL(strstr(buf, "tools")); + + cbm_mcp_server_free(srv); + fclose(out_fp); + /* in_fp already EOF; fclose cleans up */ + fclose(in_fp); + PASS(); +} +#endif /* !_WIN32 */ + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -1287,6 +1359,11 @@ SUITE(mcp) { RUN_TEST(parse_file_uri_windows); RUN_TEST(parse_file_uri_invalid); + /* Poll/getline FILE* buffering fix */ +#ifndef _WIN32 + RUN_TEST(mcp_server_run_rapid_messages); +#endif + /* Snippet resolution (port of snippet_test.go) */ RUN_TEST(snippet_exact_qn); RUN_TEST(snippet_qn_suffix); From 5ca5f3291fa51ca2f820242d087cdbe19f4f92dd Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 12:37:28 -0500 Subject: [PATCH 4/5] fix(mcp): address QA round 1 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use O_NONBLOCK + clearerr() in Phase 2 fgetc probe to preserve the 60s idle eviction timeout when both kernel fd and FILE* buffer are empty (fgetc on a blocking fd would otherwise block indefinitely, bypassing Phase 3 poll timeout and preventing cbm_mcp_server_evict_idle) - Add #include for fcntl()/O_NONBLOCK - Fix comment: "two-phase" → "three-phase" (implementation has 3 phases) - Improve Python integration test: verify id:1 (initialize) and id:2 (tools/list) response IDs are both present, not just "tools" substring Co-Authored-By: Claude Sonnet 4.6 --- scripts/test_mcp_rapid_init.py | 22 +++++++++++++++++++++- src/mcp/mcp.c | 25 +++++++++++++++++++++---- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/scripts/test_mcp_rapid_init.py b/scripts/test_mcp_rapid_init.py index d2c5f14..fe4f65d 100755 --- a/scripts/test_mcp_rapid_init.py +++ b/scripts/test_mcp_rapid_init.py @@ -65,8 +65,28 @@ def main(): output = stdout_data.decode("utf-8", errors="replace") + # Expect exactly 2 JSON responses: id:1 (initialize) and id:2 (tools/list). + # notifications/initialized has no id and produces no response. + lines = [ln.strip() for ln in output.splitlines() if ln.strip()] + import json as _json + json_lines = [] + for ln in lines: + try: + json_lines.append(_json.loads(ln)) + except _json.JSONDecodeError: + pass + + ids = {obj.get("id") for obj in json_lines if "id" in obj} + if 1 not in ids: + print("FAIL: missing initialize response (id:1) in server output") + print(f"Server output was:\n{output!r}") + sys.exit(1) + if 2 not in ids: + print("FAIL: missing tools/list response (id:2) in server output") + print(f"Server output was:\n{output!r}") + sys.exit(1) if "tools" not in output: - print("FAIL: tools/list response not found in server output") + print("FAIL: tools/list response body missing 'tools' key") print(f"Server output was:\n{output!r}") sys.exit(1) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 7757dea..0729d0f 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include #include // int64_t @@ -2383,10 +2384,14 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { * empty kernel fd and block for STORE_IDLE_TIMEOUT_S seconds even * though the next messages are already in the FILE* buffer. * - * Fix (Unix): use a two-phase approach — + * Fix (Unix): use a three-phase approach — * Phase 1: non-blocking poll (timeout=0) to check the kernel fd. * Phase 2: if Phase 1 returns 0, peek the FILE* buffer via fgetc/ * ungetc to detect data buffered by a prior getline() call. + * The fd is temporarily set O_NONBLOCK so fgetc() returns + * immediately (EAGAIN → EOF + ferror) instead of blocking + * when the FILE* buffer is empty, which would otherwise + * bypass the Phase 3 idle eviction timeout. * Phase 3: only if both phases confirm no data, do blocking poll. */ #ifdef _WIN32 /* Windows: WaitForSingleObject on stdin handle */ @@ -2414,14 +2419,26 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { if (pr == 0) { /* Raw fd appears empty. Check whether libc has already buffered * data from a previous over-read by peeking one byte via fgetc. - * If successful, push it back and proceed to getline without - * blocking. If not (EOF or EAGAIN), do the blocking poll. */ + * IMPORTANT: temporarily set O_NONBLOCK so fgetc() returns + * immediately when the FILE* buffer AND kernel fd are both empty + * (EAGAIN → EOF + ferror). Without this, fgetc() on a blocking fd + * would block indefinitely, preventing the Phase 3 idle eviction + * timeout from ever firing. */ + int saved_flags = fcntl(fd, F_GETFL); + if (saved_flags >= 0) { + (void)fcntl(fd, F_SETFL, saved_flags | O_NONBLOCK); + } int c = fgetc(in); + if (saved_flags >= 0) { + (void)fcntl(fd, F_SETFL, saved_flags); /* restore blocking */ + } if (c == EOF) { if (feof(in)) { break; /* true EOF */ } - /* No buffered data and fd is empty — do blocking poll */ + /* No buffered data (EAGAIN from non-blocking read) — clear the + * ferror indicator set by EAGAIN, then do the blocking poll. */ + clearerr(in); pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); if (pr < 0) { break; From 5b534ce4652cae9f7fd02b91a40116fb18d5b1fe Mon Sep 17 00:00:00 2001 From: Shane McCarron Date: Sat, 21 Mar 2026 12:47:11 -0500 Subject: [PATCH 5/5] fix(mcp): address QA round 2 findings - Add explicit fallback path when fcntl(F_GETFL) fails: skip the FILE* peek and fall through directly to blocking poll so idle eviction still fires on timeout (Finding 1) - Strengthen C unit test: verify id:1 (initialize) and id:2 (tools/list) response IDs are both present, not just a substring match on "tools" (Finding 2) Co-Authored-By: Claude Sonnet 4.6 --- src/mcp/mcp.c | 42 ++++++++++++++++++++++++++---------------- tests/test_mcp.c | 7 ++++++- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 0729d0f..6ccccbb 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -2425,20 +2425,10 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { * would block indefinitely, preventing the Phase 3 idle eviction * timeout from ever firing. */ int saved_flags = fcntl(fd, F_GETFL); - if (saved_flags >= 0) { - (void)fcntl(fd, F_SETFL, saved_flags | O_NONBLOCK); - } - int c = fgetc(in); - if (saved_flags >= 0) { - (void)fcntl(fd, F_SETFL, saved_flags); /* restore blocking */ - } - if (c == EOF) { - if (feof(in)) { - break; /* true EOF */ - } - /* No buffered data (EAGAIN from non-blocking read) — clear the - * ferror indicator set by EAGAIN, then do the blocking poll. */ - clearerr(in); + if (saved_flags < 0) { + /* fcntl failed (should not happen on a valid fd) — skip the + * FILE* peek and fall straight through to the blocking poll so + * idle eviction still fires on timeout. */ pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); if (pr < 0) { break; @@ -2448,8 +2438,28 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { continue; } } else { - /* Buffered data found — push back and fall through to getline */ - (void)ungetc(c, in); + (void)fcntl(fd, F_SETFL, saved_flags | O_NONBLOCK); + int c = fgetc(in); + (void)fcntl(fd, F_SETFL, saved_flags); /* restore blocking */ + if (c == EOF) { + if (feof(in)) { + break; /* true EOF */ + } + /* No buffered data (EAGAIN from non-blocking read) — clear + * the ferror indicator set by EAGAIN, then blocking poll. */ + clearerr(in); + pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + if (pr < 0) { + break; + } + if (pr == 0) { + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } + } else { + /* Buffered data found — push back and fall through to getline */ + (void)ungetc(c, in); + } } } #endif diff --git a/tests/test_mcp.c b/tests/test_mcp.c index eb9f346..bd07cba 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -1272,11 +1272,16 @@ TEST(mcp_server_run_rapid_messages) { ASSERT_EQ(rc, 0); - /* Verify that tools/list response is present in output */ + /* Verify both responses are present: + * id:1 — initialize response + * id:2 — tools/list response (notifications/initialized produces none) + * and that the tools list payload is included. */ rewind(out_fp); char buf[4096] = {0}; size_t nread = fread(buf, 1, sizeof(buf) - 1, out_fp); ASSERT_TRUE(nread > 0); + ASSERT_NOT_NULL(strstr(buf, "\"id\":1")); + ASSERT_NOT_NULL(strstr(buf, "\"id\":2")); ASSERT_NOT_NULL(strstr(buf, "tools")); cbm_mcp_server_free(srv);