Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 76 additions & 1 deletion src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,75 @@ static char *resolve_snippet_source(const char *root_path, const char *file_path
return NULL;
}

/* Report whether `c` carries the 10xxxxxx bit pattern of a UTF-8 continuation byte. */
static bool utf8_is_cont(unsigned char c) {
    const unsigned int top_two_bits = (unsigned int)c >> 6;
    return top_two_bits == 0x2u;
}

/*
 * Return a newly malloc'd copy of the NUL-terminated string `s` in which every
 * byte that does not begin a well-formed UTF-8 sequence is replaced by U+FFFD
 * (bytes EF BF BD). Well-formed sequences are copied through unchanged.
 *
 * Rejected (one byte at a time): stray continuation bytes, lead bytes C0/C1
 * and F5..FF, overlong 3- and 4-byte forms (E0 80..9F, F0 80..8F), UTF-16
 * surrogates (ED A0..BF), code points above U+10FFFF (F4 90..BF), and
 * sequences cut short by the end of the string.
 *
 * Returns NULL when `s` is NULL, when the worst-case output size would not
 * fit in size_t, or when malloc fails. The caller owns the result and must
 * free() it.
 */
static char *sanitize_utf8_lossy(const char *s) {
    enum {
        UTF8_REPLACEMENT_LEN = 3,
        UTF8_TWO_BYTE_LEN = 2,
        UTF8_THREE_BYTE_LEN = 3,
        UTF8_FOUR_BYTE_LEN = 4,
        UTF8_FOURTH_BYTE = 3,
    };
    if (!s) {
        return NULL;
    }
    size_t len = strlen(s);
    /* Worst case: every input byte expands to a 3-byte replacement. Refuse
     * lengths whose product would wrap. SKIP_ONE is defined elsewhere in this
     * file and is used here as the terminator's size (presumably 1 -- confirm). */
    if (len > (((size_t)-1) - SKIP_ONE) / UTF8_REPLACEMENT_LEN) {
        return NULL;
    }
    char *out = malloc(len * UTF8_REPLACEMENT_LEN + SKIP_ONE);
    if (!out) {
        return NULL;
    }

    const unsigned char *p = (const unsigned char *)s;
    const unsigned char *end = p + len;
    unsigned char *dst = (unsigned char *)out;
    while (p < end) {
        const unsigned char c = *p;
        /* Bytes remaining, counting *p. Comparing counts instead of writing
         * `p + k < end` avoids forming a pointer more than one past the end
         * of the string's storage, which is undefined behaviour. */
        const size_t avail = (size_t)(end - p);
        /* Allowed range for the second byte; narrowed below for the lead
         * bytes whose full continuation range would admit ill-formed text. */
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;
        size_t n = 0; /* length of the valid sequence at p, 0 if none */

        if (c < 0x80) {
            n = 1;
        } else if (c >= 0xC2 && c <= 0xDF) {
            if (avail >= UTF8_TWO_BYTE_LEN && utf8_is_cont(p[1])) {
                n = UTF8_TWO_BYTE_LEN;
            }
        } else if (c >= 0xE0 && c <= 0xEF) {
            if (c == 0xE0) {
                lo = 0xA0; /* below this the encoding is overlong */
            } else if (c == 0xED) {
                hi = 0x9F; /* above this lies the surrogate range */
            }
            if (avail >= UTF8_THREE_BYTE_LEN && p[1] >= lo && p[1] <= hi &&
                utf8_is_cont(p[2])) {
                n = UTF8_THREE_BYTE_LEN;
            }
        } else if (c >= 0xF0 && c <= 0xF4) {
            if (c == 0xF0) {
                lo = 0x90; /* below this the encoding is overlong */
            } else if (c == 0xF4) {
                hi = 0x8F; /* above this exceeds U+10FFFF */
            }
            if (avail >= UTF8_FOUR_BYTE_LEN && p[1] >= lo && p[1] <= hi &&
                utf8_is_cont(p[2]) && utf8_is_cont(p[UTF8_FOURTH_BYTE])) {
                n = UTF8_FOUR_BYTE_LEN;
            }
        }

        if (n > 0) {
            memcpy(dst, p, n);
            dst += n;
            p += n;
        } else {
            /* Emit U+FFFD and resynchronise on the very next input byte. */
            *dst++ = 0xEF;
            *dst++ = 0xBF;
            *dst++ = 0xBD;
            p++;
        }
    }
    *dst = '\0';
    return out;
}

/* Build an enriched snippet response for a resolved node. */
/* Add a string array to a JSON object (no-op if count == 0). */
static void add_string_array(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *key,
Expand Down Expand Up @@ -2877,7 +2946,13 @@ static char *build_snippet_response(cbm_mcp_server_t *srv, cbm_node_t *node,
yyjson_mut_obj_add_int(doc, root_obj, "end_line", end);

if (source) {
yyjson_mut_obj_add_str(doc, root_obj, "source", source);
char *safe_source = sanitize_utf8_lossy(source);
if (safe_source) {
yyjson_mut_obj_add_strcpy(doc, root_obj, "source", safe_source);
free(safe_source);
} else {
yyjson_mut_obj_add_str(doc, root_obj, "source", "(source not available)");
}
} else {
yyjson_mut_obj_add_str(doc, root_obj, "source", "(source not available)");
}
Expand Down
67 changes: 67 additions & 0 deletions tests/test_mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <yyjson/yyjson.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>

/* ══════════════════════════════════════════════════════════════════
* JSON-RPC PARSING
Expand Down Expand Up @@ -1291,6 +1292,31 @@ static char *call_snippet(cbm_mcp_server_t *srv, const char *args_json) {
return text;
}

/* True when `json` is non-NULL and yyjson_read() accepts it as a document. */
static bool is_valid_json_response(const char *json) {
    yyjson_doc *parsed = (json != NULL) ? yyjson_read(json, strlen(json), 0) : NULL;
    if (parsed != NULL) {
        /* Only the parse result matters; release the document right away. */
        yyjson_doc_free(parsed);
        return true;
    }
    return false;
}

/*
 * True when `json` parses and the string obtained from its root-level
 * "source" member contains U+FFFD (bytes EF BF BD).
 *
 * Returns false when `json` is NULL, when parsing fails, or when
 * yyjson_get_str() yields NULL for the "source" lookup.
 */
static bool snippet_source_has_replacement(const char *json) {
    /* strlen(NULL) is undefined; mirror the guard in is_valid_json_response. */
    if (!json) {
        return false;
    }
    yyjson_doc *doc = yyjson_read(json, strlen(json), 0);
    if (!doc) {
        return false;
    }
    yyjson_val *root = yyjson_doc_get_root(doc);
    yyjson_val *source = yyjson_obj_get(root, "source");
    const char *source_str = yyjson_get_str(source);
    /* source_str points into doc, so search before the document is freed. */
    bool found = source_str != NULL && strstr(source_str, "\xEF\xBF\xBD") != NULL;
    yyjson_doc_free(doc);
    return found;
}

/* ── TestSnippet_ExactQN ──────────────────────────────────────── */

TEST(snippet_exact_qn) {
Expand Down Expand Up @@ -1577,6 +1603,46 @@ TEST(snippet_include_neighbors_enabled) {
PASS();
}

/* ── TestSnippet_SourceInvalidUtf8 ────────────────────────────── */

TEST(snippet_source_invalid_utf8) {
    char workdir[256];
    cbm_mcp_server_t *server = setup_snippet_server(workdir, sizeof(workdir));
    ASSERT_NOT_NULL(server);

    /* Write <workdir>/project/main.go with a comment holding the bytes
     * C0 D4 B7 C2, which are not valid UTF-8. */
    char go_path[512];
    snprintf(go_path, sizeof(go_path), "%s/project/main.go", workdir);
    FILE *out = fopen(go_path, "wb");
    ASSERT_NOT_NULL(out);
    const unsigned char go_source[] = "package main\n"
                                      "\n"
                                      "func HandleRequest() error {\n"
                                      "\t// \xC0\xD4\xB7\xC2\n"
                                      "\treturn nil\n"
                                      "}\n";
    /* The string initializer appends a NUL that must not reach the file. */
    const size_t go_source_len = sizeof(go_source) - 1;
    ASSERT_EQ(fwrite(go_source, 1, go_source_len, out), go_source_len);
    ASSERT_EQ(fclose(out), 0);

    const char *request_args = "{\"qualified_name\":\"test-project.cmd.server.main.HandleRequest\","
                               "\"project\":\"test-project\"}";
    char *envelope = cbm_mcp_handle_tool(server, "get_code_snippet", request_args);
    /* Both the outer reply and the extracted text payload must parse as JSON. */
    ASSERT_TRUE(is_valid_json_response(envelope));
    char *payload = extract_text_content(envelope);
    ASSERT_NOT_NULL(payload);
    ASSERT_TRUE(is_valid_json_response(payload));
    /* The raw invalid bytes are gone, the surrounding code is still there,
     * and the "source" field carries U+FFFD. */
    ASSERT_NULL(strstr(payload, "\xC0\xD4"));
    ASSERT_NOT_NULL(strstr(payload, "HandleRequest"));
    ASSERT_NOT_NULL(strstr(payload, "return nil"));
    ASSERT_TRUE(snippet_source_has_replacement(payload));

    free(payload);
    free(envelope);
    cbm_mcp_server_free(server);
    cleanup_snippet_dir(workdir);
    PASS();
}

/* ══════════════════════════════════════════════════════════════════
* JSON-RPC PARSING — EDGE CASES
* ══════════════════════════════════════════════════════════════════ */
Expand Down Expand Up @@ -2129,5 +2195,6 @@ SUITE(mcp) {
RUN_TEST(snippet_auto_resolve_enabled);
RUN_TEST(snippet_include_neighbors_default);
RUN_TEST(snippet_include_neighbors_enabled);
RUN_TEST(snippet_source_invalid_utf8);
RUN_TEST(tool_bad_project_name_no_overflow_issue235);
}
Loading