diff --git a/src/cli/cli.c b/src/cli/cli.c index ba58e53..a4b9b88 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -1,2847 +1,2918 @@ -/* - * cli.c — CLI subcommand handlers for install, uninstall, update, version. - * - * Port of Go cmd/codebase-memory-mcp/ install/update logic. - * All functions accept explicit paths for testability. - */ -#include "cli/cli.h" -#include "foundation/compat.h" -#include "foundation/str_util.h" - -// the correct standard headers are included below but clang-tidy doesn't map them. -#include -#include "foundation/compat_fs.h" - -#ifndef CBM_VERSION -#define CBM_VERSION "dev" -#endif -#include // EEXIST -#include // open, O_WRONLY, O_CREAT, O_TRUNC -#include // uintptr_t -#include -#include -#include // strtok_r -#include // mode_t, S_IXUSR -#include // MAX_WBITS - -/* yyjson for JSON read-modify-write */ -#include "yyjson/yyjson.h" - -/* ── Constants ────────────────────────────────────────────────── */ - -/* Directory permissions: rwxr-x--- */ -#define DIR_PERMS 0750 - -/* Decompression buffer cap (500 MB) */ -#define DECOMPRESS_MAX_BYTES ((size_t)500 * 1024 * 1024) - -/* Tar header field offsets */ -#define TAR_NAME_LEN 101 /* filename field: bytes 0-99 + NUL */ -#define TAR_SIZE_OFFSET 124 /* octal size field offset */ -#define TAR_SIZE_LEN 13 /* octal size field: bytes 124-135 + NUL */ -#define TAR_TYPE_OFFSET 156 /* type flag byte */ -#define TAR_BINARY_NAME "codebase-memory-mcp" -#define TAR_BINARY_NAME_LEN 19 -#define TAR_BLOCK_SIZE 512 /* tar record alignment */ -#define TAR_BLOCK_MASK 511 /* TAR_BLOCK_SIZE - 1 */ - -/* ── Version ──────────────────────────────────────────────────── */ - -static const char *cli_version = "dev"; - -void cbm_cli_set_version(const char *ver) { - if (ver) { - cli_version = ver; - } -} - -const char *cbm_cli_get_version(void) { - return cli_version; -} - -/* ── Version comparison ───────────────────────────────────────── */ - -/* Parse semver major.minor.patch into array. 
Returns number of parts parsed. */ -static int parse_semver(const char *v, int out[3]) { - out[0] = out[1] = out[2] = 0; - /* Skip v prefix */ - if (*v == 'v' || *v == 'V') { - v++; - } - - int count = 0; - while (*v && count < 3) { - if (*v == '-') { - break; /* stop at pre-release suffix */ - } - char *endptr; - long val = strtol(v, &endptr, 10); - out[count++] = (int)val; - if (*endptr == '.') { - v = endptr + 1; - } else { - break; - } - } - return count; -} - -static bool has_prerelease(const char *v) { - if (*v == 'v' || *v == 'V') { - v++; - } - return strchr(v, '-') != NULL; -} - -int cbm_compare_versions(const char *a, const char *b) { - int pa[3]; - int pb[3]; - parse_semver(a, pa); - parse_semver(b, pb); - - for (int i = 0; i < 3; i++) { - if (pa[i] != pb[i]) { - return pa[i] - pb[i]; - } - } - - /* Same base version — non-dev beats dev */ - bool a_pre = has_prerelease(a); - bool b_pre = has_prerelease(b); - if (a_pre && !b_pre) { - return -1; - } - if (!a_pre && b_pre) { - return 1; - } - return 0; -} - -/* ── Shell RC detection ───────────────────────────────────────── */ - -const char *cbm_detect_shell_rc(const char *home_dir) { - static char buf[512]; - if (!home_dir || !home_dir[0]) { - return ""; - } - - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *shell = getenv("SHELL"); - if (!shell) { - shell = ""; - } - - if (strstr(shell, "/zsh")) { - snprintf(buf, sizeof(buf), "%s/.zshrc", home_dir); - return buf; - } - if (strstr(shell, "/bash")) { - /* Prefer .bashrc, fall back to .bash_profile */ - snprintf(buf, sizeof(buf), "%s/.bashrc", home_dir); - struct stat st; - if (stat(buf, &st) == 0) { - return buf; - } - snprintf(buf, sizeof(buf), "%s/.bash_profile", home_dir); - return buf; - } - if (strstr(shell, "/fish")) { - snprintf(buf, sizeof(buf), "%s/.config/fish/config.fish", home_dir); - return buf; - } - - /* Default to .profile */ - snprintf(buf, sizeof(buf), "%s/.profile", home_dir); - return buf; -} - -/* ── CLI binary detection 
─────────────────────────────────────── */ - -const char *cbm_find_cli(const char *name, const char *home_dir) { - static char buf[512]; - if (!name || !name[0]) { - return ""; - } - - /* Check PATH first */ - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *path_env = getenv("PATH"); - if (path_env) { - char path_copy[4096]; - snprintf(path_copy, sizeof(path_copy), "%s", path_env); - char *saveptr; - // NOLINTNEXTLINE(misc-include-cleaner) — strtok_r provided by standard header - char *dir = strtok_r(path_copy, ":", &saveptr); - while (dir) { - snprintf(buf, sizeof(buf), "%s/%s", dir, name); - struct stat st; - // NOLINTNEXTLINE(misc-include-cleaner) — S_IXUSR provided by standard header - if (stat(buf, &st) == 0 && (st.st_mode & S_IXUSR)) { - return buf; - } - dir = strtok_r(NULL, ":", &saveptr); - } - } - - /* Check common install locations */ - if (home_dir && home_dir[0]) { - const char *candidates[] = { - "/usr/local/bin/%s", - NULL, /* filled dynamically */ - NULL, - NULL, - NULL, - }; - char paths[5][512]; - snprintf(paths[0], sizeof(paths[0]), "/usr/local/bin/%s", name); - snprintf(paths[1], sizeof(paths[1]), "%s/.npm/bin/%s", home_dir, name); - snprintf(paths[2], sizeof(paths[2]), "%s/.local/bin/%s", home_dir, name); - snprintf(paths[3], sizeof(paths[3]), "%s/.cargo/bin/%s", home_dir, name); -#ifdef __APPLE__ - snprintf(paths[4], sizeof(paths[4]), "/opt/homebrew/bin/%s", name); -#else - paths[4][0] = '\0'; -#endif - (void)candidates; - - for (int i = 0; i < 5; i++) { - if (!paths[i][0]) { - continue; - } - struct stat st; - if (stat(paths[i], &st) == 0) { - snprintf(buf, sizeof(buf), "%s", paths[i]); - return buf; - } - } - } - - return ""; -} - -/* ── File utilities ───────────────────────────────────────────── */ - -int cbm_copy_file(const char *src, const char *dst) { - FILE *in = fopen(src, "rb"); - if (!in) { - return -1; - } - - FILE *out = fopen(dst, "wb"); - if (!out) { - (void)fclose(in); - return -1; - } - - char buf[8192]; - int err = 0; 
- while (!feof(in) && !ferror(in)) { - size_t n = fread(buf, 1, sizeof(buf), in); - if (n == 0) { - break; - } - if (fwrite(buf, 1, n, out) != n) { - err = 1; - break; - } - } - - if (err || ferror(in)) { - (void)fclose(in); - (void)fclose(out); - return -1; - } - - (void)fclose(in); - int rc = fclose(out); - return rc == 0 ? 0 : -1; -} - -/* ── Skill file content (embedded) ────────────────────────────── */ - -static const char skill_exploring_content[] = - "---\n" - "name: codebase-memory-exploring\n" - "description: Codebase knowledge graph expert. ALWAYS invoke this skill when the user " - "explores code, searches for functions/classes/routes, asks about architecture, or needs " - "codebase orientation. Do not use Grep, Glob, or file search directly — use " - "codebase-memory-mcp search_graph and get_architecture first.\n" - "---\n" - "\n" - "# Codebase Exploration\n" - "\n" - "Use codebase-memory-mcp tools to explore the codebase:\n" - "\n" - "## Workflow\n" - "1. `get_graph_schema` — understand what node/edge types exist\n" - "2. `search_graph` — find functions, classes, routes by pattern\n" - "3. `get_code_snippet` — read specific function implementations\n" - "4. `get_architecture` — get high-level project summary\n" - "\n" - "## Tips\n" - "- Use `search_graph(name_pattern=\".*Pattern.*\")` for fuzzy matching\n" - "- Use `search_graph(label=\"Route\")` to find HTTP routes\n" - "- Use `search_graph(label=\"Function\", file_pattern=\"*.go\")` to scope by language\n"; - -static const char skill_tracing_content[] = - "---\n" - "name: codebase-memory-tracing\n" - "description: Call chain and dependency expert. ALWAYS invoke this skill when the user " - "asks who calls a function, what a function calls, needs impact analysis, or traces " - "dependencies. 
Do not grep for function names directly — use codebase-memory-mcp " - "trace_call_path first.\n" - "---\n" - "\n" - "# Call Tracing & Impact Analysis\n" - "\n" - "Use codebase-memory-mcp tools to trace call paths:\n" - "\n" - "## Workflow\n" - "1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n" - "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + " - "callees\n" - "3. `detect_changes` — find what changed and assess risk_labels\n" - "\n" - "## Direction Options\n" - "- `inbound` — who calls this function?\n" - "- `outbound` — what does this function call?\n" - "- `both` — full context (callers + callees)\n"; - -static const char skill_quality_content[] = - "---\n" - "name: codebase-memory-quality\n" - "description: Code quality analysis expert. ALWAYS invoke this skill when the user asks " - "about dead code, unused functions, complexity, refactor candidates, or cleanup " - "opportunities. Do not search files manually — use codebase-memory-mcp search_graph " - "with degree filters first.\n" - "---\n" - "\n" - "# Code Quality Analysis\n" - "\n" - "Use codebase-memory-mcp tools for quality analysis:\n" - "\n" - "## Dead Code Detection\n" - "- `search_graph(max_degree=0, exclude_entry_points=true)` — find unreferenced functions\n" - "- `search_graph(max_degree=0, label=\"Function\")` — unreferenced functions only\n" - "\n" - "## Complexity Analysis\n" - "- `search_graph(min_degree=10)` — high fan-out functions\n" - "- `search_graph(label=\"Function\", sort_by=\"degree\")` — most-connected functions\n"; - -static const char skill_reference_content[] = - "---\n" - "name: codebase-memory-reference\n" - "description: Codebase-memory-mcp reference guide. ALWAYS invoke this skill when the user " - "asks about MCP tools, graph queries, Cypher syntax, edge types, or how to use the " - "knowledge graph. 
Do not guess tool parameters — load this reference first.\n" - "---\n" - "\n" - "# Codebase Memory MCP Reference\n" - "\n" - "## 14 total MCP Tools\n" - "- `index_repository` — index a project\n" - "- `index_status` — check indexing progress\n" - "- `detect_changes` — find what changed since last index\n" - "- `search_graph` — find nodes by pattern\n" - "- `search_code` — text search in source\n" - "- `query_graph` — Cypher query language\n" - "- `trace_call_path` — call chain traversal\n" - "- `get_code_snippet` — read function source\n" - "- `get_graph_schema` — node/edge type catalog\n" - "- `get_architecture` — high-level summary\n" - "- `list_projects` — indexed projects\n" - "- `delete_project` — remove a project\n" - "- `manage_adr` — architecture decision records\n" - "- `ingest_traces` — import runtime traces\n" - "\n" - "## Edge Types\n" - "CALLS, HTTP_CALLS, ASYNC_CALLS, IMPORTS, DEFINES, DEFINES_METHOD,\n" - "HANDLES, IMPLEMENTS, CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" - "\n" - "## Cypher Examples\n" - "```\n" - "MATCH (f:Function) WHERE f.name =~ '.*Handler.*' RETURN f.name, f.file_path\n" - "MATCH (a)-[r:CALLS]->(b) WHERE a.name = 'main' RETURN b.name\n" - "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path\n" - "```\n"; - -static const char codex_instructions_content[] = - "# Codebase Knowledge Graph\n" - "\n" - "This project uses codebase-memory-mcp to maintain a knowledge graph of the codebase.\n" - "Use the MCP tools to explore and understand the code:\n" - "\n" - "- `search_graph` — find functions, classes, routes by pattern\n" - "- `trace_call_path` — trace who calls a function or what it calls\n" - "- `get_code_snippet` — read function source code\n" - "- `query_graph` — run Cypher queries for complex patterns\n" - "- `get_architecture` — high-level project summary\n" - "\n" - "Always prefer graph tools over grep for code discovery.\n"; - -static const cbm_skill_t skills[CBM_SKILL_COUNT] = { - 
{"codebase-memory-exploring", skill_exploring_content}, - {"codebase-memory-tracing", skill_tracing_content}, - {"codebase-memory-quality", skill_quality_content}, - {"codebase-memory-reference", skill_reference_content}, -}; - -const cbm_skill_t *cbm_get_skills(void) { - return skills; -} - -const char *cbm_get_codex_instructions(void) { - return codex_instructions_content; -} - -/* ── Recursive mkdir (via compat_fs) ──────────────────────────── */ - -static int mkdirp(const char *path, int mode) { - return (int)cbm_mkdir_p(path, mode) ? 0 : -1; -} - -/* ── Recursive rmdir ──────────────────────────────────────────── */ - -// NOLINTNEXTLINE(misc-no-recursion) — intentional recursive directory removal -static int rmdir_recursive(const char *path) { - cbm_dir_t *d = cbm_opendir(path); - if (!d) { - return -1; - } - - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - char child[1024]; - snprintf(child, sizeof(child), "%s/%s", path, ent->name); - struct stat st; - if (stat(child, &st) == 0 && S_ISDIR(st.st_mode)) { - rmdir_recursive(child); - } else { - cbm_unlink(child); - } - } - cbm_closedir(d); - return cbm_rmdir(path); -} - -/* ── Skill management ─────────────────────────────────────────── */ - -int cbm_install_skills(const char *skills_dir, bool force, bool dry_run) { - if (!skills_dir) { - return 0; - } - int count = 0; - - for (int i = 0; i < CBM_SKILL_COUNT; i++) { - char skill_path[1024]; - snprintf(skill_path, sizeof(skill_path), "%s/%s", skills_dir, skills[i].name); - char file_path[1024]; - snprintf(file_path, sizeof(file_path), "%s/SKILL.md", skill_path); - - /* Check if already exists */ - if (!force) { - struct stat st; - if (stat(file_path, &st) == 0) { - continue; - } - } - - if (dry_run) { - count++; - continue; - } - - if (mkdirp(skill_path, DIR_PERMS) != 0) { - continue; - } - - FILE *f = fopen(file_path, "w"); - if (!f) { - continue; - } - (void)fwrite(skills[i].content, 1, strlen(skills[i].content), f); - (void)fclose(f); - 
count++; - } - return count; -} - -int cbm_remove_skills(const char *skills_dir, bool dry_run) { - if (!skills_dir) { - return 0; - } - int count = 0; - - for (int i = 0; i < CBM_SKILL_COUNT; i++) { - char skill_path[1024]; - snprintf(skill_path, sizeof(skill_path), "%s/%s", skills_dir, skills[i].name); - struct stat st; - if (stat(skill_path, &st) != 0) { - continue; - } - - if (dry_run) { - count++; - continue; - } - - if (rmdir_recursive(skill_path) == 0) { - count++; - } - } - return count; -} - -bool cbm_remove_old_monolithic_skill(const char *skills_dir, bool dry_run) { - if (!skills_dir) { - return false; - } - - char old_path[1024]; - snprintf(old_path, sizeof(old_path), "%s/codebase-memory-mcp", skills_dir); - struct stat st; - if (stat(old_path, &st) != 0 || !S_ISDIR(st.st_mode)) { - return false; - } - - if (dry_run) { - return true; - } - return rmdir_recursive(old_path) == 0; -} - -/* ── JSON config helpers (using yyjson) ───────────────────────── */ - -/* Read a JSON file into a yyjson document. Returns NULL on error. */ -static yyjson_doc *read_json_file(const char *path) { - FILE *f = fopen(path, "r"); - if (!f) { - return NULL; - } - - (void)fseek(f, 0, SEEK_END); - long size = ftell(f); - (void)fseek(f, 0, SEEK_SET); - - if (size <= 0 || size > 10L * 1024 * 1024) { - (void)fclose(f); - return NULL; - } - - char *buf = malloc((size_t)size + 1); - if (!buf) { - (void)fclose(f); - return NULL; - } - - size_t nread = fread(buf, 1, (size_t)size, f); - (void)fclose(f); - // NOLINTNEXTLINE(clang-analyzer-security.ArrayBound) - buf[nread] = '\0'; - - /* Allow JSONC (comments + trailing commas) — Zed settings.json uses this format */ - yyjson_read_flag flags = YYJSON_READ_ALLOW_COMMENTS | YYJSON_READ_ALLOW_TRAILING_COMMAS; - yyjson_doc *doc = yyjson_read(buf, nread, flags); - free(buf); - return doc; -} - -/* Write a mutable yyjson document to a file with pretty printing. 
*/ -static int write_json_file(const char *path, yyjson_mut_doc *doc) { - /* Ensure parent directory exists */ - char dir[1024]; - snprintf(dir, sizeof(dir), "%s", path); - char *last_slash = strrchr(dir, '/'); - if (last_slash) { - *last_slash = '\0'; - mkdirp(dir, DIR_PERMS); - } - - yyjson_write_flag flags = YYJSON_WRITE_PRETTY | YYJSON_WRITE_ESCAPE_UNICODE; - size_t len; - char *json = yyjson_mut_write(doc, flags, &len); - if (!json) { - return -1; - } - - FILE *f = fopen(path, "w"); - if (!f) { - free(json); - return -1; - } - - size_t written = fwrite(json, 1, len, f); - /* Add trailing newline */ - (void)fputc('\n', f); - (void)fclose(f); - free(json); - - return written == len ? 0 : -1; -} - -/* ── Editor MCP: Cursor/Windsurf/Gemini (mcpServers key) ──────── */ - -int cbm_install_editor_mcp(const char *binary_path, const char *config_path) { - if (!binary_path || !config_path) { - return -1; - } - - /* Read existing or start fresh */ - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - if (!mdoc) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - yyjson_mut_val *root; - if (doc) { - root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - } else { - root = yyjson_mut_obj(mdoc); - } - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - /* Get or create mcpServers object */ - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "mcpServers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - servers = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_val(mdoc, root, "mcpServers", servers); - } - - /* Remove existing entry if present */ - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - /* Add our entry */ - yyjson_mut_val *entry = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); - yyjson_mut_obj_add_val(mdoc, servers, "codebase-memory-mcp", entry); - - int rc = write_json_file(config_path, mdoc); - 
yyjson_mut_doc_free(mdoc); - return rc; -} - -int cbm_remove_editor_mcp(const char *config_path) { - if (!config_path) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - if (!doc) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "mcpServers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -/* ── VS Code MCP (servers key with type:stdio) ────────────────── */ - -int cbm_install_vscode_mcp(const char *binary_path, const char *config_path) { - if (!binary_path || !config_path) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - if (!mdoc) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - yyjson_mut_val *root; - if (doc) { - root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - } else { - root = yyjson_mut_obj(mdoc); - } - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "servers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - servers = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_val(mdoc, root, "servers", servers); - } - - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - yyjson_mut_val *entry = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, entry, "type", "stdio"); - yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); - yyjson_mut_obj_add_val(mdoc, servers, "codebase-memory-mcp", entry); - - int rc = write_json_file(config_path, mdoc); - 
yyjson_mut_doc_free(mdoc); - return rc; -} - -int cbm_remove_vscode_mcp(const char *config_path) { - if (!config_path) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - if (!doc) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "servers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -/* ── Zed MCP (context_servers with command + args) ────────────── */ - -int cbm_install_zed_mcp(const char *binary_path, const char *config_path) { - if (!binary_path || !config_path) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - if (!mdoc) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - yyjson_mut_val *root; - if (doc) { - root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - } else { - root = yyjson_mut_obj(mdoc); - } - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "context_servers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - servers = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_val(mdoc, root, "context_servers", servers); - } - - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - yyjson_mut_val *entry = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); - yyjson_mut_val *args = yyjson_mut_arr(mdoc); - yyjson_mut_arr_add_str(mdoc, args, ""); - yyjson_mut_obj_add_val(mdoc, entry, "args", args); - yyjson_mut_obj_add_val(mdoc, servers, 
"codebase-memory-mcp", entry); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -int cbm_remove_zed_mcp(const char *config_path) { - if (!config_path) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - if (!doc) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *servers = yyjson_mut_obj_get(root, "context_servers"); - if (!servers || !yyjson_mut_is_obj(servers)) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -/* ── Agent detection ──────────────────────────────────────────── */ - -cbm_detected_agents_t cbm_detect_agents(const char *home_dir) { - cbm_detected_agents_t agents; - memset(&agents, 0, sizeof(agents)); - if (!home_dir || !home_dir[0]) { - return agents; - } - - char path[1024]; - struct stat st; - - /* Claude Code: ~/.claude/ */ - snprintf(path, sizeof(path), "%s/.claude", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.claude_code = true; - } - - /* Codex CLI: ~/.codex/ */ - snprintf(path, sizeof(path), "%s/.codex", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.codex = true; - } - - /* Gemini CLI: ~/.gemini/ */ - snprintf(path, sizeof(path), "%s/.gemini", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.gemini = true; - } - - /* Zed: platform-specific config dir */ -#ifdef __APPLE__ - snprintf(path, sizeof(path), "%s/Library/Application Support/Zed", home_dir); -#else - snprintf(path, sizeof(path), "%s/.config/zed", home_dir); -#endif - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.zed = true; - } - - /* 
OpenCode: binary on PATH */ - const char *oc = cbm_find_cli("opencode", home_dir); - if (oc[0]) { - agents.opencode = true; - } - - /* Antigravity: ~/.gemini/antigravity/ */ - snprintf(path, sizeof(path), "%s/.gemini/antigravity", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.antigravity = true; - agents.gemini = true; /* parent dir implies gemini */ - } - - /* Aider: binary on PATH */ - const char *ai = cbm_find_cli("aider", home_dir); - if (ai[0]) { - agents.aider = true; - } - - /* KiloCode: globalStorage dir */ - snprintf(path, sizeof(path), "%s/.config/Code/User/globalStorage/kilocode.kilo-code", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.kilocode = true; - } - - /* VS Code: User config dir */ -#ifdef __APPLE__ - snprintf(path, sizeof(path), "%s/Library/Application Support/Code/User", home_dir); -#else - snprintf(path, sizeof(path), "%s/.config/Code/User", home_dir); -#endif - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.vscode = true; - } - - /* OpenClaw: ~/.openclaw/ dir */ - snprintf(path, sizeof(path), "%s/.openclaw", home_dir); - if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { - agents.openclaw = true; - } - - return agents; -} - -/* ── Shared agent instructions content ────────────────────────── */ - -static const char agent_instructions_content[] = - "# Codebase Knowledge Graph (codebase-memory-mcp)\n" - "\n" - "This project uses codebase-memory-mcp to maintain a knowledge graph of the codebase.\n" - "ALWAYS prefer MCP graph tools over grep/glob/file-search for code discovery.\n" - "\n" - "## Priority Order\n" - "1. `search_graph` — find functions, classes, routes, variables by pattern\n" - "2. `trace_call_path` — trace who calls a function or what it calls\n" - "3. `get_code_snippet` — read specific function/class source code\n" - "4. `query_graph` — run Cypher queries for complex patterns\n" - "5. 
`get_architecture` — high-level project summary\n" - "\n" - "## When to fall back to grep/glob\n" - "- Searching for string literals, error messages, config values\n" - "- Searching non-code files (Dockerfiles, shell scripts, configs)\n" - "- When MCP tools return insufficient results\n" - "\n" - "## Examples\n" - "- Find a handler: `search_graph(name_pattern=\".*OrderHandler.*\")`\n" - "- Who calls it: `trace_call_path(function_name=\"OrderHandler\", direction=\"inbound\")`\n" - "- Read source: `get_code_snippet(qualified_name=\"pkg/orders.OrderHandler\")`\n"; - -const char *cbm_get_agent_instructions(void) { - return agent_instructions_content; -} - -/* ── Instructions file upsert ─────────────────────────────────── */ - -#define CMM_MARKER_START "" -#define CMM_MARKER_END "" - -/* Read entire file into malloc'd buffer. Returns NULL on error. */ -static char *read_file_str(const char *path, size_t *out_len) { - FILE *f = fopen(path, "r"); - if (!f) { - if (out_len) { - *out_len = 0; - } - return NULL; - } - (void)fseek(f, 0, SEEK_END); - long size = ftell(f); - (void)fseek(f, 0, SEEK_SET); - if (size < 0 || size > 10L * 1024 * 1024) { /* cap at 10 MB */ - (void)fclose(f); - return NULL; - } - - char *buf = malloc((size_t)size + 1); - if (!buf) { - (void)fclose(f); - return NULL; - } - size_t nread = fread(buf, 1, (size_t)size, f); - (void)fclose(f); - buf[nread] = '\0'; - if (out_len) { - *out_len = nread; - } - return buf; -} - -/* Write string to file, creating parent dirs if needed. */ -static int write_file_str(const char *path, const char *content) { - /* Ensure parent directory */ - char dir[1024]; - snprintf(dir, sizeof(dir), "%s", path); - char *last_slash = strrchr(dir, '/'); - if (last_slash) { - *last_slash = '\0'; - mkdirp(dir, DIR_PERMS); - } - - FILE *f = fopen(path, "w"); - if (!f) { - return -1; - } - size_t len = strlen(content); - size_t written = fwrite(content, 1, len, f); - (void)fclose(f); - return written == len ? 
0 : -1; -} - -int cbm_upsert_instructions(const char *path, const char *content) { - if (!path || !content) { - return -1; - } - - size_t existing_len = 0; - char *existing = read_file_str(path, &existing_len); - - /* Build the marker-wrapped section */ - size_t section_len = - strlen(CMM_MARKER_START) + 1 + strlen(content) + strlen(CMM_MARKER_END) + 1; - char *section = malloc(section_len + 1); - if (!section) { - free(existing); - return -1; - } - snprintf(section, section_len + 1, "%s\n%s%s\n", CMM_MARKER_START, content, CMM_MARKER_END); - - if (!existing) { - /* File doesn't exist — create with just the section */ - int rc = write_file_str(path, section); - free(section); - return rc; - } - - /* Check if markers already exist */ - char *start = strstr(existing, CMM_MARKER_START); - char *end = start ? strstr(start, CMM_MARKER_END) : NULL; - - char *result; - if (start && end) { - /* Replace between markers (including markers themselves) */ - end += strlen(CMM_MARKER_END); - /* Skip trailing newline after end marker */ - if (*end == '\n') { - end++; - } - - size_t prefix_len = (size_t)(start - existing); - size_t suffix_len = strlen(end); - size_t new_len = prefix_len + strlen(section) + suffix_len; - result = malloc(new_len + 1); - if (!result) { - free(existing); - free(section); - return -1; - } - memcpy(result, existing, prefix_len); - memcpy(result + prefix_len, section, strlen(section)); - memcpy(result + prefix_len + strlen(section), end, suffix_len); - result[new_len] = '\0'; - } else { - /* Append section */ - size_t new_len = existing_len + 1 + strlen(section); - result = malloc(new_len + 1); - if (!result) { - free(existing); - free(section); - return -1; - } - memcpy(result, existing, existing_len); - result[existing_len] = '\n'; - memcpy(result + existing_len + 1, section, strlen(section)); - result[new_len] = '\0'; - } - - int rc = write_file_str(path, result); - free(existing); - free(section); - free(result); - return rc; -} - -int 
cbm_remove_instructions(const char *path) { - if (!path) { - return -1; - } - - size_t len = 0; - char *content = read_file_str(path, &len); - if (!content) { - return 1; - } - - char *start = strstr(content, CMM_MARKER_START); - char *end = start ? strstr(start, CMM_MARKER_END) : NULL; - - if (!start || !end) { - free(content); - return 1; /* not found */ - } - - end += strlen(CMM_MARKER_END); - if (*end == '\n') { - end++; - } - - /* Also remove a leading newline before the start marker if present */ - if (start > content && *(start - 1) == '\n') { - start--; - } - - size_t prefix_len = (size_t)(start - content); - size_t suffix_len = strlen(end); - size_t new_len = prefix_len + suffix_len; - char *result = malloc(new_len + 1); - if (!result) { - free(content); - return -1; - } - memcpy(result, content, prefix_len); - memcpy(result + prefix_len, end, suffix_len); - result[new_len] = '\0'; - - int rc = write_file_str(path, result); - free(content); - free(result); - return rc; -} - -/* ── Codex MCP config (TOML) ─────────────────────────────────── */ - -#define CODEX_CMM_SECTION "[mcp_servers.codebase-memory-mcp]" - -int cbm_upsert_codex_mcp(const char *binary_path, const char *config_path) { - if (!binary_path || !config_path) { - return -1; - } - - size_t len = 0; - char *content = read_file_str(config_path, &len); - - /* Build our TOML section */ - char section[1024]; - snprintf(section, sizeof(section), "%s\ncommand = \"%s\"\n", CODEX_CMM_SECTION, binary_path); - - if (!content) { - /* No file — create fresh */ - return write_file_str(config_path, section); - } - - /* Check if our section already exists */ - char *existing = strstr(content, CODEX_CMM_SECTION); - if (existing) { - /* Remove old section: from [mcp_servers.codebase-memory-mcp] to next [section] or EOF */ - char *section_end = existing + strlen(CODEX_CMM_SECTION); - /* Find next [section] header */ - char *next_section = strstr(section_end, "\n["); - if (next_section) { - next_section++; /* keep 
the newline before next section */ - } - - size_t prefix_len = (size_t)(existing - content); - const char *suffix = next_section ? next_section : ""; - size_t suffix_len = strlen(suffix); - size_t new_len = prefix_len + strlen(section) + 1 + suffix_len; - char *result = malloc(new_len + 1); - if (!result) { - free(content); - return -1; - } - memcpy(result, content, prefix_len); - memcpy(result + prefix_len, section, strlen(section)); - result[prefix_len + strlen(section)] = '\n'; - memcpy(result + prefix_len + strlen(section) + 1, suffix, suffix_len); - result[new_len] = '\0'; - - int rc = write_file_str(config_path, result); - free(content); - free(result); - return rc; - } - - /* Append our section */ - size_t new_len = len + 1 + strlen(section); - char *result = malloc(new_len + 1); - if (!result) { - free(content); - return -1; - } - memcpy(result, content, len); - result[len] = '\n'; - memcpy(result + len + 1, section, strlen(section)); - result[new_len] = '\0'; - - int rc = write_file_str(config_path, result); - free(content); - free(result); - return rc; -} - -int cbm_remove_codex_mcp(const char *config_path) { - if (!config_path) { - return -1; - } - - size_t len = 0; - char *content = read_file_str(config_path, &len); - if (!content) { - return 1; - } - - char *existing = strstr(content, CODEX_CMM_SECTION); - if (!existing) { - free(content); - return 1; - } - - char *section_end = existing + strlen(CODEX_CMM_SECTION); - char *next_section = strstr(section_end, "\n["); - if (next_section) { - next_section++; - } - - /* Remove leading newline if present */ - if (existing > content && *(existing - 1) == '\n') { - existing--; - } - - size_t prefix_len = (size_t)(existing - content); - const char *suffix = next_section ? 
next_section : ""; - size_t suffix_len = strlen(suffix); - size_t new_len = prefix_len + suffix_len; - char *result = malloc(new_len + 1); - if (!result) { - free(content); - return -1; - } - memcpy(result, content, prefix_len); - memcpy(result + prefix_len, suffix, suffix_len); - result[new_len] = '\0'; - - int rc = write_file_str(config_path, result); - free(content); - free(result); - return rc; -} - -/* ── OpenCode MCP config (JSON with "mcp" key) ───────────────── */ - -int cbm_upsert_opencode_mcp(const char *binary_path, const char *config_path) { - if (!binary_path || !config_path) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - if (!mdoc) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - yyjson_mut_val *root; - if (doc) { - root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - } else { - root = yyjson_mut_obj(mdoc); - } - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - /* Get or create "mcp" object */ - yyjson_mut_val *mcp = yyjson_mut_obj_get(root, "mcp"); - if (!mcp || !yyjson_mut_is_obj(mcp)) { - mcp = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_val(mdoc, root, "mcp", mcp); - } - - yyjson_mut_obj_remove_key(mcp, "codebase-memory-mcp"); - - yyjson_mut_val *entry = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); - yyjson_mut_obj_add_val(mdoc, mcp, "codebase-memory-mcp", entry); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -int cbm_remove_opencode_mcp(const char *config_path) { - if (!config_path) { - return -1; - } - - yyjson_doc *doc = read_json_file(config_path); - if (!doc) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, 
root); - - yyjson_mut_val *mcp = yyjson_mut_obj_get(root, "mcp"); - if (!mcp || !yyjson_mut_is_obj(mcp)) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - yyjson_mut_obj_remove_key(mcp, "codebase-memory-mcp"); - - int rc = write_json_file(config_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -/* ── Antigravity MCP config (JSON, same mcpServers format) ────── */ - -int cbm_upsert_antigravity_mcp(const char *binary_path, const char *config_path) { - /* Antigravity uses same mcpServers format as Cursor/Gemini */ - return cbm_install_editor_mcp(binary_path, config_path); -} - -int cbm_remove_antigravity_mcp(const char *config_path) { - return cbm_remove_editor_mcp(config_path); -} - -/* ── Claude Code pre-tool hooks ───────────────────────────────── */ - -#define CMM_HOOK_MATCHER "Grep|Glob|Read|Search" -#define CMM_HOOK_COMMAND \ - "echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_call_path/" \ - "get_code_snippet over Grep/Glob/Read/Search for code discovery. " \ - "Use get_code_snippet(qualified_name) instead of Read to view a function. " \ - "Fall back only if MCP returns insufficient results.' >&2" - -/* Old matcher values from previous versions — recognized during upgrade so - * upsert_hooks_json can remove them before inserting the current matcher. */ -static const char *cmm_old_matchers[] = { - "Grep|Glob|Read", - NULL, -}; - -/* Check if a PreToolUse array entry matches our hook (current or old matcher). 
*/ -static bool is_cmm_hook_entry(yyjson_mut_val *entry, const char *matcher_str) { - yyjson_mut_val *matcher = yyjson_mut_obj_get(entry, "matcher"); - if (!matcher || !yyjson_mut_is_str(matcher)) { - return false; - } - const char *val = yyjson_mut_get_str(matcher); - if (!val) { - return false; - } - if (strcmp(val, matcher_str) == 0) { - return true; - } - /* Also match old versions for backwards-compatible upgrade */ - for (int i = 0; cmm_old_matchers[i]; i++) { - if (strcmp(val, cmm_old_matchers[i]) == 0) { - return true; - } - } - return false; -} - -/* Generic hook upsert for both Claude Code and Gemini CLI */ -static int upsert_hooks_json(const char *settings_path, const char *hook_event, - const char *matcher_str, const char *command_str) { - if (!settings_path) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - if (!mdoc) { - return -1; - } - - yyjson_doc *doc = read_json_file(settings_path); - yyjson_mut_val *root; - if (doc) { - root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - } else { - root = yyjson_mut_obj(mdoc); - } - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - /* Get or create hooks object */ - yyjson_mut_val *hooks = yyjson_mut_obj_get(root, "hooks"); - if (!hooks || !yyjson_mut_is_obj(hooks)) { - hooks = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_val(mdoc, root, "hooks", hooks); - } - - /* Get or create the hook event array (e.g. 
PreToolUse / BeforeTool) */ - yyjson_mut_val *event_arr = yyjson_mut_obj_get(hooks, hook_event); - if (!event_arr || !yyjson_mut_is_arr(event_arr)) { - event_arr = yyjson_mut_arr(mdoc); - yyjson_mut_obj_add_val(mdoc, hooks, hook_event, event_arr); - } - - /* Remove existing CMM entry if present */ - size_t idx; - size_t max; - yyjson_mut_val *item; - yyjson_mut_arr_foreach(event_arr, idx, max, item) { - if (is_cmm_hook_entry(item, matcher_str)) { - yyjson_mut_arr_remove(event_arr, idx); - break; - } - } - - /* Build our hook entry */ - yyjson_mut_val *entry = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, entry, "matcher", matcher_str); - - yyjson_mut_val *hooks_arr = yyjson_mut_arr(mdoc); - yyjson_mut_val *hook_obj = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, hook_obj, "type", "command"); - yyjson_mut_obj_add_str(mdoc, hook_obj, "command", command_str); - yyjson_mut_arr_append(hooks_arr, hook_obj); - yyjson_mut_obj_add_val(mdoc, entry, "hooks", hooks_arr); - - yyjson_mut_arr_append(event_arr, entry); - - int rc = write_json_file(settings_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -/* Generic hook remove for both Claude Code and Gemini CLI */ -static int remove_hooks_json(const char *settings_path, const char *hook_event, - const char *matcher_str) { - if (!settings_path) { - return -1; - } - - yyjson_doc *doc = read_json_file(settings_path); - if (!doc) { - return -1; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return -1; - } - yyjson_mut_doc_set_root(mdoc, root); - - yyjson_mut_val *hooks = yyjson_mut_obj_get(root, "hooks"); - if (!hooks) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - yyjson_mut_val *event_arr = yyjson_mut_obj_get(hooks, hook_event); - if (!event_arr || !yyjson_mut_is_arr(event_arr)) { - yyjson_mut_doc_free(mdoc); - return 0; - } - - size_t idx; - 
size_t max; - yyjson_mut_val *item; - yyjson_mut_arr_foreach(event_arr, idx, max, item) { - if (is_cmm_hook_entry(item, matcher_str)) { - yyjson_mut_arr_remove(event_arr, idx); - break; - } - } - - int rc = write_json_file(settings_path, mdoc); - yyjson_mut_doc_free(mdoc); - return rc; -} - -int cbm_upsert_claude_hooks(const char *settings_path) { - return upsert_hooks_json(settings_path, "PreToolUse", CMM_HOOK_MATCHER, CMM_HOOK_COMMAND); -} - -int cbm_remove_claude_hooks(const char *settings_path) { - return remove_hooks_json(settings_path, "PreToolUse", CMM_HOOK_MATCHER); -} - -#define GEMINI_HOOK_MATCHER "google_search|read_file|grep_search" -#define GEMINI_HOOK_COMMAND \ - "echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_call_path/" \ - "get_code_snippet over grep/file search for code discovery.' >&2" - -int cbm_upsert_gemini_hooks(const char *settings_path) { - return upsert_hooks_json(settings_path, "BeforeTool", GEMINI_HOOK_MATCHER, GEMINI_HOOK_COMMAND); -} - -int cbm_remove_gemini_hooks(const char *settings_path) { - return remove_hooks_json(settings_path, "BeforeTool", GEMINI_HOOK_MATCHER); -} - -/* ── PATH management ──────────────────────────────────────────── */ - -int cbm_ensure_path(const char *bin_dir, const char *rc_file, bool dry_run) { - if (!bin_dir || !rc_file) { - return -1; - } - - char line[1024]; - snprintf(line, sizeof(line), "export PATH=\"%s:$PATH\"", bin_dir); - - /* Check if already present in rc file */ - FILE *f = fopen(rc_file, "r"); - if (f) { - char buf[2048]; - while (fgets(buf, sizeof(buf), f)) { - if (strstr(buf, line)) { - (void)fclose(f); - return 1; /* already present */ - } - } - (void)fclose(f); - } - - if (dry_run) { - return 0; - } - - f = fopen(rc_file, "a"); - if (!f) { - return -1; - } - - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - (void)fprintf(f, "\n# Added by codebase-memory-mcp install\n%s\n", line); - (void)fclose(f); - return 0; -} - -/* ── Tar.gz 
extraction ────────────────────────────────────────── */ - -unsigned char *cbm_extract_binary_from_targz(const unsigned char *data, int data_len, - int *out_len) { - if (!data || data_len <= 0 || !out_len) { - return NULL; - } - - /* Decompress gzip */ - z_stream strm = {0}; - // NOLINTNEXTLINE(performance-no-int-to-ptr) - strm.next_in = (unsigned char *)(uintptr_t)data; - strm.avail_in = (unsigned int)data_len; - - // NOLINTNEXTLINE(misc-include-cleaner) — MAX_WBITS provided by standard header - if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) { - return NULL; - } - - /* Allocate decompression buffer (up to 500MB) */ - size_t buf_cap = (size_t)data_len * 10; - if (buf_cap < 4096) { - buf_cap = 4096; - } - if (buf_cap > DECOMPRESS_MAX_BYTES) { - buf_cap = DECOMPRESS_MAX_BYTES; - } - unsigned char *decompressed = malloc(buf_cap); - if (!decompressed) { - inflateEnd(&strm); - return NULL; - } - - size_t total = 0; - int ret; - do { - if (total >= buf_cap) { - size_t new_cap = buf_cap * 2; - if (new_cap > DECOMPRESS_MAX_BYTES) { - free(decompressed); - inflateEnd(&strm); - return NULL; - } - unsigned char *nb = realloc(decompressed, new_cap); - if (!nb) { - free(decompressed); - inflateEnd(&strm); - return NULL; - } - decompressed = nb; - buf_cap = new_cap; - } - strm.next_out = decompressed + total; - strm.avail_out = (unsigned int)(buf_cap - total); - ret = inflate(&strm, Z_NO_FLUSH); - total = buf_cap - strm.avail_out; - } while (ret == Z_OK); - - inflateEnd(&strm); - - if (ret != Z_STREAM_END) { - free(decompressed); - return NULL; - } - - /* Parse tar: find entry starting with "codebase-memory-mcp" */ - size_t pos = 0; - while (pos + 512 <= total) { - /* Tar header is 512 bytes */ - const unsigned char *hdr = decompressed + pos; - - /* Check for end-of-archive (two zero blocks) */ - bool all_zero = true; - for (int i = 0; i < 512 && all_zero; i++) { - if (hdr[i] != 0) { - all_zero = false; - } - } - if (all_zero) { - break; - } - - /* Extract filename (bytes 
0-99) */ - char name[TAR_NAME_LEN] = {0}; - memcpy(name, hdr, TAR_NAME_LEN - 1); - - /* Extract size from octal field */ - char size_str[TAR_SIZE_LEN] = {0}; - memcpy(size_str, hdr + TAR_SIZE_OFFSET, TAR_SIZE_LEN - 1); - long file_size = strtol(size_str, NULL, 8); - - /* Extract type flag (byte 156) */ - char typeflag = (char)hdr[TAR_TYPE_OFFSET]; - - pos += 512; /* skip header */ - - /* Check if this is a regular file with our binary name */ - if (typeflag == '0' || typeflag == '\0') { - /* Get basename */ - const char *basename = strrchr(name, '/'); - basename = basename ? basename + 1 : name; - - if (strncmp(basename, TAR_BINARY_NAME, TAR_BINARY_NAME_LEN) == 0) { - if (pos + (size_t)file_size <= total) { - unsigned char *result = malloc((size_t)file_size); - if (result) { - memcpy(result, decompressed + pos, (size_t)file_size); - *out_len = (int)file_size; - free(decompressed); - return result; - } - } - } - } - - /* Skip to next 512-byte boundary */ - size_t blocks = ((size_t)file_size + TAR_BLOCK_MASK) / TAR_BLOCK_SIZE; - pos += blocks * 512; - } - - free(decompressed); - return NULL; /* binary not found */ -} - -/* ── Index management ─────────────────────────────────────────── */ - -static const char *get_cache_dir(const char *home_dir) { - static char buf[1024]; - if (!home_dir) { - home_dir = getenv("HOME"); - } - if (!home_dir) { - return NULL; - } - snprintf(buf, sizeof(buf), "%s/.cache/codebase-memory-mcp", home_dir); - return buf; -} - -int cbm_list_indexes(const char *home_dir) { - const char *cache_dir = get_cache_dir(home_dir); - if (!cache_dir) { - return 0; - } - - cbm_dir_t *d = cbm_opendir(cache_dir); - if (!d) { - return 0; - } - - int count = 0; - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { - printf(" %s/%s\n", cache_dir, ent->name); - count++; - } - } - cbm_closedir(d); - return count; -} - -int cbm_remove_indexes(const char 
*home_dir) { - const char *cache_dir = get_cache_dir(home_dir); - if (!cache_dir) { - return 0; - } - - cbm_dir_t *d = cbm_opendir(cache_dir); - if (!d) { - return 0; - } - - int count = 0; - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { - char path[1024]; - snprintf(path, sizeof(path), "%s/%s", cache_dir, ent->name); - /* Also remove .db.tmp if present */ - char tmp_path[1040]; - snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path); - cbm_unlink(tmp_path); - if (cbm_unlink(path) == 0) { - count++; - } - } - } - cbm_closedir(d); - return count; -} - -/* ── Config store (persistent key-value in _config.db) ─────────── */ - -#include - -struct cbm_config { - sqlite3 *db; - char get_buf[4096]; /* static buffer for cbm_config_get return values */ -}; - -cbm_config_t *cbm_config_open(const char *cache_dir) { - if (!cache_dir) { - return NULL; - } - - char dbpath[1024]; - snprintf(dbpath, sizeof(dbpath), "%s/_config.db", cache_dir); - - /* Ensure directory exists */ - mkdirp(cache_dir, DIR_PERMS); - - sqlite3 *db = NULL; - if (sqlite3_open(dbpath, &db) != SQLITE_OK) { - if (db) { - sqlite3_close(db); - } - return NULL; - } - - /* Create table if not exists */ - const char *sql = "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY, value TEXT)"; - char *err_msg = NULL; - if (sqlite3_exec(db, sql, NULL, NULL, &err_msg) != SQLITE_OK) { - sqlite3_free(err_msg); - sqlite3_close(db); - return NULL; - } - - cbm_config_t *cfg = calloc(1, sizeof(*cfg)); - if (!cfg) { - sqlite3_close(db); - return NULL; - } - cfg->db = db; - return cfg; -} - -void cbm_config_close(cbm_config_t *cfg) { - if (!cfg) { - return; - } - if (cfg->db) { - sqlite3_close(cfg->db); - } - free(cfg); -} - -const char *cbm_config_get(cbm_config_t *cfg, const char *key, const char *default_val) { - if (!cfg || !key) { - return default_val; - } - - sqlite3_stmt *stmt = NULL; - if 
(sqlite3_prepare_v2(cfg->db, "SELECT value FROM config WHERE key = ?", -1, &stmt, NULL) != - SQLITE_OK) { - return default_val; - } - sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); - - const char *result = default_val; - if (sqlite3_step(stmt) == SQLITE_ROW) { - const char *val = (const char *)sqlite3_column_text(stmt, 0); - if (val) { - snprintf(cfg->get_buf, sizeof(cfg->get_buf), "%s", val); - result = cfg->get_buf; - } - } - sqlite3_finalize(stmt); - return result; -} - -bool cbm_config_get_bool(cbm_config_t *cfg, const char *key, bool default_val) { - const char *val = cbm_config_get(cfg, key, NULL); - if (!val) { - return default_val; - } - if (strcmp(val, "true") == 0 || strcmp(val, "1") == 0 || strcmp(val, "on") == 0) { - return true; - } - if (strcmp(val, "false") == 0 || strcmp(val, "0") == 0 || strcmp(val, "off") == 0) { - return false; - } - return default_val; -} - -int cbm_config_get_int(cbm_config_t *cfg, const char *key, int default_val) { - const char *val = cbm_config_get(cfg, key, NULL); - if (!val) { - return default_val; - } - char *endptr; - long v = strtol(val, &endptr, 10); - if (endptr == val || *endptr != '\0') { - return default_val; - } - return (int)v; -} - -int cbm_config_set(cbm_config_t *cfg, const char *key, const char *value) { - if (!cfg || !key || !value) { - return -1; - } - - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(cfg->db, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", -1, - &stmt, NULL) != SQLITE_OK) { - return -1; - } - sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); - sqlite3_bind_text(stmt, 2, value, -1, SQLITE_TRANSIENT); - - int rc = sqlite3_step(stmt) == SQLITE_DONE ? 
0 : -1; - sqlite3_finalize(stmt); - return rc; -} - -int cbm_config_delete(cbm_config_t *cfg, const char *key) { - if (!cfg || !key) { - return -1; - } - - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(cfg->db, "DELETE FROM config WHERE key = ?", -1, &stmt, NULL) != - SQLITE_OK) { - return -1; - } - sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); - - int rc = sqlite3_step(stmt) == SQLITE_DONE ? 0 : -1; - sqlite3_finalize(stmt); - return rc; -} - -/* ── Config CLI subcommand ────────────────────────────────────── */ - -int cbm_cmd_config(int argc, char **argv) { - if (argc == 0) { - printf("Usage: codebase-memory-mcp config [args]\n\n"); - printf("Commands:\n"); - printf(" list Show all config values\n"); - printf(" get Get a config value\n"); - printf(" set Set a config value\n"); - printf(" reset Reset a key to default\n\n"); - printf("Config keys:\n"); - printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX, "false", - "Enable auto-indexing on MCP session start"); - printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, "50000", - "Max files for auto-indexing new projects"); - return 0; - } - - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *home = getenv("HOME"); - if (!home) { - fprintf(stderr, "error: HOME not set\n"); - return 1; - } - - char cache_dir[1024]; - snprintf(cache_dir, sizeof(cache_dir), "%s/.cache/codebase-memory-mcp", home); - - cbm_config_t *cfg = cbm_config_open(cache_dir); - if (!cfg) { - fprintf(stderr, "error: cannot open config database\n"); - return 1; - } - - int rc = 0; - if (strcmp(argv[0], "list") == 0 || strcmp(argv[0], "ls") == 0) { - printf("Configuration:\n"); - printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX, - cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX, "false")); - printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, - cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX_LIMIT, "50000")); - } else if (strcmp(argv[0], "get") == 0) { - if (argc < 2) { - fprintf(stderr, "Usage: config get \n"); - rc = 1; 
- } else { - printf("%s\n", cbm_config_get(cfg, argv[1], "")); - } - } else if (strcmp(argv[0], "set") == 0) { - if (argc < 3) { - fprintf(stderr, "Usage: config set \n"); - rc = 1; - } else { - if (cbm_config_set(cfg, argv[1], argv[2]) == 0) { - printf("%s = %s\n", argv[1], argv[2]); - } else { - fprintf(stderr, "error: failed to set %s\n", argv[1]); - rc = 1; - } - } - } else if (strcmp(argv[0], "reset") == 0) { - if (argc < 2) { - fprintf(stderr, "Usage: config reset \n"); - rc = 1; - } else { - cbm_config_delete(cfg, argv[1]); - printf("%s reset to default\n", argv[1]); - } - } else { - fprintf(stderr, "Unknown config command: %s\n", argv[0]); - rc = 1; - } - - cbm_config_close(cfg); - return rc; -} - -/* ── Interactive prompt ───────────────────────────────────────── */ - -/* Global auto-answer mode: 0=interactive, 1=always yes, -1=always no */ -static int g_auto_answer = 0; - -static void parse_auto_answer(int argc, char **argv) { - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "-y") == 0 || strcmp(argv[i], "--yes") == 0) { - g_auto_answer = 1; - } - if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--no") == 0) { - g_auto_answer = -1; - } - } -} - -static bool prompt_yn(const char *question) { - if (g_auto_answer == 1) { - printf("%s (y/n): y (auto)\n", question); - return true; - } - if (g_auto_answer == -1) { - printf("%s (y/n): n (auto)\n", question); - return false; - } - - printf("%s (y/n): ", question); - (void)fflush(stdout); - - char buf[16]; - if (!fgets(buf, sizeof(buf), stdin)) { - return false; - } - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - return (buf[0] == 'y' || buf[0] == 'Y') ? 
true : false; -} - -/* ── SHA-256 checksum verification ─────────────────────────────── */ - -/* SHA-256 hex digest: 64 hex chars + NUL */ -#define SHA256_HEX_LEN 64 -#define SHA256_BUF_SIZE (SHA256_HEX_LEN + 1) -/* Minimum line length in checksums.txt: 64 hex + 2 spaces + 1 char filename */ -#define CHECKSUM_LINE_MIN (SHA256_HEX_LEN + 2) - -/* Compute SHA-256 of a file using platform tools (sha256sum/shasum). - * Writes 64-char hex digest + NUL to out. Returns 0 on success. */ -static int sha256_file(const char *path, char *out, size_t out_size) { - if (out_size < SHA256_BUF_SIZE) { - return -1; - } - char cmd[1024]; -#ifdef __APPLE__ - snprintf(cmd, sizeof(cmd), "shasum -a 256 '%s' 2>/dev/null", path); -#else - snprintf(cmd, sizeof(cmd), "sha256sum '%s' 2>/dev/null", path); -#endif - // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) - FILE *fp = cbm_popen(cmd, "r"); - if (!fp) { - return -1; - } - char line[256]; - if (fgets(line, sizeof(line), fp)) { - /* Output format: <64-char hash> */ - char *space = strchr(line, ' '); - if (space && space - line == SHA256_HEX_LEN) { - memcpy(out, line, SHA256_HEX_LEN); - out[SHA256_HEX_LEN] = '\0'; - cbm_pclose(fp); - return 0; - } - } - cbm_pclose(fp); - return -1; -} - -/* Download checksums.txt and verify the archive integrity. - * Returns: 0 = verified OK, 1 = mismatch (FAIL), -1 = could not verify (warning). 
*/ -static int verify_download_checksum(const char *archive_path, const char *archive_name) { - char checksum_file[256]; - snprintf(checksum_file, sizeof(checksum_file), "%s/cbm-checksums.txt", cbm_tmpdir()); - - char cmd[1024]; - snprintf(cmd, sizeof(cmd), - "curl -fsSL -o '%s' " - "'https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/" - "checksums.txt' 2>/dev/null", - checksum_file); - // NOLINTNEXTLINE(cert-env33-c) — intentional CLI subprocess for download - int rc = system(cmd); - if (rc != 0) { - fprintf(stderr, "warning: could not download checksums.txt — skipping verification\n"); - cbm_unlink(checksum_file); - return -1; - } - - FILE *fp = fopen(checksum_file, "r"); - cbm_unlink(checksum_file); - if (!fp) { - return -1; - } - - char expected[SHA256_BUF_SIZE] = {0}; - char line[512]; - while (fgets(line, sizeof(line), fp)) { - /* Format: <64-char sha256> \n */ - if (strlen(line) > CHECKSUM_LINE_MIN && strstr(line, archive_name)) { - memcpy(expected, line, SHA256_HEX_LEN); - expected[SHA256_HEX_LEN] = '\0'; - break; - } - } - fclose(fp); - - if (expected[0] == '\0') { - fprintf(stderr, "warning: %s not found in checksums.txt\n", archive_name); - return -1; - } - - char actual[SHA256_BUF_SIZE] = {0}; - if (sha256_file(archive_path, actual, sizeof(actual)) != 0) { - fprintf(stderr, "warning: sha256sum/shasum not available — skipping verification\n"); - return -1; - } - - if (strcmp(expected, actual) != 0) { - fprintf(stderr, "error: CHECKSUM MISMATCH — downloaded binary may be compromised!\n"); - fprintf(stderr, " expected: %s\n", expected); - fprintf(stderr, " actual: %s\n", actual); - return 1; - } - - printf("Checksum verified: %s\n", actual); - return 0; -} - -/* ── Detect OS/arch for download URL ──────────────────────────── */ - -static const char *detect_os(void) { -#ifdef _WIN32 - return "windows"; -#elif defined(__APPLE__) - return "darwin"; -#else - return "linux"; -#endif -} - -static const char *detect_arch(void) { -#if 
defined(__aarch64__) || defined(_M_ARM64) - return "arm64"; -#else - return "amd64"; -#endif -} - -/* ── Subcommand: install ──────────────────────────────────────── */ - -int cbm_cmd_install(int argc, char **argv) { - parse_auto_answer(argc, argv); - bool dry_run = false; - bool force = false; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "--dry-run") == 0) { - dry_run = true; - } - if (strcmp(argv[i], "--force") == 0) { - force = true; - } - } - - const char *home = getenv("HOME"); - if (!home) { - fprintf(stderr, "error: HOME not set\n"); - return 1; - } - - printf("codebase-memory-mcp install %s\n\n", CBM_VERSION); - - /* Step 1: Check for existing indexes */ - int index_count = 0; - const char *cache_dir = get_cache_dir(home); - if (cache_dir) { - cbm_dir_t *d = cbm_opendir(cache_dir); - if (d) { - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { - index_count++; - } - } - cbm_closedir(d); - } - } - - if (index_count > 0) { - printf("Found %d existing index(es) that must be rebuilt:\n", index_count); - cbm_list_indexes(home); - printf("\n"); - if (!prompt_yn("Delete these indexes and continue with install?")) { - printf("Install cancelled.\n"); - return 1; - } - if (!dry_run) { - int removed = cbm_remove_indexes(home); - printf("Removed %d index(es).\n\n", removed); - } - } - - /* Step 2: Binary path */ - char self_path[1024]; - snprintf(self_path, sizeof(self_path), "%s/.local/bin/codebase-memory-mcp", home); - - /* Step 3: Detect agents */ - cbm_detected_agents_t agents = cbm_detect_agents(home); - printf("Detected agents:"); - if (agents.claude_code) { - printf(" Claude-Code"); - } - if (agents.codex) { - printf(" Codex"); - } - if (agents.gemini) { - printf(" Gemini-CLI"); - } - if (agents.zed) { - printf(" Zed"); - } - if (agents.opencode) { - printf(" OpenCode"); - } - if (agents.antigravity) { - printf(" Antigravity"); - } - if 
(agents.aider) { - printf(" Aider"); - } - if (agents.kilocode) { - printf(" KiloCode"); - } - if (agents.vscode) { - printf(" VS-Code"); - } - if (agents.openclaw) { - printf(" OpenClaw"); - } - if (!agents.claude_code && !agents.codex && !agents.gemini && !agents.zed && !agents.opencode && - !agents.antigravity && !agents.aider && !agents.kilocode && !agents.vscode && - !agents.openclaw) { - printf(" (none)"); - } - printf("\n\n"); - - /* Step 4: Install Claude Code skills + hooks */ - if (agents.claude_code) { - char skills_dir[1024]; - snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); - printf("Claude Code:\n"); - - int skill_count = cbm_install_skills(skills_dir, force, dry_run); - printf(" skills: %d installed\n", skill_count); - - if (cbm_remove_old_monolithic_skill(skills_dir, dry_run)) { - printf(" removed old monolithic skill\n"); - } - - /* MCP config — write to both locations for compatibility. - * Claude Code <=2.1.x reads ~/.claude/.mcp.json - * Claude Code >=2.1.80 reads ~/.claude.json */ - char mcp_path[1024]; - snprintf(mcp_path, sizeof(mcp_path), "%s/.claude/.mcp.json", home); - if (!dry_run) { - cbm_install_editor_mcp(self_path, mcp_path); - } - printf(" mcp: %s\n", mcp_path); - - char mcp_path2[1024]; - snprintf(mcp_path2, sizeof(mcp_path2), "%s/.claude.json", home); - if (!dry_run) { - cbm_install_editor_mcp(self_path, mcp_path2); - } - printf(" mcp: %s\n", mcp_path2); - - /* PreToolUse hook */ - char settings_path[1024]; - snprintf(settings_path, sizeof(settings_path), "%s/.claude/settings.json", home); - if (!dry_run) { - cbm_upsert_claude_hooks(settings_path); - } - printf(" hooks: PreToolUse (Grep|Glob reminder)\n"); - } - - /* Step 5: Install Codex CLI */ - if (agents.codex) { - printf("Codex CLI:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.codex/config.toml", home); - if (!dry_run) { - cbm_upsert_codex_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - - char 
instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.codex/AGENTS.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - } - - /* Step 6: Install Gemini CLI */ - if (agents.gemini) { - printf("Gemini CLI:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.gemini/settings.json", home); - if (!dry_run) { - cbm_install_editor_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.gemini/GEMINI.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - - /* BeforeTool hook (shared with Antigravity) */ - if (!dry_run) { - cbm_upsert_gemini_hooks(config_path); - } - printf(" hooks: BeforeTool (grep/file search reminder)\n"); - } - - /* Step 7: Install Zed */ - if (agents.zed) { - printf("Zed:\n"); - char config_path[1024]; -#ifdef __APPLE__ - snprintf(config_path, sizeof(config_path), - "%s/Library/Application Support/Zed/settings.json", home); -#else - snprintf(config_path, sizeof(config_path), "%s/.config/zed/settings.json", home); -#endif - if (!dry_run) { - cbm_install_zed_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - } - - /* Step 8: Install OpenCode */ - if (agents.opencode) { - printf("OpenCode:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.config/opencode/opencode.json", home); - if (!dry_run) { - cbm_upsert_opencode_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.config/opencode/AGENTS.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - } - - /* Step 9: Install Antigravity */ - if (agents.antigravity) { 
- printf("Antigravity:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.gemini/antigravity/mcp_config.json", home); - if (!dry_run) { - cbm_upsert_antigravity_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.gemini/antigravity/AGENTS.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - } - - /* Step 10: Install Aider */ - if (agents.aider) { - printf("Aider:\n"); - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/CONVENTIONS.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - } - - /* Step 11: Install KiloCode */ - if (agents.kilocode) { - printf("KiloCode:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), - "%s/.config/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", - home); - if (!dry_run) { - cbm_install_editor_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - - /* KiloCode uses ~/.kilocode/rules/ for global instructions */ - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.kilocode/rules/codebase-memory-mcp.md", home); - if (!dry_run) { - cbm_upsert_instructions(instr_path, agent_instructions_content); - } - printf(" instructions: %s\n", instr_path); - } - - /* Step 12: Install VS Code */ - if (agents.vscode) { - printf("VS Code:\n"); - char config_path[1024]; -#ifdef __APPLE__ - snprintf(config_path, sizeof(config_path), - "%s/Library/Application Support/Code/User/mcp.json", home); -#else - snprintf(config_path, sizeof(config_path), "%s/.config/Code/User/mcp.json", home); -#endif - if (!dry_run) { - cbm_install_vscode_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - } - - /* Step 13: Install OpenClaw */ - if 
(agents.openclaw) { - printf("OpenClaw:\n"); - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.openclaw/openclaw.json", home); - if (!dry_run) { - cbm_install_editor_mcp(self_path, config_path); - } - printf(" mcp: %s\n", config_path); - } - - /* Step 14: Ensure PATH */ - char bin_dir[1024]; - snprintf(bin_dir, sizeof(bin_dir), "%s/.local/bin", home); - const char *rc = cbm_detect_shell_rc(home); - if (rc[0]) { - int path_rc = cbm_ensure_path(bin_dir, rc, dry_run); - if (path_rc == 0) { - printf("\nAdded %s to PATH in %s\n", bin_dir, rc); - } else if (path_rc == 1) { - printf("\nPATH already includes %s\n", bin_dir); - } - } - - printf("\nInstall complete. Restart your shell or run:\n"); - printf(" source %s\n", rc); - if (dry_run) { - printf("\n(dry-run — no files were modified)\n"); - } - return 0; -} - -/* ── Subcommand: uninstall ────────────────────────────────────── */ - -int cbm_cmd_uninstall(int argc, char **argv) { - parse_auto_answer(argc, argv); - bool dry_run = false; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "--dry-run") == 0) { - dry_run = true; - } - } - - const char *home = getenv("HOME"); - if (!home) { - fprintf(stderr, "error: HOME not set\n"); - return 1; - } - - printf("codebase-memory-mcp uninstall\n\n"); - - /* Step 1: Detect agents and remove per-agent configs */ - cbm_detected_agents_t agents = cbm_detect_agents(home); - - if (agents.claude_code) { - char skills_dir[1024]; - snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); - int removed = cbm_remove_skills(skills_dir, dry_run); - printf("Claude Code: removed %d skill(s)\n", removed); - - char mcp_path[1024]; - snprintf(mcp_path, sizeof(mcp_path), "%s/.claude/.mcp.json", home); - if (!dry_run) { - cbm_remove_editor_mcp(mcp_path); - } - printf(" removed MCP config entry\n"); - - /* Also remove from new location (Claude Code >=2.1.80) */ - char mcp_path2[1024]; - snprintf(mcp_path2, sizeof(mcp_path2), "%s/.claude.json", home); - if 
(!dry_run) { - cbm_remove_editor_mcp(mcp_path2); - } - - char settings_path[1024]; - snprintf(settings_path, sizeof(settings_path), "%s/.claude/settings.json", home); - if (!dry_run) { - cbm_remove_claude_hooks(settings_path); - } - printf(" removed PreToolUse hook\n"); - } - - if (agents.codex) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.codex/config.toml", home); - if (!dry_run) { - cbm_remove_codex_mcp(config_path); - } - printf("Codex CLI: removed MCP config entry\n"); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.codex/AGENTS.md", home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf(" removed instructions\n"); - } - - if (agents.gemini) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.gemini/settings.json", home); - if (!dry_run) { - cbm_remove_editor_mcp(config_path); - } - printf("Gemini CLI: removed MCP config entry\n"); - - if (!dry_run) { - cbm_remove_gemini_hooks(config_path); - } - printf(" removed BeforeTool hook\n"); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.gemini/GEMINI.md", home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf(" removed instructions\n"); - } - - if (agents.zed) { - char config_path[1024]; -#ifdef __APPLE__ - snprintf(config_path, sizeof(config_path), - "%s/Library/Application Support/Zed/settings.json", home); -#else - snprintf(config_path, sizeof(config_path), "%s/.config/zed/settings.json", home); -#endif - if (!dry_run) { - cbm_remove_zed_mcp(config_path); - } - printf("Zed: removed MCP config entry\n"); - } - - if (agents.opencode) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.config/opencode/opencode.json", home); - if (!dry_run) { - cbm_remove_opencode_mcp(config_path); - } - printf("OpenCode: removed MCP config entry\n"); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.config/opencode/AGENTS.md", 
home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf(" removed instructions\n"); - } - - if (agents.antigravity) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.gemini/antigravity/mcp_config.json", home); - if (!dry_run) { - cbm_remove_antigravity_mcp(config_path); - } - printf("Antigravity: removed MCP config entry\n"); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.gemini/antigravity/AGENTS.md", home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf(" removed instructions\n"); - } - - if (agents.aider) { - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/CONVENTIONS.md", home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf("Aider: removed instructions\n"); - } - - if (agents.kilocode) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), - "%s/.config/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", - home); - if (!dry_run) { - cbm_remove_editor_mcp(config_path); - } - printf("KiloCode: removed MCP config entry\n"); - - char instr_path[1024]; - snprintf(instr_path, sizeof(instr_path), "%s/.kilocode/rules/codebase-memory-mcp.md", home); - if (!dry_run) { - cbm_remove_instructions(instr_path); - } - printf(" removed instructions\n"); - } - - if (agents.vscode) { - char config_path[1024]; -#ifdef __APPLE__ - snprintf(config_path, sizeof(config_path), - "%s/Library/Application Support/Code/User/mcp.json", home); -#else - snprintf(config_path, sizeof(config_path), "%s/.config/Code/User/mcp.json", home); -#endif - if (!dry_run) { - cbm_remove_vscode_mcp(config_path); - } - printf("VS Code: removed MCP config entry\n"); - } - - if (agents.openclaw) { - char config_path[1024]; - snprintf(config_path, sizeof(config_path), "%s/.openclaw/openclaw.json", home); - if (!dry_run) { - cbm_remove_editor_mcp(config_path); - } - printf("OpenClaw: removed MCP config entry\n"); - } - - /* Step 2: 
Remove indexes */ - int index_count = 0; - const char *cache_dir = get_cache_dir(home); - if (cache_dir) { - cbm_dir_t *d = cbm_opendir(cache_dir); - if (d) { - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { - index_count++; - } - } - cbm_closedir(d); - } - } - - if (index_count > 0) { - printf("\nFound %d index(es):\n", index_count); - cbm_list_indexes(home); - if (prompt_yn("Delete these indexes?")) { - int idx_removed = cbm_remove_indexes(home); - printf("Removed %d index(es).\n", idx_removed); - } else { - printf("Indexes kept.\n"); - } - } - - /* Step 3: Remove binary */ - char bin_path[1024]; - snprintf(bin_path, sizeof(bin_path), "%s/.local/bin/codebase-memory-mcp", home); - struct stat st; - if (stat(bin_path, &st) == 0) { - if (!dry_run) { - cbm_unlink(bin_path); - } - printf("Removed %s\n", bin_path); - } - - printf("\nUninstall complete.\n"); - if (dry_run) { - printf("(dry-run — no files were modified)\n"); - } - return 0; -} - -/* ── Subcommand: update ───────────────────────────────────────── */ - -int cbm_cmd_update(int argc, char **argv) { - parse_auto_answer(argc, argv); - - const char *home = getenv("HOME"); - if (!home) { - fprintf(stderr, "error: HOME not set\n"); - return 1; - } - - printf("codebase-memory-mcp update (current: %s)\n\n", CBM_VERSION); - - /* Step 1: Check for existing indexes */ - int index_count = 0; - const char *cache_dir = get_cache_dir(home); - if (cache_dir) { - cbm_dir_t *d = cbm_opendir(cache_dir); - if (d) { - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { - index_count++; - } - } - cbm_closedir(d); - } - } - - if (index_count > 0) { - printf("Found %d existing index(es) that must be rebuilt after update:\n", index_count); - cbm_list_indexes(home); - printf("\n"); - if (!prompt_yn("Delete these indexes and 
continue with update?")) { - printf("Update cancelled.\n"); - return 1; - } - int removed = cbm_remove_indexes(home); - printf("Removed %d index(es).\n\n", removed); - } - - /* Step 2: Ask for UI variant */ - printf("Which binary variant do you want?\n"); - printf(" 1) standard — MCP server only\n"); - printf(" 2) ui — MCP server + embedded graph visualization\n"); - printf("Choose (1/2): "); - (void)fflush(stdout); - - char choice[16]; - if (!fgets(choice, sizeof(choice), stdin)) { - fprintf(stderr, "error: failed to read input\n"); - return 1; - } - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool want_ui = (choice[0] == '2') ? true : false; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *variant = want_ui ? "ui-" : ""; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *variant_label = want_ui ? "ui" : "standard"; - - /* Step 3: Build download URL */ - const char *os = detect_os(); - const char *arch = detect_arch(); - const char *ext = strcmp(os, "windows") == 0 ? 
"zip" : "tar.gz"; - - char url[512]; - if (want_ui) { - snprintf(url, sizeof(url), - "https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/" - "codebase-memory-mcp-ui-%s-%s.%s", - os, arch, ext); - } else { - snprintf(url, sizeof(url), - "https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/" - "codebase-memory-mcp-%s-%s.%s", - os, arch, ext); - } - - printf("\nDownloading %s binary for %s/%s ...\n", variant_label, os, arch); - printf(" %s\n", url); - - /* Step 4: Download using curl */ - char tmp_archive[256]; - snprintf(tmp_archive, sizeof(tmp_archive), "%s/cbm-update.%s", cbm_tmpdir(), ext); - - char cmd[1024]; - snprintf(cmd, sizeof(cmd), "curl -fSL --progress-bar -o '%s' '%s'", tmp_archive, url); - // NOLINTNEXTLINE(cert-env33-c) — intentional CLI subprocess for download - int rc = system(cmd); - if (rc != 0) { - fprintf(stderr, "error: download failed (exit %d)\n", rc); - cbm_unlink(tmp_archive); - return 1; - } - - /* Step 4b: Verify checksum */ - { - /* Build the expected archive filename (matches checksums.txt format) */ - char archive_name[256]; - if (want_ui) { - snprintf(archive_name, sizeof(archive_name), "codebase-memory-mcp-ui-%s-%s.%s", os, - arch, ext); - } else { - snprintf(archive_name, sizeof(archive_name), "codebase-memory-mcp-%s-%s.%s", os, arch, - ext); - } - int crc = verify_download_checksum(tmp_archive, archive_name); - if (crc == 1) { - /* Hard fail: checksum mismatch */ - cbm_unlink(tmp_archive); - return 1; - } - /* crc == -1: could not verify (warning only), crc == 0: verified OK */ - } - - /* Step 5: Extract binary */ - char bin_dest[1024]; - snprintf(bin_dest, sizeof(bin_dest), "%s/.local/bin/codebase-memory-mcp", home); - - /* Ensure install directory exists */ - char bin_dir[1024]; - snprintf(bin_dir, sizeof(bin_dir), "%s/.local/bin", home); - cbm_mkdir_p(bin_dir, 0755); - - if (strcmp(ext, "tar.gz") == 0) { - /* Read archive into memory and extract */ - FILE *f = fopen(tmp_archive, "rb"); - 
if (!f) { - fprintf(stderr, "error: cannot open %s\n", tmp_archive); - return 1; - } - fseek(f, 0, SEEK_END); - long fsize = ftell(f); - fseek(f, 0, SEEK_SET); - - unsigned char *data = malloc((size_t)fsize); - if (!data) { - fclose(f); - cbm_unlink(tmp_archive); - return 1; - } - fread(data, 1, (size_t)fsize, f); - fclose(f); - - int bin_len = 0; - unsigned char *bin_data = cbm_extract_binary_from_targz(data, (int)fsize, &bin_len); - free(data); - cbm_unlink(tmp_archive); - - if (!bin_data || bin_len <= 0) { - fprintf(stderr, "error: binary not found in archive\n"); - free(bin_data); - return 1; - } - - /* Open with final permissions atomically (no TOCTOU between write and chmod) */ -#ifndef _WIN32 - int fd = open(bin_dest, O_WRONLY | O_CREAT | O_TRUNC, 0755); - if (fd < 0) { - fprintf(stderr, "error: cannot write to %s\n", bin_dest); - free(bin_data); - return 1; - } - FILE *out = fdopen(fd, "wb"); -#else - FILE *out = fopen(bin_dest, "wb"); -#endif - if (!out) { - fprintf(stderr, "error: cannot write to %s\n", bin_dest); - free(bin_data); -#ifndef _WIN32 - close(fd); -#endif - return 1; - } - fwrite(bin_data, 1, (size_t)bin_len, out); - fclose(out); - free(bin_data); - } else { - /* Zip extraction: exec unzip directly without shell interpretation */ - const char *unzip_argv[] = {"unzip", "-o", "-d", bin_dir, tmp_archive, NULL}; - rc = cbm_exec_no_shell(unzip_argv); - cbm_unlink(tmp_archive); - if (rc != 0) { - fprintf(stderr, "error: extraction failed\n"); - return 1; - } - /* Rename variant binary if needed */ - if (want_ui) { - char ui_bin[1024]; - snprintf(ui_bin, sizeof(ui_bin), "%s/codebase-memory-mcp-ui.exe", bin_dir); - snprintf(bin_dest, sizeof(bin_dest), "%s/codebase-memory-mcp.exe", bin_dir); - rename(ui_bin, bin_dest); - } - } - - /* Step 6: Reinstall skills (force to pick up new content) */ - char skills_dir[1024]; - snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); - int skill_count = cbm_install_skills(skills_dir, true, false); - 
printf("Updated %d skill(s).\n", skill_count); - - /* Step 7: Verify new version (exec directly, no shell interpretation) */ - printf("\nUpdate complete. Verifying:\n"); - { - const char *ver_argv[] = {bin_dest, "--version", NULL}; - (void)cbm_exec_no_shell(ver_argv); - } - - printf("\nAll project indexes were cleared. They will be rebuilt\n"); - printf("automatically when you next use the MCP server.\n"); - (void)variant; - return 0; -} +/* + * cli.c — CLI subcommand handlers for install, uninstall, update, version. + * + * Port of Go cmd/codebase-memory-mcp/ install/update logic. + * All functions accept explicit paths for testability. + */ +#include "cli/cli.h" +#include "foundation/compat.h" +#include "foundation/platform.h" +#include "foundation/str_util.h" + +// the correct standard headers are included below but clang-tidy doesn't map them. +#include +#include "foundation/compat_fs.h" + +#ifndef CBM_VERSION +#define CBM_VERSION "dev" +#endif +#include // EEXIST +#include // open, O_WRONLY, O_CREAT, O_TRUNC +#include // uintptr_t +#include +#include +#include // strtok_r +#include // mode_t, S_IXUSR +#include // MAX_WBITS + +/* yyjson for JSON read-modify-write */ +#include "yyjson/yyjson.h" + +/* ── Constants ────────────────────────────────────────────────── */ + +/* Directory permissions: rwxr-x--- */ +#define DIR_PERMS 0750 + +/* Decompression buffer cap (500 MB) */ +#define DECOMPRESS_MAX_BYTES ((size_t)500 * 1024 * 1024) + +/* Tar header field offsets */ +#define TAR_NAME_LEN 101 /* filename field: bytes 0-99 + NUL */ +#define TAR_SIZE_OFFSET 124 /* octal size field offset */ +#define TAR_SIZE_LEN 13 /* octal size field: bytes 124-135 + NUL */ +#define TAR_TYPE_OFFSET 156 /* type flag byte */ +#define TAR_BINARY_NAME "codebase-memory-mcp" +#define TAR_BINARY_NAME_LEN 19 +#define TAR_BLOCK_SIZE 512 /* tar record alignment */ +#define TAR_BLOCK_MASK 511 /* TAR_BLOCK_SIZE - 1 */ + +/* ── Version ──────────────────────────────────────────────────── */ + 
/* ── Version comparison ───────────────────────────────────────── */

/*
 * Parse the numeric "major.minor.patch" core of a version string into out[].
 *
 * An optional leading 'v'/'V' is skipped. Parsing stops after three
 * components, at a component that is not followed by '.', or when a
 * component would start with '-' (pre-release) or '+' (build metadata).
 * Components that are not reached stay 0; a NULL string parses as 0.0.0.
 *
 * Returns the number of components consumed (0..3).
 */
static int parse_semver(const char *v, int out[3]) {
    out[0] = out[1] = out[2] = 0;
    if (!v) {
        return 0;
    }
    /* Skip v prefix */
    if (*v == 'v' || *v == 'V') {
        v++;
    }

    int count = 0;
    while (*v && count < 3) {
        if (*v == '-' || *v == '+') {
            break; /* stop at pre-release / build-metadata suffix */
        }
        char *endptr;
        long val = strtol(v, &endptr, 10);
        out[count++] = (int)val;
        if (*endptr != '.') {
            break;
        }
        v = endptr + 1;
    }
    return count;
}

/*
 * Report whether a version string carries a pre-release suffix, i.e. a '-'
 * that appears before any '+'. Everything from '+' onwards is build metadata,
 * which may itself contain hyphens and must not count as a pre-release tag.
 * NULL has no pre-release.
 */
static bool has_prerelease(const char *v) {
    if (!v) {
        return false;
    }
    size_t core_len = strcspn(v, "+");
    return memchr(v, '-', core_len) != NULL;
}

/*
 * Compare two version strings.
 *
 * Returns a negative value when a < b, zero when they are equal, and a
 * positive value when a > b. The numeric components decide first (the result
 * is then the difference of the first differing component). On a tie, a
 * version with a pre-release suffix sorts below one without (-1 / 1).
 * NULL is treated like "0.0.0".
 */
int cbm_compare_versions(const char *a, const char *b) {
    int pa[3];
    int pb[3];
    parse_semver(a, pa);
    parse_semver(b, pb);

    for (int i = 0; i < 3; i++) {
        if (pa[i] != pb[i]) {
            return pa[i] - pb[i];
        }
    }

    /* Same base version — a release beats a pre-release build */
    bool a_pre = has_prerelease(a);
    bool b_pre = has_prerelease(b);
    if (a_pre && !b_pre) {
        return -1;
    }
    if (!a_pre && b_pre) {
        return 1;
    }
    return 0;
}
/* ── CLI binary detection ─────────────────────────────────────── */

/*
 * Locate a command-line tool by name.
 *
 * Search order:
 *   1. every non-empty ':'-separated entry of $PATH — the first
 *      "<dir>/<name>" that stat() finds with the owner-execute bit set;
 *   2. when home_dir is non-empty, a fixed list of install locations
 *      (/usr/local/bin, ~/.npm/bin, ~/.local/bin, ~/.cargo/bin and, on macOS,
 *      /opt/homebrew/bin) — the first one that exists.
 *
 * Returns the full path, or "" when name is NULL/empty or nothing matched.
 * The path is stored in a static buffer: it is overwritten by the next call
 * and must not be freed. Candidates too long for the buffer are skipped
 * rather than probed in clipped form.
 */
const char *cbm_find_cli(const char *name, const char *home_dir) {
    static char buf[512];
    if (!name || !name[0]) {
        return "";
    }

    /* Check PATH first — walked in place, so its length is not limited */
    // NOLINTNEXTLINE(concurrency-mt-unsafe)
    const char *path_env = getenv("PATH");
    if (path_env) {
        const char *cursor = path_env;
        while (*cursor) {
            size_t dir_len = strcspn(cursor, ":");
            if (dir_len > 0) {
                int n = snprintf(buf, sizeof(buf), "%.*s/%s", (int)dir_len, cursor, name);
                struct stat st;
                // NOLINTNEXTLINE(misc-include-cleaner) — S_IXUSR provided by standard header
                if (n > 0 && (size_t)n < sizeof(buf) && stat(buf, &st) == 0 &&
                    (st.st_mode & S_IXUSR)) {
                    return buf;
                }
            }
            cursor += dir_len;
            if (*cursor == ':') {
                cursor++;
            }
        }
    }

    /* Check common install locations */
    if (home_dir && home_dir[0]) {
        char paths[5][512];
        int lens[5];
        lens[0] = snprintf(paths[0], sizeof(paths[0]), "/usr/local/bin/%s", name);
        lens[1] = snprintf(paths[1], sizeof(paths[1]), "%s/.npm/bin/%s", home_dir, name);
        lens[2] = snprintf(paths[2], sizeof(paths[2]), "%s/.local/bin/%s", home_dir, name);
        lens[3] = snprintf(paths[3], sizeof(paths[3]), "%s/.cargo/bin/%s", home_dir, name);
#ifdef __APPLE__
        lens[4] = snprintf(paths[4], sizeof(paths[4]), "/opt/homebrew/bin/%s", name);
#else
        paths[4][0] = '\0';
        lens[4] = 0;
#endif

        for (int i = 0; i < 5; i++) {
            /* Skip the unused slot and anything snprintf had to clip */
            if (lens[i] <= 0 || (size_t)lens[i] >= sizeof(paths[i])) {
                continue;
            }
            struct stat st;
            if (stat(paths[i], &st) == 0) {
                snprintf(buf, sizeof(buf), "%s", paths[i]);
                return buf;
            }
        }
    }

    return "";
}
/* ── File utilities ───────────────────────────────────────────── */

/*
 * Copy the file at src to dst byte for byte (binary mode), creating dst or
 * truncating it if it already exists.
 *
 * Returns 0 on success and -1 on any failure: a NULL argument, either file
 * failing to open, a read or write error, or dst failing to flush on close.
 * When the copy fails after dst was opened, the incomplete dst is removed so
 * that callers never pick up a half-written file.
 *
 * Note: dst is opened for writing before src is read, so passing the same
 * file for both truncates it.
 */
int cbm_copy_file(const char *src, const char *dst) {
    if (!src || !dst) {
        return -1;
    }

    FILE *in = fopen(src, "rb");
    if (!in) {
        return -1;
    }

    FILE *out = fopen(dst, "wb");
    if (!out) {
        (void)fclose(in);
        return -1;
    }

    char buf[8192];
    int failed = 0;
    size_t n;
    while ((n = fread(buf, 1, sizeof(buf), in)) > 0) {
        if (fwrite(buf, 1, n, out) != n) {
            failed = 1;
            break;
        }
    }
    /* fread returning 0 means either EOF or a read error — tell them apart */
    if (ferror(in)) {
        failed = 1;
    }

    (void)fclose(in);
    /* fclose flushes the last buffered bytes; its failure is a write failure */
    if (fclose(out) != 0) {
        failed = 1;
    }

    if (failed) {
        (void)remove(dst);
        return -1;
    }
    return 0;
}
ALWAYS invoke this skill when the user " + "asks who calls a function, what a function calls, needs impact analysis, or traces " + "dependencies. Do not grep for function names directly — use codebase-memory-mcp " + "trace_call_path first.\n" + "---\n" + "\n" + "# Call Tracing & Impact Analysis\n" + "\n" + "Use codebase-memory-mcp tools to trace call paths:\n" + "\n" + "## Workflow\n" + "1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n" + "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + " + "callees\n" + "3. `detect_changes` — find what changed and assess risk_labels\n" + "\n" + "## Direction Options\n" + "- `inbound` — who calls this function?\n" + "- `outbound` — what does this function call?\n" + "- `both` — full context (callers + callees)\n"; + +static const char skill_quality_content[] = + "---\n" + "name: codebase-memory-quality\n" + "description: Code quality analysis expert. ALWAYS invoke this skill when the user asks " + "about dead code, unused functions, complexity, refactor candidates, or cleanup " + "opportunities. Do not search files manually — use codebase-memory-mcp search_graph " + "with degree filters first.\n" + "---\n" + "\n" + "# Code Quality Analysis\n" + "\n" + "Use codebase-memory-mcp tools for quality analysis:\n" + "\n" + "## Dead Code Detection\n" + "- `search_graph(max_degree=0, exclude_entry_points=true)` — find unreferenced functions\n" + "- `search_graph(max_degree=0, label=\"Function\")` — unreferenced functions only\n" + "\n" + "## Complexity Analysis\n" + "- `search_graph(min_degree=10)` — high fan-out functions\n" + "- `search_graph(label=\"Function\", sort_by=\"degree\")` — most-connected functions\n"; + +static const char skill_reference_content[] = + "---\n" + "name: codebase-memory-reference\n" + "description: Codebase-memory-mcp reference guide. 
ALWAYS invoke this skill when the user " + "asks about MCP tools, graph queries, Cypher syntax, edge types, or how to use the " + "knowledge graph. Do not guess tool parameters — load this reference first.\n" + "---\n" + "\n" + "# Codebase Memory MCP Reference\n" + "\n" + "## 14 total MCP Tools\n" + "- `index_repository` — index a project\n" + "- `index_status` — check indexing progress\n" + "- `detect_changes` — find what changed since last index\n" + "- `search_graph` — find nodes by pattern\n" + "- `search_code` — text search in source\n" + "- `query_graph` — Cypher query language\n" + "- `trace_call_path` — call chain traversal\n" + "- `get_code_snippet` — read function source\n" + "- `get_graph_schema` — node/edge type catalog\n" + "- `get_architecture` — high-level summary\n" + "- `list_projects` — indexed projects\n" + "- `delete_project` — remove a project\n" + "- `manage_adr` — architecture decision records\n" + "- `ingest_traces` — import runtime traces\n" + "\n" + "## Edge Types\n" + "CALLS, HTTP_CALLS, ASYNC_CALLS, IMPORTS, DEFINES, DEFINES_METHOD,\n" + "HANDLES, IMPLEMENTS, CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" + "\n" + "## Cypher Examples\n" + "```\n" + "MATCH (f:Function) WHERE f.name =~ '.*Handler.*' RETURN f.name, f.file_path\n" + "MATCH (a)-[r:CALLS]->(b) WHERE a.name = 'main' RETURN b.name\n" + "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path\n" + "```\n"; + +static const char codex_instructions_content[] = + "# Codebase Knowledge Graph\n" + "\n" + "This project uses codebase-memory-mcp to maintain a knowledge graph of the codebase.\n" + "Use the MCP tools to explore and understand the code:\n" + "\n" + "- `search_graph` — find functions, classes, routes by pattern\n" + "- `trace_call_path` — trace who calls a function or what it calls\n" + "- `get_code_snippet` — read function source code\n" + "- `query_graph` — run Cypher queries for complex patterns\n" + "- `get_architecture` — high-level project summary\n" + "\n" 
+ "Always prefer graph tools over grep for code discovery.\n"; + +static const cbm_skill_t skills[CBM_SKILL_COUNT] = { + {"codebase-memory-exploring", skill_exploring_content}, + {"codebase-memory-tracing", skill_tracing_content}, + {"codebase-memory-quality", skill_quality_content}, + {"codebase-memory-reference", skill_reference_content}, +}; + +const cbm_skill_t *cbm_get_skills(void) { + return skills; +} + +const char *cbm_get_codex_instructions(void) { + return codex_instructions_content; +} + +/* ── Recursive mkdir (via compat_fs) ──────────────────────────── */ + +static int mkdirp(const char *path, int mode) { + return (int)cbm_mkdir_p(path, mode) ? 0 : -1; +} + +/* ── Recursive rmdir ──────────────────────────────────────────── */ + +// NOLINTNEXTLINE(misc-no-recursion) — intentional recursive directory removal +static int rmdir_recursive(const char *path) { + cbm_dir_t *d = cbm_opendir(path); + if (!d) { + return -1; + } + + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + char child[1024]; + snprintf(child, sizeof(child), "%s/%s", path, ent->name); + struct stat st; + if (stat(child, &st) == 0 && S_ISDIR(st.st_mode)) { + rmdir_recursive(child); + } else { + cbm_unlink(child); + } + } + cbm_closedir(d); + return cbm_rmdir(path); +} + +/* ── Skill management ─────────────────────────────────────────── */ + +int cbm_install_skills(const char *skills_dir, bool force, bool dry_run) { + if (!skills_dir) { + return 0; + } + int count = 0; + + for (int i = 0; i < CBM_SKILL_COUNT; i++) { + char skill_path[1024]; + snprintf(skill_path, sizeof(skill_path), "%s/%s", skills_dir, skills[i].name); + char file_path[1024]; + snprintf(file_path, sizeof(file_path), "%s/SKILL.md", skill_path); + + /* Check if already exists */ + if (!force) { + struct stat st; + if (stat(file_path, &st) == 0) { + continue; + } + } + + if (dry_run) { + count++; + continue; + } + + if (mkdirp(skill_path, DIR_PERMS) != 0) { + continue; + } + + FILE *f = fopen(file_path, "w"); + 
if (!f) { + continue; + } + (void)fwrite(skills[i].content, 1, strlen(skills[i].content), f); + (void)fclose(f); + count++; + } + return count; +} + +int cbm_remove_skills(const char *skills_dir, bool dry_run) { + if (!skills_dir) { + return 0; + } + int count = 0; + + for (int i = 0; i < CBM_SKILL_COUNT; i++) { + char skill_path[1024]; + snprintf(skill_path, sizeof(skill_path), "%s/%s", skills_dir, skills[i].name); + struct stat st; + if (stat(skill_path, &st) != 0) { + continue; + } + + if (dry_run) { + count++; + continue; + } + + if (rmdir_recursive(skill_path) == 0) { + count++; + } + } + return count; +} + +bool cbm_remove_old_monolithic_skill(const char *skills_dir, bool dry_run) { + if (!skills_dir) { + return false; + } + + char old_path[1024]; + snprintf(old_path, sizeof(old_path), "%s/codebase-memory-mcp", skills_dir); + struct stat st; + if (stat(old_path, &st) != 0 || !S_ISDIR(st.st_mode)) { + return false; + } + + if (dry_run) { + return true; + } + return rmdir_recursive(old_path) == 0; +} + +/* ── JSON config helpers (using yyjson) ───────────────────────── */ + +/* Read a JSON file into a yyjson document. Returns NULL on error. 
*/ +static yyjson_doc *read_json_file(const char *path) { + FILE *f = fopen(path, "r"); + if (!f) { + return NULL; + } + + (void)fseek(f, 0, SEEK_END); + long size = ftell(f); + (void)fseek(f, 0, SEEK_SET); + + if (size <= 0 || size > 10L * 1024 * 1024) { + (void)fclose(f); + return NULL; + } + + char *buf = malloc((size_t)size + 1); + if (!buf) { + (void)fclose(f); + return NULL; + } + + size_t nread = fread(buf, 1, (size_t)size, f); + (void)fclose(f); + // NOLINTNEXTLINE(clang-analyzer-security.ArrayBound) + buf[nread] = '\0'; + + /* Allow JSONC (comments + trailing commas) — Zed settings.json uses this format */ + yyjson_read_flag flags = YYJSON_READ_ALLOW_COMMENTS | YYJSON_READ_ALLOW_TRAILING_COMMAS; + yyjson_doc *doc = yyjson_read(buf, nread, flags); + free(buf); + return doc; +} + +/* Write a mutable yyjson document to a file with pretty printing. */ +static int write_json_file(const char *path, yyjson_mut_doc *doc) { + /* Ensure parent directory exists */ + char dir[1024]; + snprintf(dir, sizeof(dir), "%s", path); + char *last_slash = strrchr(dir, '/'); + if (last_slash) { + *last_slash = '\0'; + mkdirp(dir, DIR_PERMS); + } + + yyjson_write_flag flags = YYJSON_WRITE_PRETTY | YYJSON_WRITE_ESCAPE_UNICODE; + size_t len; + char *json = yyjson_mut_write(doc, flags, &len); + if (!json) { + return -1; + } + + FILE *f = fopen(path, "w"); + if (!f) { + free(json); + return -1; + } + + size_t written = fwrite(json, 1, len, f); + /* Add trailing newline */ + (void)fputc('\n', f); + (void)fclose(f); + free(json); + + return written == len ? 
0 : -1; +} + +/* ── Editor MCP: Cursor/Windsurf/Gemini (mcpServers key) ──────── */ + +int cbm_install_editor_mcp(const char *binary_path, const char *config_path) { + if (!binary_path || !config_path) { + return -1; + } + + /* Read existing or start fresh */ + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + if (!mdoc) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + yyjson_mut_val *root; + if (doc) { + root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + } else { + root = yyjson_mut_obj(mdoc); + } + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + /* Get or create mcpServers object */ + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "mcpServers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + servers = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_val(mdoc, root, "mcpServers", servers); + } + + /* Remove existing entry if present */ + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + /* Add our entry */ + yyjson_mut_val *entry = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); + yyjson_mut_obj_add_val(mdoc, servers, "codebase-memory-mcp", entry); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +int cbm_remove_editor_mcp(const char *config_path) { + if (!config_path) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + if (!doc) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "mcpServers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + yyjson_mut_doc_free(mdoc); + return 0; + } + + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + int rc = 
write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +/* ── VS Code MCP (servers key with type:stdio) ────────────────── */ + +int cbm_install_vscode_mcp(const char *binary_path, const char *config_path) { + if (!binary_path || !config_path) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + if (!mdoc) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + yyjson_mut_val *root; + if (doc) { + root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + } else { + root = yyjson_mut_obj(mdoc); + } + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "servers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + servers = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_val(mdoc, root, "servers", servers); + } + + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + yyjson_mut_val *entry = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, entry, "type", "stdio"); + yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); + yyjson_mut_obj_add_val(mdoc, servers, "codebase-memory-mcp", entry); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +int cbm_remove_vscode_mcp(const char *config_path) { + if (!config_path) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + if (!doc) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "servers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + yyjson_mut_doc_free(mdoc); + return 0; + } + + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + int rc = 
write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +/* ── Zed MCP (context_servers with command + args) ────────────── */ + +int cbm_install_zed_mcp(const char *binary_path, const char *config_path) { + if (!binary_path || !config_path) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + if (!mdoc) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + yyjson_mut_val *root; + if (doc) { + root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + } else { + root = yyjson_mut_obj(mdoc); + } + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "context_servers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + servers = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_val(mdoc, root, "context_servers", servers); + } + + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + yyjson_mut_val *entry = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); + yyjson_mut_val *args = yyjson_mut_arr(mdoc); + yyjson_mut_arr_add_str(mdoc, args, ""); + yyjson_mut_obj_add_val(mdoc, entry, "args", args); + yyjson_mut_obj_add_val(mdoc, servers, "codebase-memory-mcp", entry); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +int cbm_remove_zed_mcp(const char *config_path) { + if (!config_path) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + if (!doc) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *servers = yyjson_mut_obj_get(root, "context_servers"); + if (!servers || !yyjson_mut_is_obj(servers)) { + yyjson_mut_doc_free(mdoc); + return 0; + 
} + + yyjson_mut_obj_remove_key(servers, "codebase-memory-mcp"); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +/* ── Agent detection ──────────────────────────────────────────── */ + +cbm_detected_agents_t cbm_detect_agents(const char *home_dir) { + cbm_detected_agents_t agents; + memset(&agents, 0, sizeof(agents)); + if (!home_dir || !home_dir[0]) { + return agents; + } + + char path[1024]; + struct stat st; + + /* Claude Code: ~/.claude/ */ + snprintf(path, sizeof(path), "%s/.claude", home_dir); + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.claude_code = true; + } + + /* Codex CLI: ~/.codex/ */ + snprintf(path, sizeof(path), "%s/.codex", home_dir); + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.codex = true; + } + + /* Gemini CLI: ~/.gemini/ */ + snprintf(path, sizeof(path), "%s/.gemini", home_dir); + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.gemini = true; + } + + /* Zed: platform-specific config dir */ + { +#ifdef __APPLE__ + const char *zed_base = cbm_app_config_dir(); + if (zed_base) { + snprintf(path, sizeof(path), "%s/Library/Application Support/Zed", zed_base); + } +#elif defined(_WIN32) + const char *zed_base = cbm_app_local_dir(); + if (zed_base) { + snprintf(path, sizeof(path), "%s/Zed", zed_base); + } +#else + const char *zed_base = cbm_app_config_dir(); + if (zed_base) { + snprintf(path, sizeof(path), "%s/zed", zed_base); + } +#endif + if (zed_base && stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.zed = true; + } + } + + /* OpenCode: binary on PATH */ + const char *oc = cbm_find_cli("opencode", home_dir); + if (oc[0]) { + agents.opencode = true; + } + + /* Antigravity: ~/.gemini/antigravity/ */ + snprintf(path, sizeof(path), "%s/.gemini/antigravity", home_dir); + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.antigravity = true; + agents.gemini = true; /* parent dir implies gemini */ + } + + /* Aider: binary on PATH */ + const char 
*ai = cbm_find_cli("aider", home_dir); + if (ai[0]) { + agents.aider = true; + } + + /* KiloCode: globalStorage dir */ + { + const char *app_cfg = cbm_app_config_dir(); + if (app_cfg) { +#ifdef __APPLE__ + snprintf(path, sizeof(path), "%s/Library/Application Support/Code/User/globalStorage/kilocode.kilo-code", app_cfg); +#else + snprintf(path, sizeof(path), "%s/Code/User/globalStorage/kilocode.kilo-code", app_cfg); +#endif + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.kilocode = true; + } + } + } + + /* VS Code: User config dir */ + { + const char *app_cfg = cbm_app_config_dir(); + if (app_cfg) { +#ifdef __APPLE__ + snprintf(path, sizeof(path), "%s/Library/Application Support/Code/User", app_cfg); +#else + snprintf(path, sizeof(path), "%s/Code/User", app_cfg); +#endif + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.vscode = true; + } + } + } + + /* OpenClaw: ~/.openclaw/ dir */ + snprintf(path, sizeof(path), "%s/.openclaw", home_dir); + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + agents.openclaw = true; + } + + return agents; +} + +/* ── Shared agent instructions content ────────────────────────── */ + +static const char agent_instructions_content[] = + "# Codebase Knowledge Graph (codebase-memory-mcp)\n" + "\n" + "This project uses codebase-memory-mcp to maintain a knowledge graph of the codebase.\n" + "ALWAYS prefer MCP graph tools over grep/glob/file-search for code discovery.\n" + "\n" + "## Priority Order\n" + "1. `search_graph` — find functions, classes, routes, variables by pattern\n" + "2. `trace_call_path` — trace who calls a function or what it calls\n" + "3. `get_code_snippet` — read specific function/class source code\n" + "4. `query_graph` — run Cypher queries for complex patterns\n" + "5. 
`get_architecture` — high-level project summary\n" + "\n" + "## When to fall back to grep/glob\n" + "- Searching for string literals, error messages, config values\n" + "- Searching non-code files (Dockerfiles, shell scripts, configs)\n" + "- When MCP tools return insufficient results\n" + "\n" + "## Examples\n" + "- Find a handler: `search_graph(name_pattern=\".*OrderHandler.*\")`\n" + "- Who calls it: `trace_call_path(function_name=\"OrderHandler\", direction=\"inbound\")`\n" + "- Read source: `get_code_snippet(qualified_name=\"pkg/orders.OrderHandler\")`\n"; + +const char *cbm_get_agent_instructions(void) { + return agent_instructions_content; +} + +/* ── Instructions file upsert ─────────────────────────────────── */ + +#define CMM_MARKER_START "" +#define CMM_MARKER_END "" + +/* Read entire file into malloc'd buffer. Returns NULL on error. */ +static char *read_file_str(const char *path, size_t *out_len) { + FILE *f = fopen(path, "r"); + if (!f) { + if (out_len) { + *out_len = 0; + } + return NULL; + } + (void)fseek(f, 0, SEEK_END); + long size = ftell(f); + (void)fseek(f, 0, SEEK_SET); + if (size < 0 || size > 10L * 1024 * 1024) { /* cap at 10 MB */ + (void)fclose(f); + return NULL; + } + + char *buf = malloc((size_t)size + 1); + if (!buf) { + (void)fclose(f); + return NULL; + } + size_t nread = fread(buf, 1, (size_t)size, f); + (void)fclose(f); + buf[nread] = '\0'; + if (out_len) { + *out_len = nread; + } + return buf; +} + +/* Write string to file, creating parent dirs if needed. */ +static int write_file_str(const char *path, const char *content) { + /* Ensure parent directory */ + char dir[1024]; + snprintf(dir, sizeof(dir), "%s", path); + char *last_slash = strrchr(dir, '/'); + if (last_slash) { + *last_slash = '\0'; + mkdirp(dir, DIR_PERMS); + } + + FILE *f = fopen(path, "w"); + if (!f) { + return -1; + } + size_t len = strlen(content); + size_t written = fwrite(content, 1, len, f); + (void)fclose(f); + return written == len ? 
0 : -1; +} + +int cbm_upsert_instructions(const char *path, const char *content) { + if (!path || !content) { + return -1; + } + + size_t existing_len = 0; + char *existing = read_file_str(path, &existing_len); + + /* Build the marker-wrapped section */ + size_t section_len = + strlen(CMM_MARKER_START) + 1 + strlen(content) + strlen(CMM_MARKER_END) + 1; + char *section = malloc(section_len + 1); + if (!section) { + free(existing); + return -1; + } + snprintf(section, section_len + 1, "%s\n%s%s\n", CMM_MARKER_START, content, CMM_MARKER_END); + + if (!existing) { + /* File doesn't exist — create with just the section */ + int rc = write_file_str(path, section); + free(section); + return rc; + } + + /* Check if markers already exist */ + char *start = strstr(existing, CMM_MARKER_START); + char *end = start ? strstr(start, CMM_MARKER_END) : NULL; + + char *result; + if (start && end) { + /* Replace between markers (including markers themselves) */ + end += strlen(CMM_MARKER_END); + /* Skip trailing newline after end marker */ + if (*end == '\n') { + end++; + } + + size_t prefix_len = (size_t)(start - existing); + size_t suffix_len = strlen(end); + size_t new_len = prefix_len + strlen(section) + suffix_len; + result = malloc(new_len + 1); + if (!result) { + free(existing); + free(section); + return -1; + } + memcpy(result, existing, prefix_len); + memcpy(result + prefix_len, section, strlen(section)); + memcpy(result + prefix_len + strlen(section), end, suffix_len); + result[new_len] = '\0'; + } else { + /* Append section */ + size_t new_len = existing_len + 1 + strlen(section); + result = malloc(new_len + 1); + if (!result) { + free(existing); + free(section); + return -1; + } + memcpy(result, existing, existing_len); + result[existing_len] = '\n'; + memcpy(result + existing_len + 1, section, strlen(section)); + result[new_len] = '\0'; + } + + int rc = write_file_str(path, result); + free(existing); + free(section); + free(result); + return rc; +} + +int 
cbm_remove_instructions(const char *path) { + if (!path) { + return -1; + } + + size_t len = 0; + char *content = read_file_str(path, &len); + if (!content) { + return 1; + } + + char *start = strstr(content, CMM_MARKER_START); + char *end = start ? strstr(start, CMM_MARKER_END) : NULL; + + if (!start || !end) { + free(content); + return 1; /* not found */ + } + + end += strlen(CMM_MARKER_END); + if (*end == '\n') { + end++; + } + + /* Also remove a leading newline before the start marker if present */ + if (start > content && *(start - 1) == '\n') { + start--; + } + + size_t prefix_len = (size_t)(start - content); + size_t suffix_len = strlen(end); + size_t new_len = prefix_len + suffix_len; + char *result = malloc(new_len + 1); + if (!result) { + free(content); + return -1; + } + memcpy(result, content, prefix_len); + memcpy(result + prefix_len, end, suffix_len); + result[new_len] = '\0'; + + int rc = write_file_str(path, result); + free(content); + free(result); + return rc; +} + +/* ── Codex MCP config (TOML) ─────────────────────────────────── */ + +#define CODEX_CMM_SECTION "[mcp_servers.codebase-memory-mcp]" + +int cbm_upsert_codex_mcp(const char *binary_path, const char *config_path) { + if (!binary_path || !config_path) { + return -1; + } + + size_t len = 0; + char *content = read_file_str(config_path, &len); + + /* Build our TOML section */ + char section[1024]; + snprintf(section, sizeof(section), "%s\ncommand = \"%s\"\n", CODEX_CMM_SECTION, binary_path); + + if (!content) { + /* No file — create fresh */ + return write_file_str(config_path, section); + } + + /* Check if our section already exists */ + char *existing = strstr(content, CODEX_CMM_SECTION); + if (existing) { + /* Remove old section: from [mcp_servers.codebase-memory-mcp] to next [section] or EOF */ + char *section_end = existing + strlen(CODEX_CMM_SECTION); + /* Find next [section] header */ + char *next_section = strstr(section_end, "\n["); + if (next_section) { + next_section++; /* keep 
the newline before next section */ + } + + size_t prefix_len = (size_t)(existing - content); + const char *suffix = next_section ? next_section : ""; + size_t suffix_len = strlen(suffix); + size_t new_len = prefix_len + strlen(section) + 1 + suffix_len; + char *result = malloc(new_len + 1); + if (!result) { + free(content); + return -1; + } + memcpy(result, content, prefix_len); + memcpy(result + prefix_len, section, strlen(section)); + result[prefix_len + strlen(section)] = '\n'; + memcpy(result + prefix_len + strlen(section) + 1, suffix, suffix_len); + result[new_len] = '\0'; + + int rc = write_file_str(config_path, result); + free(content); + free(result); + return rc; + } + + /* Append our section */ + size_t new_len = len + 1 + strlen(section); + char *result = malloc(new_len + 1); + if (!result) { + free(content); + return -1; + } + memcpy(result, content, len); + result[len] = '\n'; + memcpy(result + len + 1, section, strlen(section)); + result[new_len] = '\0'; + + int rc = write_file_str(config_path, result); + free(content); + free(result); + return rc; +} + +int cbm_remove_codex_mcp(const char *config_path) { + if (!config_path) { + return -1; + } + + size_t len = 0; + char *content = read_file_str(config_path, &len); + if (!content) { + return 1; + } + + char *existing = strstr(content, CODEX_CMM_SECTION); + if (!existing) { + free(content); + return 1; + } + + char *section_end = existing + strlen(CODEX_CMM_SECTION); + char *next_section = strstr(section_end, "\n["); + if (next_section) { + next_section++; + } + + /* Remove leading newline if present */ + if (existing > content && *(existing - 1) == '\n') { + existing--; + } + + size_t prefix_len = (size_t)(existing - content); + const char *suffix = next_section ? 
next_section : ""; + size_t suffix_len = strlen(suffix); + size_t new_len = prefix_len + suffix_len; + char *result = malloc(new_len + 1); + if (!result) { + free(content); + return -1; + } + memcpy(result, content, prefix_len); + memcpy(result + prefix_len, suffix, suffix_len); + result[new_len] = '\0'; + + int rc = write_file_str(config_path, result); + free(content); + free(result); + return rc; +} + +/* ── OpenCode MCP config (JSON with "mcp" key) ───────────────── */ + +int cbm_upsert_opencode_mcp(const char *binary_path, const char *config_path) { + if (!binary_path || !config_path) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + if (!mdoc) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + yyjson_mut_val *root; + if (doc) { + root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + } else { + root = yyjson_mut_obj(mdoc); + } + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + /* Get or create "mcp" object */ + yyjson_mut_val *mcp = yyjson_mut_obj_get(root, "mcp"); + if (!mcp || !yyjson_mut_is_obj(mcp)) { + mcp = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_val(mdoc, root, "mcp", mcp); + } + + yyjson_mut_obj_remove_key(mcp, "codebase-memory-mcp"); + + yyjson_mut_val *entry = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, entry, "command", binary_path); + yyjson_mut_obj_add_val(mdoc, mcp, "codebase-memory-mcp", entry); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +int cbm_remove_opencode_mcp(const char *config_path) { + if (!config_path) { + return -1; + } + + yyjson_doc *doc = read_json_file(config_path); + if (!doc) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, 
root); + + yyjson_mut_val *mcp = yyjson_mut_obj_get(root, "mcp"); + if (!mcp || !yyjson_mut_is_obj(mcp)) { + yyjson_mut_doc_free(mdoc); + return 0; + } + + yyjson_mut_obj_remove_key(mcp, "codebase-memory-mcp"); + + int rc = write_json_file(config_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +/* ── Antigravity MCP config (JSON, same mcpServers format) ────── */ + +int cbm_upsert_antigravity_mcp(const char *binary_path, const char *config_path) { + /* Antigravity uses same mcpServers format as Cursor/Gemini */ + return cbm_install_editor_mcp(binary_path, config_path); +} + +int cbm_remove_antigravity_mcp(const char *config_path) { + return cbm_remove_editor_mcp(config_path); +} + +/* ── Claude Code pre-tool hooks ───────────────────────────────── */ + +#define CMM_HOOK_MATCHER "Grep|Glob|Read|Search" +#define CMM_HOOK_COMMAND \ + "echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_call_path/" \ + "get_code_snippet over Grep/Glob/Read/Search for code discovery. " \ + "Use get_code_snippet(qualified_name) instead of Read to view a function. " \ + "Fall back only if MCP returns insufficient results.' >&2" + +/* Old matcher values from previous versions — recognized during upgrade so + * upsert_hooks_json can remove them before inserting the current matcher. */ +static const char *cmm_old_matchers[] = { + "Grep|Glob|Read", + NULL, +}; + +/* Check if a PreToolUse array entry matches our hook (current or old matcher). 
*/ +static bool is_cmm_hook_entry(yyjson_mut_val *entry, const char *matcher_str) { + yyjson_mut_val *matcher = yyjson_mut_obj_get(entry, "matcher"); + if (!matcher || !yyjson_mut_is_str(matcher)) { + return false; + } + const char *val = yyjson_mut_get_str(matcher); + if (!val) { + return false; + } + if (strcmp(val, matcher_str) == 0) { + return true; + } + /* Also match old versions for backwards-compatible upgrade */ + for (int i = 0; cmm_old_matchers[i]; i++) { + if (strcmp(val, cmm_old_matchers[i]) == 0) { + return true; + } + } + return false; +} + +/* Generic hook upsert for both Claude Code and Gemini CLI */ +static int upsert_hooks_json(const char *settings_path, const char *hook_event, + const char *matcher_str, const char *command_str) { + if (!settings_path) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + if (!mdoc) { + return -1; + } + + yyjson_doc *doc = read_json_file(settings_path); + yyjson_mut_val *root; + if (doc) { + root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + } else { + root = yyjson_mut_obj(mdoc); + } + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + /* Get or create hooks object */ + yyjson_mut_val *hooks = yyjson_mut_obj_get(root, "hooks"); + if (!hooks || !yyjson_mut_is_obj(hooks)) { + hooks = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_val(mdoc, root, "hooks", hooks); + } + + /* Get or create the hook event array (e.g. 
PreToolUse / BeforeTool) */ + yyjson_mut_val *event_arr = yyjson_mut_obj_get(hooks, hook_event); + if (!event_arr || !yyjson_mut_is_arr(event_arr)) { + event_arr = yyjson_mut_arr(mdoc); + yyjson_mut_obj_add_val(mdoc, hooks, hook_event, event_arr); + } + + /* Remove existing CMM entry if present */ + size_t idx; + size_t max; + yyjson_mut_val *item; + yyjson_mut_arr_foreach(event_arr, idx, max, item) { + if (is_cmm_hook_entry(item, matcher_str)) { + yyjson_mut_arr_remove(event_arr, idx); + break; + } + } + + /* Build our hook entry */ + yyjson_mut_val *entry = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, entry, "matcher", matcher_str); + + yyjson_mut_val *hooks_arr = yyjson_mut_arr(mdoc); + yyjson_mut_val *hook_obj = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, hook_obj, "type", "command"); + yyjson_mut_obj_add_str(mdoc, hook_obj, "command", command_str); + yyjson_mut_arr_append(hooks_arr, hook_obj); + yyjson_mut_obj_add_val(mdoc, entry, "hooks", hooks_arr); + + yyjson_mut_arr_append(event_arr, entry); + + int rc = write_json_file(settings_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +/* Generic hook remove for both Claude Code and Gemini CLI */ +static int remove_hooks_json(const char *settings_path, const char *hook_event, + const char *matcher_str) { + if (!settings_path) { + return -1; + } + + yyjson_doc *doc = read_json_file(settings_path); + if (!doc) { + return -1; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return -1; + } + yyjson_mut_doc_set_root(mdoc, root); + + yyjson_mut_val *hooks = yyjson_mut_obj_get(root, "hooks"); + if (!hooks) { + yyjson_mut_doc_free(mdoc); + return 0; + } + + yyjson_mut_val *event_arr = yyjson_mut_obj_get(hooks, hook_event); + if (!event_arr || !yyjson_mut_is_arr(event_arr)) { + yyjson_mut_doc_free(mdoc); + return 0; + } + + size_t idx; + 
size_t max; + yyjson_mut_val *item; + yyjson_mut_arr_foreach(event_arr, idx, max, item) { + if (is_cmm_hook_entry(item, matcher_str)) { + yyjson_mut_arr_remove(event_arr, idx); + break; + } + } + + int rc = write_json_file(settings_path, mdoc); + yyjson_mut_doc_free(mdoc); + return rc; +} + +int cbm_upsert_claude_hooks(const char *settings_path) { + return upsert_hooks_json(settings_path, "PreToolUse", CMM_HOOK_MATCHER, CMM_HOOK_COMMAND); +} + +int cbm_remove_claude_hooks(const char *settings_path) { + return remove_hooks_json(settings_path, "PreToolUse", CMM_HOOK_MATCHER); +} + +#define GEMINI_HOOK_MATCHER "google_search|read_file|grep_search" +#define GEMINI_HOOK_COMMAND \ + "echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_call_path/" \ + "get_code_snippet over grep/file search for code discovery.' >&2" + +int cbm_upsert_gemini_hooks(const char *settings_path) { + return upsert_hooks_json(settings_path, "BeforeTool", GEMINI_HOOK_MATCHER, GEMINI_HOOK_COMMAND); +} + +int cbm_remove_gemini_hooks(const char *settings_path) { + return remove_hooks_json(settings_path, "BeforeTool", GEMINI_HOOK_MATCHER); +} + +/* ── PATH management ──────────────────────────────────────────── */ + +int cbm_ensure_path(const char *bin_dir, const char *rc_file, bool dry_run) { + if (!bin_dir || !rc_file) { + return -1; + } + + char line[1024]; + snprintf(line, sizeof(line), "export PATH=\"%s:$PATH\"", bin_dir); + + /* Check if already present in rc file */ + FILE *f = fopen(rc_file, "r"); + if (f) { + char buf[2048]; + while (fgets(buf, sizeof(buf), f)) { + if (strstr(buf, line)) { + (void)fclose(f); + return 1; /* already present */ + } + } + (void)fclose(f); + } + + if (dry_run) { + return 0; + } + + f = fopen(rc_file, "a"); + if (!f) { + return -1; + } + + // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) + (void)fprintf(f, "\n# Added by codebase-memory-mcp install\n%s\n", line); + (void)fclose(f); + return 0; +} + +/* ── Tar.gz 
extraction ────────────────────────────────────────── */ + +unsigned char *cbm_extract_binary_from_targz(const unsigned char *data, int data_len, + int *out_len) { + if (!data || data_len <= 0 || !out_len) { + return NULL; + } + + /* Decompress gzip */ + z_stream strm = {0}; + // NOLINTNEXTLINE(performance-no-int-to-ptr) + strm.next_in = (unsigned char *)(uintptr_t)data; + strm.avail_in = (unsigned int)data_len; + + // NOLINTNEXTLINE(misc-include-cleaner) — MAX_WBITS provided by standard header + if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) { + return NULL; + } + + /* Allocate decompression buffer (up to 500MB) */ + size_t buf_cap = (size_t)data_len * 10; + if (buf_cap < 4096) { + buf_cap = 4096; + } + if (buf_cap > DECOMPRESS_MAX_BYTES) { + buf_cap = DECOMPRESS_MAX_BYTES; + } + unsigned char *decompressed = malloc(buf_cap); + if (!decompressed) { + inflateEnd(&strm); + return NULL; + } + + size_t total = 0; + int ret; + do { + if (total >= buf_cap) { + size_t new_cap = buf_cap * 2; + if (new_cap > DECOMPRESS_MAX_BYTES) { + free(decompressed); + inflateEnd(&strm); + return NULL; + } + unsigned char *nb = realloc(decompressed, new_cap); + if (!nb) { + free(decompressed); + inflateEnd(&strm); + return NULL; + } + decompressed = nb; + buf_cap = new_cap; + } + strm.next_out = decompressed + total; + strm.avail_out = (unsigned int)(buf_cap - total); + ret = inflate(&strm, Z_NO_FLUSH); + total = buf_cap - strm.avail_out; + } while (ret == Z_OK); + + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + free(decompressed); + return NULL; + } + + /* Parse tar: find entry starting with "codebase-memory-mcp" */ + size_t pos = 0; + while (pos + 512 <= total) { + /* Tar header is 512 bytes */ + const unsigned char *hdr = decompressed + pos; + + /* Check for end-of-archive (two zero blocks) */ + bool all_zero = true; + for (int i = 0; i < 512 && all_zero; i++) { + if (hdr[i] != 0) { + all_zero = false; + } + } + if (all_zero) { + break; + } + + /* Extract filename (bytes 
0-99) */ + char name[TAR_NAME_LEN] = {0}; + memcpy(name, hdr, TAR_NAME_LEN - 1); + + /* Extract size from octal field */ + char size_str[TAR_SIZE_LEN] = {0}; + memcpy(size_str, hdr + TAR_SIZE_OFFSET, TAR_SIZE_LEN - 1); + long file_size = strtol(size_str, NULL, 8); + + /* Extract type flag (byte 156) */ + char typeflag = (char)hdr[TAR_TYPE_OFFSET]; + + pos += 512; /* skip header */ + + /* Check if this is a regular file with our binary name */ + if (typeflag == '0' || typeflag == '\0') { + /* Get basename */ + const char *basename = strrchr(name, '/'); + basename = basename ? basename + 1 : name; + + if (strncmp(basename, TAR_BINARY_NAME, TAR_BINARY_NAME_LEN) == 0) { + if (pos + (size_t)file_size <= total) { + unsigned char *result = malloc((size_t)file_size); + if (result) { + memcpy(result, decompressed + pos, (size_t)file_size); + *out_len = (int)file_size; + free(decompressed); + return result; + } + } + } + } + + /* Skip to next 512-byte boundary */ + size_t blocks = ((size_t)file_size + TAR_BLOCK_MASK) / TAR_BLOCK_SIZE; + pos += blocks * 512; + } + + free(decompressed); + return NULL; /* binary not found */ +} + +/* ── Index management ─────────────────────────────────────────── */ + +static const char *get_cache_dir(const char *home_dir) { + static char buf[1024]; + if (!home_dir) { + home_dir = cbm_home_dir(); + } + if (!home_dir) { + return NULL; + } + snprintf(buf, sizeof(buf), "%s/.cache/codebase-memory-mcp", home_dir); + return buf; +} + +int cbm_list_indexes(const char *home_dir) { + const char *cache_dir = get_cache_dir(home_dir); + if (!cache_dir) { + return 0; + } + + cbm_dir_t *d = cbm_opendir(cache_dir); + if (!d) { + return 0; + } + + int count = 0; + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + size_t len = strlen(ent->name); + if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { + printf(" %s/%s\n", cache_dir, ent->name); + count++; + } + } + cbm_closedir(d); + return count; +} + +int cbm_remove_indexes(const char 
*home_dir) { + const char *cache_dir = get_cache_dir(home_dir); + if (!cache_dir) { + return 0; + } + + cbm_dir_t *d = cbm_opendir(cache_dir); + if (!d) { + return 0; + } + + int count = 0; + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + size_t len = strlen(ent->name); + if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { + char path[1024]; + snprintf(path, sizeof(path), "%s/%s", cache_dir, ent->name); + /* Also remove .db.tmp if present */ + char tmp_path[1040]; + snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path); + cbm_unlink(tmp_path); + if (cbm_unlink(path) == 0) { + count++; + } + } + } + cbm_closedir(d); + return count; +} + +/* ── Config store (persistent key-value in _config.db) ─────────── */ + +#include + +struct cbm_config { + sqlite3 *db; + char get_buf[4096]; /* static buffer for cbm_config_get return values */ +}; + +cbm_config_t *cbm_config_open(const char *cache_dir) { + if (!cache_dir) { + return NULL; + } + + char dbpath[1024]; + snprintf(dbpath, sizeof(dbpath), "%s/_config.db", cache_dir); + + /* Ensure directory exists */ + mkdirp(cache_dir, DIR_PERMS); + + sqlite3 *db = NULL; + if (sqlite3_open(dbpath, &db) != SQLITE_OK) { + if (db) { + sqlite3_close(db); + } + return NULL; + } + + /* Create table if not exists */ + const char *sql = "CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY, value TEXT)"; + char *err_msg = NULL; + if (sqlite3_exec(db, sql, NULL, NULL, &err_msg) != SQLITE_OK) { + sqlite3_free(err_msg); + sqlite3_close(db); + return NULL; + } + + cbm_config_t *cfg = calloc(1, sizeof(*cfg)); + if (!cfg) { + sqlite3_close(db); + return NULL; + } + cfg->db = db; + return cfg; +} + +void cbm_config_close(cbm_config_t *cfg) { + if (!cfg) { + return; + } + if (cfg->db) { + sqlite3_close(cfg->db); + } + free(cfg); +} + +const char *cbm_config_get(cbm_config_t *cfg, const char *key, const char *default_val) { + if (!cfg || !key) { + return default_val; + } + + sqlite3_stmt *stmt = NULL; + if 
(sqlite3_prepare_v2(cfg->db, "SELECT value FROM config WHERE key = ?", -1, &stmt, NULL) != + SQLITE_OK) { + return default_val; + } + sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); + + const char *result = default_val; + if (sqlite3_step(stmt) == SQLITE_ROW) { + const char *val = (const char *)sqlite3_column_text(stmt, 0); + if (val) { + snprintf(cfg->get_buf, sizeof(cfg->get_buf), "%s", val); + result = cfg->get_buf; + } + } + sqlite3_finalize(stmt); + return result; +} + +bool cbm_config_get_bool(cbm_config_t *cfg, const char *key, bool default_val) { + const char *val = cbm_config_get(cfg, key, NULL); + if (!val) { + return default_val; + } + if (strcmp(val, "true") == 0 || strcmp(val, "1") == 0 || strcmp(val, "on") == 0) { + return true; + } + if (strcmp(val, "false") == 0 || strcmp(val, "0") == 0 || strcmp(val, "off") == 0) { + return false; + } + return default_val; +} + +int cbm_config_get_int(cbm_config_t *cfg, const char *key, int default_val) { + const char *val = cbm_config_get(cfg, key, NULL); + if (!val) { + return default_val; + } + char *endptr; + long v = strtol(val, &endptr, 10); + if (endptr == val || *endptr != '\0') { + return default_val; + } + return (int)v; +} + +int cbm_config_set(cbm_config_t *cfg, const char *key, const char *value) { + if (!cfg || !key || !value) { + return -1; + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(cfg->db, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", -1, + &stmt, NULL) != SQLITE_OK) { + return -1; + } + sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); + sqlite3_bind_text(stmt, 2, value, -1, SQLITE_TRANSIENT); + + int rc = sqlite3_step(stmt) == SQLITE_DONE ? 
0 : -1; + sqlite3_finalize(stmt); + return rc; +} + +int cbm_config_delete(cbm_config_t *cfg, const char *key) { + if (!cfg || !key) { + return -1; + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(cfg->db, "DELETE FROM config WHERE key = ?", -1, &stmt, NULL) != + SQLITE_OK) { + return -1; + } + sqlite3_bind_text(stmt, 1, key, -1, SQLITE_TRANSIENT); + + int rc = sqlite3_step(stmt) == SQLITE_DONE ? 0 : -1; + sqlite3_finalize(stmt); + return rc; +} + +/* ── Config CLI subcommand ────────────────────────────────────── */ + +int cbm_cmd_config(int argc, char **argv) { + if (argc == 0) { + printf("Usage: codebase-memory-mcp config [args]\n\n"); + printf("Commands:\n"); + printf(" list Show all config values\n"); + printf(" get Get a config value\n"); + printf(" set Set a config value\n"); + printf(" reset Reset a key to default\n\n"); + printf("Config keys:\n"); + printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX, "false", + "Enable auto-indexing on MCP session start"); + printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, "50000", + "Max files for auto-indexing new projects"); + return 0; + } + + const char *home = cbm_home_dir(); + if (!home) { + fprintf(stderr, "error: HOME not set\n"); + return 1; + } + + char cache_dir[1024]; + snprintf(cache_dir, sizeof(cache_dir), "%s/.cache/codebase-memory-mcp", home); + + cbm_config_t *cfg = cbm_config_open(cache_dir); + if (!cfg) { + fprintf(stderr, "error: cannot open config database\n"); + return 1; + } + + int rc = 0; + if (strcmp(argv[0], "list") == 0 || strcmp(argv[0], "ls") == 0) { + printf("Configuration:\n"); + printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX, + cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX, "false")); + printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, + cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX_LIMIT, "50000")); + } else if (strcmp(argv[0], "get") == 0) { + if (argc < 2) { + fprintf(stderr, "Usage: config get \n"); + rc = 1; + } else { + printf("%s\n", 
cbm_config_get(cfg, argv[1], "")); + } + } else if (strcmp(argv[0], "set") == 0) { + if (argc < 3) { + fprintf(stderr, "Usage: config set \n"); + rc = 1; + } else { + if (cbm_config_set(cfg, argv[1], argv[2]) == 0) { + printf("%s = %s\n", argv[1], argv[2]); + } else { + fprintf(stderr, "error: failed to set %s\n", argv[1]); + rc = 1; + } + } + } else if (strcmp(argv[0], "reset") == 0) { + if (argc < 2) { + fprintf(stderr, "Usage: config reset \n"); + rc = 1; + } else { + cbm_config_delete(cfg, argv[1]); + printf("%s reset to default\n", argv[1]); + } + } else { + fprintf(stderr, "Unknown config command: %s\n", argv[0]); + rc = 1; + } + + cbm_config_close(cfg); + return rc; +} + +/* ── Interactive prompt ───────────────────────────────────────── */ + +/* Global auto-answer mode: 0=interactive, 1=always yes, -1=always no */ +static int g_auto_answer = 0; + +static void parse_auto_answer(int argc, char **argv) { + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "-y") == 0 || strcmp(argv[i], "--yes") == 0) { + g_auto_answer = 1; + } + if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--no") == 0) { + g_auto_answer = -1; + } + } +} + +static bool prompt_yn(const char *question) { + if (g_auto_answer == 1) { + printf("%s (y/n): y (auto)\n", question); + return true; + } + if (g_auto_answer == -1) { + printf("%s (y/n): n (auto)\n", question); + return false; + } + + printf("%s (y/n): ", question); + (void)fflush(stdout); + + char buf[16]; + if (!fgets(buf, sizeof(buf), stdin)) { + return false; + } + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + return (buf[0] == 'y' || buf[0] == 'Y') ? 
true : false;
+}
+
+/* ── SHA-256 checksum verification ─────────────────────────────── */
+
+/* SHA-256 hex digest: 64 hex chars + NUL */
+#define SHA256_HEX_LEN 64
+#define SHA256_BUF_SIZE (SHA256_HEX_LEN + 1)
+/* Minimum line length in checksums.txt: 64 hex + 2 spaces + 1 char filename */
+#define CHECKSUM_LINE_MIN (SHA256_HEX_LEN + 2)
+/* Characters accepted inside a hex digest */
+#define SHA256_HEX_DIGITS "0123456789abcdefABCDEF"
+
+/* Compute SHA-256 of a file using platform tools (sha256sum/shasum).
+ *
+ * path     file to hash; it is placed inside single quotes on a shell command
+ *          line, so a path containing a single quote is rejected
+ * out      receives the 64-char hex digest + NUL
+ * out_size capacity of out, must be at least SHA256_BUF_SIZE
+ *
+ * Returns 0 on success, -1 on any failure (bad arguments, command too long,
+ * tool could not be started, or output that is not a 64-digit hex digest). */
+static int sha256_file(const char *path, char *out, size_t out_size) {
+    if (!path || !out || out_size < SHA256_BUF_SIZE) {
+        return -1;
+    }
+    /* A quote in the path would end the quoting below and hand the rest of the
+     * path to the shell as command text. */
+    if (strchr(path, '\'')) {
+        return -1;
+    }
+    char cmd[1024];
+#ifdef __APPLE__
+    int n = snprintf(cmd, sizeof(cmd), "shasum -a 256 '%s' 2>/dev/null", path);
+#else
+    int n = snprintf(cmd, sizeof(cmd), "sha256sum '%s' 2>/dev/null", path);
+#endif
+    if (n < 0 || (size_t)n >= sizeof(cmd)) {
+        return -1; /* a truncated command would not name the intended file */
+    }
+    // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c)
+    FILE *fp = cbm_popen(cmd, "r");
+    if (!fp) {
+        return -1;
+    }
+    int result = -1;
+    char line[256];
+    /* Expected output: 64 hex digits, then a space and the filename */
+    if (fgets(line, sizeof(line), fp) && strspn(line, SHA256_HEX_DIGITS) == SHA256_HEX_LEN &&
+        line[SHA256_HEX_LEN] == ' ') {
+        memcpy(out, line, SHA256_HEX_LEN);
+        out[SHA256_HEX_LEN] = '\0';
+        result = 0;
+    }
+    cbm_pclose(fp);
+    return result;
+}
+
+/* Parse one checksums.txt line of the form "<64 hex digits><blanks>[*]<filename>".
+ * The listed filename may carry a directory prefix; only its last component is
+ * compared, and it must equal archive_name exactly (a substring hit is not enough).
+ * On a match the digest is copied to hash_out (SHA256_BUF_SIZE bytes) and true is
+ * returned; otherwise hash_out is left untouched and false is returned. */
+static bool checksum_line_digest(const char *line, const char *archive_name, char *hash_out) {
+    if (strlen(line) <= CHECKSUM_LINE_MIN ||
+        strspn(line, SHA256_HEX_DIGITS) != SHA256_HEX_LEN) {
+        return false;
+    }
+    const char *name = line + SHA256_HEX_LEN;
+    size_t blanks = strspn(name, " \t");
+    if (blanks == 0) {
+        return false;
+    }
+    name += blanks;
+    if (*name == '*') {
+        name++; /* binary-mode marker */
+    }
+    size_t name_len = strcspn(name, "\r\n");
+    /* Keep only the last path component */
+    for (size_t i = name_len; i > 0; i--) {
+        if (name[i - 1] == '/') {
+            name += i;
+            name_len -= i;
+            break;
+        }
+    }
+    if (name_len != strlen(archive_name) || strncmp(name, archive_name, name_len) != 0) {
+        return false;
+    }
+    memcpy(hash_out, line, SHA256_HEX_LEN);
+    hash_out[SHA256_HEX_LEN] = '\0';
+    return true;
+}
+
+/* Download checksums.txt and verify the archive integrity.
+ *
+ * archive_path  local path of the downloaded archive
+ * archive_name  release asset filename to look up in checksums.txt
+ *
+ * Returns: 0 = verified OK, 1 = mismatch (FAIL), -1 = could not verify (warning).
+ *
+ * NOTE(review): checksums.txt is stored under a fixed name in the temp directory
+ * and is fetched from the same release as the archive — confirm that is acceptable.
+ */
+static int verify_download_checksum(const char *archive_path, const char *archive_name) {
+    if (!archive_path || !archive_name) {
+        return -1;
+    }
+
+    char checksum_file[256];
+    int n = snprintf(checksum_file, sizeof(checksum_file), "%s/cbm-checksums.txt", cbm_tmpdir());
+    /* The path is single-quoted in the curl command below, so it must be complete
+     * and must not contain a quote itself. */
+    if (n < 0 || (size_t)n >= sizeof(checksum_file) || strchr(checksum_file, '\'')) {
+        fprintf(stderr, "warning: could not download checksums.txt — skipping verification\n");
+        return -1;
+    }
+
+    char cmd[1024];
+    snprintf(cmd, sizeof(cmd),
+             "curl -fsSL -o '%s' "
+             "'https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/"
+             "checksums.txt' 2>/dev/null",
+             checksum_file);
+    // NOLINTNEXTLINE(cert-env33-c) — intentional CLI subprocess for download
+    int rc = system(cmd);
+    if (rc != 0) {
+        fprintf(stderr, "warning: could not download checksums.txt — skipping verification\n");
+        cbm_unlink(checksum_file);
+        return -1;
+    }
+
+    FILE *fp = fopen(checksum_file, "r");
+    if (!fp) {
+        cbm_unlink(checksum_file);
+        return -1;
+    }
+
+    char expected[SHA256_BUF_SIZE] = {0};
+    char line[512];
+    while (fgets(line, sizeof(line), fp)) {
+        if (checksum_line_digest(line, archive_name, expected)) {
+            break;
+        }
+    }
+    fclose(fp);
+    /* Remove the temp file only after closing it: deleting an open file fails on Windows */
+    cbm_unlink(checksum_file);
+
+    if (expected[0] == '\0') {
+        fprintf(stderr, "warning: %s not found in checksums.txt\n", archive_name);
+        return -1;
+    }
+
+    char actual[SHA256_BUF_SIZE] = {0};
+    if (sha256_file(archive_path, actual, sizeof(actual)) != 0) {
+        fprintf(stderr, "warning: sha256sum/shasum not available — skipping verification\n");
+        return -1;
+    }
+
+    if (strcmp(expected, actual) != 0) {
+        fprintf(stderr, "error: CHECKSUM MISMATCH — downloaded binary may be compromised!\n");
+        fprintf(stderr, " expected: %s\n", expected);
+        fprintf(stderr, " actual: %s\n", actual);
+        return 1;
+    }
+
+    printf("Checksum verified: %s\n", actual);
+    return 0;
+}
+
+/* ── Detect OS/arch for download URL ──────────────────────────── */
+
+static const char *detect_os(void) {
+#ifdef _WIN32
+    return "windows";
+#elif defined(__APPLE__)
+    return "darwin";
+#else
+    return "linux";
+#endif
+}
+
+static const char *detect_arch(void) {
+#if
defined(__aarch64__) || defined(_M_ARM64) + return "arm64"; +#else + return "amd64"; +#endif +} + +/* ── Subcommand: install ──────────────────────────────────────── */ + +int cbm_cmd_install(int argc, char **argv) { + parse_auto_answer(argc, argv); + bool dry_run = false; + bool force = false; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "--dry-run") == 0) { + dry_run = true; + } + if (strcmp(argv[i], "--force") == 0) { + force = true; + } + } + + const char *home = cbm_home_dir(); + if (!home) { + fprintf(stderr, "error: HOME not set\n"); + return 1; + } + + printf("codebase-memory-mcp install %s\n\n", CBM_VERSION); + + /* Step 1: Check for existing indexes */ + int index_count = 0; + const char *cache_dir = get_cache_dir(home); + if (cache_dir) { + cbm_dir_t *d = cbm_opendir(cache_dir); + if (d) { + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + size_t len = strlen(ent->name); + if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) { + index_count++; + } + } + cbm_closedir(d); + } + } + + if (index_count > 0) { + printf("Found %d existing index(es) that must be rebuilt:\n", index_count); + cbm_list_indexes(home); + printf("\n"); + if (!prompt_yn("Delete these indexes and continue with install?")) { + printf("Install cancelled.\n"); + return 1; + } + if (!dry_run) { + int removed = cbm_remove_indexes(home); + printf("Removed %d index(es).\n\n", removed); + } + } + + /* Step 2: Binary path — use actual running executable location */ + char self_path[1024]; + const char *exe = cbm_self_exe_path(); + if (exe[0]) { + snprintf(self_path, sizeof(self_path), "%s", exe); + } else { + /* Fallback to conventional location */ + snprintf(self_path, sizeof(self_path), "%s/.local/bin/codebase-memory-mcp", home); + } + + /* Step 3: Detect agents */ + cbm_detected_agents_t agents = cbm_detect_agents(home); + printf("Detected agents:"); + if (agents.claude_code) { + printf(" Claude-Code"); + } + if (agents.codex) { + printf(" Codex"); + } + if 
(agents.gemini) { + printf(" Gemini-CLI"); + } + if (agents.zed) { + printf(" Zed"); + } + if (agents.opencode) { + printf(" OpenCode"); + } + if (agents.antigravity) { + printf(" Antigravity"); + } + if (agents.aider) { + printf(" Aider"); + } + if (agents.kilocode) { + printf(" KiloCode"); + } + if (agents.vscode) { + printf(" VS-Code"); + } + if (agents.openclaw) { + printf(" OpenClaw"); + } + if (!agents.claude_code && !agents.codex && !agents.gemini && !agents.zed && !agents.opencode && + !agents.antigravity && !agents.aider && !agents.kilocode && !agents.vscode && + !agents.openclaw) { + printf(" (none)"); + } + printf("\n\n"); + + /* Step 4: Install Claude Code skills + hooks */ + if (agents.claude_code) { + char skills_dir[1024]; + snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); + printf("Claude Code:\n"); + + int skill_count = cbm_install_skills(skills_dir, force, dry_run); + printf(" skills: %d installed\n", skill_count); + + if (cbm_remove_old_monolithic_skill(skills_dir, dry_run)) { + printf(" removed old monolithic skill\n"); + } + + /* MCP config — write to both locations for compatibility. 
+ * Claude Code <=2.1.x reads ~/.claude/.mcp.json + * Claude Code >=2.1.80 reads ~/.claude.json */ + char mcp_path[1024]; + snprintf(mcp_path, sizeof(mcp_path), "%s/.claude/.mcp.json", home); + if (!dry_run) { + cbm_install_editor_mcp(self_path, mcp_path); + } + printf(" mcp: %s\n", mcp_path); + + char mcp_path2[1024]; + snprintf(mcp_path2, sizeof(mcp_path2), "%s/.claude.json", home); + if (!dry_run) { + cbm_install_editor_mcp(self_path, mcp_path2); + } + printf(" mcp: %s\n", mcp_path2); + + /* PreToolUse hook */ + char settings_path[1024]; + snprintf(settings_path, sizeof(settings_path), "%s/.claude/settings.json", home); + if (!dry_run) { + cbm_upsert_claude_hooks(settings_path); + } + printf(" hooks: PreToolUse (Grep|Glob reminder)\n"); + } + + /* Step 5: Install Codex CLI */ + if (agents.codex) { + printf("Codex CLI:\n"); + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.codex/config.toml", home); + if (!dry_run) { + cbm_upsert_codex_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.codex/AGENTS.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: %s\n", instr_path); + } + + /* Step 6: Install Gemini CLI */ + if (agents.gemini) { + printf("Gemini CLI:\n"); + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.gemini/settings.json", home); + if (!dry_run) { + cbm_install_editor_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.gemini/GEMINI.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: %s\n", instr_path); + + /* BeforeTool hook (shared with Antigravity) */ + if (!dry_run) { + cbm_upsert_gemini_hooks(config_path); + } + printf(" hooks: BeforeTool (grep/file search reminder)\n"); 
+ } + + /* Step 7: Install Zed */ + if (agents.zed) { + printf("Zed:\n"); + char config_path[1024]; +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Zed/settings.json", cbm_app_config_dir()); +#elif defined(_WIN32) + snprintf(config_path, sizeof(config_path), "%s/Zed/settings.json", cbm_app_local_dir()); +#else + snprintf(config_path, sizeof(config_path), "%s/zed/settings.json", cbm_app_config_dir()); +#endif + if (!dry_run) { + cbm_install_zed_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + } + + /* Step 8: Install OpenCode */ + if (agents.opencode) { + printf("OpenCode:\n"); + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.config/opencode/opencode.json", home); + if (!dry_run) { + cbm_upsert_opencode_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.config/opencode/AGENTS.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: %s\n", instr_path); + } + + /* Step 9: Install Antigravity */ + if (agents.antigravity) { + printf("Antigravity:\n"); + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.gemini/antigravity/mcp_config.json", home); + if (!dry_run) { + cbm_upsert_antigravity_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.gemini/antigravity/AGENTS.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: %s\n", instr_path); + } + + /* Step 10: Install Aider */ + if (agents.aider) { + printf("Aider:\n"); + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/CONVENTIONS.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: 
%s\n", instr_path); + } + + /* Step 11: Install KiloCode */ + if (agents.kilocode) { + printf("KiloCode:\n"); + char config_path[1024]; + const char *kilo_cfg = cbm_app_config_dir(); +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", + kilo_cfg); +#else + snprintf(config_path, sizeof(config_path), + "%s/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", + kilo_cfg); +#endif + if (!dry_run) { + cbm_install_editor_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + + /* KiloCode uses ~/.kilocode/rules/ for global instructions */ + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.kilocode/rules/codebase-memory-mcp.md", home); + if (!dry_run) { + cbm_upsert_instructions(instr_path, agent_instructions_content); + } + printf(" instructions: %s\n", instr_path); + } + + /* Step 12: Install VS Code */ + if (agents.vscode) { + printf("VS Code:\n"); + char config_path[1024]; + const char *vs_cfg = cbm_app_config_dir(); +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Code/User/mcp.json", vs_cfg); +#else + snprintf(config_path, sizeof(config_path), "%s/Code/User/mcp.json", vs_cfg); +#endif + if (!dry_run) { + cbm_install_vscode_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + } + + /* Step 13: Install OpenClaw */ + if (agents.openclaw) { + printf("OpenClaw:\n"); + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.openclaw/openclaw.json", home); + if (!dry_run) { + cbm_install_editor_mcp(self_path, config_path); + } + printf(" mcp: %s\n", config_path); + } + + /* Step 14: Ensure PATH — derive bin directory from actual executable */ + char bin_dir[1024]; + snprintf(bin_dir, sizeof(bin_dir), "%s", self_path); + char *last_slash = strrchr(bin_dir, '/'); + if (last_slash) { + *last_slash = '\0'; + } else { + 
snprintf(bin_dir, sizeof(bin_dir), "%s/.local/bin", home); + } + const char *rc = cbm_detect_shell_rc(home); + if (rc[0]) { + int path_rc = cbm_ensure_path(bin_dir, rc, dry_run); + if (path_rc == 0) { + printf("\nAdded %s to PATH in %s\n", bin_dir, rc); + } else if (path_rc == 1) { + printf("\nPATH already includes %s\n", bin_dir); + } + } + + printf("\nInstall complete. Restart your shell or run:\n"); + printf(" source %s\n", rc); + if (dry_run) { + printf("\n(dry-run — no files were modified)\n"); + } + return 0; +} + +/* ── Subcommand: uninstall ────────────────────────────────────── */ + +int cbm_cmd_uninstall(int argc, char **argv) { + parse_auto_answer(argc, argv); + bool dry_run = false; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "--dry-run") == 0) { + dry_run = true; + } + } + + const char *home = cbm_home_dir(); + if (!home) { + fprintf(stderr, "error: HOME not set\n"); + return 1; + } + + printf("codebase-memory-mcp uninstall\n\n"); + + /* Step 1: Detect agents and remove per-agent configs */ + cbm_detected_agents_t agents = cbm_detect_agents(home); + + if (agents.claude_code) { + char skills_dir[1024]; + snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); + int removed = cbm_remove_skills(skills_dir, dry_run); + printf("Claude Code: removed %d skill(s)\n", removed); + + char mcp_path[1024]; + snprintf(mcp_path, sizeof(mcp_path), "%s/.claude/.mcp.json", home); + if (!dry_run) { + cbm_remove_editor_mcp(mcp_path); + } + printf(" removed MCP config entry\n"); + + /* Also remove from new location (Claude Code >=2.1.80) */ + char mcp_path2[1024]; + snprintf(mcp_path2, sizeof(mcp_path2), "%s/.claude.json", home); + if (!dry_run) { + cbm_remove_editor_mcp(mcp_path2); + } + + char settings_path[1024]; + snprintf(settings_path, sizeof(settings_path), "%s/.claude/settings.json", home); + if (!dry_run) { + cbm_remove_claude_hooks(settings_path); + } + printf(" removed PreToolUse hook\n"); + } + + if (agents.codex) { + char 
config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.codex/config.toml", home); + if (!dry_run) { + cbm_remove_codex_mcp(config_path); + } + printf("Codex CLI: removed MCP config entry\n"); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.codex/AGENTS.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf(" removed instructions\n"); + } + + if (agents.gemini) { + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.gemini/settings.json", home); + if (!dry_run) { + cbm_remove_editor_mcp(config_path); + } + printf("Gemini CLI: removed MCP config entry\n"); + + if (!dry_run) { + cbm_remove_gemini_hooks(config_path); + } + printf(" removed BeforeTool hook\n"); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.gemini/GEMINI.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf(" removed instructions\n"); + } + + if (agents.zed) { + char config_path[1024]; +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Zed/settings.json", cbm_app_config_dir()); +#elif defined(_WIN32) + snprintf(config_path, sizeof(config_path), "%s/Zed/settings.json", cbm_app_local_dir()); +#else + snprintf(config_path, sizeof(config_path), "%s/zed/settings.json", cbm_app_config_dir()); +#endif + if (!dry_run) { + cbm_remove_zed_mcp(config_path); + } + printf("Zed: removed MCP config entry\n"); + } + + if (agents.opencode) { + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.config/opencode/opencode.json", home); + if (!dry_run) { + cbm_remove_opencode_mcp(config_path); + } + printf("OpenCode: removed MCP config entry\n"); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.config/opencode/AGENTS.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf(" removed instructions\n"); + } + + if (agents.antigravity) { + char config_path[1024]; + 
snprintf(config_path, sizeof(config_path), "%s/.gemini/antigravity/mcp_config.json", home); + if (!dry_run) { + cbm_remove_antigravity_mcp(config_path); + } + printf("Antigravity: removed MCP config entry\n"); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.gemini/antigravity/AGENTS.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf(" removed instructions\n"); + } + + if (agents.aider) { + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/CONVENTIONS.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf("Aider: removed instructions\n"); + } + + if (agents.kilocode) { + char config_path[1024]; + const char *kilo_cfg = cbm_app_config_dir(); +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", + kilo_cfg); +#else + snprintf(config_path, sizeof(config_path), + "%s/Code/User/globalStorage/kilocode.kilo-code/settings/mcp_settings.json", + kilo_cfg); +#endif + if (!dry_run) { + cbm_remove_editor_mcp(config_path); + } + printf("KiloCode: removed MCP config entry\n"); + + char instr_path[1024]; + snprintf(instr_path, sizeof(instr_path), "%s/.kilocode/rules/codebase-memory-mcp.md", home); + if (!dry_run) { + cbm_remove_instructions(instr_path); + } + printf(" removed instructions\n"); + } + + if (agents.vscode) { + char config_path[1024]; + const char *vs_cfg = cbm_app_config_dir(); +#ifdef __APPLE__ + snprintf(config_path, sizeof(config_path), + "%s/Library/Application Support/Code/User/mcp.json", vs_cfg); +#else + snprintf(config_path, sizeof(config_path), "%s/Code/User/mcp.json", vs_cfg); +#endif + if (!dry_run) { + cbm_remove_vscode_mcp(config_path); + } + printf("VS Code: removed MCP config entry\n"); + } + + if (agents.openclaw) { + char config_path[1024]; + snprintf(config_path, sizeof(config_path), "%s/.openclaw/openclaw.json", home); + if 
(!dry_run) {
+            cbm_remove_editor_mcp(config_path);
+        }
+        printf("OpenClaw: removed MCP config entry\n");
+    }
+
+    /* Step 2: Remove indexes */
+    /* Count the "*.db" entries in the cache directory */
+    int index_count = 0;
+    const char *cache_dir = get_cache_dir(home);
+    if (cache_dir) {
+        cbm_dir_t *d = cbm_opendir(cache_dir);
+        if (d) {
+            cbm_dirent_t *ent;
+            while ((ent = cbm_readdir(d)) != NULL) {
+                size_t len = strlen(ent->name);
+                if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) {
+                    index_count++;
+                }
+            }
+            cbm_closedir(d);
+        }
+    }
+
+    if (index_count > 0) {
+        printf("\nFound %d index(es):\n", index_count);
+        cbm_list_indexes(home);
+        if (prompt_yn("Delete these indexes?")) {
+            /* A dry run must not touch the indexes: report what would happen instead */
+            if (!dry_run) {
+                int idx_removed = cbm_remove_indexes(home);
+                printf("Removed %d index(es).\n", idx_removed);
+            } else {
+                printf("Would remove %d index(es).\n", index_count);
+            }
+        } else {
+            printf("Indexes kept.\n");
+        }
+    }
+
+    /* Step 3: Remove binary */
+    /* Only the conventional ~/.local/bin location is checked here */
+    char bin_path[1024];
+    snprintf(bin_path, sizeof(bin_path), "%s/.local/bin/codebase-memory-mcp", home);
+    struct stat st;
+    if (stat(bin_path, &st) == 0) {
+        if (!dry_run) {
+            cbm_unlink(bin_path);
+        }
+        printf("Removed %s\n", bin_path);
+    }
+
+    printf("\nUninstall complete.\n");
+    if (dry_run) {
+        printf("(dry-run — no files were modified)\n");
+    }
+    return 0;
+}
+
+/* ── Subcommand: update ───────────────────────────────────────── */
+
+int cbm_cmd_update(int argc, char **argv) {
+    parse_auto_answer(argc, argv);
+
+    const char *home = cbm_home_dir();
+    if (!home) {
+        fprintf(stderr, "error: HOME not set\n");
+        return 1;
+    }
+
+    printf("codebase-memory-mcp update (current: %s)\n\n", CBM_VERSION);
+
+    /* Step 1: Check for existing indexes */
+    int index_count = 0;
+    const char *cache_dir = get_cache_dir(home);
+    if (cache_dir) {
+        cbm_dir_t *d = cbm_opendir(cache_dir);
+        if (d) {
+            cbm_dirent_t *ent;
+            while ((ent = cbm_readdir(d)) != NULL) {
+                size_t len = strlen(ent->name);
+                if (len > 3 && strcmp(ent->name + len - 3, ".db") == 0) {
+                    index_count++;
+                }
+            }
+            cbm_closedir(d);
+        }
+    }
+
+    if (index_count > 0) {
+        printf("Found %d existing index(es) that must be 
rebuilt after update:\n", index_count); + cbm_list_indexes(home); + printf("\n"); + if (!prompt_yn("Delete these indexes and continue with update?")) { + printf("Update cancelled.\n"); + return 1; + } + int removed = cbm_remove_indexes(home); + printf("Removed %d index(es).\n\n", removed); + } + + /* Step 2: Ask for UI variant */ + printf("Which binary variant do you want?\n"); + printf(" 1) standard — MCP server only\n"); + printf(" 2) ui — MCP server + embedded graph visualization\n"); + printf("Choose (1/2): "); + (void)fflush(stdout); + + char choice[16]; + if (!fgets(choice, sizeof(choice), stdin)) { + fprintf(stderr, "error: failed to read input\n"); + return 1; + } + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool want_ui = (choice[0] == '2') ? true : false; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *variant = want_ui ? "ui-" : ""; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *variant_label = want_ui ? "ui" : "standard"; + + /* Step 3: Build download URL */ + const char *os = detect_os(); + const char *arch = detect_arch(); + const char *ext = strcmp(os, "windows") == 0 ? 
"zip" : "tar.gz"; + + char url[512]; + if (want_ui) { + snprintf(url, sizeof(url), + "https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/" + "codebase-memory-mcp-ui-%s-%s.%s", + os, arch, ext); + } else { + snprintf(url, sizeof(url), + "https://github.com/DeusData/codebase-memory-mcp/releases/latest/download/" + "codebase-memory-mcp-%s-%s.%s", + os, arch, ext); + } + + printf("\nDownloading %s binary for %s/%s ...\n", variant_label, os, arch); + printf(" %s\n", url); + + /* Step 4: Download using curl */ + char tmp_archive[256]; + snprintf(tmp_archive, sizeof(tmp_archive), "%s/cbm-update.%s", cbm_tmpdir(), ext); + + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "curl -fSL --progress-bar -o '%s' '%s'", tmp_archive, url); + // NOLINTNEXTLINE(cert-env33-c) — intentional CLI subprocess for download + int rc = system(cmd); + if (rc != 0) { + fprintf(stderr, "error: download failed (exit %d)\n", rc); + cbm_unlink(tmp_archive); + return 1; + } + + /* Step 4b: Verify checksum */ + { + /* Build the expected archive filename (matches checksums.txt format) */ + char archive_name[256]; + if (want_ui) { + snprintf(archive_name, sizeof(archive_name), "codebase-memory-mcp-ui-%s-%s.%s", os, + arch, ext); + } else { + snprintf(archive_name, sizeof(archive_name), "codebase-memory-mcp-%s-%s.%s", os, arch, + ext); + } + int crc = verify_download_checksum(tmp_archive, archive_name); + if (crc == 1) { + /* Hard fail: checksum mismatch */ + cbm_unlink(tmp_archive); + return 1; + } + /* crc == -1: could not verify (warning only), crc == 0: verified OK */ + } + + /* Step 5: Extract binary — update in place at the current exe location */ + char bin_dest[1024]; + const char *exe = cbm_self_exe_path(); + if (exe[0]) { + snprintf(bin_dest, sizeof(bin_dest), "%s", exe); + } else { + snprintf(bin_dest, sizeof(bin_dest), "%s/.local/bin/codebase-memory-mcp", home); + } + + /* Ensure install directory exists */ + char bin_dir[1024]; + snprintf(bin_dir, sizeof(bin_dir), "%s", 
bin_dest);
+    /* Strip filename to get directory */
+    char *last_slash = strrchr(bin_dir, '/');
+    if (last_slash) {
+        *last_slash = '\0';
+    } else {
+        snprintf(bin_dir, sizeof(bin_dir), "%s/.local/bin", home);
+    }
+    cbm_mkdir_p(bin_dir, 0755);
+
+    if (strcmp(ext, "tar.gz") == 0) {
+        /* Read archive into memory and extract */
+        FILE *f = fopen(tmp_archive, "rb");
+        if (!f) {
+            fprintf(stderr, "error: cannot open %s\n", tmp_archive);
+            cbm_unlink(tmp_archive);
+            return 1;
+        }
+        /* Size the archive: it must be non-empty and its length must fit the int
+         * that the extractor takes (0x7fffffff is the largest 32-bit int). */
+        long fsize = -1;
+        if (fseek(f, 0, SEEK_END) == 0) {
+            fsize = ftell(f);
+        }
+        if (fsize <= 0 || fsize > 0x7fffffffL || fseek(f, 0, SEEK_SET) != 0) {
+            fprintf(stderr, "error: cannot read %s\n", tmp_archive);
+            fclose(f);
+            cbm_unlink(tmp_archive);
+            return 1;
+        }
+
+        unsigned char *data = malloc((size_t)fsize);
+        if (!data) {
+            fclose(f);
+            cbm_unlink(tmp_archive);
+            return 1;
+        }
+        size_t got = fread(data, 1, (size_t)fsize, f);
+        fclose(f);
+        if (got != (size_t)fsize) {
+            /* A short read would hand a truncated archive to the extractor */
+            fprintf(stderr, "error: cannot read %s\n", tmp_archive);
+            free(data);
+            cbm_unlink(tmp_archive);
+            return 1;
+        }
+
+        int bin_len = 0;
+        unsigned char *bin_data = cbm_extract_binary_from_targz(data, (int)fsize, &bin_len);
+        free(data);
+        cbm_unlink(tmp_archive);
+
+        if (!bin_data || bin_len <= 0) {
+            fprintf(stderr, "error: binary not found in archive\n");
+            free(bin_data);
+            return 1;
+        }
+
+        /* Open with final permissions atomically (no TOCTOU between write and chmod) */
+#ifndef _WIN32
+        int fd = open(bin_dest, O_WRONLY | O_CREAT | O_TRUNC, 0755);
+        if (fd < 0) {
+            fprintf(stderr, "error: cannot write to %s\n", bin_dest);
+            free(bin_data);
+            return 1;
+        }
+        FILE *out = fdopen(fd, "wb");
+#else
+        FILE *out = fopen(bin_dest, "wb");
+#endif
+        if (!out) {
+            fprintf(stderr, "error: cannot write to %s\n", bin_dest);
+            free(bin_data);
+#ifndef _WIN32
+            close(fd);
+#endif
+            return 1;
+        }
+        size_t written = fwrite(bin_data, 1, (size_t)bin_len, out);
+        free(bin_data);
+        /* fclose flushes buffered data, so its result is part of the write check.
+         * It is evaluated first so the stream is closed on every path. */
+        if (fclose(out) != 0 || written != (size_t)bin_len) {
+            fprintf(stderr, "error: cannot write to %s\n", bin_dest);
+            return 1;
+        }
+    } else {
+        /* Zip extraction: exec unzip directly without shell interpretation */
+        const char *unzip_argv[] = {"unzip", "-o", "-d", bin_dir, tmp_archive, NULL};
+        rc = cbm_exec_no_shell(unzip_argv);
+        cbm_unlink(tmp_archive);
+        if (rc != 0) {
+            fprintf(stderr, "error: extraction failed\n");
+            return 1;
+        }
+        /* Rename variant binary if needed */
+        if (want_ui) {
+            char ui_bin[1024];
+            snprintf(ui_bin, sizeof(ui_bin), 
"%s/codebase-memory-mcp-ui.exe", bin_dir); + snprintf(bin_dest, sizeof(bin_dest), "%s/codebase-memory-mcp.exe", bin_dir); + rename(ui_bin, bin_dest); + } + } + + /* Step 6: Reinstall skills (force to pick up new content) */ + char skills_dir[1024]; + snprintf(skills_dir, sizeof(skills_dir), "%s/.claude/skills", home); + int skill_count = cbm_install_skills(skills_dir, true, false); + printf("Updated %d skill(s).\n", skill_count); + + /* Step 7: Verify new version (exec directly, no shell interpretation) */ + printf("\nUpdate complete. Verifying:\n"); + { + const char *ver_argv[] = {bin_dest, "--version", NULL}; + (void)cbm_exec_no_shell(ver_argv); + } + + printf("\nAll project indexes were cleared. They will be rebuilt\n"); + printf("automatically when you next use the MCP server.\n"); + (void)variant; + return 0; +} diff --git a/src/foundation/platform.c b/src/foundation/platform.c index 2c33e4f..dd380ca 100644 --- a/src/foundation/platform.c +++ b/src/foundation/platform.c @@ -1,218 +1,367 @@ -/* - * platform.c — OS abstraction implementations. - * - * macOS, Linux, and Windows. Platform-specific code behind #ifdef guards. 
- */ -#include "platform.h" -#include "compat.h" - -#include // uint64_t, int64_t - -#ifdef _WIN32 - -/* ── Windows implementation ───────────────────────────────────── */ - -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include -#include -#include - -void *cbm_mmap_read(const char *path, size_t *out_size) { - if (!path || !out_size) { - return NULL; - } - *out_size = 0; - - HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - if (file == INVALID_HANDLE_VALUE) { - return NULL; - } - LARGE_INTEGER sz; - if (!GetFileSizeEx(file, &sz) || sz.QuadPart == 0) { - CloseHandle(file); - return NULL; - } - HANDLE mapping = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL); - if (!mapping) { - CloseHandle(file); - return NULL; - } - void *addr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - CloseHandle(file); - if (!addr) { - return NULL; - } - *out_size = (size_t)sz.QuadPart; - return addr; -} - -void cbm_munmap(void *addr, size_t size) { - (void)size; - if (addr) { - UnmapViewOfFile(addr); - } -} - -uint64_t cbm_now_ns(void) { - LARGE_INTEGER freq, count; - QueryPerformanceFrequency(&freq); - QueryPerformanceCounter(&count); - return (uint64_t)count.QuadPart * 1000000000ULL / (uint64_t)freq.QuadPart; -} - -#define CBM_USEC_PER_SEC 1000000ULL - -uint64_t cbm_now_ms(void) { - return cbm_now_ns() / CBM_USEC_PER_SEC; -} - -int cbm_nprocs(void) { - SYSTEM_INFO si; - GetSystemInfo(&si); - return (int)si.dwNumberOfProcessors > 0 ? 
(int)si.dwNumberOfProcessors : 1; -} - -bool cbm_file_exists(const char *path) { - DWORD attr = GetFileAttributesA(path); - return attr != INVALID_FILE_ATTRIBUTES; -} - -bool cbm_is_dir(const char *path) { - DWORD attr = GetFileAttributesA(path); - return attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY); -} - -int64_t cbm_file_size(const char *path) { - WIN32_FILE_ATTRIBUTE_DATA fad; - if (!GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { - return -1; - } - LARGE_INTEGER sz; - sz.HighPart = (LONG)fad.nFileSizeHigh; // cppcheck-suppress unreadVariable - sz.LowPart = fad.nFileSizeLow; // cppcheck-suppress unreadVariable - return (int64_t)sz.QuadPart; -} - -#else /* POSIX (macOS + Linux) */ - -/* ── POSIX implementation ─────────────────────────────────────── */ - -#include // open, O_RDONLY -#include -#include -#include -#include - -#ifdef __APPLE__ -#include -#include -#else -#include -#endif - -/* ── Memory mapping ────────────────────────────────────────────── */ - -void *cbm_mmap_read(const char *path, size_t *out_size) { - if (!path || !out_size) { - return NULL; - } - *out_size = 0; - - // NOLINTNEXTLINE(misc-include-cleaner) — open provided by standard header - int fd = open(path, O_RDONLY); - if (fd < 0) { - return NULL; - } - - struct stat st; - if (fstat(fd, &st) != 0 || st.st_size == 0) { - close(fd); - return NULL; - } - - void *addr = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - - if (addr == MAP_FAILED) { - return NULL; - } - *out_size = (size_t)st.st_size; - return addr; -} - -void cbm_munmap(void *addr, size_t size) { - if (addr && size > 0) { - munmap(addr, size); - } -} - -/* ── Timing ────────────────────────────────────────────────────── */ - -#ifdef __APPLE__ -static mach_timebase_info_data_t timebase_info; -static int timebase_init = 0; - -uint64_t cbm_now_ns(void) { - if (!timebase_init) { - mach_timebase_info(&timebase_info); - timebase_init = 1; - } - uint64_t ticks = 
mach_absolute_time(); - return ticks * timebase_info.numer / timebase_info.denom; -} -#else -uint64_t cbm_now_ns(void) { - struct timespec ts; - cbm_clock_gettime(CLOCK_MONOTONIC, &ts); - return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec; -} -#endif - -#define CBM_USEC_PER_SEC 1000000ULL - -uint64_t cbm_now_ms(void) { - return cbm_now_ns() / CBM_USEC_PER_SEC; -} - -/* ── System info ───────────────────────────────────────────────── */ - -int cbm_nprocs(void) { -#ifdef __APPLE__ - int ncpu = 0; - size_t len = sizeof(ncpu); - if (sysctlbyname("hw.ncpu", &ncpu, &len, NULL, 0) == 0 && ncpu > 0) { - return ncpu; - } - return 1; -#else - long n = sysconf(_SC_NPROCESSORS_ONLN); - return n > 0 ? (int)n : 1; -#endif -} - -/* ── File system ───────────────────────────────────────────────── */ - -bool cbm_file_exists(const char *path) { - struct stat st; - return stat(path, &st) == 0; -} - -bool cbm_is_dir(const char *path) { - struct stat st; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - return stat(path, &st) == 0 && S_ISDIR(st.st_mode); -} - -int64_t cbm_file_size(const char *path) { - struct stat st; - if (stat(path, &st) != 0) { - return -1; - } - return (int64_t)st.st_size; -} - -#endif /* _WIN32 */ +/* + * platform.c — OS abstraction implementations. + * + * macOS, Linux, and Windows. Platform-specific code behind #ifdef guards. 
+ */
+#include "platform.h"
+#include "compat.h"
+
+#include // uint64_t, int64_t
+
+#ifdef _WIN32
+
+/* ── Windows implementation ───────────────────────────────────── */
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include
+#include
+#include
+
+/* Map the file at path read-only. On success returns the mapped address and
+ * stores the file size in *out_size; returns NULL (with *out_size == 0) for bad
+ * arguments, an empty file, or any failing Win32 call. */
+void *cbm_mmap_read(const char *path, size_t *out_size) {
+    if (!path || !out_size) {
+        return NULL;
+    }
+    *out_size = 0;
+
+    HANDLE file = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+                              FILE_ATTRIBUTE_NORMAL, NULL);
+    if (file == INVALID_HANDLE_VALUE) {
+        return NULL;
+    }
+    LARGE_INTEGER sz;
+    if (!GetFileSizeEx(file, &sz) || sz.QuadPart == 0) {
+        CloseHandle(file);
+        return NULL;
+    }
+    HANDLE mapping = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (!mapping) {
+        CloseHandle(file);
+        return NULL;
+    }
+    void *addr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    /* Both handles are closed here whether or not the view was created */
+    CloseHandle(mapping);
+    CloseHandle(file);
+    if (!addr) {
+        return NULL;
+    }
+    *out_size = (size_t)sz.QuadPart;
+    return addr;
+}
+
+/* Release a view returned by cbm_mmap_read; size is unused on Windows. */
+void cbm_munmap(void *addr, size_t size) {
+    (void)size;
+    if (addr) {
+        UnmapViewOfFile(addr);
+    }
+}
+
+/* Performance-counter reading converted to nanoseconds.
+ * Whole seconds and the sub-second remainder are converted separately:
+ * multiplying the raw tick count by 1e9 in one step overflows 64 bits once the
+ * counter exceeds about 1.8e10 ticks (roughly 30 minutes at a 10 MHz counter).
+ * Returns 0 if the reported frequency is not positive. */
+uint64_t cbm_now_ns(void) {
+    LARGE_INTEGER freq, count;
+    QueryPerformanceFrequency(&freq);
+    QueryPerformanceCounter(&count);
+    if (freq.QuadPart <= 0) {
+        return 0; /* avoid dividing by zero */
+    }
+    uint64_t hz = (uint64_t)freq.QuadPart;
+    uint64_t ticks = (uint64_t)count.QuadPart;
+    uint64_t whole_sec = ticks / hz;
+    uint64_t frac_ticks = ticks % hz; /* always < hz, so the product below stays small */
+    return whole_sec * 1000000000ULL + frac_ticks * 1000000000ULL / hz;
+}
+
+/* Despite its name this constant is used below as the nanoseconds-per-millisecond
+ * divisor; the two factors happen to be the same number. */
+#define CBM_USEC_PER_SEC 1000000ULL
+
+/* cbm_now_ns() scaled down to milliseconds. */
+uint64_t cbm_now_ms(void) {
+    return cbm_now_ns() / CBM_USEC_PER_SEC;
+}
+
+int cbm_nprocs(void) {
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+    return (int)si.dwNumberOfProcessors > 0 ? 
(int)si.dwNumberOfProcessors : 1; +} + +bool cbm_file_exists(const char *path) { + DWORD attr = GetFileAttributesA(path); + return attr != INVALID_FILE_ATTRIBUTES; +} + +bool cbm_is_dir(const char *path) { + DWORD attr = GetFileAttributesA(path); + return attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY); +} + +int64_t cbm_file_size(const char *path) { + WIN32_FILE_ATTRIBUTE_DATA fad; + if (!GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { + return -1; + } + LARGE_INTEGER sz; + sz.HighPart = (LONG)fad.nFileSizeHigh; // cppcheck-suppress unreadVariable + sz.LowPart = fad.nFileSizeLow; // cppcheck-suppress unreadVariable + return (int64_t)sz.QuadPart; +} + +const char *cbm_self_exe_path(void) { + static char buf[1024]; + DWORD len = GetModuleFileNameA(NULL, buf, sizeof(buf)); + if (len == 0 || len >= sizeof(buf)) { + return ""; + } + cbm_normalize_path_sep(buf); + return buf; +} + +char *cbm_normalize_path_sep(char *path) { + if (path) { + for (char *p = path; *p; p++) { + if (*p == '\\') { + *p = '/'; + } + } + } + return path; +} + +/* Convert MSYS2/Git Bash path to native Windows path (in-place). + * /c/Users/... → C:/Users/... (single drive letter after leading /) */ +static void msys_to_native(char *path) { + if (path[0] == '/' && path[1] != '\0' && path[2] == '/') { + char drive = path[1]; + if ((drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z')) { + /* Shift left: overwrite leading '/' with drive letter, inject ':' */ + path[0] = (char)(drive >= 'a' ? 
drive - 32 : drive); /* uppercase */ + path[1] = ':'; + /* rest of path starting from path[2] is already correct */ + } + } +} + +const char *cbm_home_dir(void) { + static char buf[1024]; + const char *h = getenv("HOME"); + if (!h || !h[0]) { + h = getenv("USERPROFILE"); + } + if (!h || !h[0]) { + return NULL; + } + snprintf(buf, sizeof(buf), "%s", h); + cbm_normalize_path_sep(buf); + msys_to_native(buf); + return buf; +} + +const char *cbm_app_config_dir(void) { + static char buf[1024]; + const char *d = getenv("APPDATA"); + if (!d || !d[0]) { + /* Fallback: USERPROFILE/AppData/Roaming */ + const char *home = cbm_home_dir(); + if (!home) { + return NULL; + } + snprintf(buf, sizeof(buf), "%s/AppData/Roaming", home); + return buf; + } + snprintf(buf, sizeof(buf), "%s", d); + cbm_normalize_path_sep(buf); + msys_to_native(buf); + return buf; +} + +const char *cbm_app_local_dir(void) { + static char buf[1024]; + const char *d = getenv("LOCALAPPDATA"); + if (!d || !d[0]) { + /* Fallback: USERPROFILE/AppData/Local */ + const char *home = cbm_home_dir(); + if (!home) { + return NULL; + } + snprintf(buf, sizeof(buf), "%s/AppData/Local", home); + return buf; + } + snprintf(buf, sizeof(buf), "%s", d); + cbm_normalize_path_sep(buf); + msys_to_native(buf); + return buf; +} + +#else /* POSIX (macOS + Linux) */ + +/* ── POSIX implementation ─────────────────────────────────────── */ + +#include // open, O_RDONLY +#include +#include +#include +#include + +#ifdef __APPLE__ +#include +#include // _NSGetExecutablePath +#include +#else +#include +#endif + +/* ── Memory mapping ────────────────────────────────────────────── */ + +void *cbm_mmap_read(const char *path, size_t *out_size) { + if (!path || !out_size) { + return NULL; + } + *out_size = 0; + + // NOLINTNEXTLINE(misc-include-cleaner) — open provided by standard header + int fd = open(path, O_RDONLY); + if (fd < 0) { + return NULL; + } + + struct stat st; + if (fstat(fd, &st) != 0 || st.st_size == 0) { + close(fd); + return 
NULL; + } + + void *addr = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + return NULL; + } + *out_size = (size_t)st.st_size; + return addr; +} + +void cbm_munmap(void *addr, size_t size) { + if (addr && size > 0) { + munmap(addr, size); + } +} + +/* ── Timing ────────────────────────────────────────────────────── */ + +#ifdef __APPLE__ +static mach_timebase_info_data_t timebase_info; +static int timebase_init = 0; + +uint64_t cbm_now_ns(void) { + if (!timebase_init) { + mach_timebase_info(&timebase_info); + timebase_init = 1; + } + uint64_t ticks = mach_absolute_time(); + return ticks * timebase_info.numer / timebase_info.denom; +} +#else +uint64_t cbm_now_ns(void) { + struct timespec ts; + cbm_clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec; +} +#endif + +#define CBM_USEC_PER_SEC 1000000ULL + +uint64_t cbm_now_ms(void) { + return cbm_now_ns() / CBM_USEC_PER_SEC; +} + +/* ── System info ───────────────────────────────────────────────── */ + +int cbm_nprocs(void) { +#ifdef __APPLE__ + int ncpu = 0; + size_t len = sizeof(ncpu); + if (sysctlbyname("hw.ncpu", &ncpu, &len, NULL, 0) == 0 && ncpu > 0) { + return ncpu; + } + return 1; +#else + long n = sysconf(_SC_NPROCESSORS_ONLN); + return n > 0 ? 
(int)n : 1; +#endif +} + +/* ── File system ───────────────────────────────────────────────── */ + +bool cbm_file_exists(const char *path) { + struct stat st; + return stat(path, &st) == 0; +} + +bool cbm_is_dir(const char *path) { + struct stat st; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + return stat(path, &st) == 0 && S_ISDIR(st.st_mode); +} + +int64_t cbm_file_size(const char *path) { + struct stat st; + if (stat(path, &st) != 0) { + return -1; + } + return (int64_t)st.st_size; +} + +const char *cbm_self_exe_path(void) { + static char buf[1024]; +#ifdef __APPLE__ + uint32_t sz = sizeof(buf); + if (_NSGetExecutablePath(buf, &sz) != 0) { + return ""; + } + /* Resolve symlinks to get canonical path */ + char *resolved = realpath(buf, NULL); + if (resolved) { + snprintf(buf, sizeof(buf), "%s", resolved); + free(resolved); + } +#else + ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); + if (len <= 0) { + return ""; + } + buf[len] = '\0'; +#endif + return buf; +} + +char *cbm_normalize_path_sep(char *path) { + /* No-op on POSIX — paths already use forward slashes. */ + (void)path; + return path; +} + +const char *cbm_home_dir(void) { + // NOLINTNEXTLINE(concurrency-mt-unsafe) + return getenv("HOME"); +} + +const char *cbm_app_config_dir(void) { + static char buf[1024]; +#ifdef __APPLE__ + /* macOS: callers prepend "Library/Application Support/..." */ + return cbm_home_dir(); +#else + /* Linux: XDG_CONFIG_HOME or ~/.config */ + // NOLINTNEXTLINE(concurrency-mt-unsafe) + const char *xdg = getenv("XDG_CONFIG_HOME"); + if (xdg && xdg[0]) { + snprintf(buf, sizeof(buf), "%s", xdg); + return buf; + } + const char *home = cbm_home_dir(); + if (!home) { + return NULL; + } + snprintf(buf, sizeof(buf), "%s/.config", home); + return buf; +#endif +} + +const char *cbm_app_local_dir(void) { + /* On POSIX there is no distinction between "roaming" and "local" app data. + * Delegate to cbm_app_config_dir() so callers compile on all platforms. 
*/ + return cbm_app_config_dir(); +} + +#endif /* _WIN32 */ diff --git a/src/foundation/platform.h b/src/foundation/platform.h index ffa57fc..36cff26 100644 --- a/src/foundation/platform.h +++ b/src/foundation/platform.h @@ -1,78 +1,108 @@ -/* - * platform.h — OS abstractions. - * - * Provides cross-platform wrappers for: - * - Memory-mapped files (mmap / VirtualAlloc) - * - High-resolution monotonic clock - * - CPU core count - * - File existence check - */ -#ifndef CBM_PLATFORM_H -#define CBM_PLATFORM_H - -#include -#include -#include -#include - -/* ── Safe memory ──────────────────────────────────────────────── */ - -/* Safe realloc: frees old pointer on failure instead of leaking it. - * Returns NULL on allocation failure (old memory is freed). */ -static inline void *safe_realloc(void *ptr, size_t size) { - void *tmp = realloc(ptr, size); // NOLINT(clang-analyzer-optin.portability.UnixAPI) - if (!tmp) { - free(ptr); - } - return tmp; -} - -/* ── Memory mapping ────────────────────────────────────────────── */ - -/* Map a file read-only into memory. Returns NULL on error. - * *out_size is set to the file size. */ -void *cbm_mmap_read(const char *path, size_t *out_size); - -/* Unmap a previously mapped region. */ -void cbm_munmap(void *addr, size_t size); - -/* ── Timing ────────────────────────────────────────────────────── */ - -/* Monotonic nanosecond timestamp (for elapsed time measurement). */ -uint64_t cbm_now_ns(void); - -/* Monotonic millisecond timestamp. */ -uint64_t cbm_now_ms(void); - -/* ── System info ───────────────────────────────────────────────── */ - -/* Number of available CPU cores. */ -int cbm_nprocs(void); - -/* System topology: core types and RAM (only fields with production consumers). */ -typedef struct { - int total_cores; /* hw.ncpu (all cores) */ - int perf_cores; /* P-cores (Apple) or total_cores (others) */ - size_t total_ram; /* total physical RAM in bytes */ -} cbm_system_info_t; - -/* Query system information. 
Results are cached after first call. */ -cbm_system_info_t cbm_system_info(void); - -/* Recommended worker count for parallel indexing. - * initial=true: all cores (user is waiting for initial index) - * initial=false: max(1, perf_cores-1) (leave headroom for user apps) */ -int cbm_default_worker_count(bool initial); - -/* ── File system ───────────────────────────────────────────────── */ - -/* Check if a path exists. */ -bool cbm_file_exists(const char *path); - -/* Check if path is a directory. */ -bool cbm_is_dir(const char *path); - -/* Get file size. Returns -1 on error. */ -int64_t cbm_file_size(const char *path); - -#endif /* CBM_PLATFORM_H */ +/* + * platform.h — OS abstractions. + * + * Provides cross-platform wrappers for: + * - Memory-mapped files (mmap / VirtualAlloc) + * - High-resolution monotonic clock + * - CPU core count + * - File existence check + */ +#ifndef CBM_PLATFORM_H +#define CBM_PLATFORM_H + +#include +#include +#include +#include + +/* ── Safe memory ──────────────────────────────────────────────── */ + +/* Safe realloc: frees old pointer on failure instead of leaking it. + * Returns NULL on allocation failure (old memory is freed). */ +static inline void *safe_realloc(void *ptr, size_t size) { + void *tmp = realloc(ptr, size); // NOLINT(clang-analyzer-optin.portability.UnixAPI) + if (!tmp) { + free(ptr); + } + return tmp; +} + +/* ── Memory mapping ────────────────────────────────────────────── */ + +/* Map a file read-only into memory. Returns NULL on error. + * *out_size is set to the file size. */ +void *cbm_mmap_read(const char *path, size_t *out_size); + +/* Unmap a previously mapped region. */ +void cbm_munmap(void *addr, size_t size); + +/* ── Timing ────────────────────────────────────────────────────── */ + +/* Monotonic nanosecond timestamp (for elapsed time measurement). */ +uint64_t cbm_now_ns(void); + +/* Monotonic millisecond timestamp. 
*/ +uint64_t cbm_now_ms(void); + +/* ── System info ───────────────────────────────────────────────── */ + +/* Number of available CPU cores. */ +int cbm_nprocs(void); + +/* System topology: core types and RAM (only fields with production consumers). */ +typedef struct { + int total_cores; /* hw.ncpu (all cores) */ + int perf_cores; /* P-cores (Apple) or total_cores (others) */ + size_t total_ram; /* total physical RAM in bytes */ +} cbm_system_info_t; + +/* Query system information. Results are cached after first call. */ +cbm_system_info_t cbm_system_info(void); + +/* Recommended worker count for parallel indexing. + * initial=true: all cores (user is waiting for initial index) + * initial=false: max(1, perf_cores-1) (leave headroom for user apps) */ +int cbm_default_worker_count(bool initial); + +/* ── File system ───────────────────────────────────────────────── */ + +/* Check if a path exists. */ +bool cbm_file_exists(const char *path); + +/* Check if path is a directory. */ +bool cbm_is_dir(const char *path); + +/* Get the user's home directory. + * POSIX: returns HOME. Windows: returns HOME, then USERPROFILE. + * Path is normalized to forward slashes. + * Returns static buffer — do NOT free. Returns NULL on failure. */ +const char *cbm_home_dir(void); + +/* Get the application config directory. + * macOS: returns HOME (caller appends "Library/Application Support/App/..."). + * Linux: returns XDG_CONFIG_HOME or HOME/.config (caller appends "app/..."). + * Windows: returns APPDATA, e.g. "C:/Users/x/AppData/Roaming" (caller appends "App/..."). + * Path is normalized to forward slashes. + * Returns static buffer — do NOT free. Returns NULL on failure. */ +const char *cbm_app_config_dir(void); + +/* Get the application local data directory. + * macOS/Linux: same as cbm_app_config_dir(). + * Windows: returns LOCALAPPDATA, e.g. "C:/Users/x/AppData/Local" (caller appends "App/..."). + * Path is normalized to forward slashes. + * Returns static buffer — do NOT free. 
Returns NULL on failure. */ +const char *cbm_app_local_dir(void); + +/* Get the absolute path of the currently running executable. + * Returns static buffer — do NOT free. Returns "" on failure. */ +const char *cbm_self_exe_path(void); + +/* Normalize path separators to forward slashes (in-place). + * On Windows, converts backslashes to forward slashes. + * On POSIX, this is a no-op. Returns the input pointer. */ +char *cbm_normalize_path_sep(char *path); + +/* Get file size. Returns -1 on error. */ +int64_t cbm_file_size(const char *path); + +#endif /* CBM_PLATFORM_H */ diff --git a/src/main.c b/src/main.c index 79618fa..a71e0e9 100644 --- a/src/main.c +++ b/src/main.c @@ -1,296 +1,297 @@ -/* - * main.c — Entry point for codebase-memory-mcp. - * - * Modes: - * (default) Run as MCP server on stdin/stdout (JSON-RPC 2.0) - * cli Run a single tool call and print result - * --version Print version and exit - * --help Print usage and exit - * --ui=true/false Enable/disable HTTP UI server (persisted) - * --port=N Set HTTP UI port (persisted, default 9749) - * - * Signal handling: SIGTERM/SIGINT trigger graceful shutdown. - * Watcher runs in a background thread, polling for git changes. - * HTTP UI server (optional) runs in a background thread on localhost. 
- */ -#include "mcp/mcp.h" -#include "watcher/watcher.h" -#include "pipeline/pipeline.h" -#include "store/store.h" -#include "cli/cli.h" -#include "foundation/log.h" -#include "foundation/compat_thread.h" -#include "foundation/mem.h" -#include "ui/config.h" -#include "ui/http_server.h" -#include "ui/embedded_assets.h" - -#include -#include -#include -#include -#include - -#ifndef CBM_VERSION -#define CBM_VERSION "dev" -#endif - -/* ── Globals for signal handling ────────────────────────────────── */ - -static cbm_watcher_t *g_watcher = NULL; -static cbm_mcp_server_t *g_server = NULL; -static cbm_http_server_t *g_http_server = NULL; -static atomic_int g_shutdown = 0; - -static void signal_handler(int sig) { - (void)sig; - atomic_store(&g_shutdown, 1); - if (g_watcher) { - cbm_watcher_stop(g_watcher); - } - if (g_http_server) { - cbm_http_server_stop(g_http_server); - } - /* Close stdin to unblock getline in the MCP server loop */ - (void)fclose(stdin); -} - -/* ── Watcher background thread ──────────────────────────────────── */ - -static void *watcher_thread(void *arg) { - cbm_watcher_t *w = arg; -#define WATCHER_BASE_INTERVAL_MS 5000 - cbm_watcher_run(w, WATCHER_BASE_INTERVAL_MS); - return NULL; -} - -/* ── HTTP UI background thread ──────────────────────────────────── */ - -static void *http_thread(void *arg) { - cbm_http_server_t *srv = arg; - cbm_http_server_run(srv); - return NULL; -} - -/* ── Index callback for watcher ─────────────────────────────────── */ - -static int watcher_index_fn(const char *project_name, const char *root_path, void *user_data) { - (void)user_data; - cbm_log_info("watcher.reindex", "project", project_name, "path", root_path); - - cbm_pipeline_t *p = cbm_pipeline_new(root_path, NULL, CBM_MODE_FULL); - if (!p) { - return -1; - } - - int rc = cbm_pipeline_run(p); - cbm_pipeline_free(p); - return rc; -} - -/* ── CLI mode ───────────────────────────────────────────────────── */ - -static int run_cli(int argc, char **argv) { - if (argc < 1) 
{ - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - (void)fprintf(stderr, "Usage: codebase-memory-mcp cli [json_args]\n"); - return 1; - } - - const char *tool_name = argv[0]; - const char *args_json = argc >= 2 ? argv[1] : "{}"; - - cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); - if (!srv) { - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - (void)fprintf(stderr, "Failed to create server\n"); - return 1; - } - - char *result = cbm_mcp_handle_tool(srv, tool_name, args_json); - if (result) { - printf("%s\n", result); - free(result); - } - - cbm_mcp_server_free(srv); - return 0; -} - -/* ── Help ───────────────────────────────────────────────────────── */ - -static void print_help(void) { - printf("codebase-memory-mcp %s\n\n", CBM_VERSION); - printf("Usage:\n"); - printf(" codebase-memory-mcp Run MCP server on stdio\n"); - printf(" codebase-memory-mcp cli [json] Run a single tool\n"); - printf(" codebase-memory-mcp install [-y|-n] [--force] [--dry-run]\n"); - printf(" codebase-memory-mcp uninstall [-y|-n] [--dry-run]\n"); - printf(" codebase-memory-mcp update [-y|-n]\n"); - printf(" codebase-memory-mcp config \n"); - printf(" codebase-memory-mcp --version Print version\n"); - printf(" codebase-memory-mcp --help Print this help\n"); - printf("\nUI options:\n"); - printf(" --ui=true Enable HTTP graph visualization (persisted)\n"); - printf(" --ui=false Disable HTTP graph visualization (persisted)\n"); - printf(" --port=N Set UI port (default 9749, persisted)\n"); - printf("\nSupported agents (auto-detected):\n"); - printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); - printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); - printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n"); - printf(" list_projects, delete_project, index_status, detect_changes,\n"); - printf(" manage_adr, 
ingest_traces\n"); -} - -/* ── Main ───────────────────────────────────────────────────────── */ - -int main(int argc, char **argv) { - /* Parse arguments */ - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--version") == 0) { - printf("codebase-memory-mcp %s\n", CBM_VERSION); - return 0; - } - if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { - print_help(); - return 0; - } - if (strcmp(argv[i], "cli") == 0) { - cbm_mem_init(0.5); - return run_cli(argc - i - 1, argv + i + 1); - } - if (strcmp(argv[i], "install") == 0) { - return cbm_cmd_install(argc - i - 1, argv + i + 1); - } - if (strcmp(argv[i], "uninstall") == 0) { - return cbm_cmd_uninstall(argc - i - 1, argv + i + 1); - } - if (strcmp(argv[i], "update") == 0) { - return cbm_cmd_update(argc - i - 1, argv + i + 1); - } - if (strcmp(argv[i], "config") == 0) { - return cbm_cmd_config(argc - i - 1, argv + i + 1); - } - } - - /* Default: MCP server on stdio */ - cbm_mem_init(0.5); /* 50% of RAM — safe now because mimalloc tracks ALL - * memory (C + C++ allocations) via global override. - * No more untracked heap blind spots. 
*/ - /* Store binary path for subprocess spawning + hook log sink */ - cbm_http_server_set_binary_path(argv[0]); - cbm_log_set_sink(cbm_ui_log_append); - cbm_log_info("server.start", "version", CBM_VERSION); - - /* Parse --ui and --port flags (persisted config) */ - cbm_ui_config_t ui_cfg; - cbm_ui_config_load(&ui_cfg); - - bool config_changed = false; - for (int i = 1; i < argc; i++) { - if (strncmp(argv[i], "--ui=", 5) == 0) { - ui_cfg.ui_enabled = (strcmp(argv[i] + 5, "true") == 0); - config_changed = true; - } - if (strncmp(argv[i], "--port=", 7) == 0) { - int p = (int)strtol(argv[i] + 7, NULL, 10); - if (p > 0 && p < 65536) { - ui_cfg.ui_port = p; - config_changed = true; - } - } - } - if (config_changed) { - cbm_ui_config_save(&ui_cfg); - } - - /* Install signal handlers */ -#ifdef _WIN32 - signal(SIGTERM, signal_handler); - signal(SIGINT, signal_handler); -#else - // NOLINTNEXTLINE(misc-include-cleaner) — sigaction provided by standard header - struct sigaction sa = {0}; - // NOLINTNEXTLINE(misc-include-cleaner) — sa_handler provided by standard header - sa.sa_handler = signal_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - sigaction(SIGTERM, &sa, NULL); - sigaction(SIGINT, &sa, NULL); -#endif - - /* Open config store for runtime settings */ - char config_dir[1024]; - const char *cfg_home = getenv("HOME"); - cbm_config_t *runtime_config = NULL; - if (cfg_home) { - snprintf(config_dir, sizeof(config_dir), "%s/.cache/codebase-memory-mcp", cfg_home); - runtime_config = cbm_config_open(config_dir); - } - - /* Create MCP server */ - g_server = cbm_mcp_server_new(NULL); - if (!g_server) { - cbm_log_error("server.err", "msg", "failed to create server"); - cbm_config_close(runtime_config); - return 1; - } - - /* Create and start watcher in background thread */ - cbm_store_t *watch_store = cbm_store_open_memory(); - g_watcher = cbm_watcher_new(watch_store, watcher_index_fn, NULL); - - /* Wire watcher + config into MCP server for session auto-index */ - 
cbm_mcp_server_set_watcher(g_server, g_watcher); - cbm_mcp_server_set_config(g_server, runtime_config); - cbm_thread_t watcher_tid; - bool watcher_started = false; - - if (g_watcher) { - if (cbm_thread_create(&watcher_tid, 0, watcher_thread, g_watcher) == 0) { - watcher_started = true; - } - } - - /* Optionally start HTTP UI server in background thread */ - cbm_thread_t http_tid; - bool http_started = false; - - if (ui_cfg.ui_enabled && CBM_EMBEDDED_FILE_COUNT > 0) { - g_http_server = cbm_http_server_new(ui_cfg.ui_port); - if (g_http_server) { - if (cbm_thread_create(&http_tid, 0, http_thread, g_http_server) == 0) { - http_started = true; - } - } - } else if (ui_cfg.ui_enabled && CBM_EMBEDDED_FILE_COUNT == 0) { - cbm_log_warn("ui.no_assets", "hint", "rebuild with: make -f Makefile.cbm cbm-with-ui"); - } - - /* Run MCP event loop (blocks until EOF or signal) */ - int rc = cbm_mcp_server_run(g_server, stdin, stdout); - - /* Shutdown */ - cbm_log_info("server.shutdown"); - - if (http_started) { - cbm_http_server_stop(g_http_server); - cbm_thread_join(&http_tid); - cbm_http_server_free(g_http_server); - g_http_server = NULL; - } - - if (watcher_started) { - cbm_watcher_stop(g_watcher); - cbm_thread_join(&watcher_tid); - } - cbm_watcher_free(g_watcher); - cbm_store_close(watch_store); - cbm_mcp_server_free(g_server); - cbm_config_close(runtime_config); - - g_watcher = NULL; - g_server = NULL; - - return rc; -} +/* + * main.c — Entry point for codebase-memory-mcp. + * + * Modes: + * (default) Run as MCP server on stdin/stdout (JSON-RPC 2.0) + * cli Run a single tool call and print result + * --version Print version and exit + * --help Print usage and exit + * --ui=true/false Enable/disable HTTP UI server (persisted) + * --port=N Set HTTP UI port (persisted, default 9749) + * + * Signal handling: SIGTERM/SIGINT trigger graceful shutdown. + * Watcher runs in a background thread, polling for git changes. 
+ * HTTP UI server (optional) runs in a background thread on localhost. + */ +#include "mcp/mcp.h" +#include "watcher/watcher.h" +#include "pipeline/pipeline.h" +#include "store/store.h" +#include "cli/cli.h" +#include "foundation/log.h" +#include "foundation/platform.h" +#include "foundation/compat_thread.h" +#include "foundation/mem.h" +#include "ui/config.h" +#include "ui/http_server.h" +#include "ui/embedded_assets.h" + +#include +#include +#include +#include +#include + +#ifndef CBM_VERSION +#define CBM_VERSION "dev" +#endif + +/* ── Globals for signal handling ────────────────────────────────── */ + +static cbm_watcher_t *g_watcher = NULL; +static cbm_mcp_server_t *g_server = NULL; +static cbm_http_server_t *g_http_server = NULL; +static atomic_int g_shutdown = 0; + +static void signal_handler(int sig) { + (void)sig; + atomic_store(&g_shutdown, 1); + if (g_watcher) { + cbm_watcher_stop(g_watcher); + } + if (g_http_server) { + cbm_http_server_stop(g_http_server); + } + /* Close stdin to unblock getline in the MCP server loop */ + (void)fclose(stdin); +} + +/* ── Watcher background thread ──────────────────────────────────── */ + +static void *watcher_thread(void *arg) { + cbm_watcher_t *w = arg; +#define WATCHER_BASE_INTERVAL_MS 5000 + cbm_watcher_run(w, WATCHER_BASE_INTERVAL_MS); + return NULL; +} + +/* ── HTTP UI background thread ──────────────────────────────────── */ + +static void *http_thread(void *arg) { + cbm_http_server_t *srv = arg; + cbm_http_server_run(srv); + return NULL; +} + +/* ── Index callback for watcher ─────────────────────────────────── */ + +static int watcher_index_fn(const char *project_name, const char *root_path, void *user_data) { + (void)user_data; + cbm_log_info("watcher.reindex", "project", project_name, "path", root_path); + + cbm_pipeline_t *p = cbm_pipeline_new(root_path, NULL, CBM_MODE_FULL); + if (!p) { + return -1; + } + + int rc = cbm_pipeline_run(p); + cbm_pipeline_free(p); + return rc; +} + +/* ── CLI mode 
───────────────────────────────────────────────────── */ + +static int run_cli(int argc, char **argv) { + if (argc < 1) { + // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) + (void)fprintf(stderr, "Usage: codebase-memory-mcp cli [json_args]\n"); + return 1; + } + + const char *tool_name = argv[0]; + const char *args_json = argc >= 2 ? argv[1] : "{}"; + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) + (void)fprintf(stderr, "Failed to create server\n"); + return 1; + } + + char *result = cbm_mcp_handle_tool(srv, tool_name, args_json); + if (result) { + printf("%s\n", result); + free(result); + } + + cbm_mcp_server_free(srv); + return 0; +} + +/* ── Help ───────────────────────────────────────────────────────── */ + +static void print_help(void) { + printf("codebase-memory-mcp %s\n\n", CBM_VERSION); + printf("Usage:\n"); + printf(" codebase-memory-mcp Run MCP server on stdio\n"); + printf(" codebase-memory-mcp cli [json] Run a single tool\n"); + printf(" codebase-memory-mcp install [-y|-n] [--force] [--dry-run]\n"); + printf(" codebase-memory-mcp uninstall [-y|-n] [--dry-run]\n"); + printf(" codebase-memory-mcp update [-y|-n]\n"); + printf(" codebase-memory-mcp config \n"); + printf(" codebase-memory-mcp --version Print version\n"); + printf(" codebase-memory-mcp --help Print this help\n"); + printf("\nUI options:\n"); + printf(" --ui=true Enable HTTP graph visualization (persisted)\n"); + printf(" --ui=false Disable HTTP graph visualization (persisted)\n"); + printf(" --port=N Set UI port (default 9749, persisted)\n"); + printf("\nSupported agents (auto-detected):\n"); + printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n"); + printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n"); + printf(" get_code_snippet, get_graph_schema, get_architecture, 
search_code,\n"); + printf(" list_projects, delete_project, index_status, detect_changes,\n"); + printf(" manage_adr, ingest_traces\n"); +} + +/* ── Main ───────────────────────────────────────────────────────── */ + +int main(int argc, char **argv) { + /* Parse arguments */ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--version") == 0) { + printf("codebase-memory-mcp %s\n", CBM_VERSION); + return 0; + } + if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { + print_help(); + return 0; + } + if (strcmp(argv[i], "cli") == 0) { + cbm_mem_init(0.5); + return run_cli(argc - i - 1, argv + i + 1); + } + if (strcmp(argv[i], "install") == 0) { + return cbm_cmd_install(argc - i - 1, argv + i + 1); + } + if (strcmp(argv[i], "uninstall") == 0) { + return cbm_cmd_uninstall(argc - i - 1, argv + i + 1); + } + if (strcmp(argv[i], "update") == 0) { + return cbm_cmd_update(argc - i - 1, argv + i + 1); + } + if (strcmp(argv[i], "config") == 0) { + return cbm_cmd_config(argc - i - 1, argv + i + 1); + } + } + + /* Default: MCP server on stdio */ + cbm_mem_init(0.5); /* 50% of RAM — safe now because mimalloc tracks ALL + * memory (C + C++ allocations) via global override. + * No more untracked heap blind spots. 
*/ + /* Store binary path for subprocess spawning + hook log sink */ + cbm_http_server_set_binary_path(argv[0]); + cbm_log_set_sink(cbm_ui_log_append); + cbm_log_info("server.start", "version", CBM_VERSION); + + /* Parse --ui and --port flags (persisted config) */ + cbm_ui_config_t ui_cfg; + cbm_ui_config_load(&ui_cfg); + + bool config_changed = false; + for (int i = 1; i < argc; i++) { + if (strncmp(argv[i], "--ui=", 5) == 0) { + ui_cfg.ui_enabled = (strcmp(argv[i] + 5, "true") == 0); + config_changed = true; + } + if (strncmp(argv[i], "--port=", 7) == 0) { + int p = (int)strtol(argv[i] + 7, NULL, 10); + if (p > 0 && p < 65536) { + ui_cfg.ui_port = p; + config_changed = true; + } + } + } + if (config_changed) { + cbm_ui_config_save(&ui_cfg); + } + + /* Install signal handlers */ +#ifdef _WIN32 + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); +#else + // NOLINTNEXTLINE(misc-include-cleaner) — sigaction provided by standard header + struct sigaction sa = {0}; + // NOLINTNEXTLINE(misc-include-cleaner) — sa_handler provided by standard header + sa.sa_handler = signal_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGINT, &sa, NULL); +#endif + + /* Open config store for runtime settings */ + char config_dir[1024]; + const char *cfg_home = cbm_home_dir(); + cbm_config_t *runtime_config = NULL; + if (cfg_home) { + snprintf(config_dir, sizeof(config_dir), "%s/.cache/codebase-memory-mcp", cfg_home); + runtime_config = cbm_config_open(config_dir); + } + + /* Create MCP server */ + g_server = cbm_mcp_server_new(NULL); + if (!g_server) { + cbm_log_error("server.err", "msg", "failed to create server"); + cbm_config_close(runtime_config); + return 1; + } + + /* Create and start watcher in background thread */ + cbm_store_t *watch_store = cbm_store_open_memory(); + g_watcher = cbm_watcher_new(watch_store, watcher_index_fn, NULL); + + /* Wire watcher + config into MCP server for session auto-index */ + 
cbm_mcp_server_set_watcher(g_server, g_watcher); + cbm_mcp_server_set_config(g_server, runtime_config); + cbm_thread_t watcher_tid; + bool watcher_started = false; + + if (g_watcher) { + if (cbm_thread_create(&watcher_tid, 0, watcher_thread, g_watcher) == 0) { + watcher_started = true; + } + } + + /* Optionally start HTTP UI server in background thread */ + cbm_thread_t http_tid; + bool http_started = false; + + if (ui_cfg.ui_enabled && CBM_EMBEDDED_FILE_COUNT > 0) { + g_http_server = cbm_http_server_new(ui_cfg.ui_port); + if (g_http_server) { + if (cbm_thread_create(&http_tid, 0, http_thread, g_http_server) == 0) { + http_started = true; + } + } + } else if (ui_cfg.ui_enabled && CBM_EMBEDDED_FILE_COUNT == 0) { + cbm_log_warn("ui.no_assets", "hint", "rebuild with: make -f Makefile.cbm cbm-with-ui"); + } + + /* Run MCP event loop (blocks until EOF or signal) */ + int rc = cbm_mcp_server_run(g_server, stdin, stdout); + + /* Shutdown */ + cbm_log_info("server.shutdown"); + + if (http_started) { + cbm_http_server_stop(g_http_server); + cbm_thread_join(&http_tid); + cbm_http_server_free(g_http_server); + g_http_server = NULL; + } + + if (watcher_started) { + cbm_watcher_stop(g_watcher); + cbm_thread_join(&watcher_tid); + } + cbm_watcher_free(g_watcher); + cbm_store_close(watch_store); + cbm_mcp_server_free(g_server); + cbm_config_close(runtime_config); + + g_watcher = NULL; + g_server = NULL; + + return rc; +} diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index bdfdae8..a4d62f6 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -1,2501 +1,2498 @@ -/* - * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 14 graph tools. - * - * Uses yyjson for fast JSON parsing/building. - * Single-threaded event loop: read line → parse → dispatch → respond. 
- */ - -// operations - -#include "mcp/mcp.h" -#include "store/store.h" -#include "cypher/cypher.h" -#include "pipeline/pipeline.h" -#include "cli/cli.h" -#include "watcher/watcher.h" -#include "foundation/mem.h" -#include "foundation/platform.h" -#include "foundation/compat.h" -#include "foundation/compat_fs.h" -#include "foundation/compat_thread.h" -#include "foundation/log.h" -#include "foundation/str_util.h" - -#ifdef _WIN32 -#include /* _getpid */ -#else -#include -#include -#include -#include -#endif -#include -#include // int64_t -#include -#include -#include -#include -#include - -/* ── Constants ────────────────────────────────────────────────── */ - -/* Default snippet fallback line count */ -#define SNIPPET_DEFAULT_LINES 50 - -/* Idle store eviction: close cached project store after this many seconds - * of inactivity to free SQLite memory during idle periods. */ -#define STORE_IDLE_TIMEOUT_S 60 - -/* Directory permissions: rwxr-xr-x */ -#define ADR_DIR_PERMS 0755 - -/* JSON-RPC 2.0 standard error codes */ -#define JSONRPC_PARSE_ERROR (-32700) -#define JSONRPC_METHOD_NOT_FOUND (-32601) - -/* ── Helpers ────────────────────────────────────────────────────── */ - -static char *heap_strdup(const char *s) { - if (!s) { - return NULL; - } - size_t len = strlen(s); - char *d = malloc(len + 1); - if (d) { - memcpy(d, s, len + 1); - } - return d; -} - -/* Write yyjson_mut_doc to heap-allocated JSON string. - * ALLOW_INVALID_UNICODE: some database strings may contain non-UTF-8 bytes - * from older indexing runs — don't fail serialization over it. 
*/ -static char *yy_doc_to_str(yyjson_mut_doc *doc) { - size_t len = 0; - char *s = yyjson_mut_write(doc, YYJSON_WRITE_ALLOW_INVALID_UNICODE, &len); - return s; -} - -/* ══════════════════════════════════════════════════════════════════ - * JSON-RPC PARSING - * ══════════════════════════════════════════════════════════════════ */ - -int cbm_jsonrpc_parse(const char *line, cbm_jsonrpc_request_t *out) { - memset(out, 0, sizeof(*out)); - out->id = -1; - - yyjson_doc *doc = yyjson_read(line, strlen(line), 0); - if (!doc) { - return -1; - } - - yyjson_val *root = yyjson_doc_get_root(doc); - if (!yyjson_is_obj(root)) { - yyjson_doc_free(doc); - return -1; - } - - yyjson_val *v_jsonrpc = yyjson_obj_get(root, "jsonrpc"); - yyjson_val *v_method = yyjson_obj_get(root, "method"); - yyjson_val *v_id = yyjson_obj_get(root, "id"); - yyjson_val *v_params = yyjson_obj_get(root, "params"); - - if (!v_method || !yyjson_is_str(v_method)) { - yyjson_doc_free(doc); - return -1; - } - - out->jsonrpc = - heap_strdup(v_jsonrpc && yyjson_is_str(v_jsonrpc) ? 
yyjson_get_str(v_jsonrpc) : "2.0"); - out->method = heap_strdup(yyjson_get_str(v_method)); - - if (v_id) { - out->has_id = true; - if (yyjson_is_int(v_id)) { - out->id = yyjson_get_int(v_id); - } else if (yyjson_is_str(v_id)) { - out->id = strtol(yyjson_get_str(v_id), NULL, 10); - } - } - - if (v_params) { - out->params_raw = yyjson_val_write(v_params, 0, NULL); - } - - yyjson_doc_free(doc); - return 0; -} - -void cbm_jsonrpc_request_free(cbm_jsonrpc_request_t *r) { - if (!r) { - return; - } - free((void *)r->jsonrpc); - free((void *)r->method); - free((void *)r->params_raw); - memset(r, 0, sizeof(*r)); -} - -/* ══════════════════════════════════════════════════════════════════ - * JSON-RPC FORMATTING - * ══════════════════════════════════════════════════════════════════ */ - -char *cbm_jsonrpc_format_response(const cbm_jsonrpc_response_t *resp) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "jsonrpc", "2.0"); - yyjson_mut_obj_add_int(doc, root, "id", resp->id); - - if (resp->error_json) { - /* Parse the error JSON and embed */ - yyjson_doc *err_doc = yyjson_read(resp->error_json, strlen(resp->error_json), 0); - if (err_doc) { - yyjson_mut_val *err_val = yyjson_val_mut_copy(doc, yyjson_doc_get_root(err_doc)); - yyjson_mut_obj_add_val(doc, root, "error", err_val); - yyjson_doc_free(err_doc); - } - } else if (resp->result_json) { - /* Parse the result JSON and embed */ - yyjson_doc *res_doc = yyjson_read(resp->result_json, strlen(resp->result_json), 0); - if (res_doc) { - yyjson_mut_val *res_val = yyjson_val_mut_copy(doc, yyjson_doc_get_root(res_doc)); - yyjson_mut_obj_add_val(doc, root, "result", res_val); - yyjson_doc_free(res_doc); - } - } - - char *out = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - return out; -} - -char *cbm_jsonrpc_format_error(int64_t id, int code, const char *message) { - yyjson_mut_doc *doc = 
yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "jsonrpc", "2.0"); - yyjson_mut_obj_add_int(doc, root, "id", id); - - yyjson_mut_val *err = yyjson_mut_obj(doc); - yyjson_mut_obj_add_int(doc, err, "code", code); - yyjson_mut_obj_add_str(doc, err, "message", message); - yyjson_mut_obj_add_val(doc, root, "error", err); - - char *out = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - return out; -} - -/* ══════════════════════════════════════════════════════════════════ - * MCP PROTOCOL HELPERS - * ══════════════════════════════════════════════════════════════════ */ - -char *cbm_mcp_text_result(const char *text, bool is_error) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_val *content = yyjson_mut_arr(doc); - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "type", "text"); - yyjson_mut_obj_add_str(doc, item, "text", text); - yyjson_mut_arr_add_val(content, item); - yyjson_mut_obj_add_val(doc, root, "content", content); - - if (is_error) { - yyjson_mut_obj_add_bool(doc, root, "isError", true); - } - - char *out = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - return out; -} - -/* ── Tool definitions ─────────────────────────────────────────── */ - -typedef struct { - const char *name; - const char *description; - const char *input_schema; /* JSON string */ -} tool_def_t; - -static const tool_def_t TOOLS[] = { - {"index_repository", "Index a repository into the knowledge graph", - "{\"type\":\"object\",\"properties\":{\"repo_path\":{\"type\":\"string\",\"description\":" - "\"Path to the " - "repository\"},\"mode\":{\"type\":\"string\",\"enum\":[\"full\",\"fast\"],\"default\":" - "\"full\"}},\"required\":[\"repo_path\"]}"}, - - {"search_graph", - "Search the code knowledge graph for functions, classes, routes, and variables. 
Use INSTEAD " - "OF grep/glob when finding code definitions, implementations, or relationships. Returns " - "precise results in one call.", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"label\":{\"type\":" - "\"string\"},\"name_pattern\":{\"type\":\"string\"},\"qn_pattern\":{\"type\":\"string\"}," - "\"file_pattern\":{\"type\":\"string\"},\"relationship\":{\"type\":\"string\"},\"min_degree\":" - "{\"type\":\"integer\"},\"max_degree\":{\"type\":\"integer\"},\"exclude_entry_points\":{" - "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" - "\"integer\",\"description\":\"Max results. Default: " - "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0}}}"}, - - {"query_graph", - "Execute a Cypher query against the knowledge graph for complex multi-hop patterns, " - "aggregations, and cross-service analysis.", - "{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\",\"description\":\"Cypher " - "query\"},\"project\":{\"type\":\"string\"},\"max_rows\":{\"type\":\"integer\"," - "\"description\":" - "\"Optional row limit. Default: unlimited (100k ceiling)\"}},\"required\":[\"query\"]}"}, - - {"trace_call_path", - "Trace function call paths — who calls a function and what it calls. Use INSTEAD OF grep when " - "finding callers, dependencies, or impact analysis.", - "{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{" - "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," - "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" - "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_" - "name\"]}"}, - - {"get_code_snippet", - "Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the " - "exact qualified_name, then pass it here. This is a read tool, not a search tool. 
Accepts " - "full qualified_name (exact match) or short function name (returns suggestions if ambiguous).", - "{\"type\":\"object\",\"properties\":{\"qualified_name\":{\"type\":\"string\",\"description\":" - "\"Full qualified_name from search_graph, or short function name\"},\"project\":{" - "\"type\":\"string\"},\"include_neighbors\":{" - "\"type\":\"boolean\",\"default\":false}},\"required\":[\"qualified_name\"]}"}, - - {"get_graph_schema", "Get the schema of the knowledge graph (node labels, edge types)", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"}}}"}, - - {"get_architecture", - "Get high-level architecture overview — packages, services, dependencies, and project " - "structure at a glance.", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":" - "\"array\",\"items\":{\"type\":\"string\"}}}}"}, - - {"search_code", - "Search source code content with text or regex patterns. Use for string literals, error " - "messages, and config values that are not in the knowledge graph.", - "{\"type\":\"object\",\"properties\":{\"pattern\":{\"type\":\"string\"},\"project\":{\"type\":" - "\"string\"},\"file_pattern\":{\"type\":\"string\"},\"regex\":{\"type\":\"boolean\"," - "\"default\":false},\"limit\":{\"type\":\"integer\",\"description\":\"Max results. 
Default: " - "unlimited\"}},\"required\":[" - "\"pattern\"]}"}, - - {"list_projects", "List all indexed projects", "{\"type\":\"object\",\"properties\":{}}"}, - - {"delete_project", "Delete a project from the index", - "{\"type\":\"object\",\"properties\":{\"project_name\":{\"type\":\"string\"}},\"required\":[" - "\"project_name\"]}"}, - - {"index_status", "Get the indexing status of a project", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"}}}"}, - - {"detect_changes", "Detect code changes and their impact", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"scope\":{\"type\":" - "\"string\"},\"depth\":{\"type\":\"integer\",\"default\":2},\"base_branch\":{\"type\":" - "\"string\",\"default\":\"main\"}}}"}, - - {"manage_adr", "Create or update Architecture Decision Records", - "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"mode\":{\"type\":" - "\"string\",\"enum\":[\"get\",\"update\",\"sections\"]},\"content\":{\"type\":\"string\"}," - "\"sections\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}}"}, - - {"ingest_traces", "Ingest runtime traces to enhance the knowledge graph", - "{\"type\":\"object\",\"properties\":{\"traces\":{\"type\":\"array\",\"items\":{\"type\":" - "\"object\"}},\"project\":{\"type\":" - "\"string\"}},\"required\":[\"traces\"]}"}, -}; - -static const int TOOL_COUNT = sizeof(TOOLS) / sizeof(TOOLS[0]); - -char *cbm_mcp_tools_list(void) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_val *tools = yyjson_mut_arr(doc); - - for (int i = 0; i < TOOL_COUNT; i++) { - yyjson_mut_val *tool = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, tool, "name", TOOLS[i].name); - yyjson_mut_obj_add_str(doc, tool, "description", TOOLS[i].description); - - /* Parse input schema JSON and embed */ - yyjson_doc *schema_doc = - yyjson_read(TOOLS[i].input_schema, 
strlen(TOOLS[i].input_schema), 0); - if (schema_doc) { - yyjson_mut_val *schema = yyjson_val_mut_copy(doc, yyjson_doc_get_root(schema_doc)); - yyjson_mut_obj_add_val(doc, tool, "inputSchema", schema); - yyjson_doc_free(schema_doc); - } - - yyjson_mut_arr_add_val(tools, tool); - } - - yyjson_mut_obj_add_val(doc, root, "tools", tools); - - char *out = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - return out; -} - -/* Supported protocol versions, newest first. The server picks the newest - * version that it shares with the client (per MCP spec version negotiation). */ -static const char *SUPPORTED_PROTOCOL_VERSIONS[] = { - "2025-11-25", - "2025-06-18", - "2025-03-26", - "2024-11-05", -}; -static const int SUPPORTED_VERSION_COUNT = - (int)(sizeof(SUPPORTED_PROTOCOL_VERSIONS) / sizeof(SUPPORTED_PROTOCOL_VERSIONS[0])); - -char *cbm_mcp_initialize_response(const char *params_json) { - /* Determine protocol version: if client requests a version we support, - * echo it back; otherwise respond with our latest. 
*/ - const char *version = SUPPORTED_PROTOCOL_VERSIONS[0]; /* default: latest */ - if (params_json) { - yyjson_doc *pdoc = yyjson_read(params_json, strlen(params_json), 0); - if (pdoc) { - yyjson_val *pv = yyjson_obj_get(yyjson_doc_get_root(pdoc), "protocolVersion"); - if (pv && yyjson_is_str(pv)) { - const char *requested = yyjson_get_str(pv); - for (int i = 0; i < SUPPORTED_VERSION_COUNT; i++) { - if (strcmp(requested, SUPPORTED_PROTOCOL_VERSIONS[i]) == 0) { - version = SUPPORTED_PROTOCOL_VERSIONS[i]; - break; - } - } - } - yyjson_doc_free(pdoc); - } - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "protocolVersion", version); - - yyjson_mut_val *impl = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, impl, "name", "codebase-memory-mcp"); - yyjson_mut_obj_add_str(doc, impl, "version", "0.10.0"); - yyjson_mut_obj_add_val(doc, root, "serverInfo", impl); - - yyjson_mut_val *caps = yyjson_mut_obj(doc); - yyjson_mut_val *tools_cap = yyjson_mut_obj(doc); - yyjson_mut_obj_add_val(doc, caps, "tools", tools_cap); - yyjson_mut_obj_add_val(doc, root, "capabilities", caps); - - char *out = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - return out; -} - -/* ══════════════════════════════════════════════════════════════════ - * ARGUMENT EXTRACTION - * ══════════════════════════════════════════════════════════════════ */ - -char *cbm_mcp_get_tool_name(const char *params_json) { - yyjson_doc *doc = yyjson_read(params_json, strlen(params_json), 0); - if (!doc) { - return NULL; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *name = yyjson_obj_get(root, "name"); - char *result = NULL; - if (name && yyjson_is_str(name)) { - result = heap_strdup(yyjson_get_str(name)); - } - yyjson_doc_free(doc); - return result; -} - -char *cbm_mcp_get_arguments(const char *params_json) { - yyjson_doc *doc = yyjson_read(params_json, 
strlen(params_json), 0); - if (!doc) { - return NULL; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *args = yyjson_obj_get(root, "arguments"); - char *result = NULL; - if (args) { - result = yyjson_val_write(args, 0, NULL); - } - yyjson_doc_free(doc); - return result ? result : heap_strdup("{}"); -} - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -char *cbm_mcp_get_string_arg(const char *args_json, const char *key) { - yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); - if (!doc) { - return NULL; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *val = yyjson_obj_get(root, key); - char *result = NULL; - if (val && yyjson_is_str(val)) { - result = heap_strdup(yyjson_get_str(val)); - } - yyjson_doc_free(doc); - return result; -} - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -int cbm_mcp_get_int_arg(const char *args_json, const char *key, int default_val) { - yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); - if (!doc) { - return default_val; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *val = yyjson_obj_get(root, key); - int result = default_val; - if (val && yyjson_is_int(val)) { - result = yyjson_get_int(val); - } - yyjson_doc_free(doc); - return result; -} - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -bool cbm_mcp_get_bool_arg(const char *args_json, const char *key) { - yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); - if (!doc) { - return false; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *val = yyjson_obj_get(root, key); - bool result = false; - if (val && yyjson_is_bool(val)) { - result = yyjson_get_bool(val); - } - yyjson_doc_free(doc); - return result; -} - -/* ══════════════════════════════════════════════════════════════════ - * MCP SERVER - * ══════════════════════════════════════════════════════════════════ */ - -struct cbm_mcp_server { - cbm_store_t *store; /* currently open project store (or NULL) */ - 
bool owns_store; /* true if we opened the store */ - char *current_project; /* which project store is open for (heap) */ - time_t store_last_used; /* last time resolve_store was called for a named project */ - char update_notice[256]; /* one-shot update notice, cleared after first injection */ - bool update_checked; /* true after background check has been launched */ - cbm_thread_t update_tid; /* background update check thread */ - bool update_thread_active; /* true if update thread was started and needs joining */ - - /* Session + auto-index state */ - char session_root[1024]; /* detected project root path */ - char session_project[256]; /* derived project name */ - bool session_detected; /* true after first detection attempt */ - struct cbm_watcher *watcher; /* external watcher ref (not owned) */ - struct cbm_config *config; /* external config ref (not owned) */ - cbm_thread_t autoindex_tid; - bool autoindex_active; /* true if auto-index thread was started */ -}; - -cbm_mcp_server_t *cbm_mcp_server_new(const char *store_path) { - cbm_mcp_server_t *srv = calloc(1, sizeof(*srv)); - if (!srv) { - return NULL; - } - - /* If a store_path is given, open that project directly. - * Otherwise, create an in-memory store for test/embedded use. */ - if (store_path) { - srv->store = cbm_store_open(store_path); - srv->current_project = heap_strdup(store_path); - } else { - srv->store = cbm_store_open_memory(); - } - srv->owns_store = true; - - return srv; -} - -cbm_store_t *cbm_mcp_server_store(cbm_mcp_server_t *srv) { - return srv ? srv->store : NULL; -} - -void cbm_mcp_server_set_project(cbm_mcp_server_t *srv, const char *project) { - if (!srv) { - return; - } - free(srv->current_project); - srv->current_project = project ? 
heap_strdup(project) : NULL; -} - -void cbm_mcp_server_set_watcher(cbm_mcp_server_t *srv, struct cbm_watcher *w) { - if (srv) { - srv->watcher = w; - } -} - -void cbm_mcp_server_set_config(cbm_mcp_server_t *srv, struct cbm_config *cfg) { - if (srv) { - srv->config = cfg; - } -} - -void cbm_mcp_server_free(cbm_mcp_server_t *srv) { - if (!srv) { - return; - } - if (srv->update_thread_active) { - cbm_thread_join(&srv->update_tid); - } - if (srv->autoindex_active) { - cbm_thread_join(&srv->autoindex_tid); - } - if (srv->owns_store && srv->store) { - cbm_store_close(srv->store); - } - free(srv->current_project); - free(srv); -} - -/* ── Idle store eviction ──────────────────────────────────────── */ - -void cbm_mcp_server_evict_idle(cbm_mcp_server_t *srv, int timeout_s) { - if (!srv || !srv->store) { - return; - } - /* Protect initial in-memory stores that were never accessed via a named project. - * store_last_used stays 0 until resolve_store is called with a non-NULL project. */ - if (srv->store_last_used == 0) { - return; - } - - time_t now = time(NULL); - if ((now - srv->store_last_used) < timeout_s) { - return; - } - - if (srv->owns_store) { - cbm_store_close(srv->store); - } - srv->store = NULL; - free(srv->current_project); - srv->current_project = NULL; - srv->store_last_used = 0; -} - -bool cbm_mcp_server_has_cached_store(cbm_mcp_server_t *srv) { - return (srv && srv->store != NULL) != 0; -} - -/* ── Cache dir + project DB path helpers ───────────────────────── */ - -/* Returns the platform cache directory: ~/.cache/codebase-memory-mcp - * Writes to buf, returns buf for convenience. 
*/ -static const char *cache_dir(char *buf, size_t bufsz) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *home = getenv("HOME"); - if (!home) { - home = "/tmp"; - } - snprintf(buf, bufsz, "%s/.cache/codebase-memory-mcp", home); - return buf; -} - -/* Returns full .db path for a project: /.db */ -static const char *project_db_path(const char *project, char *buf, size_t bufsz) { - char dir[1024]; - cache_dir(dir, sizeof(dir)); - snprintf(buf, bufsz, "%s/%s.db", dir, project); - return buf; -} - -/* ── Store resolution ──────────────────────────────────────────── */ - -/* Open the right project's .db file for query tools. - * Caches the connection — reopens only when project changes. - * Tracks last-access time so the event loop can evict idle stores. */ -static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { - if (!project) { - return srv->store; /* no project specified → use whatever's open */ - } - - srv->store_last_used = time(NULL); - - /* Already open for this project? */ - if (srv->current_project && strcmp(srv->current_project, project) == 0 && srv->store) { - return srv->store; - } - - /* Close old store */ - if (srv->owns_store && srv->store) { - cbm_store_close(srv->store); - srv->store = NULL; - } - - /* Open project's .db file */ - char path[1024]; - project_db_path(project, path, sizeof(path)); - srv->store = cbm_store_open_path(path); - srv->owns_store = true; - free(srv->current_project); - srv->current_project = heap_strdup(project); - - return srv->store; -} - -/* Bail with empty JSON result when no store is available. */ -#define REQUIRE_STORE(store, project) \ - do { \ - if (!(store)) { \ - free(project); \ - return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); \ - } \ - } while (0) - -/* ── Tool handler implementations ─────────────────────────────── */ - -/* list_projects: scan cache directory for .db files. - * Each project is a single .db file — no central registry needed. 
*/ -static char *handle_list_projects(cbm_mcp_server_t *srv, const char *args) { - (void)srv; - (void)args; - - char dir_path[1024]; - cache_dir(dir_path, sizeof(dir_path)); - - cbm_dir_t *d = cbm_opendir(dir_path); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - yyjson_mut_val *arr = yyjson_mut_arr(doc); - - if (d) { - cbm_dirent_t *entry; - while ((entry = cbm_readdir(d)) != NULL) { - const char *name = entry->name; - size_t len = strlen(name); - - /* Must end with .db and be at least 4 chars (x.db) */ - if (len < 4 || strcmp(name + len - 3, ".db") != 0) { - continue; - } - - /* Skip temp/internal files */ - if (strncmp(name, "tmp-", 4) == 0 || strncmp(name, "_", 1) == 0 || - strncmp(name, ":memory:", 8) == 0) { - continue; - } - - /* Extract project name = filename without .db suffix */ - char project_name[1024]; - snprintf(project_name, sizeof(project_name), "%.*s", (int)(len - 3), name); - - /* Get file metadata */ - char full_path[2048]; - snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, name); - struct stat st; - if (stat(full_path, &st) != 0) { - continue; - } - - /* Open briefly to get node/edge count + root_path */ - cbm_store_t *pstore = cbm_store_open_path(full_path); - int nodes = 0; - int edges = 0; - char root_path_buf[1024] = ""; - if (pstore) { - nodes = cbm_store_count_nodes(pstore, project_name); - edges = cbm_store_count_edges(pstore, project_name); - cbm_project_t proj = {0}; - if (cbm_store_get_project(pstore, project_name, &proj) == CBM_STORE_OK) { - if (proj.root_path) { - snprintf(root_path_buf, sizeof(root_path_buf), "%s", proj.root_path); - } - free((void *)proj.name); - free((void *)proj.indexed_at); - free((void *)proj.root_path); - } - cbm_store_close(pstore); - } - - yyjson_mut_val *p = yyjson_mut_obj(doc); - yyjson_mut_obj_add_strcpy(doc, p, "name", project_name); - yyjson_mut_obj_add_strcpy(doc, p, "root_path", root_path_buf); - 
yyjson_mut_obj_add_int(doc, p, "nodes", nodes); - yyjson_mut_obj_add_int(doc, p, "edges", edges); - yyjson_mut_obj_add_int(doc, p, "size_bytes", (int64_t)st.st_size); - yyjson_mut_arr_add_val(arr, p); - } - cbm_closedir(d); - } - - yyjson_mut_obj_add_val(doc, root, "projects", arr); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_get_graph_schema(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); - - cbm_schema_info_t schema = {0}; - cbm_store_get_schema(store, project, &schema); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_val *labels = yyjson_mut_arr(doc); - for (int i = 0; i < schema.node_label_count; i++) { - yyjson_mut_val *lbl = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, lbl, "label", schema.node_labels[i].label); - yyjson_mut_obj_add_int(doc, lbl, "count", schema.node_labels[i].count); - yyjson_mut_arr_add_val(labels, lbl); - } - yyjson_mut_obj_add_val(doc, root, "node_labels", labels); - - yyjson_mut_val *types = yyjson_mut_arr(doc); - for (int i = 0; i < schema.edge_type_count; i++) { - yyjson_mut_val *typ = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, typ, "type", schema.edge_types[i].type); - yyjson_mut_obj_add_int(doc, typ, "count", schema.edge_types[i].count); - yyjson_mut_arr_add_val(types, typ); - } - yyjson_mut_obj_add_val(doc, root, "edge_types", types); - - /* Check ADR presence */ - cbm_project_t proj_info = {0}; - if (cbm_store_get_project(store, project, &proj_info) == 0 && proj_info.root_path) { - char adr_path[4096]; - snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", proj_info.root_path); - struct stat adr_st; - // 
NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool adr_exists = (stat(adr_path, &adr_st) == 0); - yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists); - if (!adr_exists) { - yyjson_mut_obj_add_str( - doc, root, "adr_hint", - "No ADR found. Use manage_adr(mode='update') to persist architectural " - "decisions across sessions. Run get_architecture(aspects=['all']) first."); - } - cbm_project_free_fields(&proj_info); - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - cbm_store_schema_free(&schema); - free(project); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); - char *label = cbm_mcp_get_string_arg(args, "label"); - char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); - char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); - int limit = cbm_mcp_get_int_arg(args, "limit", 500000); - int offset = cbm_mcp_get_int_arg(args, "offset", 0); - int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); - int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); - - cbm_search_params_t params = { - .project = project, - .label = label, - .name_pattern = name_pattern, - .file_pattern = file_pattern, - .limit = limit, - .offset = offset, - .min_degree = min_degree, - .max_degree = max_degree, - }; - - cbm_search_output_t out = {0}; - cbm_store_search(store, ¶ms, &out); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_int(doc, root, "total", out.total); - - yyjson_mut_val *results = yyjson_mut_arr(doc); - for (int i = 0; i < out.count; i++) { - cbm_search_result_t *sr = &out.results[i]; - yyjson_mut_val *item = yyjson_mut_obj(doc); - 
yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); - yyjson_mut_obj_add_str(doc, item, "qualified_name", - sr->node.qualified_name ? sr->node.qualified_name : ""); - yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); - yyjson_mut_obj_add_str(doc, item, "file_path", - sr->node.file_path ? sr->node.file_path : ""); - yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); - yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); - yyjson_mut_arr_add_val(results, item); - } - yyjson_mut_obj_add_val(doc, root, "results", results); - yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - cbm_store_search_free(&out); - - free(project); - free(label); - free(name_pattern); - free(file_pattern); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { - char *query = cbm_mcp_get_string_arg(args, "query"); - char *project = cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - int max_rows = cbm_mcp_get_int_arg(args, "max_rows", 0); - - if (!query) { - free(project); - return cbm_mcp_text_result("query is required", true); - } - if (!store) { - free(project); - free(query); - return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); - } - - cbm_cypher_result_t result = {0}; - int rc = cbm_cypher_execute(store, query, project, max_rows, &result); - - if (rc < 0) { - char *err_msg = result.error ? 
result.error : "query execution failed"; - char *resp = cbm_mcp_text_result(err_msg, true); - cbm_cypher_result_free(&result); - free(query); - free(project); - return resp; - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - /* columns */ - yyjson_mut_val *cols = yyjson_mut_arr(doc); - for (int i = 0; i < result.col_count; i++) { - yyjson_mut_arr_add_str(doc, cols, result.columns[i]); - } - yyjson_mut_obj_add_val(doc, root, "columns", cols); - - /* rows */ - yyjson_mut_val *rows = yyjson_mut_arr(doc); - for (int r = 0; r < result.row_count; r++) { - yyjson_mut_val *row = yyjson_mut_arr(doc); - for (int c = 0; c < result.col_count; c++) { - yyjson_mut_arr_add_str(doc, row, result.rows[r][c]); - } - yyjson_mut_arr_add_val(rows, row); - } - yyjson_mut_obj_add_val(doc, root, "rows", rows); - yyjson_mut_obj_add_int(doc, root, "total", result.row_count); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - cbm_cypher_result_free(&result); - free(query); - free(project); - - char *res = cbm_mcp_text_result(json, false); - free(json); - return res; -} - -static char *handle_index_status(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - if (project) { - int nodes = cbm_store_count_nodes(store, project); - int edges = cbm_store_count_edges(store, project); - yyjson_mut_obj_add_str(doc, root, "project", project); - yyjson_mut_obj_add_int(doc, root, "nodes", nodes); - yyjson_mut_obj_add_int(doc, root, "edges", edges); - yyjson_mut_obj_add_str(doc, root, "status", nodes > 0 ? 
"ready" : "empty"); - } else { - yyjson_mut_obj_add_str(doc, root, "status", "no_project"); - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(project); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* delete_project: just erase the .db file (and WAL/SHM). */ -static char *handle_delete_project(cbm_mcp_server_t *srv, const char *args) { - char *name = cbm_mcp_get_string_arg(args, "project_name"); - if (!name) { - return cbm_mcp_text_result("project_name is required", true); - } - - /* Close store if it's the project being deleted */ - if (srv->current_project && strcmp(srv->current_project, name) == 0) { - if (srv->owns_store && srv->store) { - cbm_store_close(srv->store); - srv->store = NULL; - } - free(srv->current_project); - srv->current_project = NULL; - } - - /* Delete the .db file + WAL/SHM */ - char path[1024]; - project_db_path(name, path, sizeof(path)); - - char wal[1024]; - char shm[1024]; - snprintf(wal, sizeof(wal), "%s-wal", path); - snprintf(shm, sizeof(shm), "%s-shm", path); - - bool exists = (access(path, F_OK) == 0); - const char *status = "not_found"; - if (exists) { - (void)cbm_unlink(path); - (void)cbm_unlink(wal); - (void)cbm_unlink(shm); - status = "deleted"; - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - yyjson_mut_obj_add_str(doc, root, "project", name); - yyjson_mut_obj_add_str(doc, root, "status", status); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(name); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - REQUIRE_STORE(store, project); - - cbm_schema_info_t schema = {0}; - cbm_store_get_schema(store, project, &schema); - 
- int node_count = cbm_store_count_nodes(store, project); - int edge_count = cbm_store_count_edges(store, project); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - if (project) { - yyjson_mut_obj_add_str(doc, root, "project", project); - } - yyjson_mut_obj_add_int(doc, root, "total_nodes", node_count); - yyjson_mut_obj_add_int(doc, root, "total_edges", edge_count); - - /* Node label summary */ - yyjson_mut_val *labels = yyjson_mut_arr(doc); - for (int i = 0; i < schema.node_label_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "label", schema.node_labels[i].label); - yyjson_mut_obj_add_int(doc, item, "count", schema.node_labels[i].count); - yyjson_mut_arr_add_val(labels, item); - } - yyjson_mut_obj_add_val(doc, root, "node_labels", labels); - - /* Edge type summary */ - yyjson_mut_val *types = yyjson_mut_arr(doc); - for (int i = 0; i < schema.edge_type_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "type", schema.edge_types[i].type); - yyjson_mut_obj_add_int(doc, item, "count", schema.edge_types[i].count); - yyjson_mut_arr_add_val(types, item); - } - yyjson_mut_obj_add_val(doc, root, "edge_types", types); - - /* Relationship patterns */ - if (schema.rel_pattern_count > 0) { - yyjson_mut_val *pats = yyjson_mut_arr(doc); - for (int i = 0; i < schema.rel_pattern_count; i++) { - yyjson_mut_arr_add_str(doc, pats, schema.rel_patterns[i]); - } - yyjson_mut_obj_add_val(doc, root, "relationship_patterns", pats); - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - cbm_store_schema_free(&schema); - free(project); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { - char *func_name = cbm_mcp_get_string_arg(args, "function_name"); - char *project = 
cbm_mcp_get_string_arg(args, "project"); - cbm_store_t *store = resolve_store(srv, project); - char *direction = cbm_mcp_get_string_arg(args, "direction"); - int depth = cbm_mcp_get_int_arg(args, "depth", 3); - - if (!func_name) { - free(project); - free(direction); - return cbm_mcp_text_result("function_name is required", true); - } - if (!store) { - free(func_name); - free(project); - free(direction); - return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); - } - if (!direction) { - direction = heap_strdup("both"); - } - - /* Find the node by name */ - cbm_node_t *nodes = NULL; - int node_count = 0; - cbm_store_find_nodes_by_name(store, project, func_name, &nodes, &node_count); - - if (node_count == 0) { - free(func_name); - free(project); - free(direction); - cbm_store_free_nodes(nodes, 0); - return cbm_mcp_text_result("{\"error\":\"function not found\"}", true); - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "function", func_name); - yyjson_mut_obj_add_str(doc, root, "direction", direction); - - const char *edge_types[] = {"CALLS"}; - int edge_type_count = 1; - - /* Run BFS for each requested direction. - * IMPORTANT: yyjson_mut_obj_add_str borrows pointers — we must keep - * traversal results alive until after yy_doc_to_str serialization. 
*/ - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool do_outbound = strcmp(direction, "outbound") == 0 || strcmp(direction, "both") == 0; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool do_inbound = strcmp(direction, "inbound") == 0 || strcmp(direction, "both") == 0; - - cbm_traverse_result_t tr_out = {0}; - cbm_traverse_result_t tr_in = {0}; - - if (do_outbound) { - cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, 100, - &tr_out); - - yyjson_mut_val *callees = yyjson_mut_arr(doc); - for (int i = 0; i < tr_out.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_out.visited[i].node.name ? tr_out.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_out.visited[i].node.qualified_name ? tr_out.visited[i].node.qualified_name : ""); - yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); - yyjson_mut_arr_add_val(callees, item); - } - yyjson_mut_obj_add_val(doc, root, "callees", callees); - } - - if (do_inbound) { - cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, 100, - &tr_in); - - yyjson_mut_val *callers = yyjson_mut_arr(doc); - for (int i = 0; i < tr_in.visited_count; i++) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", - tr_in.visited[i].node.name ? tr_in.visited[i].node.name : ""); - yyjson_mut_obj_add_str( - doc, item, "qualified_name", - tr_in.visited[i].node.qualified_name ? 
/*
 * Read the 1-based, inclusive line range [start, end] of the text file at
 * `path` and return it as a single heap-allocated, NUL-terminated string
 * (newlines preserved).
 *
 * Returns NULL when `path` is NULL, the file cannot be opened, memory runs
 * out, or the requested range contains no bytes (e.g. start is past EOF or
 * start > end). The caller owns the returned buffer and must free() it.
 */
static char *read_file_lines(const char *path, int start, int end) {
    if (!path) {
        return NULL;
    }
    FILE *fp = fopen(path, "r");
    if (!fp) {
        return NULL;
    }

    size_t cap = 4096;
    size_t len = 0;
    char *buf = malloc(cap);
    if (!buf) {
        (void)fclose(fp);
        return NULL;
    }
    buf[0] = '\0';

    char chunk[2048];
    int lineno = 0;
    /* fgets() hands back at most sizeof(chunk)-1 bytes, so one physical line
     * may arrive in several chunks. Only a chunk that begins a new physical
     * line advances the line counter; otherwise long lines would shift the
     * numbering of everything after them. */
    bool at_line_start = true;
    while (fgets(chunk, sizeof(chunk), fp)) {
        size_t n = strlen(chunk);
        if (at_line_start) {
            lineno++;
        }
        at_line_start = (n > 0 && chunk[n - 1] == '\n');

        if (lineno < start) {
            continue;
        }
        if (lineno > end) {
            break;
        }

        if (len + n + 1 > cap) {
            size_t new_cap = cap;
            while (len + n + 1 > new_cap) {
                new_cap *= 2;
            }
            /* Keep the old pointer until realloc succeeds so it can be freed on failure. */
            char *grown = realloc(buf, new_cap);
            if (!grown) {
                free(buf);
                (void)fclose(fp);
                return NULL;
            }
            buf = grown;
            cap = new_cap;
        }
        memcpy(buf + len, chunk, n);
        len += n;
        buf[len] = '\0';
    }

    (void)fclose(fp);
    if (len == 0) {
        free(buf);
        return NULL;
    }
    return buf;
}
project); - if (!store) { - return NULL; - } - cbm_project_t proj = {0}; - if (cbm_store_get_project(store, project, &proj) != CBM_STORE_OK) { - return NULL; - } - char *root = heap_strdup(proj.root_path); - free((void *)proj.name); - free((void *)proj.indexed_at); - free((void *)proj.root_path); - return root; -} - -/* ── index_repository ─────────────────────────────────────────── */ - -static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) { - char *repo_path = cbm_mcp_get_string_arg(args, "repo_path"); - char *mode_str = cbm_mcp_get_string_arg(args, "mode"); - - if (!repo_path) { - free(mode_str); - return cbm_mcp_text_result("repo_path is required", true); - } - - cbm_index_mode_t mode = CBM_MODE_FULL; - if (mode_str && strcmp(mode_str, "fast") == 0) { - mode = CBM_MODE_FAST; - } - free(mode_str); - - cbm_pipeline_t *p = cbm_pipeline_new(repo_path, NULL, mode); - if (!p) { - free(repo_path); - return cbm_mcp_text_result("failed to create pipeline", true); - } - - char *project_name = heap_strdup(cbm_pipeline_project_name(p)); - - /* Pipeline builds everything in-memory, then dumps to file atomically. - * No need to close srv->store — pipeline doesn't touch the open store. */ - int rc = cbm_pipeline_run(p); - cbm_pipeline_free(p); - cbm_mem_collect(); /* return mimalloc pages to OS after large indexing */ - - /* Invalidate cached store so next query reopens the fresh database */ - if (srv->owns_store && srv->store) { - cbm_store_close(srv->store); - srv->store = NULL; - } - free(srv->current_project); - srv->current_project = NULL; - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "project", project_name); - yyjson_mut_obj_add_str(doc, root, "status", rc == 0 ? 
"indexed" : "error"); - - if (rc == 0) { - cbm_store_t *store = resolve_store(srv, project_name); - if (store) { - int nodes = cbm_store_count_nodes(store, project_name); - int edges = cbm_store_count_edges(store, project_name); - yyjson_mut_obj_add_int(doc, root, "nodes", nodes); - yyjson_mut_obj_add_int(doc, root, "edges", edges); - - /* Check ADR presence and suggest creation if missing */ - char adr_path[4096]; - snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", repo_path); - struct stat adr_st; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool adr_exists = (stat(adr_path, &adr_st) == 0); - yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists); - if (!adr_exists) { - yyjson_mut_obj_add_str( - doc, root, "adr_hint", - "Project indexed. Consider creating an Architecture Decision Record: " - "explore the codebase with get_architecture(aspects=['all']), then use " - "manage_adr(mode='store') to persist architectural insights across sessions."); - } - } - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(project_name); - free(repo_path); - - char *result = cbm_mcp_text_result(json, rc != 0); - free(json); - return result; -} - -/* ── get_code_snippet ─────────────────────────────────────────── */ - -/* Copy a node from an array into a heap-allocated standalone node. */ -static void copy_node(const cbm_node_t *src, cbm_node_t *dst) { - dst->id = src->id; - dst->project = heap_strdup(src->project); - dst->label = heap_strdup(src->label); - dst->name = heap_strdup(src->name); - dst->qualified_name = heap_strdup(src->qualified_name); - dst->file_path = heap_strdup(src->file_path); - dst->start_line = src->start_line; - dst->end_line = src->end_line; - dst->properties_json = src->properties_json ? heap_strdup(src->properties_json) : NULL; -} - -/* Build a JSON suggestions response for ambiguous or fuzzy results. 
*/ -static char *snippet_suggestions(const char *input, cbm_node_t *nodes, int count) { - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "status", "ambiguous"); - - char msg[512]; - snprintf(msg, sizeof(msg), - "%d matches for \"%s\". Pick a qualified_name from suggestions below, " - "or use search_graph(name_pattern=\"...\") to narrow results.", - count, input); - yyjson_mut_obj_add_str(doc, root, "message", msg); - - yyjson_mut_val *arr = yyjson_mut_arr(doc); - for (int i = 0; i < count; i++) { - yyjson_mut_val *s = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, s, "qualified_name", - nodes[i].qualified_name ? nodes[i].qualified_name : ""); - yyjson_mut_obj_add_str(doc, s, "name", nodes[i].name ? nodes[i].name : ""); - yyjson_mut_obj_add_str(doc, s, "label", nodes[i].label ? nodes[i].label : ""); - yyjson_mut_obj_add_str(doc, s, "file_path", nodes[i].file_path ? nodes[i].file_path : ""); - yyjson_mut_arr_append(arr, s); - } - yyjson_mut_obj_add_val(doc, root, "suggestions", arr); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* Build an enriched snippet response for a resolved node. */ -static char *build_snippet_response(cbm_mcp_server_t *srv, cbm_node_t *node, - const char *match_method, bool include_neighbors, - cbm_node_t *alternatives, int alt_count) { - char *root_path = get_project_root(srv, node->project); - - int start = node->start_line > 0 ? node->start_line : 1; - int end = node->end_line > start ? node->end_line : start + SNIPPET_DEFAULT_LINES; - char *source = NULL; - - /* Build absolute path and verify it's within the project root. - * Prevents path traversal via crafted file_path (e.g., "../../.ssh/id_rsa"). 
*/ - char *abs_path = NULL; - if (root_path && node->file_path) { - size_t apsz = strlen(root_path) + strlen(node->file_path) + 2; - abs_path = malloc(apsz); - snprintf(abs_path, apsz, "%s/%s", root_path, node->file_path); - - /* Path containment: resolve symlinks/../ and verify file stays within root */ - char real_root[4096]; - char real_file[4096]; - bool path_ok = false; -#ifdef _WIN32 - if (_fullpath(real_root, root_path, sizeof(real_root)) && - _fullpath(real_file, abs_path, sizeof(real_file))) { -#else - if (realpath(root_path, real_root) && realpath(abs_path, real_file)) { -#endif - size_t root_len = strlen(real_root); - if (strncmp(real_file, real_root, root_len) == 0 && - (real_file[root_len] == '/' || real_file[root_len] == '\\' || - real_file[root_len] == '\0')) { - path_ok = true; - } - } - if (path_ok) { - source = read_file_lines(abs_path, start, end); - } - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); - - yyjson_mut_obj_add_str(doc, root_obj, "name", node->name ? node->name : ""); - yyjson_mut_obj_add_str(doc, root_obj, "qualified_name", - node->qualified_name ? node->qualified_name : ""); - yyjson_mut_obj_add_str(doc, root_obj, "label", node->label ? node->label : ""); - - const char *display_path = ""; - if (abs_path) { - display_path = abs_path; - } else if (node->file_path) { - display_path = node->file_path; - } - yyjson_mut_obj_add_str(doc, root_obj, "file_path", display_path); - yyjson_mut_obj_add_int(doc, root_obj, "start_line", start); - yyjson_mut_obj_add_int(doc, root_obj, "end_line", end); - - if (source) { - yyjson_mut_obj_add_str(doc, root_obj, "source", source); - } else { - yyjson_mut_obj_add_str(doc, root_obj, "source", "(source not available)"); - } - - /* match_method — omitted for exact matches */ - if (match_method) { - yyjson_mut_obj_add_str(doc, root_obj, "match_method", match_method); - } - - /* Enrich with node properties. 
- * props_doc is freed AFTER serialization since yyjson_mut_obj_add_str - * stores pointers into it (zero-copy). */ - yyjson_doc *props_doc = NULL; - if (node->properties_json && node->properties_json[0] != '\0') { - props_doc = yyjson_read(node->properties_json, strlen(node->properties_json), 0); - if (props_doc) { - yyjson_val *props_root = yyjson_doc_get_root(props_doc); - if (props_root && yyjson_is_obj(props_root)) { - yyjson_obj_iter iter; - yyjson_obj_iter_init(props_root, &iter); - yyjson_val *key; - while ((key = yyjson_obj_iter_next(&iter))) { - yyjson_val *val = yyjson_obj_iter_get_val(key); - const char *k = yyjson_get_str(key); - if (!k) { - continue; - } - if (yyjson_is_str(val)) { - yyjson_mut_obj_add_str(doc, root_obj, k, yyjson_get_str(val)); - } else if (yyjson_is_bool(val)) { - yyjson_mut_obj_add_bool(doc, root_obj, k, yyjson_get_bool(val)); - } else if (yyjson_is_int(val)) { - yyjson_mut_obj_add_int(doc, root_obj, k, yyjson_get_int(val)); - } else if (yyjson_is_real(val)) { - yyjson_mut_obj_add_real(doc, root_obj, k, yyjson_get_real(val)); - } - } - } - } - } - - /* Caller/callee counts — store already resolved by calling handler */ - cbm_store_t *store = srv->store; - int in_deg = 0; - int out_deg = 0; - cbm_store_node_degree(store, node->id, &in_deg, &out_deg); - yyjson_mut_obj_add_int(doc, root_obj, "callers", in_deg); - yyjson_mut_obj_add_int(doc, root_obj, "callees", out_deg); - - /* Include neighbor names (opt-in). - * Strings stored by yyjson reference — freed after serialization. 
*/ - char **nb_callers = NULL; - int nb_caller_count = 0; - char **nb_callees = NULL; - int nb_callee_count = 0; - if (include_neighbors) { - cbm_store_node_neighbor_names(store, node->id, 10, &nb_callers, &nb_caller_count, - &nb_callees, &nb_callee_count); - if (nb_caller_count > 0) { - yyjson_mut_val *arr = yyjson_mut_arr(doc); - for (int i = 0; i < nb_caller_count; i++) { - yyjson_mut_arr_add_str(doc, arr, nb_callers[i]); - } - yyjson_mut_obj_add_val(doc, root_obj, "caller_names", arr); - } - if (nb_callee_count > 0) { - yyjson_mut_val *arr = yyjson_mut_arr(doc); - for (int i = 0; i < nb_callee_count; i++) { - yyjson_mut_arr_add_str(doc, arr, nb_callees[i]); - } - yyjson_mut_obj_add_val(doc, root_obj, "callee_names", arr); - } - } - - /* Alternatives (when auto-resolved from ambiguous) */ - if (alternatives && alt_count > 0) { - yyjson_mut_val *arr = yyjson_mut_arr(doc); - for (int i = 0; i < alt_count; i++) { - yyjson_mut_val *a = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, a, "qualified_name", - alternatives[i].qualified_name ? alternatives[i].qualified_name - : ""); - yyjson_mut_obj_add_str(doc, a, "file_path", - alternatives[i].file_path ? 
alternatives[i].file_path : ""); - yyjson_mut_arr_append(arr, a); - } - yyjson_mut_obj_add_val(doc, root_obj, "alternatives", arr); - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - yyjson_doc_free(props_doc); /* safe if NULL */ - for (int i = 0; i < nb_caller_count; i++) { - free(nb_callers[i]); - } - for (int i = 0; i < nb_callee_count; i++) { - free(nb_callees[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(nb_callers); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(nb_callees); - free(root_path); - free(abs_path); - free(source); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { - char *qn = cbm_mcp_get_string_arg(args, "qualified_name"); - char *project = cbm_mcp_get_string_arg(args, "project"); - bool include_neighbors = cbm_mcp_get_bool_arg(args, "include_neighbors"); - - if (!qn) { - free(project); - return cbm_mcp_text_result("qualified_name is required", true); - } - - cbm_store_t *store = resolve_store(srv, project); - if (!store) { - free(qn); - free(project); - return cbm_mcp_text_result("no project loaded — run index_repository first", true); - } - - /* Default to current project (same as all other tools) */ - const char *effective_project = project ? project : srv->current_project; - - /* Tier 1: Exact QN match */ - cbm_node_t node = {0}; - int rc = cbm_store_find_node_by_qn(store, effective_project, qn, &node); - if (rc == CBM_STORE_OK) { - char *result = build_snippet_response(srv, &node, NULL, include_neighbors, NULL, 0); - free_node_contents(&node); - free(qn); - free(project); - return result; - } - - /* Tier 2: Suffix match — handles partial QNs ("main.HandleRequest") - * and short names ("ProcessOrder") via LIKE '%.X'. 
*/ - cbm_node_t *suffix_nodes = NULL; - int suffix_count = 0; - cbm_store_find_nodes_by_qn_suffix(store, effective_project, qn, &suffix_nodes, &suffix_count); - - if (suffix_count == 1) { - copy_node(&suffix_nodes[0], &node); - cbm_store_free_nodes(suffix_nodes, suffix_count); - char *result = build_snippet_response(srv, &node, "suffix", include_neighbors, NULL, 0); - free_node_contents(&node); - free(qn); - free(project); - return result; - } - - if (suffix_count > 1) { - char *result = snippet_suggestions(qn, suffix_nodes, suffix_count); - cbm_store_free_nodes(suffix_nodes, suffix_count); - free(qn); - free(project); - return result; - } - - cbm_store_free_nodes(suffix_nodes, suffix_count); - free(qn); - free(project); - - /* Nothing found — guide the caller toward search_graph */ - return cbm_mcp_text_result( - "symbol not found. Use search_graph(name_pattern=\"...\") first to discover " - "the exact qualified_name, then pass it to get_code_snippet.", - true); -} - -/* ── search_code ──────────────────────────────────────────────── */ - -static char *handle_search_code(cbm_mcp_server_t *srv, const char *args) { - char *pattern = cbm_mcp_get_string_arg(args, "pattern"); - char *project = cbm_mcp_get_string_arg(args, "project"); - char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); - int limit = cbm_mcp_get_int_arg(args, "limit", 500000); - bool use_regex = cbm_mcp_get_bool_arg(args, "regex"); - - if (!pattern) { - free(project); - free(file_pattern); - return cbm_mcp_text_result("pattern is required", true); - } - - char *root_path = get_project_root(srv, project); - if (!root_path) { - free(pattern); - free(project); - free(file_pattern); - return cbm_mcp_text_result("project not found or not indexed", true); - } - - /* Reject shell metacharacters in user-supplied arguments */ - if (!cbm_validate_shell_arg(root_path) || - (file_pattern && !cbm_validate_shell_arg(file_pattern))) { - free(root_path); - free(pattern); - free(project); - 
free(file_pattern); - return cbm_mcp_text_result("path or file_pattern contains invalid characters", true); - } - - /* Write pattern to temp file to avoid shell injection */ - char tmpfile[256]; -#ifdef _WIN32 - snprintf(tmpfile, sizeof(tmpfile), "/tmp/cbm_search_%d.pat", (int)_getpid()); -#else - snprintf(tmpfile, sizeof(tmpfile), "/tmp/cbm_search_%d.pat", (int)getpid()); -#endif - FILE *tf = fopen(tmpfile, "w"); - if (!tf) { - free(root_path); - free(pattern); - free(project); - free(file_pattern); - return cbm_mcp_text_result("search failed: temp file", true); - } - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - (void)fprintf(tf, "%s\n", pattern); - (void)fclose(tf); - - char cmd[4096]; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *flag = use_regex ? "-E" : "-F"; - if (file_pattern) { - snprintf(cmd, sizeof(cmd), "grep -rn %s --include='%s' -m %d -f '%s' '%s' 2>/dev/null", - flag, file_pattern, limit * 3, tmpfile, root_path); - } else { - snprintf(cmd, sizeof(cmd), "grep -rn %s -m %d -f '%s' '%s' 2>/dev/null", flag, limit * 3, - tmpfile, root_path); - } - - // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) - FILE *fp = cbm_popen(cmd, "r"); - if (!fp) { - cbm_unlink(tmpfile); - free(root_path); - free(pattern); - free(project); - free(file_pattern); - return cbm_mcp_text_result("search failed", true); - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); - - yyjson_mut_val *matches = yyjson_mut_arr(doc); - char line[2048]; - int count = 0; - size_t root_len = strlen(root_path); - - while (fgets(line, sizeof(line), fp) && count < limit) { - size_t len = strlen(line); - while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { - line[--len] = '\0'; - } - if (len == 0) { - continue; - } - - /* grep output: /abs/path/file:lineno:content */ - char *colon1 = strchr(line, ':'); - if 
(!colon1) { - continue; - } - char *colon2 = strchr(colon1 + 1, ':'); - if (!colon2) { - continue; - } - - *colon1 = '\0'; - *colon2 = '\0'; - - /* Strip root_path prefix to get relative path */ - const char *file = line; - if (strncmp(file, root_path, root_len) == 0) { - file += root_len; - if (*file == '/') { - file++; - } - } - int lineno = (int)strtol(colon1 + 1, NULL, 10); - const char *content = colon2 + 1; - - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "file", file); - yyjson_mut_obj_add_int(doc, item, "line", lineno); - yyjson_mut_obj_add_str(doc, item, "content", content); - yyjson_mut_arr_add_val(matches, item); - count++; - } - cbm_pclose(fp); - cbm_unlink(tmpfile); /* Clean up pattern file after grep is done */ - - yyjson_mut_obj_add_val(doc, root_obj, "matches", matches); - yyjson_mut_obj_add_int(doc, root_obj, "count", count); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(root_path); - free(pattern); - free(project); - free(file_pattern); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* ── detect_changes ───────────────────────────────────────────── */ - -static char *handle_detect_changes(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - char *base_branch = cbm_mcp_get_string_arg(args, "base_branch"); - int depth = cbm_mcp_get_int_arg(args, "depth", 2); - - if (!base_branch) { - base_branch = heap_strdup("main"); - } - - /* Reject shell metacharacters in user-supplied branch name */ - if (!cbm_validate_shell_arg(base_branch)) { - free(project); - free(base_branch); - return cbm_mcp_text_result("base_branch contains invalid characters", true); - } - - char *root_path = get_project_root(srv, project); - if (!root_path) { - free(project); - free(base_branch); - return cbm_mcp_text_result("project not found", true); - } - - if (!cbm_validate_shell_arg(root_path)) { - free(root_path); - 
free(project); - free(base_branch); - return cbm_mcp_text_result("project path contains invalid characters", true); - } - - /* Get changed files via git */ - char cmd[1024]; - snprintf(cmd, sizeof(cmd), - "cd '%s' && { git diff --name-only '%s'...HEAD 2>/dev/null; " - "git diff --name-only 2>/dev/null; } | sort -u", - root_path, base_branch); - - // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) - FILE *fp = cbm_popen(cmd, "r"); - if (!fp) { - free(root_path); - free(project); - free(base_branch); - return cbm_mcp_text_result("git diff failed", true); - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); - - yyjson_mut_val *changed = yyjson_mut_arr(doc); - yyjson_mut_val *impacted = yyjson_mut_arr(doc); - - /* resolve_store already called via get_project_root above */ - cbm_store_t *store = srv->store; - - char line[1024]; - int file_count = 0; - - while (fgets(line, sizeof(line), fp)) { - size_t len = strlen(line); - while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { - line[--len] = '\0'; - } - if (len == 0) { - continue; - } - - yyjson_mut_arr_add_str(doc, changed, line); - file_count++; - - /* Find symbols defined in this file */ - cbm_node_t *nodes = NULL; - int ncount = 0; - cbm_store_find_nodes_by_file(store, project, line, &nodes, &ncount); - - for (int i = 0; i < ncount; i++) { - if (nodes[i].label && strcmp(nodes[i].label, "File") != 0 && - strcmp(nodes[i].label, "Folder") != 0 && strcmp(nodes[i].label, "Project") != 0) { - yyjson_mut_val *item = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, item, "name", nodes[i].name ? 
nodes[i].name : ""); - yyjson_mut_obj_add_str(doc, item, "label", nodes[i].label); - yyjson_mut_obj_add_str(doc, item, "file", line); - yyjson_mut_arr_add_val(impacted, item); - } - } - cbm_store_free_nodes(nodes, ncount); - } - cbm_pclose(fp); - - yyjson_mut_obj_add_val(doc, root_obj, "changed_files", changed); - yyjson_mut_obj_add_int(doc, root_obj, "changed_count", file_count); - yyjson_mut_obj_add_val(doc, root_obj, "impacted_symbols", impacted); - yyjson_mut_obj_add_int(doc, root_obj, "depth", depth); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(root_path); - free(project); - free(base_branch); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* ── manage_adr ───────────────────────────────────────────────── */ - -static char *handle_manage_adr(cbm_mcp_server_t *srv, const char *args) { - char *project = cbm_mcp_get_string_arg(args, "project"); - char *mode_str = cbm_mcp_get_string_arg(args, "mode"); - char *content = cbm_mcp_get_string_arg(args, "content"); - - if (!mode_str) { - mode_str = heap_strdup("get"); - } - - char *root_path = get_project_root(srv, project); - if (!root_path) { - free(project); - free(mode_str); - free(content); - return cbm_mcp_text_result("project not found", true); - } - - char adr_dir[4096]; - snprintf(adr_dir, sizeof(adr_dir), "%s/.codebase-memory", root_path); - char adr_path[4096]; - snprintf(adr_path, sizeof(adr_path), "%s/adr.md", adr_dir); - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root_obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root_obj); - - if (strcmp(mode_str, "update") == 0 && content) { - /* Create dir if needed */ - cbm_mkdir(adr_dir); - FILE *fp = fopen(adr_path, "w"); - if (fp) { - (void)fputs(content, fp); - (void)fclose(fp); - yyjson_mut_obj_add_str(doc, root_obj, "status", "updated"); - } else { - yyjson_mut_obj_add_str(doc, root_obj, "status", "write_error"); - } - } else if (strcmp(mode_str, 
"sections") == 0) { - /* List section headers from ADR */ - FILE *fp = fopen(adr_path, "r"); - yyjson_mut_val *sections = yyjson_mut_arr(doc); - if (fp) { - char line[1024]; - while (fgets(line, sizeof(line), fp)) { - if (line[0] == '#') { - size_t len = strlen(line); - while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { - line[--len] = '\0'; - } - yyjson_mut_arr_add_str(doc, sections, line); - } - } - (void)fclose(fp); - } - yyjson_mut_obj_add_val(doc, root_obj, "sections", sections); - } else { - /* get: read ADR content */ - FILE *fp = fopen(adr_path, "r"); - if (fp) { - (void)fseek(fp, 0, SEEK_END); - long sz = ftell(fp); - (void)fseek(fp, 0, SEEK_SET); - char *buf = malloc(sz + 1); - size_t n = fread(buf, 1, sz, fp); - buf[n] = '\0'; - (void)fclose(fp); - yyjson_mut_obj_add_str(doc, root_obj, "content", buf); - free(buf); - } else { - yyjson_mut_obj_add_str(doc, root_obj, "content", ""); - yyjson_mut_obj_add_str(doc, root_obj, "status", "no_adr"); - yyjson_mut_obj_add_str( - doc, root_obj, "adr_hint", - "No ADR yet. Create one with manage_adr(mode='update', " - "content='## PURPOSE\\n...\\n\\n## STACK\\n...\\n\\n## ARCHITECTURE\\n..." - "\\n\\n## PATTERNS\\n...\\n\\n## TRADEOFFS\\n...\\n\\n## PHILOSOPHY\\n...'). " - "For guided creation: explore the codebase with get_architecture, " - "then draft and store. 
Sections: PURPOSE, STACK, ARCHITECTURE, " - "PATTERNS, TRADEOFFS, PHILOSOPHY."); - } - } - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - free(root_path); - free(project); - free(mode_str); - free(content); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* ── ingest_traces ────────────────────────────────────────────── */ - -static char *handle_ingest_traces(cbm_mcp_server_t *srv, const char *args) { - (void)srv; - /* Parse traces array from JSON args */ - yyjson_doc *adoc = yyjson_read(args, strlen(args), 0); - int trace_count = 0; - - if (adoc) { - yyjson_val *aroot = yyjson_doc_get_root(adoc); - yyjson_val *traces = yyjson_obj_get(aroot, "traces"); - if (traces && yyjson_is_arr(traces)) { - trace_count = (int)yyjson_arr_size(traces); - } - yyjson_doc_free(adoc); - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_str(doc, root, "status", "accepted"); - yyjson_mut_obj_add_int(doc, root, "traces_received", trace_count); - yyjson_mut_obj_add_str(doc, root, "note", - "Runtime edge creation from traces not yet implemented"); - - char *json = yy_doc_to_str(doc); - yyjson_mut_doc_free(doc); - - char *result = cbm_mcp_text_result(json, false); - free(json); - return result; -} - -/* ── Tool dispatch ────────────────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const char *args_json) { - if (!tool_name) { - return cbm_mcp_text_result("missing tool name", true); - } - - if (strcmp(tool_name, "list_projects") == 0) { - return handle_list_projects(srv, args_json); - } - if (strcmp(tool_name, "get_graph_schema") == 0) { - return handle_get_graph_schema(srv, args_json); - } - if (strcmp(tool_name, "search_graph") == 0) { - return handle_search_graph(srv, args_json); - } - if 
(strcmp(tool_name, "query_graph") == 0) { - return handle_query_graph(srv, args_json); - } - if (strcmp(tool_name, "index_status") == 0) { - return handle_index_status(srv, args_json); - } - if (strcmp(tool_name, "delete_project") == 0) { - return handle_delete_project(srv, args_json); - } - if (strcmp(tool_name, "trace_call_path") == 0) { - return handle_trace_call_path(srv, args_json); - } - if (strcmp(tool_name, "get_architecture") == 0) { - return handle_get_architecture(srv, args_json); - } - - /* Pipeline-dependent tools */ - if (strcmp(tool_name, "index_repository") == 0) { - return handle_index_repository(srv, args_json); - } - if (strcmp(tool_name, "get_code_snippet") == 0) { - return handle_get_code_snippet(srv, args_json); - } - if (strcmp(tool_name, "search_code") == 0) { - return handle_search_code(srv, args_json); - } - if (strcmp(tool_name, "detect_changes") == 0) { - return handle_detect_changes(srv, args_json); - } - if (strcmp(tool_name, "manage_adr") == 0) { - return handle_manage_adr(srv, args_json); - } - if (strcmp(tool_name, "ingest_traces") == 0) { - return handle_ingest_traces(srv, args_json); - } - - char msg[256]; - snprintf(msg, sizeof(msg), "unknown tool: %s", tool_name); - return cbm_mcp_text_result(msg, true); -} - -/* ── Session detection + auto-index ────────────────────────────── */ - -/* Detect session root from CWD (fallback: single indexed project from DB). */ -static void detect_session(cbm_mcp_server_t *srv) { - if (srv->session_detected) { - return; - } - srv->session_detected = true; - - /* 1. 
Try CWD */ - char cwd[1024]; - if (getcwd(cwd, sizeof(cwd)) != NULL) { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *home = getenv("HOME"); - /* Skip useless roots: / and $HOME */ - if (strcmp(cwd, "/") != 0 && (home == NULL || strcmp(cwd, home) != 0)) { - snprintf(srv->session_root, sizeof(srv->session_root), "%s", cwd); - cbm_log_info("session.root.cwd", "path", cwd); - } - } - - /* Derive project name from path */ - if (srv->session_root[0]) { - /* Use last two path components joined by dash, matching Go's ProjectNameFromPath */ - const char *p = srv->session_root; - const char *last_slash = strrchr(p, '/'); - if (last_slash && last_slash > p) { - const char *prev = last_slash - 1; - while (prev > p && *prev != '/') { - prev--; - } - if (*prev == '/') { - prev++; - } - snprintf(srv->session_project, sizeof(srv->session_project), "%.*s", - (int)(strlen(p) - (size_t)(prev - p)), prev); - /* Replace / with - */ - for (char *c = srv->session_project; *c; c++) { - if (*c == '/') { - *c = '-'; - } - } - } else { - snprintf(srv->session_project, sizeof(srv->session_project), "%s", - last_slash ? 
last_slash + 1 : p); - } - } -} - -/* Background auto-index thread function */ -static void *autoindex_thread(void *arg) { - cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg; - - cbm_log_info("autoindex.start", "project", srv->session_project, "path", srv->session_root); - - cbm_pipeline_t *p = cbm_pipeline_new(srv->session_root, NULL, CBM_MODE_FULL); - if (!p) { - cbm_log_warn("autoindex.err", "msg", "pipeline_create_failed"); - return NULL; - } - - int rc = cbm_pipeline_run(p); - cbm_pipeline_free(p); - cbm_mem_collect(); /* return mimalloc pages to OS after indexing */ - - if (rc == 0) { - cbm_log_info("autoindex.done", "project", srv->session_project); - /* Register with watcher for ongoing change detection */ - if (srv->watcher) { - cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); - } - } else { - cbm_log_warn("autoindex.err", "msg", "pipeline_run_failed"); - } - return NULL; -} - -/* Start auto-indexing if configured and project not yet indexed. */ -static void maybe_auto_index(cbm_mcp_server_t *srv) { - if (srv->session_root[0] == '\0') { - return; /* no session root detected */ - } - - /* Check if project already has a DB */ - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *home = getenv("HOME"); - if (home) { - char db_check[1024]; - snprintf(db_check, sizeof(db_check), "%s/.cache/codebase-memory-mcp/%s.db", home, - srv->session_project); - struct stat st; - if (stat(db_check, &st) == 0) { - /* Already indexed → register watcher for change detection */ - cbm_log_info("autoindex.skip", "reason", "already_indexed", "project", - srv->session_project); - if (srv->watcher) { - cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); - } - return; - } - } - -/* Default file limit for auto-indexing new projects */ -#define DEFAULT_AUTO_INDEX_LIMIT 50000 - - /* Check auto_index config */ - bool auto_index = false; - int file_limit = DEFAULT_AUTO_INDEX_LIMIT; - if (srv->config) { - auto_index = 
cbm_config_get_bool(srv->config, CBM_CONFIG_AUTO_INDEX, false); - file_limit = - cbm_config_get_int(srv->config, CBM_CONFIG_AUTO_INDEX_LIMIT, DEFAULT_AUTO_INDEX_LIMIT); - } - - if (!auto_index) { - cbm_log_info("autoindex.skip", "reason", "disabled", "hint", - "run: codebase-memory-mcp config set auto_index true"); - return; - } - - /* Quick file count check to avoid OOM on massive repos */ - if (!cbm_validate_shell_arg(srv->session_root)) { - cbm_log_warn("autoindex.skip", "reason", "path contains shell metacharacters"); - return; - } - char cmd[1024]; - snprintf(cmd, sizeof(cmd), "git -C '%s' ls-files 2>/dev/null | wc -l", srv->session_root); - // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) - FILE *fp = cbm_popen(cmd, "r"); - if (fp) { - char line[64]; - if (fgets(line, sizeof(line), fp)) { - int count = (int)strtol(line, NULL, 10); - if (count > file_limit) { - cbm_log_warn("autoindex.skip", "reason", "too_many_files", "files", line, "limit", - CBM_CONFIG_AUTO_INDEX_LIMIT); - cbm_pclose(fp); - return; - } - } - cbm_pclose(fp); - } - - /* Launch auto-index in background */ - if (cbm_thread_create(&srv->autoindex_tid, 0, autoindex_thread, srv) == 0) { - srv->autoindex_active = true; - } -} - -/* ── Background update check ──────────────────────────────────── */ - -#define UPDATE_CHECK_URL "https://api.github.com/repos/DeusData/codebase-memory-mcp/releases/latest" - -static void *update_check_thread(void *arg) { - cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg; - - /* Use curl with 5s timeout to fetch latest release tag */ - FILE *fp = cbm_popen("curl -sf --max-time 5 -H 'Accept: application/vnd.github+json' " - "'" UPDATE_CHECK_URL "' 2>/dev/null", - "r"); - if (!fp) { - srv->update_checked = true; - return NULL; - } - - char buf[4096]; - size_t total = 0; - while (total < sizeof(buf) - 1) { - size_t n = fread(buf + total, 1, sizeof(buf) - 1 - total, fp); - if (n == 0) { - break; - } - total += n; - } - buf[total] = '\0'; - cbm_pclose(fp); - - /* 
Parse tag_name from JSON response */ - yyjson_doc *doc = yyjson_read(buf, total, 0); - if (!doc) { - srv->update_checked = true; - return NULL; - } - - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *tag = yyjson_obj_get(root, "tag_name"); - const char *tag_str = yyjson_get_str(tag); - - if (tag_str) { - const char *current = cbm_cli_get_version(); - if (cbm_compare_versions(tag_str, current) > 0) { - snprintf(srv->update_notice, sizeof(srv->update_notice), - "Update available: %s -> %s -- run: codebase-memory-mcp update", current, - tag_str); - cbm_log_info("update.available", "current", current, "latest", tag_str); - } - } - - yyjson_doc_free(doc); - srv->update_checked = true; - return NULL; -} - -static void start_update_check(cbm_mcp_server_t *srv) { - if (srv->update_checked) { - return; - } - srv->update_checked = true; /* prevent double-launch */ - if (cbm_thread_create(&srv->update_tid, 0, update_check_thread, srv) == 0) { - srv->update_thread_active = true; - } -} - -/* Prepend update notice to a tool result, then clear it (one-shot). 
*/ -static char *inject_update_notice(cbm_mcp_server_t *srv, char *result_json) { - if (srv->update_notice[0] == '\0') { - return result_json; - } - - /* Parse existing result, prepend notice text, rebuild */ - yyjson_doc *doc = yyjson_read(result_json, strlen(result_json), 0); - if (!doc) { - return result_json; - } - - yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); - yyjson_doc_free(doc); - if (!root) { - yyjson_mut_doc_free(mdoc); - return result_json; - } - yyjson_mut_doc_set_root(mdoc, root); - - /* Find the "content" array */ - yyjson_mut_val *content = yyjson_mut_obj_get(root, "content"); - if (content && yyjson_mut_is_arr(content)) { - /* Prepend a text content item with the update notice */ - yyjson_mut_val *notice_item = yyjson_mut_obj(mdoc); - yyjson_mut_obj_add_str(mdoc, notice_item, "type", "text"); - yyjson_mut_obj_add_str(mdoc, notice_item, "text", srv->update_notice); - yyjson_mut_arr_prepend(content, notice_item); - } - - size_t len; - char *new_json = yyjson_mut_write(mdoc, YYJSON_WRITE_ALLOW_INVALID_UNICODE, &len); - yyjson_mut_doc_free(mdoc); - - if (new_json) { - free(result_json); - srv->update_notice[0] = '\0'; /* clear — one-shot */ - return new_json; - } - return result_json; -} - -/* ── Server request handler ───────────────────────────────────── */ - -char *cbm_mcp_server_handle(cbm_mcp_server_t *srv, const char *line) { - cbm_jsonrpc_request_t req = {0}; - if (cbm_jsonrpc_parse(line, &req) < 0) { - return cbm_jsonrpc_format_error(0, JSONRPC_PARSE_ERROR, "Parse error"); - } - - /* Notifications (no id) → no response */ - if (!req.has_id) { - cbm_jsonrpc_request_free(&req); - return NULL; - } - - char *result_json = NULL; - - if (strcmp(req.method, "initialize") == 0) { - result_json = cbm_mcp_initialize_response(req.params_raw); - start_update_check(srv); - detect_session(srv); - maybe_auto_index(srv); - } else if (strcmp(req.method, "tools/list") == 0) { - 
result_json = cbm_mcp_tools_list(); - } else if (strcmp(req.method, "tools/call") == 0) { - char *tool_name = req.params_raw ? cbm_mcp_get_tool_name(req.params_raw) : NULL; - char *tool_args = - req.params_raw ? cbm_mcp_get_arguments(req.params_raw) : heap_strdup("{}"); - - result_json = cbm_mcp_handle_tool(srv, tool_name, tool_args); - result_json = inject_update_notice(srv, result_json); - free(tool_name); - free(tool_args); - } else { - char *err = cbm_jsonrpc_format_error(req.id, JSONRPC_METHOD_NOT_FOUND, "Method not found"); - cbm_jsonrpc_request_free(&req); - return err; - } - - cbm_jsonrpc_response_t resp = { - .id = req.id, - .result_json = result_json, - }; - char *out = cbm_jsonrpc_format_response(&resp); - free(result_json); - cbm_jsonrpc_request_free(&req); - return out; -} - -/* ── Event loop ───────────────────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { - char *line = NULL; - size_t cap = 0; - int fd = cbm_fileno(in); - - for (;;) { - /* Poll with idle timeout so we can evict unused stores between requests. - * MCP is request-response (one line at a time), so mixing poll() on the - * raw fd with getline() on the buffered FILE* is safe in practice. 
*/ -#ifdef _WIN32 - /* Windows: WaitForSingleObject on stdin handle */ - HANDLE hStdin = (HANDLE)_get_osfhandle(fd); - DWORD wr = WaitForSingleObject(hStdin, STORE_IDLE_TIMEOUT_S * 1000); - if (wr == WAIT_FAILED) { - break; - } - if (wr == WAIT_TIMEOUT) { - cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); - continue; - } -#else - struct pollfd pfd = {.fd = fd, .events = POLLIN}; - int pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); - - if (pr < 0) { - break; /* error or signal */ - } - if (pr == 0) { - /* Timeout — evict idle store to free resources */ - cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); - continue; - } -#endif - - if (cbm_getline(&line, &cap, in) <= 0) { - break; - } - - /* Trim trailing newline/CR */ - size_t len = strlen(line); - while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { - line[--len] = '\0'; - } - if (len == 0) { - continue; - } - - /* Content-Length framing support (LSP-style transport). - * Some MCP clients (OpenCode, VS Code extensions) send: - * Content-Length: \r\n\r\n - * instead of bare JSONL. Detect the header, read the payload, - * and respond with the same framing. */ - if (strncmp(line, "Content-Length:", 15) == 0) { - int content_len = (int)strtol(line + 15, NULL, 10); - if (content_len <= 0 || content_len > 10 * 1024 * 1024) { - continue; /* invalid or too large */ - } - - /* Skip blank line(s) between header and body */ - while (cbm_getline(&line, &cap, in) > 0) { - size_t hlen = strlen(line); - while (hlen > 0 && (line[hlen - 1] == '\n' || line[hlen - 1] == '\r')) { - line[--hlen] = '\0'; - } - if (hlen == 0) { - break; /* found the blank separator */ - } - /* Skip other headers (e.g. 
Content-Type) */ - } - - /* Read exact content_len bytes */ - char *body = malloc((size_t)content_len + 1); - if (!body) { - continue; - } - size_t nread = fread(body, 1, (size_t)content_len, in); - body[nread] = '\0'; - - char *resp = cbm_mcp_server_handle(srv, body); - free(body); - - if (resp) { - size_t rlen = strlen(resp); - (void)fprintf(out, "Content-Length: %zu\r\n\r\n%s", rlen, resp); - (void)fflush(out); - free(resp); - } - continue; - } - - char *resp = cbm_mcp_server_handle(srv, line); - if (resp) { - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - (void)fprintf(out, "%s\n", resp); - (void)fflush(out); - free(resp); - } - } - - free(line); - return 0; -} - -/* ── cbm_parse_file_uri ──────────────────────────────────────── */ - -bool cbm_parse_file_uri(const char *uri, char *out_path, int out_size) { - if (!uri || !out_path || out_size <= 0) { - if (out_path && out_size > 0) { - out_path[0] = '\0'; - } - return false; - } - - /* Must start with file:// */ - if (strncmp(uri, "file://", 7) != 0) { - out_path[0] = '\0'; - return false; - } - - const char *path = uri + 7; - - /* On Windows, file:///C:/path → /C:/path. Strip leading / before drive letter. */ - if (path[0] == '/' && path[1] && - ((path[1] >= 'A' && path[1] <= 'Z') || (path[1] >= 'a' && path[1] <= 'z')) && - path[2] == ':') { - path++; /* skip the leading / */ - } - - snprintf(out_path, out_size, "%s", path); - return true; -} +/* + * mcp.c — MCP server: JSON-RPC 2.0 over stdio with 14 graph tools. + * + * Uses yyjson for fast JSON parsing/building. + * Single-threaded event loop: read line → parse → dispatch → respond. 
+ */ + +// operations + +#include "mcp/mcp.h" +#include "store/store.h" +#include "cypher/cypher.h" +#include "pipeline/pipeline.h" +#include "cli/cli.h" +#include "watcher/watcher.h" +#include "foundation/mem.h" +#include "foundation/platform.h" +#include "foundation/compat.h" +#include "foundation/compat_fs.h" +#include "foundation/compat_thread.h" +#include "foundation/log.h" +#include "foundation/str_util.h" + +#ifdef _WIN32 +#include /* _getpid */ +#else +#include +#include +#include +#include +#endif +#include +#include // int64_t +#include +#include +#include +#include +#include + +/* ── Constants ────────────────────────────────────────────────── */ + +/* Default snippet fallback line count */ +#define SNIPPET_DEFAULT_LINES 50 + +/* Idle store eviction: close cached project store after this many seconds + * of inactivity to free SQLite memory during idle periods. */ +#define STORE_IDLE_TIMEOUT_S 60 + +/* Directory permissions: rwxr-xr-x */ +#define ADR_DIR_PERMS 0755 + +/* JSON-RPC 2.0 standard error codes */ +#define JSONRPC_PARSE_ERROR (-32700) +#define JSONRPC_METHOD_NOT_FOUND (-32601) + +/* ── Helpers ────────────────────────────────────────────────────── */ + +static char *heap_strdup(const char *s) { + if (!s) { + return NULL; + } + size_t len = strlen(s); + char *d = malloc(len + 1); + if (d) { + memcpy(d, s, len + 1); + } + return d; +} + +/* Write yyjson_mut_doc to heap-allocated JSON string. + * ALLOW_INVALID_UNICODE: some database strings may contain non-UTF-8 bytes + * from older indexing runs — don't fail serialization over it. 
*/ +static char *yy_doc_to_str(yyjson_mut_doc *doc) { + size_t len = 0; + char *s = yyjson_mut_write(doc, YYJSON_WRITE_ALLOW_INVALID_UNICODE, &len); + return s; +} + +/* ══════════════════════════════════════════════════════════════════ + * JSON-RPC PARSING + * ══════════════════════════════════════════════════════════════════ */ + +int cbm_jsonrpc_parse(const char *line, cbm_jsonrpc_request_t *out) { + memset(out, 0, sizeof(*out)); + out->id = -1; + + yyjson_doc *doc = yyjson_read(line, strlen(line), 0); + if (!doc) { + return -1; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + if (!yyjson_is_obj(root)) { + yyjson_doc_free(doc); + return -1; + } + + yyjson_val *v_jsonrpc = yyjson_obj_get(root, "jsonrpc"); + yyjson_val *v_method = yyjson_obj_get(root, "method"); + yyjson_val *v_id = yyjson_obj_get(root, "id"); + yyjson_val *v_params = yyjson_obj_get(root, "params"); + + if (!v_method || !yyjson_is_str(v_method)) { + yyjson_doc_free(doc); + return -1; + } + + out->jsonrpc = + heap_strdup(v_jsonrpc && yyjson_is_str(v_jsonrpc) ? 
yyjson_get_str(v_jsonrpc) : "2.0"); + out->method = heap_strdup(yyjson_get_str(v_method)); + + if (v_id) { + out->has_id = true; + if (yyjson_is_int(v_id)) { + out->id = yyjson_get_int(v_id); + } else if (yyjson_is_str(v_id)) { + out->id = strtol(yyjson_get_str(v_id), NULL, 10); + } + } + + if (v_params) { + out->params_raw = yyjson_val_write(v_params, 0, NULL); + } + + yyjson_doc_free(doc); + return 0; +} + +void cbm_jsonrpc_request_free(cbm_jsonrpc_request_t *r) { + if (!r) { + return; + } + free((void *)r->jsonrpc); + free((void *)r->method); + free((void *)r->params_raw); + memset(r, 0, sizeof(*r)); +} + +/* ══════════════════════════════════════════════════════════════════ + * JSON-RPC FORMATTING + * ══════════════════════════════════════════════════════════════════ */ + +char *cbm_jsonrpc_format_response(const cbm_jsonrpc_response_t *resp) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "jsonrpc", "2.0"); + yyjson_mut_obj_add_int(doc, root, "id", resp->id); + + if (resp->error_json) { + /* Parse the error JSON and embed */ + yyjson_doc *err_doc = yyjson_read(resp->error_json, strlen(resp->error_json), 0); + if (err_doc) { + yyjson_mut_val *err_val = yyjson_val_mut_copy(doc, yyjson_doc_get_root(err_doc)); + yyjson_mut_obj_add_val(doc, root, "error", err_val); + yyjson_doc_free(err_doc); + } + } else if (resp->result_json) { + /* Parse the result JSON and embed */ + yyjson_doc *res_doc = yyjson_read(resp->result_json, strlen(resp->result_json), 0); + if (res_doc) { + yyjson_mut_val *res_val = yyjson_val_mut_copy(doc, yyjson_doc_get_root(res_doc)); + yyjson_mut_obj_add_val(doc, root, "result", res_val); + yyjson_doc_free(res_doc); + } + } + + char *out = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + return out; +} + +char *cbm_jsonrpc_format_error(int64_t id, int code, const char *message) { + yyjson_mut_doc *doc = 
yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "jsonrpc", "2.0"); + yyjson_mut_obj_add_int(doc, root, "id", id); + + yyjson_mut_val *err = yyjson_mut_obj(doc); + yyjson_mut_obj_add_int(doc, err, "code", code); + yyjson_mut_obj_add_str(doc, err, "message", message); + yyjson_mut_obj_add_val(doc, root, "error", err); + + char *out = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + return out; +} + +/* ══════════════════════════════════════════════════════════════════ + * MCP PROTOCOL HELPERS + * ══════════════════════════════════════════════════════════════════ */ + +char *cbm_mcp_text_result(const char *text, bool is_error) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_val *content = yyjson_mut_arr(doc); + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "type", "text"); + yyjson_mut_obj_add_str(doc, item, "text", text); + yyjson_mut_arr_add_val(content, item); + yyjson_mut_obj_add_val(doc, root, "content", content); + + if (is_error) { + yyjson_mut_obj_add_bool(doc, root, "isError", true); + } + + char *out = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + return out; +} + +/* ── Tool definitions ─────────────────────────────────────────── */ + +typedef struct { + const char *name; + const char *description; + const char *input_schema; /* JSON string */ +} tool_def_t; + +static const tool_def_t TOOLS[] = { + {"index_repository", "Index a repository into the knowledge graph", + "{\"type\":\"object\",\"properties\":{\"repo_path\":{\"type\":\"string\",\"description\":" + "\"Path to the " + "repository\"},\"mode\":{\"type\":\"string\",\"enum\":[\"full\",\"fast\"],\"default\":" + "\"full\"}},\"required\":[\"repo_path\"]}"}, + + {"search_graph", + "Search the code knowledge graph for functions, classes, routes, and variables. 
Use INSTEAD " + "OF grep/glob when finding code definitions, implementations, or relationships. Returns " + "precise results in one call.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"label\":{\"type\":" + "\"string\"},\"name_pattern\":{\"type\":\"string\"},\"qn_pattern\":{\"type\":\"string\"}," + "\"file_pattern\":{\"type\":\"string\"},\"relationship\":{\"type\":\"string\"},\"min_degree\":" + "{\"type\":\"integer\"},\"max_degree\":{\"type\":\"integer\"},\"exclude_entry_points\":{" + "\"type\":\"boolean\"},\"include_connected\":{\"type\":\"boolean\"},\"limit\":{\"type\":" + "\"integer\",\"description\":\"Max results. Default: " + "unlimited\"},\"offset\":{\"type\":\"integer\",\"default\":0}}}"}, + + {"query_graph", + "Execute a Cypher query against the knowledge graph for complex multi-hop patterns, " + "aggregations, and cross-service analysis.", + "{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\",\"description\":\"Cypher " + "query\"},\"project\":{\"type\":\"string\"},\"max_rows\":{\"type\":\"integer\"," + "\"description\":" + "\"Optional row limit. Default: unlimited (100k ceiling)\"}},\"required\":[\"query\"]}"}, + + {"trace_call_path", + "Trace function call paths — who calls a function and what it calls. Use INSTEAD OF grep when " + "finding callers, dependencies, or impact analysis.", + "{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{" + "\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\"," + "\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_" + "types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_" + "name\"]}"}, + + {"get_code_snippet", + "Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the " + "exact qualified_name, then pass it here. This is a read tool, not a search tool. 
Accepts " + "full qualified_name (exact match) or short function name (returns suggestions if ambiguous).", + "{\"type\":\"object\",\"properties\":{\"qualified_name\":{\"type\":\"string\",\"description\":" + "\"Full qualified_name from search_graph, or short function name\"},\"project\":{" + "\"type\":\"string\"},\"include_neighbors\":{" + "\"type\":\"boolean\",\"default\":false}},\"required\":[\"qualified_name\"]}"}, + + {"get_graph_schema", "Get the schema of the knowledge graph (node labels, edge types)", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"}}}"}, + + {"get_architecture", + "Get high-level architecture overview — packages, services, dependencies, and project " + "structure at a glance.", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"aspects\":{\"type\":" + "\"array\",\"items\":{\"type\":\"string\"}}}}"}, + + {"search_code", + "Search source code content with text or regex patterns. Use for string literals, error " + "messages, and config values that are not in the knowledge graph.", + "{\"type\":\"object\",\"properties\":{\"pattern\":{\"type\":\"string\"},\"project\":{\"type\":" + "\"string\"},\"file_pattern\":{\"type\":\"string\"},\"regex\":{\"type\":\"boolean\"," + "\"default\":false},\"limit\":{\"type\":\"integer\",\"description\":\"Max results. 
Default: " + "unlimited\"}},\"required\":[" + "\"pattern\"]}"}, + + {"list_projects", "List all indexed projects", "{\"type\":\"object\",\"properties\":{}}"}, + + {"delete_project", "Delete a project from the index", + "{\"type\":\"object\",\"properties\":{\"project_name\":{\"type\":\"string\"}},\"required\":[" + "\"project_name\"]}"}, + + {"index_status", "Get the indexing status of a project", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"}}}"}, + + {"detect_changes", "Detect code changes and their impact", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"scope\":{\"type\":" + "\"string\"},\"depth\":{\"type\":\"integer\",\"default\":2},\"base_branch\":{\"type\":" + "\"string\",\"default\":\"main\"}}}"}, + + {"manage_adr", "Create or update Architecture Decision Records", + "{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},\"mode\":{\"type\":" + "\"string\",\"enum\":[\"get\",\"update\",\"sections\"]},\"content\":{\"type\":\"string\"}," + "\"sections\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}}"}, + + {"ingest_traces", "Ingest runtime traces to enhance the knowledge graph", + "{\"type\":\"object\",\"properties\":{\"traces\":{\"type\":\"array\",\"items\":{\"type\":" + "\"object\"}},\"project\":{\"type\":" + "\"string\"}},\"required\":[\"traces\"]}"}, +}; + +static const int TOOL_COUNT = sizeof(TOOLS) / sizeof(TOOLS[0]); + +char *cbm_mcp_tools_list(void) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_val *tools = yyjson_mut_arr(doc); + + for (int i = 0; i < TOOL_COUNT; i++) { + yyjson_mut_val *tool = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, tool, "name", TOOLS[i].name); + yyjson_mut_obj_add_str(doc, tool, "description", TOOLS[i].description); + + /* Parse input schema JSON and embed */ + yyjson_doc *schema_doc = + yyjson_read(TOOLS[i].input_schema, 
strlen(TOOLS[i].input_schema), 0); + if (schema_doc) { + yyjson_mut_val *schema = yyjson_val_mut_copy(doc, yyjson_doc_get_root(schema_doc)); + yyjson_mut_obj_add_val(doc, tool, "inputSchema", schema); + yyjson_doc_free(schema_doc); + } + + yyjson_mut_arr_add_val(tools, tool); + } + + yyjson_mut_obj_add_val(doc, root, "tools", tools); + + char *out = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + return out; +} + +/* Supported protocol versions, newest first. The server picks the newest + * version that it shares with the client (per MCP spec version negotiation). */ +static const char *SUPPORTED_PROTOCOL_VERSIONS[] = { + "2025-11-25", + "2025-06-18", + "2025-03-26", + "2024-11-05", +}; +static const int SUPPORTED_VERSION_COUNT = + (int)(sizeof(SUPPORTED_PROTOCOL_VERSIONS) / sizeof(SUPPORTED_PROTOCOL_VERSIONS[0])); + +char *cbm_mcp_initialize_response(const char *params_json) { + /* Determine protocol version: if client requests a version we support, + * echo it back; otherwise respond with our latest. 
*/ + const char *version = SUPPORTED_PROTOCOL_VERSIONS[0]; /* default: latest */ + if (params_json) { + yyjson_doc *pdoc = yyjson_read(params_json, strlen(params_json), 0); + if (pdoc) { + yyjson_val *pv = yyjson_obj_get(yyjson_doc_get_root(pdoc), "protocolVersion"); + if (pv && yyjson_is_str(pv)) { + const char *requested = yyjson_get_str(pv); + for (int i = 0; i < SUPPORTED_VERSION_COUNT; i++) { + if (strcmp(requested, SUPPORTED_PROTOCOL_VERSIONS[i]) == 0) { + version = SUPPORTED_PROTOCOL_VERSIONS[i]; + break; + } + } + } + yyjson_doc_free(pdoc); + } + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "protocolVersion", version); + + yyjson_mut_val *impl = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, impl, "name", "codebase-memory-mcp"); + yyjson_mut_obj_add_str(doc, impl, "version", "0.10.0"); + yyjson_mut_obj_add_val(doc, root, "serverInfo", impl); + + yyjson_mut_val *caps = yyjson_mut_obj(doc); + yyjson_mut_val *tools_cap = yyjson_mut_obj(doc); + yyjson_mut_obj_add_val(doc, caps, "tools", tools_cap); + yyjson_mut_obj_add_val(doc, root, "capabilities", caps); + + char *out = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + return out; +} + +/* ══════════════════════════════════════════════════════════════════ + * ARGUMENT EXTRACTION + * ══════════════════════════════════════════════════════════════════ */ + +char *cbm_mcp_get_tool_name(const char *params_json) { + yyjson_doc *doc = yyjson_read(params_json, strlen(params_json), 0); + if (!doc) { + return NULL; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *name = yyjson_obj_get(root, "name"); + char *result = NULL; + if (name && yyjson_is_str(name)) { + result = heap_strdup(yyjson_get_str(name)); + } + yyjson_doc_free(doc); + return result; +} + +char *cbm_mcp_get_arguments(const char *params_json) { + yyjson_doc *doc = yyjson_read(params_json, 
strlen(params_json), 0); + if (!doc) { + return NULL; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *args = yyjson_obj_get(root, "arguments"); + char *result = NULL; + if (args) { + result = yyjson_val_write(args, 0, NULL); + } + yyjson_doc_free(doc); + return result ? result : heap_strdup("{}"); +} + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +char *cbm_mcp_get_string_arg(const char *args_json, const char *key) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return NULL; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + char *result = NULL; + if (val && yyjson_is_str(val)) { + result = heap_strdup(yyjson_get_str(val)); + } + yyjson_doc_free(doc); + return result; +} + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +int cbm_mcp_get_int_arg(const char *args_json, const char *key, int default_val) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return default_val; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + int result = default_val; + if (val && yyjson_is_int(val)) { + result = yyjson_get_int(val); + } + yyjson_doc_free(doc); + return result; +} + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +bool cbm_mcp_get_bool_arg(const char *args_json, const char *key) { + yyjson_doc *doc = yyjson_read(args_json, strlen(args_json), 0); + if (!doc) { + return false; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *val = yyjson_obj_get(root, key); + bool result = false; + if (val && yyjson_is_bool(val)) { + result = yyjson_get_bool(val); + } + yyjson_doc_free(doc); + return result; +} + +/* ══════════════════════════════════════════════════════════════════ + * MCP SERVER + * ══════════════════════════════════════════════════════════════════ */ + +struct cbm_mcp_server { + cbm_store_t *store; /* currently open project store (or NULL) */ + 
bool owns_store; /* true if we opened the store */ + char *current_project; /* which project store is open for (heap) */ + time_t store_last_used; /* last time resolve_store was called for a named project */ + char update_notice[256]; /* one-shot update notice, cleared after first injection */ + bool update_checked; /* true after background check has been launched */ + cbm_thread_t update_tid; /* background update check thread */ + bool update_thread_active; /* true if update thread was started and needs joining */ + + /* Session + auto-index state */ + char session_root[1024]; /* detected project root path */ + char session_project[256]; /* derived project name */ + bool session_detected; /* true after first detection attempt */ + struct cbm_watcher *watcher; /* external watcher ref (not owned) */ + struct cbm_config *config; /* external config ref (not owned) */ + cbm_thread_t autoindex_tid; + bool autoindex_active; /* true if auto-index thread was started */ +}; + +cbm_mcp_server_t *cbm_mcp_server_new(const char *store_path) { + cbm_mcp_server_t *srv = calloc(1, sizeof(*srv)); + if (!srv) { + return NULL; + } + + /* If a store_path is given, open that project directly. + * Otherwise, create an in-memory store for test/embedded use. */ + if (store_path) { + srv->store = cbm_store_open(store_path); + srv->current_project = heap_strdup(store_path); + } else { + srv->store = cbm_store_open_memory(); + } + srv->owns_store = true; + + return srv; +} + +cbm_store_t *cbm_mcp_server_store(cbm_mcp_server_t *srv) { + return srv ? srv->store : NULL; +} + +void cbm_mcp_server_set_project(cbm_mcp_server_t *srv, const char *project) { + if (!srv) { + return; + } + free(srv->current_project); + srv->current_project = project ? 
heap_strdup(project) : NULL; +} + +void cbm_mcp_server_set_watcher(cbm_mcp_server_t *srv, struct cbm_watcher *w) { + if (srv) { + srv->watcher = w; + } +} + +void cbm_mcp_server_set_config(cbm_mcp_server_t *srv, struct cbm_config *cfg) { + if (srv) { + srv->config = cfg; + } +} + +void cbm_mcp_server_free(cbm_mcp_server_t *srv) { + if (!srv) { + return; + } + if (srv->update_thread_active) { + cbm_thread_join(&srv->update_tid); + } + if (srv->autoindex_active) { + cbm_thread_join(&srv->autoindex_tid); + } + if (srv->owns_store && srv->store) { + cbm_store_close(srv->store); + } + free(srv->current_project); + free(srv); +} + +/* ── Idle store eviction ──────────────────────────────────────── */ + +void cbm_mcp_server_evict_idle(cbm_mcp_server_t *srv, int timeout_s) { + if (!srv || !srv->store) { + return; + } + /* Protect initial in-memory stores that were never accessed via a named project. + * store_last_used stays 0 until resolve_store is called with a non-NULL project. */ + if (srv->store_last_used == 0) { + return; + } + + time_t now = time(NULL); + if ((now - srv->store_last_used) < timeout_s) { + return; + } + + if (srv->owns_store) { + cbm_store_close(srv->store); + } + srv->store = NULL; + free(srv->current_project); + srv->current_project = NULL; + srv->store_last_used = 0; +} + +bool cbm_mcp_server_has_cached_store(cbm_mcp_server_t *srv) { + return (srv && srv->store != NULL) != 0; +} + +/* ── Cache dir + project DB path helpers ───────────────────────── */ + +/* Returns the platform cache directory: ~/.cache/codebase-memory-mcp + * Writes to buf, returns buf for convenience. 
*/ +static const char *cache_dir(char *buf, size_t bufsz) { + const char *home = cbm_home_dir(); + if (!home) { + home = "/tmp"; + } + snprintf(buf, bufsz, "%s/.cache/codebase-memory-mcp", home); + return buf; +} + +/* Returns full .db path for a project: /.db */ +static const char *project_db_path(const char *project, char *buf, size_t bufsz) { + char dir[1024]; + cache_dir(dir, sizeof(dir)); + snprintf(buf, bufsz, "%s/%s.db", dir, project); + return buf; +} + +/* ── Store resolution ──────────────────────────────────────────── */ + +/* Open the right project's .db file for query tools. + * Caches the connection — reopens only when project changes. + * Tracks last-access time so the event loop can evict idle stores. */ +static cbm_store_t *resolve_store(cbm_mcp_server_t *srv, const char *project) { + if (!project) { + return srv->store; /* no project specified → use whatever's open */ + } + + srv->store_last_used = time(NULL); + + /* Already open for this project? */ + if (srv->current_project && strcmp(srv->current_project, project) == 0 && srv->store) { + return srv->store; + } + + /* Close old store */ + if (srv->owns_store && srv->store) { + cbm_store_close(srv->store); + srv->store = NULL; + } + + /* Open project's .db file */ + char path[1024]; + project_db_path(project, path, sizeof(path)); + srv->store = cbm_store_open_path(path); + srv->owns_store = true; + free(srv->current_project); + srv->current_project = heap_strdup(project); + + return srv->store; +} + +/* Bail with empty JSON result when no store is available. */ +#define REQUIRE_STORE(store, project) \ + do { \ + if (!(store)) { \ + free(project); \ + return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); \ + } \ + } while (0) + +/* ── Tool handler implementations ─────────────────────────────── */ + +/* list_projects: scan cache directory for .db files. + * Each project is a single .db file — no central registry needed. 
*/ +static char *handle_list_projects(cbm_mcp_server_t *srv, const char *args) { + (void)srv; + (void)args; + + char dir_path[1024]; + cache_dir(dir_path, sizeof(dir_path)); + + cbm_dir_t *d = cbm_opendir(dir_path); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_val *arr = yyjson_mut_arr(doc); + + if (d) { + cbm_dirent_t *entry; + while ((entry = cbm_readdir(d)) != NULL) { + const char *name = entry->name; + size_t len = strlen(name); + + /* Must end with .db and be at least 4 chars (x.db) */ + if (len < 4 || strcmp(name + len - 3, ".db") != 0) { + continue; + } + + /* Skip temp/internal files */ + if (strncmp(name, "tmp-", 4) == 0 || strncmp(name, "_", 1) == 0 || + strncmp(name, ":memory:", 8) == 0) { + continue; + } + + /* Extract project name = filename without .db suffix */ + char project_name[1024]; + snprintf(project_name, sizeof(project_name), "%.*s", (int)(len - 3), name); + + /* Get file metadata */ + char full_path[2048]; + snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, name); + struct stat st; + if (stat(full_path, &st) != 0) { + continue; + } + + /* Open briefly to get node/edge count + root_path */ + cbm_store_t *pstore = cbm_store_open_path(full_path); + int nodes = 0; + int edges = 0; + char root_path_buf[1024] = ""; + if (pstore) { + nodes = cbm_store_count_nodes(pstore, project_name); + edges = cbm_store_count_edges(pstore, project_name); + cbm_project_t proj = {0}; + if (cbm_store_get_project(pstore, project_name, &proj) == CBM_STORE_OK) { + if (proj.root_path) { + snprintf(root_path_buf, sizeof(root_path_buf), "%s", proj.root_path); + } + free((void *)proj.name); + free((void *)proj.indexed_at); + free((void *)proj.root_path); + } + cbm_store_close(pstore); + } + + yyjson_mut_val *p = yyjson_mut_obj(doc); + yyjson_mut_obj_add_strcpy(doc, p, "name", project_name); + yyjson_mut_obj_add_strcpy(doc, p, "root_path", root_path_buf); + 
yyjson_mut_obj_add_int(doc, p, "nodes", nodes); + yyjson_mut_obj_add_int(doc, p, "edges", edges); + yyjson_mut_obj_add_int(doc, p, "size_bytes", (int64_t)st.st_size); + yyjson_mut_arr_add_val(arr, p); + } + cbm_closedir(d); + } + + yyjson_mut_obj_add_val(doc, root, "projects", arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_get_graph_schema(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + cbm_schema_info_t schema = {0}; + cbm_store_get_schema(store, project, &schema); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_val *labels = yyjson_mut_arr(doc); + for (int i = 0; i < schema.node_label_count; i++) { + yyjson_mut_val *lbl = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, lbl, "label", schema.node_labels[i].label); + yyjson_mut_obj_add_int(doc, lbl, "count", schema.node_labels[i].count); + yyjson_mut_arr_add_val(labels, lbl); + } + yyjson_mut_obj_add_val(doc, root, "node_labels", labels); + + yyjson_mut_val *types = yyjson_mut_arr(doc); + for (int i = 0; i < schema.edge_type_count; i++) { + yyjson_mut_val *typ = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, typ, "type", schema.edge_types[i].type); + yyjson_mut_obj_add_int(doc, typ, "count", schema.edge_types[i].count); + yyjson_mut_arr_add_val(types, typ); + } + yyjson_mut_obj_add_val(doc, root, "edge_types", types); + + /* Check ADR presence */ + cbm_project_t proj_info = {0}; + if (cbm_store_get_project(store, project, &proj_info) == 0 && proj_info.root_path) { + char adr_path[4096]; + snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", proj_info.root_path); + struct stat adr_st; + // 
NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool adr_exists = (stat(adr_path, &adr_st) == 0); + yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists); + if (!adr_exists) { + yyjson_mut_obj_add_str( + doc, root, "adr_hint", + "No ADR found. Use manage_adr(mode='update') to persist architectural " + "decisions across sessions. Run get_architecture(aspects=['all']) first."); + } + cbm_project_free_fields(&proj_info); + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_schema_free(&schema); + free(project); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_search_graph(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + char *label = cbm_mcp_get_string_arg(args, "label"); + char *name_pattern = cbm_mcp_get_string_arg(args, "name_pattern"); + char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); + int limit = cbm_mcp_get_int_arg(args, "limit", 500000); + int offset = cbm_mcp_get_int_arg(args, "offset", 0); + int min_degree = cbm_mcp_get_int_arg(args, "min_degree", -1); + int max_degree = cbm_mcp_get_int_arg(args, "max_degree", -1); + + cbm_search_params_t params = { + .project = project, + .label = label, + .name_pattern = name_pattern, + .file_pattern = file_pattern, + .limit = limit, + .offset = offset, + .min_degree = min_degree, + .max_degree = max_degree, + }; + + cbm_search_output_t out = {0}; + cbm_store_search(store, ¶ms, &out); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_int(doc, root, "total", out.total); + + yyjson_mut_val *results = yyjson_mut_arr(doc); + for (int i = 0; i < out.count; i++) { + cbm_search_result_t *sr = &out.results[i]; + yyjson_mut_val *item = yyjson_mut_obj(doc); + 
yyjson_mut_obj_add_str(doc, item, "name", sr->node.name ? sr->node.name : ""); + yyjson_mut_obj_add_str(doc, item, "qualified_name", + sr->node.qualified_name ? sr->node.qualified_name : ""); + yyjson_mut_obj_add_str(doc, item, "label", sr->node.label ? sr->node.label : ""); + yyjson_mut_obj_add_str(doc, item, "file_path", + sr->node.file_path ? sr->node.file_path : ""); + yyjson_mut_obj_add_int(doc, item, "in_degree", sr->in_degree); + yyjson_mut_obj_add_int(doc, item, "out_degree", sr->out_degree); + yyjson_mut_arr_add_val(results, item); + } + yyjson_mut_obj_add_val(doc, root, "results", results); + yyjson_mut_obj_add_bool(doc, root, "has_more", out.total > offset + out.count); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_search_free(&out); + + free(project); + free(label); + free(name_pattern); + free(file_pattern); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_query_graph(cbm_mcp_server_t *srv, const char *args) { + char *query = cbm_mcp_get_string_arg(args, "query"); + char *project = cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + int max_rows = cbm_mcp_get_int_arg(args, "max_rows", 0); + + if (!query) { + free(project); + return cbm_mcp_text_result("query is required", true); + } + if (!store) { + free(project); + free(query); + return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); + } + + cbm_cypher_result_t result = {0}; + int rc = cbm_cypher_execute(store, query, project, max_rows, &result); + + if (rc < 0) { + char *err_msg = result.error ? 
result.error : "query execution failed"; + char *resp = cbm_mcp_text_result(err_msg, true); + cbm_cypher_result_free(&result); + free(query); + free(project); + return resp; + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + /* columns */ + yyjson_mut_val *cols = yyjson_mut_arr(doc); + for (int i = 0; i < result.col_count; i++) { + yyjson_mut_arr_add_str(doc, cols, result.columns[i]); + } + yyjson_mut_obj_add_val(doc, root, "columns", cols); + + /* rows */ + yyjson_mut_val *rows = yyjson_mut_arr(doc); + for (int r = 0; r < result.row_count; r++) { + yyjson_mut_val *row = yyjson_mut_arr(doc); + for (int c = 0; c < result.col_count; c++) { + yyjson_mut_arr_add_str(doc, row, result.rows[r][c]); + } + yyjson_mut_arr_add_val(rows, row); + } + yyjson_mut_obj_add_val(doc, root, "rows", rows); + yyjson_mut_obj_add_int(doc, root, "total", result.row_count); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_cypher_result_free(&result); + free(query); + free(project); + + char *res = cbm_mcp_text_result(json, false); + free(json); + return res; +} + +static char *handle_index_status(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + if (project) { + int nodes = cbm_store_count_nodes(store, project); + int edges = cbm_store_count_edges(store, project); + yyjson_mut_obj_add_str(doc, root, "project", project); + yyjson_mut_obj_add_int(doc, root, "nodes", nodes); + yyjson_mut_obj_add_int(doc, root, "edges", edges); + yyjson_mut_obj_add_str(doc, root, "status", nodes > 0 ? 
"ready" : "empty"); + } else { + yyjson_mut_obj_add_str(doc, root, "status", "no_project"); + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(project); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* delete_project: just erase the .db file (and WAL/SHM). */ +static char *handle_delete_project(cbm_mcp_server_t *srv, const char *args) { + char *name = cbm_mcp_get_string_arg(args, "project_name"); + if (!name) { + return cbm_mcp_text_result("project_name is required", true); + } + + /* Close store if it's the project being deleted */ + if (srv->current_project && strcmp(srv->current_project, name) == 0) { + if (srv->owns_store && srv->store) { + cbm_store_close(srv->store); + srv->store = NULL; + } + free(srv->current_project); + srv->current_project = NULL; + } + + /* Delete the .db file + WAL/SHM */ + char path[1024]; + project_db_path(name, path, sizeof(path)); + + char wal[1024]; + char shm[1024]; + snprintf(wal, sizeof(wal), "%s-wal", path); + snprintf(shm, sizeof(shm), "%s-shm", path); + + bool exists = (access(path, F_OK) == 0); + const char *status = "not_found"; + if (exists) { + (void)cbm_unlink(path); + (void)cbm_unlink(wal); + (void)cbm_unlink(shm); + status = "deleted"; + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + yyjson_mut_obj_add_str(doc, root, "project", name); + yyjson_mut_obj_add_str(doc, root, "status", status); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(name); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_get_architecture(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + REQUIRE_STORE(store, project); + + cbm_schema_info_t schema = {0}; + cbm_store_get_schema(store, project, &schema); + 
+ int node_count = cbm_store_count_nodes(store, project); + int edge_count = cbm_store_count_edges(store, project); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + if (project) { + yyjson_mut_obj_add_str(doc, root, "project", project); + } + yyjson_mut_obj_add_int(doc, root, "total_nodes", node_count); + yyjson_mut_obj_add_int(doc, root, "total_edges", edge_count); + + /* Node label summary */ + yyjson_mut_val *labels = yyjson_mut_arr(doc); + for (int i = 0; i < schema.node_label_count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "label", schema.node_labels[i].label); + yyjson_mut_obj_add_int(doc, item, "count", schema.node_labels[i].count); + yyjson_mut_arr_add_val(labels, item); + } + yyjson_mut_obj_add_val(doc, root, "node_labels", labels); + + /* Edge type summary */ + yyjson_mut_val *types = yyjson_mut_arr(doc); + for (int i = 0; i < schema.edge_type_count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "type", schema.edge_types[i].type); + yyjson_mut_obj_add_int(doc, item, "count", schema.edge_types[i].count); + yyjson_mut_arr_add_val(types, item); + } + yyjson_mut_obj_add_val(doc, root, "edge_types", types); + + /* Relationship patterns */ + if (schema.rel_pattern_count > 0) { + yyjson_mut_val *pats = yyjson_mut_arr(doc); + for (int i = 0; i < schema.rel_pattern_count; i++) { + yyjson_mut_arr_add_str(doc, pats, schema.rel_patterns[i]); + } + yyjson_mut_obj_add_val(doc, root, "relationship_patterns", pats); + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + cbm_store_schema_free(&schema); + free(project); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { + char *func_name = cbm_mcp_get_string_arg(args, "function_name"); + char *project = 
cbm_mcp_get_string_arg(args, "project"); + cbm_store_t *store = resolve_store(srv, project); + char *direction = cbm_mcp_get_string_arg(args, "direction"); + int depth = cbm_mcp_get_int_arg(args, "depth", 3); + + if (!func_name) { + free(project); + free(direction); + return cbm_mcp_text_result("function_name is required", true); + } + if (!store) { + free(func_name); + free(project); + free(direction); + return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true); + } + if (!direction) { + direction = heap_strdup("both"); + } + + /* Find the node by name */ + cbm_node_t *nodes = NULL; + int node_count = 0; + cbm_store_find_nodes_by_name(store, project, func_name, &nodes, &node_count); + + if (node_count == 0) { + free(func_name); + free(project); + free(direction); + cbm_store_free_nodes(nodes, 0); + return cbm_mcp_text_result("{\"error\":\"function not found\"}", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "function", func_name); + yyjson_mut_obj_add_str(doc, root, "direction", direction); + + const char *edge_types[] = {"CALLS"}; + int edge_type_count = 1; + + /* Run BFS for each requested direction. + * IMPORTANT: yyjson_mut_obj_add_str borrows pointers — we must keep + * traversal results alive until after yy_doc_to_str serialization. 
*/ + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool do_outbound = strcmp(direction, "outbound") == 0 || strcmp(direction, "both") == 0; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool do_inbound = strcmp(direction, "inbound") == 0 || strcmp(direction, "both") == 0; + + cbm_traverse_result_t tr_out = {0}; + cbm_traverse_result_t tr_in = {0}; + + if (do_outbound) { + cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, 100, + &tr_out); + + yyjson_mut_val *callees = yyjson_mut_arr(doc); + for (int i = 0; i < tr_out.visited_count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", + tr_out.visited[i].node.name ? tr_out.visited[i].node.name : ""); + yyjson_mut_obj_add_str( + doc, item, "qualified_name", + tr_out.visited[i].node.qualified_name ? tr_out.visited[i].node.qualified_name : ""); + yyjson_mut_obj_add_int(doc, item, "hop", tr_out.visited[i].hop); + yyjson_mut_arr_add_val(callees, item); + } + yyjson_mut_obj_add_val(doc, root, "callees", callees); + } + + if (do_inbound) { + cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, 100, + &tr_in); + + yyjson_mut_val *callers = yyjson_mut_arr(doc); + for (int i = 0; i < tr_in.visited_count; i++) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", + tr_in.visited[i].node.name ? tr_in.visited[i].node.name : ""); + yyjson_mut_obj_add_str( + doc, item, "qualified_name", + tr_in.visited[i].node.qualified_name ? 
tr_in.visited[i].node.qualified_name : ""); + yyjson_mut_obj_add_int(doc, item, "hop", tr_in.visited[i].hop); + yyjson_mut_arr_add_val(callers, item); + } + yyjson_mut_obj_add_val(doc, root, "callers", callers); + } + + /* Serialize BEFORE freeing traversal results (yyjson borrows strings) */ + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + /* Now safe to free traversal data */ + if (do_outbound) { + cbm_store_traverse_free(&tr_out); + } + if (do_inbound) { + cbm_store_traverse_free(&tr_in); + } + + cbm_store_free_nodes(nodes, node_count); + free(func_name); + free(project); + free(direction); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* ── Helper: free heap fields of a stack-allocated node ────────── */ + +static void free_node_contents(cbm_node_t *n) { + free((void *)n->project); + free((void *)n->label); + free((void *)n->name); + free((void *)n->qualified_name); + free((void *)n->file_path); + free((void *)n->properties_json); + memset(n, 0, sizeof(*n)); +} + +/* ── Helper: read lines [start, end] from a file ─────────────── */ + +static char *read_file_lines(const char *path, int start, int end) { + FILE *fp = fopen(path, "r"); + if (!fp) { + return NULL; + } + + size_t cap = 4096; + char *buf = malloc(cap); + size_t len = 0; + buf[0] = '\0'; + + char line[2048]; + int lineno = 0; + while (fgets(line, sizeof(line), fp)) { + lineno++; + if (lineno < start) { + continue; + } + if (lineno > end) { + break; + } + size_t ll = strlen(line); + while (len + ll + 1 > cap) { + cap *= 2; + buf = safe_realloc(buf, cap); + } + memcpy(buf + len, line, ll); + len += ll; + buf[len] = '\0'; + } + + (void)fclose(fp); + if (len == 0) { + free(buf); + return NULL; + } + return buf; +} + +/* ── Helper: get project root_path from store ─────────────────── */ + +static char *get_project_root(cbm_mcp_server_t *srv, const char *project) { + if (!project) { + return NULL; + } + cbm_store_t *store = resolve_store(srv, 
project); + if (!store) { + return NULL; + } + cbm_project_t proj = {0}; + if (cbm_store_get_project(store, project, &proj) != CBM_STORE_OK) { + return NULL; + } + char *root = heap_strdup(proj.root_path); + free((void *)proj.name); + free((void *)proj.indexed_at); + free((void *)proj.root_path); + return root; +} + +/* ── index_repository ─────────────────────────────────────────── */ + +static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) { + char *repo_path = cbm_mcp_get_string_arg(args, "repo_path"); + char *mode_str = cbm_mcp_get_string_arg(args, "mode"); + + if (!repo_path) { + free(mode_str); + return cbm_mcp_text_result("repo_path is required", true); + } + + cbm_index_mode_t mode = CBM_MODE_FULL; + if (mode_str && strcmp(mode_str, "fast") == 0) { + mode = CBM_MODE_FAST; + } + free(mode_str); + + cbm_pipeline_t *p = cbm_pipeline_new(repo_path, NULL, mode); + if (!p) { + free(repo_path); + return cbm_mcp_text_result("failed to create pipeline", true); + } + + char *project_name = heap_strdup(cbm_pipeline_project_name(p)); + + /* Pipeline builds everything in-memory, then dumps to file atomically. + * No need to close srv->store — pipeline doesn't touch the open store. */ + int rc = cbm_pipeline_run(p); + cbm_pipeline_free(p); + cbm_mem_collect(); /* return mimalloc pages to OS after large indexing */ + + /* Invalidate cached store so next query reopens the fresh database */ + if (srv->owns_store && srv->store) { + cbm_store_close(srv->store); + srv->store = NULL; + } + free(srv->current_project); + srv->current_project = NULL; + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "project", project_name); + yyjson_mut_obj_add_str(doc, root, "status", rc == 0 ? 
"indexed" : "error"); + + if (rc == 0) { + cbm_store_t *store = resolve_store(srv, project_name); + if (store) { + int nodes = cbm_store_count_nodes(store, project_name); + int edges = cbm_store_count_edges(store, project_name); + yyjson_mut_obj_add_int(doc, root, "nodes", nodes); + yyjson_mut_obj_add_int(doc, root, "edges", edges); + + /* Check ADR presence and suggest creation if missing */ + char adr_path[4096]; + snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", repo_path); + struct stat adr_st; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool adr_exists = (stat(adr_path, &adr_st) == 0); + yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists); + if (!adr_exists) { + yyjson_mut_obj_add_str( + doc, root, "adr_hint", + "Project indexed. Consider creating an Architecture Decision Record: " + "explore the codebase with get_architecture(aspects=['all']), then use " + "manage_adr(mode='store') to persist architectural insights across sessions."); + } + } + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(project_name); + free(repo_path); + + char *result = cbm_mcp_text_result(json, rc != 0); + free(json); + return result; +} + +/* ── get_code_snippet ─────────────────────────────────────────── */ + +/* Copy a node from an array into a heap-allocated standalone node. */ +static void copy_node(const cbm_node_t *src, cbm_node_t *dst) { + dst->id = src->id; + dst->project = heap_strdup(src->project); + dst->label = heap_strdup(src->label); + dst->name = heap_strdup(src->name); + dst->qualified_name = heap_strdup(src->qualified_name); + dst->file_path = heap_strdup(src->file_path); + dst->start_line = src->start_line; + dst->end_line = src->end_line; + dst->properties_json = src->properties_json ? heap_strdup(src->properties_json) : NULL; +} + +/* Build a JSON suggestions response for ambiguous or fuzzy results. 
*/ +static char *snippet_suggestions(const char *input, cbm_node_t *nodes, int count) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "status", "ambiguous"); + + char msg[512]; + snprintf(msg, sizeof(msg), + "%d matches for \"%s\". Pick a qualified_name from suggestions below, " + "or use search_graph(name_pattern=\"...\") to narrow results.", + count, input); + yyjson_mut_obj_add_str(doc, root, "message", msg); + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + yyjson_mut_val *s = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, s, "qualified_name", + nodes[i].qualified_name ? nodes[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, s, "name", nodes[i].name ? nodes[i].name : ""); + yyjson_mut_obj_add_str(doc, s, "label", nodes[i].label ? nodes[i].label : ""); + yyjson_mut_obj_add_str(doc, s, "file_path", nodes[i].file_path ? nodes[i].file_path : ""); + yyjson_mut_arr_append(arr, s); + } + yyjson_mut_obj_add_val(doc, root, "suggestions", arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* Build an enriched snippet response for a resolved node. */ +static char *build_snippet_response(cbm_mcp_server_t *srv, cbm_node_t *node, + const char *match_method, bool include_neighbors, + cbm_node_t *alternatives, int alt_count) { + char *root_path = get_project_root(srv, node->project); + + int start = node->start_line > 0 ? node->start_line : 1; + int end = node->end_line > start ? node->end_line : start + SNIPPET_DEFAULT_LINES; + char *source = NULL; + + /* Build absolute path and verify it's within the project root. + * Prevents path traversal via crafted file_path (e.g., "../../.ssh/id_rsa"). 
*/ + char *abs_path = NULL; + if (root_path && node->file_path) { + size_t apsz = strlen(root_path) + strlen(node->file_path) + 2; + abs_path = malloc(apsz); + snprintf(abs_path, apsz, "%s/%s", root_path, node->file_path); + + /* Path containment: resolve symlinks/../ and verify file stays within root */ + char real_root[4096]; + char real_file[4096]; + bool path_ok = false; +#ifdef _WIN32 + if (_fullpath(real_root, root_path, sizeof(real_root)) && + _fullpath(real_file, abs_path, sizeof(real_file))) { +#else + if (realpath(root_path, real_root) && realpath(abs_path, real_file)) { +#endif + size_t root_len = strlen(real_root); + if (strncmp(real_file, real_root, root_len) == 0 && + (real_file[root_len] == '/' || real_file[root_len] == '\\' || + real_file[root_len] == '\0')) { + path_ok = true; + } + } + if (path_ok) { + source = read_file_lines(abs_path, start, end); + } + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + yyjson_mut_obj_add_str(doc, root_obj, "name", node->name ? node->name : ""); + yyjson_mut_obj_add_str(doc, root_obj, "qualified_name", + node->qualified_name ? node->qualified_name : ""); + yyjson_mut_obj_add_str(doc, root_obj, "label", node->label ? node->label : ""); + + const char *display_path = ""; + if (abs_path) { + display_path = abs_path; + } else if (node->file_path) { + display_path = node->file_path; + } + yyjson_mut_obj_add_str(doc, root_obj, "file_path", display_path); + yyjson_mut_obj_add_int(doc, root_obj, "start_line", start); + yyjson_mut_obj_add_int(doc, root_obj, "end_line", end); + + if (source) { + yyjson_mut_obj_add_str(doc, root_obj, "source", source); + } else { + yyjson_mut_obj_add_str(doc, root_obj, "source", "(source not available)"); + } + + /* match_method — omitted for exact matches */ + if (match_method) { + yyjson_mut_obj_add_str(doc, root_obj, "match_method", match_method); + } + + /* Enrich with node properties. 
+ * props_doc is freed AFTER serialization since yyjson_mut_obj_add_str + * stores pointers into it (zero-copy). */ + yyjson_doc *props_doc = NULL; + if (node->properties_json && node->properties_json[0] != '\0') { + props_doc = yyjson_read(node->properties_json, strlen(node->properties_json), 0); + if (props_doc) { + yyjson_val *props_root = yyjson_doc_get_root(props_doc); + if (props_root && yyjson_is_obj(props_root)) { + yyjson_obj_iter iter; + yyjson_obj_iter_init(props_root, &iter); + yyjson_val *key; + while ((key = yyjson_obj_iter_next(&iter))) { + yyjson_val *val = yyjson_obj_iter_get_val(key); + const char *k = yyjson_get_str(key); + if (!k) { + continue; + } + if (yyjson_is_str(val)) { + yyjson_mut_obj_add_str(doc, root_obj, k, yyjson_get_str(val)); + } else if (yyjson_is_bool(val)) { + yyjson_mut_obj_add_bool(doc, root_obj, k, yyjson_get_bool(val)); + } else if (yyjson_is_int(val)) { + yyjson_mut_obj_add_int(doc, root_obj, k, yyjson_get_int(val)); + } else if (yyjson_is_real(val)) { + yyjson_mut_obj_add_real(doc, root_obj, k, yyjson_get_real(val)); + } + } + } + } + } + + /* Caller/callee counts — store already resolved by calling handler */ + cbm_store_t *store = srv->store; + int in_deg = 0; + int out_deg = 0; + cbm_store_node_degree(store, node->id, &in_deg, &out_deg); + yyjson_mut_obj_add_int(doc, root_obj, "callers", in_deg); + yyjson_mut_obj_add_int(doc, root_obj, "callees", out_deg); + + /* Include neighbor names (opt-in). + * Strings stored by yyjson reference — freed after serialization. 
*/ + char **nb_callers = NULL; + int nb_caller_count = 0; + char **nb_callees = NULL; + int nb_callee_count = 0; + if (include_neighbors) { + cbm_store_node_neighbor_names(store, node->id, 10, &nb_callers, &nb_caller_count, + &nb_callees, &nb_callee_count); + if (nb_caller_count > 0) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < nb_caller_count; i++) { + yyjson_mut_arr_add_str(doc, arr, nb_callers[i]); + } + yyjson_mut_obj_add_val(doc, root_obj, "caller_names", arr); + } + if (nb_callee_count > 0) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < nb_callee_count; i++) { + yyjson_mut_arr_add_str(doc, arr, nb_callees[i]); + } + yyjson_mut_obj_add_val(doc, root_obj, "callee_names", arr); + } + } + + /* Alternatives (when auto-resolved from ambiguous) */ + if (alternatives && alt_count > 0) { + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < alt_count; i++) { + yyjson_mut_val *a = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, a, "qualified_name", + alternatives[i].qualified_name ? alternatives[i].qualified_name + : ""); + yyjson_mut_obj_add_str(doc, a, "file_path", + alternatives[i].file_path ? 
alternatives[i].file_path : ""); + yyjson_mut_arr_append(arr, a); + } + yyjson_mut_obj_add_val(doc, root_obj, "alternatives", arr); + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + yyjson_doc_free(props_doc); /* safe if NULL */ + for (int i = 0; i < nb_caller_count; i++) { + free(nb_callers[i]); + } + for (int i = 0; i < nb_callee_count; i++) { + free(nb_callees[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(nb_callers); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(nb_callees); + free(root_path); + free(abs_path); + free(source); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) { + char *qn = cbm_mcp_get_string_arg(args, "qualified_name"); + char *project = cbm_mcp_get_string_arg(args, "project"); + bool include_neighbors = cbm_mcp_get_bool_arg(args, "include_neighbors"); + + if (!qn) { + free(project); + return cbm_mcp_text_result("qualified_name is required", true); + } + + cbm_store_t *store = resolve_store(srv, project); + if (!store) { + free(qn); + free(project); + return cbm_mcp_text_result("no project loaded — run index_repository first", true); + } + + /* Default to current project (same as all other tools) */ + const char *effective_project = project ? project : srv->current_project; + + /* Tier 1: Exact QN match */ + cbm_node_t node = {0}; + int rc = cbm_store_find_node_by_qn(store, effective_project, qn, &node); + if (rc == CBM_STORE_OK) { + char *result = build_snippet_response(srv, &node, NULL, include_neighbors, NULL, 0); + free_node_contents(&node); + free(qn); + free(project); + return result; + } + + /* Tier 2: Suffix match — handles partial QNs ("main.HandleRequest") + * and short names ("ProcessOrder") via LIKE '%.X'. 
*/ + cbm_node_t *suffix_nodes = NULL; + int suffix_count = 0; + cbm_store_find_nodes_by_qn_suffix(store, effective_project, qn, &suffix_nodes, &suffix_count); + + if (suffix_count == 1) { + copy_node(&suffix_nodes[0], &node); + cbm_store_free_nodes(suffix_nodes, suffix_count); + char *result = build_snippet_response(srv, &node, "suffix", include_neighbors, NULL, 0); + free_node_contents(&node); + free(qn); + free(project); + return result; + } + + if (suffix_count > 1) { + char *result = snippet_suggestions(qn, suffix_nodes, suffix_count); + cbm_store_free_nodes(suffix_nodes, suffix_count); + free(qn); + free(project); + return result; + } + + cbm_store_free_nodes(suffix_nodes, suffix_count); + free(qn); + free(project); + + /* Nothing found — guide the caller toward search_graph */ + return cbm_mcp_text_result( + "symbol not found. Use search_graph(name_pattern=\"...\") first to discover " + "the exact qualified_name, then pass it to get_code_snippet.", + true); +} + +/* ── search_code ──────────────────────────────────────────────── */ + +static char *handle_search_code(cbm_mcp_server_t *srv, const char *args) { + char *pattern = cbm_mcp_get_string_arg(args, "pattern"); + char *project = cbm_mcp_get_string_arg(args, "project"); + char *file_pattern = cbm_mcp_get_string_arg(args, "file_pattern"); + int limit = cbm_mcp_get_int_arg(args, "limit", 500000); + bool use_regex = cbm_mcp_get_bool_arg(args, "regex"); + + if (!pattern) { + free(project); + free(file_pattern); + return cbm_mcp_text_result("pattern is required", true); + } + + char *root_path = get_project_root(srv, project); + if (!root_path) { + free(pattern); + free(project); + free(file_pattern); + return cbm_mcp_text_result("project not found or not indexed", true); + } + + /* Reject shell metacharacters in user-supplied arguments */ + if (!cbm_validate_shell_arg(root_path) || + (file_pattern && !cbm_validate_shell_arg(file_pattern))) { + free(root_path); + free(pattern); + free(project); + 
free(file_pattern); + return cbm_mcp_text_result("path or file_pattern contains invalid characters", true); + } + + /* Write pattern to temp file to avoid shell injection */ + char tmpfile[256]; +#ifdef _WIN32 + snprintf(tmpfile, sizeof(tmpfile), "/tmp/cbm_search_%d.pat", (int)_getpid()); +#else + snprintf(tmpfile, sizeof(tmpfile), "/tmp/cbm_search_%d.pat", (int)getpid()); +#endif + FILE *tf = fopen(tmpfile, "w"); + if (!tf) { + free(root_path); + free(pattern); + free(project); + free(file_pattern); + return cbm_mcp_text_result("search failed: temp file", true); + } + // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) + (void)fprintf(tf, "%s\n", pattern); + (void)fclose(tf); + + char cmd[4096]; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *flag = use_regex ? "-E" : "-F"; + if (file_pattern) { + snprintf(cmd, sizeof(cmd), "grep -rn %s --include='%s' -m %d -f '%s' '%s' 2>/dev/null", + flag, file_pattern, limit * 3, tmpfile, root_path); + } else { + snprintf(cmd, sizeof(cmd), "grep -rn %s -m %d -f '%s' '%s' 2>/dev/null", flag, limit * 3, + tmpfile, root_path); + } + + // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) + FILE *fp = cbm_popen(cmd, "r"); + if (!fp) { + cbm_unlink(tmpfile); + free(root_path); + free(pattern); + free(project); + free(file_pattern); + return cbm_mcp_text_result("search failed", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + yyjson_mut_val *matches = yyjson_mut_arr(doc); + char line[2048]; + int count = 0; + size_t root_len = strlen(root_path); + + while (fgets(line, sizeof(line), fp) && count < limit) { + size_t len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { + line[--len] = '\0'; + } + if (len == 0) { + continue; + } + + /* grep output: /abs/path/file:lineno:content */ + char *colon1 = strchr(line, ':'); + if 
(!colon1) { + continue; + } + char *colon2 = strchr(colon1 + 1, ':'); + if (!colon2) { + continue; + } + + *colon1 = '\0'; + *colon2 = '\0'; + + /* Strip root_path prefix to get relative path */ + const char *file = line; + if (strncmp(file, root_path, root_len) == 0) { + file += root_len; + if (*file == '/') { + file++; + } + } + int lineno = (int)strtol(colon1 + 1, NULL, 10); + const char *content = colon2 + 1; + + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "file", file); + yyjson_mut_obj_add_int(doc, item, "line", lineno); + yyjson_mut_obj_add_str(doc, item, "content", content); + yyjson_mut_arr_add_val(matches, item); + count++; + } + cbm_pclose(fp); + cbm_unlink(tmpfile); /* Clean up pattern file after grep is done */ + + yyjson_mut_obj_add_val(doc, root_obj, "matches", matches); + yyjson_mut_obj_add_int(doc, root_obj, "count", count); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(root_path); + free(pattern); + free(project); + free(file_pattern); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* ── detect_changes ───────────────────────────────────────────── */ + +static char *handle_detect_changes(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *base_branch = cbm_mcp_get_string_arg(args, "base_branch"); + int depth = cbm_mcp_get_int_arg(args, "depth", 2); + + if (!base_branch) { + base_branch = heap_strdup("main"); + } + + /* Reject shell metacharacters in user-supplied branch name */ + if (!cbm_validate_shell_arg(base_branch)) { + free(project); + free(base_branch); + return cbm_mcp_text_result("base_branch contains invalid characters", true); + } + + char *root_path = get_project_root(srv, project); + if (!root_path) { + free(project); + free(base_branch); + return cbm_mcp_text_result("project not found", true); + } + + if (!cbm_validate_shell_arg(root_path)) { + free(root_path); + 
free(project); + free(base_branch); + return cbm_mcp_text_result("project path contains invalid characters", true); + } + + /* Get changed files via git */ + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "cd '%s' && { git diff --name-only '%s'...HEAD 2>/dev/null; " + "git diff --name-only 2>/dev/null; } | sort -u", + root_path, base_branch); + + // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) + FILE *fp = cbm_popen(cmd, "r"); + if (!fp) { + free(root_path); + free(project); + free(base_branch); + return cbm_mcp_text_result("git diff failed", true); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + yyjson_mut_val *changed = yyjson_mut_arr(doc); + yyjson_mut_val *impacted = yyjson_mut_arr(doc); + + /* resolve_store already called via get_project_root above */ + cbm_store_t *store = srv->store; + + char line[1024]; + int file_count = 0; + + while (fgets(line, sizeof(line), fp)) { + size_t len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { + line[--len] = '\0'; + } + if (len == 0) { + continue; + } + + yyjson_mut_arr_add_str(doc, changed, line); + file_count++; + + /* Find symbols defined in this file */ + cbm_node_t *nodes = NULL; + int ncount = 0; + cbm_store_find_nodes_by_file(store, project, line, &nodes, &ncount); + + for (int i = 0; i < ncount; i++) { + if (nodes[i].label && strcmp(nodes[i].label, "File") != 0 && + strcmp(nodes[i].label, "Folder") != 0 && strcmp(nodes[i].label, "Project") != 0) { + yyjson_mut_val *item = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, item, "name", nodes[i].name ? 
nodes[i].name : ""); + yyjson_mut_obj_add_str(doc, item, "label", nodes[i].label); + yyjson_mut_obj_add_str(doc, item, "file", line); + yyjson_mut_arr_add_val(impacted, item); + } + } + cbm_store_free_nodes(nodes, ncount); + } + cbm_pclose(fp); + + yyjson_mut_obj_add_val(doc, root_obj, "changed_files", changed); + yyjson_mut_obj_add_int(doc, root_obj, "changed_count", file_count); + yyjson_mut_obj_add_val(doc, root_obj, "impacted_symbols", impacted); + yyjson_mut_obj_add_int(doc, root_obj, "depth", depth); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(root_path); + free(project); + free(base_branch); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* ── manage_adr ───────────────────────────────────────────────── */ + +static char *handle_manage_adr(cbm_mcp_server_t *srv, const char *args) { + char *project = cbm_mcp_get_string_arg(args, "project"); + char *mode_str = cbm_mcp_get_string_arg(args, "mode"); + char *content = cbm_mcp_get_string_arg(args, "content"); + + if (!mode_str) { + mode_str = heap_strdup("get"); + } + + char *root_path = get_project_root(srv, project); + if (!root_path) { + free(project); + free(mode_str); + free(content); + return cbm_mcp_text_result("project not found", true); + } + + char adr_dir[4096]; + snprintf(adr_dir, sizeof(adr_dir), "%s/.codebase-memory", root_path); + char adr_path[4096]; + snprintf(adr_path, sizeof(adr_path), "%s/adr.md", adr_dir); + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root_obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root_obj); + + if (strcmp(mode_str, "update") == 0 && content) { + /* Create dir if needed */ + cbm_mkdir(adr_dir); + FILE *fp = fopen(adr_path, "w"); + if (fp) { + (void)fputs(content, fp); + (void)fclose(fp); + yyjson_mut_obj_add_str(doc, root_obj, "status", "updated"); + } else { + yyjson_mut_obj_add_str(doc, root_obj, "status", "write_error"); + } + } else if (strcmp(mode_str, 
"sections") == 0) { + /* List section headers from ADR */ + FILE *fp = fopen(adr_path, "r"); + yyjson_mut_val *sections = yyjson_mut_arr(doc); + if (fp) { + char line[1024]; + while (fgets(line, sizeof(line), fp)) { + if (line[0] == '#') { + size_t len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { + line[--len] = '\0'; + } + yyjson_mut_arr_add_str(doc, sections, line); + } + } + (void)fclose(fp); + } + yyjson_mut_obj_add_val(doc, root_obj, "sections", sections); + } else { + /* get: read ADR content */ + FILE *fp = fopen(adr_path, "r"); + if (fp) { + (void)fseek(fp, 0, SEEK_END); + long sz = ftell(fp); + (void)fseek(fp, 0, SEEK_SET); + char *buf = malloc(sz + 1); + size_t n = fread(buf, 1, sz, fp); + buf[n] = '\0'; + (void)fclose(fp); + yyjson_mut_obj_add_str(doc, root_obj, "content", buf); + free(buf); + } else { + yyjson_mut_obj_add_str(doc, root_obj, "content", ""); + yyjson_mut_obj_add_str(doc, root_obj, "status", "no_adr"); + yyjson_mut_obj_add_str( + doc, root_obj, "adr_hint", + "No ADR yet. Create one with manage_adr(mode='update', " + "content='## PURPOSE\\n...\\n\\n## STACK\\n...\\n\\n## ARCHITECTURE\\n..." + "\\n\\n## PATTERNS\\n...\\n\\n## TRADEOFFS\\n...\\n\\n## PHILOSOPHY\\n...'). " + "For guided creation: explore the codebase with get_architecture, " + "then draft and store. 
Sections: PURPOSE, STACK, ARCHITECTURE, " + "PATTERNS, TRADEOFFS, PHILOSOPHY."); + } + } + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + free(root_path); + free(project); + free(mode_str); + free(content); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* ── ingest_traces ────────────────────────────────────────────── */ + +static char *handle_ingest_traces(cbm_mcp_server_t *srv, const char *args) { + (void)srv; + /* Parse traces array from JSON args */ + yyjson_doc *adoc = yyjson_read(args, strlen(args), 0); + int trace_count = 0; + + if (adoc) { + yyjson_val *aroot = yyjson_doc_get_root(adoc); + yyjson_val *traces = yyjson_obj_get(aroot, "traces"); + if (traces && yyjson_is_arr(traces)) { + trace_count = (int)yyjson_arr_size(traces); + } + yyjson_doc_free(adoc); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "status", "accepted"); + yyjson_mut_obj_add_int(doc, root, "traces_received", trace_count); + yyjson_mut_obj_add_str(doc, root, "note", + "Runtime edge creation from traces not yet implemented"); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + +/* ── Tool dispatch ────────────────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const char *args_json) { + if (!tool_name) { + return cbm_mcp_text_result("missing tool name", true); + } + + if (strcmp(tool_name, "list_projects") == 0) { + return handle_list_projects(srv, args_json); + } + if (strcmp(tool_name, "get_graph_schema") == 0) { + return handle_get_graph_schema(srv, args_json); + } + if (strcmp(tool_name, "search_graph") == 0) { + return handle_search_graph(srv, args_json); + } + if 
(strcmp(tool_name, "query_graph") == 0) { + return handle_query_graph(srv, args_json); + } + if (strcmp(tool_name, "index_status") == 0) { + return handle_index_status(srv, args_json); + } + if (strcmp(tool_name, "delete_project") == 0) { + return handle_delete_project(srv, args_json); + } + if (strcmp(tool_name, "trace_call_path") == 0) { + return handle_trace_call_path(srv, args_json); + } + if (strcmp(tool_name, "get_architecture") == 0) { + return handle_get_architecture(srv, args_json); + } + + /* Pipeline-dependent tools */ + if (strcmp(tool_name, "index_repository") == 0) { + return handle_index_repository(srv, args_json); + } + if (strcmp(tool_name, "get_code_snippet") == 0) { + return handle_get_code_snippet(srv, args_json); + } + if (strcmp(tool_name, "search_code") == 0) { + return handle_search_code(srv, args_json); + } + if (strcmp(tool_name, "detect_changes") == 0) { + return handle_detect_changes(srv, args_json); + } + if (strcmp(tool_name, "manage_adr") == 0) { + return handle_manage_adr(srv, args_json); + } + if (strcmp(tool_name, "ingest_traces") == 0) { + return handle_ingest_traces(srv, args_json); + } + + char msg[256]; + snprintf(msg, sizeof(msg), "unknown tool: %s", tool_name); + return cbm_mcp_text_result(msg, true); +} + +/* ── Session detection + auto-index ────────────────────────────── */ + +/* Detect session root from CWD (fallback: single indexed project from DB). */ +static void detect_session(cbm_mcp_server_t *srv) { + if (srv->session_detected) { + return; + } + srv->session_detected = true; + + /* 1. 
Try CWD */ + char cwd[1024]; + if (getcwd(cwd, sizeof(cwd)) != NULL) { + const char *home = cbm_home_dir(); + /* Skip useless roots: / and $HOME */ + if (strcmp(cwd, "/") != 0 && (home == NULL || strcmp(cwd, home) != 0)) { + snprintf(srv->session_root, sizeof(srv->session_root), "%s", cwd); + cbm_log_info("session.root.cwd", "path", cwd); + } + } + + /* Derive project name from path */ + if (srv->session_root[0]) { + /* Use last two path components joined by dash, matching Go's ProjectNameFromPath */ + const char *p = srv->session_root; + const char *last_slash = strrchr(p, '/'); + if (last_slash && last_slash > p) { + const char *prev = last_slash - 1; + while (prev > p && *prev != '/') { + prev--; + } + if (*prev == '/') { + prev++; + } + snprintf(srv->session_project, sizeof(srv->session_project), "%.*s", + (int)(strlen(p) - (size_t)(prev - p)), prev); + /* Replace / with - */ + for (char *c = srv->session_project; *c; c++) { + if (*c == '/') { + *c = '-'; + } + } + } else { + snprintf(srv->session_project, sizeof(srv->session_project), "%s", + last_slash ? 
last_slash + 1 : p); + } + } +} + +/* Background auto-index thread function */ +static void *autoindex_thread(void *arg) { + cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg; + + cbm_log_info("autoindex.start", "project", srv->session_project, "path", srv->session_root); + + cbm_pipeline_t *p = cbm_pipeline_new(srv->session_root, NULL, CBM_MODE_FULL); + if (!p) { + cbm_log_warn("autoindex.err", "msg", "pipeline_create_failed"); + return NULL; + } + + int rc = cbm_pipeline_run(p); + cbm_pipeline_free(p); + cbm_mem_collect(); /* return mimalloc pages to OS after indexing */ + + if (rc == 0) { + cbm_log_info("autoindex.done", "project", srv->session_project); + /* Register with watcher for ongoing change detection */ + if (srv->watcher) { + cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); + } + } else { + cbm_log_warn("autoindex.err", "msg", "pipeline_run_failed"); + } + return NULL; +} + +/* Start auto-indexing if configured and project not yet indexed. */ +static void maybe_auto_index(cbm_mcp_server_t *srv) { + if (srv->session_root[0] == '\0') { + return; /* no session root detected */ + } + + /* Check if project already has a DB */ + const char *home = cbm_home_dir(); + if (home) { + char db_check[1024]; + snprintf(db_check, sizeof(db_check), "%s/.cache/codebase-memory-mcp/%s.db", home, + srv->session_project); + struct stat st; + if (stat(db_check, &st) == 0) { + /* Already indexed → register watcher for change detection */ + cbm_log_info("autoindex.skip", "reason", "already_indexed", "project", + srv->session_project); + if (srv->watcher) { + cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root); + } + return; + } + } + +/* Default file limit for auto-indexing new projects */ +#define DEFAULT_AUTO_INDEX_LIMIT 50000 + + /* Check auto_index config */ + bool auto_index = false; + int file_limit = DEFAULT_AUTO_INDEX_LIMIT; + if (srv->config) { + auto_index = cbm_config_get_bool(srv->config, CBM_CONFIG_AUTO_INDEX, false); + 
file_limit = + cbm_config_get_int(srv->config, CBM_CONFIG_AUTO_INDEX_LIMIT, DEFAULT_AUTO_INDEX_LIMIT); + } + + if (!auto_index) { + cbm_log_info("autoindex.skip", "reason", "disabled", "hint", + "run: codebase-memory-mcp config set auto_index true"); + return; + } + + /* Quick file count check to avoid OOM on massive repos */ + if (!cbm_validate_shell_arg(srv->session_root)) { + cbm_log_warn("autoindex.skip", "reason", "path contains shell metacharacters"); + return; + } + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "git -C '%s' ls-files 2>/dev/null | wc -l", srv->session_root); + // NOLINTNEXTLINE(bugprone-command-processor,cert-env33-c) + FILE *fp = cbm_popen(cmd, "r"); + if (fp) { + char line[64]; + if (fgets(line, sizeof(line), fp)) { + int count = (int)strtol(line, NULL, 10); + if (count > file_limit) { + cbm_log_warn("autoindex.skip", "reason", "too_many_files", "files", line, "limit", + CBM_CONFIG_AUTO_INDEX_LIMIT); + cbm_pclose(fp); + return; + } + } + cbm_pclose(fp); + } + + /* Launch auto-index in background */ + if (cbm_thread_create(&srv->autoindex_tid, 0, autoindex_thread, srv) == 0) { + srv->autoindex_active = true; + } +} + +/* ── Background update check ──────────────────────────────────── */ + +#define UPDATE_CHECK_URL "https://api.github.com/repos/DeusData/codebase-memory-mcp/releases/latest" + +static void *update_check_thread(void *arg) { + cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg; + + /* Use curl with 5s timeout to fetch latest release tag */ + FILE *fp = cbm_popen("curl -sf --max-time 5 -H 'Accept: application/vnd.github+json' " + "'" UPDATE_CHECK_URL "' 2>/dev/null", + "r"); + if (!fp) { + srv->update_checked = true; + return NULL; + } + + char buf[4096]; + size_t total = 0; + while (total < sizeof(buf) - 1) { + size_t n = fread(buf + total, 1, sizeof(buf) - 1 - total, fp); + if (n == 0) { + break; + } + total += n; + } + buf[total] = '\0'; + cbm_pclose(fp); + + /* Parse tag_name from JSON response */ + yyjson_doc *doc = 
yyjson_read(buf, total, 0); + if (!doc) { + srv->update_checked = true; + return NULL; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *tag = yyjson_obj_get(root, "tag_name"); + const char *tag_str = yyjson_get_str(tag); + + if (tag_str) { + const char *current = cbm_cli_get_version(); + if (cbm_compare_versions(tag_str, current) > 0) { + snprintf(srv->update_notice, sizeof(srv->update_notice), + "Update available: %s -> %s -- run: codebase-memory-mcp update", current, + tag_str); + cbm_log_info("update.available", "current", current, "latest", tag_str); + } + } + + yyjson_doc_free(doc); + srv->update_checked = true; + return NULL; +} + +static void start_update_check(cbm_mcp_server_t *srv) { + if (srv->update_checked) { + return; + } + srv->update_checked = true; /* prevent double-launch */ + if (cbm_thread_create(&srv->update_tid, 0, update_check_thread, srv) == 0) { + srv->update_thread_active = true; + } +} + +/* Prepend update notice to a tool result, then clear it (one-shot). 
*/ +static char *inject_update_notice(cbm_mcp_server_t *srv, char *result_json) { + if (srv->update_notice[0] == '\0') { + return result_json; + } + + /* Parse existing result, prepend notice text, rebuild */ + yyjson_doc *doc = yyjson_read(result_json, strlen(result_json), 0); + if (!doc) { + return result_json; + } + + yyjson_mut_doc *mdoc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_val_mut_copy(mdoc, yyjson_doc_get_root(doc)); + yyjson_doc_free(doc); + if (!root) { + yyjson_mut_doc_free(mdoc); + return result_json; + } + yyjson_mut_doc_set_root(mdoc, root); + + /* Find the "content" array */ + yyjson_mut_val *content = yyjson_mut_obj_get(root, "content"); + if (content && yyjson_mut_is_arr(content)) { + /* Prepend a text content item with the update notice */ + yyjson_mut_val *notice_item = yyjson_mut_obj(mdoc); + yyjson_mut_obj_add_str(mdoc, notice_item, "type", "text"); + yyjson_mut_obj_add_str(mdoc, notice_item, "text", srv->update_notice); + yyjson_mut_arr_prepend(content, notice_item); + } + + size_t len; + char *new_json = yyjson_mut_write(mdoc, YYJSON_WRITE_ALLOW_INVALID_UNICODE, &len); + yyjson_mut_doc_free(mdoc); + + if (new_json) { + free(result_json); + srv->update_notice[0] = '\0'; /* clear — one-shot */ + return new_json; + } + return result_json; +} + +/* ── Server request handler ───────────────────────────────────── */ + +char *cbm_mcp_server_handle(cbm_mcp_server_t *srv, const char *line) { + cbm_jsonrpc_request_t req = {0}; + if (cbm_jsonrpc_parse(line, &req) < 0) { + return cbm_jsonrpc_format_error(0, JSONRPC_PARSE_ERROR, "Parse error"); + } + + /* Notifications (no id) → no response */ + if (!req.has_id) { + cbm_jsonrpc_request_free(&req); + return NULL; + } + + char *result_json = NULL; + + if (strcmp(req.method, "initialize") == 0) { + result_json = cbm_mcp_initialize_response(req.params_raw); + start_update_check(srv); + detect_session(srv); + maybe_auto_index(srv); + } else if (strcmp(req.method, "tools/list") == 0) { + 
result_json = cbm_mcp_tools_list(); + } else if (strcmp(req.method, "tools/call") == 0) { + char *tool_name = req.params_raw ? cbm_mcp_get_tool_name(req.params_raw) : NULL; + char *tool_args = + req.params_raw ? cbm_mcp_get_arguments(req.params_raw) : heap_strdup("{}"); + + result_json = cbm_mcp_handle_tool(srv, tool_name, tool_args); + result_json = inject_update_notice(srv, result_json); + free(tool_name); + free(tool_args); + } else { + char *err = cbm_jsonrpc_format_error(req.id, JSONRPC_METHOD_NOT_FOUND, "Method not found"); + cbm_jsonrpc_request_free(&req); + return err; + } + + cbm_jsonrpc_response_t resp = { + .id = req.id, + .result_json = result_json, + }; + char *out = cbm_jsonrpc_format_response(&resp); + free(result_json); + cbm_jsonrpc_request_free(&req); + return out; +} + +/* ── Event loop ───────────────────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { + char *line = NULL; + size_t cap = 0; + int fd = cbm_fileno(in); + + for (;;) { + /* Poll with idle timeout so we can evict unused stores between requests. + * MCP is request-response (one line at a time), so mixing poll() on the + * raw fd with getline() on the buffered FILE* is safe in practice. 
*/ +#ifdef _WIN32 + /* Windows: WaitForSingleObject on stdin handle */ + HANDLE hStdin = (HANDLE)_get_osfhandle(fd); + DWORD wr = WaitForSingleObject(hStdin, STORE_IDLE_TIMEOUT_S * 1000); + if (wr == WAIT_FAILED) { + break; + } + if (wr == WAIT_TIMEOUT) { + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } +#else + struct pollfd pfd = {.fd = fd, .events = POLLIN}; + int pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + + if (pr < 0) { + break; /* error or signal */ + } + if (pr == 0) { + /* Timeout — evict idle store to free resources */ + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } +#endif + + if (cbm_getline(&line, &cap, in) <= 0) { + break; + } + + /* Trim trailing newline/CR */ + size_t len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { + line[--len] = '\0'; + } + if (len == 0) { + continue; + } + + /* Content-Length framing support (LSP-style transport). + * Some MCP clients (OpenCode, VS Code extensions) send: + * Content-Length: \r\n\r\n + * instead of bare JSONL. Detect the header, read the payload, + * and respond with the same framing. */ + if (strncmp(line, "Content-Length:", 15) == 0) { + int content_len = (int)strtol(line + 15, NULL, 10); + if (content_len <= 0 || content_len > 10 * 1024 * 1024) { + continue; /* invalid or too large */ + } + + /* Skip blank line(s) between header and body */ + while (cbm_getline(&line, &cap, in) > 0) { + size_t hlen = strlen(line); + while (hlen > 0 && (line[hlen - 1] == '\n' || line[hlen - 1] == '\r')) { + line[--hlen] = '\0'; + } + if (hlen == 0) { + break; /* found the blank separator */ + } + /* Skip other headers (e.g. 
Content-Type) */ + } + + /* Read exact content_len bytes */ + char *body = malloc((size_t)content_len + 1); + if (!body) { + continue; + } + size_t nread = fread(body, 1, (size_t)content_len, in); + body[nread] = '\0'; + + char *resp = cbm_mcp_server_handle(srv, body); + free(body); + + if (resp) { + size_t rlen = strlen(resp); + (void)fprintf(out, "Content-Length: %zu\r\n\r\n%s", rlen, resp); + (void)fflush(out); + free(resp); + } + continue; + } + + char *resp = cbm_mcp_server_handle(srv, line); + if (resp) { + // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) + (void)fprintf(out, "%s\n", resp); + (void)fflush(out); + free(resp); + } + } + + free(line); + return 0; +} + +/* ── cbm_parse_file_uri ──────────────────────────────────────── */ + +bool cbm_parse_file_uri(const char *uri, char *out_path, int out_size) { + if (!uri || !out_path || out_size <= 0) { + if (out_path && out_size > 0) { + out_path[0] = '\0'; + } + return false; + } + + /* Must start with file:// */ + if (strncmp(uri, "file://", 7) != 0) { + out_path[0] = '\0'; + return false; + } + + const char *path = uri + 7; + + /* On Windows, file:///C:/path → /C:/path. Strip leading / before drive letter. */ + if (path[0] == '/' && path[1] && + ((path[1] >= 'A' && path[1] <= 'Z') || (path[1] >= 'a' && path[1] <= 'z')) && + path[2] == ':') { + path++; /* skip the leading / */ + } + + snprintf(out_path, out_size, "%s", path); + return true; +} diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index f5b7510..30e4308 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -1,763 +1,761 @@ -/* - * pipeline.c — Indexing pipeline orchestrator. - * - * Coordinates multi-pass indexing: - * 1. Discover files - * 2. Build structure (Project/Folder/Package/File nodes) - * 3. Bulk load sources (read + LZ4 HC compress) - * 4. Extract definitions (fused: extract + write nodes + build registry) - * 5. Resolve imports, calls, usages, semantic edges - * 6. 
Post-passes: tests, communities, HTTP links, git history - * 7. Dump graph buffer to SQLite - */ -#include "pipeline/pipeline.h" -#include "pipeline/pipeline_internal.h" -// NOLINTNEXTLINE(misc-include-cleaner) — worker_pool.h included for interface contract -#include "pipeline/worker_pool.h" -#include "graph_buffer/graph_buffer.h" -#include "store/store.h" -#include "discover/discover.h" -#include "foundation/platform.h" -#include "foundation/compat_fs.h" -#include "foundation/log.h" -#include "foundation/hash_table.h" -#include "foundation/compat.h" -#include "foundation/compat_thread.h" - -#include -#include -#include -#include -#include -#include -#include - -/* ── Internal state ──────────────────────────────────────────────── */ - -struct cbm_pipeline { - char *repo_path; - char *db_path; - char *project_name; - cbm_index_mode_t mode; - atomic_int cancelled; - - /* Indexing state (set during run) */ - cbm_gbuf_t *gbuf; - cbm_registry_t *registry; -}; - -/* ── Timing helper ──────────────────────────────────────────────── */ - -static double elapsed_ms(struct timespec start) { - struct timespec now; - // NOLINTNEXTLINE(misc-include-cleaner) — cbm_clock_gettime provided by standard header - cbm_clock_gettime(CLOCK_MONOTONIC, &now); - return ((double)(now.tv_sec - start.tv_sec) * CBM_MS_PER_SEC) + - ((double)(now.tv_nsec - start.tv_nsec) / CBM_US_PER_SEC_F); -} - -/* Format int to string for logging. Thread-safe via TLS rotating buffers. 
*/ -static const char *itoa_buf(int val) { - static CBM_TLS char bufs[4][32]; - static CBM_TLS int idx = 0; - int i = idx; - idx = (idx + 1) & 3; - snprintf(bufs[i], sizeof(bufs[i]), "%d", val); - return bufs[i]; -} - -/* ── Lifecycle ──────────────────────────────────────────────────── */ - -cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path, - cbm_index_mode_t mode) { - if (!repo_path) { - return NULL; - } - - cbm_pipeline_t *p = calloc(1, sizeof(cbm_pipeline_t)); - if (!p) { - return NULL; - } - - // NOLINTNEXTLINE(misc-include-cleaner) — strdup provided by standard header - p->repo_path = strdup(repo_path); - p->db_path = db_path ? strdup(db_path) : NULL; - p->project_name = cbm_project_name_from_path(repo_path); - p->mode = mode; - atomic_init(&p->cancelled, 0); - - return p; -} - -void cbm_pipeline_free(cbm_pipeline_t *p) { - if (!p) { - return; - } - free(p->repo_path); - free(p->db_path); - free(p->project_name); - /* gbuf, store, registry freed during/after run */ - free(p); -} - -void cbm_pipeline_cancel(cbm_pipeline_t *p) { - if (p) { - atomic_store(&p->cancelled, 1); - } -} - -const char *cbm_pipeline_project_name(const cbm_pipeline_t *p) { - return p ? p->project_name : NULL; -} - -const char *cbm_pipeline_repo_path(const cbm_pipeline_t *p) { - return p ? p->repo_path : NULL; -} - -atomic_int *cbm_pipeline_cancelled_ptr(cbm_pipeline_t *p) { - return p ? &p->cancelled : NULL; -} - -/* Resolve the DB path for this pipeline. Caller must free(). 
*/ -static char *resolve_db_path(const cbm_pipeline_t *p) { - char *path = malloc(1024); - if (!path) { - return NULL; - } - if (p->db_path) { - snprintf(path, 1024, "%s", p->db_path); - } else { - // NOLINTNEXTLINE(concurrency-mt-unsafe) - const char *home = getenv("HOME"); - if (!home) { - home = "/tmp"; - } - snprintf(path, 1024, "%s/.cache/codebase-memory-mcp/%s.db", home, p->project_name); - } - return path; -} - -static int check_cancel(const cbm_pipeline_t *p) { - return atomic_load(&p->cancelled) ? -1 : 0; -} - -/* ── Hash table cleanup callback ─────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -static void free_seen_dir_key(const char *key, void *val, void *ud) { - (void)val; - (void)ud; - free((void *)key); -} - -/* ── Pass 1: Structure ──────────────────────────────────────────── */ - -/* Create Project, Folder/Package, and File nodes in the graph buffer. */ -static int pass_structure(cbm_pipeline_t *p, const cbm_file_info_t *files, int file_count) { - cbm_log_info("pass.start", "pass", "structure", "files", itoa_buf(file_count)); - - /* Project node */ - cbm_gbuf_upsert_node(p->gbuf, "Project", p->project_name, p->project_name, NULL, 0, 0, "{}"); - - /* Collect unique directories and create Folder/Package nodes */ - CBMHashTable *seen_dirs = cbm_ht_create(256); - - for (int i = 0; i < file_count; i++) { - const char *rel = files[i].rel_path; - if (!rel) { - continue; - } - - /* Create File node */ - char *file_qn = cbm_pipeline_fqn_compute(p->project_name, rel, "__file__"); - /* Extract basename */ - const char *slash = strrchr(rel, '/'); - const char *basename = slash ? slash + 1 : rel; - - char props[256]; - const char *ext = strrchr(basename, '.'); - snprintf(props, sizeof(props), "{\"extension\":\"%s\"}", ext ? 
ext : ""); - - const char *qualified_name = file_qn; - const char *file_path = rel; - cbm_gbuf_upsert_node(p->gbuf, "File", basename, qualified_name, file_path, 0, 0, props); - - /* CONTAINS_FILE edge: parent dir -> file */ - char *dir = strdup(rel); - char *last_slash = strrchr(dir, '/'); - if (last_slash) { - { - *last_slash = '\0'; - } - } else { - free(dir); - dir = strdup(""); - } - - const char *parent_qn; - char *parent_qn_heap = NULL; - if (dir[0] == '\0') { - parent_qn = p->project_name; - } else { - parent_qn_heap = cbm_pipeline_fqn_folder(p->project_name, dir); - parent_qn = parent_qn_heap; - } - - /* Walk up directory chain, creating Folder nodes */ - char *walk_dir = strdup(dir); - while (walk_dir[0] != '\0' && !cbm_ht_get(seen_dirs, walk_dir)) { - cbm_ht_set(seen_dirs, strdup(walk_dir), (void *)1); - - char *folder_qn = cbm_pipeline_fqn_folder(p->project_name, walk_dir); - const char *dir_base = strrchr(walk_dir, '/'); - dir_base = dir_base ? dir_base + 1 : walk_dir; - - cbm_gbuf_upsert_node(p->gbuf, "Folder", dir_base, folder_qn, walk_dir, 0, 0, "{}"); - - /* CONTAINS_FOLDER edge: parent -> this dir */ - char *pdir = strdup(walk_dir); - char *ps = strrchr(pdir, '/'); - if (ps) { - { - *ps = '\0'; - } - } else { - free(pdir); - pdir = strdup(""); - } - - const char *pqn; - char *pqn_heap = NULL; - if (pdir[0] == '\0') { - pqn = p->project_name; - } else { - pqn_heap = cbm_pipeline_fqn_folder(p->project_name, pdir); - pqn = pqn_heap; - } - - const cbm_gbuf_node_t *fn = cbm_gbuf_find_by_qn(p->gbuf, folder_qn); - const cbm_gbuf_node_t *pn = cbm_gbuf_find_by_qn(p->gbuf, pqn); - if (fn && pn) { - cbm_gbuf_insert_edge(p->gbuf, pn->id, fn->id, "CONTAINS_FOLDER", "{}"); - } - - free(folder_qn); - free(pqn_heap); - - /* Move up one level */ - char *up = strrchr(walk_dir, '/'); - if (up) { - *up = '\0'; - } else { - walk_dir[0] = '\0'; - } - free(pdir); - } - - /* Now create the CONTAINS_FILE edge */ - const cbm_gbuf_node_t *fnode = cbm_gbuf_find_by_qn(p->gbuf, 
file_qn); - const cbm_gbuf_node_t *pnode = cbm_gbuf_find_by_qn(p->gbuf, parent_qn); - if (fnode && pnode) { - cbm_gbuf_insert_edge(p->gbuf, pnode->id, fnode->id, "CONTAINS_FILE", "{}"); - } - - free(file_qn); - free(dir); - free(walk_dir); - free(parent_qn_heap); - } - - /* Free seen_dirs keys */ - cbm_ht_foreach(seen_dirs, free_seen_dir_key, NULL); - cbm_ht_free(seen_dirs); - - cbm_log_info("pass.done", "pass", "structure", "nodes", itoa_buf(cbm_gbuf_node_count(p->gbuf)), - "edges", itoa_buf(cbm_gbuf_edge_count(p->gbuf))); - return 0; -} - -/* ── Pass 2: Definitions ─────────────────────────────────────────── */ - -/* Implemented in pass_definitions.c via cbm_pipeline_pass_definitions() */ - -/* ── Githistory compute thread (for fused post-pass parallelism) ─── */ - -typedef struct { - const char *repo_path; - cbm_githistory_result_t *result; -} gh_compute_arg_t; - -static void *gh_compute_thread_fn(void *arg) { - gh_compute_arg_t *a = arg; - cbm_pipeline_githistory_compute(a->repo_path, a->result); - return NULL; -} - -/* ── Pipeline run ────────────────────────────────────────────────── */ - -int cbm_pipeline_run(cbm_pipeline_t *p) { - if (!p) { - return -1; - } - - struct timespec t0; - cbm_clock_gettime(CLOCK_MONOTONIC, &t0); - - /* Phase 1: Discover files */ - cbm_discover_opts_t opts = { - .mode = p->mode, - .ignore_file = NULL, - .max_file_size = 0, - }; - cbm_file_info_t *files = NULL; - int file_count = 0; - int rc = cbm_discover(p->repo_path, &opts, &files, &file_count); - if (rc != 0) { - cbm_log_error("pipeline.err", "phase", "discover", "rc", itoa_buf(rc)); - return -1; - } - cbm_log_info("pipeline.discover", "files", itoa_buf(file_count), "elapsed_ms", - itoa_buf((int)elapsed_ms(t0))); - - if (check_cancel(p)) { - cbm_discover_free(files, file_count); - return -1; - } - - /* Check for existing DB with file hashes → incremental path */ - { - char *db_path = resolve_db_path(p); - if (db_path) { - struct stat db_st; - if (stat(db_path, &db_st) == 0) { - 
/* DB exists — check if it has file hashes */ - cbm_store_t *check_store = cbm_store_open_path(db_path); - if (check_store) { - cbm_file_hash_t *hashes = NULL; - int hash_count = 0; - cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count); - cbm_store_free_file_hashes(hashes, hash_count); - cbm_store_close(check_store); - - if (hash_count > 0) { - cbm_log_info("pipeline.route", "path", "incremental", "stored_hashes", - itoa_buf(hash_count)); - rc = cbm_pipeline_run_incremental(p, db_path, files, file_count); - cbm_discover_free(files, file_count); - free(db_path); - return rc; - } - } - } - free(db_path); - } - } - cbm_log_info("pipeline.route", "path", "full"); - - /* Phase 2: Create graph buffer and registry */ - p->gbuf = cbm_gbuf_new(p->project_name, p->repo_path); - p->registry = cbm_registry_new(); - - /* Phase 3: Run passes */ - struct timespec t; - - /* Build shared context for pass functions */ - cbm_pipeline_ctx_t ctx = { - .project_name = p->project_name, - .repo_path = p->repo_path, - .gbuf = p->gbuf, - .registry = p->registry, - .cancelled = &p->cancelled, - }; - - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = pass_structure(p, files, file_count); - if (rc != 0) { // cppcheck-suppress knownConditionTrueFalse - goto cleanup; - } - cbm_log_info("pass.timing", "pass", "structure", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto cleanup; - } - - /* Decide: parallel or sequential pipeline */ - int worker_count = cbm_default_worker_count(true); -#define MIN_FILES_FOR_PARALLEL 50 - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool use_parallel = (worker_count > 1 && file_count > MIN_FILES_FOR_PARALLEL); - - if (use_parallel) { - cbm_log_info("pipeline.mode", "mode", "parallel", "workers", itoa_buf(worker_count), - "files", itoa_buf(file_count)); - - /* Shared atomic ID source — workers allocate globally unique IDs */ - // NOLINTNEXTLINE(misc-include-cleaner) — int64_t provided by 
standard header - _Atomic int64_t shared_ids; - int64_t gbuf_next = cbm_gbuf_next_id(p->gbuf); - atomic_init(&shared_ids, gbuf_next); - - /* Allocate result cache: one CBMFileResult* per file */ - // NOLINTNEXTLINE(misc-include-cleaner) - CBMFileResult **result_cache = - (CBMFileResult **)calloc(file_count, sizeof(CBMFileResult *)); - if (!result_cache) { - cbm_log_error("pipeline.err", "phase", "cache_alloc"); - rc = -1; - goto cleanup; - } - - /* Allocate prescan cache: HTTP sites + config refs extracted during - * extraction phase while source is in memory. Eliminates all disk - * re-reads in httplinks (2M+ reads) and configlink (62K+ reads). */ - cbm_prescan_t *prescan_cache = calloc(file_count, sizeof(cbm_prescan_t)); - ctx.prescan_cache = prescan_cache; - ctx.prescan_count = file_count; - - /* Build path → file_idx map for prescan lookup by rel_path */ - CBMHashTable *prescan_map = cbm_ht_create(0); - for (int i = 0; i < file_count; i++) { - cbm_ht_set(prescan_map, files[i].rel_path, (void *)((intptr_t)i + 1)); - } - ctx.prescan_path_map = prescan_map; - - /* Phase 3A: Parallel extract + definition nodes */ - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_parallel_extract(&ctx, files, file_count, result_cache, &shared_ids, worker_count); - cbm_log_info("pass.timing", "pass", "parallel_extract", "elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - if (rc != 0) { - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(result_cache); - goto cleanup; - } - if (check_cancel(p)) { - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(result_cache); - rc = -1; - goto cleanup; - } - - /* Sync gbuf ID counter after merge */ - cbm_gbuf_set_next_id(p->gbuf, atomic_load(&shared_ids)); - - /* Phase 3B: Serial registry build + DEFINES/IMPORTS edges */ - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_build_registry_from_cache(&ctx, files, file_count, result_cache); - cbm_log_info("pass.timing", "pass", "registry_build", 
"elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - if (rc != 0) { - for (int i = 0; i < file_count; i++) { - if (result_cache[i]) { - // NOLINTNEXTLINE(misc-include-cleaner) - cbm_free_result(result_cache[i]); - } - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(result_cache); - goto cleanup; - } - if (check_cancel(p)) { - for (int i = 0; i < file_count; i++) { - if (result_cache[i]) { - cbm_free_result(result_cache[i]); - } - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(result_cache); - rc = -1; - goto cleanup; - } - - /* Phase 4: Parallel resolution (calls + usages + semantic, fused) */ - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_parallel_resolve(&ctx, files, file_count, result_cache, &shared_ids, worker_count); - cbm_log_info("pass.timing", "pass", "parallel_resolve", "elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - - /* Sync gbuf ID counter after resolve merge */ - cbm_gbuf_set_next_id(p->gbuf, atomic_load(&shared_ids)); - - /* Free cached extraction results */ - for (int i = 0; i < file_count; i++) { - if (result_cache[i]) { - cbm_free_result(result_cache[i]); - } - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(result_cache); - - if (rc != 0) { - goto cleanup; - } - if (check_cancel(p)) { - rc = -1; - goto cleanup; - } - } else { - cbm_log_info("pipeline.mode", "mode", "sequential", "files", itoa_buf(file_count)); - - /* Allocate result cache: pass_definitions stores results for reuse - * by pass_calls/usages/semantic, avoiding 3x redundant file I/O + parsing */ - // NOLINTNEXTLINE(misc-include-cleaner) - CBMFileResult **seq_cache = (CBMFileResult **)calloc(file_count, sizeof(CBMFileResult *)); - if (seq_cache) { - ctx.result_cache = seq_cache; - } - - /* Sequential fallback: original 4-pass chain */ - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_pipeline_pass_definitions(&ctx, files, file_count); - if (rc != 0) { - goto seq_cleanup; - } - 
cbm_log_info("pass.timing", "pass", "definitions", "elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto seq_cleanup; - } - - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_pipeline_pass_calls(&ctx, files, file_count); - if (rc != 0) { - goto seq_cleanup; - } - cbm_log_info("pass.timing", "pass", "calls", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto seq_cleanup; - } - - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_pipeline_pass_usages(&ctx, files, file_count); - if (rc != 0) { - goto seq_cleanup; - } - cbm_log_info("pass.timing", "pass", "usages", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto seq_cleanup; - } - - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_pipeline_pass_semantic(&ctx, files, file_count); - if (rc != 0) { - goto seq_cleanup; - } - cbm_log_info("pass.timing", "pass", "semantic", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto seq_cleanup; - } - - seq_cleanup: - /* Free cached extraction results */ - if (seq_cache) { - for (int i = 0; i < file_count; i++) { - if (seq_cache[i]) { - cbm_free_result(seq_cache[i]); - } - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(seq_cache); - ctx.result_cache = NULL; - } - if (rc != 0) { - goto cleanup; - } - } - - /* Post-extraction passes (shared by both parallel and sequential) */ - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - rc = cbm_pipeline_pass_tests(&ctx, files, file_count); - if (rc != 0) { - goto cleanup; - } - cbm_log_info("pass.timing", "pass", "tests", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - if (check_cancel(p)) { - rc = -1; - goto cleanup; - } - - /* ── Fused post-passes: githistory (I/O) + httplinks (CPU) in parallel ── */ - { - struct timespec t_gh; - struct timespec t_hl; - cbm_clock_gettime(CLOCK_MONOTONIC, &t_gh); - cbm_clock_gettime(CLOCK_MONOTONIC, &t_hl); - - cbm_githistory_result_t 
gh_result = {0}; - cbm_thread_t gh_thread; - bool gh_threaded = false; - gh_compute_arg_t gh_arg = {.repo_path = ctx.repo_path, .result = &gh_result}; - - /* Skip githistory entirely in fast mode */ - if (p->mode != CBM_MODE_FAST) { - - /* Only parallelize if we have multiple cores */ - if (cbm_default_worker_count(true) > 1) { - if (cbm_thread_create(&gh_thread, 0, gh_compute_thread_fn, &gh_arg) == 0) { - gh_threaded = true; - } - } - - /* If threading failed or single-core, run githistory serially first */ - if (!gh_threaded) { - cbm_pipeline_githistory_compute(ctx.repo_path, &gh_result); - cbm_log_info("pass.timing", "pass", "githistory_compute", "elapsed_ms", - itoa_buf((int)elapsed_ms(t_gh))); - } - } else { - cbm_log_info("pass.skip", "pass", "githistory", "reason", "fast_mode"); - } - - /* Run httplinks on main thread (CPU-bound) */ - rc = cbm_pipeline_pass_httplinks(&ctx); - cbm_log_info("pass.timing", "pass", "httplinks", "elapsed_ms", - itoa_buf((int)elapsed_ms(t_hl))); - - /* Wait for githistory thread to complete */ - if (gh_threaded) { - cbm_thread_join(&gh_thread); - cbm_log_info("pass.timing", "pass", "githistory_compute", "elapsed_ms", - itoa_buf((int)elapsed_ms(t_gh))); - } - - if (rc != 0) { - free(gh_result.couplings); - goto cleanup; - } - if (check_cancel(p)) { - free(gh_result.couplings); - rc = -1; - goto cleanup; - } - - /* Apply githistory edges (serial, writes to gbuf) */ - int gh_edges = 0; - if (gh_result.count > 0) { - gh_edges = cbm_pipeline_githistory_apply(&ctx, &gh_result); - } - cbm_log_info("pass.done", "pass", "githistory", "commits", itoa_buf(gh_result.commit_count), - "edges", itoa_buf(gh_edges)); - free(gh_result.couplings); - } - - /* Pre-dump passes (operate on graph buffer, not store) */ - if (!check_cancel(p)) { - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - cbm_pipeline_pass_decorator_tags(p->gbuf, p->project_name); - cbm_log_info("pass.timing", "pass", "decorator_tags", "elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - } - - 
if (!check_cancel(p)) { - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - cbm_pipeline_pass_configlink(&ctx); - cbm_log_info("pass.timing", "pass", "configlink", "elapsed_ms", - itoa_buf((int)elapsed_ms(t))); - } - - /* Free prescan cache — no longer needed after httplinks + configlink */ - if (ctx.prescan_cache) { - for (int i = 0; i < ctx.prescan_count; i++) { - free(ctx.prescan_cache[i].http_sites); - free(ctx.prescan_cache[i].config_refs); - free(ctx.prescan_cache[i].routes); - } - free(ctx.prescan_cache); - ctx.prescan_cache = NULL; - } - if (ctx.prescan_path_map) { - cbm_ht_free(ctx.prescan_path_map); - ctx.prescan_path_map = NULL; - } - - /* Direct dump: construct B-tree pages in C, fwrite() to .db file. - * Zero SQLite library involvement — cbm_write_db() builds the binary - * format directly from flat arrays. Atomic: writes .tmp then renames. */ - if (!check_cancel(p)) { - cbm_clock_gettime(CLOCK_MONOTONIC, &t); - - // NOLINTNEXTLINE(concurrency-mt-unsafe) — called once during single-threaded dump - const char *home = getenv("HOME"); - char db_path[1024]; - if (p->db_path) { - snprintf(db_path, sizeof(db_path), "%s", p->db_path); - } else { - if (!home) { - home = "/tmp"; - } - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, - p->project_name); - } - - /* Ensure parent directory exists (e.g. 
~/.cache/codebase-memory-mcp/) */ - char db_dir[1024]; - snprintf(db_dir, sizeof(db_dir), "%s", db_path); - char *last_slash = strrchr(db_dir, '/'); - if (last_slash) { - *last_slash = '\0'; - cbm_mkdir_p(db_dir, 0755); - } - - rc = cbm_gbuf_dump_to_sqlite(p->gbuf, db_path); - if (rc != 0) { - cbm_log_error("pipeline.err", "phase", "dump"); - goto cleanup; - } - cbm_log_info("pass.timing", "pass", "dump", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); - - /* Persist file hashes so next run can use incremental path */ - cbm_store_t *hash_store = cbm_store_open_path(db_path); - if (hash_store) { - cbm_store_delete_file_hashes(hash_store, p->project_name); - for (int i = 0; i < file_count; i++) { - struct stat fst; - if (stat(files[i].path, &fst) == 0) { - int64_t mtime_ns; -#ifdef __APPLE__ - mtime_ns = ((int64_t)fst.st_mtimespec.tv_sec * 1000000000LL) + - (int64_t)fst.st_mtimespec.tv_nsec; -#elif defined(_WIN32) - mtime_ns = (int64_t)fst.st_mtime * 1000000000LL; -#else - mtime_ns = - ((int64_t)fst.st_mtim.tv_sec * 1000000000LL) + (int64_t)fst.st_mtim.tv_nsec; -#endif - cbm_store_upsert_file_hash(hash_store, p->project_name, files[i].rel_path, "", - mtime_ns, fst.st_size); - } - } - cbm_store_close(hash_store); - cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); - } - } - - cbm_log_info("pipeline.done", "nodes", itoa_buf(cbm_gbuf_node_count(p->gbuf)), "edges", - itoa_buf(cbm_gbuf_edge_count(p->gbuf)), "elapsed_ms", - itoa_buf((int)elapsed_ms(t0))); - -cleanup: - /* Free prescan if not already freed */ - if (ctx.prescan_cache) { - for (int i = 0; i < ctx.prescan_count; i++) { - free(ctx.prescan_cache[i].http_sites); - free(ctx.prescan_cache[i].config_refs); - free(ctx.prescan_cache[i].routes); - } - free(ctx.prescan_cache); - ctx.prescan_cache = NULL; - } - if (ctx.prescan_path_map) { - cbm_ht_free(ctx.prescan_path_map); - ctx.prescan_path_map = NULL; - } - cbm_discover_free(files, file_count); - cbm_gbuf_free(p->gbuf); - p->gbuf = 
NULL; - cbm_registry_free(p->registry); - p->registry = NULL; - return rc; -} +/* + * pipeline.c — Indexing pipeline orchestrator. + * + * Coordinates multi-pass indexing: + * 1. Discover files + * 2. Build structure (Project/Folder/Package/File nodes) + * 3. Bulk load sources (read + LZ4 HC compress) + * 4. Extract definitions (fused: extract + write nodes + build registry) + * 5. Resolve imports, calls, usages, semantic edges + * 6. Post-passes: tests, communities, HTTP links, git history + * 7. Dump graph buffer to SQLite + */ +#include "pipeline/pipeline.h" +#include "pipeline/pipeline_internal.h" +// NOLINTNEXTLINE(misc-include-cleaner) — worker_pool.h included for interface contract +#include "pipeline/worker_pool.h" +#include "graph_buffer/graph_buffer.h" +#include "store/store.h" +#include "discover/discover.h" +#include "foundation/platform.h" +#include "foundation/compat_fs.h" +#include "foundation/log.h" +#include "foundation/hash_table.h" +#include "foundation/compat.h" +#include "foundation/compat_thread.h" + +#include +#include +#include +#include +#include +#include +#include + +/* ── Internal state ──────────────────────────────────────────────── */ + +struct cbm_pipeline { + char *repo_path; + char *db_path; + char *project_name; + cbm_index_mode_t mode; + atomic_int cancelled; + + /* Indexing state (set during run) */ + cbm_gbuf_t *gbuf; + cbm_registry_t *registry; +}; + +/* ── Timing helper ──────────────────────────────────────────────── */ + +static double elapsed_ms(struct timespec start) { + struct timespec now; + // NOLINTNEXTLINE(misc-include-cleaner) — cbm_clock_gettime provided by standard header + cbm_clock_gettime(CLOCK_MONOTONIC, &now); + return ((double)(now.tv_sec - start.tv_sec) * CBM_MS_PER_SEC) + + ((double)(now.tv_nsec - start.tv_nsec) / CBM_US_PER_SEC_F); +} + +/* Format int to string for logging. Thread-safe via TLS rotating buffers. 
*/ +static const char *itoa_buf(int val) { + static CBM_TLS char bufs[4][32]; + static CBM_TLS int idx = 0; + int i = idx; + idx = (idx + 1) & 3; + snprintf(bufs[i], sizeof(bufs[i]), "%d", val); + return bufs[i]; +} + +/* ── Lifecycle ──────────────────────────────────────────────────── */ + +cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path, + cbm_index_mode_t mode) { + if (!repo_path) { + return NULL; + } + + cbm_pipeline_t *p = calloc(1, sizeof(cbm_pipeline_t)); + if (!p) { + return NULL; + } + + // NOLINTNEXTLINE(misc-include-cleaner) — strdup provided by standard header + p->repo_path = strdup(repo_path); + p->db_path = db_path ? strdup(db_path) : NULL; + p->project_name = cbm_project_name_from_path(repo_path); + p->mode = mode; + atomic_init(&p->cancelled, 0); + + return p; +} + +void cbm_pipeline_free(cbm_pipeline_t *p) { + if (!p) { + return; + } + free(p->repo_path); + free(p->db_path); + free(p->project_name); + /* gbuf, store, registry freed during/after run */ + free(p); +} + +void cbm_pipeline_cancel(cbm_pipeline_t *p) { + if (p) { + atomic_store(&p->cancelled, 1); + } +} + +const char *cbm_pipeline_project_name(const cbm_pipeline_t *p) { + return p ? p->project_name : NULL; +} + +const char *cbm_pipeline_repo_path(const cbm_pipeline_t *p) { + return p ? p->repo_path : NULL; +} + +atomic_int *cbm_pipeline_cancelled_ptr(cbm_pipeline_t *p) { + return p ? &p->cancelled : NULL; +} + +/* Resolve the DB path for this pipeline. Caller must free(). */ +static char *resolve_db_path(const cbm_pipeline_t *p) { + char *path = malloc(1024); + if (!path) { + return NULL; + } + if (p->db_path) { + snprintf(path, 1024, "%s", p->db_path); + } else { + const char *home = cbm_home_dir(); + if (!home) { + home = "/tmp"; + } + snprintf(path, 1024, "%s/.cache/codebase-memory-mcp/%s.db", home, p->project_name); + } + return path; +} + +static int check_cancel(const cbm_pipeline_t *p) { + return atomic_load(&p->cancelled) ? 
-1 : 0; +} + +/* ── Hash table cleanup callback ─────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +static void free_seen_dir_key(const char *key, void *val, void *ud) { + (void)val; + (void)ud; + free((void *)key); +} + +/* ── Pass 1: Structure ──────────────────────────────────────────── */ + +/* Create Project, Folder/Package, and File nodes in the graph buffer. */ +static int pass_structure(cbm_pipeline_t *p, const cbm_file_info_t *files, int file_count) { + cbm_log_info("pass.start", "pass", "structure", "files", itoa_buf(file_count)); + + /* Project node */ + cbm_gbuf_upsert_node(p->gbuf, "Project", p->project_name, p->project_name, NULL, 0, 0, "{}"); + + /* Collect unique directories and create Folder/Package nodes */ + CBMHashTable *seen_dirs = cbm_ht_create(256); + + for (int i = 0; i < file_count; i++) { + const char *rel = files[i].rel_path; + if (!rel) { + continue; + } + + /* Create File node */ + char *file_qn = cbm_pipeline_fqn_compute(p->project_name, rel, "__file__"); + /* Extract basename */ + const char *slash = strrchr(rel, '/'); + const char *basename = slash ? slash + 1 : rel; + + char props[256]; + const char *ext = strrchr(basename, '.'); + snprintf(props, sizeof(props), "{\"extension\":\"%s\"}", ext ? 
ext : ""); + + const char *qualified_name = file_qn; + const char *file_path = rel; + cbm_gbuf_upsert_node(p->gbuf, "File", basename, qualified_name, file_path, 0, 0, props); + + /* CONTAINS_FILE edge: parent dir -> file */ + char *dir = strdup(rel); + char *last_slash = strrchr(dir, '/'); + if (last_slash) { + { + *last_slash = '\0'; + } + } else { + free(dir); + dir = strdup(""); + } + + const char *parent_qn; + char *parent_qn_heap = NULL; + if (dir[0] == '\0') { + parent_qn = p->project_name; + } else { + parent_qn_heap = cbm_pipeline_fqn_folder(p->project_name, dir); + parent_qn = parent_qn_heap; + } + + /* Walk up directory chain, creating Folder nodes */ + char *walk_dir = strdup(dir); + while (walk_dir[0] != '\0' && !cbm_ht_get(seen_dirs, walk_dir)) { + cbm_ht_set(seen_dirs, strdup(walk_dir), (void *)1); + + char *folder_qn = cbm_pipeline_fqn_folder(p->project_name, walk_dir); + const char *dir_base = strrchr(walk_dir, '/'); + dir_base = dir_base ? dir_base + 1 : walk_dir; + + cbm_gbuf_upsert_node(p->gbuf, "Folder", dir_base, folder_qn, walk_dir, 0, 0, "{}"); + + /* CONTAINS_FOLDER edge: parent -> this dir */ + char *pdir = strdup(walk_dir); + char *ps = strrchr(pdir, '/'); + if (ps) { + { + *ps = '\0'; + } + } else { + free(pdir); + pdir = strdup(""); + } + + const char *pqn; + char *pqn_heap = NULL; + if (pdir[0] == '\0') { + pqn = p->project_name; + } else { + pqn_heap = cbm_pipeline_fqn_folder(p->project_name, pdir); + pqn = pqn_heap; + } + + const cbm_gbuf_node_t *fn = cbm_gbuf_find_by_qn(p->gbuf, folder_qn); + const cbm_gbuf_node_t *pn = cbm_gbuf_find_by_qn(p->gbuf, pqn); + if (fn && pn) { + cbm_gbuf_insert_edge(p->gbuf, pn->id, fn->id, "CONTAINS_FOLDER", "{}"); + } + + free(folder_qn); + free(pqn_heap); + + /* Move up one level */ + char *up = strrchr(walk_dir, '/'); + if (up) { + *up = '\0'; + } else { + walk_dir[0] = '\0'; + } + free(pdir); + } + + /* Now create the CONTAINS_FILE edge */ + const cbm_gbuf_node_t *fnode = cbm_gbuf_find_by_qn(p->gbuf, 
file_qn); + const cbm_gbuf_node_t *pnode = cbm_gbuf_find_by_qn(p->gbuf, parent_qn); + if (fnode && pnode) { + cbm_gbuf_insert_edge(p->gbuf, pnode->id, fnode->id, "CONTAINS_FILE", "{}"); + } + + free(file_qn); + free(dir); + free(walk_dir); + free(parent_qn_heap); + } + + /* Free seen_dirs keys */ + cbm_ht_foreach(seen_dirs, free_seen_dir_key, NULL); + cbm_ht_free(seen_dirs); + + cbm_log_info("pass.done", "pass", "structure", "nodes", itoa_buf(cbm_gbuf_node_count(p->gbuf)), + "edges", itoa_buf(cbm_gbuf_edge_count(p->gbuf))); + return 0; +} + +/* ── Pass 2: Definitions ─────────────────────────────────────────── */ + +/* Implemented in pass_definitions.c via cbm_pipeline_pass_definitions() */ + +/* ── Githistory compute thread (for fused post-pass parallelism) ─── */ + +typedef struct { + const char *repo_path; + cbm_githistory_result_t *result; +} gh_compute_arg_t; + +static void *gh_compute_thread_fn(void *arg) { + gh_compute_arg_t *a = arg; + cbm_pipeline_githistory_compute(a->repo_path, a->result); + return NULL; +} + +/* ── Pipeline run ────────────────────────────────────────────────── */ + +int cbm_pipeline_run(cbm_pipeline_t *p) { + if (!p) { + return -1; + } + + struct timespec t0; + cbm_clock_gettime(CLOCK_MONOTONIC, &t0); + + /* Phase 1: Discover files */ + cbm_discover_opts_t opts = { + .mode = p->mode, + .ignore_file = NULL, + .max_file_size = 0, + }; + cbm_file_info_t *files = NULL; + int file_count = 0; + int rc = cbm_discover(p->repo_path, &opts, &files, &file_count); + if (rc != 0) { + cbm_log_error("pipeline.err", "phase", "discover", "rc", itoa_buf(rc)); + return -1; + } + cbm_log_info("pipeline.discover", "files", itoa_buf(file_count), "elapsed_ms", + itoa_buf((int)elapsed_ms(t0))); + + if (check_cancel(p)) { + cbm_discover_free(files, file_count); + return -1; + } + + /* Check for existing DB with file hashes → incremental path */ + { + char *db_path = resolve_db_path(p); + if (db_path) { + struct stat db_st; + if (stat(db_path, &db_st) == 0) { + 
/* DB exists — check if it has file hashes */ + cbm_store_t *check_store = cbm_store_open_path(db_path); + if (check_store) { + cbm_file_hash_t *hashes = NULL; + int hash_count = 0; + cbm_store_get_file_hashes(check_store, p->project_name, &hashes, &hash_count); + cbm_store_free_file_hashes(hashes, hash_count); + cbm_store_close(check_store); + + if (hash_count > 0) { + cbm_log_info("pipeline.route", "path", "incremental", "stored_hashes", + itoa_buf(hash_count)); + rc = cbm_pipeline_run_incremental(p, db_path, files, file_count); + cbm_discover_free(files, file_count); + free(db_path); + return rc; + } + } + } + free(db_path); + } + } + cbm_log_info("pipeline.route", "path", "full"); + + /* Phase 2: Create graph buffer and registry */ + p->gbuf = cbm_gbuf_new(p->project_name, p->repo_path); + p->registry = cbm_registry_new(); + + /* Phase 3: Run passes */ + struct timespec t; + + /* Build shared context for pass functions */ + cbm_pipeline_ctx_t ctx = { + .project_name = p->project_name, + .repo_path = p->repo_path, + .gbuf = p->gbuf, + .registry = p->registry, + .cancelled = &p->cancelled, + }; + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = pass_structure(p, files, file_count); + if (rc != 0) { // cppcheck-suppress knownConditionTrueFalse + goto cleanup; + } + cbm_log_info("pass.timing", "pass", "structure", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto cleanup; + } + + /* Decide: parallel or sequential pipeline */ + int worker_count = cbm_default_worker_count(true); +#define MIN_FILES_FOR_PARALLEL 50 + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool use_parallel = (worker_count > 1 && file_count > MIN_FILES_FOR_PARALLEL); + + if (use_parallel) { + cbm_log_info("pipeline.mode", "mode", "parallel", "workers", itoa_buf(worker_count), + "files", itoa_buf(file_count)); + + /* Shared atomic ID source — workers allocate globally unique IDs */ + // NOLINTNEXTLINE(misc-include-cleaner) — int64_t provided by 
standard header + _Atomic int64_t shared_ids; + int64_t gbuf_next = cbm_gbuf_next_id(p->gbuf); + atomic_init(&shared_ids, gbuf_next); + + /* Allocate result cache: one CBMFileResult* per file */ + // NOLINTNEXTLINE(misc-include-cleaner) + CBMFileResult **result_cache = + (CBMFileResult **)calloc(file_count, sizeof(CBMFileResult *)); + if (!result_cache) { + cbm_log_error("pipeline.err", "phase", "cache_alloc"); + rc = -1; + goto cleanup; + } + + /* Allocate prescan cache: HTTP sites + config refs extracted during + * extraction phase while source is in memory. Eliminates all disk + * re-reads in httplinks (2M+ reads) and configlink (62K+ reads). */ + cbm_prescan_t *prescan_cache = calloc(file_count, sizeof(cbm_prescan_t)); + ctx.prescan_cache = prescan_cache; + ctx.prescan_count = file_count; + + /* Build path → file_idx map for prescan lookup by rel_path */ + CBMHashTable *prescan_map = cbm_ht_create(0); + for (int i = 0; i < file_count; i++) { + cbm_ht_set(prescan_map, files[i].rel_path, (void *)((intptr_t)i + 1)); + } + ctx.prescan_path_map = prescan_map; + + /* Phase 3A: Parallel extract + definition nodes */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_parallel_extract(&ctx, files, file_count, result_cache, &shared_ids, worker_count); + cbm_log_info("pass.timing", "pass", "parallel_extract", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + if (rc != 0) { + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(result_cache); + goto cleanup; + } + if (check_cancel(p)) { + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(result_cache); + rc = -1; + goto cleanup; + } + + /* Sync gbuf ID counter after merge */ + cbm_gbuf_set_next_id(p->gbuf, atomic_load(&shared_ids)); + + /* Phase 3B: Serial registry build + DEFINES/IMPORTS edges */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_build_registry_from_cache(&ctx, files, file_count, result_cache); + cbm_log_info("pass.timing", "pass", "registry_build", 
"elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + if (rc != 0) { + for (int i = 0; i < file_count; i++) { + if (result_cache[i]) { + // NOLINTNEXTLINE(misc-include-cleaner) + cbm_free_result(result_cache[i]); + } + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(result_cache); + goto cleanup; + } + if (check_cancel(p)) { + for (int i = 0; i < file_count; i++) { + if (result_cache[i]) { + cbm_free_result(result_cache[i]); + } + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(result_cache); + rc = -1; + goto cleanup; + } + + /* Phase 4: Parallel resolution (calls + usages + semantic, fused) */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_parallel_resolve(&ctx, files, file_count, result_cache, &shared_ids, worker_count); + cbm_log_info("pass.timing", "pass", "parallel_resolve", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + + /* Sync gbuf ID counter after resolve merge */ + cbm_gbuf_set_next_id(p->gbuf, atomic_load(&shared_ids)); + + /* Free cached extraction results */ + for (int i = 0; i < file_count; i++) { + if (result_cache[i]) { + cbm_free_result(result_cache[i]); + } + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(result_cache); + + if (rc != 0) { + goto cleanup; + } + if (check_cancel(p)) { + rc = -1; + goto cleanup; + } + } else { + cbm_log_info("pipeline.mode", "mode", "sequential", "files", itoa_buf(file_count)); + + /* Allocate result cache: pass_definitions stores results for reuse + * by pass_calls/usages/semantic, avoiding 3x redundant file I/O + parsing */ + // NOLINTNEXTLINE(misc-include-cleaner) + CBMFileResult **seq_cache = (CBMFileResult **)calloc(file_count, sizeof(CBMFileResult *)); + if (seq_cache) { + ctx.result_cache = seq_cache; + } + + /* Sequential fallback: original 4-pass chain */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_definitions(&ctx, files, file_count); + if (rc != 0) { + goto seq_cleanup; + } + 
cbm_log_info("pass.timing", "pass", "definitions", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto seq_cleanup; + } + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_calls(&ctx, files, file_count); + if (rc != 0) { + goto seq_cleanup; + } + cbm_log_info("pass.timing", "pass", "calls", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto seq_cleanup; + } + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_usages(&ctx, files, file_count); + if (rc != 0) { + goto seq_cleanup; + } + cbm_log_info("pass.timing", "pass", "usages", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto seq_cleanup; + } + + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_semantic(&ctx, files, file_count); + if (rc != 0) { + goto seq_cleanup; + } + cbm_log_info("pass.timing", "pass", "semantic", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto seq_cleanup; + } + + seq_cleanup: + /* Free cached extraction results */ + if (seq_cache) { + for (int i = 0; i < file_count; i++) { + if (seq_cache[i]) { + cbm_free_result(seq_cache[i]); + } + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(seq_cache); + ctx.result_cache = NULL; + } + if (rc != 0) { + goto cleanup; + } + } + + /* Post-extraction passes (shared by both parallel and sequential) */ + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + rc = cbm_pipeline_pass_tests(&ctx, files, file_count); + if (rc != 0) { + goto cleanup; + } + cbm_log_info("pass.timing", "pass", "tests", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + if (check_cancel(p)) { + rc = -1; + goto cleanup; + } + + /* ── Fused post-passes: githistory (I/O) + httplinks (CPU) in parallel ── */ + { + struct timespec t_gh; + struct timespec t_hl; + cbm_clock_gettime(CLOCK_MONOTONIC, &t_gh); + cbm_clock_gettime(CLOCK_MONOTONIC, &t_hl); + + cbm_githistory_result_t 
gh_result = {0}; + cbm_thread_t gh_thread; + bool gh_threaded = false; + gh_compute_arg_t gh_arg = {.repo_path = ctx.repo_path, .result = &gh_result}; + + /* Skip githistory entirely in fast mode */ + if (p->mode != CBM_MODE_FAST) { + + /* Only parallelize if we have multiple cores */ + if (cbm_default_worker_count(true) > 1) { + if (cbm_thread_create(&gh_thread, 0, gh_compute_thread_fn, &gh_arg) == 0) { + gh_threaded = true; + } + } + + /* If threading failed or single-core, run githistory serially first */ + if (!gh_threaded) { + cbm_pipeline_githistory_compute(ctx.repo_path, &gh_result); + cbm_log_info("pass.timing", "pass", "githistory_compute", "elapsed_ms", + itoa_buf((int)elapsed_ms(t_gh))); + } + } else { + cbm_log_info("pass.skip", "pass", "githistory", "reason", "fast_mode"); + } + + /* Run httplinks on main thread (CPU-bound) */ + rc = cbm_pipeline_pass_httplinks(&ctx); + cbm_log_info("pass.timing", "pass", "httplinks", "elapsed_ms", + itoa_buf((int)elapsed_ms(t_hl))); + + /* Wait for githistory thread to complete */ + if (gh_threaded) { + cbm_thread_join(&gh_thread); + cbm_log_info("pass.timing", "pass", "githistory_compute", "elapsed_ms", + itoa_buf((int)elapsed_ms(t_gh))); + } + + if (rc != 0) { + free(gh_result.couplings); + goto cleanup; + } + if (check_cancel(p)) { + free(gh_result.couplings); + rc = -1; + goto cleanup; + } + + /* Apply githistory edges (serial, writes to gbuf) */ + int gh_edges = 0; + if (gh_result.count > 0) { + gh_edges = cbm_pipeline_githistory_apply(&ctx, &gh_result); + } + cbm_log_info("pass.done", "pass", "githistory", "commits", itoa_buf(gh_result.commit_count), + "edges", itoa_buf(gh_edges)); + free(gh_result.couplings); + } + + /* Pre-dump passes (operate on graph buffer, not store) */ + if (!check_cancel(p)) { + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + cbm_pipeline_pass_decorator_tags(p->gbuf, p->project_name); + cbm_log_info("pass.timing", "pass", "decorator_tags", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + } + + 
if (!check_cancel(p)) { + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + cbm_pipeline_pass_configlink(&ctx); + cbm_log_info("pass.timing", "pass", "configlink", "elapsed_ms", + itoa_buf((int)elapsed_ms(t))); + } + + /* Free prescan cache — no longer needed after httplinks + configlink */ + if (ctx.prescan_cache) { + for (int i = 0; i < ctx.prescan_count; i++) { + free(ctx.prescan_cache[i].http_sites); + free(ctx.prescan_cache[i].config_refs); + free(ctx.prescan_cache[i].routes); + } + free(ctx.prescan_cache); + ctx.prescan_cache = NULL; + } + if (ctx.prescan_path_map) { + cbm_ht_free(ctx.prescan_path_map); + ctx.prescan_path_map = NULL; + } + + /* Direct dump: construct B-tree pages in C, fwrite() to .db file. + * Zero SQLite library involvement — cbm_write_db() builds the binary + * format directly from flat arrays. Atomic: writes .tmp then renames. */ + if (!check_cancel(p)) { + cbm_clock_gettime(CLOCK_MONOTONIC, &t); + + const char *home = cbm_home_dir(); + char db_path[1024]; + if (p->db_path) { + snprintf(db_path, sizeof(db_path), "%s", p->db_path); + } else { + if (!home) { + home = "/tmp"; + } + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, + p->project_name); + } + + /* Ensure parent directory exists (e.g. 
~/.cache/codebase-memory-mcp/) */ + char db_dir[1024]; + snprintf(db_dir, sizeof(db_dir), "%s", db_path); + char *last_slash = strrchr(db_dir, '/'); + if (last_slash) { + *last_slash = '\0'; + cbm_mkdir_p(db_dir, 0755); + } + + rc = cbm_gbuf_dump_to_sqlite(p->gbuf, db_path); + if (rc != 0) { + cbm_log_error("pipeline.err", "phase", "dump"); + goto cleanup; + } + cbm_log_info("pass.timing", "pass", "dump", "elapsed_ms", itoa_buf((int)elapsed_ms(t))); + + /* Persist file hashes so next run can use incremental path */ + cbm_store_t *hash_store = cbm_store_open_path(db_path); + if (hash_store) { + cbm_store_delete_file_hashes(hash_store, p->project_name); + for (int i = 0; i < file_count; i++) { + struct stat fst; + if (stat(files[i].path, &fst) == 0) { + int64_t mtime_ns; +#ifdef __APPLE__ + mtime_ns = ((int64_t)fst.st_mtimespec.tv_sec * 1000000000LL) + + (int64_t)fst.st_mtimespec.tv_nsec; +#elif defined(_WIN32) + mtime_ns = (int64_t)fst.st_mtime * 1000000000LL; +#else + mtime_ns = + ((int64_t)fst.st_mtim.tv_sec * 1000000000LL) + (int64_t)fst.st_mtim.tv_nsec; +#endif + cbm_store_upsert_file_hash(hash_store, p->project_name, files[i].rel_path, "", + mtime_ns, fst.st_size); + } + } + cbm_store_close(hash_store); + cbm_log_info("pass.timing", "pass", "persist_hashes", "files", itoa_buf(file_count)); + } + } + + cbm_log_info("pipeline.done", "nodes", itoa_buf(cbm_gbuf_node_count(p->gbuf)), "edges", + itoa_buf(cbm_gbuf_edge_count(p->gbuf)), "elapsed_ms", + itoa_buf((int)elapsed_ms(t0))); + +cleanup: + /* Free prescan if not already freed */ + if (ctx.prescan_cache) { + for (int i = 0; i < ctx.prescan_count; i++) { + free(ctx.prescan_cache[i].http_sites); + free(ctx.prescan_cache[i].config_refs); + free(ctx.prescan_cache[i].routes); + } + free(ctx.prescan_cache); + ctx.prescan_cache = NULL; + } + if (ctx.prescan_path_map) { + cbm_ht_free(ctx.prescan_path_map); + ctx.prescan_path_map = NULL; + } + cbm_discover_free(files, file_count); + cbm_gbuf_free(p->gbuf); + p->gbuf = 
NULL; + cbm_registry_free(p->registry); + p->registry = NULL; + return rc; +} diff --git a/src/store/store.c b/src/store/store.c index 4b00d4e..dfc01df 100644 --- a/src/store/store.c +++ b/src/store/store.c @@ -1,4419 +1,4418 @@ -/* - * store.c — SQLite graph store implementation. - * - * Implements the opaque cbm_store_t handle with prepared statement caching, - * schema initialization, and all CRUD operations for nodes, edges, projects, - * file hashes, search, BFS traversal, and schema introspection. - */ - -// for ISO timestamp - -#include "store/store.h" -#include "foundation/platform.h" -#include "foundation/compat.h" -#include "foundation/compat_regex.h" - -#include -#include -#include -#include -#include -#include -#include - -/* ── SQLite bind helpers ───────────────────────────────────────── */ - -/* Wrap sqlite3_bind_text with SQLITE_TRANSIENT to isolate the platform - int-to-pointer cast ((void*)-1) in one place. */ -// NOLINTNEXTLINE(performance-no-int-to-ptr) -static const sqlite3_destructor_type BIND_TRANSIENT = SQLITE_TRANSIENT; - -static int bind_text(sqlite3_stmt *s, int col, const char *v) { - return sqlite3_bind_text(s, col, v, -1, BIND_TRANSIENT); -} - -/* ── Internal store structure ───────────────────────────────────── */ - -struct cbm_store { - sqlite3 *db; - const char *db_path; /* heap-allocated, or NULL for :memory: */ - char errbuf[512]; - - /* Prepared statements (lazily initialized, cached for lifetime) */ - sqlite3_stmt *stmt_upsert_node; - sqlite3_stmt *stmt_find_node_by_id; - sqlite3_stmt *stmt_find_node_by_qn; - sqlite3_stmt *stmt_find_node_by_qn_any; /* QN lookup without project filter */ - sqlite3_stmt *stmt_find_nodes_by_name; - sqlite3_stmt *stmt_find_nodes_by_name_any; /* name lookup without project filter */ - sqlite3_stmt *stmt_find_nodes_by_label; - sqlite3_stmt *stmt_find_nodes_by_file; - sqlite3_stmt *stmt_count_nodes; - sqlite3_stmt *stmt_delete_nodes_by_project; - sqlite3_stmt *stmt_delete_nodes_by_file; - sqlite3_stmt 
*stmt_delete_nodes_by_label; - - sqlite3_stmt *stmt_insert_edge; - sqlite3_stmt *stmt_find_edges_by_source; - sqlite3_stmt *stmt_find_edges_by_target; - sqlite3_stmt *stmt_find_edges_by_source_type; - sqlite3_stmt *stmt_find_edges_by_target_type; - sqlite3_stmt *stmt_find_edges_by_type; - sqlite3_stmt *stmt_count_edges; - sqlite3_stmt *stmt_count_edges_by_type; - sqlite3_stmt *stmt_delete_edges_by_project; - sqlite3_stmt *stmt_delete_edges_by_type; - - sqlite3_stmt *stmt_upsert_project; - sqlite3_stmt *stmt_get_project; - sqlite3_stmt *stmt_list_projects; - sqlite3_stmt *stmt_delete_project; - - sqlite3_stmt *stmt_upsert_file_hash; - sqlite3_stmt *stmt_get_file_hashes; - sqlite3_stmt *stmt_delete_file_hash; - sqlite3_stmt *stmt_delete_file_hashes; -}; - -/* ── Helpers ────────────────────────────────────────────────────── */ - -static void store_set_error(cbm_store_t *s, const char *msg) { - snprintf(s->errbuf, sizeof(s->errbuf), "%s", msg); -} - -static void store_set_error_sqlite(cbm_store_t *s, const char *prefix) { - snprintf(s->errbuf, sizeof(s->errbuf), "%s: %s", prefix, sqlite3_errmsg(s->db)); -} - -static int exec_sql(cbm_store_t *s, const char *sql) { - if (!s || !s->db) { - return CBM_STORE_ERR; - } - char *err = NULL; - int rc = sqlite3_exec(s->db, sql, NULL, NULL, &err); - if (rc != SQLITE_OK) { - snprintf(s->errbuf, sizeof(s->errbuf), "exec: %s", err ? err : "unknown"); - sqlite3_free(err); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* Safe string: returns "" if NULL. */ -static const char *safe_str(const char *s) { - return s ? s : ""; -} - -/* Safe properties: returns "{}" if NULL. */ -static const char *safe_props(const char *s) { - return (s && s[0]) ? s : "{}"; -} - -/* Duplicate a string onto the heap. */ -static char *heap_strdup(const char *s) { - if (!s) { - return NULL; - } - size_t len = strlen(s); - char *d = malloc(len + 1); - if (d) { - memcpy(d, s, len + 1); - } - return d; -} - -/* Prepare a statement (cached). 
If already prepared, reset+clear. */ -static sqlite3_stmt *prepare_cached(cbm_store_t *s, sqlite3_stmt **slot, const char *sql) { - if (!s || !s->db) { - return NULL; - } - if (*slot) { - sqlite3_reset(*slot); - sqlite3_clear_bindings(*slot); - return *slot; - } - int rc = sqlite3_prepare_v2(s->db, sql, -1, slot, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "prepare"); - return NULL; - } - return *slot; -} - -/* Get ISO-8601 timestamp. */ -static void iso_now(char *buf, size_t sz) { - time_t t = time(NULL); - struct tm tm; -#ifdef _WIN32 - gmtime_s(&tm, &t); /* Windows: reversed arg order */ -#else - gmtime_r(&t, &tm); -#endif - (void)strftime(buf, sz, "%Y-%m-%dT%H:%M:%SZ", - &tm); // cert-err33-c: strftime only fails if buffer is too small — 21-byte ISO - // timestamp always fits in caller-provided buffers -} - -/* ── Schema ─────────────────────────────────────────────────────── */ - -static int init_schema(cbm_store_t *s) { - const char *ddl = "CREATE TABLE IF NOT EXISTS projects (" - " name TEXT PRIMARY KEY," - " indexed_at TEXT NOT NULL," - " root_path TEXT NOT NULL" - ");" - "CREATE TABLE IF NOT EXISTS file_hashes (" - " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," - " rel_path TEXT NOT NULL," - " sha256 TEXT NOT NULL," - " mtime_ns INTEGER NOT NULL DEFAULT 0," - " size INTEGER NOT NULL DEFAULT 0," - " PRIMARY KEY (project, rel_path)" - ");" - "CREATE TABLE IF NOT EXISTS nodes (" - " id INTEGER PRIMARY KEY AUTOINCREMENT," - " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," - " label TEXT NOT NULL," - " name TEXT NOT NULL," - " qualified_name TEXT NOT NULL," - " file_path TEXT DEFAULT ''," - " start_line INTEGER DEFAULT 0," - " end_line INTEGER DEFAULT 0," - " properties TEXT DEFAULT '{}'," - " UNIQUE(project, qualified_name)" - ");" - "CREATE TABLE IF NOT EXISTS edges (" - " id INTEGER PRIMARY KEY AUTOINCREMENT," - " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," - " source_id 
INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," - " target_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," - " type TEXT NOT NULL," - " properties TEXT DEFAULT '{}'," - " UNIQUE(source_id, target_id, type)" - ");" - "CREATE TABLE IF NOT EXISTS project_summaries (" - " project TEXT PRIMARY KEY," - " summary TEXT NOT NULL," - " source_hash TEXT NOT NULL," - " created_at TEXT NOT NULL," - " updated_at TEXT NOT NULL" - ");"; - - return exec_sql(s, ddl); -} - -static int create_user_indexes(cbm_store_t *s) { - const char *sql = - "CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(project, label);" - "CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(project, name);" - "CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(project, file_path);" - "CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);" - "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);"; - return exec_sql(s, sql); -} - -static int configure_pragmas(cbm_store_t *s, bool in_memory) { - int rc; - rc = exec_sql(s, "PRAGMA foreign_keys = ON;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA temp_store = MEMORY;"); - if (rc != CBM_STORE_OK) { - return rc; - } - - if (in_memory) { - rc = exec_sql(s, "PRAGMA synchronous = OFF;"); - } else { - rc = exec_sql(s, "PRAGMA journal_mode = WAL;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA busy_timeout = 10000;"); - if (rc != CBM_STORE_OK) { - return rc; - } - rc = exec_sql(s, "PRAGMA mmap_size = 67108864;"); /* 64 MB */ - } - return rc; -} - -/* ── REGEXP function for SQLite ──────────────────────────────────── */ - 
-static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) { - (void)argc; - const char *pattern = (const char *)sqlite3_value_text(argv[0]); - const char *text = (const char *)sqlite3_value_text(argv[1]); - if (!pattern || !text) { - sqlite3_result_int(ctx, 0); - return; - } - - cbm_regex_t re; - int rc = cbm_regcomp(&re, pattern, CBM_REG_EXTENDED | CBM_REG_NOSUB); - if (rc != 0) { - sqlite3_result_error(ctx, "invalid regex", -1); - return; - } - - rc = cbm_regexec(&re, text, 0, NULL, 0); - cbm_regfree(&re); - sqlite3_result_int(ctx, rc == 0 ? 1 : 0); -} - -/* Case-insensitive REGEXP variant */ -static void sqlite_iregexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) { - (void)argc; - const char *pattern = (const char *)sqlite3_value_text(argv[0]); - const char *text = (const char *)sqlite3_value_text(argv[1]); - if (!pattern || !text) { - sqlite3_result_int(ctx, 0); - return; - } - - cbm_regex_t re; - int rc = cbm_regcomp(&re, pattern, CBM_REG_EXTENDED | CBM_REG_NOSUB | CBM_REG_ICASE); - if (rc != 0) { - sqlite3_result_error(ctx, "invalid regex", -1); - return; - } - - rc = cbm_regexec(&re, text, 0, NULL, 0); - cbm_regfree(&re); - sqlite3_result_int(ctx, rc == 0 ? 1 : 0); -} - -/* ── Lifecycle ──────────────────────────────────────────────────── */ - -/* SQLite authorizer: deny dangerous operations that could be exploited via - * SQL injection through the Cypher→SQL translation layer. 
*/ -static int store_authorizer(void *user_data, int action, const char *p3, const char *p4, - const char *p5, const char *p6) { - (void)user_data; - (void)p3; - (void)p4; - (void)p5; - (void)p6; - switch (action) { - case SQLITE_ATTACH: /* ATTACH DATABASE — could create/read arbitrary files */ - case SQLITE_DETACH: /* DETACH DATABASE */ - return SQLITE_DENY; - default: - return SQLITE_OK; - } -} - -static cbm_store_t *store_open_internal(const char *path, bool in_memory) { - cbm_store_t *s = calloc(1, sizeof(cbm_store_t)); - if (!s) { - return NULL; - } - - int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; - if (in_memory) { - flags |= SQLITE_OPEN_MEMORY; - } - - int rc = sqlite3_open_v2(path, &s->db, flags, NULL); - if (rc != SQLITE_OK) { - free(s); - return NULL; - } - - if (path && !in_memory) { - s->db_path = heap_strdup(path); - } - - /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. - * The authorizer runs inside SQLite's query planner — no string-level bypass. 
*/ - sqlite3_set_authorizer(s->db, store_authorizer, NULL); - - /* Register REGEXP function (SQLite doesn't have one built-in) */ - sqlite3_create_function(s->db, "regexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, - sqlite_regexp, NULL, NULL); - /* Case-insensitive variant for search with case_sensitive=false */ - sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, - sqlite_iregexp, NULL, NULL); - - if (configure_pragmas(s, in_memory) != CBM_STORE_OK || init_schema(s) != CBM_STORE_OK || - create_user_indexes(s) != CBM_STORE_OK) { - sqlite3_close(s->db); - free((void *)s->db_path); - free(s); - return NULL; - } - - return s; -} - -cbm_store_t *cbm_store_open_memory(void) { - return store_open_internal(":memory:", true); -} - -cbm_store_t *cbm_store_open_path(const char *db_path) { - if (!db_path) { - return NULL; - } - return store_open_internal(db_path, false); -} - -cbm_store_t *cbm_store_open(const char *project) { - if (!project) { - return NULL; - } - /* Build path: ~/.cache/codebase-memory-mcp/.db */ - const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) — called once during - // single-threaded store open, never concurrently - if (!home) { - home = "/tmp"; - } - char path[1024]; - snprintf(path, sizeof(path), "%s/.cache/codebase-memory-mcp/%s.db", home, project); - return store_open_internal(path, false); -} - -static void finalize_stmt(sqlite3_stmt **s) { - if (*s) { - sqlite3_finalize(*s); - *s = NULL; - } -} - -void cbm_store_close(cbm_store_t *s) { - if (!s) { - return; - } - - /* Finalize all cached statements */ - finalize_stmt(&s->stmt_upsert_node); - finalize_stmt(&s->stmt_find_node_by_id); - finalize_stmt(&s->stmt_find_node_by_qn); - finalize_stmt(&s->stmt_find_node_by_qn_any); - finalize_stmt(&s->stmt_find_nodes_by_name); - finalize_stmt(&s->stmt_find_nodes_by_name_any); - finalize_stmt(&s->stmt_find_nodes_by_label); - finalize_stmt(&s->stmt_find_nodes_by_file); - 
finalize_stmt(&s->stmt_count_nodes); - finalize_stmt(&s->stmt_delete_nodes_by_project); - finalize_stmt(&s->stmt_delete_nodes_by_file); - finalize_stmt(&s->stmt_delete_nodes_by_label); - - finalize_stmt(&s->stmt_insert_edge); - finalize_stmt(&s->stmt_find_edges_by_source); - finalize_stmt(&s->stmt_find_edges_by_target); - finalize_stmt(&s->stmt_find_edges_by_source_type); - finalize_stmt(&s->stmt_find_edges_by_target_type); - finalize_stmt(&s->stmt_find_edges_by_type); - finalize_stmt(&s->stmt_count_edges); - finalize_stmt(&s->stmt_count_edges_by_type); - finalize_stmt(&s->stmt_delete_edges_by_project); - finalize_stmt(&s->stmt_delete_edges_by_type); - - finalize_stmt(&s->stmt_upsert_project); - finalize_stmt(&s->stmt_get_project); - finalize_stmt(&s->stmt_list_projects); - finalize_stmt(&s->stmt_delete_project); - - finalize_stmt(&s->stmt_upsert_file_hash); - finalize_stmt(&s->stmt_get_file_hashes); - finalize_stmt(&s->stmt_delete_file_hash); - finalize_stmt(&s->stmt_delete_file_hashes); - - sqlite3_close(s->db); - free((void *)s->db_path); - free(s); -} - -const char *cbm_store_error(cbm_store_t *s) { - return s ? s->errbuf : "null store"; -} - -/* ── Transaction ────────────────────────────────────────────────── */ - -int cbm_store_begin(cbm_store_t *s) { - return exec_sql(s, "BEGIN IMMEDIATE;"); -} - -int cbm_store_commit(cbm_store_t *s) { - return exec_sql(s, "COMMIT;"); -} - -int cbm_store_rollback(cbm_store_t *s) { - return exec_sql(s, "ROLLBACK;"); -} - -/* ── Bulk write ─────────────────────────────────────────────────── */ - -int cbm_store_begin_bulk(cbm_store_t *s) { - /* Stay in WAL mode throughout. Switching to MEMORY journal mode would - * make the database unrecoverable if the process crashes mid-write, - * because the in-memory rollback journal is lost on crash. - * WAL mode is crash-safe: uncommitted WAL entries are simply discarded - * on the next open. Performance is preserved via synchronous=OFF and a - * larger cache, which are safe with WAL. 
*/ - int rc = exec_sql(s, "PRAGMA synchronous = OFF;"); - if (rc != CBM_STORE_OK) { - return rc; - } - return exec_sql(s, "PRAGMA cache_size = -65536;"); /* 64 MB */ -} - -int cbm_store_end_bulk(cbm_store_t *s) { - int rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); - if (rc != CBM_STORE_OK) { - return rc; - } - return exec_sql(s, "PRAGMA cache_size = -2000;"); /* default ~2 MB */ -} - -int cbm_store_drop_indexes(cbm_store_t *s) { - return exec_sql(s, "DROP INDEX IF EXISTS idx_nodes_label;" - "DROP INDEX IF EXISTS idx_nodes_name;" - "DROP INDEX IF EXISTS idx_nodes_file;" - "DROP INDEX IF EXISTS idx_edges_source;" - "DROP INDEX IF EXISTS idx_edges_target;" - "DROP INDEX IF EXISTS idx_edges_type;" - "DROP INDEX IF EXISTS idx_edges_target_type;" - "DROP INDEX IF EXISTS idx_edges_source_type;"); -} - -int cbm_store_create_indexes(cbm_store_t *s) { - return create_user_indexes(s); -} - -/* ── Checkpoint ─────────────────────────────────────────────────── */ - -int cbm_store_checkpoint(cbm_store_t *s) { - int rc = sqlite3_wal_checkpoint_v2(s->db, NULL, SQLITE_CHECKPOINT_TRUNCATE, NULL, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "checkpoint"); - return CBM_STORE_ERR; - } - return exec_sql(s, "PRAGMA optimize;"); -} - -/* ── Dump ───────────────────────────────────────────────────────── */ - -/* Dump entire in-memory database to a file via sqlite3_backup. - * Writes to a temp file first, then atomically renames for crash safety. - * sqlite3_backup_step(-1) copies ALL B-tree pages in one call — - * the file on disk is an exact replica of the in-memory page layout. 
*/ -int cbm_store_dump_to_file(cbm_store_t *s, const char *dest_path) { - if (!s || !dest_path) { - return CBM_STORE_ERR; - } - - /* Ensure parent directory exists */ - char dir[1024]; - snprintf(dir, sizeof(dir), "%s", dest_path); - char *sl = strrchr(dir, '/'); - if (sl) { - *sl = '\0'; - (void)cbm_mkdir(dir); - } - - /* Write to temp file for atomic swap */ - char tmp_path[1024]; - snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", dest_path); - (void)unlink(tmp_path); - - sqlite3 *dest_db = NULL; - int rc = sqlite3_open(tmp_path, &dest_db); - if (rc != SQLITE_OK) { - store_set_error(s, "dump: cannot open temp file"); - return CBM_STORE_ERR; - } - - sqlite3_backup *bk = sqlite3_backup_init(dest_db, "main", s->db, "main"); - if (!bk) { - store_set_error(s, "dump: backup init failed"); - sqlite3_close(dest_db); - (void)unlink(tmp_path); - return CBM_STORE_ERR; - } - - rc = sqlite3_backup_step(bk, -1); /* copy ALL pages in one shot */ - sqlite3_backup_finish(bk); - - if (rc != SQLITE_DONE) { - store_set_error(s, "dump: backup step failed"); - sqlite3_close(dest_db); - (void)unlink(tmp_path); - return CBM_STORE_ERR; - } - - /* Enable WAL on the dumped file so readers can connect concurrently */ - sqlite3_exec(dest_db, "PRAGMA journal_mode = WAL;", NULL, NULL, NULL); - sqlite3_close(dest_db); - - /* Atomic rename: old WAL/SHM become stale and get recreated by - * the next reader's configure_pragmas call. 
*/ - if (rename(tmp_path, dest_path) != 0) { - store_set_error(s, "dump: rename failed"); - (void)unlink(tmp_path); - return CBM_STORE_ERR; - } - - return CBM_STORE_OK; -} - -/* ── Project CRUD ───────────────────────────────────────────────── */ - -int cbm_store_upsert_project(cbm_store_t *s, const char *name, const char *root_path) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_upsert_project, - "INSERT INTO projects (name, indexed_at, root_path) VALUES (?1, ?2, ?3) " - "ON CONFLICT(name) DO UPDATE SET indexed_at=?2, root_path=?3;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - char ts[64]; - iso_now(ts, sizeof(ts)); - - bind_text(stmt, 1, name); - bind_text(stmt, 2, ts); - bind_text(stmt, 3, root_path); - - int rc = sqlite3_step(stmt); - if (rc != SQLITE_DONE) { - store_set_error_sqlite(s, "upsert_project"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_get_project, - "SELECT name, indexed_at, root_path FROM projects WHERE name = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, name); - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - out->name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - out->indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - out->root_path = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - return CBM_STORE_OK; - } - return CBM_STORE_NOT_FOUND; -} - -int cbm_store_list_projects(cbm_store_t *s, cbm_project_t **out, int *count) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_list_projects, - "SELECT name, indexed_at, root_path FROM projects ORDER BY name;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - /* Collect into dynamic array */ - int cap = 8; - int n = 0; - cbm_project_t *arr = malloc(cap * sizeof(cbm_project_t)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = 
safe_realloc(arr, cap * sizeof(cbm_project_t)); - } - arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - arr[n].root_path = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - n++; - } - - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -int cbm_store_delete_project(cbm_store_t *s, const char *name) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_delete_project, "DELETE FROM projects WHERE name = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, name); - int rc = sqlite3_step(stmt); - if (rc != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_project"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* ── Node CRUD ──────────────────────────────────────────────────── */ - -// NOLINTNEXTLINE(misc-include-cleaner) — int64_t provided by standard header -int64_t cbm_store_upsert_node(cbm_store_t *s, const cbm_node_t *n) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_upsert_node, - "INSERT INTO nodes (project, label, name, qualified_name, file_path, " - "start_line, end_line, properties) " - "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) " - "ON CONFLICT(project, qualified_name) DO UPDATE SET " - "label=?2, name=?3, file_path=?5, start_line=?6, end_line=?7, properties=?8 " - "RETURNING id;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, safe_str(n->project)); - bind_text(stmt, 2, safe_str(n->label)); - bind_text(stmt, 3, safe_str(n->name)); - bind_text(stmt, 4, safe_str(n->qualified_name)); - bind_text(stmt, 5, safe_str(n->file_path)); - sqlite3_bind_int(stmt, 6, n->start_line); - sqlite3_bind_int(stmt, 7, n->end_line); - bind_text(stmt, 8, safe_props(n->properties_json)); - - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - int64_t id = sqlite3_column_int64(stmt, 0); - sqlite3_reset(stmt); /* unblock COMMIT — RETURNING leaves stmt active */ - return id; - } - sqlite3_reset(stmt); - 
store_set_error_sqlite(s, "upsert_node"); - return CBM_STORE_ERR; -} - -/* Scan a node from current row of stmt. Heap-allocates strings. */ -static void scan_node(sqlite3_stmt *stmt, cbm_node_t *n) { - n->id = sqlite3_column_int64(stmt, 0); - n->project = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - n->label = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - // NOLINTNEXTLINE(clang-analyzer-unix.Malloc) - n->name = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); - n->qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 4)); - n->file_path = heap_strdup((const char *)sqlite3_column_text(stmt, 5)); - n->start_line = sqlite3_column_int(stmt, 6); - n->end_line = sqlite3_column_int(stmt, 7); - n->properties_json = heap_strdup((const char *)sqlite3_column_text(stmt, 8)); -} - -int cbm_store_find_node_by_id(cbm_store_t *s, int64_t id, cbm_node_t *out) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_find_node_by_id, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes WHERE id = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - sqlite3_bind_int64(stmt, 1, id); - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - scan_node(stmt, out); - return CBM_STORE_OK; - } - return CBM_STORE_NOT_FOUND; -} - -int cbm_store_find_node_by_qn(cbm_store_t *s, const char *project, const char *qn, - cbm_node_t *out) { - if (!s || !s->db) { - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_find_node_by_qn, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND qualified_name = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, qn); - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - scan_node(stmt, out); - return CBM_STORE_OK; - } - return CBM_STORE_NOT_FOUND; -} - -int 
cbm_store_find_node_by_qn_any(cbm_store_t *s, const char *qn, cbm_node_t *out) { - if (!s || !s->db) { - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_find_node_by_qn_any, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE qualified_name = ?1 LIMIT 1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, qn); - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - scan_node(stmt, out); - return CBM_STORE_OK; - } - return CBM_STORE_NOT_FOUND; -} - -int cbm_store_find_nodes_by_name_any(cbm_store_t *s, const char *name, cbm_node_t **out, - int *count) { - if (!s || !s->db) { - *out = NULL; - *count = 0; - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_find_nodes_by_name_any, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE name = ?1;"); - if (!stmt) { - *out = NULL; - *count = 0; - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, name); - - int cap = 16; - int n = 0; - cbm_node_t *arr = malloc(cap * sizeof(cbm_node_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_node_t)); - } - scan_node(stmt, &arr[n]); - n++; - } - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -int cbm_store_find_node_ids_by_qns(cbm_store_t *s, const char *project, const char **qns, - int qn_count, int64_t *out_ids) { - if (!s || !project || !qns || !out_ids || qn_count <= 0) { - return 0; - } - - /* Zero out results */ - memset(out_ids, 0, (size_t)qn_count * sizeof(int64_t)); - - int found = 0; - cbm_node_t node = {0}; - for (int i = 0; i < qn_count; i++) { - if (!qns[i]) { - continue; - } - int rc = cbm_store_find_node_by_qn(s, project, qns[i], &node); - if (rc == CBM_STORE_OK) { - out_ids[i] = node.id; - found++; - cbm_node_free_fields(&node); - memset(&node, 0, sizeof(node)); - } - } - return 
found; -} - -/* Generic: find multiple nodes by a single-column filter. */ -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -static int find_nodes_generic(cbm_store_t *s, sqlite3_stmt **slot, const char *sql, - const char *project, const char *val, cbm_node_t **out, int *count) { - if (!s || !s->db) { - *out = NULL; - *count = 0; - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = prepare_cached(s, slot, sql); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, val); - - int cap = 16; - int n = 0; - cbm_node_t *arr = malloc(cap * sizeof(cbm_node_t)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_node_t)); - } - scan_node(stmt, &arr[n]); - n++; - } - - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -int cbm_store_find_nodes_by_name(cbm_store_t *s, const char *project, const char *name, - cbm_node_t **out, int *count) { - return find_nodes_generic(s, &s->stmt_find_nodes_by_name, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND name = ?2;", - project, name, out, count); -} - -int cbm_store_find_nodes_by_label(cbm_store_t *s, const char *project, const char *label, - cbm_node_t **out, int *count) { - return find_nodes_generic(s, &s->stmt_find_nodes_by_label, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND label = ?2;", - project, label, out, count); -} - -int cbm_store_find_nodes_by_file(cbm_store_t *s, const char *project, const char *file_path, - cbm_node_t **out, int *count) { - return find_nodes_generic(s, &s->stmt_find_nodes_by_file, - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND file_path = ?2;", - project, file_path, out, count); -} - -int 
cbm_store_count_nodes(cbm_store_t *s, const char *project) { - if (!s || !s->db) { - return 0; - } - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_count_nodes, "SELECT COUNT(*) FROM nodes WHERE project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - if (sqlite3_step(stmt) == SQLITE_ROW) { - return sqlite3_column_int(stmt, 0); - } - return 0; -} - -int cbm_store_delete_nodes_by_project(cbm_store_t *s, const char *project) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_project, - "DELETE FROM nodes WHERE project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_nodes_by_project"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_delete_nodes_by_file(cbm_store_t *s, const char *project, const char *file_path) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_file, - "DELETE FROM nodes WHERE project = ?1 AND file_path = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, file_path); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_nodes_by_file"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_delete_nodes_by_label(cbm_store_t *s, const char *project, const char *label) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_label, - "DELETE FROM nodes WHERE project = ?1 AND label = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, label); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_nodes_by_label"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* ── Node batch ─────────────────────────────────────────────────── */ - -int cbm_store_upsert_node_batch(cbm_store_t *s, const cbm_node_t *nodes, int count, - int64_t *out_ids) { - if (count == 0) 
{ - return CBM_STORE_OK; - } - - exec_sql(s, "BEGIN;"); - for (int i = 0; i < count; i++) { - int64_t id = cbm_store_upsert_node(s, &nodes[i]); - if (id == CBM_STORE_ERR) { - exec_sql(s, "ROLLBACK;"); - return CBM_STORE_ERR; - } - if (out_ids) { - out_ids[i] = id; - } - } - exec_sql(s, "COMMIT;"); - return CBM_STORE_OK; -} - -/* ── Edge CRUD ──────────────────────────────────────────────────── */ - -int64_t cbm_store_insert_edge(cbm_store_t *s, const cbm_edge_t *e) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_insert_edge, - "INSERT INTO edges (project, source_id, target_id, type, properties) " - "VALUES (?1, ?2, ?3, ?4, ?5) " - "ON CONFLICT(source_id, target_id, type) DO UPDATE SET " - "properties = json_patch(properties, ?5) " - "RETURNING id;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, safe_str(e->project)); - sqlite3_bind_int64(stmt, 2, e->source_id); - sqlite3_bind_int64(stmt, 3, e->target_id); - bind_text(stmt, 4, safe_str(e->type)); - bind_text(stmt, 5, safe_props(e->properties_json)); - - int rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - int64_t id = sqlite3_column_int64(stmt, 0); - sqlite3_reset(stmt); /* unblock COMMIT — RETURNING leaves stmt active */ - return id; - } - sqlite3_reset(stmt); - store_set_error_sqlite(s, "insert_edge"); - return CBM_STORE_ERR; -} - -/* Scan an edge from current row of stmt. */ -static void scan_edge(sqlite3_stmt *stmt, cbm_edge_t *e) { - e->id = sqlite3_column_int64(stmt, 0); - e->project = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - e->source_id = sqlite3_column_int64(stmt, 2); - e->target_id = sqlite3_column_int64(stmt, 3); - e->type = heap_strdup((const char *)sqlite3_column_text(stmt, 4)); - e->properties_json = heap_strdup((const char *)sqlite3_column_text(stmt, 5)); -} - -/* Generic: find multiple edges by a filter. 
*/ -static int find_edges_generic(cbm_store_t *s, sqlite3_stmt **slot, const char *sql, - void (*bind_fn)(sqlite3_stmt *, const void *), const void *bind_data, - cbm_edge_t **out, int *count) { - if (!s || !s->db) { - *out = NULL; - *count = 0; - return CBM_STORE_ERR; - } - sqlite3_stmt *stmt = prepare_cached(s, slot, sql); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_fn(stmt, bind_data); - - int cap = 16; - int n = 0; - cbm_edge_t *arr = malloc(cap * sizeof(cbm_edge_t)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_edge_t)); - } - scan_edge(stmt, &arr[n]); - n++; - } - - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -/* Bind helpers for edge queries */ -typedef struct { - int64_t id; -} bind_id_t; -typedef struct { - int64_t id; - const char *type; -} bind_id_type_t; -typedef struct { - const char *project; - const char *type; -} bind_proj_type_t; - -static void bind_source_id(sqlite3_stmt *stmt, const void *data) { - const bind_id_t *b = data; - sqlite3_bind_int64(stmt, 1, b->id); -} - -static void bind_id_and_type(sqlite3_stmt *stmt, const void *data) { - const bind_id_type_t *b = data; - sqlite3_bind_int64(stmt, 1, b->id); - bind_text(stmt, 2, b->type); -} - -static void bind_proj_and_type(sqlite3_stmt *stmt, const void *data) { - const bind_proj_type_t *b = data; - bind_text(stmt, 1, b->project); - bind_text(stmt, 2, b->type); -} - -int cbm_store_find_edges_by_source(cbm_store_t *s, int64_t source_id, cbm_edge_t **out, - int *count) { - bind_id_t b = {source_id}; - return find_edges_generic(s, &s->stmt_find_edges_by_source, - "SELECT id, project, source_id, target_id, type, properties " - "FROM edges WHERE source_id = ?1;", - bind_source_id, &b, out, count); -} - -int cbm_store_find_edges_by_target(cbm_store_t *s, int64_t target_id, cbm_edge_t **out, - int *count) { - bind_id_t b = {target_id}; - return find_edges_generic(s, &s->stmt_find_edges_by_target, - "SELECT id, 
project, source_id, target_id, type, properties " - "FROM edges WHERE target_id = ?1;", - bind_source_id, &b, out, count); -} - -int cbm_store_find_edges_by_source_type(cbm_store_t *s, int64_t source_id, const char *type, - cbm_edge_t **out, int *count) { - bind_id_type_t b = {source_id, type}; - return find_edges_generic(s, &s->stmt_find_edges_by_source_type, - "SELECT id, project, source_id, target_id, type, properties " - "FROM edges WHERE source_id = ?1 AND type = ?2;", - bind_id_and_type, &b, out, count); -} - -int cbm_store_find_edges_by_target_type(cbm_store_t *s, int64_t target_id, const char *type, - cbm_edge_t **out, int *count) { - bind_id_type_t b = {target_id, type}; - return find_edges_generic(s, &s->stmt_find_edges_by_target_type, - "SELECT id, project, source_id, target_id, type, properties " - "FROM edges WHERE target_id = ?1 AND type = ?2;", - bind_id_and_type, &b, out, count); -} - -int cbm_store_find_edges_by_type(cbm_store_t *s, const char *project, const char *type, - cbm_edge_t **out, int *count) { - bind_proj_type_t b = {project, type}; - return find_edges_generic(s, &s->stmt_find_edges_by_type, - "SELECT id, project, source_id, target_id, type, properties " - "FROM edges WHERE project = ?1 AND type = ?2;", - bind_proj_and_type, &b, out, count); -} - -int cbm_store_count_edges(cbm_store_t *s, const char *project) { - if (!s || !s->db) { - return 0; - } - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_count_edges, "SELECT COUNT(*) FROM edges WHERE project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - if (sqlite3_step(stmt) == SQLITE_ROW) { - return sqlite3_column_int(stmt, 0); - } - return 0; -} - -int cbm_store_count_edges_by_type(cbm_store_t *s, const char *project, const char *type) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_count_edges_by_type, - "SELECT COUNT(*) FROM edges WHERE project = ?1 AND type = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); 
- bind_text(stmt, 2, type); - if (sqlite3_step(stmt) == SQLITE_ROW) { - return sqlite3_column_int(stmt, 0); - } - return 0; -} - -int cbm_store_delete_edges_by_project(cbm_store_t *s, const char *project) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_edges_by_project, - "DELETE FROM edges WHERE project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_edges_by_project"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_delete_edges_by_type(cbm_store_t *s, const char *project, const char *type) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_edges_by_type, - "DELETE FROM edges WHERE project = ?1 AND type = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, type); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_edges_by_type"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* ── Edge batch ─────────────────────────────────────────────────── */ - -int cbm_store_insert_edge_batch(cbm_store_t *s, const cbm_edge_t *edges, int count) { - if (count == 0) { - return CBM_STORE_OK; - } - - exec_sql(s, "BEGIN;"); - for (int i = 0; i < count; i++) { - int64_t id = cbm_store_insert_edge(s, &edges[i]); - if (id == CBM_STORE_ERR) { - exec_sql(s, "ROLLBACK;"); - return CBM_STORE_ERR; - } - } - exec_sql(s, "COMMIT;"); - return CBM_STORE_OK; -} - -/* ── File hash CRUD ─────────────────────────────────────────────── */ - -int cbm_store_upsert_file_hash(cbm_store_t *s, const char *project, const char *rel_path, - const char *sha256, int64_t mtime_ns, int64_t size) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_upsert_file_hash, - "INSERT INTO file_hashes (project, rel_path, sha256, mtime_ns, size) " - "VALUES (?1, ?2, ?3, ?4, ?5) " - "ON CONFLICT(project, rel_path) DO UPDATE SET " - "sha256=?3, mtime_ns=?4, size=?5;"); - if 
(!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, rel_path); - bind_text(stmt, 3, sha256); - sqlite3_bind_int64(stmt, 4, mtime_ns); - sqlite3_bind_int64(stmt, 5, size); - - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "upsert_file_hash"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_get_file_hashes(cbm_store_t *s, const char *project, cbm_file_hash_t **out, - int *count) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_get_file_hashes, - "SELECT project, rel_path, sha256, mtime_ns, size " - "FROM file_hashes WHERE project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - - int cap = 16; - int n = 0; - cbm_file_hash_t *arr = malloc(cap * sizeof(cbm_file_hash_t)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_file_hash_t)); - } - arr[n].project = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].rel_path = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - arr[n].sha256 = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - arr[n].mtime_ns = sqlite3_column_int64(stmt, 3); - arr[n].size = sqlite3_column_int64(stmt, 4); - n++; - } - - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -int cbm_store_delete_file_hash(cbm_store_t *s, const char *project, const char *rel_path) { - sqlite3_stmt *stmt = - prepare_cached(s, &s->stmt_delete_file_hash, - "DELETE FROM file_hashes WHERE project = ?1 AND rel_path = ?2;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, rel_path); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_file_hash"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_store_delete_file_hashes(cbm_store_t *s, const char *project) { - sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_file_hashes, - "DELETE FROM file_hashes WHERE 
project = ?1;"); - if (!stmt) { - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - if (sqlite3_step(stmt) != SQLITE_DONE) { - store_set_error_sqlite(s, "delete_file_hashes"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* ── FindNodesByFileOverlap ─────────────────────────────────────── */ - -int cbm_store_find_nodes_by_file_overlap(cbm_store_t *s, const char *project, const char *file_path, - int start_line, int end_line, cbm_node_t **out, - int *count) { - *out = NULL; - *count = 0; - const char *sql = "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND file_path = ?2 " - "AND label NOT IN ('Module', 'Package', 'File', 'Folder') " - "AND start_line <= ?4 AND end_line >= ?3 " - "ORDER BY start_line"; - - sqlite3_stmt *stmt = NULL; - int rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "overlap prepare"); - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, file_path); - sqlite3_bind_int(stmt, 3, start_line); - sqlite3_bind_int(stmt, 4, end_line); - - int cap = 8; - int n = 0; - cbm_node_t *nodes = malloc(cap * sizeof(cbm_node_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - nodes = safe_realloc(nodes, cap * sizeof(cbm_node_t)); - } - memset(&nodes[n], 0, sizeof(cbm_node_t)); - scan_node(stmt, &nodes[n]); - n++; - } - sqlite3_finalize(stmt); - *out = nodes; - *count = n; - return CBM_STORE_OK; -} - -/* ── FindNodesByQNSuffix ───────────────────────────────────────── */ - -int cbm_store_find_nodes_by_qn_suffix(cbm_store_t *s, const char *project, const char *suffix, - cbm_node_t **out, int *count) { - *out = NULL; - *count = 0; - if (!s || !s->db) { - return CBM_STORE_ERR; - } - /* Match QNs ending with ".suffix" or exactly equal to suffix */ - char like_pattern[512]; - snprintf(like_pattern, sizeof(like_pattern), "%%.%s", suffix); - - 
const char *sql_with_project = - "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE project = ?1 AND (qualified_name LIKE ?2 OR qualified_name = ?3)"; - const char *sql_any = "SELECT id, project, label, name, qualified_name, file_path, " - "start_line, end_line, properties FROM nodes " - "WHERE (qualified_name LIKE ?1 OR qualified_name = ?2)"; - - sqlite3_stmt *stmt = NULL; - int rc = sqlite3_prepare_v2(s->db, project ? sql_with_project : sql_any, -1, &stmt, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "qn_suffix prepare"); - return CBM_STORE_ERR; - } - - if (project) { - bind_text(stmt, 1, project); - bind_text(stmt, 2, like_pattern); - bind_text(stmt, 3, suffix); - } else { - bind_text(stmt, 1, like_pattern); - bind_text(stmt, 2, suffix); - } - - int cap = 8; - int n = 0; - cbm_node_t *nodes = malloc(cap * sizeof(cbm_node_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - nodes = safe_realloc(nodes, cap * sizeof(cbm_node_t)); - } - memset(&nodes[n], 0, sizeof(cbm_node_t)); - scan_node(stmt, &nodes[n]); - n++; - } - sqlite3_finalize(stmt); - *out = nodes; - *count = n; - return CBM_STORE_OK; -} - -/* ── NodeDegree ────────────────────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void cbm_store_node_degree(cbm_store_t *s, int64_t node_id, int *in_deg, int *out_deg) { - *in_deg = 0; - *out_deg = 0; - - const char *in_sql = "SELECT COUNT(*) FROM edges WHERE target_id = ?1 AND type = 'CALLS'"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, in_sql, -1, &stmt, NULL) == SQLITE_OK) { - sqlite3_bind_int64(stmt, 1, node_id); - if (sqlite3_step(stmt) == SQLITE_ROW) { - *in_deg = sqlite3_column_int(stmt, 0); - } - sqlite3_finalize(stmt); - } - - const char *out_sql = "SELECT COUNT(*) FROM edges WHERE source_id = ?1 AND type = 'CALLS'"; - if (sqlite3_prepare_v2(s->db, out_sql, -1, &stmt, NULL) == 
SQLITE_OK) { - sqlite3_bind_int64(stmt, 1, node_id); - if (sqlite3_step(stmt) == SQLITE_ROW) { - *out_deg = sqlite3_column_int(stmt, 0); - } - sqlite3_finalize(stmt); - } -} - -/* ── Node neighbor names ──────────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -static int query_neighbor_names(sqlite3 *db, const char *sql, int64_t node_id, int limit, - char ***out, int *out_count) { - *out = NULL; - *out_count = 0; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(db, sql, -1, &stmt, NULL) != SQLITE_OK) { - return -1; - } - sqlite3_bind_int64(stmt, 1, node_id); - sqlite3_bind_int(stmt, 2, limit); - - int cap = 8; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **names = malloc((size_t)cap * sizeof(char *)); - int count = 0; - while (sqlite3_step(stmt) == SQLITE_ROW) { - const char *name = (const char *)sqlite3_column_text(stmt, 0); - if (!name) { - continue; - } - if (count >= cap) { - cap *= 2; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - names = safe_realloc(names, (size_t)cap * sizeof(char *)); - } - // NOLINTNEXTLINE(misc-include-cleaner) — strdup provided by standard header - names[count++] = strdup(name); - } - sqlite3_finalize(stmt); - *out = names; - *out_count = count; - return 0; -} - -int cbm_store_node_neighbor_names(cbm_store_t *s, int64_t node_id, int limit, char ***out_callers, - int *caller_count, char ***out_callees, int *callee_count) { - if (!s) { - return -1; - } - *out_callers = NULL; - *caller_count = 0; - *out_callees = NULL; - *callee_count = 0; - - query_neighbor_names( - s->db, - "SELECT DISTINCT n.name FROM edges e JOIN nodes n ON e.source_id = n.id " - "WHERE e.target_id = ?1 AND e.type IN ('CALLS','HTTP_CALLS','ASYNC_CALLS') " - "ORDER BY n.name LIMIT ?2", - node_id, limit, out_callers, caller_count); - - query_neighbor_names( - s->db, - "SELECT DISTINCT n.name FROM edges e JOIN nodes n ON e.target_id = n.id " - "WHERE e.source_id = 
?1 AND e.type IN ('CALLS','HTTP_CALLS','ASYNC_CALLS') " - "ORDER BY n.name LIMIT ?2", - node_id, limit, out_callees, callee_count); - - return 0; -} - -int cbm_store_batch_count_degrees(cbm_store_t *s, const int64_t *node_ids, int id_count, - const char *edge_type, int *out_in, int *out_out) { - if (!s || !node_ids || id_count <= 0 || !out_in || !out_out) { - return CBM_STORE_ERR; - } - - memset(out_in, 0, (size_t)id_count * sizeof(int)); - memset(out_out, 0, (size_t)id_count * sizeof(int)); - - /* Build IN clause: (?,?,?) */ - char in_clause[4096]; - int pos = 0; - for (int i = 0; i < id_count && pos < (int)sizeof(in_clause) - 4; i++) { - if (i > 0) { - in_clause[pos++] = ','; - } - in_clause[pos++] = '?'; - } - in_clause[pos] = '\0'; - - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool has_type = edge_type && edge_type[0] != '\0'; - - /* Inbound: COUNT grouped by target_id */ - char sql[8192]; - if (has_type) { - snprintf(sql, sizeof(sql), - "SELECT target_id, COUNT(*) FROM edges " - "WHERE target_id IN (%s) AND type = ? GROUP BY target_id", - in_clause); - } else { - snprintf(sql, sizeof(sql), - "SELECT target_id, COUNT(*) FROM edges " - "WHERE target_id IN (%s) GROUP BY target_id", - in_clause); - } - - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - return CBM_STORE_ERR; - } - - for (int i = 0; i < id_count; i++) { - sqlite3_bind_int64(stmt, i + 1, node_ids[i]); - } - if (has_type) { - bind_text(stmt, id_count + 1, edge_type); - } - - while (sqlite3_step(stmt) == SQLITE_ROW) { - int64_t nid = sqlite3_column_int64(stmt, 0); - int cnt = sqlite3_column_int(stmt, 1); - for (int i = 0; i < id_count; i++) { - if (node_ids[i] == nid) { - out_in[i] = cnt; - break; - } - } - } - sqlite3_finalize(stmt); - - /* Outbound: COUNT grouped by source_id */ - if (has_type) { - snprintf(sql, sizeof(sql), - "SELECT source_id, COUNT(*) FROM edges " - "WHERE source_id IN (%s) AND type = ? 
GROUP BY source_id", - in_clause); - } else { - snprintf(sql, sizeof(sql), - "SELECT source_id, COUNT(*) FROM edges " - "WHERE source_id IN (%s) GROUP BY source_id", - in_clause); - } - - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - return CBM_STORE_ERR; - } - - for (int i = 0; i < id_count; i++) { - sqlite3_bind_int64(stmt, i + 1, node_ids[i]); - } - if (has_type) { - bind_text(stmt, id_count + 1, edge_type); - } - - while (sqlite3_step(stmt) == SQLITE_ROW) { - int64_t nid = sqlite3_column_int64(stmt, 0); - int cnt = sqlite3_column_int(stmt, 1); - for (int i = 0; i < id_count; i++) { - if (node_ids[i] == nid) { - out_out[i] = cnt; - break; - } - } - } - sqlite3_finalize(stmt); - - return CBM_STORE_OK; -} - -/* ── UpsertFileHashBatch ───────────────────────────────────────── */ - -int cbm_store_upsert_file_hash_batch(cbm_store_t *s, const cbm_file_hash_t *hashes, int count) { - if (count == 0) { - return CBM_STORE_OK; - } - - int rc = cbm_store_begin(s); - if (rc != CBM_STORE_OK) { - return rc; - } - - for (int i = 0; i < count; i++) { - rc = cbm_store_upsert_file_hash(s, hashes[i].project, hashes[i].rel_path, hashes[i].sha256, - hashes[i].mtime_ns, hashes[i].size); - if (rc != CBM_STORE_OK) { - cbm_store_rollback(s); - return rc; - } - } - - return cbm_store_commit(s); -} - -/* ── FindEdgesByURLPath ────────────────────────────────────────── */ - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -int cbm_store_find_edges_by_url_path(cbm_store_t *s, const char *project, const char *keyword, - cbm_edge_t **out, int *count) { - *out = NULL; - *count = 0; - - /* Search properties JSON for url_path containing keyword */ - char like_pattern[512]; - snprintf(like_pattern, sizeof(like_pattern), "%%\"url_path\":\"%%%%%s%%%%\"%%", keyword); - - const char *sql = "SELECT id, project, source_id, target_id, type, properties FROM edges " - "WHERE project = ?1 AND properties LIKE ?2"; - - sqlite3_stmt *stmt = NULL; - int rc = 
sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "url_path prepare"); - return CBM_STORE_ERR; - } - - bind_text(stmt, 1, project); - bind_text(stmt, 2, like_pattern); - - int cap = 8; - int n = 0; - cbm_edge_t *edges = malloc(cap * sizeof(cbm_edge_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - edges = safe_realloc(edges, cap * sizeof(cbm_edge_t)); - } - memset(&edges[n], 0, sizeof(cbm_edge_t)); - scan_edge(stmt, &edges[n]); - n++; - } - sqlite3_finalize(stmt); - *out = edges; - *count = n; - return CBM_STORE_OK; -} - -/* ── RestoreFrom ───────────────────────────────────────────────── */ - -int cbm_store_restore_from(cbm_store_t *dst, cbm_store_t *src) { - sqlite3_backup *bk = sqlite3_backup_init(dst->db, "main", src->db, "main"); - if (!bk) { - store_set_error_sqlite(dst, "backup init"); - return CBM_STORE_ERR; - } - int rc = sqlite3_backup_step(bk, -1); /* copy all pages */ - sqlite3_backup_finish(bk); - - if (rc != SQLITE_DONE) { - store_set_error(dst, "backup step failed"); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -/* ── Search ─────────────────────────────────────────────────────── */ - -/* Convert a glob pattern to SQL LIKE pattern. 
*/ -char *cbm_glob_to_like(const char *pattern) { - if (!pattern) { - return NULL; - } - size_t len = strlen(pattern); - char *out = malloc((len * 2) + 1); - size_t j = 0; - - for (size_t i = 0; i < len; i++) { - if (pattern[i] == '*' && i + 1 < len && pattern[i + 1] == '*') { - /* Remove leading / from output if present (handles glob dir-star) */ - if (j > 0 && out[j - 1] == '/') { - j--; - } - out[j++] = '%'; - i++; /* skip second * */ - if (i + 1 < len && pattern[i + 1] == '/') { - i++; /* skip trailing / */ - } - } else if (pattern[i] == '*') { - out[j++] = '%'; - } else if (pattern[i] == '?') { - out[j++] = '_'; - } else { - out[j++] = pattern[i]; - } - } - out[j] = '\0'; - return out; -} - -/* ── extractLikeHints ─────────────────────────────────────────── */ - -int cbm_extract_like_hints(const char *pattern, char **out, int max_out) { - if (!pattern || !out || max_out <= 0) { - return 0; - } - - /* Bail on alternation — can't convert OR regex to AND LIKE */ - for (const char *p = pattern; *p; p++) { - if (*p == '|') { - return 0; - } - } - - int count = 0; - char buf[256]; - int blen = 0; - - int i = 0; - while (pattern[i]) { - char ch = pattern[i]; - switch (ch) { - case '\\': - /* Escaped char — the next char is literal */ - if (pattern[i + 1]) { - if (blen < (int)sizeof(buf) - 1) { - buf[blen++] = pattern[i + 1]; - } - i += 2; - } else { - i++; - } - break; - case '.': - case '*': - case '+': - case '?': - case '^': - case '$': - case '(': - case ')': - case '[': - case ']': - case '{': - case '}': - /* Meta character — flush current literal segment */ - if (blen >= 3 && count < max_out) { - buf[blen] = '\0'; - out[count++] = strdup(buf); - } - blen = 0; - i++; - break; - default: - if (blen < (int)sizeof(buf) - 1) { - buf[blen++] = ch; - } - i++; - break; - } - } - /* Flush trailing segment */ - if (blen >= 3 && count < max_out) { - buf[blen] = '\0'; - out[count++] = strdup(buf); - } - return count; -} - -/* ── ensureCaseInsensitive / stripCaseFlag 
────────────────────── */ - -const char *cbm_ensure_case_insensitive(const char *pattern) { - static char buf[2048]; - if (!pattern) { - buf[0] = '\0'; - return buf; - } - /* Already has (?i) prefix? Return as-is. */ - if (strncmp(pattern, "(?i)", 4) == 0) { - snprintf(buf, sizeof(buf), "%s", pattern); - } else { - snprintf(buf, sizeof(buf), "(?i)%s", pattern); - } - return buf; -} - -const char *cbm_strip_case_flag(const char *pattern) { - static char buf[2048]; - if (!pattern) { - buf[0] = '\0'; - return buf; - } - if (strncmp(pattern, "(?i)", 4) == 0) { - snprintf(buf, sizeof(buf), "%s", pattern + 4); - } else { - snprintf(buf, sizeof(buf), "%s", pattern); - } - return buf; -} - -int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_search_output_t *out) { - memset(out, 0, sizeof(*out)); - if (!s || !s->db) { - return CBM_STORE_ERR; - } - - /* Build WHERE clauses dynamically */ - char sql[4096]; - char count_sql[4096]; - int bind_idx = 0; - - /* We build a query that selects nodes with optional degree subqueries */ - const char *select_cols = - "SELECT n.id, n.project, n.label, n.name, n.qualified_name, " - "n.file_path, n.start_line, n.end_line, n.properties, " - "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " - "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg "; - - /* Start building WHERE */ - char where[2048] = ""; - int wlen = 0; - int nparams = 0; - - /* Track bind values */ - struct { - enum { BV_TEXT } type; - const char *text; - } binds[16]; - -#define ADD_WHERE(cond) \ - do { \ - if (nparams > 0) \ - wlen += snprintf(where + wlen, sizeof(where) - wlen, " AND "); \ - wlen += snprintf(where + wlen, sizeof(where) - wlen, "%s", cond); \ - nparams++; \ - } while (0) - -#define BIND_TEXT(val) \ - do { \ - bind_idx++; \ - binds[bind_idx - 1].type = BV_TEXT; \ - binds[bind_idx - 1].text = val; \ - } while (0) - - char bind_buf[64]; - char *like_pattern = 
NULL; - - if (params->project) { - snprintf(bind_buf, sizeof(bind_buf), "n.project = ?%d", bind_idx + 1); - ADD_WHERE(bind_buf); - BIND_TEXT(params->project); - } - if (params->label) { - snprintf(bind_buf, sizeof(bind_buf), "n.label = ?%d", bind_idx + 1); - ADD_WHERE(bind_buf); - BIND_TEXT(params->label); - } - if (params->name_pattern) { - if (params->case_sensitive) { - /* Case-sensitive: use built-in REGEXP operator */ - snprintf(bind_buf, sizeof(bind_buf), "n.name REGEXP ?%d", bind_idx + 1); - } else { - /* Case-insensitive: use iregexp() function call syntax */ - snprintf(bind_buf, sizeof(bind_buf), "iregexp(?%d, n.name)", bind_idx + 1); - } - ADD_WHERE(bind_buf); - BIND_TEXT(params->name_pattern); - } - if (params->file_pattern) { - like_pattern = cbm_glob_to_like(params->file_pattern); - snprintf(bind_buf, sizeof(bind_buf), "n.file_path LIKE ?%d", bind_idx + 1); - ADD_WHERE(bind_buf); - BIND_TEXT(like_pattern); - } - - /* Exclude labels: add NOT IN clause directly (no bind params — values are code-provided) */ - if (params->exclude_labels) { - char excl_clause[512] = "n.label NOT IN ("; - int elen = (int)strlen(excl_clause); - for (int i = 0; params->exclude_labels[i]; i++) { - if (i > 0) { - elen += snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, ","); - if (elen >= (int)sizeof(excl_clause)) { - elen = (int)sizeof(excl_clause) - 1; - } - } - elen += snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, "'%s'", - params->exclude_labels[i]); - if (elen >= (int)sizeof(excl_clause)) { - elen = (int)sizeof(excl_clause) - 1; - } - } - snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, ")"); - ADD_WHERE(excl_clause); - } - - /* Build full SQL */ - if (nparams > 0) { - snprintf(sql, sizeof(sql), "%s FROM nodes n WHERE %s", select_cols, where); - } else { - snprintf(sql, sizeof(sql), "%s FROM nodes n", select_cols); - } - - /* Degree filters: -1 = no filter, 0+ = active filter. 
- * Wraps in subquery to filter on computed degree columns. */ - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool has_degree_filter = (params->min_degree >= 0 || params->max_degree >= 0); - if (has_degree_filter) { - char inner_sql[4096]; - snprintf(inner_sql, sizeof(inner_sql), "%s", sql); - if (params->min_degree >= 0 && params->max_degree >= 0) { - snprintf( - sql, sizeof(sql), - "SELECT * FROM (%s) WHERE (in_deg + out_deg) >= %d AND (in_deg + out_deg) <= %d", - inner_sql, params->min_degree, params->max_degree); - } else if (params->min_degree >= 0) { - snprintf(sql, sizeof(sql), "SELECT * FROM (%s) WHERE (in_deg + out_deg) >= %d", - inner_sql, params->min_degree); - } else { - snprintf(sql, sizeof(sql), "SELECT * FROM (%s) WHERE (in_deg + out_deg) <= %d", - inner_sql, params->max_degree); - } - } - - /* Count query (wrap the full query) */ - snprintf(count_sql, sizeof(count_sql), "SELECT COUNT(*) FROM (%s)", sql); - - /* Add ORDER BY + LIMIT. - * When degree filter wraps in subquery, column refs lose the "n." prefix. */ - int limit = params->limit > 0 ? params->limit : 500000; - int offset = params->offset; - bool has_degree_wrap = has_degree_filter; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - const char *name_col = has_degree_wrap ? 
"name" : "n.name"; - char order_limit[128]; - snprintf(order_limit, sizeof(order_limit), " ORDER BY %s LIMIT %d OFFSET %d", name_col, limit, - offset); - strncat(sql, order_limit, sizeof(sql) - strlen(sql) - 1); - - /* Execute count query */ - sqlite3_stmt *cnt_stmt = NULL; - int rc = sqlite3_prepare_v2(s->db, count_sql, -1, &cnt_stmt, NULL); - if (rc == SQLITE_OK) { - for (int i = 0; i < bind_idx; i++) { - bind_text(cnt_stmt, i + 1, binds[i].text); - } - if (sqlite3_step(cnt_stmt) == SQLITE_ROW) { - out->total = sqlite3_column_int(cnt_stmt, 0); - } - sqlite3_finalize(cnt_stmt); - } - - /* Execute main query */ - sqlite3_stmt *main_stmt = NULL; - rc = sqlite3_prepare_v2(s->db, sql, -1, &main_stmt, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "search prepare"); - free(like_pattern); - return CBM_STORE_ERR; - } - - for (int i = 0; i < bind_idx; i++) { - bind_text(main_stmt, i + 1, binds[i].text); - } - - int cap = 16; - int n = 0; - cbm_search_result_t *results = malloc(cap * sizeof(cbm_search_result_t)); - - while (sqlite3_step(main_stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - results = safe_realloc(results, cap * sizeof(cbm_search_result_t)); - } - memset(&results[n], 0, sizeof(cbm_search_result_t)); - scan_node(main_stmt, &results[n].node); - results[n].in_degree = sqlite3_column_int(main_stmt, 9); - results[n].out_degree = sqlite3_column_int(main_stmt, 10); - n++; - } - - sqlite3_finalize(main_stmt); - free(like_pattern); - - out->results = results; - out->count = n; - return CBM_STORE_OK; -} - -void cbm_store_search_free(cbm_search_output_t *out) { - if (!out) { - return; - } - for (int i = 0; i < out->count; i++) { - cbm_search_result_t *r = &out->results[i]; - free((void *)r->node.project); - free((void *)r->node.label); - free((void *)r->node.name); - free((void *)r->node.qualified_name); - free((void *)r->node.file_path); - free((void *)r->node.properties_json); - for (int j = 0; j < r->connected_count; j++) { - free((void 
*)r->connected_names[j]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(r->connected_names); - } - free(out->results); - memset(out, 0, sizeof(*out)); -} - -/* ── BFS Traversal ──────────────────────────────────────────────── */ - -int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const char **edge_types, - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - int edge_type_count, int max_depth, int max_results, cbm_traverse_result_t *out) { - memset(out, 0, sizeof(*out)); - - /* Load root node */ - cbm_node_t root = {0}; - int rc = cbm_store_find_node_by_id(s, start_id, &root); - if (rc != CBM_STORE_OK) { - return rc; - } - out->root = root; - - /* Build edge type IN clause */ - char types_clause[512] = "'CALLS'"; - if (edge_type_count > 0) { - int tlen = 0; - for (int i = 0; i < edge_type_count; i++) { - if (i > 0) { - tlen += snprintf(types_clause + tlen, sizeof(types_clause) - (size_t)tlen, ","); - if (tlen >= (int)sizeof(types_clause)) { - tlen = (int)sizeof(types_clause) - 1; - } - } - tlen += snprintf(types_clause + tlen, sizeof(types_clause) - (size_t)tlen, "'%s'", - edge_types[i]); - if (tlen >= (int)sizeof(types_clause)) { - tlen = (int)sizeof(types_clause) - 1; - } - } - } - - /* Build recursive CTE for BFS */ - char sql[4096]; - const char *join_cond; - const char *next_id; - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - bool is_inbound = direction && strcmp(direction, "inbound") == 0; - - if (is_inbound) { - join_cond = "e.target_id = bfs.node_id"; - next_id = "e.source_id"; - } else { - join_cond = "e.source_id = bfs.node_id"; - next_id = "e.target_id"; - } - - snprintf(sql, sizeof(sql), - "WITH RECURSIVE bfs(node_id, hop) AS (" - " SELECT %lld, 0" - " UNION" - " SELECT %s, bfs.hop + 1" - " FROM bfs" - " JOIN edges e ON %s" - " WHERE e.type IN (%s) AND bfs.hop < %d" - ")" - "SELECT DISTINCT n.id, n.project, n.label, n.name, n.qualified_name, " - "n.file_path, n.start_line, 
n.end_line, n.properties, bfs.hop " - "FROM bfs " - "JOIN nodes n ON n.id = bfs.node_id " - "WHERE bfs.hop > 0 " /* exclude root */ - "ORDER BY bfs.hop " - "LIMIT %d;", - (long long)start_id, next_id, join_cond, types_clause, max_depth, max_results); - - sqlite3_stmt *stmt = NULL; - rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - if (rc != SQLITE_OK) { - store_set_error_sqlite(s, "bfs prepare"); - return CBM_STORE_ERR; - } - - int cap = 16; - int n = 0; - cbm_node_hop_t *visited = malloc(cap * sizeof(cbm_node_hop_t)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - visited = safe_realloc(visited, cap * sizeof(cbm_node_hop_t)); - } - scan_node(stmt, &visited[n].node); - visited[n].hop = sqlite3_column_int(stmt, 9); - n++; - } - - sqlite3_finalize(stmt); - - out->visited = visited; - out->visited_count = n; - - /* Collect edges between visited nodes (including root) */ - if (n > 0) { - /* Build ID set: root + all visited */ - char id_set[4096]; - int ilen = snprintf(id_set, sizeof(id_set), "%lld", (long long)start_id); - if (ilen >= (int)sizeof(id_set)) { - ilen = (int)sizeof(id_set) - 1; - } - for (int i = 0; i < n; i++) { - ilen += snprintf(id_set + ilen, sizeof(id_set) - (size_t)ilen, ",%lld", - (long long)out->visited[i].node.id); - if (ilen >= (int)sizeof(id_set)) { - ilen = (int)sizeof(id_set) - 1; - } - } - - char edge_sql[8192]; - snprintf(edge_sql, sizeof(edge_sql), - "SELECT n1.name, n2.name, e.type " - "FROM edges e " - "JOIN nodes n1 ON n1.id = e.source_id " - "JOIN nodes n2 ON n2.id = e.target_id " - "WHERE e.source_id IN (%s) AND e.target_id IN (%s) " - "AND e.type IN (%s)", - id_set, id_set, types_clause); - - sqlite3_stmt *estmt = NULL; - rc = sqlite3_prepare_v2(s->db, edge_sql, -1, &estmt, NULL); - if (rc == SQLITE_OK) { - int ecap = 8; - int en = 0; - cbm_edge_info_t *edges = malloc(ecap * sizeof(cbm_edge_info_t)); - - while (sqlite3_step(estmt) == SQLITE_ROW) { - if (en >= ecap) { - ecap *= 2; - edges = 
safe_realloc(edges, ecap * sizeof(cbm_edge_info_t)); - } - edges[en].from_name = heap_strdup((const char *)sqlite3_column_text(estmt, 0)); - edges[en].to_name = heap_strdup((const char *)sqlite3_column_text(estmt, 1)); - edges[en].type = heap_strdup((const char *)sqlite3_column_text(estmt, 2)); - edges[en].confidence = 1.0; - en++; - } - sqlite3_finalize(estmt); - - out->edges = edges; - out->edge_count = en; - } - } else { - out->edges = NULL; - out->edge_count = 0; - } - - return CBM_STORE_OK; -} - -void cbm_store_traverse_free(cbm_traverse_result_t *out) { - if (!out) { - return; - } - /* Free root */ - free((void *)out->root.project); - free((void *)out->root.label); - free((void *)out->root.name); - free((void *)out->root.qualified_name); - free((void *)out->root.file_path); - free((void *)out->root.properties_json); - - /* Free visited */ - for (int i = 0; i < out->visited_count; i++) { - cbm_node_hop_t *h = &out->visited[i]; - free((void *)h->node.project); - free((void *)h->node.label); - free((void *)h->node.name); - free((void *)h->node.qualified_name); - free((void *)h->node.file_path); - free((void *)h->node.properties_json); - } - free(out->visited); - - /* Free edges */ - for (int i = 0; i < out->edge_count; i++) { - free((void *)out->edges[i].from_name); - free((void *)out->edges[i].to_name); - free((void *)out->edges[i].type); - } - free(out->edges); - - memset(out, 0, sizeof(*out)); -} - -/* ── Impact analysis ────────────────────────────────────────────── */ - -cbm_risk_level_t cbm_hop_to_risk(int hop) { - switch (hop) { - case 1: - return CBM_RISK_CRITICAL; - case 2: - return CBM_RISK_HIGH; - case 3: - return CBM_RISK_MEDIUM; - default: - return CBM_RISK_LOW; - } -} - -const char *cbm_risk_label(cbm_risk_level_t level) { - switch (level) { - case CBM_RISK_CRITICAL: - return "CRITICAL"; - case CBM_RISK_HIGH: - return "HIGH"; - case CBM_RISK_MEDIUM: - return "MEDIUM"; - case CBM_RISK_LOW: - default: - return "LOW"; - } -} - -cbm_impact_summary_t 
cbm_build_impact_summary(const cbm_node_hop_t *hops, int hop_count, - const cbm_edge_info_t *edges, int edge_count) { - cbm_impact_summary_t s = {0}; - for (int i = 0; i < hop_count; i++) { - switch (cbm_hop_to_risk(hops[i].hop)) { - case CBM_RISK_CRITICAL: - s.critical++; - break; - case CBM_RISK_HIGH: - s.high++; - break; - case CBM_RISK_MEDIUM: - s.medium++; - break; - case CBM_RISK_LOW: - s.low++; - break; - } - s.total++; - } - for (int i = 0; i < edge_count; i++) { - if (edges[i].type && (strcmp(edges[i].type, "HTTP_CALLS") == 0 || - strcmp(edges[i].type, "ASYNC_CALLS") == 0)) { - s.has_cross_service = true; - break; - } - } - return s; -} - -int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop_t **out, - int *out_count) { - *out = NULL; - *out_count = 0; - if (hop_count == 0) { - return CBM_STORE_OK; - } - - /* Simple O(n²) dedup — keep minimum hop per node ID */ - cbm_node_hop_t *result = malloc(hop_count * sizeof(cbm_node_hop_t)); - int n = 0; - - for (int i = 0; i < hop_count; i++) { - int found = -1; - for (int j = 0; j < n; j++) { - if (result[j].node.id == hops[i].node.id) { - found = j; - break; - } - } - if (found >= 0) { - if (hops[i].hop < result[found].hop) { - result[found].hop = hops[i].hop; - } - } else { - result[n] = hops[i]; - n++; - } - } - - *out = safe_realloc(result, n * sizeof(cbm_node_hop_t)); - *out_count = n; - return CBM_STORE_OK; -} - -/* ── Schema ─────────────────────────────────────────────────────── */ - -int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out) { - memset(out, 0, sizeof(*out)); - if (!s || !s->db) { - return -1; - } - - /* Node labels */ - { - const char *sql = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1 GROUP BY label " - "ORDER BY COUNT(*) DESC;"; - sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - cbm_label_count_t *arr = malloc(cap * 
sizeof(cbm_label_count_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_label_count_t)); - } - arr[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].count = sqlite3_column_int(stmt, 1); - n++; - } - sqlite3_finalize(stmt); - out->node_labels = arr; - out->node_label_count = n; - } - - /* Edge types */ - { - const char *sql = "SELECT type, COUNT(*) FROM edges WHERE project = ?1 GROUP BY type ORDER " - "BY COUNT(*) DESC;"; - sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - cbm_type_count_t *arr = malloc(cap * sizeof(cbm_type_count_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_type_count_t)); - } - arr[n].type = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].count = sqlite3_column_int(stmt, 1); - n++; - } - sqlite3_finalize(stmt); - out->edge_types = arr; - out->edge_type_count = n; - } - - return CBM_STORE_OK; -} - -void cbm_store_schema_free(cbm_schema_info_t *out) { - if (!out) { - return; - } - for (int i = 0; i < out->node_label_count; i++) { - free((void *)out->node_labels[i].label); - } - free(out->node_labels); - - for (int i = 0; i < out->edge_type_count; i++) { - free((void *)out->edge_types[i].type); - } - free(out->edge_types); - - for (int i = 0; i < out->rel_pattern_count; i++) { - free((void *)out->rel_patterns[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->rel_patterns); - - for (int i = 0; i < out->sample_func_count; i++) { - free((void *)out->sample_func_names[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->sample_func_names); - - for (int i = 0; i < out->sample_class_count; i++) { - free((void *)out->sample_class_names[i]); - } - // 
NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->sample_class_names); - - for (int i = 0; i < out->sample_qn_count; i++) { - free((void *)out->sample_qns[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->sample_qns); - - memset(out, 0, sizeof(*out)); -} - -/* ── Architecture helpers ───────────────────────────────────────── */ - -/* Extract sub-package from QN: project.dir1.dir2.sym → dir1 (4+ parts → [2], else [1]) */ -const char *cbm_qn_to_package(const char *qn) { - if (!qn || !qn[0]) { - return ""; - } - static CBM_TLS char buf[256]; - /* Find dots and extract segment */ - const char *dots[5] = {NULL}; - int ndots = 0; - for (const char *p = qn; *p && ndots < 5; p++) { - if (*p == '.') { - dots[ndots++] = p; - } - } - /* 4+ segments: return segment[2] */ - if (ndots >= 3) { - const char *start = dots[1] + 1; - int len = (int)(dots[2] - start); - if (len > 0 && len < (int)sizeof(buf)) { - memcpy(buf, start, len); - buf[len] = '\0'; - return buf; - } - } - /* 2+ segments: return segment[1] */ - if (ndots >= 1) { - const char *start = dots[0] + 1; - const char *end = (ndots >= 2) ? dots[1] : qn + strlen(qn); - int len = (int)(end - start); - if (len > 0 && len < (int)sizeof(buf)) { - memcpy(buf, start, len); - buf[len] = '\0'; - return buf; - } - } - return ""; -} - -/* Extract top-level package from QN: project.dir1.rest → dir1 (segment[1]) */ -const char *cbm_qn_to_top_package(const char *qn) { - if (!qn || !qn[0]) { - return ""; - } - static CBM_TLS char buf[256]; - const char *first_dot = strchr(qn, '.'); - if (!first_dot) { - return ""; - } - const char *start = first_dot + 1; - const char *second_dot = strchr(start, '.'); - const char *end = second_dot ? 
second_dot : qn + strlen(qn); - int len = (int)(end - start); - if (len > 0 && len < (int)sizeof(buf)) { - memcpy(buf, start, len); - buf[len] = '\0'; - return buf; - } - return ""; -} - -bool cbm_is_test_file_path(const char *fp) { - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - return fp && fp[0] && strstr(fp, "test") != NULL; -} - -/* File extension → language name mapping */ -static const char *ext_to_lang(const char *ext) { - if (!ext) { - return NULL; - } - /* Common extensions */ - if (strcmp(ext, ".py") == 0) { - return "Python"; - } - if (strcmp(ext, ".go") == 0) { - return "Go"; - } - if (strcmp(ext, ".js") == 0 || strcmp(ext, ".jsx") == 0) { - return "JavaScript"; - } - if (strcmp(ext, ".ts") == 0 || strcmp(ext, ".tsx") == 0) { - return "TypeScript"; - } - if (strcmp(ext, ".rs") == 0) { - return "Rust"; - } - if (strcmp(ext, ".java") == 0) { - return "Java"; - } - if (strcmp(ext, ".cpp") == 0 || strcmp(ext, ".cc") == 0 || strcmp(ext, ".cxx") == 0) { - return "C++"; - } - if (strcmp(ext, ".c") == 0 || strcmp(ext, ".h") == 0) { - return "C"; - } - if (strcmp(ext, ".cs") == 0) { - return "C#"; - } - if (strcmp(ext, ".php") == 0) { - return "PHP"; - } - if (strcmp(ext, ".lua") == 0) { - return "Lua"; - } - if (strcmp(ext, ".scala") == 0) { - return "Scala"; - } - if (strcmp(ext, ".kt") == 0) { - return "Kotlin"; - } - if (strcmp(ext, ".rb") == 0) { - return "Ruby"; - } - if (strcmp(ext, ".sh") == 0 || strcmp(ext, ".bash") == 0) { - return "Bash"; - } - if (strcmp(ext, ".zig") == 0) { - return "Zig"; - } - if (strcmp(ext, ".ex") == 0 || strcmp(ext, ".exs") == 0) { - return "Elixir"; - } - if (strcmp(ext, ".hs") == 0) { - return "Haskell"; - } - if (strcmp(ext, ".ml") == 0 || strcmp(ext, ".mli") == 0) { - return "OCaml"; - } - if (strcmp(ext, ".html") == 0) { - return "HTML"; - } - if (strcmp(ext, ".css") == 0) { - return "CSS"; - } - if (strcmp(ext, ".yaml") == 0 || strcmp(ext, ".yml") == 0) { - return "YAML"; - } - if (strcmp(ext, ".toml") == 0) 
{ - return "TOML"; - } - if (strcmp(ext, ".hcl") == 0 || strcmp(ext, ".tf") == 0) { - return "HCL"; - } - if (strcmp(ext, ".sql") == 0) { - return "SQL"; - } - if (strcmp(ext, ".erl") == 0) { - return "Erlang"; - } - if (strcmp(ext, ".swift") == 0) { - return "Swift"; - } - if (strcmp(ext, ".dart") == 0) { - return "Dart"; - } - if (strcmp(ext, ".groovy") == 0) { - return "Groovy"; - } - if (strcmp(ext, ".pl") == 0) { - return "Perl"; - } - if (strcmp(ext, ".r") == 0) { - return "R"; - } - if (strcmp(ext, ".scss") == 0) { - return "SCSS"; - } - if (strcmp(ext, ".vue") == 0) { - return "Vue"; - } - if (strcmp(ext, ".svelte") == 0) { - return "Svelte"; - } - return NULL; -} - -/* Get lowercase file extension from path */ -static const char *file_ext(const char *path) { - if (!path) { - return NULL; - } - const char *dot = strrchr(path, '.'); - if (!dot) { - return NULL; - } - static CBM_TLS char buf[16]; - int len = (int)strlen(dot); - if (len >= (int)sizeof(buf)) { - return NULL; - } - for (int i = 0; i < len; i++) { - buf[i] = (char)((dot[i] >= 'A' && dot[i] <= 'Z') ? 
dot[i] + 32 : dot[i]); - } - buf[len] = '\0'; - return buf; -} - -/* ── Architecture aspect implementations ───────────────────────── */ - -static int arch_languages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_languages"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - /* Count per language using a simple parallel array */ - const char *lang_names[64]; - int lang_counts[64]; - int nlang = 0; - - while (sqlite3_step(stmt) == SQLITE_ROW) { - const char *fp = (const char *)sqlite3_column_text(stmt, 0); - const char *ext = file_ext(fp); - const char *lang = ext_to_lang(ext); - if (!lang) { - continue; - } - int found = -1; - for (int i = 0; i < nlang; i++) { - if (strcmp(lang_names[i], lang) == 0) { - found = i; - break; - } - } - if (found >= 0) { - lang_counts[found]++; - } else if (nlang < 64) { - lang_names[nlang] = lang; - lang_counts[nlang] = 1; - nlang++; - } - } - sqlite3_finalize(stmt); - - /* Sort by count descending (simple insertion sort) */ - for (int i = 1; i < nlang; i++) { - int j = i; - while (j > 0 && lang_counts[j] > lang_counts[j - 1]) { - int tc = lang_counts[j]; - lang_counts[j] = lang_counts[j - 1]; - lang_counts[j - 1] = tc; - const char *tn = lang_names[j]; - lang_names[j] = lang_names[j - 1]; - lang_names[j - 1] = tn; - j--; - } - } - if (nlang > 10) { - nlang = 10; - } - - // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) - out->languages = calloc(nlang, sizeof(cbm_language_count_t)); - out->language_count = nlang; - for (int i = 0; i < nlang; i++) { - out->languages[i].language = heap_strdup(lang_names[i]); - out->languages[i].file_count = lang_counts[i]; - } - return CBM_STORE_OK; -} - -static int arch_entry_points(cbm_store_t *s, const char *project, cbm_architecture_info_t 
*out) { - const char *sql = "SELECT name, qualified_name, file_path FROM nodes " - "WHERE project=?1 AND json_extract(properties, '$.is_entry_point') = 1 " - "AND (json_extract(properties, '$.is_test') IS NULL OR " - "json_extract(properties, '$.is_test') != 1) " - "AND file_path NOT LIKE '%test%' LIMIT 20"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_entry_points"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - cbm_entry_point_t *arr = calloc(cap, sizeof(cbm_entry_point_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_entry_point_t)); - } - arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - arr[n].file = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - n++; - } - sqlite3_finalize(stmt); - out->entry_points = arr; - out->entry_point_count = n; - return CBM_STORE_OK; -} - -static int arch_routes(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT name, properties, COALESCE(file_path, '') FROM nodes " - "WHERE project=?1 AND label='Route' " - "AND (json_extract(properties, '$.is_test') IS NULL OR " - "json_extract(properties, '$.is_test') != 1) " - "LIMIT 20"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_routes"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - cbm_route_info_t *arr = calloc(cap, sizeof(cbm_route_info_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - const char *name = (const char *)sqlite3_column_text(stmt, 0); - const char *props = (const char *)sqlite3_column_text(stmt, 1); - const char *fp = (const char *)sqlite3_column_text(stmt, 2); - if 
(cbm_is_test_file_path(fp)) { - continue; - } - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_route_info_t)); - } - - /* Parse JSON properties for method, path, handler */ - arr[n].method = heap_strdup(""); - arr[n].path = heap_strdup(name); - arr[n].handler = heap_strdup(""); - - if (props) { - /* Simple JSON extraction — find "method":"...", "path":"...", "handler":"..." */ - const char *m; - char vbuf[256]; - m = strstr(props, "\"method\""); - if (m) { - m = strchr(m + 8, '"'); - if (m) { - m++; - const char *end = strchr(m, '"'); - if (end && end - m < (int)sizeof(vbuf)) { - memcpy(vbuf, m, end - m); - vbuf[end - m] = '\0'; - free((void *)arr[n].method); - arr[n].method = heap_strdup(vbuf); - } - } - } - m = strstr(props, "\"path\""); - if (m) { - m = strchr(m + 6, '"'); - if (m) { - m++; - const char *end = strchr(m, '"'); - if (end && end - m < (int)sizeof(vbuf)) { - memcpy(vbuf, m, end - m); - vbuf[end - m] = '\0'; - free((void *)arr[n].path); - arr[n].path = heap_strdup(vbuf); - } - } - } - m = strstr(props, "\"handler\""); - if (m) { - m = strchr(m + 9, '"'); - if (m) { - m++; - const char *end = strchr(m, '"'); - if (end && end - m < (int)sizeof(vbuf)) { - memcpy(vbuf, m, end - m); - vbuf[end - m] = '\0'; - free((void *)arr[n].handler); - arr[n].handler = heap_strdup(vbuf); - } - } - } - } - n++; - } - sqlite3_finalize(stmt); - out->routes = arr; - out->route_count = n; - return CBM_STORE_OK; -} - -static int arch_hotspots(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT n.name, n.qualified_name, COUNT(*) as fan_in " - "FROM nodes n JOIN edges e ON e.target_id = n.id AND e.type = 'CALLS' " - "WHERE n.project=?1 AND n.label IN ('Function', 'Method') " - "AND (json_extract(n.properties, '$.is_test') IS NULL OR " - "json_extract(n.properties, '$.is_test') != 1) " - "AND n.file_path NOT LIKE '%test%' " - "GROUP BY n.id ORDER BY fan_in DESC LIMIT 10"; - sqlite3_stmt *stmt = NULL; - if 
(sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_hotspots"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - cbm_hotspot_t *arr = calloc(cap, sizeof(cbm_hotspot_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_hotspot_t)); - } - arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - arr[n].fan_in = sqlite3_column_int(stmt, 2); - n++; - } - sqlite3_finalize(stmt); - out->hotspots = arr; - out->hotspot_count = n; - return CBM_STORE_OK; -} - -static int arch_boundaries(cbm_store_t *s, const char *project, cbm_cross_pkg_boundary_t **out_arr, - int *out_count) { - /* Build nodeID → package map */ - const char *nsql = "SELECT id, qualified_name FROM nodes WHERE project=?1 AND label IN " - "('Function','Method','Class')"; - sqlite3_stmt *nstmt = NULL; - if (sqlite3_prepare_v2(s->db, nsql, -1, &nstmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_boundaries_nodes"); - return CBM_STORE_ERR; - } - bind_text(nstmt, 1, project); - - /* Simple parallel arrays for node → package mapping */ - int ncap = 256; - int nn = 0; - int64_t *nids = malloc(ncap * sizeof(int64_t)); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **npkgs = malloc(ncap * sizeof(char *)); - - while (sqlite3_step(nstmt) == SQLITE_ROW) { - if (nn >= ncap) { - ncap *= 2; - nids = safe_realloc(nids, ncap * sizeof(int64_t)); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - npkgs = safe_realloc(npkgs, ncap * sizeof(char *)); - } - nids[nn] = sqlite3_column_int64(nstmt, 0); - const char *qn = (const char *)sqlite3_column_text(nstmt, 1); - npkgs[nn] = heap_strdup(cbm_qn_to_package(qn)); - nn++; - } - sqlite3_finalize(nstmt); - - /* Scan edges, count cross-package calls */ - const char *esql = 
"SELECT source_id, target_id FROM edges WHERE project=?1 AND type='CALLS'"; - sqlite3_stmt *estmt = NULL; - if (sqlite3_prepare_v2(s->db, esql, -1, &estmt, NULL) != SQLITE_OK) { - for (int i = 0; i < nn; i++) { - free(npkgs[i]); - } - free(nids); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(npkgs); - store_set_error_sqlite(s, "arch_boundaries_edges"); - return CBM_STORE_ERR; - } - bind_text(estmt, 1, project); - - /* Boundary counts: parallel arrays for from→to→count */ - int bcap = 32; - int bn = 0; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **bfroms = malloc(bcap * sizeof(char *)); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **btos = malloc(bcap * sizeof(char *)); - int *bcounts = malloc(bcap * sizeof(int)); - - while (sqlite3_step(estmt) == SQLITE_ROW) { - int64_t src_id = sqlite3_column_int64(estmt, 0); - int64_t tgt_id = sqlite3_column_int64(estmt, 1); - const char *src_pkg = NULL; - const char *tgt_pkg = NULL; - for (int i = 0; i < nn; i++) { - if (nids[i] == src_id) { - src_pkg = npkgs[i]; - } - if (nids[i] == tgt_id) { - tgt_pkg = npkgs[i]; - } - } - if (!src_pkg || !tgt_pkg || !src_pkg[0] || !tgt_pkg[0] || strcmp(src_pkg, tgt_pkg) == 0) { - continue; - } - - int found = -1; - for (int i = 0; i < bn; i++) { - if (strcmp(bfroms[i], src_pkg) == 0 && strcmp(btos[i], tgt_pkg) == 0) { - found = i; - break; - } - } - if (found >= 0) { - bcounts[found]++; - } else if (bn < bcap) { - bfroms[bn] = heap_strdup(src_pkg); - btos[bn] = heap_strdup(tgt_pkg); - bcounts[bn] = 1; - bn++; - } - } - sqlite3_finalize(estmt); - for (int i = 0; i < nn; i++) { - free(npkgs[i]); - } - free(nids); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(npkgs); - - /* Sort by count descending */ - for (int i = 1; i < bn; i++) { - int j = i; - while (j > 0 && bcounts[j] > bcounts[j - 1]) { - int tc = bcounts[j]; - bcounts[j] = bcounts[j - 1]; - bcounts[j - 1] = tc; 
- char *tf = bfroms[j]; - bfroms[j] = bfroms[j - 1]; - bfroms[j - 1] = tf; - char *tt = btos[j]; - btos[j] = btos[j - 1]; - btos[j - 1] = tt; - j--; - } - } - if (bn > 10) { - for (int i = 10; i < bn; i++) { - free(bfroms[i]); - free(btos[i]); - } - bn = 10; - } - - // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) - cbm_cross_pkg_boundary_t *result = calloc(bn, sizeof(cbm_cross_pkg_boundary_t)); - for (int i = 0; i < bn; i++) { - result[i].from = bfroms[i]; - result[i].to = btos[i]; - result[i].call_count = bcounts[i]; - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(bfroms); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(btos); - free(bcounts); - *out_arr = result; - *out_count = bn; - return CBM_STORE_OK; -} - -static int arch_packages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - /* Try Package nodes first */ - const char *sql = - "SELECT n.name, COUNT(*) as cnt FROM nodes n " - "WHERE n.project=?1 AND n.label='Package' GROUP BY n.name ORDER BY cnt DESC LIMIT 15"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_packages"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - int cap = 16; - int n = 0; - cbm_package_summary_t *arr = calloc(cap, sizeof(cbm_package_summary_t)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - arr = safe_realloc(arr, cap * sizeof(cbm_package_summary_t)); - } - arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - arr[n].node_count = sqlite3_column_int(stmt, 1); - n++; - } - sqlite3_finalize(stmt); - - /* Fallback: group by QN segment if no Package nodes */ - if (n == 0) { - free(arr); - const char *qsql = "SELECT qualified_name FROM nodes WHERE project=?1 AND label IN " - "('Function','Method','Class')"; - if (sqlite3_prepare_v2(s->db, qsql, -1, &stmt, NULL) != SQLITE_OK) { - 
store_set_error_sqlite(s, "arch_packages_qn"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - /* Count per package using parallel arrays */ - char *pnames[64]; - int pcounts[64]; - int np = 0; - while (sqlite3_step(stmt) == SQLITE_ROW) { - const char *qn = (const char *)sqlite3_column_text(stmt, 0); - const char *pkg = cbm_qn_to_package(qn); - if (!pkg[0]) { - continue; - } - int found = -1; - for (int i = 0; i < np; i++) { - if (strcmp(pnames[i], pkg) == 0) { - found = i; - break; - } - } - if (found >= 0) { - { - pcounts[found]++; - } - } else if (np < 64) { - pnames[np] = heap_strdup(pkg); - pcounts[np] = 1; - np++; - } - } - sqlite3_finalize(stmt); - - /* Sort by count desc */ - for (int i = 1; i < np; i++) { - int j = i; - while (j > 0 && pcounts[j] > pcounts[j - 1]) { - int tc = pcounts[j]; - pcounts[j] = pcounts[j - 1]; - pcounts[j - 1] = tc; - char *tn = pnames[j]; - pnames[j] = pnames[j - 1]; - pnames[j - 1] = tn; - j--; - } - } -#define MAX_PREVIEW_NAMES 15 - if (np > MAX_PREVIEW_NAMES) { - for (int i = MAX_PREVIEW_NAMES; i < np; i++) { - free(pnames[i]); - } - np = MAX_PREVIEW_NAMES; - } - - // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) - arr = calloc(np, sizeof(cbm_package_summary_t)); - n = np; - for (int i = 0; i < np; i++) { - arr[i].name = pnames[i]; - arr[i].node_count = pcounts[i]; - } - } - - out->packages = arr; - out->package_count = n; - return CBM_STORE_OK; -} - -static void classify_layer(const char *pkg, int in, int out_deg, bool has_routes, - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - bool has_entry_points, const char **layer, const char **reason) { - static CBM_TLS char reason_buf[128]; - if (has_entry_points && out_deg > 0 && in == 0) { - *layer = "entry"; - *reason = "has entry points, only outbound calls"; - return; - } - if (has_routes) { - *layer = "api"; - *reason = "has HTTP route definitions"; - return; - } - if (in > out_deg && in > 3) { - snprintf(reason_buf, sizeof(reason_buf), "high 
fan-in (%d in, %d out)", in, out_deg); - *layer = "core"; - *reason = reason_buf; - return; - } - if (out_deg == 0 && in > 0) { - *layer = "leaf"; - *reason = "only inbound calls, no outbound"; - return; - } - if (in == 0 && out_deg > 0) { - *layer = "entry"; - *reason = "only outbound calls"; - return; - } - snprintf(reason_buf, sizeof(reason_buf), "fan-in=%d, fan-out=%d", in, out_deg); - *layer = "internal"; - *reason = reason_buf; - (void)pkg; -} - -static int arch_layers(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - /* Get boundaries for fan analysis */ - cbm_cross_pkg_boundary_t *boundaries = NULL; - int bcount = 0; - int rc = arch_boundaries(s, project, &boundaries, &bcount); - if (rc != CBM_STORE_OK) { - return rc; - } - - /* Check which packages have Route nodes */ - char *route_pkgs[32]; - int nrpkgs = 0; - { - const char *sql = "SELECT qualified_name FROM nodes WHERE project=?1 AND label='Route'"; - sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - bind_text(stmt, 1, project); - while (sqlite3_step(stmt) == SQLITE_ROW && nrpkgs < 32) { - const char *qn = (const char *)sqlite3_column_text(stmt, 0); - route_pkgs[nrpkgs++] = heap_strdup(cbm_qn_to_package(qn)); - } - sqlite3_finalize(stmt); - } - - /* Check which packages have entry points */ - char *entry_pkgs[32]; - int nepkgs = 0; - { - const char *sql = "SELECT qualified_name FROM nodes WHERE project=?1 AND " - "json_extract(properties, '$.is_entry_point') = 1"; - sqlite3_stmt *stmt = NULL; - sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); - bind_text(stmt, 1, project); - while (sqlite3_step(stmt) == SQLITE_ROW && nepkgs < 32) { - const char *qn = (const char *)sqlite3_column_text(stmt, 0); - entry_pkgs[nepkgs++] = heap_strdup(cbm_qn_to_package(qn)); - } - sqlite3_finalize(stmt); - } - - /* Compute fan-in/out per package */ - char *all_pkgs[64]; - int fan_in[64]; - int fan_out[64]; - int npkgs = 0; - memset(fan_in, 0, sizeof(fan_in)); - 
memset(fan_out, 0, sizeof(fan_out)); - - for (int i = 0; i < bcount; i++) { - /* Add or find "from" package */ - int fi = -1; - for (int j = 0; j < npkgs; j++) { - if (strcmp(all_pkgs[j], boundaries[i].from) == 0) { - fi = j; - break; - } - } - if (fi < 0 && npkgs < 64) { - fi = npkgs; - all_pkgs[npkgs] = heap_strdup(boundaries[i].from); - npkgs++; - } - if (fi >= 0) { - fan_out[fi] += boundaries[i].call_count; - } - - int ti = -1; - for (int j = 0; j < npkgs; j++) { - if (strcmp(all_pkgs[j], boundaries[i].to) == 0) { - ti = j; - break; - } - } - if (ti < 0 && npkgs < 64) { - ti = npkgs; - all_pkgs[npkgs] = heap_strdup(boundaries[i].to); - npkgs++; - } - if (ti >= 0) { - fan_in[ti] += boundaries[i].call_count; - } - } - - /* Also include route/entry packages */ - for (int i = 0; i < nrpkgs; i++) { - int found = -1; - for (int j = 0; j < npkgs; j++) { - if (strcmp(all_pkgs[j], route_pkgs[i]) == 0) { - found = j; - break; - } - } - if (found < 0 && npkgs < 64) { - all_pkgs[npkgs] = heap_strdup(route_pkgs[i]); - npkgs++; - } - } - for (int i = 0; i < nepkgs; i++) { - int found = -1; - for (int j = 0; j < npkgs; j++) { - if (strcmp(all_pkgs[j], entry_pkgs[i]) == 0) { - found = j; - break; - } - } - if (found < 0 && npkgs < 64) { - all_pkgs[npkgs] = heap_strdup(entry_pkgs[i]); - npkgs++; - } - } - - /* Classify each package */ - // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) - out->layers = calloc(npkgs, sizeof(cbm_package_layer_t)); - out->layer_count = npkgs; - for (int i = 0; i < npkgs; i++) { - bool has_route = false, has_entry = false; - for (int j = 0; j < nrpkgs; j++) { - if (strcmp(all_pkgs[i], route_pkgs[j]) == 0) { - has_route = true; - break; - } - } - for (int j = 0; j < nepkgs; j++) { - if (strcmp(all_pkgs[i], entry_pkgs[j]) == 0) { - has_entry = true; - break; - } - } - const char *layer; - const char *reason; - classify_layer(all_pkgs[i], fan_in[i], fan_out[i], has_route, has_entry, &layer, &reason); - out->layers[i].name = all_pkgs[i]; /* 
transfer ownership */ - out->layers[i].layer = heap_strdup(layer); - out->layers[i].reason = heap_strdup(reason); - } - - /* Sort layers by name */ - for (int i = 1; i < npkgs; i++) { - int j = i; - while (j > 0 && strcmp(out->layers[j].name, out->layers[j - 1].name) < 0) { - cbm_package_layer_t tmp = out->layers[j]; - out->layers[j] = out->layers[j - 1]; - out->layers[j - 1] = tmp; - j--; - } - } - - /* Cleanup */ - for (int i = 0; i < bcount; i++) { - free((void *)boundaries[i].from); - free((void *)boundaries[i].to); - } - free(boundaries); - for (int i = 0; i < nrpkgs; i++) { - free(route_pkgs[i]); - } - for (int i = 0; i < nepkgs; i++) { - free(entry_pkgs[i]); - } - - return CBM_STORE_OK; -} - -static int arch_file_tree(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { - const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "arch_file_tree"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - /* Collect all file paths + build directory children map */ - int fcap = 32; - int fn = 0; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **files = malloc(fcap * sizeof(char *)); - - /* Directory tree: parallel arrays of dir → children set */ - int dcap = 64; - int dn = 0; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **dir_paths = calloc(dcap, sizeof(char *)); - int *dir_child_counts = calloc(dcap, sizeof(int)); - /* Track unique children per dir using a simple string array */ - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char ***dir_children = calloc(dcap, sizeof(char **)); - int *dir_children_caps = calloc(dcap, sizeof(int)); - - while (sqlite3_step(stmt) == SQLITE_ROW) { - const char *fp = (const char *)sqlite3_column_text(stmt, 0); - if (!fp) { - continue; - } - if (fn >= fcap) { - fcap *= 2; - // 
NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - files = safe_realloc(files, fcap * sizeof(char *)); - } - files[fn++] = heap_strdup(fp); - - /* Register path components in dir tree (up to 3 levels deep) */ - char tmp[512]; - strncpy(tmp, fp, sizeof(tmp) - 1); - tmp[sizeof(tmp) - 1] = '\0'; - - /* Split by '/' */ - char *parts[16]; - int nparts = 0; - char *p = tmp; - parts[nparts++] = p; - while (*p && nparts < 16) { - if (*p == '/') { - *p = '\0'; - parts[nparts++] = p + 1; - } - p++; - } - - /* Register root children */ - { - int ri = -1; - for (int i = 0; i < dn; i++) { - if (strcmp(dir_paths[i], "") == 0) { - ri = i; - break; - } - } - if (ri < 0 && dn < dcap) { - ri = dn; - dir_paths[dn] = heap_strdup(""); - dir_child_counts[dn] = 0; - dir_children[dn] = NULL; - dir_children_caps[dn] = 0; - dn++; - } - if (ri >= 0 && nparts > 0) { - /* Check if child already exists */ - bool exists = false; - for (int k = 0; k < dir_child_counts[ri]; k++) { - if (strcmp(dir_children[ri][k], parts[0]) == 0) { - exists = true; - break; - } - } - if (!exists) { - if (dir_child_counts[ri] >= dir_children_caps[ri]) { - dir_children_caps[ri] = - dir_children_caps[ri] ? dir_children_caps[ri] * 2 : 4; - dir_children[ri] = - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - realloc(dir_children[ri], dir_children_caps[ri] * sizeof(char *)); - } - dir_children[ri][dir_child_counts[ri]++] = heap_strdup(parts[0]); - } - } - } - - /* Register deeper dir children (depth 0..2) */ - for (int depth = 0; depth < nparts - 1 && depth < 3; depth++) { - /* Build dir path */ - char dir[512] = ""; - for (int k = 0; k <= depth; k++) { - if (k > 0) { - strcat(dir, "/"); - } - strcat(dir, parts[k]); - } - const char *child = (depth + 1 < nparts) ? 
parts[depth + 1] : NULL; - if (!child) { - continue; - } - - int di = -1; - for (int i = 0; i < dn; i++) { - if (strcmp(dir_paths[i], dir) == 0) { - di = i; - break; - } - } - if (di < 0 && dn < dcap) { - di = dn; - dir_paths[dn] = heap_strdup(dir); - dir_child_counts[dn] = 0; - dir_children[dn] = NULL; - dir_children_caps[dn] = 0; - dn++; - } - if (di >= 0) { - bool exists = false; - for (int k = 0; k < dir_child_counts[di]; k++) { - if (strcmp(dir_children[di][k], child) == 0) { - exists = true; - break; - } - } - if (!exists) { - if (dir_child_counts[di] >= dir_children_caps[di]) { - dir_children_caps[di] = - dir_children_caps[di] ? dir_children_caps[di] * 2 : 4; - dir_children[di] = - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - realloc(dir_children[di], dir_children_caps[di] * sizeof(char *)); - } - dir_children[di][dir_child_counts[di]++] = heap_strdup(child); - } - } - } - } - sqlite3_finalize(stmt); - - /* Build file set for type detection */ - /* Collect tree entries */ - int ecap = 64; - int en = 0; - cbm_file_tree_entry_t *entries = calloc(ecap, sizeof(cbm_file_tree_entry_t)); - - /* Root children */ - for (int i = 0; i < dn; i++) { - if (strcmp(dir_paths[i], "") != 0) { - continue; - } - for (int k = 0; k < dir_child_counts[i]; k++) { - if (en >= ecap) { - ecap *= 2; - entries = safe_realloc(entries, ecap * sizeof(cbm_file_tree_entry_t)); - } - const char *child = dir_children[i][k]; - /* Check if it's a file */ - bool is_file = false; - for (int f = 0; f < fn; f++) { - if (strcmp(files[f], child) == 0) { - is_file = true; - break; - } - } - /* Count its children in dir tree */ - int nch = 0; - for (int d = 0; d < dn; d++) { - if (strcmp(dir_paths[d], child) == 0) { - nch = dir_child_counts[d]; - break; - } - } - entries[en].path = heap_strdup(child); - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - entries[en].type = heap_strdup(is_file ? 
"file" : "dir"); - entries[en].children = nch; - en++; - } - } - - /* Non-root dir children */ - for (int i = 0; i < dn; i++) { - if (strcmp(dir_paths[i], "") == 0) { - continue; - } - /* Limit depth to 3 levels */ - int slashes = 0; - // NOLINTNEXTLINE(clang-analyzer-security.ArrayBound) - for (const char *p = dir_paths[i]; *p; p++) { - if (*p == '/') { - slashes++; - } - } - if (slashes >= 3) { - continue; - } - - for (int k = 0; k < dir_child_counts[i]; k++) { - if (en >= ecap) { - ecap *= 2; - entries = safe_realloc(entries, ecap * sizeof(cbm_file_tree_entry_t)); - } - char path[512]; - snprintf(path, sizeof(path), "%s/%s", dir_paths[i], dir_children[i][k]); - bool is_file = false; - for (int f = 0; f < fn; f++) { - if (strcmp(files[f], path) == 0) { - is_file = true; - break; - } - } - int nch = 0; - for (int d = 0; d < dn; d++) { - if (strcmp(dir_paths[d], path) == 0) { - nch = dir_child_counts[d]; - break; - } - } - entries[en].path = heap_strdup(path); - // NOLINTNEXTLINE(readability-implicit-bool-conversion) - entries[en].type = heap_strdup(is_file ? 
"file" : "dir"); - entries[en].children = nch; - en++; - } - } - - /* Sort by path */ - for (int i = 1; i < en; i++) { - int j = i; - while (j > 0 && strcmp(entries[j].path, entries[j - 1].path) < 0) { - cbm_file_tree_entry_t tmp = entries[j]; - entries[j] = entries[j - 1]; - entries[j - 1] = tmp; - j--; - } - } - - /* Cleanup dir tree */ - for (int i = 0; i < dn; i++) { - free(dir_paths[i]); - for (int k = 0; k < dir_child_counts[i]; k++) { - free(dir_children[i][k]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(dir_children[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(dir_paths); - free(dir_child_counts); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(dir_children); - free(dir_children_caps); - for (int i = 0; i < fn; i++) { - free(files[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(files); - - out->file_tree = entries; - out->file_tree_count = en; - return CBM_STORE_OK; -} - -/* ── Louvain community detection ───────────────────────────────── */ - -int cbm_louvain(const int64_t *nodes, int node_count, const cbm_louvain_edge_t *edges, - int edge_count, cbm_louvain_result_t **out, int *out_count) { - if (node_count <= 0) { - *out = NULL; - *out_count = 0; - return CBM_STORE_OK; - } - - int n = node_count; - - /* Build adjacency: edge weights */ - int wcap = edge_count > 0 ? 
edge_count : 1; - int wn = 0; - int *wsi = malloc(wcap * sizeof(int)); - int *wdi = malloc(wcap * sizeof(int)); - double *ww = malloc(wcap * sizeof(double)); - - /* Map node IDs to indices */ - for (int e = 0; e < edge_count; e++) { - int si = -1; - int di = -1; - for (int i = 0; i < n; i++) { - if (nodes[i] == edges[e].src) { - si = i; - } - if (nodes[i] == edges[e].dst) { - di = i; - } - } - if (si < 0 || di < 0 || si == di) { - continue; - } - /* Normalize edge key */ - if (si > di) { - int tmp = si; - si = di; - di = tmp; - } - /* Check if already exists */ - int found = -1; - for (int i = 0; i < wn; i++) { - if (wsi[i] == si && wdi[i] == di) { - found = i; - break; - } - } - if (found >= 0) { - ww[found] += 1.0; - } else { - if (wn >= wcap) { - wcap *= 2; - wsi = safe_realloc(wsi, wcap * sizeof(int)); - wdi = safe_realloc(wdi, wcap * sizeof(int)); - ww = safe_realloc(ww, wcap * sizeof(double)); - } - wsi[wn] = si; - wdi[wn] = di; - ww[wn] = 1.0; - wn++; - } - } - - /* Build adjacency lists */ - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - int **adj = calloc(n, sizeof(int *)); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - double **adj_w = calloc(n, sizeof(double *)); - int *adj_n = calloc(n, sizeof(int)); - int *adj_cap = calloc(n, sizeof(int)); - - double total_weight = 0; - for (int i = 0; i < wn; i++) { - int si = wsi[i]; - int di = wdi[i]; - double w = ww[i]; - total_weight += w; - - /* Add si → di */ - if (adj_n[si] >= adj_cap[si]) { - adj_cap[si] = adj_cap[si] ? adj_cap[si] * 2 : 4; - adj[si] = safe_realloc(adj[si], adj_cap[si] * sizeof(int)); - adj_w[si] = safe_realloc(adj_w[si], adj_cap[si] * sizeof(double)); - } - adj[si][adj_n[si]] = di; - adj_w[si][adj_n[si]] = w; - adj_n[si]++; - - /* Add di → si */ - if (adj_n[di] >= adj_cap[di]) { - adj_cap[di] = adj_cap[di] ? 
adj_cap[di] * 2 : 4; - adj[di] = safe_realloc(adj[di], adj_cap[di] * sizeof(int)); - adj_w[di] = safe_realloc(adj_w[di], adj_cap[di] * sizeof(double)); - } - adj[di][adj_n[di]] = si; - adj_w[di][adj_n[di]] = w; - adj_n[di]++; - } - free(wsi); - free(wdi); - free(ww); - - /* Initialize communities */ - int *community = malloc(n * sizeof(int)); - for (int i = 0; i < n; i++) { - community[i] = i; - } - - if (total_weight == 0) { - /* No edges: each node in its own community */ - cbm_louvain_result_t *result = malloc(n * sizeof(cbm_louvain_result_t)); - for (int i = 0; i < n; i++) { - result[i].node_id = nodes[i]; - result[i].community = i; - } - *out = result; - *out_count = n; - free(community); - for (int i = 0; i < n; i++) { - free(adj[i]); - free(adj_w[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(adj); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(adj_w); - free(adj_n); - free(adj_cap); - return CBM_STORE_OK; - } - - /* Compute node degrees */ - double *degree = calloc(n, sizeof(double)); - for (int i = 0; i < n; i++) { - for (int j = 0; j < adj_n[i]; j++) { - degree[i] += adj_w[i][j]; - } - } - - /* Main Louvain loop (10 iterations max) */ - for (int iter = 0; iter < 10; iter++) { - bool improved = false; - - /* Community total degree */ - double *comm_degree = calloc(n, sizeof(double)); - for (int i = 0; i < n; i++) { - comm_degree[community[i]] += degree[i]; - } - - /* Random order (simple LCG shuffle) */ - int *order = calloc(n, sizeof(int)); - for (int i = 0; i < n; i++) { - order[i] = i; - } - unsigned int seed = (unsigned int)((iter * 1000) + n); - for (int i = n - 1; i > 0; i--) { -/* Linear congruential generator (glibc constants) */ -#define LCG_MULTIPLIER 1103515245U -#define LCG_INCREMENT 12345U - seed = (seed * LCG_MULTIPLIER) + LCG_INCREMENT; - int j = (int)((seed >> 16) % (unsigned int)(i + 1)); - int tmp = order[i]; - order[i] = order[j]; - order[j] = tmp; - } - - for (int oi = 
0; oi < n; oi++) { - int i = order[oi]; - int cur_comm = community[i]; - - /* Compute weights to neighboring communities */ - double *nc_weight = calloc(n, sizeof(double)); - bool *nc_seen = calloc(n, sizeof(bool)); - for (int j = 0; j < adj_n[i]; j++) { - int nc = community[adj[i][j]]; - nc_weight[nc] += adj_w[i][j]; - nc_seen[nc] = true; - } - - /* Remove node from community */ - comm_degree[cur_comm] -= degree[i]; - - int best_comm = cur_comm; - double best_gain = 0.0; - - for (int c = 0; c < n; c++) { - if (!nc_seen[c]) { - continue; - } - double gain = nc_weight[c] - (degree[i] * comm_degree[c] / (2.0 * total_weight)); - if (gain > best_gain) { - best_gain = gain; - best_comm = c; - } - } - - /* Also consider staying */ - double cur_gain = - nc_weight[cur_comm] - (degree[i] * comm_degree[cur_comm] / (2.0 * total_weight)); - if (cur_gain >= best_gain) { - best_comm = cur_comm; - } - - community[i] = best_comm; - comm_degree[best_comm] += degree[i]; - - if (best_comm != cur_comm) { - improved = true; - } - - free(nc_weight); - free(nc_seen); - } - free(order); - free(comm_degree); - - if (!improved) { - break; - } - } - - /* Build result */ - cbm_louvain_result_t *result = malloc(n * sizeof(cbm_louvain_result_t)); - for (int i = 0; i < n; i++) { - result[i].node_id = nodes[i]; - result[i].community = community[i]; - } - *out = result; - *out_count = n; - - free(community); - free(degree); - for (int i = 0; i < n; i++) { - free(adj[i]); - free(adj_w[i]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(adj); - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(adj_w); - free(adj_n); - free(adj_cap); - return CBM_STORE_OK; -} - -/* ── GetArchitecture dispatch ──────────────────────────────────── */ - -static bool want_aspect(const char **aspects, int aspect_count, const char *name) { - if (!aspects || aspect_count == 0) { - return true; - } - for (int i = 0; i < aspect_count; i++) { - if (strcmp(aspects[i], 
"all") == 0) { - return true; - } - if (strcmp(aspects[i], name) == 0) { - return true; - } - } - return false; -} - -int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char **aspects, - int aspect_count, cbm_architecture_info_t *out) { - memset(out, 0, sizeof(*out)); - int rc; - - if (want_aspect(aspects, aspect_count, "languages")) { - rc = arch_languages(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "packages")) { - rc = arch_packages(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "entry_points")) { - rc = arch_entry_points(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "routes")) { - rc = arch_routes(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "hotspots")) { - rc = arch_hotspots(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "boundaries")) { - cbm_cross_pkg_boundary_t *barr = NULL; - int bcount = 0; - rc = arch_boundaries(s, project, &barr, &bcount); - if (rc != CBM_STORE_OK) { - return rc; - } - out->boundaries = barr; - out->boundary_count = bcount; - } - if (want_aspect(aspects, aspect_count, "layers")) { - rc = arch_layers(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - if (want_aspect(aspects, aspect_count, "file_tree")) { - rc = arch_file_tree(s, project, out); - if (rc != CBM_STORE_OK) { - return rc; - } - } - - return CBM_STORE_OK; -} - -void cbm_store_architecture_free(cbm_architecture_info_t *out) { - if (!out) { - return; - } - for (int i = 0; i < out->language_count; i++) { - free((void *)out->languages[i].language); - } - free(out->languages); - for (int i = 0; i < out->package_count; i++) { - free((void *)out->packages[i].name); - } - free(out->packages); - for (int i = 0; i < out->entry_point_count; 
i++) { - free((void *)out->entry_points[i].name); - free((void *)out->entry_points[i].qualified_name); - free((void *)out->entry_points[i].file); - } - free(out->entry_points); - for (int i = 0; i < out->route_count; i++) { - free((void *)out->routes[i].method); - free((void *)out->routes[i].path); - free((void *)out->routes[i].handler); - } - free(out->routes); - for (int i = 0; i < out->hotspot_count; i++) { - free((void *)out->hotspots[i].name); - free((void *)out->hotspots[i].qualified_name); - } - free(out->hotspots); - for (int i = 0; i < out->boundary_count; i++) { - free((void *)out->boundaries[i].from); - free((void *)out->boundaries[i].to); - } - free(out->boundaries); - for (int i = 0; i < out->service_count; i++) { - free((void *)out->services[i].from); - free((void *)out->services[i].to); - free((void *)out->services[i].type); - } - free(out->services); - for (int i = 0; i < out->layer_count; i++) { - free((void *)out->layers[i].name); - free((void *)out->layers[i].layer); - free((void *)out->layers[i].reason); - } - free(out->layers); - for (int i = 0; i < out->cluster_count; i++) { - free((void *)out->clusters[i].label); - for (int j = 0; j < out->clusters[i].top_node_count; j++) { - free((void *)out->clusters[i].top_nodes[j]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->clusters[i].top_nodes); - for (int j = 0; j < out->clusters[i].package_count; j++) { - free((void *)out->clusters[i].packages[j]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->clusters[i].packages); - for (int j = 0; j < out->clusters[i].edge_type_count; j++) { - free((void *)out->clusters[i].edge_types[j]); - } - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - free(out->clusters[i].edge_types); - } - free(out->clusters); - for (int i = 0; i < out->file_tree_count; i++) { - free((void *)out->file_tree[i].path); - free((void *)out->file_tree[i].type); - } - free(out->file_tree); - 
memset(out, 0, sizeof(*out)); -} - -/* ── ADR (Architecture Decision Record) ────────────────────────── */ - -static const char *canonical_sections[] = {"PURPOSE", "STACK", "ARCHITECTURE", - "PATTERNS", "TRADEOFFS", "PHILOSOPHY"}; -static const int canonical_section_count = 6; - -static bool is_canonical_section(const char *name) { - for (int i = 0; i < canonical_section_count; i++) { - if (strcmp(name, canonical_sections[i]) == 0) { - return true; - } - } - return false; -} - -cbm_adr_sections_t cbm_adr_parse_sections(const char *content) { - cbm_adr_sections_t result; - memset(&result, 0, sizeof(result)); - if (!content || !content[0]) { - return result; - } - - const char *p = content; - char *current_section = NULL; - char current_content[8192] = ""; - int content_len = 0; - - while (*p) { - /* Find end of line */ - const char *eol = strchr(p, '\n'); - int line_len = eol ? (int)(eol - p) : (int)strlen(p); - - /* Check for canonical section header */ - if (line_len > 3 && p[0] == '#' && p[1] == '#' && p[2] == ' ') { - char header[64]; - int hlen = line_len - 3; - if (hlen >= (int)sizeof(header)) { - hlen = (int)sizeof(header) - 1; - } - memcpy(header, p + 3, hlen); - header[hlen] = '\0'; - /* Trim trailing whitespace */ - while (hlen > 0 && (header[hlen - 1] == ' ' || header[hlen - 1] == '\t' || - header[hlen - 1] == '\r')) { - header[--hlen] = '\0'; - } - - if (is_canonical_section(header)) { - /* Save previous section */ - if (current_section && result.count < 16) { - /* Trim content */ - while (content_len > 0 && (current_content[content_len - 1] == '\n' || - current_content[content_len - 1] == ' ')) { - current_content[--content_len] = '\0'; - } - /* Skip leading whitespace */ - char *trimmed = current_content; - while (*trimmed == '\n' || *trimmed == ' ') { - trimmed++; - } - result.keys[result.count] = current_section; - result.values[result.count] = heap_strdup(trimmed); - result.count++; - } - current_section = heap_strdup(header); - current_content[0] = 
'\0'; - content_len = 0; - p = eol ? eol + 1 : p + line_len; - continue; - } - } - - /* Append line to current content */ - if (current_section) { - if (content_len > 0 || line_len > 0) { - if (content_len > 0) { - current_content[content_len++] = '\n'; - } - if (content_len + line_len < (int)sizeof(current_content) - 1) { - memcpy(current_content + content_len, p, line_len); - content_len += line_len; - current_content[content_len] = '\0'; - } - } - } - - p = eol ? eol + 1 : p + line_len; - } - - /* Save last section */ - if (current_section && result.count < 16) { - while (content_len > 0 && (current_content[content_len - 1] == '\n' || - current_content[content_len - 1] == ' ')) { - current_content[--content_len] = '\0'; - } - char *trimmed = current_content; - while (*trimmed == '\n' || *trimmed == ' ') { - trimmed++; - } - result.keys[result.count] = current_section; - result.values[result.count] = heap_strdup(trimmed); - result.count++; - } - - return result; -} - -char *cbm_adr_render(const cbm_adr_sections_t *sections) { - if (!sections || sections->count == 0) { - return heap_strdup(""); - } - - char buf[16384] = ""; - int pos = 0; - bool rendered[16] = {false}; - - /* Canonical sections first, in order */ - for (int c = 0; c < canonical_section_count; c++) { - for (int i = 0; i < sections->count; i++) { - if (rendered[i]) { - continue; - } - if (strcmp(sections->keys[i], canonical_sections[c]) == 0) { - if (pos > 0) { - pos += snprintf(buf + pos, sizeof(buf) - pos, "\n\n"); - } - pos += snprintf(buf + pos, sizeof(buf) - pos, "## %s\n%s", sections->keys[i], - sections->values[i]); - rendered[i] = true; - break; - } - } - } - - /* Non-canonical sections alphabetically */ - /* Collect indices of non-rendered sections */ - int extra[16]; - int nextra = 0; - for (int i = 0; i < sections->count; i++) { - if (!rendered[i]) { - extra[nextra++] = i; - } - } - /* Sort extra by key name */ - for (int i = 1; i < nextra; i++) { - int j = i; - while (j > 0 && 
strcmp(sections->keys[extra[j]], sections->keys[extra[j - 1]]) < 0) { - int tmp = extra[j]; - extra[j] = extra[j - 1]; - extra[j - 1] = tmp; - j--; - } - } - for (int i = 0; i < nextra; i++) { - int idx = extra[i]; - if (pos > 0) { - pos += snprintf(buf + pos, sizeof(buf) - pos, "\n\n"); - } - pos += snprintf(buf + pos, sizeof(buf) - pos, "## %s\n%s", sections->keys[idx], - sections->values[idx]); - } - - return heap_strdup(buf); -} - -int cbm_adr_validate_content(const char *content, char *errbuf, int errbuf_size) { - cbm_adr_sections_t sections = cbm_adr_parse_sections(content); - char missing[256] = ""; - int mlen = 0; - int nmissing = 0; - - for (int c = 0; c < canonical_section_count; c++) { - bool found = false; - for (int i = 0; i < sections.count; i++) { - if (strcmp(sections.keys[i], canonical_sections[c]) == 0) { - found = true; - break; - } - } - if (!found) { - if (mlen > 0) { - mlen += snprintf(missing + mlen, sizeof(missing) - mlen, ", "); - } - mlen += snprintf(missing + mlen, sizeof(missing) - mlen, "%s", canonical_sections[c]); - nmissing++; - } - } - cbm_adr_sections_free(§ions); - - if (nmissing > 0) { - snprintf(errbuf, errbuf_size, - "missing required sections: %s. 
All 6 required: PURPOSE, STACK, ARCHITECTURE, " - "PATTERNS, TRADEOFFS, PHILOSOPHY", - missing); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -int cbm_adr_validate_section_keys(const char **keys, int count, char *errbuf, int errbuf_size) { - char invalid[256] = ""; - int ilen = 0; - int ninvalid = 0; - - /* Collect and sort invalid keys */ - const char *inv_keys[16]; - int inv_n = 0; - for (int i = 0; i < count; i++) { - if (!is_canonical_section(keys[i])) { - if (inv_n < 16) { - inv_keys[inv_n++] = keys[i]; - } - } - } - /* Sort alphabetically */ - for (int i = 1; i < inv_n; i++) { - int j = i; - while (j > 0 && strcmp(inv_keys[j], inv_keys[j - 1]) < 0) { - const char *tmp = inv_keys[j]; - inv_keys[j] = inv_keys[j - 1]; - inv_keys[j - 1] = tmp; - j--; - } - } - - for (int i = 0; i < inv_n; i++) { - if (ilen > 0) { - ilen += snprintf(invalid + ilen, sizeof(invalid) - ilen, ", "); - } - ilen += snprintf(invalid + ilen, sizeof(invalid) - ilen, "%s", inv_keys[i]); - ninvalid++; - } - - if (ninvalid > 0) { - snprintf(errbuf, errbuf_size, - "invalid section names: %s. 
Valid sections: PURPOSE, STACK, ARCHITECTURE, " - "PATTERNS, TRADEOFFS, PHILOSOPHY", - invalid); - return CBM_STORE_ERR; - } - return CBM_STORE_OK; -} - -void cbm_adr_sections_free(cbm_adr_sections_t *s) { - if (!s) { - return; - } - for (int i = 0; i < s->count; i++) { - free(s->keys[i]); - free(s->values[i]); - } - memset(s, 0, sizeof(*s)); -} - -int cbm_store_adr_store(cbm_store_t *s, const char *project, const char *content) { - char now[32]; - iso_now(now, sizeof(now)); - - const char *sql = - "INSERT INTO project_summaries (project, summary, source_hash, created_at, updated_at) " - "VALUES (?1, ?2, '', ?3, ?4) " - "ON CONFLICT(project) DO UPDATE SET summary=excluded.summary, " - "updated_at=excluded.updated_at"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "adr_store"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - bind_text(stmt, 2, content); - bind_text(stmt, 3, now); - bind_text(stmt, 4, now); - int rc = sqlite3_step(stmt); - sqlite3_finalize(stmt); - return (rc == SQLITE_DONE) ? 
CBM_STORE_OK : CBM_STORE_ERR; -} - -int cbm_store_adr_get(cbm_store_t *s, const char *project, cbm_adr_t *out) { - const char *sql = - "SELECT project, summary, created_at, updated_at FROM project_summaries WHERE project=?1"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "adr_get"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - int rc = sqlite3_step(stmt); - if (rc != SQLITE_ROW) { - sqlite3_finalize(stmt); - store_set_error(s, "no ADR found"); - return CBM_STORE_NOT_FOUND; - } - out->project = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - out->content = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); - out->created_at = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); - out->updated_at = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); - sqlite3_finalize(stmt); - return CBM_STORE_OK; -} - -int cbm_store_adr_delete(cbm_store_t *s, const char *project) { - const char *sql = "DELETE FROM project_summaries WHERE project=?1"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "adr_delete"); - return CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - int rc = sqlite3_step(stmt); - int changes = sqlite3_changes(s->db); - sqlite3_finalize(stmt); - if (rc != SQLITE_DONE) { - return CBM_STORE_ERR; - } - if (changes == 0) { - store_set_error(s, "no ADR found"); - return CBM_STORE_NOT_FOUND; - } - return CBM_STORE_OK; -} - -// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -int cbm_store_adr_update_sections(cbm_store_t *s, const char *project, const char **keys, - const char **values, int count, cbm_adr_t *out) { - /* Get existing ADR */ - cbm_adr_t existing; - int rc = cbm_store_adr_get(s, project, &existing); - if (rc != CBM_STORE_OK) { - store_set_error(s, "no existing ADR to update"); - return rc; - } - - /* Parse existing sections */ - cbm_adr_sections_t 
sections = cbm_adr_parse_sections(existing.content); - cbm_store_adr_free(&existing); - - /* Merge new sections */ - for (int i = 0; i < count; i++) { - bool found = false; - for (int j = 0; j < sections.count; j++) { - if (strcmp(sections.keys[j], keys[i]) == 0) { - free(sections.values[j]); - sections.values[j] = heap_strdup(values[i]); - found = true; - break; - } - } - if (!found && sections.count < 16) { - sections.keys[sections.count] = heap_strdup(keys[i]); - sections.values[sections.count] = heap_strdup(values[i]); - sections.count++; - } - } - - /* Render merged */ - char *merged = cbm_adr_render(§ions); - cbm_adr_sections_free(§ions); - - /* Check length */ - if ((int)strlen(merged) > CBM_ADR_MAX_LENGTH) { - char msg[128]; - snprintf(msg, sizeof(msg), "merged ADR exceeds %d chars (%d chars)", CBM_ADR_MAX_LENGTH, - (int)strlen(merged)); - store_set_error(s, msg); - free(merged); - return CBM_STORE_ERR; - } - - /* Store merged */ - rc = cbm_store_adr_store(s, project, merged); - free(merged); - if (rc != CBM_STORE_OK) { - return rc; - } - - return cbm_store_adr_get(s, project, out); -} - -void cbm_store_adr_free(cbm_adr_t *adr) { - if (!adr) { - return; - } - free((void *)adr->project); - free((void *)adr->content); - free((void *)adr->created_at); - free((void *)adr->updated_at); - memset(adr, 0, sizeof(*adr)); -} - -/* ── Architecture doc discovery ────────────────────────────────── */ - -int cbm_store_find_architecture_docs(cbm_store_t *s, const char *project, char ***out, int *count) { - const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File' " - "AND (file_path LIKE '%ARCHITECTURE.md' OR file_path LIKE '%ADR.md' " - "OR file_path LIKE '%DECISIONS.md' OR file_path LIKE 'docs/adr/%' " - "OR file_path LIKE 'doc/adr/%' OR file_path LIKE 'adr/%') " - "ORDER BY file_path LIMIT 20"; - sqlite3_stmt *stmt = NULL; - if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { - store_set_error_sqlite(s, "find_arch_docs"); - return 
CBM_STORE_ERR; - } - bind_text(stmt, 1, project); - - int cap = 8; - int n = 0; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - char **arr = malloc(cap * sizeof(char *)); - while (sqlite3_step(stmt) == SQLITE_ROW) { - if (n >= cap) { - cap *= 2; - // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) - arr = safe_realloc(arr, cap * sizeof(char *)); - } - arr[n++] = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); - } - sqlite3_finalize(stmt); - *out = arr; - *count = n; - return CBM_STORE_OK; -} - -/* ── Memory management ──────────────────────────────────────────── */ - -void cbm_node_free_fields(cbm_node_t *n) { - free((void *)n->project); - free((void *)n->label); - free((void *)n->name); - free((void *)n->qualified_name); - free((void *)n->file_path); - free((void *)n->properties_json); -} - -void cbm_store_free_nodes(cbm_node_t *nodes, int count) { - if (!nodes) { - return; - } - for (int i = 0; i < count; i++) { - cbm_node_free_fields(&nodes[i]); - } - free(nodes); -} - -void cbm_store_free_edges(cbm_edge_t *edges, int count) { - if (!edges) { - return; - } - for (int i = 0; i < count; i++) { - free((void *)edges[i].project); - free((void *)edges[i].type); - free((void *)edges[i].properties_json); - } - free(edges); -} - -void cbm_project_free_fields(cbm_project_t *p) { - free((void *)p->name); - free((void *)p->indexed_at); - free((void *)p->root_path); -} - -void cbm_store_free_projects(cbm_project_t *projects, int count) { - if (!projects) { - return; - } - for (int i = 0; i < count; i++) { - cbm_project_free_fields(&projects[i]); - } - free(projects); -} - -void cbm_store_free_file_hashes(cbm_file_hash_t *hashes, int count) { - if (!hashes) { - return; - } - for (int i = 0; i < count; i++) { - free((void *)hashes[i].project); - free((void *)hashes[i].rel_path); - free((void *)hashes[i].sha256); - } - free(hashes); -} +/* + * store.c — SQLite graph store implementation. 
+ * + * Implements the opaque cbm_store_t handle with prepared statement caching, + * schema initialization, and all CRUD operations for nodes, edges, projects, + * file hashes, search, BFS traversal, and schema introspection. + */ + +// for ISO timestamp + +#include "store/store.h" +#include "foundation/platform.h" +#include "foundation/compat.h" +#include "foundation/compat_regex.h" + +#include +#include +#include +#include +#include +#include +#include + +/* ── SQLite bind helpers ───────────────────────────────────────── */ + +/* Wrap sqlite3_bind_text with SQLITE_TRANSIENT to isolate the platform + int-to-pointer cast ((void*)-1) in one place. */ +// NOLINTNEXTLINE(performance-no-int-to-ptr) +static const sqlite3_destructor_type BIND_TRANSIENT = SQLITE_TRANSIENT; + +static int bind_text(sqlite3_stmt *s, int col, const char *v) { + return sqlite3_bind_text(s, col, v, -1, BIND_TRANSIENT); +} + +/* ── Internal store structure ───────────────────────────────────── */ + +struct cbm_store { + sqlite3 *db; + const char *db_path; /* heap-allocated, or NULL for :memory: */ + char errbuf[512]; + + /* Prepared statements (lazily initialized, cached for lifetime) */ + sqlite3_stmt *stmt_upsert_node; + sqlite3_stmt *stmt_find_node_by_id; + sqlite3_stmt *stmt_find_node_by_qn; + sqlite3_stmt *stmt_find_node_by_qn_any; /* QN lookup without project filter */ + sqlite3_stmt *stmt_find_nodes_by_name; + sqlite3_stmt *stmt_find_nodes_by_name_any; /* name lookup without project filter */ + sqlite3_stmt *stmt_find_nodes_by_label; + sqlite3_stmt *stmt_find_nodes_by_file; + sqlite3_stmt *stmt_count_nodes; + sqlite3_stmt *stmt_delete_nodes_by_project; + sqlite3_stmt *stmt_delete_nodes_by_file; + sqlite3_stmt *stmt_delete_nodes_by_label; + + sqlite3_stmt *stmt_insert_edge; + sqlite3_stmt *stmt_find_edges_by_source; + sqlite3_stmt *stmt_find_edges_by_target; + sqlite3_stmt *stmt_find_edges_by_source_type; + sqlite3_stmt *stmt_find_edges_by_target_type; + sqlite3_stmt 
*stmt_find_edges_by_type; + sqlite3_stmt *stmt_count_edges; + sqlite3_stmt *stmt_count_edges_by_type; + sqlite3_stmt *stmt_delete_edges_by_project; + sqlite3_stmt *stmt_delete_edges_by_type; + + sqlite3_stmt *stmt_upsert_project; + sqlite3_stmt *stmt_get_project; + sqlite3_stmt *stmt_list_projects; + sqlite3_stmt *stmt_delete_project; + + sqlite3_stmt *stmt_upsert_file_hash; + sqlite3_stmt *stmt_get_file_hashes; + sqlite3_stmt *stmt_delete_file_hash; + sqlite3_stmt *stmt_delete_file_hashes; +}; + +/* ── Helpers ────────────────────────────────────────────────────── */ + +static void store_set_error(cbm_store_t *s, const char *msg) { + snprintf(s->errbuf, sizeof(s->errbuf), "%s", msg); +} + +static void store_set_error_sqlite(cbm_store_t *s, const char *prefix) { + snprintf(s->errbuf, sizeof(s->errbuf), "%s: %s", prefix, sqlite3_errmsg(s->db)); +} + +static int exec_sql(cbm_store_t *s, const char *sql) { + if (!s || !s->db) { + return CBM_STORE_ERR; + } + char *err = NULL; + int rc = sqlite3_exec(s->db, sql, NULL, NULL, &err); + if (rc != SQLITE_OK) { + snprintf(s->errbuf, sizeof(s->errbuf), "exec: %s", err ? err : "unknown"); + sqlite3_free(err); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* Safe string: returns "" if NULL. */ +static const char *safe_str(const char *s) { + return s ? s : ""; +} + +/* Safe properties: returns "{}" if NULL. */ +static const char *safe_props(const char *s) { + return (s && s[0]) ? s : "{}"; +} + +/* Duplicate a string onto the heap. */ +static char *heap_strdup(const char *s) { + if (!s) { + return NULL; + } + size_t len = strlen(s); + char *d = malloc(len + 1); + if (d) { + memcpy(d, s, len + 1); + } + return d; +} + +/* Prepare a statement (cached). If already prepared, reset+clear. 
*/ +static sqlite3_stmt *prepare_cached(cbm_store_t *s, sqlite3_stmt **slot, const char *sql) { + if (!s || !s->db) { + return NULL; + } + if (*slot) { + sqlite3_reset(*slot); + sqlite3_clear_bindings(*slot); + return *slot; + } + int rc = sqlite3_prepare_v2(s->db, sql, -1, slot, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "prepare"); + return NULL; + } + return *slot; +} + +/* Get ISO-8601 timestamp. */ +static void iso_now(char *buf, size_t sz) { + time_t t = time(NULL); + struct tm tm; +#ifdef _WIN32 + gmtime_s(&tm, &t); /* Windows: reversed arg order */ +#else + gmtime_r(&t, &tm); +#endif + (void)strftime(buf, sz, "%Y-%m-%dT%H:%M:%SZ", + &tm); // cert-err33-c: strftime only fails if buffer is too small — 21-byte ISO + // timestamp always fits in caller-provided buffers +} + +/* ── Schema ─────────────────────────────────────────────────────── */ + +static int init_schema(cbm_store_t *s) { + const char *ddl = "CREATE TABLE IF NOT EXISTS projects (" + " name TEXT PRIMARY KEY," + " indexed_at TEXT NOT NULL," + " root_path TEXT NOT NULL" + ");" + "CREATE TABLE IF NOT EXISTS file_hashes (" + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " rel_path TEXT NOT NULL," + " sha256 TEXT NOT NULL," + " mtime_ns INTEGER NOT NULL DEFAULT 0," + " size INTEGER NOT NULL DEFAULT 0," + " PRIMARY KEY (project, rel_path)" + ");" + "CREATE TABLE IF NOT EXISTS nodes (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " label TEXT NOT NULL," + " name TEXT NOT NULL," + " qualified_name TEXT NOT NULL," + " file_path TEXT DEFAULT ''," + " start_line INTEGER DEFAULT 0," + " end_line INTEGER DEFAULT 0," + " properties TEXT DEFAULT '{}'," + " UNIQUE(project, qualified_name)" + ");" + "CREATE TABLE IF NOT EXISTS edges (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " project TEXT NOT NULL REFERENCES projects(name) ON DELETE CASCADE," + " source_id INTEGER NOT NULL REFERENCES nodes(id) 
ON DELETE CASCADE," + " target_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE," + " type TEXT NOT NULL," + " properties TEXT DEFAULT '{}'," + " UNIQUE(source_id, target_id, type)" + ");" + "CREATE TABLE IF NOT EXISTS project_summaries (" + " project TEXT PRIMARY KEY," + " summary TEXT NOT NULL," + " source_hash TEXT NOT NULL," + " created_at TEXT NOT NULL," + " updated_at TEXT NOT NULL" + ");"; + + return exec_sql(s, ddl); +} + +static int create_user_indexes(cbm_store_t *s) { + const char *sql = + "CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(project, label);" + "CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(project, name);" + "CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(project, file_path);" + "CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id, type);" + "CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id, type);" + "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);" + "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);" + "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);"; + return exec_sql(s, sql); +} + +static int configure_pragmas(cbm_store_t *s, bool in_memory) { + int rc; + rc = exec_sql(s, "PRAGMA foreign_keys = ON;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA temp_store = MEMORY;"); + if (rc != CBM_STORE_OK) { + return rc; + } + + if (in_memory) { + rc = exec_sql(s, "PRAGMA synchronous = OFF;"); + } else { + rc = exec_sql(s, "PRAGMA journal_mode = WAL;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA busy_timeout = 10000;"); + if (rc != CBM_STORE_OK) { + return rc; + } + rc = exec_sql(s, "PRAGMA mmap_size = 67108864;"); /* 64 MB */ + } + return rc; +} + +/* ── REGEXP function for SQLite ──────────────────────────────────── */ + +static void 
/*
 * Shared body of the regexp()/iregexp() SQL functions.
 *
 * argv[0] is the pattern and argv[1] the text to test. The result is 1 on a
 * match and 0 otherwise; a NULL pattern or text yields 0, and a pattern that
 * fails to compile raises the SQL error "invalid regex". `cflags` is passed
 * straight to cbm_regcomp().
 */
static void regexp_eval(sqlite3_context *ctx, sqlite3_value **argv, int cflags) {
    const char *pattern = (const char *)sqlite3_value_text(argv[0]);
    const char *text = (const char *)sqlite3_value_text(argv[1]);
    if (pattern == NULL || text == NULL) {
        sqlite3_result_int(ctx, 0);
        return;
    }

    cbm_regex_t re;
    if (cbm_regcomp(&re, pattern, cflags) != 0) {
        sqlite3_result_error(ctx, "invalid regex", -1);
        return;
    }

    int status = cbm_regexec(&re, text, 0, NULL, 0);
    cbm_regfree(&re);
    sqlite3_result_int(ctx, status == 0 ? 1 : 0);
}

/* Case-sensitive regexp(pattern, text) SQL function. */
static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    (void)argc;
    regexp_eval(ctx, argv, CBM_REG_EXTENDED | CBM_REG_NOSUB);
}

/* Case-insensitive variant: iregexp(pattern, text). */
static void sqlite_iregexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    (void)argc;
    regexp_eval(ctx, argv, CBM_REG_EXTENDED | CBM_REG_NOSUB | CBM_REG_ICASE);
}

/* ── Lifecycle ──────────────────────────────────────────────────── */
*/ +static int store_authorizer(void *user_data, int action, const char *p3, const char *p4, + const char *p5, const char *p6) { + (void)user_data; + (void)p3; + (void)p4; + (void)p5; + (void)p6; + switch (action) { + case SQLITE_ATTACH: /* ATTACH DATABASE — could create/read arbitrary files */ + case SQLITE_DETACH: /* DETACH DATABASE */ + return SQLITE_DENY; + default: + return SQLITE_OK; + } +} + +static cbm_store_t *store_open_internal(const char *path, bool in_memory) { + cbm_store_t *s = calloc(1, sizeof(cbm_store_t)); + if (!s) { + return NULL; + } + + int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; + if (in_memory) { + flags |= SQLITE_OPEN_MEMORY; + } + + int rc = sqlite3_open_v2(path, &s->db, flags, NULL); + if (rc != SQLITE_OK) { + free(s); + return NULL; + } + + if (path && !in_memory) { + s->db_path = heap_strdup(path); + } + + /* Security: block ATTACH/DETACH to prevent file creation via SQL injection. + * The authorizer runs inside SQLite's query planner — no string-level bypass. 
*/ + sqlite3_set_authorizer(s->db, store_authorizer, NULL); + + /* Register REGEXP function (SQLite doesn't have one built-in) */ + sqlite3_create_function(s->db, "regexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, + sqlite_regexp, NULL, NULL); + /* Case-insensitive variant for search with case_sensitive=false */ + sqlite3_create_function(s->db, "iregexp", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, + sqlite_iregexp, NULL, NULL); + + if (configure_pragmas(s, in_memory) != CBM_STORE_OK || init_schema(s) != CBM_STORE_OK || + create_user_indexes(s) != CBM_STORE_OK) { + sqlite3_close(s->db); + free((void *)s->db_path); + free(s); + return NULL; + } + + return s; +} + +cbm_store_t *cbm_store_open_memory(void) { + return store_open_internal(":memory:", true); +} + +cbm_store_t *cbm_store_open_path(const char *db_path) { + if (!db_path) { + return NULL; + } + return store_open_internal(db_path, false); +} + +cbm_store_t *cbm_store_open(const char *project) { + if (!project) { + return NULL; + } + /* Build path: ~/.cache/codebase-memory-mcp/.db */ + const char *home = cbm_home_dir(); + if (!home) { + home = "/tmp"; + } + char path[1024]; + snprintf(path, sizeof(path), "%s/.cache/codebase-memory-mcp/%s.db", home, project); + return store_open_internal(path, false); +} + +static void finalize_stmt(sqlite3_stmt **s) { + if (*s) { + sqlite3_finalize(*s); + *s = NULL; + } +} + +void cbm_store_close(cbm_store_t *s) { + if (!s) { + return; + } + + /* Finalize all cached statements */ + finalize_stmt(&s->stmt_upsert_node); + finalize_stmt(&s->stmt_find_node_by_id); + finalize_stmt(&s->stmt_find_node_by_qn); + finalize_stmt(&s->stmt_find_node_by_qn_any); + finalize_stmt(&s->stmt_find_nodes_by_name); + finalize_stmt(&s->stmt_find_nodes_by_name_any); + finalize_stmt(&s->stmt_find_nodes_by_label); + finalize_stmt(&s->stmt_find_nodes_by_file); + finalize_stmt(&s->stmt_count_nodes); + finalize_stmt(&s->stmt_delete_nodes_by_project); + finalize_stmt(&s->stmt_delete_nodes_by_file); + 
finalize_stmt(&s->stmt_delete_nodes_by_label); + + finalize_stmt(&s->stmt_insert_edge); + finalize_stmt(&s->stmt_find_edges_by_source); + finalize_stmt(&s->stmt_find_edges_by_target); + finalize_stmt(&s->stmt_find_edges_by_source_type); + finalize_stmt(&s->stmt_find_edges_by_target_type); + finalize_stmt(&s->stmt_find_edges_by_type); + finalize_stmt(&s->stmt_count_edges); + finalize_stmt(&s->stmt_count_edges_by_type); + finalize_stmt(&s->stmt_delete_edges_by_project); + finalize_stmt(&s->stmt_delete_edges_by_type); + + finalize_stmt(&s->stmt_upsert_project); + finalize_stmt(&s->stmt_get_project); + finalize_stmt(&s->stmt_list_projects); + finalize_stmt(&s->stmt_delete_project); + + finalize_stmt(&s->stmt_upsert_file_hash); + finalize_stmt(&s->stmt_get_file_hashes); + finalize_stmt(&s->stmt_delete_file_hash); + finalize_stmt(&s->stmt_delete_file_hashes); + + sqlite3_close(s->db); + free((void *)s->db_path); + free(s); +} + +const char *cbm_store_error(cbm_store_t *s) { + return s ? s->errbuf : "null store"; +} + +/* ── Transaction ────────────────────────────────────────────────── */ + +int cbm_store_begin(cbm_store_t *s) { + return exec_sql(s, "BEGIN IMMEDIATE;"); +} + +int cbm_store_commit(cbm_store_t *s) { + return exec_sql(s, "COMMIT;"); +} + +int cbm_store_rollback(cbm_store_t *s) { + return exec_sql(s, "ROLLBACK;"); +} + +/* ── Bulk write ─────────────────────────────────────────────────── */ + +int cbm_store_begin_bulk(cbm_store_t *s) { + /* Stay in WAL mode throughout. Switching to MEMORY journal mode would + * make the database unrecoverable if the process crashes mid-write, + * because the in-memory rollback journal is lost on crash. + * WAL mode is crash-safe: uncommitted WAL entries are simply discarded + * on the next open. Performance is preserved via synchronous=OFF and a + * larger cache, which are safe with WAL. 
*/ + int rc = exec_sql(s, "PRAGMA synchronous = OFF;"); + if (rc != CBM_STORE_OK) { + return rc; + } + return exec_sql(s, "PRAGMA cache_size = -65536;"); /* 64 MB */ +} + +int cbm_store_end_bulk(cbm_store_t *s) { + int rc = exec_sql(s, "PRAGMA synchronous = NORMAL;"); + if (rc != CBM_STORE_OK) { + return rc; + } + return exec_sql(s, "PRAGMA cache_size = -2000;"); /* default ~2 MB */ +} + +int cbm_store_drop_indexes(cbm_store_t *s) { + return exec_sql(s, "DROP INDEX IF EXISTS idx_nodes_label;" + "DROP INDEX IF EXISTS idx_nodes_name;" + "DROP INDEX IF EXISTS idx_nodes_file;" + "DROP INDEX IF EXISTS idx_edges_source;" + "DROP INDEX IF EXISTS idx_edges_target;" + "DROP INDEX IF EXISTS idx_edges_type;" + "DROP INDEX IF EXISTS idx_edges_target_type;" + "DROP INDEX IF EXISTS idx_edges_source_type;"); +} + +int cbm_store_create_indexes(cbm_store_t *s) { + return create_user_indexes(s); +} + +/* ── Checkpoint ─────────────────────────────────────────────────── */ + +int cbm_store_checkpoint(cbm_store_t *s) { + int rc = sqlite3_wal_checkpoint_v2(s->db, NULL, SQLITE_CHECKPOINT_TRUNCATE, NULL, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "checkpoint"); + return CBM_STORE_ERR; + } + return exec_sql(s, "PRAGMA optimize;"); +} + +/* ── Dump ───────────────────────────────────────────────────────── */ + +/* Dump entire in-memory database to a file via sqlite3_backup. + * Writes to a temp file first, then atomically renames for crash safety. + * sqlite3_backup_step(-1) copies ALL B-tree pages in one call — + * the file on disk is an exact replica of the in-memory page layout. 
*/ +int cbm_store_dump_to_file(cbm_store_t *s, const char *dest_path) { + if (!s || !dest_path) { + return CBM_STORE_ERR; + } + + /* Ensure parent directory exists */ + char dir[1024]; + snprintf(dir, sizeof(dir), "%s", dest_path); + char *sl = strrchr(dir, '/'); + if (sl) { + *sl = '\0'; + (void)cbm_mkdir(dir); + } + + /* Write to temp file for atomic swap */ + char tmp_path[1024]; + snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", dest_path); + (void)unlink(tmp_path); + + sqlite3 *dest_db = NULL; + int rc = sqlite3_open(tmp_path, &dest_db); + if (rc != SQLITE_OK) { + store_set_error(s, "dump: cannot open temp file"); + return CBM_STORE_ERR; + } + + sqlite3_backup *bk = sqlite3_backup_init(dest_db, "main", s->db, "main"); + if (!bk) { + store_set_error(s, "dump: backup init failed"); + sqlite3_close(dest_db); + (void)unlink(tmp_path); + return CBM_STORE_ERR; + } + + rc = sqlite3_backup_step(bk, -1); /* copy ALL pages in one shot */ + sqlite3_backup_finish(bk); + + if (rc != SQLITE_DONE) { + store_set_error(s, "dump: backup step failed"); + sqlite3_close(dest_db); + (void)unlink(tmp_path); + return CBM_STORE_ERR; + } + + /* Enable WAL on the dumped file so readers can connect concurrently */ + sqlite3_exec(dest_db, "PRAGMA journal_mode = WAL;", NULL, NULL, NULL); + sqlite3_close(dest_db); + + /* Atomic rename: old WAL/SHM become stale and get recreated by + * the next reader's configure_pragmas call. 
*/ + if (rename(tmp_path, dest_path) != 0) { + store_set_error(s, "dump: rename failed"); + (void)unlink(tmp_path); + return CBM_STORE_ERR; + } + + return CBM_STORE_OK; +} + +/* ── Project CRUD ───────────────────────────────────────────────── */ + +int cbm_store_upsert_project(cbm_store_t *s, const char *name, const char *root_path) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_upsert_project, + "INSERT INTO projects (name, indexed_at, root_path) VALUES (?1, ?2, ?3) " + "ON CONFLICT(name) DO UPDATE SET indexed_at=?2, root_path=?3;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + char ts[64]; + iso_now(ts, sizeof(ts)); + + bind_text(stmt, 1, name); + bind_text(stmt, 2, ts); + bind_text(stmt, 3, root_path); + + int rc = sqlite3_step(stmt); + if (rc != SQLITE_DONE) { + store_set_error_sqlite(s, "upsert_project"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_get_project(cbm_store_t *s, const char *name, cbm_project_t *out) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_get_project, + "SELECT name, indexed_at, root_path FROM projects WHERE name = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, name); + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + out->name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + out->indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + out->root_path = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + return CBM_STORE_OK; + } + return CBM_STORE_NOT_FOUND; +} + +int cbm_store_list_projects(cbm_store_t *s, cbm_project_t **out, int *count) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_list_projects, + "SELECT name, indexed_at, root_path FROM projects ORDER BY name;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + /* Collect into dynamic array */ + int cap = 8; + int n = 0; + cbm_project_t *arr = malloc(cap * sizeof(cbm_project_t)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = 
safe_realloc(arr, cap * sizeof(cbm_project_t)); + } + arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].indexed_at = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[n].root_path = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + n++; + } + + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +int cbm_store_delete_project(cbm_store_t *s, const char *name) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_delete_project, "DELETE FROM projects WHERE name = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, name); + int rc = sqlite3_step(stmt); + if (rc != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_project"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* ── Node CRUD ──────────────────────────────────────────────────── */ + +// NOLINTNEXTLINE(misc-include-cleaner) — int64_t provided by standard header +int64_t cbm_store_upsert_node(cbm_store_t *s, const cbm_node_t *n) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_upsert_node, + "INSERT INTO nodes (project, label, name, qualified_name, file_path, " + "start_line, end_line, properties) " + "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) " + "ON CONFLICT(project, qualified_name) DO UPDATE SET " + "label=?2, name=?3, file_path=?5, start_line=?6, end_line=?7, properties=?8 " + "RETURNING id;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, safe_str(n->project)); + bind_text(stmt, 2, safe_str(n->label)); + bind_text(stmt, 3, safe_str(n->name)); + bind_text(stmt, 4, safe_str(n->qualified_name)); + bind_text(stmt, 5, safe_str(n->file_path)); + sqlite3_bind_int(stmt, 6, n->start_line); + sqlite3_bind_int(stmt, 7, n->end_line); + bind_text(stmt, 8, safe_props(n->properties_json)); + + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + int64_t id = sqlite3_column_int64(stmt, 0); + sqlite3_reset(stmt); /* unblock COMMIT — RETURNING leaves stmt active */ + return id; + } + sqlite3_reset(stmt); + 
store_set_error_sqlite(s, "upsert_node"); + return CBM_STORE_ERR; +} + +/* Scan a node from current row of stmt. Heap-allocates strings. */ +static void scan_node(sqlite3_stmt *stmt, cbm_node_t *n) { + n->id = sqlite3_column_int64(stmt, 0); + n->project = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + n->label = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + // NOLINTNEXTLINE(clang-analyzer-unix.Malloc) + n->name = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); + n->qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 4)); + n->file_path = heap_strdup((const char *)sqlite3_column_text(stmt, 5)); + n->start_line = sqlite3_column_int(stmt, 6); + n->end_line = sqlite3_column_int(stmt, 7); + n->properties_json = heap_strdup((const char *)sqlite3_column_text(stmt, 8)); +} + +int cbm_store_find_node_by_id(cbm_store_t *s, int64_t id, cbm_node_t *out) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_find_node_by_id, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes WHERE id = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + sqlite3_bind_int64(stmt, 1, id); + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + scan_node(stmt, out); + return CBM_STORE_OK; + } + return CBM_STORE_NOT_FOUND; +} + +int cbm_store_find_node_by_qn(cbm_store_t *s, const char *project, const char *qn, + cbm_node_t *out) { + if (!s || !s->db) { + return CBM_STORE_ERR; + } + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_find_node_by_qn, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND qualified_name = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, qn); + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + scan_node(stmt, out); + return CBM_STORE_OK; + } + return CBM_STORE_NOT_FOUND; +} + +int 
cbm_store_find_node_by_qn_any(cbm_store_t *s, const char *qn, cbm_node_t *out) { + if (!s || !s->db) { + return CBM_STORE_ERR; + } + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_find_node_by_qn_any, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE qualified_name = ?1 LIMIT 1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, qn); + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + scan_node(stmt, out); + return CBM_STORE_OK; + } + return CBM_STORE_NOT_FOUND; +} + +int cbm_store_find_nodes_by_name_any(cbm_store_t *s, const char *name, cbm_node_t **out, + int *count) { + if (!s || !s->db) { + *out = NULL; + *count = 0; + return CBM_STORE_ERR; + } + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_find_nodes_by_name_any, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE name = ?1;"); + if (!stmt) { + *out = NULL; + *count = 0; + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, name); + + int cap = 16; + int n = 0; + cbm_node_t *arr = malloc(cap * sizeof(cbm_node_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_node_t)); + } + scan_node(stmt, &arr[n]); + n++; + } + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +int cbm_store_find_node_ids_by_qns(cbm_store_t *s, const char *project, const char **qns, + int qn_count, int64_t *out_ids) { + if (!s || !project || !qns || !out_ids || qn_count <= 0) { + return 0; + } + + /* Zero out results */ + memset(out_ids, 0, (size_t)qn_count * sizeof(int64_t)); + + int found = 0; + cbm_node_t node = {0}; + for (int i = 0; i < qn_count; i++) { + if (!qns[i]) { + continue; + } + int rc = cbm_store_find_node_by_qn(s, project, qns[i], &node); + if (rc == CBM_STORE_OK) { + out_ids[i] = node.id; + found++; + cbm_node_free_fields(&node); + memset(&node, 0, sizeof(node)); + } + } + return 
found; +} + +/* Generic: find multiple nodes by a single-column filter. */ +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +static int find_nodes_generic(cbm_store_t *s, sqlite3_stmt **slot, const char *sql, + const char *project, const char *val, cbm_node_t **out, int *count) { + if (!s || !s->db) { + *out = NULL; + *count = 0; + return CBM_STORE_ERR; + } + sqlite3_stmt *stmt = prepare_cached(s, slot, sql); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, val); + + int cap = 16; + int n = 0; + cbm_node_t *arr = malloc(cap * sizeof(cbm_node_t)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_node_t)); + } + scan_node(stmt, &arr[n]); + n++; + } + + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +int cbm_store_find_nodes_by_name(cbm_store_t *s, const char *project, const char *name, + cbm_node_t **out, int *count) { + return find_nodes_generic(s, &s->stmt_find_nodes_by_name, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND name = ?2;", + project, name, out, count); +} + +int cbm_store_find_nodes_by_label(cbm_store_t *s, const char *project, const char *label, + cbm_node_t **out, int *count) { + return find_nodes_generic(s, &s->stmt_find_nodes_by_label, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND label = ?2;", + project, label, out, count); +} + +int cbm_store_find_nodes_by_file(cbm_store_t *s, const char *project, const char *file_path, + cbm_node_t **out, int *count) { + return find_nodes_generic(s, &s->stmt_find_nodes_by_file, + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND file_path = ?2;", + project, file_path, out, count); +} + +int 
cbm_store_count_nodes(cbm_store_t *s, const char *project) { + if (!s || !s->db) { + return 0; + } + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_count_nodes, "SELECT COUNT(*) FROM nodes WHERE project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) == SQLITE_ROW) { + return sqlite3_column_int(stmt, 0); + } + return 0; +} + +int cbm_store_delete_nodes_by_project(cbm_store_t *s, const char *project) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_project, + "DELETE FROM nodes WHERE project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_nodes_by_project"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_delete_nodes_by_file(cbm_store_t *s, const char *project, const char *file_path) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_file, + "DELETE FROM nodes WHERE project = ?1 AND file_path = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, file_path); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_nodes_by_file"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_delete_nodes_by_label(cbm_store_t *s, const char *project, const char *label) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_nodes_by_label, + "DELETE FROM nodes WHERE project = ?1 AND label = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, label); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_nodes_by_label"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* ── Node batch ─────────────────────────────────────────────────── */ + +int cbm_store_upsert_node_batch(cbm_store_t *s, const cbm_node_t *nodes, int count, + int64_t *out_ids) { + if (count == 0) 
{ + return CBM_STORE_OK; + } + + exec_sql(s, "BEGIN;"); + for (int i = 0; i < count; i++) { + int64_t id = cbm_store_upsert_node(s, &nodes[i]); + if (id == CBM_STORE_ERR) { + exec_sql(s, "ROLLBACK;"); + return CBM_STORE_ERR; + } + if (out_ids) { + out_ids[i] = id; + } + } + exec_sql(s, "COMMIT;"); + return CBM_STORE_OK; +} + +/* ── Edge CRUD ──────────────────────────────────────────────────── */ + +int64_t cbm_store_insert_edge(cbm_store_t *s, const cbm_edge_t *e) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_insert_edge, + "INSERT INTO edges (project, source_id, target_id, type, properties) " + "VALUES (?1, ?2, ?3, ?4, ?5) " + "ON CONFLICT(source_id, target_id, type) DO UPDATE SET " + "properties = json_patch(properties, ?5) " + "RETURNING id;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, safe_str(e->project)); + sqlite3_bind_int64(stmt, 2, e->source_id); + sqlite3_bind_int64(stmt, 3, e->target_id); + bind_text(stmt, 4, safe_str(e->type)); + bind_text(stmt, 5, safe_props(e->properties_json)); + + int rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + int64_t id = sqlite3_column_int64(stmt, 0); + sqlite3_reset(stmt); /* unblock COMMIT — RETURNING leaves stmt active */ + return id; + } + sqlite3_reset(stmt); + store_set_error_sqlite(s, "insert_edge"); + return CBM_STORE_ERR; +} + +/* Scan an edge from current row of stmt. */ +static void scan_edge(sqlite3_stmt *stmt, cbm_edge_t *e) { + e->id = sqlite3_column_int64(stmt, 0); + e->project = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + e->source_id = sqlite3_column_int64(stmt, 2); + e->target_id = sqlite3_column_int64(stmt, 3); + e->type = heap_strdup((const char *)sqlite3_column_text(stmt, 4)); + e->properties_json = heap_strdup((const char *)sqlite3_column_text(stmt, 5)); +} + +/* Generic: find multiple edges by a filter. 
*/ +static int find_edges_generic(cbm_store_t *s, sqlite3_stmt **slot, const char *sql, + void (*bind_fn)(sqlite3_stmt *, const void *), const void *bind_data, + cbm_edge_t **out, int *count) { + if (!s || !s->db) { + *out = NULL; + *count = 0; + return CBM_STORE_ERR; + } + sqlite3_stmt *stmt = prepare_cached(s, slot, sql); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_fn(stmt, bind_data); + + int cap = 16; + int n = 0; + cbm_edge_t *arr = malloc(cap * sizeof(cbm_edge_t)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_edge_t)); + } + scan_edge(stmt, &arr[n]); + n++; + } + + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +/* Bind helpers for edge queries */ +typedef struct { + int64_t id; +} bind_id_t; +typedef struct { + int64_t id; + const char *type; +} bind_id_type_t; +typedef struct { + const char *project; + const char *type; +} bind_proj_type_t; + +static void bind_source_id(sqlite3_stmt *stmt, const void *data) { + const bind_id_t *b = data; + sqlite3_bind_int64(stmt, 1, b->id); +} + +static void bind_id_and_type(sqlite3_stmt *stmt, const void *data) { + const bind_id_type_t *b = data; + sqlite3_bind_int64(stmt, 1, b->id); + bind_text(stmt, 2, b->type); +} + +static void bind_proj_and_type(sqlite3_stmt *stmt, const void *data) { + const bind_proj_type_t *b = data; + bind_text(stmt, 1, b->project); + bind_text(stmt, 2, b->type); +} + +int cbm_store_find_edges_by_source(cbm_store_t *s, int64_t source_id, cbm_edge_t **out, + int *count) { + bind_id_t b = {source_id}; + return find_edges_generic(s, &s->stmt_find_edges_by_source, + "SELECT id, project, source_id, target_id, type, properties " + "FROM edges WHERE source_id = ?1;", + bind_source_id, &b, out, count); +} + +int cbm_store_find_edges_by_target(cbm_store_t *s, int64_t target_id, cbm_edge_t **out, + int *count) { + bind_id_t b = {target_id}; + return find_edges_generic(s, &s->stmt_find_edges_by_target, + "SELECT id, 
project, source_id, target_id, type, properties " + "FROM edges WHERE target_id = ?1;", + bind_source_id, &b, out, count); +} + +int cbm_store_find_edges_by_source_type(cbm_store_t *s, int64_t source_id, const char *type, + cbm_edge_t **out, int *count) { + bind_id_type_t b = {source_id, type}; + return find_edges_generic(s, &s->stmt_find_edges_by_source_type, + "SELECT id, project, source_id, target_id, type, properties " + "FROM edges WHERE source_id = ?1 AND type = ?2;", + bind_id_and_type, &b, out, count); +} + +int cbm_store_find_edges_by_target_type(cbm_store_t *s, int64_t target_id, const char *type, + cbm_edge_t **out, int *count) { + bind_id_type_t b = {target_id, type}; + return find_edges_generic(s, &s->stmt_find_edges_by_target_type, + "SELECT id, project, source_id, target_id, type, properties " + "FROM edges WHERE target_id = ?1 AND type = ?2;", + bind_id_and_type, &b, out, count); +} + +int cbm_store_find_edges_by_type(cbm_store_t *s, const char *project, const char *type, + cbm_edge_t **out, int *count) { + bind_proj_type_t b = {project, type}; + return find_edges_generic(s, &s->stmt_find_edges_by_type, + "SELECT id, project, source_id, target_id, type, properties " + "FROM edges WHERE project = ?1 AND type = ?2;", + bind_proj_and_type, &b, out, count); +} + +int cbm_store_count_edges(cbm_store_t *s, const char *project) { + if (!s || !s->db) { + return 0; + } + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_count_edges, "SELECT COUNT(*) FROM edges WHERE project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) == SQLITE_ROW) { + return sqlite3_column_int(stmt, 0); + } + return 0; +} + +int cbm_store_count_edges_by_type(cbm_store_t *s, const char *project, const char *type) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_count_edges_by_type, + "SELECT COUNT(*) FROM edges WHERE project = ?1 AND type = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); 
+ bind_text(stmt, 2, type); + if (sqlite3_step(stmt) == SQLITE_ROW) { + return sqlite3_column_int(stmt, 0); + } + return 0; +} + +int cbm_store_delete_edges_by_project(cbm_store_t *s, const char *project) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_edges_by_project, + "DELETE FROM edges WHERE project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_edges_by_project"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_delete_edges_by_type(cbm_store_t *s, const char *project, const char *type) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_edges_by_type, + "DELETE FROM edges WHERE project = ?1 AND type = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, type); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_edges_by_type"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* ── Edge batch ─────────────────────────────────────────────────── */ + +int cbm_store_insert_edge_batch(cbm_store_t *s, const cbm_edge_t *edges, int count) { + if (count == 0) { + return CBM_STORE_OK; + } + + exec_sql(s, "BEGIN;"); + for (int i = 0; i < count; i++) { + int64_t id = cbm_store_insert_edge(s, &edges[i]); + if (id == CBM_STORE_ERR) { + exec_sql(s, "ROLLBACK;"); + return CBM_STORE_ERR; + } + } + exec_sql(s, "COMMIT;"); + return CBM_STORE_OK; +} + +/* ── File hash CRUD ─────────────────────────────────────────────── */ + +int cbm_store_upsert_file_hash(cbm_store_t *s, const char *project, const char *rel_path, + const char *sha256, int64_t mtime_ns, int64_t size) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_upsert_file_hash, + "INSERT INTO file_hashes (project, rel_path, sha256, mtime_ns, size) " + "VALUES (?1, ?2, ?3, ?4, ?5) " + "ON CONFLICT(project, rel_path) DO UPDATE SET " + "sha256=?3, mtime_ns=?4, size=?5;"); + if 
(!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, rel_path); + bind_text(stmt, 3, sha256); + sqlite3_bind_int64(stmt, 4, mtime_ns); + sqlite3_bind_int64(stmt, 5, size); + + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "upsert_file_hash"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_get_file_hashes(cbm_store_t *s, const char *project, cbm_file_hash_t **out, + int *count) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_get_file_hashes, + "SELECT project, rel_path, sha256, mtime_ns, size " + "FROM file_hashes WHERE project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + + int cap = 16; + int n = 0; + cbm_file_hash_t *arr = malloc(cap * sizeof(cbm_file_hash_t)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_file_hash_t)); + } + arr[n].project = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].rel_path = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[n].sha256 = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + arr[n].mtime_ns = sqlite3_column_int64(stmt, 3); + arr[n].size = sqlite3_column_int64(stmt, 4); + n++; + } + + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +int cbm_store_delete_file_hash(cbm_store_t *s, const char *project, const char *rel_path) { + sqlite3_stmt *stmt = + prepare_cached(s, &s->stmt_delete_file_hash, + "DELETE FROM file_hashes WHERE project = ?1 AND rel_path = ?2;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, rel_path); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_file_hash"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_store_delete_file_hashes(cbm_store_t *s, const char *project) { + sqlite3_stmt *stmt = prepare_cached(s, &s->stmt_delete_file_hashes, + "DELETE FROM file_hashes WHERE 
project = ?1;"); + if (!stmt) { + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + if (sqlite3_step(stmt) != SQLITE_DONE) { + store_set_error_sqlite(s, "delete_file_hashes"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* ── FindNodesByFileOverlap ─────────────────────────────────────── */ + +int cbm_store_find_nodes_by_file_overlap(cbm_store_t *s, const char *project, const char *file_path, + int start_line, int end_line, cbm_node_t **out, + int *count) { + *out = NULL; + *count = 0; + const char *sql = "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND file_path = ?2 " + "AND label NOT IN ('Module', 'Package', 'File', 'Folder') " + "AND start_line <= ?4 AND end_line >= ?3 " + "ORDER BY start_line"; + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "overlap prepare"); + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, file_path); + sqlite3_bind_int(stmt, 3, start_line); + sqlite3_bind_int(stmt, 4, end_line); + + int cap = 8; + int n = 0; + cbm_node_t *nodes = malloc(cap * sizeof(cbm_node_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + nodes = safe_realloc(nodes, cap * sizeof(cbm_node_t)); + } + memset(&nodes[n], 0, sizeof(cbm_node_t)); + scan_node(stmt, &nodes[n]); + n++; + } + sqlite3_finalize(stmt); + *out = nodes; + *count = n; + return CBM_STORE_OK; +} + +/* ── FindNodesByQNSuffix ───────────────────────────────────────── */ + +int cbm_store_find_nodes_by_qn_suffix(cbm_store_t *s, const char *project, const char *suffix, + cbm_node_t **out, int *count) { + *out = NULL; + *count = 0; + if (!s || !s->db) { + return CBM_STORE_ERR; + } + /* Match QNs ending with ".suffix" or exactly equal to suffix */ + char like_pattern[512]; + snprintf(like_pattern, sizeof(like_pattern), "%%.%s", suffix); + + 
const char *sql_with_project = + "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE project = ?1 AND (qualified_name LIKE ?2 OR qualified_name = ?3)"; + const char *sql_any = "SELECT id, project, label, name, qualified_name, file_path, " + "start_line, end_line, properties FROM nodes " + "WHERE (qualified_name LIKE ?1 OR qualified_name = ?2)"; + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(s->db, project ? sql_with_project : sql_any, -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "qn_suffix prepare"); + return CBM_STORE_ERR; + } + + if (project) { + bind_text(stmt, 1, project); + bind_text(stmt, 2, like_pattern); + bind_text(stmt, 3, suffix); + } else { + bind_text(stmt, 1, like_pattern); + bind_text(stmt, 2, suffix); + } + + int cap = 8; + int n = 0; + cbm_node_t *nodes = malloc(cap * sizeof(cbm_node_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + nodes = safe_realloc(nodes, cap * sizeof(cbm_node_t)); + } + memset(&nodes[n], 0, sizeof(cbm_node_t)); + scan_node(stmt, &nodes[n]); + n++; + } + sqlite3_finalize(stmt); + *out = nodes; + *count = n; + return CBM_STORE_OK; +} + +/* ── NodeDegree ────────────────────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +void cbm_store_node_degree(cbm_store_t *s, int64_t node_id, int *in_deg, int *out_deg) { + *in_deg = 0; + *out_deg = 0; + + const char *in_sql = "SELECT COUNT(*) FROM edges WHERE target_id = ?1 AND type = 'CALLS'"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, in_sql, -1, &stmt, NULL) == SQLITE_OK) { + sqlite3_bind_int64(stmt, 1, node_id); + if (sqlite3_step(stmt) == SQLITE_ROW) { + *in_deg = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + } + + const char *out_sql = "SELECT COUNT(*) FROM edges WHERE source_id = ?1 AND type = 'CALLS'"; + if (sqlite3_prepare_v2(s->db, out_sql, -1, &stmt, NULL) == 
SQLITE_OK) { + sqlite3_bind_int64(stmt, 1, node_id); + if (sqlite3_step(stmt) == SQLITE_ROW) { + *out_deg = sqlite3_column_int(stmt, 0); + } + sqlite3_finalize(stmt); + } +} + +/* ── Node neighbor names ──────────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +static int query_neighbor_names(sqlite3 *db, const char *sql, int64_t node_id, int limit, + char ***out, int *out_count) { + *out = NULL; + *out_count = 0; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(db, sql, -1, &stmt, NULL) != SQLITE_OK) { + return -1; + } + sqlite3_bind_int64(stmt, 1, node_id); + sqlite3_bind_int(stmt, 2, limit); + + int cap = 8; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **names = malloc((size_t)cap * sizeof(char *)); + int count = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 0); + if (!name) { + continue; + } + if (count >= cap) { + cap *= 2; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + names = safe_realloc(names, (size_t)cap * sizeof(char *)); + } + // NOLINTNEXTLINE(misc-include-cleaner) — strdup provided by standard header + names[count++] = strdup(name); + } + sqlite3_finalize(stmt); + *out = names; + *out_count = count; + return 0; +} + +int cbm_store_node_neighbor_names(cbm_store_t *s, int64_t node_id, int limit, char ***out_callers, + int *caller_count, char ***out_callees, int *callee_count) { + if (!s) { + return -1; + } + *out_callers = NULL; + *caller_count = 0; + *out_callees = NULL; + *callee_count = 0; + + query_neighbor_names( + s->db, + "SELECT DISTINCT n.name FROM edges e JOIN nodes n ON e.source_id = n.id " + "WHERE e.target_id = ?1 AND e.type IN ('CALLS','HTTP_CALLS','ASYNC_CALLS') " + "ORDER BY n.name LIMIT ?2", + node_id, limit, out_callers, caller_count); + + query_neighbor_names( + s->db, + "SELECT DISTINCT n.name FROM edges e JOIN nodes n ON e.target_id = n.id " + "WHERE e.source_id = 
?1 AND e.type IN ('CALLS','HTTP_CALLS','ASYNC_CALLS') " + "ORDER BY n.name LIMIT ?2", + node_id, limit, out_callees, callee_count); + + return 0; +} + +int cbm_store_batch_count_degrees(cbm_store_t *s, const int64_t *node_ids, int id_count, + const char *edge_type, int *out_in, int *out_out) { + if (!s || !node_ids || id_count <= 0 || !out_in || !out_out) { + return CBM_STORE_ERR; + } + + memset(out_in, 0, (size_t)id_count * sizeof(int)); + memset(out_out, 0, (size_t)id_count * sizeof(int)); + + /* Build IN clause: (?,?,?) */ + char in_clause[4096]; + int pos = 0; + for (int i = 0; i < id_count && pos < (int)sizeof(in_clause) - 4; i++) { + if (i > 0) { + in_clause[pos++] = ','; + } + in_clause[pos++] = '?'; + } + in_clause[pos] = '\0'; + + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool has_type = edge_type && edge_type[0] != '\0'; + + /* Inbound: COUNT grouped by target_id */ + char sql[8192]; + if (has_type) { + snprintf(sql, sizeof(sql), + "SELECT target_id, COUNT(*) FROM edges " + "WHERE target_id IN (%s) AND type = ? GROUP BY target_id", + in_clause); + } else { + snprintf(sql, sizeof(sql), + "SELECT target_id, COUNT(*) FROM edges " + "WHERE target_id IN (%s) GROUP BY target_id", + in_clause); + } + + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + return CBM_STORE_ERR; + } + + for (int i = 0; i < id_count; i++) { + sqlite3_bind_int64(stmt, i + 1, node_ids[i]); + } + if (has_type) { + bind_text(stmt, id_count + 1, edge_type); + } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + int64_t nid = sqlite3_column_int64(stmt, 0); + int cnt = sqlite3_column_int(stmt, 1); + for (int i = 0; i < id_count; i++) { + if (node_ids[i] == nid) { + out_in[i] = cnt; + break; + } + } + } + sqlite3_finalize(stmt); + + /* Outbound: COUNT grouped by source_id */ + if (has_type) { + snprintf(sql, sizeof(sql), + "SELECT source_id, COUNT(*) FROM edges " + "WHERE source_id IN (%s) AND type = ? 
GROUP BY source_id", + in_clause); + } else { + snprintf(sql, sizeof(sql), + "SELECT source_id, COUNT(*) FROM edges " + "WHERE source_id IN (%s) GROUP BY source_id", + in_clause); + } + + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + return CBM_STORE_ERR; + } + + for (int i = 0; i < id_count; i++) { + sqlite3_bind_int64(stmt, i + 1, node_ids[i]); + } + if (has_type) { + bind_text(stmt, id_count + 1, edge_type); + } + + while (sqlite3_step(stmt) == SQLITE_ROW) { + int64_t nid = sqlite3_column_int64(stmt, 0); + int cnt = sqlite3_column_int(stmt, 1); + for (int i = 0; i < id_count; i++) { + if (node_ids[i] == nid) { + out_out[i] = cnt; + break; + } + } + } + sqlite3_finalize(stmt); + + return CBM_STORE_OK; +} + +/* ── UpsertFileHashBatch ───────────────────────────────────────── */ + +int cbm_store_upsert_file_hash_batch(cbm_store_t *s, const cbm_file_hash_t *hashes, int count) { + if (count == 0) { + return CBM_STORE_OK; + } + + int rc = cbm_store_begin(s); + if (rc != CBM_STORE_OK) { + return rc; + } + + for (int i = 0; i < count; i++) { + rc = cbm_store_upsert_file_hash(s, hashes[i].project, hashes[i].rel_path, hashes[i].sha256, + hashes[i].mtime_ns, hashes[i].size); + if (rc != CBM_STORE_OK) { + cbm_store_rollback(s); + return rc; + } + } + + return cbm_store_commit(s); +} + +/* ── FindEdgesByURLPath ────────────────────────────────────────── */ + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +int cbm_store_find_edges_by_url_path(cbm_store_t *s, const char *project, const char *keyword, + cbm_edge_t **out, int *count) { + *out = NULL; + *count = 0; + + /* Search properties JSON for url_path containing keyword */ + char like_pattern[512]; + snprintf(like_pattern, sizeof(like_pattern), "%%\"url_path\":\"%%%%%s%%%%\"%%", keyword); + + const char *sql = "SELECT id, project, source_id, target_id, type, properties FROM edges " + "WHERE project = ?1 AND properties LIKE ?2"; + + sqlite3_stmt *stmt = NULL; + int rc = 
sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "url_path prepare"); + return CBM_STORE_ERR; + } + + bind_text(stmt, 1, project); + bind_text(stmt, 2, like_pattern); + + int cap = 8; + int n = 0; + cbm_edge_t *edges = malloc(cap * sizeof(cbm_edge_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + edges = safe_realloc(edges, cap * sizeof(cbm_edge_t)); + } + memset(&edges[n], 0, sizeof(cbm_edge_t)); + scan_edge(stmt, &edges[n]); + n++; + } + sqlite3_finalize(stmt); + *out = edges; + *count = n; + return CBM_STORE_OK; +} + +/* ── RestoreFrom ───────────────────────────────────────────────── */ + +int cbm_store_restore_from(cbm_store_t *dst, cbm_store_t *src) { + sqlite3_backup *bk = sqlite3_backup_init(dst->db, "main", src->db, "main"); + if (!bk) { + store_set_error_sqlite(dst, "backup init"); + return CBM_STORE_ERR; + } + int rc = sqlite3_backup_step(bk, -1); /* copy all pages */ + sqlite3_backup_finish(bk); + + if (rc != SQLITE_DONE) { + store_set_error(dst, "backup step failed"); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +/* ── Search ─────────────────────────────────────────────────────── */ + +/* Convert a glob pattern to SQL LIKE pattern. 
*/ +char *cbm_glob_to_like(const char *pattern) { + if (!pattern) { + return NULL; + } + size_t len = strlen(pattern); + char *out = malloc((len * 2) + 1); + size_t j = 0; + + for (size_t i = 0; i < len; i++) { + if (pattern[i] == '*' && i + 1 < len && pattern[i + 1] == '*') { + /* Remove leading / from output if present (handles glob dir-star) */ + if (j > 0 && out[j - 1] == '/') { + j--; + } + out[j++] = '%'; + i++; /* skip second * */ + if (i + 1 < len && pattern[i + 1] == '/') { + i++; /* skip trailing / */ + } + } else if (pattern[i] == '*') { + out[j++] = '%'; + } else if (pattern[i] == '?') { + out[j++] = '_'; + } else { + out[j++] = pattern[i]; + } + } + out[j] = '\0'; + return out; +} + +/* ── extractLikeHints ─────────────────────────────────────────── */ + +int cbm_extract_like_hints(const char *pattern, char **out, int max_out) { + if (!pattern || !out || max_out <= 0) { + return 0; + } + + /* Bail on alternation — can't convert OR regex to AND LIKE */ + for (const char *p = pattern; *p; p++) { + if (*p == '|') { + return 0; + } + } + + int count = 0; + char buf[256]; + int blen = 0; + + int i = 0; + while (pattern[i]) { + char ch = pattern[i]; + switch (ch) { + case '\\': + /* Escaped char — the next char is literal */ + if (pattern[i + 1]) { + if (blen < (int)sizeof(buf) - 1) { + buf[blen++] = pattern[i + 1]; + } + i += 2; + } else { + i++; + } + break; + case '.': + case '*': + case '+': + case '?': + case '^': + case '$': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + /* Meta character — flush current literal segment */ + if (blen >= 3 && count < max_out) { + buf[blen] = '\0'; + out[count++] = strdup(buf); + } + blen = 0; + i++; + break; + default: + if (blen < (int)sizeof(buf) - 1) { + buf[blen++] = ch; + } + i++; + break; + } + } + /* Flush trailing segment */ + if (blen >= 3 && count < max_out) { + buf[blen] = '\0'; + out[count++] = strdup(buf); + } + return count; +} + +/* ── ensureCaseInsensitive / stripCaseFlag 
────────────────────── */ + +const char *cbm_ensure_case_insensitive(const char *pattern) { + static char buf[2048]; + if (!pattern) { + buf[0] = '\0'; + return buf; + } + /* Already has (?i) prefix? Return as-is. */ + if (strncmp(pattern, "(?i)", 4) == 0) { + snprintf(buf, sizeof(buf), "%s", pattern); + } else { + snprintf(buf, sizeof(buf), "(?i)%s", pattern); + } + return buf; +} + +const char *cbm_strip_case_flag(const char *pattern) { + static char buf[2048]; + if (!pattern) { + buf[0] = '\0'; + return buf; + } + if (strncmp(pattern, "(?i)", 4) == 0) { + snprintf(buf, sizeof(buf), "%s", pattern + 4); + } else { + snprintf(buf, sizeof(buf), "%s", pattern); + } + return buf; +} + +int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_search_output_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !s->db) { + return CBM_STORE_ERR; + } + + /* Build WHERE clauses dynamically */ + char sql[4096]; + char count_sql[4096]; + int bind_idx = 0; + + /* We build a query that selects nodes with optional degree subqueries */ + const char *select_cols = + "SELECT n.id, n.project, n.label, n.name, n.qualified_name, " + "n.file_path, n.start_line, n.end_line, n.properties, " + "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, " + "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg "; + + /* Start building WHERE */ + char where[2048] = ""; + int wlen = 0; + int nparams = 0; + + /* Track bind values */ + struct { + enum { BV_TEXT } type; + const char *text; + } binds[16]; + +#define ADD_WHERE(cond) \ + do { \ + if (nparams > 0) \ + wlen += snprintf(where + wlen, sizeof(where) - wlen, " AND "); \ + wlen += snprintf(where + wlen, sizeof(where) - wlen, "%s", cond); \ + nparams++; \ + } while (0) + +#define BIND_TEXT(val) \ + do { \ + bind_idx++; \ + binds[bind_idx - 1].type = BV_TEXT; \ + binds[bind_idx - 1].text = val; \ + } while (0) + + char bind_buf[64]; + char *like_pattern = 
NULL; + + if (params->project) { + snprintf(bind_buf, sizeof(bind_buf), "n.project = ?%d", bind_idx + 1); + ADD_WHERE(bind_buf); + BIND_TEXT(params->project); + } + if (params->label) { + snprintf(bind_buf, sizeof(bind_buf), "n.label = ?%d", bind_idx + 1); + ADD_WHERE(bind_buf); + BIND_TEXT(params->label); + } + if (params->name_pattern) { + if (params->case_sensitive) { + /* Case-sensitive: use built-in REGEXP operator */ + snprintf(bind_buf, sizeof(bind_buf), "n.name REGEXP ?%d", bind_idx + 1); + } else { + /* Case-insensitive: use iregexp() function call syntax */ + snprintf(bind_buf, sizeof(bind_buf), "iregexp(?%d, n.name)", bind_idx + 1); + } + ADD_WHERE(bind_buf); + BIND_TEXT(params->name_pattern); + } + if (params->file_pattern) { + like_pattern = cbm_glob_to_like(params->file_pattern); + snprintf(bind_buf, sizeof(bind_buf), "n.file_path LIKE ?%d", bind_idx + 1); + ADD_WHERE(bind_buf); + BIND_TEXT(like_pattern); + } + + /* Exclude labels: add NOT IN clause directly (no bind params — values are code-provided) */ + if (params->exclude_labels) { + char excl_clause[512] = "n.label NOT IN ("; + int elen = (int)strlen(excl_clause); + for (int i = 0; params->exclude_labels[i]; i++) { + if (i > 0) { + elen += snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, ","); + if (elen >= (int)sizeof(excl_clause)) { + elen = (int)sizeof(excl_clause) - 1; + } + } + elen += snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, "'%s'", + params->exclude_labels[i]); + if (elen >= (int)sizeof(excl_clause)) { + elen = (int)sizeof(excl_clause) - 1; + } + } + snprintf(excl_clause + elen, sizeof(excl_clause) - (size_t)elen, ")"); + ADD_WHERE(excl_clause); + } + + /* Build full SQL */ + if (nparams > 0) { + snprintf(sql, sizeof(sql), "%s FROM nodes n WHERE %s", select_cols, where); + } else { + snprintf(sql, sizeof(sql), "%s FROM nodes n", select_cols); + } + + /* Degree filters: -1 = no filter, 0+ = active filter. 
+ * Wraps in subquery to filter on computed degree columns. */ + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool has_degree_filter = (params->min_degree >= 0 || params->max_degree >= 0); + if (has_degree_filter) { + char inner_sql[4096]; + snprintf(inner_sql, sizeof(inner_sql), "%s", sql); + if (params->min_degree >= 0 && params->max_degree >= 0) { + snprintf( + sql, sizeof(sql), + "SELECT * FROM (%s) WHERE (in_deg + out_deg) >= %d AND (in_deg + out_deg) <= %d", + inner_sql, params->min_degree, params->max_degree); + } else if (params->min_degree >= 0) { + snprintf(sql, sizeof(sql), "SELECT * FROM (%s) WHERE (in_deg + out_deg) >= %d", + inner_sql, params->min_degree); + } else { + snprintf(sql, sizeof(sql), "SELECT * FROM (%s) WHERE (in_deg + out_deg) <= %d", + inner_sql, params->max_degree); + } + } + + /* Count query (wrap the full query) */ + snprintf(count_sql, sizeof(count_sql), "SELECT COUNT(*) FROM (%s)", sql); + + /* Add ORDER BY + LIMIT. + * When degree filter wraps in subquery, column refs lose the "n." prefix. */ + int limit = params->limit > 0 ? params->limit : 500000; + int offset = params->offset; + bool has_degree_wrap = has_degree_filter; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + const char *name_col = has_degree_wrap ? 
"name" : "n.name"; + char order_limit[128]; + snprintf(order_limit, sizeof(order_limit), " ORDER BY %s LIMIT %d OFFSET %d", name_col, limit, + offset); + strncat(sql, order_limit, sizeof(sql) - strlen(sql) - 1); + + /* Execute count query */ + sqlite3_stmt *cnt_stmt = NULL; + int rc = sqlite3_prepare_v2(s->db, count_sql, -1, &cnt_stmt, NULL); + if (rc == SQLITE_OK) { + for (int i = 0; i < bind_idx; i++) { + bind_text(cnt_stmt, i + 1, binds[i].text); + } + if (sqlite3_step(cnt_stmt) == SQLITE_ROW) { + out->total = sqlite3_column_int(cnt_stmt, 0); + } + sqlite3_finalize(cnt_stmt); + } + + /* Execute main query */ + sqlite3_stmt *main_stmt = NULL; + rc = sqlite3_prepare_v2(s->db, sql, -1, &main_stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "search prepare"); + free(like_pattern); + return CBM_STORE_ERR; + } + + for (int i = 0; i < bind_idx; i++) { + bind_text(main_stmt, i + 1, binds[i].text); + } + + int cap = 16; + int n = 0; + cbm_search_result_t *results = malloc(cap * sizeof(cbm_search_result_t)); + + while (sqlite3_step(main_stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + results = safe_realloc(results, cap * sizeof(cbm_search_result_t)); + } + memset(&results[n], 0, sizeof(cbm_search_result_t)); + scan_node(main_stmt, &results[n].node); + results[n].in_degree = sqlite3_column_int(main_stmt, 9); + results[n].out_degree = sqlite3_column_int(main_stmt, 10); + n++; + } + + sqlite3_finalize(main_stmt); + free(like_pattern); + + out->results = results; + out->count = n; + return CBM_STORE_OK; +} + +void cbm_store_search_free(cbm_search_output_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->count; i++) { + cbm_search_result_t *r = &out->results[i]; + free((void *)r->node.project); + free((void *)r->node.label); + free((void *)r->node.name); + free((void *)r->node.qualified_name); + free((void *)r->node.file_path); + free((void *)r->node.properties_json); + for (int j = 0; j < r->connected_count; j++) { + free((void 
*)r->connected_names[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(r->connected_names); + } + free(out->results); + memset(out, 0, sizeof(*out)); +} + +/* ── BFS Traversal ──────────────────────────────────────────────── */ + +int cbm_store_bfs(cbm_store_t *s, int64_t start_id, const char *direction, const char **edge_types, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + int edge_type_count, int max_depth, int max_results, cbm_traverse_result_t *out) { + memset(out, 0, sizeof(*out)); + + /* Load root node */ + cbm_node_t root = {0}; + int rc = cbm_store_find_node_by_id(s, start_id, &root); + if (rc != CBM_STORE_OK) { + return rc; + } + out->root = root; + + /* Build edge type IN clause */ + char types_clause[512] = "'CALLS'"; + if (edge_type_count > 0) { + int tlen = 0; + for (int i = 0; i < edge_type_count; i++) { + if (i > 0) { + tlen += snprintf(types_clause + tlen, sizeof(types_clause) - (size_t)tlen, ","); + if (tlen >= (int)sizeof(types_clause)) { + tlen = (int)sizeof(types_clause) - 1; + } + } + tlen += snprintf(types_clause + tlen, sizeof(types_clause) - (size_t)tlen, "'%s'", + edge_types[i]); + if (tlen >= (int)sizeof(types_clause)) { + tlen = (int)sizeof(types_clause) - 1; + } + } + } + + /* Build recursive CTE for BFS */ + char sql[4096]; + const char *join_cond; + const char *next_id; + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + bool is_inbound = direction && strcmp(direction, "inbound") == 0; + + if (is_inbound) { + join_cond = "e.target_id = bfs.node_id"; + next_id = "e.source_id"; + } else { + join_cond = "e.source_id = bfs.node_id"; + next_id = "e.target_id"; + } + + snprintf(sql, sizeof(sql), + "WITH RECURSIVE bfs(node_id, hop) AS (" + " SELECT %lld, 0" + " UNION" + " SELECT %s, bfs.hop + 1" + " FROM bfs" + " JOIN edges e ON %s" + " WHERE e.type IN (%s) AND bfs.hop < %d" + ")" + "SELECT DISTINCT n.id, n.project, n.label, n.name, n.qualified_name, " + "n.file_path, n.start_line, 
n.end_line, n.properties, bfs.hop " + "FROM bfs " + "JOIN nodes n ON n.id = bfs.node_id " + "WHERE bfs.hop > 0 " /* exclude root */ + "ORDER BY bfs.hop " + "LIMIT %d;", + (long long)start_id, next_id, join_cond, types_clause, max_depth, max_results); + + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + if (rc != SQLITE_OK) { + store_set_error_sqlite(s, "bfs prepare"); + return CBM_STORE_ERR; + } + + int cap = 16; + int n = 0; + cbm_node_hop_t *visited = malloc(cap * sizeof(cbm_node_hop_t)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + visited = safe_realloc(visited, cap * sizeof(cbm_node_hop_t)); + } + scan_node(stmt, &visited[n].node); + visited[n].hop = sqlite3_column_int(stmt, 9); + n++; + } + + sqlite3_finalize(stmt); + + out->visited = visited; + out->visited_count = n; + + /* Collect edges between visited nodes (including root) */ + if (n > 0) { + /* Build ID set: root + all visited */ + char id_set[4096]; + int ilen = snprintf(id_set, sizeof(id_set), "%lld", (long long)start_id); + if (ilen >= (int)sizeof(id_set)) { + ilen = (int)sizeof(id_set) - 1; + } + for (int i = 0; i < n; i++) { + ilen += snprintf(id_set + ilen, sizeof(id_set) - (size_t)ilen, ",%lld", + (long long)out->visited[i].node.id); + if (ilen >= (int)sizeof(id_set)) { + ilen = (int)sizeof(id_set) - 1; + } + } + + char edge_sql[8192]; + snprintf(edge_sql, sizeof(edge_sql), + "SELECT n1.name, n2.name, e.type " + "FROM edges e " + "JOIN nodes n1 ON n1.id = e.source_id " + "JOIN nodes n2 ON n2.id = e.target_id " + "WHERE e.source_id IN (%s) AND e.target_id IN (%s) " + "AND e.type IN (%s)", + id_set, id_set, types_clause); + + sqlite3_stmt *estmt = NULL; + rc = sqlite3_prepare_v2(s->db, edge_sql, -1, &estmt, NULL); + if (rc == SQLITE_OK) { + int ecap = 8; + int en = 0; + cbm_edge_info_t *edges = malloc(ecap * sizeof(cbm_edge_info_t)); + + while (sqlite3_step(estmt) == SQLITE_ROW) { + if (en >= ecap) { + ecap *= 2; + edges = 
safe_realloc(edges, ecap * sizeof(cbm_edge_info_t)); + } + edges[en].from_name = heap_strdup((const char *)sqlite3_column_text(estmt, 0)); + edges[en].to_name = heap_strdup((const char *)sqlite3_column_text(estmt, 1)); + edges[en].type = heap_strdup((const char *)sqlite3_column_text(estmt, 2)); + edges[en].confidence = 1.0; + en++; + } + sqlite3_finalize(estmt); + + out->edges = edges; + out->edge_count = en; + } + } else { + out->edges = NULL; + out->edge_count = 0; + } + + return CBM_STORE_OK; +} + +void cbm_store_traverse_free(cbm_traverse_result_t *out) { + if (!out) { + return; + } + /* Free root */ + free((void *)out->root.project); + free((void *)out->root.label); + free((void *)out->root.name); + free((void *)out->root.qualified_name); + free((void *)out->root.file_path); + free((void *)out->root.properties_json); + + /* Free visited */ + for (int i = 0; i < out->visited_count; i++) { + cbm_node_hop_t *h = &out->visited[i]; + free((void *)h->node.project); + free((void *)h->node.label); + free((void *)h->node.name); + free((void *)h->node.qualified_name); + free((void *)h->node.file_path); + free((void *)h->node.properties_json); + } + free(out->visited); + + /* Free edges */ + for (int i = 0; i < out->edge_count; i++) { + free((void *)out->edges[i].from_name); + free((void *)out->edges[i].to_name); + free((void *)out->edges[i].type); + } + free(out->edges); + + memset(out, 0, sizeof(*out)); +} + +/* ── Impact analysis ────────────────────────────────────────────── */ + +cbm_risk_level_t cbm_hop_to_risk(int hop) { + switch (hop) { + case 1: + return CBM_RISK_CRITICAL; + case 2: + return CBM_RISK_HIGH; + case 3: + return CBM_RISK_MEDIUM; + default: + return CBM_RISK_LOW; + } +} + +const char *cbm_risk_label(cbm_risk_level_t level) { + switch (level) { + case CBM_RISK_CRITICAL: + return "CRITICAL"; + case CBM_RISK_HIGH: + return "HIGH"; + case CBM_RISK_MEDIUM: + return "MEDIUM"; + case CBM_RISK_LOW: + default: + return "LOW"; + } +} + +cbm_impact_summary_t 
cbm_build_impact_summary(const cbm_node_hop_t *hops, int hop_count, + const cbm_edge_info_t *edges, int edge_count) { + cbm_impact_summary_t s = {0}; + for (int i = 0; i < hop_count; i++) { + switch (cbm_hop_to_risk(hops[i].hop)) { + case CBM_RISK_CRITICAL: + s.critical++; + break; + case CBM_RISK_HIGH: + s.high++; + break; + case CBM_RISK_MEDIUM: + s.medium++; + break; + case CBM_RISK_LOW: + s.low++; + break; + } + s.total++; + } + for (int i = 0; i < edge_count; i++) { + if (edges[i].type && (strcmp(edges[i].type, "HTTP_CALLS") == 0 || + strcmp(edges[i].type, "ASYNC_CALLS") == 0)) { + s.has_cross_service = true; + break; + } + } + return s; +} + +int cbm_deduplicate_hops(const cbm_node_hop_t *hops, int hop_count, cbm_node_hop_t **out, + int *out_count) { + *out = NULL; + *out_count = 0; + if (hop_count == 0) { + return CBM_STORE_OK; + } + + /* Simple O(n²) dedup — keep minimum hop per node ID */ + cbm_node_hop_t *result = malloc(hop_count * sizeof(cbm_node_hop_t)); + int n = 0; + + for (int i = 0; i < hop_count; i++) { + int found = -1; + for (int j = 0; j < n; j++) { + if (result[j].node.id == hops[i].node.id) { + found = j; + break; + } + } + if (found >= 0) { + if (hops[i].hop < result[found].hop) { + result[found].hop = hops[i].hop; + } + } else { + result[n] = hops[i]; + n++; + } + } + + *out = safe_realloc(result, n * sizeof(cbm_node_hop_t)); + *out_count = n; + return CBM_STORE_OK; +} + +/* ── Schema ─────────────────────────────────────────────────────── */ + +int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t *out) { + memset(out, 0, sizeof(*out)); + if (!s || !s->db) { + return -1; + } + + /* Node labels */ + { + const char *sql = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1 GROUP BY label " + "ORDER BY COUNT(*) DESC;"; + sqlite3_stmt *stmt = NULL; + sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + cbm_label_count_t *arr = malloc(cap * 
sizeof(cbm_label_count_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_label_count_t)); + } + arr[n].label = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].count = sqlite3_column_int(stmt, 1); + n++; + } + sqlite3_finalize(stmt); + out->node_labels = arr; + out->node_label_count = n; + } + + /* Edge types */ + { + const char *sql = "SELECT type, COUNT(*) FROM edges WHERE project = ?1 GROUP BY type ORDER " + "BY COUNT(*) DESC;"; + sqlite3_stmt *stmt = NULL; + sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + cbm_type_count_t *arr = malloc(cap * sizeof(cbm_type_count_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_type_count_t)); + } + arr[n].type = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].count = sqlite3_column_int(stmt, 1); + n++; + } + sqlite3_finalize(stmt); + out->edge_types = arr; + out->edge_type_count = n; + } + + return CBM_STORE_OK; +} + +void cbm_store_schema_free(cbm_schema_info_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->node_label_count; i++) { + free((void *)out->node_labels[i].label); + } + free(out->node_labels); + + for (int i = 0; i < out->edge_type_count; i++) { + free((void *)out->edge_types[i].type); + } + free(out->edge_types); + + for (int i = 0; i < out->rel_pattern_count; i++) { + free((void *)out->rel_patterns[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->rel_patterns); + + for (int i = 0; i < out->sample_func_count; i++) { + free((void *)out->sample_func_names[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->sample_func_names); + + for (int i = 0; i < out->sample_class_count; i++) { + free((void *)out->sample_class_names[i]); + } + // 
NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->sample_class_names); + + for (int i = 0; i < out->sample_qn_count; i++) { + free((void *)out->sample_qns[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->sample_qns); + + memset(out, 0, sizeof(*out)); +} + +/* ── Architecture helpers ───────────────────────────────────────── */ + +/* Extract sub-package from QN: project.dir1.dir2.sym → dir1 (4+ parts → [2], else [1]) */ +const char *cbm_qn_to_package(const char *qn) { + if (!qn || !qn[0]) { + return ""; + } + static CBM_TLS char buf[256]; + /* Find dots and extract segment */ + const char *dots[5] = {NULL}; + int ndots = 0; + for (const char *p = qn; *p && ndots < 5; p++) { + if (*p == '.') { + dots[ndots++] = p; + } + } + /* 4+ segments: return segment[2] */ + if (ndots >= 3) { + const char *start = dots[1] + 1; + int len = (int)(dots[2] - start); + if (len > 0 && len < (int)sizeof(buf)) { + memcpy(buf, start, len); + buf[len] = '\0'; + return buf; + } + } + /* 2+ segments: return segment[1] */ + if (ndots >= 1) { + const char *start = dots[0] + 1; + const char *end = (ndots >= 2) ? dots[1] : qn + strlen(qn); + int len = (int)(end - start); + if (len > 0 && len < (int)sizeof(buf)) { + memcpy(buf, start, len); + buf[len] = '\0'; + return buf; + } + } + return ""; +} + +/* Extract top-level package from QN: project.dir1.rest → dir1 (segment[1]) */ +const char *cbm_qn_to_top_package(const char *qn) { + if (!qn || !qn[0]) { + return ""; + } + static CBM_TLS char buf[256]; + const char *first_dot = strchr(qn, '.'); + if (!first_dot) { + return ""; + } + const char *start = first_dot + 1; + const char *second_dot = strchr(start, '.'); + const char *end = second_dot ? 
second_dot : qn + strlen(qn); + int len = (int)(end - start); + if (len > 0 && len < (int)sizeof(buf)) { + memcpy(buf, start, len); + buf[len] = '\0'; + return buf; + } + return ""; +} + +bool cbm_is_test_file_path(const char *fp) { + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + return fp && fp[0] && strstr(fp, "test") != NULL; +} + +/* File extension → language name mapping */ +static const char *ext_to_lang(const char *ext) { + if (!ext) { + return NULL; + } + /* Common extensions */ + if (strcmp(ext, ".py") == 0) { + return "Python"; + } + if (strcmp(ext, ".go") == 0) { + return "Go"; + } + if (strcmp(ext, ".js") == 0 || strcmp(ext, ".jsx") == 0) { + return "JavaScript"; + } + if (strcmp(ext, ".ts") == 0 || strcmp(ext, ".tsx") == 0) { + return "TypeScript"; + } + if (strcmp(ext, ".rs") == 0) { + return "Rust"; + } + if (strcmp(ext, ".java") == 0) { + return "Java"; + } + if (strcmp(ext, ".cpp") == 0 || strcmp(ext, ".cc") == 0 || strcmp(ext, ".cxx") == 0) { + return "C++"; + } + if (strcmp(ext, ".c") == 0 || strcmp(ext, ".h") == 0) { + return "C"; + } + if (strcmp(ext, ".cs") == 0) { + return "C#"; + } + if (strcmp(ext, ".php") == 0) { + return "PHP"; + } + if (strcmp(ext, ".lua") == 0) { + return "Lua"; + } + if (strcmp(ext, ".scala") == 0) { + return "Scala"; + } + if (strcmp(ext, ".kt") == 0) { + return "Kotlin"; + } + if (strcmp(ext, ".rb") == 0) { + return "Ruby"; + } + if (strcmp(ext, ".sh") == 0 || strcmp(ext, ".bash") == 0) { + return "Bash"; + } + if (strcmp(ext, ".zig") == 0) { + return "Zig"; + } + if (strcmp(ext, ".ex") == 0 || strcmp(ext, ".exs") == 0) { + return "Elixir"; + } + if (strcmp(ext, ".hs") == 0) { + return "Haskell"; + } + if (strcmp(ext, ".ml") == 0 || strcmp(ext, ".mli") == 0) { + return "OCaml"; + } + if (strcmp(ext, ".html") == 0) { + return "HTML"; + } + if (strcmp(ext, ".css") == 0) { + return "CSS"; + } + if (strcmp(ext, ".yaml") == 0 || strcmp(ext, ".yml") == 0) { + return "YAML"; + } + if (strcmp(ext, ".toml") == 0) 
{ + return "TOML"; + } + if (strcmp(ext, ".hcl") == 0 || strcmp(ext, ".tf") == 0) { + return "HCL"; + } + if (strcmp(ext, ".sql") == 0) { + return "SQL"; + } + if (strcmp(ext, ".erl") == 0) { + return "Erlang"; + } + if (strcmp(ext, ".swift") == 0) { + return "Swift"; + } + if (strcmp(ext, ".dart") == 0) { + return "Dart"; + } + if (strcmp(ext, ".groovy") == 0) { + return "Groovy"; + } + if (strcmp(ext, ".pl") == 0) { + return "Perl"; + } + if (strcmp(ext, ".r") == 0) { + return "R"; + } + if (strcmp(ext, ".scss") == 0) { + return "SCSS"; + } + if (strcmp(ext, ".vue") == 0) { + return "Vue"; + } + if (strcmp(ext, ".svelte") == 0) { + return "Svelte"; + } + return NULL; +} + +/* Get lowercase file extension from path */ +static const char *file_ext(const char *path) { + if (!path) { + return NULL; + } + const char *dot = strrchr(path, '.'); + if (!dot) { + return NULL; + } + static CBM_TLS char buf[16]; + int len = (int)strlen(dot); + if (len >= (int)sizeof(buf)) { + return NULL; + } + for (int i = 0; i < len; i++) { + buf[i] = (char)((dot[i] >= 'A' && dot[i] <= 'Z') ? 
dot[i] + 32 : dot[i]); + } + buf[len] = '\0'; + return buf; +} + +/* ── Architecture aspect implementations ───────────────────────── */ + +static int arch_languages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_languages"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + /* Count per language using a simple parallel array */ + const char *lang_names[64]; + int lang_counts[64]; + int nlang = 0; + + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *fp = (const char *)sqlite3_column_text(stmt, 0); + const char *ext = file_ext(fp); + const char *lang = ext_to_lang(ext); + if (!lang) { + continue; + } + int found = -1; + for (int i = 0; i < nlang; i++) { + if (strcmp(lang_names[i], lang) == 0) { + found = i; + break; + } + } + if (found >= 0) { + lang_counts[found]++; + } else if (nlang < 64) { + lang_names[nlang] = lang; + lang_counts[nlang] = 1; + nlang++; + } + } + sqlite3_finalize(stmt); + + /* Sort by count descending (simple insertion sort) */ + for (int i = 1; i < nlang; i++) { + int j = i; + while (j > 0 && lang_counts[j] > lang_counts[j - 1]) { + int tc = lang_counts[j]; + lang_counts[j] = lang_counts[j - 1]; + lang_counts[j - 1] = tc; + const char *tn = lang_names[j]; + lang_names[j] = lang_names[j - 1]; + lang_names[j - 1] = tn; + j--; + } + } + if (nlang > 10) { + nlang = 10; + } + + // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) + out->languages = calloc(nlang, sizeof(cbm_language_count_t)); + out->language_count = nlang; + for (int i = 0; i < nlang; i++) { + out->languages[i].language = heap_strdup(lang_names[i]); + out->languages[i].file_count = lang_counts[i]; + } + return CBM_STORE_OK; +} + +static int arch_entry_points(cbm_store_t *s, const char *project, cbm_architecture_info_t 
*out) { + const char *sql = "SELECT name, qualified_name, file_path FROM nodes " + "WHERE project=?1 AND json_extract(properties, '$.is_entry_point') = 1 " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "AND file_path NOT LIKE '%test%' LIMIT 20"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_entry_points"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + cbm_entry_point_t *arr = calloc(cap, sizeof(cbm_entry_point_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_entry_point_t)); + } + arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[n].file = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + n++; + } + sqlite3_finalize(stmt); + out->entry_points = arr; + out->entry_point_count = n; + return CBM_STORE_OK; +} + +static int arch_routes(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + const char *sql = "SELECT name, properties, COALESCE(file_path, '') FROM nodes " + "WHERE project=?1 AND label='Route' " + "AND (json_extract(properties, '$.is_test') IS NULL OR " + "json_extract(properties, '$.is_test') != 1) " + "LIMIT 20"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_routes"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + cbm_route_info_t *arr = calloc(cap, sizeof(cbm_route_info_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(stmt, 0); + const char *props = (const char *)sqlite3_column_text(stmt, 1); + const char *fp = (const char *)sqlite3_column_text(stmt, 2); + if 
(cbm_is_test_file_path(fp)) { + continue; + } + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_route_info_t)); + } + + /* Parse JSON properties for method, path, handler */ + arr[n].method = heap_strdup(""); + arr[n].path = heap_strdup(name); + arr[n].handler = heap_strdup(""); + + if (props) { + /* Simple JSON extraction — find "method":"...", "path":"...", "handler":"..." */ + const char *m; + char vbuf[256]; + m = strstr(props, "\"method\""); + if (m) { + m = strchr(m + 8, '"'); + if (m) { + m++; + const char *end = strchr(m, '"'); + if (end && end - m < (int)sizeof(vbuf)) { + memcpy(vbuf, m, end - m); + vbuf[end - m] = '\0'; + free((void *)arr[n].method); + arr[n].method = heap_strdup(vbuf); + } + } + } + m = strstr(props, "\"path\""); + if (m) { + m = strchr(m + 6, '"'); + if (m) { + m++; + const char *end = strchr(m, '"'); + if (end && end - m < (int)sizeof(vbuf)) { + memcpy(vbuf, m, end - m); + vbuf[end - m] = '\0'; + free((void *)arr[n].path); + arr[n].path = heap_strdup(vbuf); + } + } + } + m = strstr(props, "\"handler\""); + if (m) { + m = strchr(m + 9, '"'); + if (m) { + m++; + const char *end = strchr(m, '"'); + if (end && end - m < (int)sizeof(vbuf)) { + memcpy(vbuf, m, end - m); + vbuf[end - m] = '\0'; + free((void *)arr[n].handler); + arr[n].handler = heap_strdup(vbuf); + } + } + } + } + n++; + } + sqlite3_finalize(stmt); + out->routes = arr; + out->route_count = n; + return CBM_STORE_OK; +} + +static int arch_hotspots(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + const char *sql = "SELECT n.name, n.qualified_name, COUNT(*) as fan_in " + "FROM nodes n JOIN edges e ON e.target_id = n.id AND e.type = 'CALLS' " + "WHERE n.project=?1 AND n.label IN ('Function', 'Method') " + "AND (json_extract(n.properties, '$.is_test') IS NULL OR " + "json_extract(n.properties, '$.is_test') != 1) " + "AND n.file_path NOT LIKE '%test%' " + "GROUP BY n.id ORDER BY fan_in DESC LIMIT 10"; + sqlite3_stmt *stmt = NULL; + if 
(sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_hotspots"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + cbm_hotspot_t *arr = calloc(cap, sizeof(cbm_hotspot_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_hotspot_t)); + } + arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].qualified_name = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + arr[n].fan_in = sqlite3_column_int(stmt, 2); + n++; + } + sqlite3_finalize(stmt); + out->hotspots = arr; + out->hotspot_count = n; + return CBM_STORE_OK; +} + +static int arch_boundaries(cbm_store_t *s, const char *project, cbm_cross_pkg_boundary_t **out_arr, + int *out_count) { + /* Build nodeID → package map */ + const char *nsql = "SELECT id, qualified_name FROM nodes WHERE project=?1 AND label IN " + "('Function','Method','Class')"; + sqlite3_stmt *nstmt = NULL; + if (sqlite3_prepare_v2(s->db, nsql, -1, &nstmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_boundaries_nodes"); + return CBM_STORE_ERR; + } + bind_text(nstmt, 1, project); + + /* Simple parallel arrays for node → package mapping */ + int ncap = 256; + int nn = 0; + int64_t *nids = malloc(ncap * sizeof(int64_t)); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **npkgs = malloc(ncap * sizeof(char *)); + + while (sqlite3_step(nstmt) == SQLITE_ROW) { + if (nn >= ncap) { + ncap *= 2; + nids = safe_realloc(nids, ncap * sizeof(int64_t)); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + npkgs = safe_realloc(npkgs, ncap * sizeof(char *)); + } + nids[nn] = sqlite3_column_int64(nstmt, 0); + const char *qn = (const char *)sqlite3_column_text(nstmt, 1); + npkgs[nn] = heap_strdup(cbm_qn_to_package(qn)); + nn++; + } + sqlite3_finalize(nstmt); + + /* Scan edges, count cross-package calls */ + const char *esql = 
"SELECT source_id, target_id FROM edges WHERE project=?1 AND type='CALLS'"; + sqlite3_stmt *estmt = NULL; + if (sqlite3_prepare_v2(s->db, esql, -1, &estmt, NULL) != SQLITE_OK) { + for (int i = 0; i < nn; i++) { + free(npkgs[i]); + } + free(nids); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(npkgs); + store_set_error_sqlite(s, "arch_boundaries_edges"); + return CBM_STORE_ERR; + } + bind_text(estmt, 1, project); + + /* Boundary counts: parallel arrays for from→to→count */ + int bcap = 32; + int bn = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **bfroms = malloc(bcap * sizeof(char *)); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **btos = malloc(bcap * sizeof(char *)); + int *bcounts = malloc(bcap * sizeof(int)); + + while (sqlite3_step(estmt) == SQLITE_ROW) { + int64_t src_id = sqlite3_column_int64(estmt, 0); + int64_t tgt_id = sqlite3_column_int64(estmt, 1); + const char *src_pkg = NULL; + const char *tgt_pkg = NULL; + for (int i = 0; i < nn; i++) { + if (nids[i] == src_id) { + src_pkg = npkgs[i]; + } + if (nids[i] == tgt_id) { + tgt_pkg = npkgs[i]; + } + } + if (!src_pkg || !tgt_pkg || !src_pkg[0] || !tgt_pkg[0] || strcmp(src_pkg, tgt_pkg) == 0) { + continue; + } + + int found = -1; + for (int i = 0; i < bn; i++) { + if (strcmp(bfroms[i], src_pkg) == 0 && strcmp(btos[i], tgt_pkg) == 0) { + found = i; + break; + } + } + if (found >= 0) { + bcounts[found]++; + } else if (bn < bcap) { + bfroms[bn] = heap_strdup(src_pkg); + btos[bn] = heap_strdup(tgt_pkg); + bcounts[bn] = 1; + bn++; + } + } + sqlite3_finalize(estmt); + for (int i = 0; i < nn; i++) { + free(npkgs[i]); + } + free(nids); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(npkgs); + + /* Sort by count descending */ + for (int i = 1; i < bn; i++) { + int j = i; + while (j > 0 && bcounts[j] > bcounts[j - 1]) { + int tc = bcounts[j]; + bcounts[j] = bcounts[j - 1]; + bcounts[j - 1] = tc; 
+ char *tf = bfroms[j]; + bfroms[j] = bfroms[j - 1]; + bfroms[j - 1] = tf; + char *tt = btos[j]; + btos[j] = btos[j - 1]; + btos[j - 1] = tt; + j--; + } + } + if (bn > 10) { + for (int i = 10; i < bn; i++) { + free(bfroms[i]); + free(btos[i]); + } + bn = 10; + } + + // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) + cbm_cross_pkg_boundary_t *result = calloc(bn, sizeof(cbm_cross_pkg_boundary_t)); + for (int i = 0; i < bn; i++) { + result[i].from = bfroms[i]; + result[i].to = btos[i]; + result[i].call_count = bcounts[i]; + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(bfroms); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(btos); + free(bcounts); + *out_arr = result; + *out_count = bn; + return CBM_STORE_OK; +} + +static int arch_packages(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + /* Try Package nodes first */ + const char *sql = + "SELECT n.name, COUNT(*) as cnt FROM nodes n " + "WHERE n.project=?1 AND n.label='Package' GROUP BY n.name ORDER BY cnt DESC LIMIT 15"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_packages"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 16; + int n = 0; + cbm_package_summary_t *arr = calloc(cap, sizeof(cbm_package_summary_t)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + arr = safe_realloc(arr, cap * sizeof(cbm_package_summary_t)); + } + arr[n].name = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + arr[n].node_count = sqlite3_column_int(stmt, 1); + n++; + } + sqlite3_finalize(stmt); + + /* Fallback: group by QN segment if no Package nodes */ + if (n == 0) { + free(arr); + const char *qsql = "SELECT qualified_name FROM nodes WHERE project=?1 AND label IN " + "('Function','Method','Class')"; + if (sqlite3_prepare_v2(s->db, qsql, -1, &stmt, NULL) != SQLITE_OK) { + 
store_set_error_sqlite(s, "arch_packages_qn"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + /* Count per package using parallel arrays */ + char *pnames[64]; + int pcounts[64]; + int np = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *qn = (const char *)sqlite3_column_text(stmt, 0); + const char *pkg = cbm_qn_to_package(qn); + if (!pkg[0]) { + continue; + } + int found = -1; + for (int i = 0; i < np; i++) { + if (strcmp(pnames[i], pkg) == 0) { + found = i; + break; + } + } + if (found >= 0) { + { + pcounts[found]++; + } + } else if (np < 64) { + pnames[np] = heap_strdup(pkg); + pcounts[np] = 1; + np++; + } + } + sqlite3_finalize(stmt); + + /* Sort by count desc */ + for (int i = 1; i < np; i++) { + int j = i; + while (j > 0 && pcounts[j] > pcounts[j - 1]) { + int tc = pcounts[j]; + pcounts[j] = pcounts[j - 1]; + pcounts[j - 1] = tc; + char *tn = pnames[j]; + pnames[j] = pnames[j - 1]; + pnames[j - 1] = tn; + j--; + } + } +#define MAX_PREVIEW_NAMES 15 + if (np > MAX_PREVIEW_NAMES) { + for (int i = MAX_PREVIEW_NAMES; i < np; i++) { + free(pnames[i]); + } + np = MAX_PREVIEW_NAMES; + } + + // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) + arr = calloc(np, sizeof(cbm_package_summary_t)); + n = np; + for (int i = 0; i < np; i++) { + arr[i].name = pnames[i]; + arr[i].node_count = pcounts[i]; + } + } + + out->packages = arr; + out->package_count = n; + return CBM_STORE_OK; +} + +static void classify_layer(const char *pkg, int in, int out_deg, bool has_routes, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + bool has_entry_points, const char **layer, const char **reason) { + static CBM_TLS char reason_buf[128]; + if (has_entry_points && out_deg > 0 && in == 0) { + *layer = "entry"; + *reason = "has entry points, only outbound calls"; + return; + } + if (has_routes) { + *layer = "api"; + *reason = "has HTTP route definitions"; + return; + } + if (in > out_deg && in > 3) { + snprintf(reason_buf, sizeof(reason_buf), "high 
fan-in (%d in, %d out)", in, out_deg); + *layer = "core"; + *reason = reason_buf; + return; + } + if (out_deg == 0 && in > 0) { + *layer = "leaf"; + *reason = "only inbound calls, no outbound"; + return; + } + if (in == 0 && out_deg > 0) { + *layer = "entry"; + *reason = "only outbound calls"; + return; + } + snprintf(reason_buf, sizeof(reason_buf), "fan-in=%d, fan-out=%d", in, out_deg); + *layer = "internal"; + *reason = reason_buf; + (void)pkg; +} + +static int arch_layers(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + /* Get boundaries for fan analysis */ + cbm_cross_pkg_boundary_t *boundaries = NULL; + int bcount = 0; + int rc = arch_boundaries(s, project, &boundaries, &bcount); + if (rc != CBM_STORE_OK) { + return rc; + } + + /* Check which packages have Route nodes */ + char *route_pkgs[32]; + int nrpkgs = 0; + { + const char *sql = "SELECT qualified_name FROM nodes WHERE project=?1 AND label='Route'"; + sqlite3_stmt *stmt = NULL; + sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW && nrpkgs < 32) { + const char *qn = (const char *)sqlite3_column_text(stmt, 0); + route_pkgs[nrpkgs++] = heap_strdup(cbm_qn_to_package(qn)); + } + sqlite3_finalize(stmt); + } + + /* Check which packages have entry points */ + char *entry_pkgs[32]; + int nepkgs = 0; + { + const char *sql = "SELECT qualified_name FROM nodes WHERE project=?1 AND " + "json_extract(properties, '$.is_entry_point') = 1"; + sqlite3_stmt *stmt = NULL; + sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL); + bind_text(stmt, 1, project); + while (sqlite3_step(stmt) == SQLITE_ROW && nepkgs < 32) { + const char *qn = (const char *)sqlite3_column_text(stmt, 0); + entry_pkgs[nepkgs++] = heap_strdup(cbm_qn_to_package(qn)); + } + sqlite3_finalize(stmt); + } + + /* Compute fan-in/out per package */ + char *all_pkgs[64]; + int fan_in[64]; + int fan_out[64]; + int npkgs = 0; + memset(fan_in, 0, sizeof(fan_in)); + 
memset(fan_out, 0, sizeof(fan_out)); + + for (int i = 0; i < bcount; i++) { + /* Add or find "from" package */ + int fi = -1; + for (int j = 0; j < npkgs; j++) { + if (strcmp(all_pkgs[j], boundaries[i].from) == 0) { + fi = j; + break; + } + } + if (fi < 0 && npkgs < 64) { + fi = npkgs; + all_pkgs[npkgs] = heap_strdup(boundaries[i].from); + npkgs++; + } + if (fi >= 0) { + fan_out[fi] += boundaries[i].call_count; + } + + int ti = -1; + for (int j = 0; j < npkgs; j++) { + if (strcmp(all_pkgs[j], boundaries[i].to) == 0) { + ti = j; + break; + } + } + if (ti < 0 && npkgs < 64) { + ti = npkgs; + all_pkgs[npkgs] = heap_strdup(boundaries[i].to); + npkgs++; + } + if (ti >= 0) { + fan_in[ti] += boundaries[i].call_count; + } + } + + /* Also include route/entry packages */ + for (int i = 0; i < nrpkgs; i++) { + int found = -1; + for (int j = 0; j < npkgs; j++) { + if (strcmp(all_pkgs[j], route_pkgs[i]) == 0) { + found = j; + break; + } + } + if (found < 0 && npkgs < 64) { + all_pkgs[npkgs] = heap_strdup(route_pkgs[i]); + npkgs++; + } + } + for (int i = 0; i < nepkgs; i++) { + int found = -1; + for (int j = 0; j < npkgs; j++) { + if (strcmp(all_pkgs[j], entry_pkgs[i]) == 0) { + found = j; + break; + } + } + if (found < 0 && npkgs < 64) { + all_pkgs[npkgs] = heap_strdup(entry_pkgs[i]); + npkgs++; + } + } + + /* Classify each package */ + // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI) + out->layers = calloc(npkgs, sizeof(cbm_package_layer_t)); + out->layer_count = npkgs; + for (int i = 0; i < npkgs; i++) { + bool has_route = false, has_entry = false; + for (int j = 0; j < nrpkgs; j++) { + if (strcmp(all_pkgs[i], route_pkgs[j]) == 0) { + has_route = true; + break; + } + } + for (int j = 0; j < nepkgs; j++) { + if (strcmp(all_pkgs[i], entry_pkgs[j]) == 0) { + has_entry = true; + break; + } + } + const char *layer; + const char *reason; + classify_layer(all_pkgs[i], fan_in[i], fan_out[i], has_route, has_entry, &layer, &reason); + out->layers[i].name = all_pkgs[i]; /* 
transfer ownership */ + out->layers[i].layer = heap_strdup(layer); + out->layers[i].reason = heap_strdup(reason); + } + + /* Sort layers by name */ + for (int i = 1; i < npkgs; i++) { + int j = i; + while (j > 0 && strcmp(out->layers[j].name, out->layers[j - 1].name) < 0) { + cbm_package_layer_t tmp = out->layers[j]; + out->layers[j] = out->layers[j - 1]; + out->layers[j - 1] = tmp; + j--; + } + } + + /* Cleanup */ + for (int i = 0; i < bcount; i++) { + free((void *)boundaries[i].from); + free((void *)boundaries[i].to); + } + free(boundaries); + for (int i = 0; i < nrpkgs; i++) { + free(route_pkgs[i]); + } + for (int i = 0; i < nepkgs; i++) { + free(entry_pkgs[i]); + } + + return CBM_STORE_OK; +} + +static int arch_file_tree(cbm_store_t *s, const char *project, cbm_architecture_info_t *out) { + const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File'"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "arch_file_tree"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + /* Collect all file paths + build directory children map */ + int fcap = 32; + int fn = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **files = malloc(fcap * sizeof(char *)); + + /* Directory tree: parallel arrays of dir → children set */ + int dcap = 64; + int dn = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **dir_paths = calloc(dcap, sizeof(char *)); + int *dir_child_counts = calloc(dcap, sizeof(int)); + /* Track unique children per dir using a simple string array */ + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char ***dir_children = calloc(dcap, sizeof(char **)); + int *dir_children_caps = calloc(dcap, sizeof(int)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *fp = (const char *)sqlite3_column_text(stmt, 0); + if (!fp) { + continue; + } + if (fn >= fcap) { + fcap *= 2; + // 
NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + files = safe_realloc(files, fcap * sizeof(char *)); + } + files[fn++] = heap_strdup(fp); + + /* Register path components in dir tree (up to 3 levels deep) */ + char tmp[512]; + strncpy(tmp, fp, sizeof(tmp) - 1); + tmp[sizeof(tmp) - 1] = '\0'; + + /* Split by '/' */ + char *parts[16]; + int nparts = 0; + char *p = tmp; + parts[nparts++] = p; + while (*p && nparts < 16) { + if (*p == '/') { + *p = '\0'; + parts[nparts++] = p + 1; + } + p++; + } + + /* Register root children */ + { + int ri = -1; + for (int i = 0; i < dn; i++) { + if (strcmp(dir_paths[i], "") == 0) { + ri = i; + break; + } + } + if (ri < 0 && dn < dcap) { + ri = dn; + dir_paths[dn] = heap_strdup(""); + dir_child_counts[dn] = 0; + dir_children[dn] = NULL; + dir_children_caps[dn] = 0; + dn++; + } + if (ri >= 0 && nparts > 0) { + /* Check if child already exists */ + bool exists = false; + for (int k = 0; k < dir_child_counts[ri]; k++) { + if (strcmp(dir_children[ri][k], parts[0]) == 0) { + exists = true; + break; + } + } + if (!exists) { + if (dir_child_counts[ri] >= dir_children_caps[ri]) { + dir_children_caps[ri] = + dir_children_caps[ri] ? dir_children_caps[ri] * 2 : 4; + dir_children[ri] = + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + realloc(dir_children[ri], dir_children_caps[ri] * sizeof(char *)); + } + dir_children[ri][dir_child_counts[ri]++] = heap_strdup(parts[0]); + } + } + } + + /* Register deeper dir children (depth 0..2) */ + for (int depth = 0; depth < nparts - 1 && depth < 3; depth++) { + /* Build dir path */ + char dir[512] = ""; + for (int k = 0; k <= depth; k++) { + if (k > 0) { + strcat(dir, "/"); + } + strcat(dir, parts[k]); + } + const char *child = (depth + 1 < nparts) ? 
parts[depth + 1] : NULL; + if (!child) { + continue; + } + + int di = -1; + for (int i = 0; i < dn; i++) { + if (strcmp(dir_paths[i], dir) == 0) { + di = i; + break; + } + } + if (di < 0 && dn < dcap) { + di = dn; + dir_paths[dn] = heap_strdup(dir); + dir_child_counts[dn] = 0; + dir_children[dn] = NULL; + dir_children_caps[dn] = 0; + dn++; + } + if (di >= 0) { + bool exists = false; + for (int k = 0; k < dir_child_counts[di]; k++) { + if (strcmp(dir_children[di][k], child) == 0) { + exists = true; + break; + } + } + if (!exists) { + if (dir_child_counts[di] >= dir_children_caps[di]) { + dir_children_caps[di] = + dir_children_caps[di] ? dir_children_caps[di] * 2 : 4; + dir_children[di] = + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + realloc(dir_children[di], dir_children_caps[di] * sizeof(char *)); + } + dir_children[di][dir_child_counts[di]++] = heap_strdup(child); + } + } + } + } + sqlite3_finalize(stmt); + + /* Build file set for type detection */ + /* Collect tree entries */ + int ecap = 64; + int en = 0; + cbm_file_tree_entry_t *entries = calloc(ecap, sizeof(cbm_file_tree_entry_t)); + + /* Root children */ + for (int i = 0; i < dn; i++) { + if (strcmp(dir_paths[i], "") != 0) { + continue; + } + for (int k = 0; k < dir_child_counts[i]; k++) { + if (en >= ecap) { + ecap *= 2; + entries = safe_realloc(entries, ecap * sizeof(cbm_file_tree_entry_t)); + } + const char *child = dir_children[i][k]; + /* Check if it's a file */ + bool is_file = false; + for (int f = 0; f < fn; f++) { + if (strcmp(files[f], child) == 0) { + is_file = true; + break; + } + } + /* Count its children in dir tree */ + int nch = 0; + for (int d = 0; d < dn; d++) { + if (strcmp(dir_paths[d], child) == 0) { + nch = dir_child_counts[d]; + break; + } + } + entries[en].path = heap_strdup(child); + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + entries[en].type = heap_strdup(is_file ? 
"file" : "dir"); + entries[en].children = nch; + en++; + } + } + + /* Non-root dir children */ + for (int i = 0; i < dn; i++) { + if (strcmp(dir_paths[i], "") == 0) { + continue; + } + /* Limit depth to 3 levels */ + int slashes = 0; + // NOLINTNEXTLINE(clang-analyzer-security.ArrayBound) + for (const char *p = dir_paths[i]; *p; p++) { + if (*p == '/') { + slashes++; + } + } + if (slashes >= 3) { + continue; + } + + for (int k = 0; k < dir_child_counts[i]; k++) { + if (en >= ecap) { + ecap *= 2; + entries = safe_realloc(entries, ecap * sizeof(cbm_file_tree_entry_t)); + } + char path[512]; + snprintf(path, sizeof(path), "%s/%s", dir_paths[i], dir_children[i][k]); + bool is_file = false; + for (int f = 0; f < fn; f++) { + if (strcmp(files[f], path) == 0) { + is_file = true; + break; + } + } + int nch = 0; + for (int d = 0; d < dn; d++) { + if (strcmp(dir_paths[d], path) == 0) { + nch = dir_child_counts[d]; + break; + } + } + entries[en].path = heap_strdup(path); + // NOLINTNEXTLINE(readability-implicit-bool-conversion) + entries[en].type = heap_strdup(is_file ? 
"file" : "dir"); + entries[en].children = nch; + en++; + } + } + + /* Sort by path */ + for (int i = 1; i < en; i++) { + int j = i; + while (j > 0 && strcmp(entries[j].path, entries[j - 1].path) < 0) { + cbm_file_tree_entry_t tmp = entries[j]; + entries[j] = entries[j - 1]; + entries[j - 1] = tmp; + j--; + } + } + + /* Cleanup dir tree */ + for (int i = 0; i < dn; i++) { + free(dir_paths[i]); + for (int k = 0; k < dir_child_counts[i]; k++) { + free(dir_children[i][k]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(dir_children[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(dir_paths); + free(dir_child_counts); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(dir_children); + free(dir_children_caps); + for (int i = 0; i < fn; i++) { + free(files[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(files); + + out->file_tree = entries; + out->file_tree_count = en; + return CBM_STORE_OK; +} + +/* ── Louvain community detection ───────────────────────────────── */ + +int cbm_louvain(const int64_t *nodes, int node_count, const cbm_louvain_edge_t *edges, + int edge_count, cbm_louvain_result_t **out, int *out_count) { + if (node_count <= 0) { + *out = NULL; + *out_count = 0; + return CBM_STORE_OK; + } + + int n = node_count; + + /* Build adjacency: edge weights */ + int wcap = edge_count > 0 ? 
edge_count : 1; + int wn = 0; + int *wsi = malloc(wcap * sizeof(int)); + int *wdi = malloc(wcap * sizeof(int)); + double *ww = malloc(wcap * sizeof(double)); + + /* Map node IDs to indices */ + for (int e = 0; e < edge_count; e++) { + int si = -1; + int di = -1; + for (int i = 0; i < n; i++) { + if (nodes[i] == edges[e].src) { + si = i; + } + if (nodes[i] == edges[e].dst) { + di = i; + } + } + if (si < 0 || di < 0 || si == di) { + continue; + } + /* Normalize edge key */ + if (si > di) { + int tmp = si; + si = di; + di = tmp; + } + /* Check if already exists */ + int found = -1; + for (int i = 0; i < wn; i++) { + if (wsi[i] == si && wdi[i] == di) { + found = i; + break; + } + } + if (found >= 0) { + ww[found] += 1.0; + } else { + if (wn >= wcap) { + wcap *= 2; + wsi = safe_realloc(wsi, wcap * sizeof(int)); + wdi = safe_realloc(wdi, wcap * sizeof(int)); + ww = safe_realloc(ww, wcap * sizeof(double)); + } + wsi[wn] = si; + wdi[wn] = di; + ww[wn] = 1.0; + wn++; + } + } + + /* Build adjacency lists */ + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + int **adj = calloc(n, sizeof(int *)); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + double **adj_w = calloc(n, sizeof(double *)); + int *adj_n = calloc(n, sizeof(int)); + int *adj_cap = calloc(n, sizeof(int)); + + double total_weight = 0; + for (int i = 0; i < wn; i++) { + int si = wsi[i]; + int di = wdi[i]; + double w = ww[i]; + total_weight += w; + + /* Add si → di */ + if (adj_n[si] >= adj_cap[si]) { + adj_cap[si] = adj_cap[si] ? adj_cap[si] * 2 : 4; + adj[si] = safe_realloc(adj[si], adj_cap[si] * sizeof(int)); + adj_w[si] = safe_realloc(adj_w[si], adj_cap[si] * sizeof(double)); + } + adj[si][adj_n[si]] = di; + adj_w[si][adj_n[si]] = w; + adj_n[si]++; + + /* Add di → si */ + if (adj_n[di] >= adj_cap[di]) { + adj_cap[di] = adj_cap[di] ? 
adj_cap[di] * 2 : 4; + adj[di] = safe_realloc(adj[di], adj_cap[di] * sizeof(int)); + adj_w[di] = safe_realloc(adj_w[di], adj_cap[di] * sizeof(double)); + } + adj[di][adj_n[di]] = si; + adj_w[di][adj_n[di]] = w; + adj_n[di]++; + } + free(wsi); + free(wdi); + free(ww); + + /* Initialize communities */ + int *community = malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) { + community[i] = i; + } + + if (total_weight == 0) { + /* No edges: each node in its own community */ + cbm_louvain_result_t *result = malloc(n * sizeof(cbm_louvain_result_t)); + for (int i = 0; i < n; i++) { + result[i].node_id = nodes[i]; + result[i].community = i; + } + *out = result; + *out_count = n; + free(community); + for (int i = 0; i < n; i++) { + free(adj[i]); + free(adj_w[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(adj); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(adj_w); + free(adj_n); + free(adj_cap); + return CBM_STORE_OK; + } + + /* Compute node degrees */ + double *degree = calloc(n, sizeof(double)); + for (int i = 0; i < n; i++) { + for (int j = 0; j < adj_n[i]; j++) { + degree[i] += adj_w[i][j]; + } + } + + /* Main Louvain loop (10 iterations max) */ + for (int iter = 0; iter < 10; iter++) { + bool improved = false; + + /* Community total degree */ + double *comm_degree = calloc(n, sizeof(double)); + for (int i = 0; i < n; i++) { + comm_degree[community[i]] += degree[i]; + } + + /* Random order (simple LCG shuffle) */ + int *order = calloc(n, sizeof(int)); + for (int i = 0; i < n; i++) { + order[i] = i; + } + unsigned int seed = (unsigned int)((iter * 1000) + n); + for (int i = n - 1; i > 0; i--) { +/* Linear congruential generator (glibc constants) */ +#define LCG_MULTIPLIER 1103515245U +#define LCG_INCREMENT 12345U + seed = (seed * LCG_MULTIPLIER) + LCG_INCREMENT; + int j = (int)((seed >> 16) % (unsigned int)(i + 1)); + int tmp = order[i]; + order[i] = order[j]; + order[j] = tmp; + } + + for (int oi = 
0; oi < n; oi++) { + int i = order[oi]; + int cur_comm = community[i]; + + /* Compute weights to neighboring communities */ + double *nc_weight = calloc(n, sizeof(double)); + bool *nc_seen = calloc(n, sizeof(bool)); + for (int j = 0; j < adj_n[i]; j++) { + int nc = community[adj[i][j]]; + nc_weight[nc] += adj_w[i][j]; + nc_seen[nc] = true; + } + + /* Remove node from community */ + comm_degree[cur_comm] -= degree[i]; + + int best_comm = cur_comm; + double best_gain = 0.0; + + for (int c = 0; c < n; c++) { + if (!nc_seen[c]) { + continue; + } + double gain = nc_weight[c] - (degree[i] * comm_degree[c] / (2.0 * total_weight)); + if (gain > best_gain) { + best_gain = gain; + best_comm = c; + } + } + + /* Also consider staying */ + double cur_gain = + nc_weight[cur_comm] - (degree[i] * comm_degree[cur_comm] / (2.0 * total_weight)); + if (cur_gain >= best_gain) { + best_comm = cur_comm; + } + + community[i] = best_comm; + comm_degree[best_comm] += degree[i]; + + if (best_comm != cur_comm) { + improved = true; + } + + free(nc_weight); + free(nc_seen); + } + free(order); + free(comm_degree); + + if (!improved) { + break; + } + } + + /* Build result */ + cbm_louvain_result_t *result = malloc(n * sizeof(cbm_louvain_result_t)); + for (int i = 0; i < n; i++) { + result[i].node_id = nodes[i]; + result[i].community = community[i]; + } + *out = result; + *out_count = n; + + free(community); + free(degree); + for (int i = 0; i < n; i++) { + free(adj[i]); + free(adj_w[i]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(adj); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(adj_w); + free(adj_n); + free(adj_cap); + return CBM_STORE_OK; +} + +/* ── GetArchitecture dispatch ──────────────────────────────────── */ + +static bool want_aspect(const char **aspects, int aspect_count, const char *name) { + if (!aspects || aspect_count == 0) { + return true; + } + for (int i = 0; i < aspect_count; i++) { + if (strcmp(aspects[i], 
"all") == 0) { + return true; + } + if (strcmp(aspects[i], name) == 0) { + return true; + } + } + return false; +} + +int cbm_store_get_architecture(cbm_store_t *s, const char *project, const char **aspects, + int aspect_count, cbm_architecture_info_t *out) { + memset(out, 0, sizeof(*out)); + int rc; + + if (want_aspect(aspects, aspect_count, "languages")) { + rc = arch_languages(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "packages")) { + rc = arch_packages(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "entry_points")) { + rc = arch_entry_points(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "routes")) { + rc = arch_routes(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "hotspots")) { + rc = arch_hotspots(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "boundaries")) { + cbm_cross_pkg_boundary_t *barr = NULL; + int bcount = 0; + rc = arch_boundaries(s, project, &barr, &bcount); + if (rc != CBM_STORE_OK) { + return rc; + } + out->boundaries = barr; + out->boundary_count = bcount; + } + if (want_aspect(aspects, aspect_count, "layers")) { + rc = arch_layers(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + if (want_aspect(aspects, aspect_count, "file_tree")) { + rc = arch_file_tree(s, project, out); + if (rc != CBM_STORE_OK) { + return rc; + } + } + + return CBM_STORE_OK; +} + +void cbm_store_architecture_free(cbm_architecture_info_t *out) { + if (!out) { + return; + } + for (int i = 0; i < out->language_count; i++) { + free((void *)out->languages[i].language); + } + free(out->languages); + for (int i = 0; i < out->package_count; i++) { + free((void *)out->packages[i].name); + } + free(out->packages); + for (int i = 0; i < out->entry_point_count; 
i++) { + free((void *)out->entry_points[i].name); + free((void *)out->entry_points[i].qualified_name); + free((void *)out->entry_points[i].file); + } + free(out->entry_points); + for (int i = 0; i < out->route_count; i++) { + free((void *)out->routes[i].method); + free((void *)out->routes[i].path); + free((void *)out->routes[i].handler); + } + free(out->routes); + for (int i = 0; i < out->hotspot_count; i++) { + free((void *)out->hotspots[i].name); + free((void *)out->hotspots[i].qualified_name); + } + free(out->hotspots); + for (int i = 0; i < out->boundary_count; i++) { + free((void *)out->boundaries[i].from); + free((void *)out->boundaries[i].to); + } + free(out->boundaries); + for (int i = 0; i < out->service_count; i++) { + free((void *)out->services[i].from); + free((void *)out->services[i].to); + free((void *)out->services[i].type); + } + free(out->services); + for (int i = 0; i < out->layer_count; i++) { + free((void *)out->layers[i].name); + free((void *)out->layers[i].layer); + free((void *)out->layers[i].reason); + } + free(out->layers); + for (int i = 0; i < out->cluster_count; i++) { + free((void *)out->clusters[i].label); + for (int j = 0; j < out->clusters[i].top_node_count; j++) { + free((void *)out->clusters[i].top_nodes[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].top_nodes); + for (int j = 0; j < out->clusters[i].package_count; j++) { + free((void *)out->clusters[i].packages[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].packages); + for (int j = 0; j < out->clusters[i].edge_type_count; j++) { + free((void *)out->clusters[i].edge_types[j]); + } + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free(out->clusters[i].edge_types); + } + free(out->clusters); + for (int i = 0; i < out->file_tree_count; i++) { + free((void *)out->file_tree[i].path); + free((void *)out->file_tree[i].type); + } + free(out->file_tree); + 
memset(out, 0, sizeof(*out)); +} + +/* ── ADR (Architecture Decision Record) ────────────────────────── */ + +static const char *canonical_sections[] = {"PURPOSE", "STACK", "ARCHITECTURE", + "PATTERNS", "TRADEOFFS", "PHILOSOPHY"}; +static const int canonical_section_count = 6; + +static bool is_canonical_section(const char *name) { + for (int i = 0; i < canonical_section_count; i++) { + if (strcmp(name, canonical_sections[i]) == 0) { + return true; + } + } + return false; +} + +cbm_adr_sections_t cbm_adr_parse_sections(const char *content) { + cbm_adr_sections_t result; + memset(&result, 0, sizeof(result)); + if (!content || !content[0]) { + return result; + } + + const char *p = content; + char *current_section = NULL; + char current_content[8192] = ""; + int content_len = 0; + + while (*p) { + /* Find end of line */ + const char *eol = strchr(p, '\n'); + int line_len = eol ? (int)(eol - p) : (int)strlen(p); + + /* Check for canonical section header */ + if (line_len > 3 && p[0] == '#' && p[1] == '#' && p[2] == ' ') { + char header[64]; + int hlen = line_len - 3; + if (hlen >= (int)sizeof(header)) { + hlen = (int)sizeof(header) - 1; + } + memcpy(header, p + 3, hlen); + header[hlen] = '\0'; + /* Trim trailing whitespace */ + while (hlen > 0 && (header[hlen - 1] == ' ' || header[hlen - 1] == '\t' || + header[hlen - 1] == '\r')) { + header[--hlen] = '\0'; + } + + if (is_canonical_section(header)) { + /* Save previous section */ + if (current_section && result.count < 16) { + /* Trim content */ + while (content_len > 0 && (current_content[content_len - 1] == '\n' || + current_content[content_len - 1] == ' ')) { + current_content[--content_len] = '\0'; + } + /* Skip leading whitespace */ + char *trimmed = current_content; + while (*trimmed == '\n' || *trimmed == ' ') { + trimmed++; + } + result.keys[result.count] = current_section; + result.values[result.count] = heap_strdup(trimmed); + result.count++; + } + current_section = heap_strdup(header); + current_content[0] = 
'\0'; + content_len = 0; + p = eol ? eol + 1 : p + line_len; + continue; + } + } + + /* Append line to current content */ + if (current_section) { + if (content_len > 0 || line_len > 0) { + if (content_len > 0) { + current_content[content_len++] = '\n'; + } + if (content_len + line_len < (int)sizeof(current_content) - 1) { + memcpy(current_content + content_len, p, line_len); + content_len += line_len; + current_content[content_len] = '\0'; + } + } + } + + p = eol ? eol + 1 : p + line_len; + } + + /* Save last section */ + if (current_section && result.count < 16) { + while (content_len > 0 && (current_content[content_len - 1] == '\n' || + current_content[content_len - 1] == ' ')) { + current_content[--content_len] = '\0'; + } + char *trimmed = current_content; + while (*trimmed == '\n' || *trimmed == ' ') { + trimmed++; + } + result.keys[result.count] = current_section; + result.values[result.count] = heap_strdup(trimmed); + result.count++; + } + + return result; +} + +char *cbm_adr_render(const cbm_adr_sections_t *sections) { + if (!sections || sections->count == 0) { + return heap_strdup(""); + } + + char buf[16384] = ""; + int pos = 0; + bool rendered[16] = {false}; + + /* Canonical sections first, in order */ + for (int c = 0; c < canonical_section_count; c++) { + for (int i = 0; i < sections->count; i++) { + if (rendered[i]) { + continue; + } + if (strcmp(sections->keys[i], canonical_sections[c]) == 0) { + if (pos > 0) { + pos += snprintf(buf + pos, sizeof(buf) - pos, "\n\n"); + } + pos += snprintf(buf + pos, sizeof(buf) - pos, "## %s\n%s", sections->keys[i], + sections->values[i]); + rendered[i] = true; + break; + } + } + } + + /* Non-canonical sections alphabetically */ + /* Collect indices of non-rendered sections */ + int extra[16]; + int nextra = 0; + for (int i = 0; i < sections->count; i++) { + if (!rendered[i]) { + extra[nextra++] = i; + } + } + /* Sort extra by key name */ + for (int i = 1; i < nextra; i++) { + int j = i; + while (j > 0 && 
strcmp(sections->keys[extra[j]], sections->keys[extra[j - 1]]) < 0) { + int tmp = extra[j]; + extra[j] = extra[j - 1]; + extra[j - 1] = tmp; + j--; + } + } + for (int i = 0; i < nextra; i++) { + int idx = extra[i]; + if (pos > 0) { + pos += snprintf(buf + pos, sizeof(buf) - pos, "\n\n"); + } + pos += snprintf(buf + pos, sizeof(buf) - pos, "## %s\n%s", sections->keys[idx], + sections->values[idx]); + } + + return heap_strdup(buf); +} + +int cbm_adr_validate_content(const char *content, char *errbuf, int errbuf_size) { + cbm_adr_sections_t sections = cbm_adr_parse_sections(content); + char missing[256] = ""; + int mlen = 0; + int nmissing = 0; + + for (int c = 0; c < canonical_section_count; c++) { + bool found = false; + for (int i = 0; i < sections.count; i++) { + if (strcmp(sections.keys[i], canonical_sections[c]) == 0) { + found = true; + break; + } + } + if (!found) { + if (mlen > 0) { + mlen += snprintf(missing + mlen, sizeof(missing) - mlen, ", "); + } + mlen += snprintf(missing + mlen, sizeof(missing) - mlen, "%s", canonical_sections[c]); + nmissing++; + } + } + cbm_adr_sections_free(§ions); + + if (nmissing > 0) { + snprintf(errbuf, errbuf_size, + "missing required sections: %s. 
All 6 required: PURPOSE, STACK, ARCHITECTURE, " + "PATTERNS, TRADEOFFS, PHILOSOPHY", + missing); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +int cbm_adr_validate_section_keys(const char **keys, int count, char *errbuf, int errbuf_size) { + char invalid[256] = ""; + int ilen = 0; + int ninvalid = 0; + + /* Collect and sort invalid keys */ + const char *inv_keys[16]; + int inv_n = 0; + for (int i = 0; i < count; i++) { + if (!is_canonical_section(keys[i])) { + if (inv_n < 16) { + inv_keys[inv_n++] = keys[i]; + } + } + } + /* Sort alphabetically */ + for (int i = 1; i < inv_n; i++) { + int j = i; + while (j > 0 && strcmp(inv_keys[j], inv_keys[j - 1]) < 0) { + const char *tmp = inv_keys[j]; + inv_keys[j] = inv_keys[j - 1]; + inv_keys[j - 1] = tmp; + j--; + } + } + + for (int i = 0; i < inv_n; i++) { + if (ilen > 0) { + ilen += snprintf(invalid + ilen, sizeof(invalid) - ilen, ", "); + } + ilen += snprintf(invalid + ilen, sizeof(invalid) - ilen, "%s", inv_keys[i]); + ninvalid++; + } + + if (ninvalid > 0) { + snprintf(errbuf, errbuf_size, + "invalid section names: %s. 
Valid sections: PURPOSE, STACK, ARCHITECTURE, " + "PATTERNS, TRADEOFFS, PHILOSOPHY", + invalid); + return CBM_STORE_ERR; + } + return CBM_STORE_OK; +} + +void cbm_adr_sections_free(cbm_adr_sections_t *s) { + if (!s) { + return; + } + for (int i = 0; i < s->count; i++) { + free(s->keys[i]); + free(s->values[i]); + } + memset(s, 0, sizeof(*s)); +} + +int cbm_store_adr_store(cbm_store_t *s, const char *project, const char *content) { + char now[32]; + iso_now(now, sizeof(now)); + + const char *sql = + "INSERT INTO project_summaries (project, summary, source_hash, created_at, updated_at) " + "VALUES (?1, ?2, '', ?3, ?4) " + "ON CONFLICT(project) DO UPDATE SET summary=excluded.summary, " + "updated_at=excluded.updated_at"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "adr_store"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + bind_text(stmt, 2, content); + bind_text(stmt, 3, now); + bind_text(stmt, 4, now); + int rc = sqlite3_step(stmt); + sqlite3_finalize(stmt); + return (rc == SQLITE_DONE) ? 
CBM_STORE_OK : CBM_STORE_ERR; +} + +int cbm_store_adr_get(cbm_store_t *s, const char *project, cbm_adr_t *out) { + const char *sql = + "SELECT project, summary, created_at, updated_at FROM project_summaries WHERE project=?1"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "adr_get"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + int rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW) { + sqlite3_finalize(stmt); + store_set_error(s, "no ADR found"); + return CBM_STORE_NOT_FOUND; + } + out->project = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + out->content = heap_strdup((const char *)sqlite3_column_text(stmt, 1)); + out->created_at = heap_strdup((const char *)sqlite3_column_text(stmt, 2)); + out->updated_at = heap_strdup((const char *)sqlite3_column_text(stmt, 3)); + sqlite3_finalize(stmt); + return CBM_STORE_OK; +} + +int cbm_store_adr_delete(cbm_store_t *s, const char *project) { + const char *sql = "DELETE FROM project_summaries WHERE project=?1"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "adr_delete"); + return CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + int rc = sqlite3_step(stmt); + int changes = sqlite3_changes(s->db); + sqlite3_finalize(stmt); + if (rc != SQLITE_DONE) { + return CBM_STORE_ERR; + } + if (changes == 0) { + store_set_error(s, "no ADR found"); + return CBM_STORE_NOT_FOUND; + } + return CBM_STORE_OK; +} + +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +int cbm_store_adr_update_sections(cbm_store_t *s, const char *project, const char **keys, + const char **values, int count, cbm_adr_t *out) { + /* Get existing ADR */ + cbm_adr_t existing; + int rc = cbm_store_adr_get(s, project, &existing); + if (rc != CBM_STORE_OK) { + store_set_error(s, "no existing ADR to update"); + return rc; + } + + /* Parse existing sections */ + cbm_adr_sections_t 
sections = cbm_adr_parse_sections(existing.content); + cbm_store_adr_free(&existing); + + /* Merge new sections */ + for (int i = 0; i < count; i++) { + bool found = false; + for (int j = 0; j < sections.count; j++) { + if (strcmp(sections.keys[j], keys[i]) == 0) { + free(sections.values[j]); + sections.values[j] = heap_strdup(values[i]); + found = true; + break; + } + } + if (!found && sections.count < 16) { + sections.keys[sections.count] = heap_strdup(keys[i]); + sections.values[sections.count] = heap_strdup(values[i]); + sections.count++; + } + } + + /* Render merged */ + char *merged = cbm_adr_render(§ions); + cbm_adr_sections_free(§ions); + + /* Check length */ + if ((int)strlen(merged) > CBM_ADR_MAX_LENGTH) { + char msg[128]; + snprintf(msg, sizeof(msg), "merged ADR exceeds %d chars (%d chars)", CBM_ADR_MAX_LENGTH, + (int)strlen(merged)); + store_set_error(s, msg); + free(merged); + return CBM_STORE_ERR; + } + + /* Store merged */ + rc = cbm_store_adr_store(s, project, merged); + free(merged); + if (rc != CBM_STORE_OK) { + return rc; + } + + return cbm_store_adr_get(s, project, out); +} + +void cbm_store_adr_free(cbm_adr_t *adr) { + if (!adr) { + return; + } + free((void *)adr->project); + free((void *)adr->content); + free((void *)adr->created_at); + free((void *)adr->updated_at); + memset(adr, 0, sizeof(*adr)); +} + +/* ── Architecture doc discovery ────────────────────────────────── */ + +int cbm_store_find_architecture_docs(cbm_store_t *s, const char *project, char ***out, int *count) { + const char *sql = "SELECT file_path FROM nodes WHERE project=?1 AND label='File' " + "AND (file_path LIKE '%ARCHITECTURE.md' OR file_path LIKE '%ADR.md' " + "OR file_path LIKE '%DECISIONS.md' OR file_path LIKE 'docs/adr/%' " + "OR file_path LIKE 'doc/adr/%' OR file_path LIKE 'adr/%') " + "ORDER BY file_path LIMIT 20"; + sqlite3_stmt *stmt = NULL; + if (sqlite3_prepare_v2(s->db, sql, -1, &stmt, NULL) != SQLITE_OK) { + store_set_error_sqlite(s, "find_arch_docs"); + return 
CBM_STORE_ERR; + } + bind_text(stmt, 1, project); + + int cap = 8; + int n = 0; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + char **arr = malloc(cap * sizeof(char *)); + while (sqlite3_step(stmt) == SQLITE_ROW) { + if (n >= cap) { + cap *= 2; + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + arr = safe_realloc(arr, cap * sizeof(char *)); + } + arr[n++] = heap_strdup((const char *)sqlite3_column_text(stmt, 0)); + } + sqlite3_finalize(stmt); + *out = arr; + *count = n; + return CBM_STORE_OK; +} + +/* ── Memory management ──────────────────────────────────────────── */ + +void cbm_node_free_fields(cbm_node_t *n) { + free((void *)n->project); + free((void *)n->label); + free((void *)n->name); + free((void *)n->qualified_name); + free((void *)n->file_path); + free((void *)n->properties_json); +} + +void cbm_store_free_nodes(cbm_node_t *nodes, int count) { + if (!nodes) { + return; + } + for (int i = 0; i < count; i++) { + cbm_node_free_fields(&nodes[i]); + } + free(nodes); +} + +void cbm_store_free_edges(cbm_edge_t *edges, int count) { + if (!edges) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)edges[i].project); + free((void *)edges[i].type); + free((void *)edges[i].properties_json); + } + free(edges); +} + +void cbm_project_free_fields(cbm_project_t *p) { + free((void *)p->name); + free((void *)p->indexed_at); + free((void *)p->root_path); +} + +void cbm_store_free_projects(cbm_project_t *projects, int count) { + if (!projects) { + return; + } + for (int i = 0; i < count; i++) { + cbm_project_free_fields(&projects[i]); + } + free(projects); +} + +void cbm_store_free_file_hashes(cbm_file_hash_t *hashes, int count) { + if (!hashes) { + return; + } + for (int i = 0; i < count; i++) { + free((void *)hashes[i].project); + free((void *)hashes[i].rel_path); + free((void *)hashes[i].sha256); + } + free(hashes); +} diff --git a/src/ui/config.c b/src/ui/config.c index 962d0ed..04bf417 100644 --- 
a/src/ui/config.c +++ b/src/ui/config.c @@ -1,129 +1,130 @@ -/* - * config.c — Persistent UI configuration (JSON via yyjson). - * - * Config file: ~/.cache/codebase-memory-mcp/config.json - * Format: {"ui_enabled": false, "ui_port": 9749} - */ -#include "ui/config.h" -#include "foundation/log.h" -#include "foundation/compat_fs.h" - -#include - -#include -#include -#include - -/* ── Path ────────────────────────────────────────────────────── */ - -void cbm_ui_config_path(char *buf, int bufsz) { - const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) - if (!home) { - home = "/tmp"; - } - snprintf(buf, (size_t)bufsz, "%s/.cache/codebase-memory-mcp/config.json", home); -} - -/* ── Load ────────────────────────────────────────────────────── */ - -void cbm_ui_config_load(cbm_ui_config_t *cfg) { - cfg->ui_enabled = CBM_UI_DEFAULT_ENABLED; - cfg->ui_port = CBM_UI_DEFAULT_PORT; - - char path[1024]; - cbm_ui_config_path(path, (int)sizeof(path)); - - FILE *f = fopen(path, "rb"); - if (!f) { - return; /* no config file → defaults */ - } - - fseek(f, 0, SEEK_END); - long len = ftell(f); - fseek(f, 0, SEEK_SET); - - if (len <= 0 || len > 4096) { - fclose(f); - return; /* empty or suspiciously large → defaults */ - } - - char *buf = malloc((size_t)len + 1); - if (!buf) { - fclose(f); - return; - } - - size_t nread = fread(buf, 1, (size_t)len, f); - fclose(f); - buf[nread] = '\0'; - - yyjson_doc *doc = yyjson_read(buf, nread, 0); - free(buf); - if (!doc) { - cbm_log_warn("ui.config.corrupt", "path", path); - return; /* corrupt JSON → defaults */ - } - - yyjson_val *root = yyjson_doc_get_root(doc); - if (!yyjson_is_obj(root)) { - yyjson_doc_free(doc); - return; - } - - yyjson_val *v_enabled = yyjson_obj_get(root, "ui_enabled"); - if (yyjson_is_bool(v_enabled)) { - cfg->ui_enabled = yyjson_get_bool(v_enabled); - } - - yyjson_val *v_port = yyjson_obj_get(root, "ui_port"); - if (yyjson_is_int(v_port)) { - cfg->ui_port = (int)yyjson_get_int(v_port); - } - - 
yyjson_doc_free(doc); -} - -/* ── Save ────────────────────────────────────────────────────── */ - -void cbm_ui_config_save(const cbm_ui_config_t *cfg) { - char path[1024]; - cbm_ui_config_path(path, (int)sizeof(path)); - - /* Ensure directory exists (recursive) */ - char dir[1024]; - snprintf(dir, sizeof(dir), "%s", path); - char *slash = strrchr(dir, '/'); - if (slash) { - *slash = '\0'; - cbm_mkdir_p(dir, 0750); - } - - yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); - yyjson_mut_val *root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - - yyjson_mut_obj_add_bool(doc, root, "ui_enabled", cfg->ui_enabled); - yyjson_mut_obj_add_int(doc, root, "ui_port", cfg->ui_port); - - size_t json_len = 0; - char *json = yyjson_mut_write(doc, YYJSON_WRITE_PRETTY, &json_len); - yyjson_mut_doc_free(doc); - - if (!json) { - cbm_log_error("ui.config.write_fail", "reason", "serialize"); - return; - } - - FILE *f = fopen(path, "wb"); - if (!f) { - cbm_log_error("ui.config.write_fail", "path", path); - free(json); - return; - } - - fwrite(json, 1, json_len, f); - fclose(f); - free(json); - - cbm_log_debug("ui.config.saved", "path", path); -} +/* + * config.c — Persistent UI configuration (JSON via yyjson). 
+ * + * Config file: ~/.cache/codebase-memory-mcp/config.json + * Format: {"ui_enabled": false, "ui_port": 9749} + */ +#include "ui/config.h" +#include "foundation/log.h" +#include "foundation/platform.h" +#include "foundation/compat_fs.h" + +#include + +#include +#include +#include + +/* ── Path ────────────────────────────────────────────────────── */ + +void cbm_ui_config_path(char *buf, int bufsz) { + const char *home = cbm_home_dir(); + if (!home) { + home = "/tmp"; + } + snprintf(buf, (size_t)bufsz, "%s/.cache/codebase-memory-mcp/config.json", home); +} + +/* ── Load ────────────────────────────────────────────────────── */ + +void cbm_ui_config_load(cbm_ui_config_t *cfg) { + cfg->ui_enabled = CBM_UI_DEFAULT_ENABLED; + cfg->ui_port = CBM_UI_DEFAULT_PORT; + + char path[1024]; + cbm_ui_config_path(path, (int)sizeof(path)); + + FILE *f = fopen(path, "rb"); + if (!f) { + return; /* no config file → defaults */ + } + + fseek(f, 0, SEEK_END); + long len = ftell(f); + fseek(f, 0, SEEK_SET); + + if (len <= 0 || len > 4096) { + fclose(f); + return; /* empty or suspiciously large → defaults */ + } + + char *buf = malloc((size_t)len + 1); + if (!buf) { + fclose(f); + return; + } + + size_t nread = fread(buf, 1, (size_t)len, f); + fclose(f); + buf[nread] = '\0'; + + yyjson_doc *doc = yyjson_read(buf, nread, 0); + free(buf); + if (!doc) { + cbm_log_warn("ui.config.corrupt", "path", path); + return; /* corrupt JSON → defaults */ + } + + yyjson_val *root = yyjson_doc_get_root(doc); + if (!yyjson_is_obj(root)) { + yyjson_doc_free(doc); + return; + } + + yyjson_val *v_enabled = yyjson_obj_get(root, "ui_enabled"); + if (yyjson_is_bool(v_enabled)) { + cfg->ui_enabled = yyjson_get_bool(v_enabled); + } + + yyjson_val *v_port = yyjson_obj_get(root, "ui_port"); + if (yyjson_is_int(v_port)) { + cfg->ui_port = (int)yyjson_get_int(v_port); + } + + yyjson_doc_free(doc); +} + +/* ── Save ────────────────────────────────────────────────────── */ + +void cbm_ui_config_save(const 
cbm_ui_config_t *cfg) { + char path[1024]; + cbm_ui_config_path(path, (int)sizeof(path)); + + /* Ensure directory exists (recursive) */ + char dir[1024]; + snprintf(dir, sizeof(dir), "%s", path); + char *slash = strrchr(dir, '/'); + if (slash) { + *slash = '\0'; + cbm_mkdir_p(dir, 0750); + } + + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_bool(doc, root, "ui_enabled", cfg->ui_enabled); + yyjson_mut_obj_add_int(doc, root, "ui_port", cfg->ui_port); + + size_t json_len = 0; + char *json = yyjson_mut_write(doc, YYJSON_WRITE_PRETTY, &json_len); + yyjson_mut_doc_free(doc); + + if (!json) { + cbm_log_error("ui.config.write_fail", "reason", "serialize"); + return; + } + + FILE *f = fopen(path, "wb"); + if (!f) { + cbm_log_error("ui.config.write_fail", "path", path); + free(json); + return; + } + + fwrite(json, 1, json_len, f); + fclose(f); + free(json); + + cbm_log_debug("ui.config.saved", "path", path); +} diff --git a/src/ui/http_server.c b/src/ui/http_server.c index a2a30f3..fd209a0 100644 --- a/src/ui/http_server.c +++ b/src/ui/http_server.c @@ -1,1222 +1,1222 @@ -/* - * http_server.c — Mongoose-based HTTP server for graph UI. - * - * Routes: - * GET / → embedded index.html - * GET /assets/... → embedded JS/CSS - * POST /rpc → JSON-RPC dispatch via own cbm_mcp_server_t - * OPTIONS /rpc → CORS preflight (for vite dev on :5173) - * * → 404 - * - * Runs in a background pthread. Binds to 127.0.0.1 only. - * Has its own cbm_mcp_server_t with a separate SQLite connection (WAL reader). 
- */ -#include "ui/http_server.h" -#include "ui/embedded_assets.h" -#include "ui/layout3d.h" -#include "mcp/mcp.h" -#include "store/store.h" -/* pipeline.h no longer needed — indexing runs as subprocess */ -#include "foundation/log.h" -#include "foundation/platform.h" -#include "foundation/compat.h" -#include "foundation/compat_thread.h" - -#include -#include - -#include -#include -#include -#include -#ifdef _WIN32 -#include -#include -#include /* GetProcessMemoryInfo */ -#else -#include -#include -#endif -#ifdef __APPLE__ -#include -#endif - -/* ── Constants ────────────────────────────────────────────────── */ - -/* Max JSON-RPC request body size (1 MB) */ -#define MAX_BODY_SIZE (1024 * 1024) - -/* ── CORS: only allow localhost origins (blocks remote website attacks) ────── */ - -/* Per-request CORS header buffers. Updated at the start of each HTTP handler - * call by update_cors(). Single-threaded mongoose event loop makes statics safe. */ -static char g_cors[256]; /* CORS headers only */ -static char g_cors_json[512]; /* CORS + Content-Type: application/json */ - -/* Inspect the Origin header and only reflect it if it's a localhost URL. - * This prevents remote websites from making cross-origin requests to the - * local graph-ui server (the key defense against CORS-based data exfil). 
*/ -static void update_cors(struct mg_http_message *hm) { - struct mg_str *origin = mg_http_get_header(hm, "Origin"); - if (origin && origin->len > 0 && - (mg_match(*origin, mg_str("http://localhost:*"), NULL) || - mg_match(*origin, mg_str("http://127.0.0.1:*"), NULL))) { - snprintf(g_cors, sizeof(g_cors), - "Access-Control-Allow-Origin: %.*s\r\n" - "Access-Control-Allow-Methods: POST, GET, DELETE, OPTIONS\r\n" - "Access-Control-Allow-Headers: Content-Type\r\n", - (int)origin->len, origin->buf); - } else { - /* No Access-Control-Allow-Origin → browser blocks cross-origin access */ - snprintf(g_cors, sizeof(g_cors), - "Access-Control-Allow-Methods: POST, GET, DELETE, OPTIONS\r\n" - "Access-Control-Allow-Headers: Content-Type\r\n"); - } - snprintf(g_cors_json, sizeof(g_cors_json), - "%sContent-Type: application/json\r\n", g_cors); -} - -/* ── Server state ─────────────────────────────────────────────── */ - -struct cbm_http_server { - struct mg_mgr mgr; - cbm_mcp_server_t *mcp; /* own MCP server instance (read-only) */ - atomic_int stop_flag; - int port; - bool listener_ok; -}; - -/* ── Forward declarations for process-kill PID validation ──────── */ - -#define MAX_INDEX_JOBS 4 - -typedef struct { - char root_path[1024]; - char project_name[256]; - atomic_int status; /* 0=idle, 1=running, 2=done, 3=error */ - char error_msg[256]; -#ifndef _WIN32 - pid_t child_pid; /* tracked for process-kill validation */ -#endif -} index_job_t; - -static index_job_t g_index_jobs[MAX_INDEX_JOBS]; - -/* ── Serve embedded asset ─────────────────────────────────────── */ - -static bool serve_embedded(struct mg_connection *c, const char *path) { - const cbm_embedded_file_t *f = cbm_embedded_lookup(path); - if (!f) - return false; - - /* Build headers with correct Content-Type for this asset */ - char hdrs[512]; - snprintf(hdrs, sizeof(hdrs), - "%sContent-Type: %s\r\n" - "Cache-Control: public, max-age=31536000, immutable\r\n", - g_cors, f->content_type); - - mg_http_reply(c, 200, hdrs, 
"%.*s", (int)f->size, (const char *)f->data); - return true; -} - -/* Forward declaration */ -static bool get_query_param(struct mg_str query, const char *name, char *buf, int bufsz); - -/* ── Log ring buffer ──────────────────────────────────────────── */ - -#define LOG_RING_SIZE 500 -#define LOG_LINE_MAX 512 - -static char g_log_ring[LOG_RING_SIZE][LOG_LINE_MAX]; -static int g_log_head = 0; -static int g_log_count = 0; -static cbm_mutex_t g_log_mutex; -static atomic_int g_log_mutex_init = 0; - -/* Called from a log hook — appends a line to the ring buffer (thread-safe) */ -void cbm_ui_log_append(const char *line) { - if (!line) - return; - if (!atomic_load(&g_log_mutex_init)) { - cbm_mutex_init(&g_log_mutex); - atomic_store(&g_log_mutex_init, 1); - } - cbm_mutex_lock(&g_log_mutex); - snprintf(g_log_ring[g_log_head], LOG_LINE_MAX, "%s", line); - g_log_head = (g_log_head + 1) % LOG_RING_SIZE; - if (g_log_count < LOG_RING_SIZE) - g_log_count++; - cbm_mutex_unlock(&g_log_mutex); -} - -/* GET /api/logs?lines=N — returns last N log lines */ -static void handle_logs(struct mg_connection *c, struct mg_http_message *hm) { - char lines_str[16] = {0}; - int max_lines = 100; - if (get_query_param(hm->query, "lines", lines_str, (int)sizeof(lines_str))) { - int v = atoi(lines_str); - if (v > 0 && v <= LOG_RING_SIZE) - max_lines = v; - } - - cbm_mutex_lock(&g_log_mutex); - int count = g_log_count < max_lines ? 
g_log_count : max_lines; - int start = (g_log_head - count + LOG_RING_SIZE) % LOG_RING_SIZE; - int total = g_log_count; - - /* Copy lines under lock */ - size_t buf_size = (size_t)count * (LOG_LINE_MAX + 10) + 64; - char *buf = malloc(buf_size); - if (!buf) { - cbm_mutex_unlock(&g_log_mutex); - mg_http_reply(c, 500, g_cors, "oom"); - return; - } - - int pos = 0; - pos += snprintf(buf + pos, buf_size - (size_t)pos, "{\"lines\":["); - for (int i = 0; i < count; i++) { - int idx = (start + i) % LOG_RING_SIZE; - if (i > 0) - buf[pos++] = ','; - /* Escape quotes in log lines */ - buf[pos++] = '"'; - for (int j = 0; g_log_ring[idx][j] && (size_t)pos < buf_size - 10; j++) { - char ch = g_log_ring[idx][j]; - if (ch == '"') { - buf[pos++] = '\\'; - buf[pos++] = '"'; - } else if (ch == '\\') { - buf[pos++] = '\\'; - buf[pos++] = '\\'; - } else if (ch == '\n') { - buf[pos++] = '\\'; - buf[pos++] = 'n'; - } else { - buf[pos++] = ch; - } - } - buf[pos++] = '"'; - } - cbm_mutex_unlock(&g_log_mutex); - pos += snprintf(buf + pos, buf_size - (size_t)pos, "],\"total\":%d}", total); - - mg_http_reply(c, 200, g_cors_json, "%s", buf); - free(buf); -} - -/* ── Process monitoring ───────────────────────────────────────── */ - -#ifndef _WIN32 -#include -#endif -#include - -/* GET /api/processes — list codebase-memory-mcp processes via ps */ -static void handle_processes(struct mg_connection *c) { - char buf[8192]; - int pos = 0; - -#ifdef _WIN32 - /* Windows: GetProcessMemoryInfo + GetProcessTimes */ - PROCESS_MEMORY_COUNTERS pmc; - FILETIME ft_create, ft_exit, ft_kernel, ft_user; - double user_s = 0, sys_s = 0; - size_t rss_bytes = 0; - if (GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) - rss_bytes = pmc.WorkingSetSize; - if (GetProcessTimes(GetCurrentProcess(), &ft_create, &ft_exit, &ft_kernel, &ft_user)) { - ULARGE_INTEGER u, k; - u.LowPart = ft_user.dwLowDateTime; - u.HighPart = ft_user.dwHighDateTime; - k.LowPart = ft_kernel.dwLowDateTime; - k.HighPart = 
ft_kernel.dwHighDateTime; - user_s = (double)u.QuadPart / 1e7; - sys_s = (double)k.QuadPart / 1e7; - } - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, - "{\"self_pid\":%d,\"self_rss_mb\":%.1f," - "\"self_user_cpu_s\":%.1f,\"self_sys_cpu_s\":%.1f,\"processes\":[]}", - (int)_getpid(), (double)rss_bytes / (1024.0 * 1024.0), user_s, sys_s); -#else - struct rusage ru; - getrusage(RUSAGE_SELF, &ru); - long rss_kb = ru.ru_maxrss; -#ifdef __APPLE__ - rss_kb /= 1024; -#endif - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, - "{\"self_pid\":%d,\"self_rss_mb\":%.1f," - "\"self_user_cpu_s\":%.1f,\"self_sys_cpu_s\":%.1f,\"processes\":[", - (int)getpid(), (double)rss_kb / 1024.0, - (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1e6, - (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1e6); - - FILE *fp = popen("LC_ALL=C ps -eo pid,pcpu,rss,etime,comm 2>/dev/null" - " | grep '[c]odebase-memory-mcp'", - "r"); - int proc_count = 0; - if (fp) { - char line[1024]; - while (fgets(line, sizeof(line), fp)) { - int pid = 0; - float cpu = 0; - long rss = 0; - char elapsed[64] = {0}; - char comm[256] = {0}; - - if (sscanf(line, "%d %f %ld %63s %255s", &pid, &cpu, &rss, elapsed, comm) >= 4) { - if (proc_count > 0) - buf[pos++] = ','; - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, - "{\"pid\":%d,\"cpu\":%.1f,\"rss_mb\":%.1f," - "\"elapsed\":\"%s\",\"command\":\"%s\",\"is_self\":%s}", - pid, (double)cpu, (double)rss / 1024.0, elapsed, comm, - pid == (int)getpid() ? 
"true" : "false"); - if (pos >= (int)sizeof(buf)) { - pos = (int)sizeof(buf) - 1; - } - proc_count++; - } - } - pclose(fp); - } - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "]}"); -#endif - - mg_http_reply(c, 200, g_cors_json, "%s", buf); -} - -/* POST /api/process-kill — kill a process by PID */ -static void handle_process_kill(struct mg_connection *c, struct mg_http_message *hm) { - if (hm->body.len == 0 || hm->body.len > 256) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid body\"}"); - return; - } - - char body[257]; - memcpy(body, hm->body.buf, hm->body.len); - body[hm->body.len] = '\0'; - - yyjson_doc *doc = yyjson_read(body, hm->body.len, 0); - if (!doc) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid json\"}"); - return; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *v_pid = yyjson_obj_get(root, "pid"); - if (!v_pid || !yyjson_is_int(v_pid)) { - yyjson_doc_free(doc); - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing pid\"}"); - return; - } - int target_pid = (int)yyjson_get_int(v_pid); - yyjson_doc_free(doc); - -#ifdef _WIN32 - if (target_pid == (int)_getpid()) { -#else - if (target_pid == (int)getpid()) { -#endif - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"cannot kill self (use the UI server's own shutdown)\"}"); - return; - } - -#ifndef _WIN32 - /* Only allow killing PIDs that were spawned by this server (indexing jobs) */ - { - bool pid_is_ours = false; - for (int i = 0; i < MAX_INDEX_JOBS; i++) { - if (atomic_load(&g_index_jobs[i].status) == 1 && - g_index_jobs[i].child_pid == target_pid) { - pid_is_ours = true; - break; - } - } - if (!pid_is_ours) { - mg_http_reply(c, 403, g_cors_json, - "{\"error\":\"can only kill server-spawned processes\"}"); - return; - } - } -#endif - -#ifdef _WIN32 - HANDLE hproc = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD)target_pid); - if (!hproc || !TerminateProcess(hproc, 1)) { - if (hproc) - CloseHandle(hproc); - mg_http_reply(c, 500, 
g_cors_json, - "{\"error\":\"kill failed\"}"); - return; - } - CloseHandle(hproc); -#else - if (kill(target_pid, SIGTERM) != 0) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"kill failed\"}"); - return; - } -#endif - - mg_http_reply(c, 200, g_cors_json, "{\"killed\":%d}", - target_pid); -} - -/* ── Directory browser ────────────────────────────────────────── */ - -#include - -/* GET /api/browse?path=/some/dir — list subdirectories for file picker */ -static void handle_browse(struct mg_connection *c, struct mg_http_message *hm) { - char path[1024] = {0}; - if (!get_query_param(hm->query, "path", path, (int)sizeof(path)) || path[0] == '\0') { - /* Default to home directory */ - const char *home = getenv("HOME"); - if (home) - snprintf(path, sizeof(path), "%s", home); - else - snprintf(path, sizeof(path), "/"); - } - - if (!cbm_is_dir(path)) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"not a directory\"}"); - return; - } - - DIR *dir = opendir(path); - if (!dir) { - mg_http_reply(c, 403, g_cors_json, - "{\"error\":\"cannot open directory\"}"); - return; - } - - /* Build JSON response */ - char buf[32768]; - int pos = 0; - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "{\"path\":\"%s\",\"dirs\":[", path); - - struct dirent *ent; - int count = 0; - while ((ent = readdir(dir)) != NULL) { - /* Skip hidden dirs and . / .. 
*/ - if (ent->d_name[0] == '.') - continue; - - /* Check if it's actually a directory */ - char full[2048]; - snprintf(full, sizeof(full), "%s/%s", path, ent->d_name); - if (!cbm_is_dir(full)) - continue; - - if (count > 0) - buf[pos++] = ','; - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "\"%s\"", ent->d_name); - if (pos >= (int)sizeof(buf)) { - pos = (int)sizeof(buf) - 1; - } - count++; - - if (count >= 200) - break; /* safety limit */ - } - closedir(dir); - - /* Parent path */ - char parent[1024]; - snprintf(parent, sizeof(parent), "%s", path); - char *last_slash = strrchr(parent, '/'); - if (last_slash && last_slash != parent) - *last_slash = '\0'; - else - snprintf(parent, sizeof(parent), "/"); - - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "],\"parent\":\"%s\"}", parent); - mg_http_reply(c, 200, g_cors_json, "%s", buf); -} - -/* ── ADR endpoints ────────────────────────────────────────────── */ - -/* GET /api/adr?project=X — get ADR content for a project */ -static void handle_adr_get(struct mg_connection *c, struct mg_http_message *hm) { - char name[256] = {0}; - if (!get_query_param(hm->query, "project", name, (int)sizeof(name)) || name[0] == '\0') { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing project\"}"); - return; - } - - const char *home = getenv("HOME"); - if (!home) - home = "/tmp"; - char db_path[1024]; - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); - - cbm_store_t *store = cbm_store_open_path(db_path); - if (!store) { - mg_http_reply(c, 200, g_cors_json, - "{\"has_adr\":false}"); - return; - } - - cbm_adr_t adr; - memset(&adr, 0, sizeof(adr)); - if (cbm_store_adr_get(store, name, &adr) == CBM_STORE_OK && adr.content) { - /* Escape content for JSON — simple: replace quotes and newlines */ - size_t clen = strlen(adr.content); - size_t buf_size = clen * 2 + 256; - char *buf = malloc(buf_size); - if (buf) { - int pos = snprintf(buf, buf_size, 
"{\"has_adr\":true,\"content\":\""); - for (size_t i = 0; i < clen && (size_t)pos < buf_size - 10; i++) { - char ch = adr.content[i]; - if (ch == '"') { - buf[pos++] = '\\'; - buf[pos++] = '"'; - } else if (ch == '\\') { - buf[pos++] = '\\'; - buf[pos++] = '\\'; - } else if (ch == '\n') { - buf[pos++] = '\\'; - buf[pos++] = 'n'; - } else if (ch == '\r') { /* skip */ - } else if (ch == '\t') { - buf[pos++] = '\\'; - buf[pos++] = 't'; - } else { - buf[pos++] = ch; - } - } - pos += snprintf(buf + pos, buf_size - (size_t)pos, "\",\"updated_at\":\"%s\"}", - adr.updated_at ? adr.updated_at : ""); - mg_http_reply(c, 200, g_cors_json, "%s", buf); - free(buf); - } else { - mg_http_reply(c, 500, g_cors, "oom"); - } - cbm_store_adr_free(&adr); - } else { - mg_http_reply(c, 200, g_cors_json, - "{\"has_adr\":false}"); - } - cbm_store_close(store); -} - -/* POST /api/adr — save ADR content. Body: {"project":"...","content":"..."} */ -static void handle_adr_save(struct mg_connection *c, struct mg_http_message *hm) { - if (hm->body.len == 0 || hm->body.len > 16384) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid body\"}"); - return; - } - - char *body = malloc(hm->body.len + 1); - if (!body) { - mg_http_reply(c, 500, g_cors, "oom"); - return; - } - memcpy(body, hm->body.buf, hm->body.len); - body[hm->body.len] = '\0'; - - yyjson_doc *doc = yyjson_read(body, hm->body.len, 0); - free(body); - if (!doc) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid json\"}"); - return; - } - - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *v_proj = yyjson_obj_get(root, "project"); - yyjson_val *v_content = yyjson_obj_get(root, "content"); - if (!v_proj || !yyjson_is_str(v_proj) || !v_content || !yyjson_is_str(v_content)) { - yyjson_doc_free(doc); - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing project or content\"}"); - return; - } - - const char *proj = yyjson_get_str(v_proj); - const char *content = yyjson_get_str(v_content); - - const char 
*home = getenv("HOME"); - if (!home) - home = "/tmp"; - char db_path[1024]; - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, proj); - - cbm_store_t *store = cbm_store_open_path(db_path); - yyjson_doc_free(doc); - if (!store) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"cannot open store\"}"); - return; - } - - int rc = cbm_store_adr_store(store, proj, content); - cbm_store_close(store); - - if (rc == CBM_STORE_OK) { - mg_http_reply(c, 200, g_cors_json, - "{\"saved\":true}"); - } else { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"save failed\"}"); - } -} - -/* ── Background indexing ──────────────────────────────────────── */ - -static char g_binary_path[1024] = {0}; - -void cbm_http_server_set_binary_path(const char *path) { - if (path) { - snprintf(g_binary_path, sizeof(g_binary_path), "%s", path); - } -} - -/* Index via subprocess — isolates crashes from the main process. */ -static void *index_thread_fn(void *arg) { - index_job_t *job = arg; - cbm_log_info("ui.index.start", "path", job->root_path); - - /* Use stored binary path, or try to find it */ - const char *bin = g_binary_path; - char self_path[1024] = {0}; - if (!bin[0]) { -#ifdef _WIN32 - GetModuleFileNameA(NULL, self_path, sizeof(self_path)); -#elif defined(__APPLE__) - uint32_t sz = sizeof(self_path); - _NSGetExecutablePath(self_path, &sz); -#else - ssize_t len = readlink("/proc/self/exe", self_path, sizeof(self_path) - 1); - if (len > 0) - self_path[len] = '\0'; -#endif - bin = self_path[0] ? self_path : "codebase-memory-mcp"; - } - - char log_file[256]; - char json_arg[1200]; - snprintf(json_arg, sizeof(json_arg), "{\"repo_path\":\"%s\"}", job->root_path); - -#ifdef _WIN32 - snprintf(log_file, sizeof(log_file), "%s\\cbm_index_%d.log", - getenv("TEMP") ? 
getenv("TEMP") : ".", (int)_getpid()); - - /* Build command line for CreateProcess */ - char cmdline[2048]; - snprintf(cmdline, sizeof(cmdline), "\"%s\" cli index_repository \"%s\"", bin, json_arg); - - cbm_log_info("ui.index.spawn", "bin", bin, "log", log_file); - - HANDLE hlog = CreateFileA(log_file, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); - STARTUPINFOA si_proc = {.cb = sizeof(si_proc)}; - if (hlog != INVALID_HANDLE_VALUE) { - si_proc.dwFlags = STARTF_USESTDHANDLES; - si_proc.hStdError = hlog; - si_proc.hStdOutput = hlog; - } - PROCESS_INFORMATION pi = {0}; - if (!CreateProcessA(NULL, cmdline, NULL, NULL, TRUE, 0, NULL, NULL, &si_proc, &pi)) { - snprintf(job->error_msg, sizeof(job->error_msg), "CreateProcess failed"); - atomic_store(&job->status, 3); - if (hlog != INVALID_HANDLE_VALUE) - CloseHandle(hlog); - return NULL; - } - if (hlog != INVALID_HANDLE_VALUE) - CloseHandle(hlog); - - /* Poll log file while child runs */ - long tail_pos = 0; - for (;;) { - DWORD wait = WaitForSingleObject(pi.hProcess, 500); - FILE *lf = fopen(log_file, "r"); - if (lf) { - fseek(lf, tail_pos, SEEK_SET); - char line[512]; - while (fgets(line, sizeof(line), lf)) { - size_t l = strlen(line); - if (l > 0 && line[l - 1] == '\n') - line[l - 1] = '\0'; - if (line[0]) - cbm_ui_log_append(line); - } - tail_pos = ftell(lf); - fclose(lf); - } - if (wait == WAIT_OBJECT_0) - break; - } - - DWORD win_exit = 1; - GetExitCodeProcess(pi.hProcess, &win_exit); - int exit_code = (int)win_exit; - CloseHandle(pi.hProcess); - CloseHandle(pi.hThread); - (void)DeleteFileA(log_file); -#else - snprintf(log_file, sizeof(log_file), "/tmp/cbm_index_%d.log", (int)getpid()); - - cbm_log_info("ui.index.fork", "bin", bin, "log", log_file); - - pid_t child_pid = fork(); - if (child_pid < 0) { - snprintf(job->error_msg, sizeof(job->error_msg), "fork failed"); - atomic_store(&job->status, 3); - return NULL; - } - job->child_pid = child_pid; - - if (child_pid == 0) { - 
FILE *lf = freopen(log_file, "w", stderr); - (void)lf; - freopen("/dev/null", "w", stdout); - execl(bin, bin, "cli", "index_repository", json_arg, (char *)NULL); - _exit(127); - } - - long tail_pos = 0; - for (;;) { - int wstatus = 0; - pid_t wr = waitpid(child_pid, &wstatus, WNOHANG); - bool child_done = (wr == child_pid); - - FILE *lf = fopen(log_file, "r"); - if (lf) { - fseek(lf, tail_pos, SEEK_SET); - char line[512]; - while (fgets(line, sizeof(line), lf)) { - size_t l = strlen(line); - if (l > 0 && line[l - 1] == '\n') - line[l - 1] = '\0'; - if (line[0]) - cbm_ui_log_append(line); - } - tail_pos = ftell(lf); - fclose(lf); - } - - if (child_done) - break; - - struct timespec ts = {0, 500000000}; - cbm_nanosleep(&ts, NULL); - } - - int wstatus = 0; - waitpid(child_pid, &wstatus, 0); - int exit_code = WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1; - - (void)unlink(log_file); -#endif - - if (exit_code != 0) { - snprintf(job->error_msg, sizeof(job->error_msg), "indexing failed (exit code %d)", - exit_code); - atomic_store(&job->status, 3); - } else { - atomic_store(&job->status, 2); - } - cbm_log_info("ui.index.done", "path", job->root_path, "rc", exit_code == 0 ? 
"ok" : "err"); - return NULL; -} - -/* POST /api/index — body: {"root_path": "/abs/path"} → starts background indexing */ -static void handle_index_start(struct mg_connection *c, struct mg_http_message *hm) { - if (hm->body.len == 0 || hm->body.len > 4096) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid body\"}"); - return; - } - - char body_buf[4097]; - memcpy(body_buf, hm->body.buf, hm->body.len); - body_buf[hm->body.len] = '\0'; - - yyjson_doc *doc = yyjson_read(body_buf, hm->body.len, 0); - if (!doc) { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"invalid json\"}"); - return; - } - yyjson_val *root = yyjson_doc_get_root(doc); - yyjson_val *v_path = yyjson_obj_get(root, "root_path"); - if (!v_path || !yyjson_is_str(v_path)) { - yyjson_doc_free(doc); - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing root_path\"}"); - return; - } - const char *rpath = yyjson_get_str(v_path); - - /* Check path exists */ - if (!cbm_is_dir(rpath)) { - yyjson_doc_free(doc); - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"directory not found\"}"); - return; - } - - /* Find free job slot */ - int slot = -1; - for (int i = 0; i < MAX_INDEX_JOBS; i++) { - int st = atomic_load(&g_index_jobs[i].status); - if (st == 0 || st == 2 || st == 3) { - slot = i; - break; - } - } - if (slot < 0) { - yyjson_doc_free(doc); - mg_http_reply(c, 429, g_cors_json, - "{\"error\":\"all index slots busy\"}"); - return; - } - - index_job_t *job = &g_index_jobs[slot]; - snprintf(job->root_path, sizeof(job->root_path), "%s", rpath); - job->error_msg[0] = '\0'; - atomic_store(&job->status, 1); - yyjson_doc_free(doc); - - /* Spawn background thread */ - cbm_thread_t tid; - if (cbm_thread_create(&tid, 0, index_thread_fn, job) != 0) { - atomic_store(&job->status, 3); - snprintf(job->error_msg, sizeof(job->error_msg), "thread creation failed"); - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"thread creation failed\"}"); - return; - } - - mg_http_reply(c, 202, g_cors_json, - 
"{\"status\":\"indexing\",\"slot\":%d,\"path\":\"%s\"}", slot, job->root_path); -} - -/* GET /api/index-status — returns status of all index jobs */ -static void handle_index_status(struct mg_connection *c) { - char buf[2048] = "["; - int pos = 1; - for (int i = 0; i < MAX_INDEX_JOBS; i++) { - int st = atomic_load(&g_index_jobs[i].status); - if (st == 0) - continue; - if (pos > 1) - buf[pos++] = ','; - const char *ss = st == 1 ? "indexing" : st == 2 ? "done" : "error"; - pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, - "{\"slot\":%d,\"status\":\"%s\",\"path\":\"%s\",\"error\":\"%s\"}", i, ss, - g_index_jobs[i].root_path, st == 3 ? g_index_jobs[i].error_msg : ""); - } - buf[pos++] = ']'; - buf[pos] = '\0'; - mg_http_reply(c, 200, g_cors_json, "%s", buf); -} - -/* DELETE /api/project?name=X — deletes the .db file */ -static void handle_delete_project(struct mg_connection *c, struct mg_http_message *hm) { - char name[256] = {0}; - if (!get_query_param(hm->query, "name", name, (int)sizeof(name)) || name[0] == '\0') { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing name\"}"); - return; - } - - const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) - if (!home) - home = "/tmp"; - char db_path[1024]; - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); - - if (!cbm_file_exists(db_path)) { - mg_http_reply(c, 404, g_cors_json, - "{\"error\":\"project not found\"}"); - return; - } - - if (unlink(db_path) != 0) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"failed to delete\"}"); - return; - } - - /* Also remove WAL and SHM files if they exist */ - char wal_path[1040], shm_path[1040]; - snprintf(wal_path, sizeof(wal_path), "%s-wal", db_path); - snprintf(shm_path, sizeof(shm_path), "%s-shm", db_path); - (void)unlink(wal_path); - (void)unlink(shm_path); - - cbm_log_info("ui.project.deleted", "name", name); - mg_http_reply(c, 200, g_cors_json, "{\"deleted\":true}"); -} - -/* GET 
/api/project-health?name=X — checks db integrity */ -static void handle_project_health(struct mg_connection *c, struct mg_http_message *hm) { - char name[256] = {0}; - if (!get_query_param(hm->query, "name", name, (int)sizeof(name)) || name[0] == '\0') { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing name\"}"); - return; - } - - const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) - if (!home) - home = "/tmp"; - char db_path[1024]; - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); - - if (!cbm_file_exists(db_path)) { - mg_http_reply(c, 200, g_cors_json, - "{\"status\":\"missing\"}"); - return; - } - - cbm_store_t *store = cbm_store_open_path(db_path); - if (!store) { - mg_http_reply(c, 200, g_cors_json, - "{\"status\":\"corrupt\",\"reason\":\"cannot open\"}"); - return; - } - - int node_count = cbm_store_count_nodes(store, name); - int edge_count = cbm_store_count_edges(store, name); - cbm_store_close(store); - - int64_t size = cbm_file_size(db_path); - - mg_http_reply(c, 200, g_cors_json, - "{\"status\":\"healthy\",\"nodes\":%d,\"edges\":%d,\"size_bytes\":%lld}", - node_count, edge_count, (long long)size); -} - -/* ── Extract query parameter from URI ─────────────────────────── */ - -static bool get_query_param(struct mg_str query, const char *name, char *buf, int bufsz) { - int n = mg_http_get_var(&query, name, buf, (size_t)bufsz); - return n > 0; -} - -/* ── Handle GET /api/layout ───────────────────────────────────── */ - -static void handle_layout(struct mg_connection *c, struct mg_http_message *hm) { - char project[256] = {0}; - char max_str[32] = {0}; - - if (!get_query_param(hm->query, "project", project, (int)sizeof(project)) || - project[0] == '\0') { - mg_http_reply(c, 400, g_cors_json, - "{\"error\":\"missing project parameter\"}"); - return; - } - - int max_nodes = 50000; - if (get_query_param(hm->query, "max_nodes", max_str, (int)sizeof(max_str))) { - int v = atoi(max_str); - if (v > 
0) - max_nodes = v; - } - - /* Open a read-only store for this project */ - const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) - if (!home) - home = "/tmp"; - char db_path[1024]; - snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, project); - - if (!cbm_file_exists(db_path)) { - mg_http_reply(c, 404, g_cors_json, - "{\"error\":\"project not found\"}"); - return; - } - - cbm_store_t *store = cbm_store_open_path(db_path); - if (!store) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"cannot open store\"}"); - return; - } - - cbm_layout_result_t *layout = - cbm_layout_compute(store, project, CBM_LAYOUT_OVERVIEW, NULL, 0, max_nodes); - cbm_store_close(store); - - if (!layout) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"layout computation failed\"}"); - return; - } - - char *json = cbm_layout_to_json(layout); - cbm_layout_free(layout); - - if (!json) { - mg_http_reply(c, 500, g_cors_json, - "{\"error\":\"JSON serialization failed\"}"); - return; - } - - mg_http_reply(c, 200, g_cors_json, "%s", json); - free(json); -} - -/* ── Handle JSON-RPC request ──────────────────────────────────── */ - -static void handle_rpc(struct mg_connection *c, struct mg_http_message *hm, cbm_mcp_server_t *mcp) { - if (hm->body.len == 0 || hm->body.len > MAX_BODY_SIZE) { - mg_http_reply(c, 400, g_cors_json, - "{\"jsonrpc\":\"2.0\",\"error\":{\"code\":-32600," - "\"message\":\"invalid request size\"},\"id\":null}"); - return; - } - - /* NUL-terminate the body for cbm_mcp_server_handle */ - char *body = malloc(hm->body.len + 1); - if (!body) { - mg_http_reply(c, 500, g_cors, "out of memory"); - return; - } - memcpy(body, hm->body.buf, hm->body.len); - body[hm->body.len] = '\0'; - - char *response = cbm_mcp_server_handle(mcp, body); - free(body); - - if (response) { - mg_http_reply(c, 200, g_cors_json, "%s", response); - free(response); - } else { - mg_http_reply(c, 204, g_cors, ""); - } -} - -/* ── HTTP event handler 
───────────────────────────────────────── */ - -static void http_handler(struct mg_connection *c, int ev, void *ev_data) { - if (ev != MG_EV_HTTP_MSG) - return; - - struct mg_http_message *hm = ev_data; - cbm_http_server_t *srv = c->fn_data; - - /* Build per-request CORS headers (only reflects localhost origins) */ - update_cors(hm); - - /* OPTIONS preflight for CORS */ - if (mg_strcmp(hm->method, mg_str("OPTIONS")) == 0) { - char opt_hdrs[512]; - snprintf(opt_hdrs, sizeof(opt_hdrs), "%sContent-Length: 0\r\n", g_cors); - mg_http_reply(c, 204, opt_hdrs, ""); - return; - } - - /* POST /rpc → JSON-RPC dispatch (reuses existing MCP tools) */ - if (mg_strcmp(hm->method, mg_str("POST")) == 0 && mg_match(hm->uri, mg_str("/rpc"), NULL)) { - handle_rpc(c, hm, srv->mcp); - return; - } - - /* GET /api/layout → 3D graph layout */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/layout*"), NULL)) { - handle_layout(c, hm); - return; - } - - /* POST /api/index → start background indexing */ - if (mg_strcmp(hm->method, mg_str("POST")) == 0 && - mg_match(hm->uri, mg_str("/api/index"), NULL)) { - handle_index_start(c, hm); - return; - } - - /* GET /api/index-status → check indexing progress */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/index-status"), NULL)) { - handle_index_status(c); - return; - } - - /* DELETE /api/project → delete a project's .db file */ - if (mg_strcmp(hm->method, mg_str("DELETE")) == 0 && - mg_match(hm->uri, mg_str("/api/project*"), NULL)) { - handle_delete_project(c, hm); - return; - } - - /* GET /api/browse → directory browser for file picker */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/browse*"), NULL)) { - handle_browse(c, hm); - return; - } - - /* GET /api/adr → get ADR for project */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && mg_match(hm->uri, mg_str("/api/adr*"), NULL)) { - handle_adr_get(c, hm); - return; - } - - /* POST 
/api/adr → save ADR for project */ - if (mg_strcmp(hm->method, mg_str("POST")) == 0 && mg_match(hm->uri, mg_str("/api/adr"), NULL)) { - handle_adr_save(c, hm); - return; - } - - /* GET /api/project-health → check db integrity */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/project-health*"), NULL)) { - handle_project_health(c, hm); - return; - } - - /* GET /api/processes → list running codebase-memory-mcp processes */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/processes"), NULL)) { - handle_processes(c); - return; - } - - /* GET /api/logs → recent log lines */ - if (mg_strcmp(hm->method, mg_str("GET")) == 0 && - mg_match(hm->uri, mg_str("/api/logs*"), NULL)) { - handle_logs(c, hm); - return; - } - - /* POST /api/process-kill → kill a process */ - if (mg_strcmp(hm->method, mg_str("POST")) == 0 && - mg_match(hm->uri, mg_str("/api/process-kill"), NULL)) { - handle_process_kill(c, hm); - return; - } - - /* GET / → index.html (no-cache so browser always gets latest) */ - if (mg_match(hm->uri, mg_str("/"), NULL)) { - const cbm_embedded_file_t *f = cbm_embedded_lookup("/index.html"); - if (f) { - char html_hdrs[512]; - snprintf(html_hdrs, sizeof(html_hdrs), - "%sContent-Type: text/html\r\nCache-Control: no-cache\r\n", g_cors); - mg_http_reply(c, 200, html_hdrs, "%.*s", (int)f->size, (const char *)f->data); - return; - } - mg_http_reply(c, 404, g_cors, "no frontend embedded"); - return; - } - - /* GET /assets/... 
→ embedded assets */ - if (mg_match(hm->uri, mg_str("/assets/*"), NULL)) { - /* Build path string from mg_str */ - char path[256]; - int len = (int)hm->uri.len; - if (len >= (int)sizeof(path)) - len = (int)sizeof(path) - 1; - memcpy(path, hm->uri.buf, (size_t)len); - path[len] = '\0'; - - if (serve_embedded(c, path)) - return; - mg_http_reply(c, 404, g_cors, "not found"); - return; - } - - /* Fallback: try as embedded path, then 404 */ - { - char path[256]; - int len = (int)hm->uri.len; - if (len >= (int)sizeof(path)) - len = (int)sizeof(path) - 1; - memcpy(path, hm->uri.buf, (size_t)len); - path[len] = '\0'; - - if (serve_embedded(c, path)) - return; - } - - mg_http_reply(c, 404, g_cors, "not found"); -} - -/* ── Public API ───────────────────────────────────────────────── */ - -cbm_http_server_t *cbm_http_server_new(int port) { - cbm_http_server_t *srv = calloc(1, sizeof(*srv)); - if (!srv) - return NULL; - - srv->port = port; - atomic_store(&srv->stop_flag, 0); - - /* Create a dedicated MCP server for HTTP (own SQLite connection) */ - srv->mcp = cbm_mcp_server_new(NULL); - if (!srv->mcp) { - cbm_log_error("ui.http.mcp_fail", "reason", "cannot create MCP instance"); - free(srv); - return NULL; - } - - /* Initialize Mongoose */ - mg_mgr_init(&srv->mgr); - srv->mgr.userdata = srv; - - /* Bind to localhost only */ - char url[64]; - snprintf(url, sizeof(url), "http://127.0.0.1:%d", port); - - struct mg_connection *listener = mg_http_listen(&srv->mgr, url, http_handler, srv); - if (!listener) { - char port_str[16]; - snprintf(port_str, sizeof(port_str), "%d", port); - cbm_log_warn("ui.unavailable", "port", port_str, "reason", "in_use", "hint", - "use --port=N to override"); - cbm_mcp_server_free(srv->mcp); - mg_mgr_free(&srv->mgr); - free(srv); - return NULL; - } - - srv->listener_ok = true; - - char port_str[16]; - snprintf(port_str, sizeof(port_str), "%d", port); - cbm_log_info("ui.serving", "url", url, "port", port_str); - - return srv; -} - -void 
cbm_http_server_free(cbm_http_server_t *srv) { - if (!srv) - return; - mg_mgr_free(&srv->mgr); - cbm_mcp_server_free(srv->mcp); - free(srv); -} - -void cbm_http_server_stop(cbm_http_server_t *srv) { - if (srv) { - atomic_store(&srv->stop_flag, 1); - } -} - -void cbm_http_server_run(cbm_http_server_t *srv) { - if (!srv || !srv->listener_ok) - return; - - while (!atomic_load(&srv->stop_flag)) { - mg_mgr_poll(&srv->mgr, 200); /* 200ms poll interval */ - } -} - -bool cbm_http_server_is_running(const cbm_http_server_t *srv) { - return srv && srv->listener_ok; -} +/* + * http_server.c — Mongoose-based HTTP server for graph UI. + * + * Routes: + * GET / → embedded index.html + * GET /assets/... → embedded JS/CSS + * POST /rpc → JSON-RPC dispatch via own cbm_mcp_server_t + * OPTIONS /rpc → CORS preflight (for vite dev on :5173) + * * → 404 + * + * Runs in a background pthread. Binds to 127.0.0.1 only. + * Has its own cbm_mcp_server_t with a separate SQLite connection (WAL reader). + */ +#include "ui/http_server.h" +#include "ui/embedded_assets.h" +#include "ui/layout3d.h" +#include "mcp/mcp.h" +#include "store/store.h" +/* pipeline.h no longer needed — indexing runs as subprocess */ +#include "foundation/log.h" +#include "foundation/platform.h" +#include "foundation/compat.h" +#include "foundation/compat_thread.h" + +#include +#include + +#include +#include +#include +#include +#ifdef _WIN32 +#include +#include +#include /* GetProcessMemoryInfo */ +#else +#include +#include +#endif +#ifdef __APPLE__ +#include +#endif + +/* ── Constants ────────────────────────────────────────────────── */ + +/* Max JSON-RPC request body size (1 MB) */ +#define MAX_BODY_SIZE (1024 * 1024) + +/* ── CORS: only allow localhost origins (blocks remote website attacks) ────── */ + +/* Per-request CORS header buffers. Updated at the start of each HTTP handler + * call by update_cors(). Single-threaded mongoose event loop makes statics safe. 
*/ +static char g_cors[256]; /* CORS headers only */ +static char g_cors_json[512]; /* CORS + Content-Type: application/json */ + +/* Inspect the Origin header and only reflect it if it's a localhost URL. + * This prevents remote websites from making cross-origin requests to the + * local graph-ui server (the key defense against CORS-based data exfil). */ +static void update_cors(struct mg_http_message *hm) { + struct mg_str *origin = mg_http_get_header(hm, "Origin"); + if (origin && origin->len > 0 && + (mg_match(*origin, mg_str("http://localhost:*"), NULL) || + mg_match(*origin, mg_str("http://127.0.0.1:*"), NULL))) { + snprintf(g_cors, sizeof(g_cors), + "Access-Control-Allow-Origin: %.*s\r\n" + "Access-Control-Allow-Methods: POST, GET, DELETE, OPTIONS\r\n" + "Access-Control-Allow-Headers: Content-Type\r\n", + (int)origin->len, origin->buf); + } else { + /* No Access-Control-Allow-Origin → browser blocks cross-origin access */ + snprintf(g_cors, sizeof(g_cors), + "Access-Control-Allow-Methods: POST, GET, DELETE, OPTIONS\r\n" + "Access-Control-Allow-Headers: Content-Type\r\n"); + } + snprintf(g_cors_json, sizeof(g_cors_json), + "%sContent-Type: application/json\r\n", g_cors); +} + +/* ── Server state ─────────────────────────────────────────────── */ + +struct cbm_http_server { + struct mg_mgr mgr; + cbm_mcp_server_t *mcp; /* own MCP server instance (read-only) */ + atomic_int stop_flag; + int port; + bool listener_ok; +}; + +/* ── Forward declarations for process-kill PID validation ──────── */ + +#define MAX_INDEX_JOBS 4 + +typedef struct { + char root_path[1024]; + char project_name[256]; + atomic_int status; /* 0=idle, 1=running, 2=done, 3=error */ + char error_msg[256]; +#ifndef _WIN32 + pid_t child_pid; /* tracked for process-kill validation */ +#endif +} index_job_t; + +static index_job_t g_index_jobs[MAX_INDEX_JOBS]; + +/* ── Serve embedded asset ─────────────────────────────────────── */ + +static bool serve_embedded(struct mg_connection *c, const char 
*path) { + const cbm_embedded_file_t *f = cbm_embedded_lookup(path); + if (!f) + return false; + + /* Build headers with correct Content-Type for this asset */ + char hdrs[512]; + snprintf(hdrs, sizeof(hdrs), + "%sContent-Type: %s\r\n" + "Cache-Control: public, max-age=31536000, immutable\r\n", + g_cors, f->content_type); + + mg_http_reply(c, 200, hdrs, "%.*s", (int)f->size, (const char *)f->data); + return true; +} + +/* Forward declaration */ +static bool get_query_param(struct mg_str query, const char *name, char *buf, int bufsz); + +/* ── Log ring buffer ──────────────────────────────────────────── */ + +#define LOG_RING_SIZE 500 +#define LOG_LINE_MAX 512 + +static char g_log_ring[LOG_RING_SIZE][LOG_LINE_MAX]; +static int g_log_head = 0; +static int g_log_count = 0; +static cbm_mutex_t g_log_mutex; +static atomic_int g_log_mutex_init = 0; + +/* Called from a log hook — appends a line to the ring buffer (thread-safe) */ +void cbm_ui_log_append(const char *line) { + if (!line) + return; + if (!atomic_load(&g_log_mutex_init)) { + cbm_mutex_init(&g_log_mutex); + atomic_store(&g_log_mutex_init, 1); + } + cbm_mutex_lock(&g_log_mutex); + snprintf(g_log_ring[g_log_head], LOG_LINE_MAX, "%s", line); + g_log_head = (g_log_head + 1) % LOG_RING_SIZE; + if (g_log_count < LOG_RING_SIZE) + g_log_count++; + cbm_mutex_unlock(&g_log_mutex); +} + +/* GET /api/logs?lines=N — returns last N log lines */ +static void handle_logs(struct mg_connection *c, struct mg_http_message *hm) { + char lines_str[16] = {0}; + int max_lines = 100; + if (get_query_param(hm->query, "lines", lines_str, (int)sizeof(lines_str))) { + int v = atoi(lines_str); + if (v > 0 && v <= LOG_RING_SIZE) + max_lines = v; + } + + cbm_mutex_lock(&g_log_mutex); + int count = g_log_count < max_lines ? 
g_log_count : max_lines; + int start = (g_log_head - count + LOG_RING_SIZE) % LOG_RING_SIZE; + int total = g_log_count; + + /* Copy lines under lock */ + size_t buf_size = (size_t)count * (LOG_LINE_MAX + 10) + 64; + char *buf = malloc(buf_size); + if (!buf) { + cbm_mutex_unlock(&g_log_mutex); + mg_http_reply(c, 500, g_cors, "oom"); + return; + } + + int pos = 0; + pos += snprintf(buf + pos, buf_size - (size_t)pos, "{\"lines\":["); + for (int i = 0; i < count; i++) { + int idx = (start + i) % LOG_RING_SIZE; + if (i > 0) + buf[pos++] = ','; + /* Escape quotes in log lines */ + buf[pos++] = '"'; + for (int j = 0; g_log_ring[idx][j] && (size_t)pos < buf_size - 10; j++) { + char ch = g_log_ring[idx][j]; + if (ch == '"') { + buf[pos++] = '\\'; + buf[pos++] = '"'; + } else if (ch == '\\') { + buf[pos++] = '\\'; + buf[pos++] = '\\'; + } else if (ch == '\n') { + buf[pos++] = '\\'; + buf[pos++] = 'n'; + } else { + buf[pos++] = ch; + } + } + buf[pos++] = '"'; + } + cbm_mutex_unlock(&g_log_mutex); + pos += snprintf(buf + pos, buf_size - (size_t)pos, "],\"total\":%d}", total); + + mg_http_reply(c, 200, g_cors_json, "%s", buf); + free(buf); +} + +/* ── Process monitoring ───────────────────────────────────────── */ + +#ifndef _WIN32 +#include +#endif +#include + +/* GET /api/processes — list codebase-memory-mcp processes via ps */ +static void handle_processes(struct mg_connection *c) { + char buf[8192]; + int pos = 0; + +#ifdef _WIN32 + /* Windows: GetProcessMemoryInfo + GetProcessTimes */ + PROCESS_MEMORY_COUNTERS pmc; + FILETIME ft_create, ft_exit, ft_kernel, ft_user; + double user_s = 0, sys_s = 0; + size_t rss_bytes = 0; + if (GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) + rss_bytes = pmc.WorkingSetSize; + if (GetProcessTimes(GetCurrentProcess(), &ft_create, &ft_exit, &ft_kernel, &ft_user)) { + ULARGE_INTEGER u, k; + u.LowPart = ft_user.dwLowDateTime; + u.HighPart = ft_user.dwHighDateTime; + k.LowPart = ft_kernel.dwLowDateTime; + k.HighPart = 
ft_kernel.dwHighDateTime; + user_s = (double)u.QuadPart / 1e7; + sys_s = (double)k.QuadPart / 1e7; + } + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, + "{\"self_pid\":%d,\"self_rss_mb\":%.1f," + "\"self_user_cpu_s\":%.1f,\"self_sys_cpu_s\":%.1f,\"processes\":[]}", + (int)_getpid(), (double)rss_bytes / (1024.0 * 1024.0), user_s, sys_s); +#else + struct rusage ru; + getrusage(RUSAGE_SELF, &ru); + long rss_kb = ru.ru_maxrss; +#ifdef __APPLE__ + rss_kb /= 1024; +#endif + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, + "{\"self_pid\":%d,\"self_rss_mb\":%.1f," + "\"self_user_cpu_s\":%.1f,\"self_sys_cpu_s\":%.1f,\"processes\":[", + (int)getpid(), (double)rss_kb / 1024.0, + (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1e6, + (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1e6); + + FILE *fp = popen("LC_ALL=C ps -eo pid,pcpu,rss,etime,comm 2>/dev/null" + " | grep '[c]odebase-memory-mcp'", + "r"); + int proc_count = 0; + if (fp) { + char line[1024]; + while (fgets(line, sizeof(line), fp)) { + int pid = 0; + float cpu = 0; + long rss = 0; + char elapsed[64] = {0}; + char comm[256] = {0}; + + if (sscanf(line, "%d %f %ld %63s %255s", &pid, &cpu, &rss, elapsed, comm) >= 4) { + if (proc_count > 0) + buf[pos++] = ','; + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, + "{\"pid\":%d,\"cpu\":%.1f,\"rss_mb\":%.1f," + "\"elapsed\":\"%s\",\"command\":\"%s\",\"is_self\":%s}", + pid, (double)cpu, (double)rss / 1024.0, elapsed, comm, + pid == (int)getpid() ? 
"true" : "false"); + if (pos >= (int)sizeof(buf)) { + pos = (int)sizeof(buf) - 1; + } + proc_count++; + } + } + pclose(fp); + } + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "]}"); +#endif + + mg_http_reply(c, 200, g_cors_json, "%s", buf); +} + +/* POST /api/process-kill — kill a process by PID */ +static void handle_process_kill(struct mg_connection *c, struct mg_http_message *hm) { + if (hm->body.len == 0 || hm->body.len > 256) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid body\"}"); + return; + } + + char body[257]; + memcpy(body, hm->body.buf, hm->body.len); + body[hm->body.len] = '\0'; + + yyjson_doc *doc = yyjson_read(body, hm->body.len, 0); + if (!doc) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid json\"}"); + return; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *v_pid = yyjson_obj_get(root, "pid"); + if (!v_pid || !yyjson_is_int(v_pid)) { + yyjson_doc_free(doc); + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing pid\"}"); + return; + } + int target_pid = (int)yyjson_get_int(v_pid); + yyjson_doc_free(doc); + +#ifdef _WIN32 + if (target_pid == (int)_getpid()) { +#else + if (target_pid == (int)getpid()) { +#endif + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"cannot kill self (use the UI server's own shutdown)\"}"); + return; + } + +#ifndef _WIN32 + /* Only allow killing PIDs that were spawned by this server (indexing jobs) */ + { + bool pid_is_ours = false; + for (int i = 0; i < MAX_INDEX_JOBS; i++) { + if (atomic_load(&g_index_jobs[i].status) == 1 && + g_index_jobs[i].child_pid == target_pid) { + pid_is_ours = true; + break; + } + } + if (!pid_is_ours) { + mg_http_reply(c, 403, g_cors_json, + "{\"error\":\"can only kill server-spawned processes\"}"); + return; + } + } +#endif + +#ifdef _WIN32 + HANDLE hproc = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD)target_pid); + if (!hproc || !TerminateProcess(hproc, 1)) { + if (hproc) + CloseHandle(hproc); + mg_http_reply(c, 500, 
g_cors_json, + "{\"error\":\"kill failed\"}"); + return; + } + CloseHandle(hproc); +#else + if (kill(target_pid, SIGTERM) != 0) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"kill failed\"}"); + return; + } +#endif + + mg_http_reply(c, 200, g_cors_json, "{\"killed\":%d}", + target_pid); +} + +/* ── Directory browser ────────────────────────────────────────── */ + +#include + +/* GET /api/browse?path=/some/dir — list subdirectories for file picker */ +static void handle_browse(struct mg_connection *c, struct mg_http_message *hm) { + char path[1024] = {0}; + if (!get_query_param(hm->query, "path", path, (int)sizeof(path)) || path[0] == '\0') { + /* Default to home directory */ + const char *home = cbm_home_dir(); + if (home) + snprintf(path, sizeof(path), "%s", home); + else + snprintf(path, sizeof(path), "/"); + } + + if (!cbm_is_dir(path)) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"not a directory\"}"); + return; + } + + DIR *dir = opendir(path); + if (!dir) { + mg_http_reply(c, 403, g_cors_json, + "{\"error\":\"cannot open directory\"}"); + return; + } + + /* Build JSON response */ + char buf[32768]; + int pos = 0; + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "{\"path\":\"%s\",\"dirs\":[", path); + + struct dirent *ent; + int count = 0; + while ((ent = readdir(dir)) != NULL) { + /* Skip hidden dirs and . / .. 
*/ + if (ent->d_name[0] == '.') + continue; + + /* Check if it's actually a directory */ + char full[2048]; + snprintf(full, sizeof(full), "%s/%s", path, ent->d_name); + if (!cbm_is_dir(full)) + continue; + + if (count > 0) + buf[pos++] = ','; + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "\"%s\"", ent->d_name); + if (pos >= (int)sizeof(buf)) { + pos = (int)sizeof(buf) - 1; + } + count++; + + if (count >= 200) + break; /* safety limit */ + } + closedir(dir); + + /* Parent path */ + char parent[1024]; + snprintf(parent, sizeof(parent), "%s", path); + char *last_slash = strrchr(parent, '/'); + if (last_slash && last_slash != parent) + *last_slash = '\0'; + else + snprintf(parent, sizeof(parent), "/"); + + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, "],\"parent\":\"%s\"}", parent); + mg_http_reply(c, 200, g_cors_json, "%s", buf); +} + +/* ── ADR endpoints ────────────────────────────────────────────── */ + +/* GET /api/adr?project=X — get ADR content for a project */ +static void handle_adr_get(struct mg_connection *c, struct mg_http_message *hm) { + char name[256] = {0}; + if (!get_query_param(hm->query, "project", name, (int)sizeof(name)) || name[0] == '\0') { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing project\"}"); + return; + } + + const char *home = cbm_home_dir(); + if (!home) + home = "/tmp"; + char db_path[1024]; + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); + + cbm_store_t *store = cbm_store_open_path(db_path); + if (!store) { + mg_http_reply(c, 200, g_cors_json, + "{\"has_adr\":false}"); + return; + } + + cbm_adr_t adr; + memset(&adr, 0, sizeof(adr)); + if (cbm_store_adr_get(store, name, &adr) == CBM_STORE_OK && adr.content) { + /* Escape content for JSON — simple: replace quotes and newlines */ + size_t clen = strlen(adr.content); + size_t buf_size = clen * 2 + 256; + char *buf = malloc(buf_size); + if (buf) { + int pos = snprintf(buf, buf_size, 
"{\"has_adr\":true,\"content\":\""); + for (size_t i = 0; i < clen && (size_t)pos < buf_size - 10; i++) { + char ch = adr.content[i]; + if (ch == '"') { + buf[pos++] = '\\'; + buf[pos++] = '"'; + } else if (ch == '\\') { + buf[pos++] = '\\'; + buf[pos++] = '\\'; + } else if (ch == '\n') { + buf[pos++] = '\\'; + buf[pos++] = 'n'; + } else if (ch == '\r') { /* skip */ + } else if (ch == '\t') { + buf[pos++] = '\\'; + buf[pos++] = 't'; + } else { + buf[pos++] = ch; + } + } + pos += snprintf(buf + pos, buf_size - (size_t)pos, "\",\"updated_at\":\"%s\"}", + adr.updated_at ? adr.updated_at : ""); + mg_http_reply(c, 200, g_cors_json, "%s", buf); + free(buf); + } else { + mg_http_reply(c, 500, g_cors, "oom"); + } + cbm_store_adr_free(&adr); + } else { + mg_http_reply(c, 200, g_cors_json, + "{\"has_adr\":false}"); + } + cbm_store_close(store); +} + +/* POST /api/adr — save ADR content. Body: {"project":"...","content":"..."} */ +static void handle_adr_save(struct mg_connection *c, struct mg_http_message *hm) { + if (hm->body.len == 0 || hm->body.len > 16384) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid body\"}"); + return; + } + + char *body = malloc(hm->body.len + 1); + if (!body) { + mg_http_reply(c, 500, g_cors, "oom"); + return; + } + memcpy(body, hm->body.buf, hm->body.len); + body[hm->body.len] = '\0'; + + yyjson_doc *doc = yyjson_read(body, hm->body.len, 0); + free(body); + if (!doc) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid json\"}"); + return; + } + + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *v_proj = yyjson_obj_get(root, "project"); + yyjson_val *v_content = yyjson_obj_get(root, "content"); + if (!v_proj || !yyjson_is_str(v_proj) || !v_content || !yyjson_is_str(v_content)) { + yyjson_doc_free(doc); + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing project or content\"}"); + return; + } + + const char *proj = yyjson_get_str(v_proj); + const char *content = yyjson_get_str(v_content); + + const char 
*home = cbm_home_dir(); + if (!home) + home = "/tmp"; + char db_path[1024]; + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, proj); + + cbm_store_t *store = cbm_store_open_path(db_path); + yyjson_doc_free(doc); + if (!store) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"cannot open store\"}"); + return; + } + + int rc = cbm_store_adr_store(store, proj, content); + cbm_store_close(store); + + if (rc == CBM_STORE_OK) { + mg_http_reply(c, 200, g_cors_json, + "{\"saved\":true}"); + } else { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"save failed\"}"); + } +} + +/* ── Background indexing ──────────────────────────────────────── */ + +static char g_binary_path[1024] = {0}; + +void cbm_http_server_set_binary_path(const char *path) { + if (path) { + snprintf(g_binary_path, sizeof(g_binary_path), "%s", path); + } +} + +/* Index via subprocess — isolates crashes from the main process. */ +static void *index_thread_fn(void *arg) { + index_job_t *job = arg; + cbm_log_info("ui.index.start", "path", job->root_path); + + /* Use stored binary path, or try to find it */ + const char *bin = g_binary_path; + char self_path[1024] = {0}; + if (!bin[0]) { +#ifdef _WIN32 + GetModuleFileNameA(NULL, self_path, sizeof(self_path)); +#elif defined(__APPLE__) + uint32_t sz = sizeof(self_path); + _NSGetExecutablePath(self_path, &sz); +#else + ssize_t len = readlink("/proc/self/exe", self_path, sizeof(self_path) - 1); + if (len > 0) + self_path[len] = '\0'; +#endif + bin = self_path[0] ? self_path : "codebase-memory-mcp"; + } + + char log_file[256]; + char json_arg[1200]; + snprintf(json_arg, sizeof(json_arg), "{\"repo_path\":\"%s\"}", job->root_path); + +#ifdef _WIN32 + snprintf(log_file, sizeof(log_file), "%s\\cbm_index_%d.log", + getenv("TEMP") ? 
getenv("TEMP") : ".", (int)_getpid()); + + /* Build command line for CreateProcess */ + char cmdline[2048]; + snprintf(cmdline, sizeof(cmdline), "\"%s\" cli index_repository \"%s\"", bin, json_arg); + + cbm_log_info("ui.index.spawn", "bin", bin, "log", log_file); + + HANDLE hlog = CreateFileA(log_file, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); + STARTUPINFOA si_proc = {.cb = sizeof(si_proc)}; + if (hlog != INVALID_HANDLE_VALUE) { + si_proc.dwFlags = STARTF_USESTDHANDLES; + si_proc.hStdError = hlog; + si_proc.hStdOutput = hlog; + } + PROCESS_INFORMATION pi = {0}; + if (!CreateProcessA(NULL, cmdline, NULL, NULL, TRUE, 0, NULL, NULL, &si_proc, &pi)) { + snprintf(job->error_msg, sizeof(job->error_msg), "CreateProcess failed"); + atomic_store(&job->status, 3); + if (hlog != INVALID_HANDLE_VALUE) + CloseHandle(hlog); + return NULL; + } + if (hlog != INVALID_HANDLE_VALUE) + CloseHandle(hlog); + + /* Poll log file while child runs */ + long tail_pos = 0; + for (;;) { + DWORD wait = WaitForSingleObject(pi.hProcess, 500); + FILE *lf = fopen(log_file, "r"); + if (lf) { + fseek(lf, tail_pos, SEEK_SET); + char line[512]; + while (fgets(line, sizeof(line), lf)) { + size_t l = strlen(line); + if (l > 0 && line[l - 1] == '\n') + line[l - 1] = '\0'; + if (line[0]) + cbm_ui_log_append(line); + } + tail_pos = ftell(lf); + fclose(lf); + } + if (wait == WAIT_OBJECT_0) + break; + } + + DWORD win_exit = 1; + GetExitCodeProcess(pi.hProcess, &win_exit); + int exit_code = (int)win_exit; + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + (void)DeleteFileA(log_file); +#else + snprintf(log_file, sizeof(log_file), "/tmp/cbm_index_%d.log", (int)getpid()); + + cbm_log_info("ui.index.fork", "bin", bin, "log", log_file); + + pid_t child_pid = fork(); + if (child_pid < 0) { + snprintf(job->error_msg, sizeof(job->error_msg), "fork failed"); + atomic_store(&job->status, 3); + return NULL; + } + job->child_pid = child_pid; + + if (child_pid == 0) { + 
FILE *lf = freopen(log_file, "w", stderr); + (void)lf; + freopen("/dev/null", "w", stdout); + execl(bin, bin, "cli", "index_repository", json_arg, (char *)NULL); + _exit(127); + } + + long tail_pos = 0; + for (;;) { + int wstatus = 0; + pid_t wr = waitpid(child_pid, &wstatus, WNOHANG); + bool child_done = (wr == child_pid); + + FILE *lf = fopen(log_file, "r"); + if (lf) { + fseek(lf, tail_pos, SEEK_SET); + char line[512]; + while (fgets(line, sizeof(line), lf)) { + size_t l = strlen(line); + if (l > 0 && line[l - 1] == '\n') + line[l - 1] = '\0'; + if (line[0]) + cbm_ui_log_append(line); + } + tail_pos = ftell(lf); + fclose(lf); + } + + if (child_done) + break; + + struct timespec ts = {0, 500000000}; + cbm_nanosleep(&ts, NULL); + } + + int wstatus = 0; + waitpid(child_pid, &wstatus, 0); + int exit_code = WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1; + + (void)unlink(log_file); +#endif + + if (exit_code != 0) { + snprintf(job->error_msg, sizeof(job->error_msg), "indexing failed (exit code %d)", + exit_code); + atomic_store(&job->status, 3); + } else { + atomic_store(&job->status, 2); + } + cbm_log_info("ui.index.done", "path", job->root_path, "rc", exit_code == 0 ? 
"ok" : "err"); + return NULL; +} + +/* POST /api/index — body: {"root_path": "/abs/path"} → starts background indexing */ +static void handle_index_start(struct mg_connection *c, struct mg_http_message *hm) { + if (hm->body.len == 0 || hm->body.len > 4096) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid body\"}"); + return; + } + + char body_buf[4097]; + memcpy(body_buf, hm->body.buf, hm->body.len); + body_buf[hm->body.len] = '\0'; + + yyjson_doc *doc = yyjson_read(body_buf, hm->body.len, 0); + if (!doc) { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"invalid json\"}"); + return; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *v_path = yyjson_obj_get(root, "root_path"); + if (!v_path || !yyjson_is_str(v_path)) { + yyjson_doc_free(doc); + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing root_path\"}"); + return; + } + const char *rpath = yyjson_get_str(v_path); + + /* Check path exists */ + if (!cbm_is_dir(rpath)) { + yyjson_doc_free(doc); + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"directory not found\"}"); + return; + } + + /* Find free job slot */ + int slot = -1; + for (int i = 0; i < MAX_INDEX_JOBS; i++) { + int st = atomic_load(&g_index_jobs[i].status); + if (st == 0 || st == 2 || st == 3) { + slot = i; + break; + } + } + if (slot < 0) { + yyjson_doc_free(doc); + mg_http_reply(c, 429, g_cors_json, + "{\"error\":\"all index slots busy\"}"); + return; + } + + index_job_t *job = &g_index_jobs[slot]; + snprintf(job->root_path, sizeof(job->root_path), "%s", rpath); + job->error_msg[0] = '\0'; + atomic_store(&job->status, 1); + yyjson_doc_free(doc); + + /* Spawn background thread */ + cbm_thread_t tid; + if (cbm_thread_create(&tid, 0, index_thread_fn, job) != 0) { + atomic_store(&job->status, 3); + snprintf(job->error_msg, sizeof(job->error_msg), "thread creation failed"); + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"thread creation failed\"}"); + return; + } + + mg_http_reply(c, 202, g_cors_json, + 
"{\"status\":\"indexing\",\"slot\":%d,\"path\":\"%s\"}", slot, job->root_path); +} + +/* GET /api/index-status — returns status of all index jobs */ +static void handle_index_status(struct mg_connection *c) { + char buf[2048] = "["; + int pos = 1; + for (int i = 0; i < MAX_INDEX_JOBS; i++) { + int st = atomic_load(&g_index_jobs[i].status); + if (st == 0) + continue; + if (pos > 1) + buf[pos++] = ','; + const char *ss = st == 1 ? "indexing" : st == 2 ? "done" : "error"; + pos += snprintf(buf + pos, sizeof(buf) - (size_t)pos, + "{\"slot\":%d,\"status\":\"%s\",\"path\":\"%s\",\"error\":\"%s\"}", i, ss, + g_index_jobs[i].root_path, st == 3 ? g_index_jobs[i].error_msg : ""); + } + buf[pos++] = ']'; + buf[pos] = '\0'; + mg_http_reply(c, 200, g_cors_json, "%s", buf); +} + +/* DELETE /api/project?name=X — deletes the .db file */ +static void handle_delete_project(struct mg_connection *c, struct mg_http_message *hm) { + char name[256] = {0}; + if (!get_query_param(hm->query, "name", name, (int)sizeof(name)) || name[0] == '\0') { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing name\"}"); + return; + } + + const char *home = cbm_home_dir(); + if (!home) + home = "/tmp"; + char db_path[1024]; + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); + + if (!cbm_file_exists(db_path)) { + mg_http_reply(c, 404, g_cors_json, + "{\"error\":\"project not found\"}"); + return; + } + + if (unlink(db_path) != 0) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"failed to delete\"}"); + return; + } + + /* Also remove WAL and SHM files if they exist */ + char wal_path[1040], shm_path[1040]; + snprintf(wal_path, sizeof(wal_path), "%s-wal", db_path); + snprintf(shm_path, sizeof(shm_path), "%s-shm", db_path); + (void)unlink(wal_path); + (void)unlink(shm_path); + + cbm_log_info("ui.project.deleted", "name", name); + mg_http_reply(c, 200, g_cors_json, "{\"deleted\":true}"); +} + +/* GET /api/project-health?name=X — checks db integrity */ 
+static void handle_project_health(struct mg_connection *c, struct mg_http_message *hm) { + char name[256] = {0}; + if (!get_query_param(hm->query, "name", name, (int)sizeof(name)) || name[0] == '\0') { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing name\"}"); + return; + } + + const char *home = cbm_home_dir(); + if (!home) + home = "/tmp"; + char db_path[1024]; + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, name); + + if (!cbm_file_exists(db_path)) { + mg_http_reply(c, 200, g_cors_json, + "{\"status\":\"missing\"}"); + return; + } + + cbm_store_t *store = cbm_store_open_path(db_path); + if (!store) { + mg_http_reply(c, 200, g_cors_json, + "{\"status\":\"corrupt\",\"reason\":\"cannot open\"}"); + return; + } + + int node_count = cbm_store_count_nodes(store, name); + int edge_count = cbm_store_count_edges(store, name); + cbm_store_close(store); + + int64_t size = cbm_file_size(db_path); + + mg_http_reply(c, 200, g_cors_json, + "{\"status\":\"healthy\",\"nodes\":%d,\"edges\":%d,\"size_bytes\":%lld}", + node_count, edge_count, (long long)size); +} + +/* ── Extract query parameter from URI ─────────────────────────── */ + +static bool get_query_param(struct mg_str query, const char *name, char *buf, int bufsz) { + int n = mg_http_get_var(&query, name, buf, (size_t)bufsz); + return n > 0; +} + +/* ── Handle GET /api/layout ───────────────────────────────────── */ + +static void handle_layout(struct mg_connection *c, struct mg_http_message *hm) { + char project[256] = {0}; + char max_str[32] = {0}; + + if (!get_query_param(hm->query, "project", project, (int)sizeof(project)) || + project[0] == '\0') { + mg_http_reply(c, 400, g_cors_json, + "{\"error\":\"missing project parameter\"}"); + return; + } + + int max_nodes = 50000; + if (get_query_param(hm->query, "max_nodes", max_str, (int)sizeof(max_str))) { + int v = atoi(max_str); + if (v > 0) + max_nodes = v; + } + + /* Open a read-only store for this project */ + const 
char *home = cbm_home_dir(); + if (!home) + home = "/tmp"; + char db_path[1024]; + snprintf(db_path, sizeof(db_path), "%s/.cache/codebase-memory-mcp/%s.db", home, project); + + if (!cbm_file_exists(db_path)) { + mg_http_reply(c, 404, g_cors_json, + "{\"error\":\"project not found\"}"); + return; + } + + cbm_store_t *store = cbm_store_open_path(db_path); + if (!store) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"cannot open store\"}"); + return; + } + + cbm_layout_result_t *layout = + cbm_layout_compute(store, project, CBM_LAYOUT_OVERVIEW, NULL, 0, max_nodes); + cbm_store_close(store); + + if (!layout) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"layout computation failed\"}"); + return; + } + + char *json = cbm_layout_to_json(layout); + cbm_layout_free(layout); + + if (!json) { + mg_http_reply(c, 500, g_cors_json, + "{\"error\":\"JSON serialization failed\"}"); + return; + } + + mg_http_reply(c, 200, g_cors_json, "%s", json); + free(json); +} + +/* ── Handle JSON-RPC request ──────────────────────────────────── */ + +static void handle_rpc(struct mg_connection *c, struct mg_http_message *hm, cbm_mcp_server_t *mcp) { + if (hm->body.len == 0 || hm->body.len > MAX_BODY_SIZE) { + mg_http_reply(c, 400, g_cors_json, + "{\"jsonrpc\":\"2.0\",\"error\":{\"code\":-32600," + "\"message\":\"invalid request size\"},\"id\":null}"); + return; + } + + /* NUL-terminate the body for cbm_mcp_server_handle */ + char *body = malloc(hm->body.len + 1); + if (!body) { + mg_http_reply(c, 500, g_cors, "out of memory"); + return; + } + memcpy(body, hm->body.buf, hm->body.len); + body[hm->body.len] = '\0'; + + char *response = cbm_mcp_server_handle(mcp, body); + free(body); + + if (response) { + mg_http_reply(c, 200, g_cors_json, "%s", response); + free(response); + } else { + mg_http_reply(c, 204, g_cors, ""); + } +} + +/* ── HTTP event handler ───────────────────────────────────────── */ + +static void http_handler(struct mg_connection *c, int ev, void *ev_data) { + if 
(ev != MG_EV_HTTP_MSG) + return; + + struct mg_http_message *hm = ev_data; + cbm_http_server_t *srv = c->fn_data; + + /* Build per-request CORS headers (only reflects localhost origins) */ + update_cors(hm); + + /* OPTIONS preflight for CORS */ + if (mg_strcmp(hm->method, mg_str("OPTIONS")) == 0) { + char opt_hdrs[512]; + snprintf(opt_hdrs, sizeof(opt_hdrs), "%sContent-Length: 0\r\n", g_cors); + mg_http_reply(c, 204, opt_hdrs, ""); + return; + } + + /* POST /rpc → JSON-RPC dispatch (reuses existing MCP tools) */ + if (mg_strcmp(hm->method, mg_str("POST")) == 0 && mg_match(hm->uri, mg_str("/rpc"), NULL)) { + handle_rpc(c, hm, srv->mcp); + return; + } + + /* GET /api/layout → 3D graph layout */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/layout*"), NULL)) { + handle_layout(c, hm); + return; + } + + /* POST /api/index → start background indexing */ + if (mg_strcmp(hm->method, mg_str("POST")) == 0 && + mg_match(hm->uri, mg_str("/api/index"), NULL)) { + handle_index_start(c, hm); + return; + } + + /* GET /api/index-status → check indexing progress */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/index-status"), NULL)) { + handle_index_status(c); + return; + } + + /* DELETE /api/project → delete a project's .db file */ + if (mg_strcmp(hm->method, mg_str("DELETE")) == 0 && + mg_match(hm->uri, mg_str("/api/project*"), NULL)) { + handle_delete_project(c, hm); + return; + } + + /* GET /api/browse → directory browser for file picker */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/browse*"), NULL)) { + handle_browse(c, hm); + return; + } + + /* GET /api/adr → get ADR for project */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && mg_match(hm->uri, mg_str("/api/adr*"), NULL)) { + handle_adr_get(c, hm); + return; + } + + /* POST /api/adr → save ADR for project */ + if (mg_strcmp(hm->method, mg_str("POST")) == 0 && mg_match(hm->uri, mg_str("/api/adr"), NULL)) { + 
handle_adr_save(c, hm); + return; + } + + /* GET /api/project-health → check db integrity */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/project-health*"), NULL)) { + handle_project_health(c, hm); + return; + } + + /* GET /api/processes → list running codebase-memory-mcp processes */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/processes"), NULL)) { + handle_processes(c); + return; + } + + /* GET /api/logs → recent log lines */ + if (mg_strcmp(hm->method, mg_str("GET")) == 0 && + mg_match(hm->uri, mg_str("/api/logs*"), NULL)) { + handle_logs(c, hm); + return; + } + + /* POST /api/process-kill → kill a process */ + if (mg_strcmp(hm->method, mg_str("POST")) == 0 && + mg_match(hm->uri, mg_str("/api/process-kill"), NULL)) { + handle_process_kill(c, hm); + return; + } + + /* GET / → index.html (no-cache so browser always gets latest) */ + if (mg_match(hm->uri, mg_str("/"), NULL)) { + const cbm_embedded_file_t *f = cbm_embedded_lookup("/index.html"); + if (f) { + char html_hdrs[512]; + snprintf(html_hdrs, sizeof(html_hdrs), + "%sContent-Type: text/html\r\nCache-Control: no-cache\r\n", g_cors); + mg_http_reply(c, 200, html_hdrs, "%.*s", (int)f->size, (const char *)f->data); + return; + } + mg_http_reply(c, 404, g_cors, "no frontend embedded"); + return; + } + + /* GET /assets/... 
→ embedded assets */ + if (mg_match(hm->uri, mg_str("/assets/*"), NULL)) { + /* Build path string from mg_str */ + char path[256]; + int len = (int)hm->uri.len; + if (len >= (int)sizeof(path)) + len = (int)sizeof(path) - 1; + memcpy(path, hm->uri.buf, (size_t)len); + path[len] = '\0'; + + if (serve_embedded(c, path)) + return; + mg_http_reply(c, 404, g_cors, "not found"); + return; + } + + /* Fallback: try as embedded path, then 404 */ + { + char path[256]; + int len = (int)hm->uri.len; + if (len >= (int)sizeof(path)) + len = (int)sizeof(path) - 1; + memcpy(path, hm->uri.buf, (size_t)len); + path[len] = '\0'; + + if (serve_embedded(c, path)) + return; + } + + mg_http_reply(c, 404, g_cors, "not found"); +} + +/* ── Public API ───────────────────────────────────────────────── */ + +cbm_http_server_t *cbm_http_server_new(int port) { + cbm_http_server_t *srv = calloc(1, sizeof(*srv)); + if (!srv) + return NULL; + + srv->port = port; + atomic_store(&srv->stop_flag, 0); + + /* Create a dedicated MCP server for HTTP (own SQLite connection) */ + srv->mcp = cbm_mcp_server_new(NULL); + if (!srv->mcp) { + cbm_log_error("ui.http.mcp_fail", "reason", "cannot create MCP instance"); + free(srv); + return NULL; + } + + /* Initialize Mongoose */ + mg_mgr_init(&srv->mgr); + srv->mgr.userdata = srv; + + /* Bind to localhost only */ + char url[64]; + snprintf(url, sizeof(url), "http://127.0.0.1:%d", port); + + struct mg_connection *listener = mg_http_listen(&srv->mgr, url, http_handler, srv); + if (!listener) { + char port_str[16]; + snprintf(port_str, sizeof(port_str), "%d", port); + cbm_log_warn("ui.unavailable", "port", port_str, "reason", "in_use", "hint", + "use --port=N to override"); + cbm_mcp_server_free(srv->mcp); + mg_mgr_free(&srv->mgr); + free(srv); + return NULL; + } + + srv->listener_ok = true; + + char port_str[16]; + snprintf(port_str, sizeof(port_str), "%d", port); + cbm_log_info("ui.serving", "url", url, "port", port_str); + + return srv; +} + +void 
cbm_http_server_free(cbm_http_server_t *srv) { + if (!srv) + return; + mg_mgr_free(&srv->mgr); + cbm_mcp_server_free(srv->mcp); + free(srv); +} + +void cbm_http_server_stop(cbm_http_server_t *srv) { + if (srv) { + atomic_store(&srv->stop_flag, 1); + } +} + +void cbm_http_server_run(cbm_http_server_t *srv) { + if (!srv || !srv->listener_ok) + return; + + while (!atomic_load(&srv->stop_flag)) { + mg_mgr_poll(&srv->mgr, 200); /* 200ms poll interval */ + } +} + +bool cbm_http_server_is_running(const cbm_http_server_t *srv) { + return srv && srv->listener_ok; +}