Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,17 @@ codebase-memory-mcp cli --raw search_graph '{"label": "Function"}' | jq '.result
| `delete_project` | Remove a project and all its graph data. |
| `index_status` | Check indexing status of a project. |

`index_status` includes an additive `evidence` object. `evidence.index_snapshot`
reports the timestamp and Git HEAD captured when the last successful graph
snapshot was finalized, the current HEAD, tracked working-tree state, and a
`freshness` value: `current` means the indexed HEAD equals current HEAD and
tracked files are clean; `head_changed` means a later checkout/commit changed
HEAD; `working_tree_changed` means HEAD matches but tracked files are modified;
`unknown` means Git or comparison data is unavailable. Untracked files are not
compared. `evidence.coverage` reports discovered/indexed/excluded/failed file
counters when the stored index contains them; older indexes without these
counters report a `coverage_status` of `unknown` rather than guessing.

### Querying

| Tool | Description |
Expand All @@ -393,6 +404,14 @@ codebase-memory-mcp cli --raw search_graph '{"label": "Function"}' | jq '.result
| `get_graph_schema` | Node/edge counts, relationship patterns, property definitions per label. Run this first. |
| `get_code_snippet` | Read source code for a function by qualified name. |
| `get_architecture` | Codebase overview: languages, packages, routes, hotspots, clusters, ADR. |
| `search_code` | Grep-like text search within indexed project files. |
| `manage_adr` | CRUD for Architecture Decision Records. |
| `ingest_traces` | Ingest runtime traces to validate HTTP_CALLS edges. |

`trace_path`/`trace_call_path` preserve their existing `callers`/`callees`
arrays and add `edge_evidence` for traversed relations when stored edge
properties contain provenance. Relation `confidence` is source-resolution
confidence from the indexer, not a probability of runtime correctness and not
BM25/semantic search relevance. Dynamic behavior such as reflection,
dependency injection, framework wiring, generated code, configuration, HTTP,
async messaging, and cross-repo links may remain inferred or unavailable.
Expand Down
16 changes: 16 additions & 0 deletions src/git/git_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,22 @@ int cbm_git_context_resolve(const char *path, cbm_git_context_t *out) {
return 0;
}

/* Determine whether tracked files under `path` differ from the index/HEAD.
 *
 * Runs `status --porcelain --untracked-files=no` through git_capture(); any
 * non-empty output is treated as "dirty". Untracked files are excluded by the
 * command-line flag.
 *
 * path      - directory handed to git_capture().
 * out_dirty - receives the result; reset to false before git is invoked.
 *
 * Returns 0 when dirtiness was determined, CBM_NOT_FOUND when `out_dirty` is
 * NULL or git_capture() reports failure. */
int cbm_git_tracked_dirty(const char *path, bool *out_dirty) {
    if (out_dirty == NULL) {
        return CBM_NOT_FOUND;
    }
    *out_dirty = false;

    char *porcelain = NULL;
    int result = CBM_NOT_FOUND;
    if (git_capture(path, "status --porcelain --untracked-files=no", &porcelain) == 0) {
        /* An empty (or absent) listing means no tracked file is modified. */
        *out_dirty = (porcelain != NULL) && (porcelain[0] != '\0');
        result = 0;
    }

    /* The captured buffer is released on both the success and failure path. */
    free(porcelain);
    return result;
}

char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx) {
const char *project = project_name && project_name[0] ? project_name : "project";
const char *slug = "working-tree";
Expand Down
3 changes: 3 additions & 0 deletions src/git/git_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ typedef struct {

int cbm_git_context_resolve(const char *path, cbm_git_context_t *out);
void cbm_git_context_free(cbm_git_context_t *ctx);
/* Returns 0 when tracked working-tree dirtiness was determined, non-zero when
* unavailable. Untracked files are intentionally not compared. */
int cbm_git_tracked_dirty(const char *path, bool *out_dirty);
char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx);
int cbm_git_context_props_json(const cbm_git_context_t *ctx, char *buf, int buf_size);

Expand Down
161 changes: 158 additions & 3 deletions src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,83 @@ static void add_git_context_json(yyjson_mut_doc *doc, yyjson_mut_val *obj, const
cbm_git_context_free(&ctx);
}

/* Attach an "evidence" object to `root` describing how far the stored index
 * can be trusted relative to the repository's current state.
 *
 * Emitted structure:
 *   evidence.index_snapshot - indexed_at, indexed_git_head, current_git_head,
 *                             repository_state, snapshot_matches_current_head,
 *                             snapshot_matches_working_tree, freshness
 *   evidence.coverage       - file counters plus coverage_status
 *   evidence.limitations    - fixed note that untracked files are not compared
 *
 * doc  - mutable yyjson document that owns all created values.
 * root - object receiving the "evidence" key.
 * proj - stored project record; may be NULL, in which case Git-derived fields
 *        degrade to null/"unavailable"/"unknown" and counters are reported as 0.
 *
 * NOTE(review): ctx is freed before the caller serializes `doc`, so this relies
 * on add_git_context_string() copying the value it is given - confirm. */
static void add_index_evidence_json(yyjson_mut_doc *doc, yyjson_mut_val *root,
                                    const cbm_project_t *proj) {
    yyjson_mut_val *evidence = yyjson_mut_obj(doc);
    yyjson_mut_val *snap = yyjson_mut_obj(doc);

    const char *root_path = proj ? proj->root_path : NULL;
    const char *indexed_head =
        (proj && proj->indexed_git_head && proj->indexed_git_head[0]) ? proj->indexed_git_head
                                                                      : NULL;

    cbm_git_context_t ctx = {0};
    int git_rc = cbm_git_context_resolve(root_path, &ctx);
    bool is_git = (git_rc == 0) && ctx.is_git;
    bool not_git = (git_rc == 0) && ctx.root_exists && !ctx.is_git;
    const char *current_head = (is_git && ctx.head_sha && ctx.head_sha[0]) ? ctx.head_sha : NULL;

    /* Only ask Git about tracked modifications when a repository was resolved.
     * The explicit `proj` check keeps this path free of a NULL dereference
     * regardless of how cbm_git_context_resolve() treats a NULL path. */
    bool dirty = false;
    bool dirty_known = proj && is_git && cbm_git_tracked_dirty(root_path, &dirty) == 0;

    /* HEAD comparison needs both SHAs; the working-tree comparison additionally
     * needs a successful dirtiness probe. */
    bool heads_known = indexed_head && current_head;
    bool head_matches = heads_known && strcmp(indexed_head, current_head) == 0;
    bool tree_known = heads_known && dirty_known;

    add_git_context_string(doc, snap, "indexed_at", proj ? proj->indexed_at : NULL);
    add_git_context_string(doc, snap, "indexed_git_head", indexed_head);
    add_git_context_string(doc, snap, "current_git_head", current_head);

    const char *repo_state = "unavailable";
    if (not_git) {
        repo_state = "not_git";
    } else if (dirty_known) {
        repo_state = dirty ? "dirty" : "clean";
    }
    yyjson_mut_obj_add_str(doc, snap, "repository_state", repo_state);

    if (heads_known) {
        yyjson_mut_obj_add_bool(doc, snap, "snapshot_matches_current_head", head_matches);
    } else {
        yyjson_mut_obj_add_null(doc, snap, "snapshot_matches_current_head");
    }

    if (tree_known) {
        yyjson_mut_obj_add_bool(doc, snap, "snapshot_matches_working_tree",
                                head_matches && !dirty);
    } else {
        yyjson_mut_obj_add_null(doc, snap, "snapshot_matches_working_tree");
    }

    /* Freshness stays "unknown" whenever any comparison input is missing,
     * including the non-Git case. */
    const char *freshness = "unknown";
    if (tree_known) {
        if (!head_matches) {
            freshness = "head_changed";
        } else {
            freshness = dirty ? "working_tree_changed" : "current";
        }
    }
    yyjson_mut_obj_add_str(doc, snap, "freshness", freshness);
    yyjson_mut_obj_add_val(doc, evidence, "index_snapshot", snap);

    yyjson_mut_val *cov = yyjson_mut_obj(doc);
    int discovered = proj ? proj->files_discovered : 0;
    int indexed = proj ? proj->files_indexed : 0;
    int excluded = proj ? proj->files_excluded : 0;
    int failed = proj ? proj->files_failed : 0;
    yyjson_mut_obj_add_int(doc, cov, "files_discovered", discovered);
    yyjson_mut_obj_add_int(doc, cov, "files_indexed", indexed);
    yyjson_mut_obj_add_int(doc, cov, "files_excluded", excluded);
    yyjson_mut_obj_add_int(doc, cov, "files_failed", failed);

    /* Any failed or excluded file makes coverage partial; "complete" requires
     * a non-empty discovery set that was indexed in full. All-zero counters
     * fall through to "unknown". */
    const char *coverage_status = "unknown";
    if (failed > 0 || excluded > 0) {
        coverage_status = "partial";
    } else if (discovered > 0 && indexed == discovered) {
        coverage_status = "complete";
    }
    yyjson_mut_obj_add_str(doc, cov, "coverage_status", coverage_status);
    yyjson_mut_obj_add_val(doc, evidence, "coverage", cov);

    yyjson_mut_val *limits = yyjson_mut_arr(doc);
    yyjson_mut_val *lim = yyjson_mut_obj(doc);
    yyjson_mut_obj_add_str(doc, lim, "code", "UNTRACKED_FILES_NOT_COMPARED");
    yyjson_mut_obj_add_str(doc, lim, "message",
                           "Working-tree freshness compares current HEAD and tracked modifications; untracked files are not compared.");
    yyjson_mut_arr_add_val(limits, lim);
    yyjson_mut_obj_add_val(doc, evidence, "limitations", limits);

    yyjson_mut_obj_add_val(doc, root, "evidence", evidence);
    cbm_git_context_free(&ctx);
}

/* Build a helpful error listing available projects. Caller must free() result. */
static char *build_project_list_error(const char *reason) {
char dir_path[CBM_SZ_1K];
Expand Down Expand Up @@ -1739,9 +1816,8 @@ static char *handle_index_status(cbm_mcp_server_t *srv, const char *args) {
yyjson_mut_obj_add_strcpy(doc, root, "root_path",
proj_info.root_path ? proj_info.root_path : "");
add_git_context_json(doc, root, proj_info.root_path);
safe_str_free(&proj_info.name);
safe_str_free(&proj_info.indexed_at);
safe_str_free(&proj_info.root_path);
add_index_evidence_json(doc, root, &proj_info);
cbm_project_free_fields(&proj_info);
}
if (nodes == 0) {
yyjson_mut_obj_add_str(
Expand Down Expand Up @@ -2244,6 +2320,77 @@ static yyjson_mut_val *bfs_to_json_array(yyjson_mut_doc *doc, cbm_traverse_resul
return arr;
}

/* Classify how well-supported a stored relation is.
 *
 * strategy   - resolution strategy string read from the edge properties;
 *              NULL or empty yields "unavailable".
 * confidence - indexer confidence; values below 0.8 yield "inferred".
 * candidates - number of resolution candidates; more than one yields
 *              "ambiguous" regardless of strategy or confidence.
 *
 * Returns one of the static strings "unavailable", "ambiguous", "inferred"
 * or "verified". */
static const char *edge_evidence_status(const char *strategy, double confidence, int candidates) {
    const bool has_strategy = (strategy != NULL) && (strategy[0] != '\0');
    if (!has_strategy) {
        return "unavailable";
    }
    if (candidates > 1) {
        return "ambiguous";
    }

    /* Heuristic/fuzzy strategies are never reported as verified, whatever
     * confidence they carry. */
    const bool heuristic =
        (strstr(strategy, "heur") != NULL) || (strstr(strategy, "fuzzy") != NULL);
    return (heuristic || confidence < 0.8) ? "inferred" : "verified";
}

/* Map a stored strategy string onto a coarse resolution category.
 *
 * The first matching substring wins, checked in this order:
 *   "lsp"                               -> "hybrid_lsp"
 *   "import", "same_module", "receiver" -> "direct_ast"
 *   "fuzzy", "heur"                     -> "heuristic"
 *
 * A NULL or empty strategy yields "unknown". A strategy that matches none of
 * the substrings is returned unchanged: the result then aliases the caller's
 * `strategy` pointer and is only valid as long as that string is. */
static const char *edge_resolution_strategy(const char *strategy) {
    static const struct {
        const char *needle;
        const char *category;
    } buckets[] = {
        {"lsp", "hybrid_lsp"},         {"import", "direct_ast"}, {"same_module", "direct_ast"},
        {"receiver", "direct_ast"},    {"fuzzy", "heuristic"},   {"heur", "heuristic"},
    };

    if (strategy == NULL || strategy[0] == '\0') {
        return "unknown";
    }
    for (size_t i = 0; i < sizeof buckets / sizeof buckets[0]; i++) {
        if (strstr(strategy, buckets[i].needle) != NULL) {
            return buckets[i].category;
        }
    }
    return strategy;
}

/* Build the `edge_evidence` array for one traversal direction.
 *
 * For every edge in `tr` an item {from, to, type, edge:{...}} is appended,
 * where `edge` carries provenance parsed from the edge's properties_json:
 * resolution_strategy, confidence, candidate_count, source_location (always
 * null here) and evidence_status. Missing or unparsable properties degrade to
 * "unknown"/null/"unavailable" rather than failing.
 *
 * doc - mutable yyjson document that owns the returned array.
 * tr  - traversal result; its from_name/to_name/type strings are attached
 *       without copying, so `tr` must stay alive until `doc` is serialized.
 *
 * Returns the newly created array (possibly empty). */
static yyjson_mut_val *trace_edges_to_json_array(yyjson_mut_doc *doc, cbm_traverse_result_t *tr) {
    yyjson_mut_val *arr = yyjson_mut_arr(doc);
    for (int i = 0; i < tr->edge_count; i++) {
        yyjson_mut_val *item = yyjson_mut_obj(doc);
        yyjson_mut_obj_add_str(doc, item, "from",
                               tr->edges[i].from_name ? tr->edges[i].from_name : "");
        yyjson_mut_obj_add_str(doc, item, "to", tr->edges[i].to_name ? tr->edges[i].to_name : "");
        yyjson_mut_obj_add_str(doc, item, "type", tr->edges[i].type ? tr->edges[i].type : "");

        yyjson_mut_val *edge = yyjson_mut_obj(doc);

        /* Parse the stored edge properties; every lookup below tolerates a
         * missing document or root. */
        const char *props = tr->edges[i].properties_json;
        yyjson_doc *pdoc = props ? yyjson_read(props, strlen(props), 0) : NULL;
        yyjson_val *proot = pdoc ? yyjson_doc_get_root(pdoc) : NULL;

        yyjson_val *v = proot ? yyjson_obj_get(proot, "strategy") : NULL;
        const char *strategy = yyjson_is_str(v) ? yyjson_get_str(v) : NULL;

        v = proot ? yyjson_obj_get(proot, "confidence") : NULL;
        bool has_conf = yyjson_is_num(v);
        double conf = has_conf ? yyjson_get_num(v) : 0.0;

        v = proot ? yyjson_obj_get(proot, "candidates") : NULL;
        int candidates = yyjson_is_int(v) ? (int)yyjson_get_int(v) : 0;

        /* edge_resolution_strategy() may hand back `strategy` itself, which
         * points into pdoc. pdoc is freed at the end of this iteration, well
         * before the caller serializes `doc`, so the value must be copied into
         * `doc` instead of borrowed. */
        yyjson_mut_obj_add_strcpy(doc, edge, "resolution_strategy",
                                  edge_resolution_strategy(strategy));

        if (has_conf) {
            yyjson_mut_obj_add_real(doc, edge, "confidence", conf);
        } else {
            yyjson_mut_obj_add_null(doc, edge, "confidence");
        }
        if (candidates > 0) {
            yyjson_mut_obj_add_int(doc, edge, "candidate_count", candidates);
        } else {
            yyjson_mut_obj_add_null(doc, edge, "candidate_count");
        }
        yyjson_mut_obj_add_null(doc, edge, "source_location");

        /* edge_evidence_status() only returns string literals, so borrowing
         * is safe here. A missing confidence is passed as 0.0. */
        yyjson_mut_obj_add_str(doc, edge, "evidence_status",
                               edge_evidence_status(strategy, conf, candidates));

        yyjson_mut_obj_add_val(doc, item, "edge", edge);
        yyjson_mut_arr_add_val(arr, item);

        if (pdoc) {
            yyjson_doc_free(pdoc);
        }
    }
    return arr;
}

static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
char *func_name = cbm_mcp_get_string_arg(args, "function_name");
char *project = cbm_mcp_get_string_arg(args, "project");
Expand Down Expand Up @@ -2365,6 +2512,14 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
yyjson_mut_obj_add_val(doc, root, "callers",
bfs_to_json_array(doc, &tr_in, risk_labels, include_tests));
}
yyjson_mut_val *edge_evidence = yyjson_mut_obj(doc);
if (do_outbound) {
yyjson_mut_obj_add_val(doc, edge_evidence, "outbound", trace_edges_to_json_array(doc, &tr_out));
}
if (do_inbound) {
yyjson_mut_obj_add_val(doc, edge_evidence, "inbound", trace_edges_to_json_array(doc, &tr_in));
}
yyjson_mut_obj_add_val(doc, root, "edge_evidence", edge_evidence);

/* Serialize BEFORE freeing traversal results (yyjson borrows strings) */
char *json = yy_doc_to_str(doc);
Expand Down
2 changes: 2 additions & 0 deletions src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,8 @@ static int dump_and_persist_hashes(cbm_pipeline_t *p, const cbm_file_info_t *fil
stat_mtime_ns(&fst), fst.st_size);
}
}
(void)cbm_store_update_project_coverage(hash_store, p->project_name, file_count,
file_count, p->excluded_count, 0);

/* FTS5 backfill: populate nodes_fts with camelCase-split names.
* Contentless FTS5 requires the special 'delete-all' command instead of
Expand Down
Loading
Loading