From dddc30d101c50e65a1f12bda581e85e1fbc5fd3a Mon Sep 17 00:00:00 2001 From: RithvikReddy0-0 Date: Sat, 20 Jun 2026 12:30:36 +0000 Subject: [PATCH 1/5] fix(cross-repo): emit HTTP_CALLS for unindexed client libs and normalize URLs for route matching (#523) - pass_calls.c: detect known HTTP/async client patterns (requests, httpx, axios, etc.) by service pattern match even when the target node doesn't exist in the graph (external dep not indexed). Fixes zero-edge output on normal repos where HTTP clients are pip/npm dependencies. - pass_cross_repo.c: strip scheme+host+port from consumer url_path before QN lookup (cr_url_path). Add path-param template matching so concrete paths (/v2/orders/123) match provider route templates (/v2/orders/{id}). Add reverse-direction match so HTTP_CALLS in the consumer DB are found when cross-repo is run from the provider side. Signed-off-by: RithvikReddy0-0 --- src/pipeline/pass_calls.c | 1235 +++++++++++++------------- src/pipeline/pass_cross_repo.c | 1498 +++++++++++++++++--------------- 2 files changed, 1434 insertions(+), 1299 deletions(-) diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c index 15d691d3a..8fe9aeacb 100644 --- a/src/pipeline/pass_calls.c +++ b/src/pipeline/pass_calls.c @@ -1,615 +1,620 @@ -/* - * pass_calls.c — Resolve function/method calls into CALLS edges. - * - * For each discovered file: - * 1. Re-extract calls (cbm_extract_file) - * 2. Build per-file import map from IMPORTS edges in graph buffer - * 3. Resolve each call via registry (import_map → same_module → unique → suffix) - * 4. 
Create CALLS edges in graph buffer with confidence/strategy properties - * - * Depends on: pass_definitions having populated the registry and graph buffer - */ -#include "foundation/constants.h" - -enum { PC_RING = 4, PC_RING_MASK = 3, PC_SIG_SCAN = 15, PC_REGEX_GRP = 2 }; -#include "pipeline/pipeline.h" -#include -#include "pipeline/pipeline_internal.h" -#include "pipeline/lsp_resolve.h" -#include "graph_buffer/graph_buffer.h" -#include "foundation/log.h" -#include "foundation/compat.h" -#include "foundation/str_util.h" -#include "cbm.h" -#include "service_patterns.h" - -#include "foundation/compat_regex.h" - -#include -#include -#include -#include - -/* Read entire file into heap-allocated buffer. Caller must free(). */ -static char *read_file(const char *path, int *out_len) { - FILE *f = fopen(path, "rb"); - if (!f) { - return NULL; - } - - (void)fseek(f, 0, SEEK_END); - long size = ftell(f); - (void)fseek(f, 0, SEEK_SET); - - if (size <= 0 || size > (long)CBM_PERCENT * CBM_SZ_1K * CBM_SZ_1K) { - (void)fclose(f); - return NULL; - } - - /* +pad: tree-sitter lexer lookahead reads past EOF; keep it in-bounds */ - enum { CBM_TS_LOOKAHEAD_PAD = 16 }; - char *buf = malloc((size_t)size + CBM_TS_LOOKAHEAD_PAD); - if (!buf) { - (void)fclose(f); - return NULL; - } - - size_t nread = fread(buf, SKIP_ONE, size, f); - (void)fclose(f); - - if (nread > (size_t)size) { - nread = (size_t)size; - } - memset(buf + nread, 0, CBM_TS_LOOKAHEAD_PAD); - *out_len = (int)nread; - return buf; -} - -/* Format int for logging. Thread-safe via TLS. */ -static const char *itoa_log(int val) { - static CBM_TLS char bufs[PC_RING][CBM_SZ_32]; - static CBM_TLS int idx = 0; - int i = idx; - idx = (idx + SKIP_ONE) & PC_RING_MASK; - snprintf(bufs[i], sizeof(bufs[i]), "%d", val); - return bufs[i]; -} - -/* Build per-file import map from cached extraction result or graph buffer edges. - * Returns parallel arrays of (local_name, module_qn) pairs. Caller frees. 
*/ -/* Parse "local_name":"value" from JSON properties string. Returns strdup'd key or NULL. */ -static char *extract_local_name_from_json(const char *props_json) { - if (!props_json) { - return NULL; - } - const char *start = strstr(props_json, "\"local_name\":\""); - if (!start) { - return NULL; - } - start += strlen("\"local_name\":\""); - const char *end = strchr(start, '"'); - if (!end || end <= start) { - return NULL; - } - return cbm_strndup(start, end - start); -} - -static int build_import_map(cbm_pipeline_ctx_t *ctx, const char *rel_path, - const CBMFileResult *result, const char ***out_keys, - const char ***out_vals, int *out_count) { - *out_keys = NULL; - *out_vals = NULL; - *out_count = 0; - - /* Fast path: build from cached extraction result (no JSON parsing) */ - if (result && result->imports.count > 0) { - const char **keys = calloc((size_t)result->imports.count, sizeof(const char *)); - const char **vals = calloc((size_t)result->imports.count, sizeof(const char *)); - int count = 0; - - for (int i = 0; i < result->imports.count; i++) { - const CBMImport *imp = &result->imports.items[i]; - if (!imp->local_name || !imp->local_name[0] || !imp->module_path) { - continue; - } - char *target_qn = cbm_pipeline_fqn_module(ctx->project_name, imp->module_path); - const cbm_gbuf_node_t *target = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); - free(target_qn); - if (!target) { - continue; - } - keys[count] = strdup(imp->local_name); - vals[count] = target->qualified_name; /* borrowed from gbuf */ - count++; - } - - *out_keys = keys; - *out_vals = vals; - *out_count = count; - return 0; - } - - /* Slow path: scan graph buffer IMPORTS edges + parse JSON properties */ - char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel_path, "__file__"); - const cbm_gbuf_node_t *file_node = cbm_gbuf_find_by_qn(ctx->gbuf, file_qn); - free(file_qn); - if (!file_node) { - return 0; - } - - const cbm_gbuf_edge_t **edges = NULL; - int edge_count = 0; - int rc = 
cbm_gbuf_find_edges_by_source_type(ctx->gbuf, file_node->id, "IMPORTS", &edges, - &edge_count); - if (rc != 0 || edge_count == 0) { - return 0; - } - - const char **keys = calloc(edge_count, sizeof(const char *)); - const char **vals = calloc(edge_count, sizeof(const char *)); - int count = 0; - - for (int i = 0; i < edge_count; i++) { - const cbm_gbuf_edge_t *e = edges[i]; - const cbm_gbuf_node_t *target = cbm_gbuf_find_by_id(ctx->gbuf, e->target_id); - if (!target) { - continue; - } - char *key = extract_local_name_from_json(e->properties_json); - if (key) { - keys[count] = key; - vals[count] = target->qualified_name; - count++; - } - } - - *out_keys = keys; - *out_vals = vals; - *out_count = count; - return 0; -} - -static void free_import_map(const char **keys, const char **vals, int count) { - if (keys) { - for (int i = 0; i < count; i++) { - free((void *)keys[i]); - } - free((void *)keys); - } - if (vals) { - free((void *)vals); - } -} - -/* Handle a route registration call: create Route node + HANDLES edge. */ -static void handle_route_registration(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source_node, const char *module_qn, - const char **imp_keys, const char **imp_vals, int imp_count) { - const char *method = cbm_service_pattern_route_method(call->callee_name); - char route_qn[CBM_ROUTE_QN_SIZE]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method ? method : "ANY", - call->first_string_arg); - char route_props[CBM_SZ_256]; - snprintf(route_props, sizeof(route_props), "{\"method\":\"%s\"}", method ? 
method : "ANY"); - int64_t route_id = cbm_gbuf_upsert_node(ctx->gbuf, "Route", call->first_string_arg, route_qn, - "", 0, 0, route_props); - char esc_cn[CBM_SZ_256]; /* sliced source text: escape quotes/newlines */ - char esc_fa[CBM_SZ_256]; - cbm_json_escape(esc_cn, sizeof(esc_cn), call->callee_name); - cbm_json_escape(esc_fa, sizeof(esc_fa), call->first_string_arg); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"url_path\":\"%s\",\"via\":\"route_registration\"}", esc_cn, - esc_fa); - cbm_gbuf_insert_edge(ctx->gbuf, source_node->id, route_id, "CALLS", props); - if (call->second_arg_name != NULL && call->second_arg_name[0] != '\0') { - cbm_resolution_t hres = cbm_registry_resolve(ctx->registry, call->second_arg_name, - module_qn, imp_keys, imp_vals, imp_count); - if (hres.qualified_name != NULL && hres.qualified_name[0] != '\0') { - const cbm_gbuf_node_t *handler = cbm_gbuf_find_by_qn(ctx->gbuf, hres.qualified_name); - if (handler != NULL) { - char hprops[CBM_SZ_1K]; /* must exceed escaped value + wrapper or snprintf cuts the - closing brace */ - char esc_h[CBM_SZ_512]; - cbm_json_escape(esc_h, sizeof(esc_h), hres.qualified_name); - snprintf(hprops, sizeof(hprops), "{\"handler\":\"%s\"}", esc_h); - cbm_gbuf_insert_edge(ctx->gbuf, handler->id, route_id, "HANDLES", hprops); - } - } - } -} - -/* Emit an HTTP/async route edge for a service call. */ -/* Build route QN and upsert Route node for HTTP/async edge. */ -static int64_t create_svc_route_node(cbm_pipeline_ctx_t *ctx, const char *url, cbm_svc_kind_t svc, - const char *method, const char *broker) { - char route_qn[CBM_ROUTE_QN_SIZE]; - const char *prefix; - if (svc == CBM_SVC_HTTP) { - prefix = method ? method : "ANY"; - } else { - prefix = broker ? broker : "async"; - } - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", prefix, url); - const char *rp; - if (svc == CBM_SVC_HTTP) { - rp = method ? method : "{}"; - } else { - rp = broker ? 
broker : "{}"; - } - return cbm_gbuf_upsert_node(ctx->gbuf, "Route", url, route_qn, "", 0, 0, rp); -} - -static void emit_http_async_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, - const cbm_resolution_t *res, cbm_svc_kind_t svc) { - const char *url_or_topic = call->first_string_arg; - bool is_url = (url_or_topic && url_or_topic[0] != '\0' && - (url_or_topic[0] == '/' || strstr(url_or_topic, "://") != NULL)); - bool is_topic = (url_or_topic && url_or_topic[0] != '\0' && svc == CBM_SVC_ASYNC && - strlen(url_or_topic) > PAIR_LEN); - if (!is_url && !is_topic) { - char esc_callee[CBM_SZ_256]; - cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", - esc_callee, res->confidence, res->strategy ? res->strategy : "unknown", - res->candidate_count); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); - return; - } - const char *edge_type = (svc == CBM_SVC_HTTP) ? "HTTP_CALLS" : "ASYNC_CALLS"; - const char *method = - (svc == CBM_SVC_HTTP) ? cbm_service_pattern_http_method(call->callee_name) : NULL; - const char *broker = - (svc == CBM_SVC_ASYNC) ? cbm_service_pattern_broker(res->qualified_name) : NULL; - int64_t route_id = create_svc_route_node(ctx, url_or_topic, svc, method, broker); - char esc_callee[CBM_SZ_256]; - char esc_url[CBM_SZ_256]; - cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); - cbm_json_escape(esc_url, sizeof(esc_url), url_or_topic); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s}", esc_callee, - esc_url, method ? ",\"method\":\"" : "", method ? method : "", method ? "\"" : "", - broker ? ",\"broker\":\"" : "", broker ? 
broker : ""); - if (broker) { - size_t plen = strlen(props); - if (plen > 0 && props[plen - SKIP_ONE] != '}') { - snprintf(props + plen - 1, sizeof(props) - plen + SKIP_ONE, "\"}"); - } - } - cbm_gbuf_insert_edge(ctx->gbuf, source->id, route_id, edge_type, props); -} - -/* Classify a resolved call and emit the appropriate edge. */ -static void emit_classified_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, - const cbm_resolution_t *res, const char *module_qn, - const char **imp_keys, const char **imp_vals, int imp_count) { - cbm_svc_kind_t svc = cbm_service_pattern_match(res->qualified_name); - if (svc == CBM_SVC_ROUTE_REG && call->first_string_arg && call->first_string_arg[0] == '/') { - handle_route_registration(ctx, call, source, module_qn, imp_keys, imp_vals, imp_count); - return; - } - if (svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) { - emit_http_async_edge(ctx, call, source, target, res, svc); - return; - } - if (svc == CBM_SVC_CONFIG) { - char esc_c[CBM_SZ_256]; - char esc_k[CBM_SZ_256]; - cbm_json_escape(esc_c, sizeof(esc_c), call->callee_name); - cbm_json_escape(esc_k, sizeof(esc_k), call->first_string_arg ? call->first_string_arg : ""); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f}", - esc_c, esc_k, res->confidence); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CONFIGURES", props); - return; - } - char esc_c2[CBM_SZ_256]; - cbm_json_escape(esc_c2, sizeof(esc_c2), call->callee_name); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", - esc_c2, res->confidence, res->strategy ? res->strategy : "unknown", - res->candidate_count); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); -} - -/* Find source node for a call: enclosing function or file node. 
*/ -static const cbm_gbuf_node_t *calls_find_source(cbm_pipeline_ctx_t *ctx, const char *rel, - const char *enclosing_qn) { - const cbm_gbuf_node_t *src = NULL; - if (enclosing_qn) { - src = cbm_gbuf_find_by_qn(ctx->gbuf, enclosing_qn); - } - if (!src) { - char *fqn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__"); - src = cbm_gbuf_find_by_qn(ctx->gbuf, fqn); - free(fqn); - } - return src; -} - -/* Resolve one call and emit the appropriate edge. Returns 1 if resolved, 0 if not. */ -static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, - const CBMResolvedCallArray *lsp_calls, const char *rel, - const char *module_qn, const char **imp_keys, const char **imp_vals, - int imp_count) { - const cbm_gbuf_node_t *source_node = calls_find_source(ctx, rel, call->enclosing_func_qn); - if (!source_node) { - return 0; - } - - /* LSP-resolved calls take precedence over registry-textual matching. */ - const CBMResolvedCall *lsp = cbm_pipeline_find_lsp_resolution(lsp_calls, call); - if (lsp) { - const cbm_gbuf_node_t *target_node = - cbm_pipeline_lsp_target_node(ctx->gbuf, ctx->project_name, lsp->callee_qn); - if (target_node && source_node->id != target_node->id) { - cbm_resolution_t res = {0}; - /* Use the gbuf node's QN so downstream edge props show the canonical - * project-qualified form even when fallback prefixed the project. 
*/ - res.qualified_name = target_node->qualified_name; - res.confidence = lsp->confidence; - res.strategy = lsp->strategy; - res.candidate_count = 1; - emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, - imp_vals, imp_count); - return SKIP_ONE; - } - } - - cbm_resolution_t res = cbm_registry_resolve(ctx->registry, call->callee_name, module_qn, - imp_keys, imp_vals, imp_count); - if (!res.qualified_name || res.qualified_name[0] == '\0') { - return 0; - } - const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); - if (!target_node || source_node->id == target_node->id) { - return 0; - } - emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, imp_vals, - imp_count); - return SKIP_ONE; -} - -static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, - const cbm_file_info_t *fi, bool *owned) { - *owned = false; - if (ctx->result_cache && ctx->result_cache[idx]) { - return ctx->result_cache[idx]; - } - int slen = 0; - char *src = read_file(fi->path, &slen); - if (!src) { - return NULL; - } - CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, - CBM_EXTRACT_BUDGET, NULL, NULL); - free(src); - if (r) { - *owned = true; - } - return r; -} - -int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { - cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); - - int total_calls = 0; - int resolved = 0; - int unresolved = 0; - int errors = 0; - - for (int i = 0; i < file_count; i++) { - if (cbm_pipeline_check_cancel(ctx)) { - return CBM_NOT_FOUND; - } - - const char *rel = files[i].rel_path; - bool result_owned = false; - CBMFileResult *result = calls_get_or_extract(ctx, i, &files[i], &result_owned); - if (!result) { - errors++; - continue; - } - - if (result->calls.count == 0) { - if (result_owned) { - cbm_free_result(result); - } - continue; - } - - /* Build 
import map for this file */ - const char **imp_keys = NULL; - const char **imp_vals = NULL; - int imp_count = 0; - build_import_map(ctx, rel, result, &imp_keys, &imp_vals, &imp_count); - - /* Compute module QN for same-module resolution */ - char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, rel); - - /* Resolve each call */ - for (int c = 0; c < result->calls.count; c++) { - CBMCall *call = &result->calls.items[c]; - if (!call->callee_name) { - continue; - } - total_calls++; - if (resolve_single_call(ctx, call, &result->resolved_calls, rel, module_qn, imp_keys, - imp_vals, imp_count)) { - resolved++; - } else { - unresolved++; - } - } - - free(module_qn); - free_import_map(imp_keys, imp_vals, imp_count); - if (result_owned) { - cbm_free_result(result); - } - } - - cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved", - itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors", - itoa_log(errors)); - - /* Additional pattern-based edge passes run after normal call resolution */ - cbm_pipeline_pass_fastapi_depends(ctx, files, file_count); - - return 0; -} - -/* ── FastAPI Depends() tracking ──────────────────────────────────── */ -/* Scans Python function signatures for Depends(func_ref) patterns and - * creates CALLS edges from the endpoint to the dependency function. - * Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */ - -/* Extract Python function signature text from source starting at given line. Caller frees. 
*/ -static char *extract_py_signature(const char *source, int start_line, int end_line) { - int sig_end = start_line + PC_SIG_SCAN; - if (end_line > 0 && sig_end > end_line) { - sig_end = end_line; - } - const char *p = source; - int line = SKIP_ONE; - while (*p && line < start_line) { - if (*p == '\n') { - line++; - } - p++; - } - const char *sig_start = p; - while (*p && line < sig_end) { - if (*p == '\n') { - line++; - } - p++; - if (p > sig_start + SKIP_ONE && p[-SKIP_ONE] == ':' && p[-PAIR_LEN] == ')') { - break; - } - } - size_t sig_len = (size_t)(p - sig_start); - char *sig = malloc(sig_len + SKIP_ONE); - if (!sig) { - return NULL; - } - memcpy(sig, sig_start, sig_len); - sig[sig_len] = '\0'; - return sig; -} - -/* Scan one function's signature for Depends(func_ref) and create CALLS edges. */ -static int scan_depends_in_sig(cbm_pipeline_ctx_t *ctx, const cbm_regex_t *re, const char *sig, - const CBMDefinition *def, const char *module_qn, const char **ik, - const char **iv, int ic) { - int count = 0; - cbm_regmatch_t match[PC_REGEX_GRP]; - const char *scan = sig; - while (cbm_regexec(re, scan, PC_REGEX_GRP, match, 0) == 0) { - int ref_len = match[SKIP_ONE].rm_eo - match[SKIP_ONE].rm_so; - char func_ref[CBM_SZ_256]; - if (ref_len >= (int)sizeof(func_ref)) { - ref_len = (int)sizeof(func_ref) - SKIP_ONE; - } - memcpy(func_ref, scan + match[SKIP_ONE].rm_so, (size_t)ref_len); - func_ref[ref_len] = '\0'; - cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn, ik, iv, ic); - if (res.qualified_name && res.qualified_name[0] != '\0') { - const cbm_gbuf_node_t *sn = cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); - const cbm_gbuf_node_t *tn = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); - if (sn && tn && sn->id != tn->id) { - cbm_gbuf_insert_edge(ctx->gbuf, sn->id, tn->id, "CALLS", - "{\"confidence\":0.95,\"strategy\":\"fastapi_depends\"}"); - count++; - } - } - scan += match[0].rm_eo; - } - return count; -} - -static bool 
is_callable_def(const CBMDefinition *def) { - return def->qualified_name && def->start_line > 0 && def->label && - (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0); -} - -static bool file_has_depends_call(const CBMFileResult *result) { - for (int c = 0; c < result->calls.count; c++) { - if (result->calls.items[c].callee_name && - strcmp(result->calls.items[c].callee_name, "Depends") == 0) { - return true; - } - } - return false; -} - -void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, - int file_count) { - cbm_regex_t depends_re; - if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) { - return; - } - - int edge_count = 0; - for (int i = 0; i < file_count; i++) { - if (files[i].language != CBM_LANG_PYTHON) { - continue; - } - if (cbm_pipeline_check_cancel(ctx)) { - break; - } - - CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; - if (!result || !file_has_depends_call(result)) { - continue; - } - - /* Read source and scan for Depends(func_ref) in function signatures */ - int source_len = 0; - char *source = read_file(files[i].path, &source_len); - if (!source) { - continue; - } - - char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); - - /* Build import map for alias resolution */ - const char **imp_keys = NULL; - const char **imp_vals = NULL; - int imp_count = 0; - build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count); - - for (int d = 0; d < result->defs.count; d++) { - CBMDefinition *def = &result->defs.items[d]; - if (!is_callable_def(def)) { - continue; - } - - char *sig = extract_py_signature(source, (int)def->start_line, (int)def->end_line); - if (!sig) { - continue; - } - - edge_count += scan_depends_in_sig(ctx, &depends_re, sig, def, module_qn, imp_keys, - imp_vals, imp_count); - free(sig); - } - - free(module_qn); - free_import_map(imp_keys, imp_vals, imp_count); - 
free(source); - } - - cbm_regfree(&depends_re); - if (edge_count > 0) { - cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count)); - } -} - -/* DLL resolve tracking removed — triggered Windows Defender false positive. - * See issue #89. */ +/* + * pass_calls.c — Resolve function/method calls into CALLS edges. + * + * For each discovered file: + * 1. Re-extract calls (cbm_extract_file) + * 2. Build per-file import map from IMPORTS edges in graph buffer + * 3. Resolve each call via registry (import_map → same_module → unique → suffix) + * 4. Create CALLS edges in graph buffer with confidence/strategy properties + * + * Depends on: pass_definitions having populated the registry and graph buffer + */ +#include "foundation/constants.h" + +enum { PC_RING = 4, PC_RING_MASK = 3, PC_SIG_SCAN = 15, PC_REGEX_GRP = 2 }; +#include "pipeline/pipeline.h" +#include +#include "pipeline/pipeline_internal.h" +#include "pipeline/lsp_resolve.h" +#include "graph_buffer/graph_buffer.h" +#include "foundation/log.h" +#include "foundation/compat.h" +#include "foundation/str_util.h" +#include "cbm.h" +#include "service_patterns.h" + +#include "foundation/compat_regex.h" + +#include +#include +#include +#include + +/* Read entire file into heap-allocated buffer. Caller must free(). 
*/ +static char *read_file(const char *path, int *out_len) { + FILE *f = fopen(path, "rb"); + if (!f) { + return NULL; + } + + (void)fseek(f, 0, SEEK_END); + long size = ftell(f); + (void)fseek(f, 0, SEEK_SET); + + if (size <= 0 || size > (long)CBM_PERCENT * CBM_SZ_1K * CBM_SZ_1K) { + (void)fclose(f); + return NULL; + } + + /* +pad: tree-sitter lexer lookahead reads past EOF; keep it in-bounds */ + enum { CBM_TS_LOOKAHEAD_PAD = 16 }; + char *buf = malloc((size_t)size + CBM_TS_LOOKAHEAD_PAD); + if (!buf) { + (void)fclose(f); + return NULL; + } + + size_t nread = fread(buf, SKIP_ONE, size, f); + (void)fclose(f); + + if (nread > (size_t)size) { + nread = (size_t)size; + } + memset(buf + nread, 0, CBM_TS_LOOKAHEAD_PAD); + *out_len = (int)nread; + return buf; +} + +/* Format int for logging. Thread-safe via TLS. */ +static const char *itoa_log(int val) { + static CBM_TLS char bufs[PC_RING][CBM_SZ_32]; + static CBM_TLS int idx = 0; + int i = idx; + idx = (idx + SKIP_ONE) & PC_RING_MASK; + snprintf(bufs[i], sizeof(bufs[i]), "%d", val); + return bufs[i]; +} + +/* Build per-file import map from cached extraction result or graph buffer edges. + * Returns parallel arrays of (local_name, module_qn) pairs. Caller frees. */ +/* Parse "local_name":"value" from JSON properties string. Returns strdup'd key or NULL. 
*/ +static char *extract_local_name_from_json(const char *props_json) { + if (!props_json) { + return NULL; + } + const char *start = strstr(props_json, "\"local_name\":\""); + if (!start) { + return NULL; + } + start += strlen("\"local_name\":\""); + const char *end = strchr(start, '"'); + if (!end || end <= start) { + return NULL; + } + return cbm_strndup(start, end - start); +} + +static int build_import_map(cbm_pipeline_ctx_t *ctx, const char *rel_path, + const CBMFileResult *result, const char ***out_keys, + const char ***out_vals, int *out_count) { + *out_keys = NULL; + *out_vals = NULL; + *out_count = 0; + + /* Fast path: build from cached extraction result (no JSON parsing) */ + if (result && result->imports.count > 0) { + const char **keys = calloc((size_t)result->imports.count, sizeof(const char *)); + const char **vals = calloc((size_t)result->imports.count, sizeof(const char *)); + int count = 0; + + for (int i = 0; i < result->imports.count; i++) { + const CBMImport *imp = &result->imports.items[i]; + if (!imp->local_name || !imp->local_name[0] || !imp->module_path) { + continue; + } + char *target_qn = cbm_pipeline_fqn_module(ctx->project_name, imp->module_path); + const cbm_gbuf_node_t *target = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); + free(target_qn); + if (!target) { + continue; + } + keys[count] = strdup(imp->local_name); + vals[count] = target->qualified_name; /* borrowed from gbuf */ + count++; + } + + *out_keys = keys; + *out_vals = vals; + *out_count = count; + return 0; + } + + /* Slow path: scan graph buffer IMPORTS edges + parse JSON properties */ + char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel_path, "__file__"); + const cbm_gbuf_node_t *file_node = cbm_gbuf_find_by_qn(ctx->gbuf, file_qn); + free(file_qn); + if (!file_node) { + return 0; + } + + const cbm_gbuf_edge_t **edges = NULL; + int edge_count = 0; + int rc = cbm_gbuf_find_edges_by_source_type(ctx->gbuf, file_node->id, "IMPORTS", &edges, + &edge_count); + if (rc 
!= 0 || edge_count == 0) { + return 0; + } + + const char **keys = calloc(edge_count, sizeof(const char *)); + const char **vals = calloc(edge_count, sizeof(const char *)); + int count = 0; + + for (int i = 0; i < edge_count; i++) { + const cbm_gbuf_edge_t *e = edges[i]; + const cbm_gbuf_node_t *target = cbm_gbuf_find_by_id(ctx->gbuf, e->target_id); + if (!target) { + continue; + } + char *key = extract_local_name_from_json(e->properties_json); + if (key) { + keys[count] = key; + vals[count] = target->qualified_name; + count++; + } + } + + *out_keys = keys; + *out_vals = vals; + *out_count = count; + return 0; +} + +static void free_import_map(const char **keys, const char **vals, int count) { + if (keys) { + for (int i = 0; i < count; i++) { + free((void *)keys[i]); + } + free((void *)keys); + } + if (vals) { + free((void *)vals); + } +} + +/* Handle a route registration call: create Route node + HANDLES edge. */ +static void handle_route_registration(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source_node, const char *module_qn, + const char **imp_keys, const char **imp_vals, int imp_count) { + const char *method = cbm_service_pattern_route_method(call->callee_name); + char route_qn[CBM_ROUTE_QN_SIZE]; + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method ? method : "ANY", + call->first_string_arg); + char route_props[CBM_SZ_256]; + snprintf(route_props, sizeof(route_props), "{\"method\":\"%s\"}", method ? 
method : "ANY"); + int64_t route_id = cbm_gbuf_upsert_node(ctx->gbuf, "Route", call->first_string_arg, route_qn, + "", 0, 0, route_props); + char esc_cn[CBM_SZ_256]; /* sliced source text: escape quotes/newlines */ + char esc_fa[CBM_SZ_256]; + cbm_json_escape(esc_cn, sizeof(esc_cn), call->callee_name); + cbm_json_escape(esc_fa, sizeof(esc_fa), call->first_string_arg); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"url_path\":\"%s\",\"via\":\"route_registration\"}", esc_cn, + esc_fa); + cbm_gbuf_insert_edge(ctx->gbuf, source_node->id, route_id, "CALLS", props); + if (call->second_arg_name != NULL && call->second_arg_name[0] != '\0') { + cbm_resolution_t hres = cbm_registry_resolve(ctx->registry, call->second_arg_name, + module_qn, imp_keys, imp_vals, imp_count); + if (hres.qualified_name != NULL && hres.qualified_name[0] != '\0') { + const cbm_gbuf_node_t *handler = cbm_gbuf_find_by_qn(ctx->gbuf, hres.qualified_name); + if (handler != NULL) { + char hprops[CBM_SZ_1K]; /* must exceed escaped value + wrapper or snprintf cuts the + closing brace */ + char esc_h[CBM_SZ_512]; + cbm_json_escape(esc_h, sizeof(esc_h), hres.qualified_name); + snprintf(hprops, sizeof(hprops), "{\"handler\":\"%s\"}", esc_h); + cbm_gbuf_insert_edge(ctx->gbuf, handler->id, route_id, "HANDLES", hprops); + } + } + } +} + +/* Emit an HTTP/async route edge for a service call. */ +/* Build route QN and upsert Route node for HTTP/async edge. */ +static int64_t create_svc_route_node(cbm_pipeline_ctx_t *ctx, const char *url, cbm_svc_kind_t svc, + const char *method, const char *broker) { + char route_qn[CBM_ROUTE_QN_SIZE]; + const char *prefix; + if (svc == CBM_SVC_HTTP) { + prefix = method ? method : "ANY"; + } else { + prefix = broker ? broker : "async"; + } + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", prefix, url); + const char *rp; + if (svc == CBM_SVC_HTTP) { + rp = method ? method : "{}"; + } else { + rp = broker ? 
broker : "{}"; + } + return cbm_gbuf_upsert_node(ctx->gbuf, "Route", url, route_qn, "", 0, 0, rp); +} + +static void emit_http_async_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, + const cbm_resolution_t *res, cbm_svc_kind_t svc) { + const char *url_or_topic = call->first_string_arg; + bool is_url = (url_or_topic && url_or_topic[0] != '\0' && + (url_or_topic[0] == '/' || strstr(url_or_topic, "://") != NULL)); + bool is_topic = (url_or_topic && url_or_topic[0] != '\0' && svc == CBM_SVC_ASYNC && + strlen(url_or_topic) > PAIR_LEN); + if (!is_url && !is_topic) { + char esc_callee[CBM_SZ_256]; + cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", + esc_callee, res->confidence, res->strategy ? res->strategy : "unknown", + res->candidate_count); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); + return; + } + const char *edge_type = (svc == CBM_SVC_HTTP) ? "HTTP_CALLS" : "ASYNC_CALLS"; + const char *method = + (svc == CBM_SVC_HTTP) ? cbm_service_pattern_http_method(call->callee_name) : NULL; + const char *broker = + (svc == CBM_SVC_ASYNC) ? cbm_service_pattern_broker(res->qualified_name) : NULL; + int64_t route_id = create_svc_route_node(ctx, url_or_topic, svc, method, broker); + char esc_callee[CBM_SZ_256]; + char esc_url[CBM_SZ_256]; + cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); + cbm_json_escape(esc_url, sizeof(esc_url), url_or_topic); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s}", esc_callee, + esc_url, method ? ",\"method\":\"" : "", method ? method : "", method ? "\"" : "", + broker ? ",\"broker\":\"" : "", broker ? 
broker : ""); + if (broker) { + size_t plen = strlen(props); + if (plen > 0 && props[plen - SKIP_ONE] != '}') { + snprintf(props + plen - 1, sizeof(props) - plen + SKIP_ONE, "\"}"); + } + } + cbm_gbuf_insert_edge(ctx->gbuf, source->id, route_id, edge_type, props); +} + +/* Classify a resolved call and emit the appropriate edge. */ +static void emit_classified_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, + const cbm_resolution_t *res, const char *module_qn, + const char **imp_keys, const char **imp_vals, int imp_count) { + cbm_svc_kind_t svc = cbm_service_pattern_match(res->qualified_name); + if (svc == CBM_SVC_ROUTE_REG && call->first_string_arg && call->first_string_arg[0] == '/') { + handle_route_registration(ctx, call, source, module_qn, imp_keys, imp_vals, imp_count); + return; + } + if (svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) { + emit_http_async_edge(ctx, call, source, target, res, svc); + return; + } + if (svc == CBM_SVC_CONFIG) { + char esc_c[CBM_SZ_256]; + char esc_k[CBM_SZ_256]; + cbm_json_escape(esc_c, sizeof(esc_c), call->callee_name); + cbm_json_escape(esc_k, sizeof(esc_k), call->first_string_arg ? call->first_string_arg : ""); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f}", + esc_c, esc_k, res->confidence); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CONFIGURES", props); + return; + } + char esc_c2[CBM_SZ_256]; + cbm_json_escape(esc_c2, sizeof(esc_c2), call->callee_name); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", + esc_c2, res->confidence, res->strategy ? res->strategy : "unknown", + res->candidate_count); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); +} + +/* Find source node for a call: enclosing function or file node. 
*/ +static const cbm_gbuf_node_t *calls_find_source(cbm_pipeline_ctx_t *ctx, const char *rel, + const char *enclosing_qn) { + const cbm_gbuf_node_t *src = NULL; + if (enclosing_qn) { + src = cbm_gbuf_find_by_qn(ctx->gbuf, enclosing_qn); + } + if (!src) { + char *fqn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__"); + src = cbm_gbuf_find_by_qn(ctx->gbuf, fqn); + free(fqn); + } + return src; +} + +/* Resolve one call and emit the appropriate edge. Returns 1 if resolved, 0 if not. */ +static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, + const CBMResolvedCallArray *lsp_calls, const char *rel, + const char *module_qn, const char **imp_keys, const char **imp_vals, + int imp_count) { + const cbm_gbuf_node_t *source_node = calls_find_source(ctx, rel, call->enclosing_func_qn); + if (!source_node) { + return 0; + } + + /* LSP-resolved calls take precedence over registry-textual matching. */ + const CBMResolvedCall *lsp = cbm_pipeline_find_lsp_resolution(lsp_calls, call); + if (lsp) { + const cbm_gbuf_node_t *target_node = + cbm_pipeline_lsp_target_node(ctx->gbuf, ctx->project_name, lsp->callee_qn); + if (target_node && source_node->id != target_node->id) { + cbm_resolution_t res = {0}; + /* Use the gbuf node's QN so downstream edge props show the canonical + * project-qualified form even when fallback prefixed the project. 
*/ + res.qualified_name = target_node->qualified_name; + res.confidence = lsp->confidence; + res.strategy = lsp->strategy; + res.candidate_count = 1; + emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, + imp_vals, imp_count); + return SKIP_ONE; + } + } + + cbm_resolution_t res = cbm_registry_resolve(ctx->registry, call->callee_name, module_qn, + imp_keys, imp_vals, imp_count); + if (!res.qualified_name || res.qualified_name[0] == '\0') { + return 0; + } + const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); + if (!target_node || source_node->id == target_node->id) { + cbm_svc_kind_t svc = cbm_service_pattern_match(res.qualified_name); + if ((svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) && + call->first_string_arg && call->first_string_arg[0] != '\0') { + emit_http_async_edge(ctx, call, source_node, source_node, &res, svc); + } + return 0; + } + emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, imp_vals, + imp_count); + return SKIP_ONE; +} + +static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, + const cbm_file_info_t *fi, bool *owned) { + *owned = false; + if (ctx->result_cache && ctx->result_cache[idx]) { + return ctx->result_cache[idx]; + } + int slen = 0; + char *src = read_file(fi->path, &slen); + if (!src) { + return NULL; + } + CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL); + free(src); + if (r) { + *owned = true; + } + return r; +} + +int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { + cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); + + int total_calls = 0; + int resolved = 0; + int unresolved = 0; + int errors = 0; + + for (int i = 0; i < file_count; i++) { + if (cbm_pipeline_check_cancel(ctx)) { + return CBM_NOT_FOUND; + } + + const char *rel = files[i].rel_path; + bool 
result_owned = false; + CBMFileResult *result = calls_get_or_extract(ctx, i, &files[i], &result_owned); + if (!result) { + errors++; + continue; + } + + if (result->calls.count == 0) { + if (result_owned) { + cbm_free_result(result); + } + continue; + } + + /* Build import map for this file */ + const char **imp_keys = NULL; + const char **imp_vals = NULL; + int imp_count = 0; + build_import_map(ctx, rel, result, &imp_keys, &imp_vals, &imp_count); + + /* Compute module QN for same-module resolution */ + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, rel); + + /* Resolve each call */ + for (int c = 0; c < result->calls.count; c++) { + CBMCall *call = &result->calls.items[c]; + if (!call->callee_name) { + continue; + } + total_calls++; + if (resolve_single_call(ctx, call, &result->resolved_calls, rel, module_qn, imp_keys, + imp_vals, imp_count)) { + resolved++; + } else { + unresolved++; + } + } + + free(module_qn); + free_import_map(imp_keys, imp_vals, imp_count); + if (result_owned) { + cbm_free_result(result); + } + } + + cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved", + itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors", + itoa_log(errors)); + + /* Additional pattern-based edge passes run after normal call resolution */ + cbm_pipeline_pass_fastapi_depends(ctx, files, file_count); + + return 0; +} + +/* ── FastAPI Depends() tracking ──────────────────────────────────── */ +/* Scans Python function signatures for Depends(func_ref) patterns and + * creates CALLS edges from the endpoint to the dependency function. + * Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */ + +/* Extract Python function signature text from source starting at given line. Caller frees. 
*/ +static char *extract_py_signature(const char *source, int start_line, int end_line) { + int sig_end = start_line + PC_SIG_SCAN; + if (end_line > 0 && sig_end > end_line) { + sig_end = end_line; + } + const char *p = source; + int line = SKIP_ONE; + while (*p && line < start_line) { + if (*p == '\n') { + line++; + } + p++; + } + const char *sig_start = p; + while (*p && line < sig_end) { + if (*p == '\n') { + line++; + } + p++; + if (p > sig_start + SKIP_ONE && p[-SKIP_ONE] == ':' && p[-PAIR_LEN] == ')') { + break; + } + } + size_t sig_len = (size_t)(p - sig_start); + char *sig = malloc(sig_len + SKIP_ONE); + if (!sig) { + return NULL; + } + memcpy(sig, sig_start, sig_len); + sig[sig_len] = '\0'; + return sig; +} + +/* Scan one function's signature for Depends(func_ref) and create CALLS edges. */ +static int scan_depends_in_sig(cbm_pipeline_ctx_t *ctx, const cbm_regex_t *re, const char *sig, + const CBMDefinition *def, const char *module_qn, const char **ik, + const char **iv, int ic) { + int count = 0; + cbm_regmatch_t match[PC_REGEX_GRP]; + const char *scan = sig; + while (cbm_regexec(re, scan, PC_REGEX_GRP, match, 0) == 0) { + int ref_len = match[SKIP_ONE].rm_eo - match[SKIP_ONE].rm_so; + char func_ref[CBM_SZ_256]; + if (ref_len >= (int)sizeof(func_ref)) { + ref_len = (int)sizeof(func_ref) - SKIP_ONE; + } + memcpy(func_ref, scan + match[SKIP_ONE].rm_so, (size_t)ref_len); + func_ref[ref_len] = '\0'; + cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn, ik, iv, ic); + if (res.qualified_name && res.qualified_name[0] != '\0') { + const cbm_gbuf_node_t *sn = cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); + const cbm_gbuf_node_t *tn = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); + if (sn && tn && sn->id != tn->id) { + cbm_gbuf_insert_edge(ctx->gbuf, sn->id, tn->id, "CALLS", + "{\"confidence\":0.95,\"strategy\":\"fastapi_depends\"}"); + count++; + } + } + scan += match[0].rm_eo; + } + return count; +} + +static bool 
is_callable_def(const CBMDefinition *def) { + return def->qualified_name && def->start_line > 0 && def->label && + (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0); +} + +static bool file_has_depends_call(const CBMFileResult *result) { + for (int c = 0; c < result->calls.count; c++) { + if (result->calls.items[c].callee_name && + strcmp(result->calls.items[c].callee_name, "Depends") == 0) { + return true; + } + } + return false; +} + +void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, + int file_count) { + cbm_regex_t depends_re; + if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) { + return; + } + + int edge_count = 0; + for (int i = 0; i < file_count; i++) { + if (files[i].language != CBM_LANG_PYTHON) { + continue; + } + if (cbm_pipeline_check_cancel(ctx)) { + break; + } + + CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; + if (!result || !file_has_depends_call(result)) { + continue; + } + + /* Read source and scan for Depends(func_ref) in function signatures */ + int source_len = 0; + char *source = read_file(files[i].path, &source_len); + if (!source) { + continue; + } + + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); + + /* Build import map for alias resolution */ + const char **imp_keys = NULL; + const char **imp_vals = NULL; + int imp_count = 0; + build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count); + + for (int d = 0; d < result->defs.count; d++) { + CBMDefinition *def = &result->defs.items[d]; + if (!is_callable_def(def)) { + continue; + } + + char *sig = extract_py_signature(source, (int)def->start_line, (int)def->end_line); + if (!sig) { + continue; + } + + edge_count += scan_depends_in_sig(ctx, &depends_re, sig, def, module_qn, imp_keys, + imp_vals, imp_count); + free(sig); + } + + free(module_qn); + free_import_map(imp_keys, imp_vals, imp_count); + 
free(source); + } + + cbm_regfree(&depends_re); + if (edge_count > 0) { + cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count)); + } +} + +/* DLL resolve tracking removed — triggered Windows Defender false positive. + * See issue #89. */ diff --git a/src/pipeline/pass_cross_repo.c b/src/pipeline/pass_cross_repo.c index 07f5ca7e3..e232eebad 100644 --- a/src/pipeline/pass_cross_repo.c +++ b/src/pipeline/pass_cross_repo.c @@ -1,684 +1,814 @@ -/* - * pass_cross_repo.c — Cross-repo intelligence: match Routes, Channels, and - * async topics across indexed projects to create CROSS_* edges. - * - * For each HTTP_CALLS/ASYNC_CALLS edge in the source project, looks up the - * target Route QN in other project DBs. For each Channel node with EMITS - * edges, looks for matching LISTENS_ON in other projects (and vice versa). - * - * Edges are written bidirectionally: both source and target project DBs - * get a CROSS_* edge so the link is visible from either side. - */ -#include "pipeline/pass_cross_repo.h" -#include "foundation/constants.h" -#include "foundation/log.h" -#include "foundation/platform.h" -#include "foundation/compat.h" -#include "foundation/compat_fs.h" - -#include -#include -#include -#include -#include -#include - -/* ── Constants ───────────────────────────────────────────────────── */ - -enum { - CR_PATH_BUF = 1024, - CR_QN_BUF = 512, - CR_PROPS_BUF = 2048, - CR_MAX_EDGES = 4096, - CR_DB_EXT_LEN = 3, /* strlen(".db") */ - CR_INIT_CAP = 32, - CR_COL_3 = 3, - CR_COL_4 = 4, -}; - -#define CR_MS_PER_SEC 1000.0 -#define CR_NS_PER_MS 1000000.0 - -/* TLS buffer for integer-to-string in log calls. */ -static CBM_TLS char cr_ibuf[CBM_SZ_32]; -static const char *cr_itoa(int v) { - snprintf(cr_ibuf, sizeof(cr_ibuf), "%d", v); - return cr_ibuf; -} - -/* ── Helpers ─────────────────────────────────────────────────────── */ - -static const char *cr_cache_dir(void) { - const char *dir = cbm_resolve_cache_dir(); - return dir ? 
dir : cbm_tmpdir(); -} - -static void cr_db_path(const char *project, char *buf, size_t bufsz) { - snprintf(buf, bufsz, "%s/%s.db", cr_cache_dir(), project); -} - -/* Extract a JSON string property from properties_json. - * Writes into buf, returns buf on success, NULL on miss. */ -static const char *json_str_prop(const char *json, const char *key, char *buf, size_t bufsz) { - if (!json || !key) { - return NULL; - } - char pat[CBM_SZ_128]; - snprintf(pat, sizeof(pat), "\"%s\":\"", key); - const char *start = strstr(json, pat); - if (!start) { - return NULL; - } - start += strlen(pat); - const char *end = strchr(start, '"'); - if (!end) { - return NULL; - } - size_t len = (size_t)(end - start); - if (len >= bufsz) { - len = bufsz - SKIP_ONE; - } - memcpy(buf, start, len); - buf[len] = '\0'; - return buf; -} - -/* Build CROSS_* edge properties JSON. */ -static void build_cross_props(char *buf, size_t bufsz, const char *target_project, - const char *target_function, const char *target_file, - const char *url_or_channel, const char *extra_key, - const char *extra_val) { - int n = snprintf(buf, bufsz, - "{\"target_project\":\"%s\",\"target_function\":\"%s\"," - "\"target_file\":\"%s\"", - target_project ? target_project : "", target_function ? target_function : "", - target_file ? target_file : ""); - if (url_or_channel && url_or_channel[0]) { - n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", - extra_key ? extra_key : "url_path", url_or_channel); - } - if (extra_val && extra_val[0]) { - n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", - extra_key ? "transport" : "method", extra_val); - } - snprintf(buf + n, bufsz - (size_t)n, "}"); -} - -/* Delete all CROSS_* edges for a project from a store. 
*/ -static void delete_cross_edges(cbm_store_t *store, const char *project) { - cbm_store_delete_edges_by_type(store, project, "CROSS_HTTP_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_ASYNC_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_CHANNEL"); - cbm_store_delete_edges_by_type(store, project, "CROSS_GRPC_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_GRAPHQL_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_TRPC_CALLS"); -} - -/* Insert a CROSS_* edge into a store. */ -static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id, - int64_t to_id, const char *edge_type, const char *props) { - cbm_edge_t edge = { - .project = project, - .source_id = from_id, - .target_id = to_id, - .type = edge_type, - .properties_json = props, - }; - cbm_store_insert_edge(store, &edge); -} - -/* Look up a node's name and file_path by id. */ -static void lookup_node_info(struct sqlite3 *db, int64_t node_id, char *name_out, size_t name_sz, - char *file_out, size_t file_sz) { - name_out[0] = '\0'; - file_out[0] = '\0'; - sqlite3_stmt *st = NULL; - if (sqlite3_prepare_v2(db, "SELECT name, file_path FROM nodes WHERE id = ?1", CBM_NOT_FOUND, - &st, NULL) != SQLITE_OK) { - return; - } - sqlite3_bind_int64(st, SKIP_ONE, node_id); - if (sqlite3_step(st) == SQLITE_ROW) { - const char *nm = (const char *)sqlite3_column_text(st, 0); - const char *fp = (const char *)sqlite3_column_text(st, SKIP_ONE); - if (nm) { - snprintf(name_out, name_sz, "%s", nm); - } - if (fp) { - snprintf(file_out, file_sz, "%s", fp); - } - } - sqlite3_finalize(st); -} - -/* ── Phase A: HTTP Route matching ────────────────────────────────── */ - -/* Find a Route node in target_store by QN and return the handler function's - * node id, name, and file_path via HANDLES edges. Returns 0 if not found. 
*/ -static int64_t find_route_handler(cbm_store_t *target_store, const char *route_qn, - char *handler_name, size_t name_sz, char *handler_file, - size_t file_sz) { - handler_name[0] = '\0'; - handler_file[0] = '\0'; - struct sqlite3 *db = cbm_store_get_db(target_store); - if (!db) { - return 0; - } - - /* Find Route node by QN */ - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2( - db, "SELECT id FROM nodes WHERE qualified_name = ?1 AND label = 'Route' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); - int64_t route_id = 0; - if (sqlite3_step(s) == SQLITE_ROW) { - route_id = sqlite3_column_int64(s, 0); - } - sqlite3_finalize(s); - if (route_id == 0) { - return 0; - } - - /* Follow HANDLES edge to find the handler function */ - if (sqlite3_prepare_v2(db, - "SELECT n.id, n.name, n.file_path FROM edges e " - "JOIN nodes n ON n.id = e.source_id " - "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_int64(s, SKIP_ONE, route_id); - int64_t handler_id = 0; - if (sqlite3_step(s) == SQLITE_ROW) { - handler_id = sqlite3_column_int64(s, 0); - const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (n) { - snprintf(handler_name, name_sz, "%s", n); - } - if (f) { - snprintf(handler_file, file_sz, "%s", f); - } - } - sqlite3_finalize(s); - return handler_id; -} - -/* Emit CROSS_* edge for a route match: forward into source, reverse into target. 
*/ -static void emit_cross_route_bidirectional(cbm_store_t *src_store, const char *src_project, - struct sqlite3 *src_db, int64_t caller_id, - int64_t local_route_id, cbm_store_t *tgt_store, - const char *tgt_project, int64_t handler_id, - const char *route_qn, const char *handler_name, - const char *handler_file, const char *url_path, - const char *method, const char *edge_type) { - /* Forward: caller → local Route in source DB */ - char fwd[CR_PROPS_BUF]; - build_cross_props(fwd, sizeof(fwd), tgt_project, handler_name, handler_file, url_path, - "url_path", method); - insert_cross_edge(src_store, src_project, caller_id, local_route_id, edge_type, fwd); - - /* Reverse: handler → Route in target DB */ - struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); - if (!tgt_db) { - return; - } - sqlite3_stmt *rq = NULL; - if (sqlite3_prepare_v2(tgt_db, "SELECT id FROM nodes WHERE qualified_name = ?1 LIMIT 1", - CBM_NOT_FOUND, &rq, NULL) != SQLITE_OK) { - return; - } - sqlite3_bind_text(rq, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); - int64_t tgt_route_id = 0; - if (sqlite3_step(rq) == SQLITE_ROW) { - tgt_route_id = sqlite3_column_int64(rq, 0); - } - sqlite3_finalize(rq); - if (tgt_route_id == 0) { - return; - } - - char caller_name[CBM_SZ_256] = {0}; - char caller_file[CBM_SZ_512] = {0}; - lookup_node_info(src_db, caller_id, caller_name, sizeof(caller_name), caller_file, - sizeof(caller_file)); - - char rev[CR_PROPS_BUF]; - build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, url_path, "url_path", - method); - insert_cross_edge(tgt_store, tgt_project, handler_id, tgt_route_id, edge_type, rev); -} - -static int match_http_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - /* Find all HTTP_CALLS edges in source project */ - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT 
e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = 'HTTP_CALLS'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char url_path[CBM_SZ_256] = {0}; - char method[CBM_SZ_32] = {0}; - json_str_prop(props, "url_path", url_path, sizeof(url_path)); - json_str_prop(props, "method", method, sizeof(method)); - if (!url_path[0]) { - continue; - } - - /* Build the expected Route QN in the target project */ - char route_qn[CR_QN_BUF]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method[0] ? method : "ANY", - url_path); - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - /* Try without method (ANY) */ - snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", url_path); - handler_id = find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - } - if (handler_id == 0) { - continue; - } - - emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, - tgt_store, tgt_project, handler_id, route_qn, handler_name, - handler_file, url_path, method, "CROSS_HTTP_CALLS"); - - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase B: Async matching ─────────────────────────────────────── */ - -static int match_async_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 
0; - } - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = 'ASYNC_CALLS'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char url_path[CBM_SZ_256] = {0}; - char broker[CBM_SZ_128] = {0}; - json_str_prop(props, "url_path", url_path, sizeof(url_path)); - json_str_prop(props, "broker", broker, sizeof(broker)); - if (!url_path[0]) { - continue; - } - - char route_qn[CR_QN_BUF]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", broker[0] ? broker : "async", - url_path); - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - continue; - } - - char edge_props[CR_PROPS_BUF]; - build_cross_props(edge_props, sizeof(edge_props), tgt_project, handler_name, handler_file, - url_path, "url_path", broker); - insert_cross_edge(src_store, src_project, caller_id, route_id, "CROSS_ASYNC_CALLS", - edge_props); - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase C: Channel matching ───────────────────────────────────── */ - -/* Try to find a matching listener in target DB for a channel name. 
*/ -static bool try_match_channel_listener(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project, - const char *channel_name, const char *transport, - int64_t emitter_id, int64_t channel_id) { - struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); - if (!tgt_db) { - return false; - } - sqlite3_stmt *tq = NULL; - if (sqlite3_prepare_v2(tgt_db, - "SELECT n.id, e.source_id, fn.name, fn.file_path FROM nodes n " - "JOIN edges e ON e.target_id = n.id AND e.type = 'LISTENS_ON' " - "JOIN nodes fn ON fn.id = e.source_id " - "WHERE n.project = ?1 AND n.name = ?2 AND n.label = 'Channel' LIMIT 1", - CBM_NOT_FOUND, &tq, NULL) != SQLITE_OK) { - return false; - } - sqlite3_bind_text(tq, SKIP_ONE, tgt_project, CBM_NOT_FOUND, SQLITE_STATIC); - sqlite3_bind_text(tq, PAIR_LEN, channel_name, CBM_NOT_FOUND, SQLITE_STATIC); - - bool matched = false; - if (sqlite3_step(tq) == SQLITE_ROW) { - int64_t tgt_channel_id = sqlite3_column_int64(tq, 0); - int64_t listener_id = sqlite3_column_int64(tq, SKIP_ONE); - const char *listener_name = (const char *)sqlite3_column_text(tq, PAIR_LEN); - const char *listener_file = (const char *)sqlite3_column_text(tq, CR_COL_3); - - /* Forward edge: emitter → local Channel */ - char fwd[CR_PROPS_BUF]; - build_cross_props(fwd, sizeof(fwd), tgt_project, listener_name ? listener_name : "", - listener_file ? 
listener_file : "", channel_name, "channel_name", - transport); - insert_cross_edge(src_store, src_project, emitter_id, channel_id, "CROSS_CHANNEL", fwd); - - /* Reverse edge: listener → target Channel */ - char caller_name[CBM_SZ_256] = {0}; - char caller_file[CBM_SZ_512] = {0}; - lookup_node_info(cbm_store_get_db(src_store), emitter_id, caller_name, sizeof(caller_name), - caller_file, sizeof(caller_file)); - - char rev[CR_PROPS_BUF]; - build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, channel_name, - "channel_name", transport); - insert_cross_edge(tgt_store, tgt_project, listener_id, tgt_channel_id, "CROSS_CHANNEL", - rev); - matched = true; - } - sqlite3_finalize(tq); - return matched; -} - -static int match_channels(cbm_store_t *src_store, const char *src_project, cbm_store_t *tgt_store, - const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT DISTINCT n.id, n.name, n.qualified_name, n.properties, " - "e.source_id FROM nodes n " - "JOIN edges e ON e.target_id = n.id AND e.type = 'EMITS' " - "WHERE n.project = ?1 AND n.label = 'Channel'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - const char *channel_name = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *channel_qn = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (!channel_name || !channel_qn) { - continue; - } - int64_t channel_id = sqlite3_column_int64(s, 0); - const char *channel_props = (const char *)sqlite3_column_text(s, CR_COL_3); - int64_t emitter_id = sqlite3_column_int64(s, CR_COL_4); - - char transport[CBM_SZ_64] = {0}; - json_str_prop(channel_props, "transport", transport, sizeof(transport)); - - if (try_match_channel_listener(src_store, src_project, 
tgt_store, tgt_project, channel_name, - transport, emitter_id, channel_id)) { - count++; - } - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase D: Generic route-type matcher (gRPC, GraphQL, tRPC) ──── */ - -/* Look up a node's qualified_name by id. Returns true if found. */ -static bool lookup_node_qn(struct sqlite3 *db, int64_t node_id, char *out, size_t out_sz) { - out[0] = '\0'; - sqlite3_stmt *st = NULL; - if (sqlite3_prepare_v2(db, "SELECT qualified_name FROM nodes WHERE id = ?1", CBM_NOT_FOUND, &st, - NULL) != SQLITE_OK) { - return false; - } - sqlite3_bind_int64(st, SKIP_ONE, node_id); - bool found = false; - if (sqlite3_step(st) == SQLITE_ROW) { - const char *qn = (const char *)sqlite3_column_text(st, 0); - if (qn) { - snprintf(out, out_sz, "%s", qn); - found = true; - } - } - sqlite3_finalize(st); - return found; -} - -/* Match edges of a given type against Route nodes with a given QN prefix. - * Reuses the same infrastructure as HTTP/async matching. */ -static int match_typed_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project, - const char *edge_type, const char *svc_key, const char *method_key, - const char *cross_edge_type) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - char sql[CBM_SZ_256]; - snprintf(sql, sizeof(sql), - "SELECT e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = '%s'", - edge_type); - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, sql, CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char svc_val[CBM_SZ_256] = {0}; - char 
meth_val[CBM_SZ_256] = {0}; - json_str_prop(props, svc_key, svc_val, sizeof(svc_val)); - json_str_prop(props, method_key, meth_val, sizeof(meth_val)); - if (!svc_val[0] && !meth_val[0]) { - continue; - } - - /* Look up the Route QN from the target node (already points to the Route). */ - char route_qn[CR_QN_BUF] = {0}; - if (!lookup_node_qn(src_db, route_id, route_qn, sizeof(route_qn))) { - continue; - } - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - continue; - } - - emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, - tgt_store, tgt_project, handler_id, route_qn, handler_name, - handler_file, svc_val, svc_key, cross_edge_type); - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Collect target projects ─────────────────────────────────────── */ - -/* When target_projects = ["*"], scan the cache directory for all .db files. 
*/ -static int collect_all_projects(char ***out) { - const char *dir = cr_cache_dir(); - cbm_dir_t *d = cbm_opendir(dir); - if (!d) { - *out = NULL; - return 0; - } - - int cap = CR_INIT_CAP; - int count = 0; - char **projects = malloc((size_t)cap * sizeof(char *)); - - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len < CR_COL_4 || strcmp(ent->name + len - CR_DB_EXT_LEN, ".db") != 0) { - continue; - } - if (strstr(ent->name, "_cross_repo") || strstr(ent->name, "_config")) { - continue; - } - if (strstr(ent->name, "-wal") || strstr(ent->name, "-shm")) { - continue; - } - if (count >= cap) { - cap *= PAIR_LEN; - char **tmp = realloc(projects, (size_t)cap * sizeof(char *)); - if (!tmp) { - break; - } - projects = tmp; - } - /* Strip .db extension */ - projects[count] = malloc(len - PAIR_LEN); - memcpy(projects[count], ent->name, len - CR_DB_EXT_LEN); - projects[count][len - CR_DB_EXT_LEN] = '\0'; - count++; - } - cbm_closedir(d); - - *out = projects; - return count; -} - -static void free_project_list(char **projects, int count) { - for (int i = 0; i < count; i++) { - free(projects[i]); - } - free(projects); -} - -/* ── Entry point ─────────────────────────────────────────────────── */ - -cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **target_projects, - int target_count) { - cbm_cross_repo_result_t result = {0}; - struct timespec t0; - clock_gettime(CLOCK_MONOTONIC, &t0); - - /* Open source project store (read-write) */ - char src_path[CR_PATH_BUF]; - cr_db_path(project, src_path, sizeof(src_path)); - cbm_store_t *src_store = cbm_store_open_path(src_path); - if (!src_store) { - return result; - } - - /* Clean existing CROSS_* edges for this project */ - delete_cross_edges(src_store, project); - - /* Resolve target projects */ - char **resolved = NULL; - int resolved_count = 0; - bool own_list = false; - - if (target_count == SKIP_ONE && strcmp(target_projects[0], "*") == 0) { - 
resolved_count = collect_all_projects(&resolved); - own_list = true; - } else { - resolved = (char **)target_projects; - resolved_count = target_count; - } - - /* Match against each target */ - for (int i = 0; i < resolved_count; i++) { - const char *tgt = resolved[i]; - if (strcmp(tgt, project) == 0) { - continue; /* skip self */ - } - - char tgt_path[CR_PATH_BUF]; - cr_db_path(tgt, tgt_path, sizeof(tgt_path)); - - /* Open target store read-write (for bidirectional edge writes) */ - cbm_store_t *tgt_store = cbm_store_open_path(tgt_path); - if (!tgt_store) { - continue; - } - - result.http_edges += match_http_routes(src_store, project, tgt_store, tgt); - result.async_edges += match_async_routes(src_store, project, tgt_store, tgt); - result.channel_edges += match_channels(src_store, project, tgt_store, tgt); - result.grpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "GRPC_CALLS", - "service", "method", "CROSS_GRPC_CALLS"); - result.graphql_edges += - match_typed_routes(src_store, project, tgt_store, tgt, "GRAPHQL_CALLS", "operation", - "operation", "CROSS_GRAPHQL_CALLS"); - result.trpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "TRPC_CALLS", - "procedure", "procedure", "CROSS_TRPC_CALLS"); - result.projects_scanned++; - - cbm_store_close(tgt_store); - } - - cbm_store_close(src_store); - - if (own_list) { - free_project_list(resolved, resolved_count); - } - - struct timespec t1; - clock_gettime(CLOCK_MONOTONIC, &t1); - result.elapsed_ms = ((double)(t1.tv_sec - t0.tv_sec) * CR_MS_PER_SEC) + - ((double)(t1.tv_nsec - t0.tv_nsec) / CR_NS_PER_MS); - - int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + - result.graphql_edges + result.trpc_edges; - cbm_log_info("cross_repo.done", "project", project, "total", cr_itoa(total)); - - return result; -} +/* + * pass_cross_repo.c — Cross-repo intelligence: match Routes, Channels, and + * async topics across indexed projects to create CROSS_* edges. 
+ * + * For each HTTP_CALLS/ASYNC_CALLS edge in the source project, looks up the + * target Route QN in other project DBs. For each Channel node with EMITS + * edges, looks for matching LISTENS_ON in other projects (and vice versa). + * + * Edges are written bidirectionally: both source and target project DBs + * get a CROSS_* edge so the link is visible from either side. + */ +#include "pipeline/pass_cross_repo.h" +#include "foundation/constants.h" +#include "foundation/log.h" +#include "foundation/platform.h" +#include "foundation/compat.h" +#include "foundation/compat_fs.h" + +#include +#include +#include +#include +#include +#include + +/* ── Constants ───────────────────────────────────────────────────── */ + +enum { + CR_PATH_BUF = 1024, + CR_QN_BUF = 512, + CR_PROPS_BUF = 2048, + CR_MAX_EDGES = 4096, + CR_DB_EXT_LEN = 3, /* strlen(".db") */ + CR_INIT_CAP = 32, + CR_COL_3 = 3, + CR_COL_4 = 4, +}; + +#define CR_MS_PER_SEC 1000.0 +#define CR_NS_PER_MS 1000000.0 + +/* TLS buffer for integer-to-string in log calls. */ +static CBM_TLS char cr_ibuf[CBM_SZ_32]; +static const char *cr_itoa(int v) { + snprintf(cr_ibuf, sizeof(cr_ibuf), "%d", v); + return cr_ibuf; +} + +/* ── Helpers ─────────────────────────────────────────────────────── */ + +static const char *cr_cache_dir(void) { + const char *dir = cbm_resolve_cache_dir(); + return dir ? dir : cbm_tmpdir(); +} + +static void cr_db_path(const char *project, char *buf, size_t bufsz) { + snprintf(buf, bufsz, "%s/%s.db", cr_cache_dir(), project); +} + +/* Extract a JSON string property from properties_json. + * Writes into buf, returns buf on success, NULL on miss. 
*/ +static const char *json_str_prop(const char *json, const char *key, char *buf, size_t bufsz) { + if (!json || !key) { + return NULL; + } + char pat[CBM_SZ_128]; + snprintf(pat, sizeof(pat), "\"%s\":\"", key); + const char *start = strstr(json, pat); + if (!start) { + return NULL; + } + start += strlen(pat); + const char *end = strchr(start, '"'); + if (!end) { + return NULL; + } + size_t len = (size_t)(end - start); + if (len >= bufsz) { + len = bufsz - SKIP_ONE; + } + memcpy(buf, start, len); + buf[len] = '\0'; + return buf; +} + +/* Strip scheme and authority from a URL, returning a pointer into the + * original string at the start of the path component. + * "http://svc:8080/v2/orders/123" -> "/v2/orders/123" + * "/v2/orders/123" -> "/v2/orders/123" (already a path) */ +static const char *cr_url_path(const char *url) { + if (!url) return url; + const char *scheme_end = strstr(url, "://"); + if (!scheme_end) return url; + const char *path_start = strchr(scheme_end + 3, '/'); + return path_start ? path_start : url; +} + +/* Return 1 if concrete path matches a route template. + * A template segment wrapped in '{' '}' matches any concrete segment. + * "/v2/orders/123" matches "/v2/orders/{id}" -> 1 + * "/v2/orders/123" matches "/v2/orders/456" -> 0 */ +static int cr_path_matches_template(const char *concrete, const char *tmpl) { + if (!concrete || !tmpl) return 0; + const char *c = concrete; + const char *t = tmpl; + while (*c || *t) { + while (*c == '/') c++; + while (*t == '/') t++; + if (!*c && !*t) return 1; + if (!*c || !*t) return 0; + const char *ce = strchr(c, '/'); + const char *te = strchr(t, '/'); + size_t clen = ce ? (size_t)(ce - c) : strlen(c); + size_t tlen = te ? (size_t)(te - t) : strlen(t); + int is_param = (tlen >= 2 && t[0] == '{' && t[tlen - 1] == '}'); + if (!is_param && (clen != tlen || strncmp(c, t, clen) != 0)) { + return 0; + } + c += clen; + t += tlen; + } + return 1; +} + + + +/* Build CROSS_* edge properties JSON. 
*/ +static void build_cross_props(char *buf, size_t bufsz, const char *target_project, + const char *target_function, const char *target_file, + const char *url_or_channel, const char *extra_key, + const char *extra_val) { + int n = snprintf(buf, bufsz, + "{\"target_project\":\"%s\",\"target_function\":\"%s\"," + "\"target_file\":\"%s\"", + target_project ? target_project : "", target_function ? target_function : "", + target_file ? target_file : ""); + if (url_or_channel && url_or_channel[0]) { + n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", + extra_key ? extra_key : "url_path", url_or_channel); + } + if (extra_val && extra_val[0]) { + n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", + extra_key ? "transport" : "method", extra_val); + } + snprintf(buf + n, bufsz - (size_t)n, "}"); +} + +/* Delete all CROSS_* edges for a project from a store. */ +static void delete_cross_edges(cbm_store_t *store, const char *project) { + cbm_store_delete_edges_by_type(store, project, "CROSS_HTTP_CALLS"); + cbm_store_delete_edges_by_type(store, project, "CROSS_ASYNC_CALLS"); + cbm_store_delete_edges_by_type(store, project, "CROSS_CHANNEL"); + cbm_store_delete_edges_by_type(store, project, "CROSS_GRPC_CALLS"); + cbm_store_delete_edges_by_type(store, project, "CROSS_GRAPHQL_CALLS"); + cbm_store_delete_edges_by_type(store, project, "CROSS_TRPC_CALLS"); +} + +/* Insert a CROSS_* edge into a store. */ +static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id, + int64_t to_id, const char *edge_type, const char *props) { + cbm_edge_t edge = { + .project = project, + .source_id = from_id, + .target_id = to_id, + .type = edge_type, + .properties_json = props, + }; + cbm_store_insert_edge(store, &edge); +} + +/* Look up a node's name and file_path by id. 
*/ +static void lookup_node_info(struct sqlite3 *db, int64_t node_id, char *name_out, size_t name_sz, + char *file_out, size_t file_sz) { + name_out[0] = '\0'; + file_out[0] = '\0'; + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, "SELECT name, file_path FROM nodes WHERE id = ?1", CBM_NOT_FOUND, + &st, NULL) != SQLITE_OK) { + return; + } + sqlite3_bind_int64(st, SKIP_ONE, node_id); + if (sqlite3_step(st) == SQLITE_ROW) { + const char *nm = (const char *)sqlite3_column_text(st, 0); + const char *fp = (const char *)sqlite3_column_text(st, SKIP_ONE); + if (nm) { + snprintf(name_out, name_sz, "%s", nm); + } + if (fp) { + snprintf(file_out, file_sz, "%s", fp); + } + } + sqlite3_finalize(st); +} + +/* ── Phase A: HTTP Route matching ────────────────────────────────── */ + +/* Find a Route node in target_store by QN and return the handler function's + * node id, name, and file_path via HANDLES edges. Returns 0 if not found. */ +static int64_t find_route_handler(cbm_store_t *target_store, const char *route_qn, + char *handler_name, size_t name_sz, char *handler_file, + size_t file_sz) { + handler_name[0] = '\0'; + handler_file[0] = '\0'; + struct sqlite3 *db = cbm_store_get_db(target_store); + if (!db) { + return 0; + } + + /* Find Route node by QN */ + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2( + db, "SELECT id FROM nodes WHERE qualified_name = ?1 AND label = 'Route' LIMIT 1", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(s, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); + int64_t route_id = 0; + if (sqlite3_step(s) == SQLITE_ROW) { + route_id = sqlite3_column_int64(s, 0); + } + sqlite3_finalize(s); + if (route_id == 0) { + return 0; + } + + /* Follow HANDLES edge to find the handler function */ + if (sqlite3_prepare_v2(db, + "SELECT n.id, n.name, n.file_path FROM edges e " + "JOIN nodes n ON n.id = e.source_id " + "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 
0; + } + sqlite3_bind_int64(s, SKIP_ONE, route_id); + int64_t handler_id = 0; + if (sqlite3_step(s) == SQLITE_ROW) { + handler_id = sqlite3_column_int64(s, 0); + const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); + const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); + if (n) { + snprintf(handler_name, name_sz, "%s", n); + } + if (f) { + snprintf(handler_file, file_sz, "%s", f); + } + } + sqlite3_finalize(s); + return handler_id; +} + +/* Fuzzy route lookup: scan all Route nodes in target and return the first + * whose path template matches the concrete norm_path from the consumer. + * Fills route_qn_out with the matched QN (for edge emission). */ +static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, + const char *norm_path, const char *method, + char *route_qn_out, size_t route_qn_sz, + char *handler_name, size_t name_sz, + char *handler_file, size_t file_sz) { + handler_name[0] = '\0'; + handler_file[0] = '\0'; + struct sqlite3 *db = cbm_store_get_db(target_store); + if (!db) return 0; + + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(db, + "SELECT id, qualified_name, name FROM nodes WHERE label = 'Route'", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + + int64_t matched_route_id = 0; + while (sqlite3_step(s) == SQLITE_ROW) { + int64_t rid = sqlite3_column_int64(s, 0); + const char *qn = (const char *)sqlite3_column_text(s, SKIP_ONE); + const char *name = (const char *)sqlite3_column_text(s, PAIR_LEN); + if (!qn || !name) continue; + + /* QN format: __route__METHOD__/path */ + const char *after_prefix = strstr(qn, "__route__"); + if (!after_prefix) continue; + after_prefix += 9; /* skip "__route__" */ + const char *second_sep = strstr(after_prefix, "__"); + if (!second_sep) continue; + const char *tmpl_path = second_sep + 2; + + /* method filter: skip mismatched methods, always accept ANY */ + if (method) { + size_t mlen = (size_t)(second_sep - after_prefix); + char qn_method[CBM_SZ_32] = {0}; + if (mlen >= 
sizeof(qn_method)) mlen = sizeof(qn_method) - 1; + strncpy(qn_method, after_prefix, mlen); + if (strcmp(qn_method, "ANY") != 0 && strcmp(qn_method, method) != 0) continue; + } + + if (cr_path_matches_template(norm_path, tmpl_path)) { + matched_route_id = rid; + snprintf(route_qn_out, route_qn_sz, "%s", qn); + break; + } + } + sqlite3_finalize(s); + if (matched_route_id == 0) return 0; + + /* Follow HANDLES edge to find the handler function */ + if (sqlite3_prepare_v2(db, + "SELECT n.id, n.name, n.file_path FROM edges e " + "JOIN nodes n ON n.id = e.source_id " + "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_int64(s, SKIP_ONE, matched_route_id); + int64_t handler_id = 0; + if (sqlite3_step(s) == SQLITE_ROW) { + handler_id = sqlite3_column_int64(s, 0); + const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); + const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); + if (n) snprintf(handler_name, name_sz, "%s", n); + if (f) snprintf(handler_file, file_sz, "%s", f); + } + sqlite3_finalize(s); + return handler_id; +} + +/* Emit CROSS_* edge for a route match: forward into source, reverse into target. 
*/ +static void emit_cross_route_bidirectional(cbm_store_t *src_store, const char *src_project, + struct sqlite3 *src_db, int64_t caller_id, + int64_t local_route_id, cbm_store_t *tgt_store, + const char *tgt_project, int64_t handler_id, + const char *route_qn, const char *handler_name, + const char *handler_file, const char *url_path, + const char *method, const char *edge_type) { + /* Forward: caller → local Route in source DB */ + char fwd[CR_PROPS_BUF]; + build_cross_props(fwd, sizeof(fwd), tgt_project, handler_name, handler_file, url_path, + "url_path", method); + insert_cross_edge(src_store, src_project, caller_id, local_route_id, edge_type, fwd); + + /* Reverse: handler → Route in target DB */ + struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); + if (!tgt_db) { + return; + } + sqlite3_stmt *rq = NULL; + if (sqlite3_prepare_v2(tgt_db, "SELECT id FROM nodes WHERE qualified_name = ?1 LIMIT 1", + CBM_NOT_FOUND, &rq, NULL) != SQLITE_OK) { + return; + } + sqlite3_bind_text(rq, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); + int64_t tgt_route_id = 0; + if (sqlite3_step(rq) == SQLITE_ROW) { + tgt_route_id = sqlite3_column_int64(rq, 0); + } + sqlite3_finalize(rq); + if (tgt_route_id == 0) { + return; + } + + char caller_name[CBM_SZ_256] = {0}; + char caller_file[CBM_SZ_512] = {0}; + lookup_node_info(src_db, caller_id, caller_name, sizeof(caller_name), caller_file, + sizeof(caller_file)); + + char rev[CR_PROPS_BUF]; + build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, url_path, "url_path", + method); + insert_cross_edge(tgt_store, tgt_project, handler_id, tgt_route_id, edge_type, rev); +} + +static int match_http_routes(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project) { + struct sqlite3 *src_db = cbm_store_get_db(src_store); + if (!src_db) { + return 0; + } + + /* Find all HTTP_CALLS edges in source project */ + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(src_db, + "SELECT 
e.source_id, e.target_id, e.properties FROM edges e " + "WHERE e.project = ?1 AND e.type = 'HTTP_CALLS'", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); + + int count = 0; + while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { + int64_t caller_id = sqlite3_column_int64(s, 0); + int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); + const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); + + char url_path[CBM_SZ_256] = {0}; + char method[CBM_SZ_32] = {0}; + json_str_prop(props, "url_path", url_path, sizeof(url_path)); + json_str_prop(props, "method", method, sizeof(method)); + if (!url_path[0]) { + continue; + } + + /* Normalise: strip scheme+host+port so a full URL consumer-side path + * ("http://svc:8080/v2/orders/123") can match a bare provider route + * ("/v2/orders/{id}"). Issue #523. */ + const char *norm_path = cr_url_path(url_path); + + /* Build the expected Route QN in the target project */ + char route_qn[CR_QN_BUF]; + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method[0] ? method : "ANY", + norm_path); + + char handler_name[CBM_SZ_256] = {0}; + char handler_file[CBM_SZ_512] = {0}; + int64_t handler_id = + find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), + handler_file, sizeof(handler_file)); + if (handler_id == 0) { + /* Try without method (ANY) */ + snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", norm_path); + handler_id = find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), + handler_file, sizeof(handler_file)); + } + if (handler_id == 0) { + /* Exact QN lookup missed — try path-param template matching. + * Handles concrete vs template mismatch ("123" vs "{id}"). Issue #523. */ + handler_id = find_route_handler_fuzzy(tgt_store, norm_path, + method[0] ? 
method : NULL, + route_qn, sizeof(route_qn), + handler_name, sizeof(handler_name), + handler_file, sizeof(handler_file)); + } + if (handler_id == 0) { + continue; + } + + emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, + tgt_store, tgt_project, handler_id, route_qn, handler_name, + handler_file, url_path, method, "CROSS_HTTP_CALLS"); + + count++; + } + sqlite3_finalize(s); + return count; +} + +/* ── Phase B: Async matching ─────────────────────────────────────── */ + +static int match_async_routes(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project) { + struct sqlite3 *src_db = cbm_store_get_db(src_store); + if (!src_db) { + return 0; + } + + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(src_db, + "SELECT e.source_id, e.target_id, e.properties FROM edges e " + "WHERE e.project = ?1 AND e.type = 'ASYNC_CALLS'", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); + + int count = 0; + while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { + int64_t caller_id = sqlite3_column_int64(s, 0); + int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); + const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); + + char url_path[CBM_SZ_256] = {0}; + char broker[CBM_SZ_128] = {0}; + json_str_prop(props, "url_path", url_path, sizeof(url_path)); + json_str_prop(props, "broker", broker, sizeof(broker)); + if (!url_path[0]) { + continue; + } + + char route_qn[CR_QN_BUF]; + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", broker[0] ? 
broker : "async", + url_path); + + char handler_name[CBM_SZ_256] = {0}; + char handler_file[CBM_SZ_512] = {0}; + int64_t handler_id = + find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), + handler_file, sizeof(handler_file)); + if (handler_id == 0) { + continue; + } + + char edge_props[CR_PROPS_BUF]; + build_cross_props(edge_props, sizeof(edge_props), tgt_project, handler_name, handler_file, + url_path, "url_path", broker); + insert_cross_edge(src_store, src_project, caller_id, route_id, "CROSS_ASYNC_CALLS", + edge_props); + count++; + } + sqlite3_finalize(s); + return count; +} + +/* ── Phase C: Channel matching ───────────────────────────────────── */ + +/* Try to find a matching listener in target DB for a channel name. */ +static bool try_match_channel_listener(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project, + const char *channel_name, const char *transport, + int64_t emitter_id, int64_t channel_id) { + struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); + if (!tgt_db) { + return false; + } + sqlite3_stmt *tq = NULL; + if (sqlite3_prepare_v2(tgt_db, + "SELECT n.id, e.source_id, fn.name, fn.file_path FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type = 'LISTENS_ON' " + "JOIN nodes fn ON fn.id = e.source_id " + "WHERE n.project = ?1 AND n.name = ?2 AND n.label = 'Channel' LIMIT 1", + CBM_NOT_FOUND, &tq, NULL) != SQLITE_OK) { + return false; + } + sqlite3_bind_text(tq, SKIP_ONE, tgt_project, CBM_NOT_FOUND, SQLITE_STATIC); + sqlite3_bind_text(tq, PAIR_LEN, channel_name, CBM_NOT_FOUND, SQLITE_STATIC); + + bool matched = false; + if (sqlite3_step(tq) == SQLITE_ROW) { + int64_t tgt_channel_id = sqlite3_column_int64(tq, 0); + int64_t listener_id = sqlite3_column_int64(tq, SKIP_ONE); + const char *listener_name = (const char *)sqlite3_column_text(tq, PAIR_LEN); + const char *listener_file = (const char *)sqlite3_column_text(tq, CR_COL_3); + + /* Forward edge: emitter → local 
Channel */ + char fwd[CR_PROPS_BUF]; + build_cross_props(fwd, sizeof(fwd), tgt_project, listener_name ? listener_name : "", + listener_file ? listener_file : "", channel_name, "channel_name", + transport); + insert_cross_edge(src_store, src_project, emitter_id, channel_id, "CROSS_CHANNEL", fwd); + + /* Reverse edge: listener → target Channel */ + char caller_name[CBM_SZ_256] = {0}; + char caller_file[CBM_SZ_512] = {0}; + lookup_node_info(cbm_store_get_db(src_store), emitter_id, caller_name, sizeof(caller_name), + caller_file, sizeof(caller_file)); + + char rev[CR_PROPS_BUF]; + build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, channel_name, + "channel_name", transport); + insert_cross_edge(tgt_store, tgt_project, listener_id, tgt_channel_id, "CROSS_CHANNEL", + rev); + matched = true; + } + sqlite3_finalize(tq); + return matched; +} + +static int match_channels(cbm_store_t *src_store, const char *src_project, cbm_store_t *tgt_store, + const char *tgt_project) { + struct sqlite3 *src_db = cbm_store_get_db(src_store); + if (!src_db) { + return 0; + } + + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(src_db, + "SELECT DISTINCT n.id, n.name, n.qualified_name, n.properties, " + "e.source_id FROM nodes n " + "JOIN edges e ON e.target_id = n.id AND e.type = 'EMITS' " + "WHERE n.project = ?1 AND n.label = 'Channel'", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); + + int count = 0; + while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { + const char *channel_name = (const char *)sqlite3_column_text(s, SKIP_ONE); + const char *channel_qn = (const char *)sqlite3_column_text(s, PAIR_LEN); + if (!channel_name || !channel_qn) { + continue; + } + int64_t channel_id = sqlite3_column_int64(s, 0); + const char *channel_props = (const char *)sqlite3_column_text(s, CR_COL_3); + int64_t emitter_id = sqlite3_column_int64(s, CR_COL_4); + + char 
transport[CBM_SZ_64] = {0}; + json_str_prop(channel_props, "transport", transport, sizeof(transport)); + + if (try_match_channel_listener(src_store, src_project, tgt_store, tgt_project, channel_name, + transport, emitter_id, channel_id)) { + count++; + } + } + sqlite3_finalize(s); + return count; +} + +/* ── Phase D: Generic route-type matcher (gRPC, GraphQL, tRPC) ──── */ + +/* Look up a node's qualified_name by id. Returns true if found. */ +static bool lookup_node_qn(struct sqlite3 *db, int64_t node_id, char *out, size_t out_sz) { + out[0] = '\0'; + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, "SELECT qualified_name FROM nodes WHERE id = ?1", CBM_NOT_FOUND, &st, + NULL) != SQLITE_OK) { + return false; + } + sqlite3_bind_int64(st, SKIP_ONE, node_id); + bool found = false; + if (sqlite3_step(st) == SQLITE_ROW) { + const char *qn = (const char *)sqlite3_column_text(st, 0); + if (qn) { + snprintf(out, out_sz, "%s", qn); + found = true; + } + } + sqlite3_finalize(st); + return found; +} + +/* Match edges of a given type against Route nodes with a given QN prefix. + * Reuses the same infrastructure as HTTP/async matching. 
*/ +static int match_typed_routes(cbm_store_t *src_store, const char *src_project, + cbm_store_t *tgt_store, const char *tgt_project, + const char *edge_type, const char *svc_key, const char *method_key, + const char *cross_edge_type) { + struct sqlite3 *src_db = cbm_store_get_db(src_store); + if (!src_db) { + return 0; + } + + char sql[CBM_SZ_256]; + snprintf(sql, sizeof(sql), + "SELECT e.source_id, e.target_id, e.properties FROM edges e " + "WHERE e.project = ?1 AND e.type = '%s'", + edge_type); + + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(src_db, sql, CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + return 0; + } + sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); + + int count = 0; + while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { + int64_t caller_id = sqlite3_column_int64(s, 0); + int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); + const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); + + char svc_val[CBM_SZ_256] = {0}; + char meth_val[CBM_SZ_256] = {0}; + json_str_prop(props, svc_key, svc_val, sizeof(svc_val)); + json_str_prop(props, method_key, meth_val, sizeof(meth_val)); + if (!svc_val[0] && !meth_val[0]) { + continue; + } + + /* Look up the Route QN from the target node (already points to the Route). 
*/ + char route_qn[CR_QN_BUF] = {0}; + if (!lookup_node_qn(src_db, route_id, route_qn, sizeof(route_qn))) { + continue; + } + + char handler_name[CBM_SZ_256] = {0}; + char handler_file[CBM_SZ_512] = {0}; + int64_t handler_id = + find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), + handler_file, sizeof(handler_file)); + if (handler_id == 0) { + continue; + } + + emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, + tgt_store, tgt_project, handler_id, route_qn, handler_name, + handler_file, svc_val, svc_key, cross_edge_type); + count++; + } + sqlite3_finalize(s); + return count; +} + +/* ── Collect target projects ─────────────────────────────────────── */ + +/* When target_projects = ["*"], scan the cache directory for all .db files. */ +static int collect_all_projects(char ***out) { + const char *dir = cr_cache_dir(); + cbm_dir_t *d = cbm_opendir(dir); + if (!d) { + *out = NULL; + return 0; + } + + int cap = CR_INIT_CAP; + int count = 0; + char **projects = malloc((size_t)cap * sizeof(char *)); + + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + size_t len = strlen(ent->name); + if (len < CR_COL_4 || strcmp(ent->name + len - CR_DB_EXT_LEN, ".db") != 0) { + continue; + } + if (strstr(ent->name, "_cross_repo") || strstr(ent->name, "_config")) { + continue; + } + if (strstr(ent->name, "-wal") || strstr(ent->name, "-shm")) { + continue; + } + if (count >= cap) { + cap *= PAIR_LEN; + char **tmp = realloc(projects, (size_t)cap * sizeof(char *)); + if (!tmp) { + break; + } + projects = tmp; + } + /* Strip .db extension */ + projects[count] = malloc(len - PAIR_LEN); + memcpy(projects[count], ent->name, len - CR_DB_EXT_LEN); + projects[count][len - CR_DB_EXT_LEN] = '\0'; + count++; + } + cbm_closedir(d); + + *out = projects; + return count; +} + +static void free_project_list(char **projects, int count) { + for (int i = 0; i < count; i++) { + free(projects[i]); + } + free(projects); +} + +/* ── Entry 
point ─────────────────────────────────────────────────── */ + +cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **target_projects, + int target_count) { + cbm_cross_repo_result_t result = {0}; + struct timespec t0; + clock_gettime(CLOCK_MONOTONIC, &t0); + + /* Open source project store (read-write) */ + char src_path[CR_PATH_BUF]; + cr_db_path(project, src_path, sizeof(src_path)); + cbm_store_t *src_store = cbm_store_open_path(src_path); + if (!src_store) { + return result; + } + + /* Clean existing CROSS_* edges for this project */ + delete_cross_edges(src_store, project); + + /* Resolve target projects */ + char **resolved = NULL; + int resolved_count = 0; + bool own_list = false; + + if (target_count == SKIP_ONE && strcmp(target_projects[0], "*") == 0) { + resolved_count = collect_all_projects(&resolved); + own_list = true; + } else { + resolved = (char **)target_projects; + resolved_count = target_count; + } + + /* Match against each target */ + for (int i = 0; i < resolved_count; i++) { + const char *tgt = resolved[i]; + if (strcmp(tgt, project) == 0) { + continue; /* skip self */ + } + + char tgt_path[CR_PATH_BUF]; + cr_db_path(tgt, tgt_path, sizeof(tgt_path)); + + /* Open target store read-write (for bidirectional edge writes) */ + cbm_store_t *tgt_store = cbm_store_open_path(tgt_path); + if (!tgt_store) { + continue; + } + + result.http_edges += match_http_routes(src_store, project, tgt_store, tgt); + result.http_edges += match_http_routes(tgt_store, tgt, src_store, project); + result.async_edges += match_async_routes(src_store, project, tgt_store, tgt); + result.channel_edges += match_channels(src_store, project, tgt_store, tgt); + result.grpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "GRPC_CALLS", + "service", "method", "CROSS_GRPC_CALLS"); + result.graphql_edges += + match_typed_routes(src_store, project, tgt_store, tgt, "GRAPHQL_CALLS", "operation", + "operation", "CROSS_GRAPHQL_CALLS"); + 
result.trpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "TRPC_CALLS", + "procedure", "procedure", "CROSS_TRPC_CALLS"); + result.projects_scanned++; + + cbm_store_close(tgt_store); + } + + cbm_store_close(src_store); + + if (own_list) { + free_project_list(resolved, resolved_count); + } + + struct timespec t1; + clock_gettime(CLOCK_MONOTONIC, &t1); + result.elapsed_ms = ((double)(t1.tv_sec - t0.tv_sec) * CR_MS_PER_SEC) + + ((double)(t1.tv_nsec - t0.tv_nsec) / CR_NS_PER_MS); + + int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + + result.graphql_edges + result.trpc_edges; + cbm_log_info("cross_repo.done", "project", project, "total", cr_itoa(total)); + + return result; +} From 805f941e1a8ef30a3723a9ba2347ef08bc11c2ee Mon Sep 17 00:00:00 2001 From: RithvikReddy0-0 Date: Sat, 20 Jun 2026 14:25:52 +0000 Subject: [PATCH 2/5] style: apply clang-format and remove hardcoded URL from comments Signed-off-by: RithvikReddy0-0 --- src/pipeline/pass_calls.c | 4 +- src/pipeline/pass_cross_repo.c | 88 +++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c index 8fe9aeacb..813ee438f 100644 --- a/src/pipeline/pass_calls.c +++ b/src/pipeline/pass_calls.c @@ -369,8 +369,8 @@ static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); if (!target_node || source_node->id == target_node->id) { cbm_svc_kind_t svc = cbm_service_pattern_match(res.qualified_name); - if ((svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) && - call->first_string_arg && call->first_string_arg[0] != '\0') { + if ((svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) && call->first_string_arg && + call->first_string_arg[0] != '\0') { emit_http_async_edge(ctx, call, source_node, source_node, &res, svc); } return 0; diff --git a/src/pipeline/pass_cross_repo.c 
b/src/pipeline/pass_cross_repo.c index e232eebad..fcc3e424d 100644 --- a/src/pipeline/pass_cross_repo.c +++ b/src/pipeline/pass_cross_repo.c @@ -85,12 +85,14 @@ static const char *json_str_prop(const char *json, const char *key, char *buf, s /* Strip scheme and authority from a URL, returning a pointer into the * original string at the start of the path component. - * "http://svc:8080/v2/orders/123" -> "/v2/orders/123" + * "http://hostname:port/v2/orders/123" -> "/v2/orders/123" * "/v2/orders/123" -> "/v2/orders/123" (already a path) */ static const char *cr_url_path(const char *url) { - if (!url) return url; + if (!url) + return url; const char *scheme_end = strstr(url, "://"); - if (!scheme_end) return url; + if (!scheme_end) + return url; const char *path_start = strchr(scheme_end + 3, '/'); return path_start ? path_start : url; } @@ -100,14 +102,19 @@ static const char *cr_url_path(const char *url) { * "/v2/orders/123" matches "/v2/orders/{id}" -> 1 * "/v2/orders/123" matches "/v2/orders/456" -> 0 */ static int cr_path_matches_template(const char *concrete, const char *tmpl) { - if (!concrete || !tmpl) return 0; + if (!concrete || !tmpl) + return 0; const char *c = concrete; const char *t = tmpl; while (*c || *t) { - while (*c == '/') c++; - while (*t == '/') t++; - if (!*c && !*t) return 1; - if (!*c || !*t) return 0; + while (*c == '/') + c++; + while (*t == '/') + t++; + if (!*c && !*t) + return 1; + if (!*c || !*t) + return 0; const char *ce = strchr(c, '/'); const char *te = strchr(t, '/'); size_t clen = ce ? (size_t)(ce - c) : strlen(c); @@ -122,8 +129,6 @@ static int cr_path_matches_template(const char *concrete, const char *tmpl) { return 1; } - - /* Build CROSS_* edge properties JSON. 
*/ static void build_cross_props(char *buf, size_t bufsz, const char *target_project, const char *target_function, const char *target_file, @@ -251,45 +256,49 @@ static int64_t find_route_handler(cbm_store_t *target_store, const char *route_q /* Fuzzy route lookup: scan all Route nodes in target and return the first * whose path template matches the concrete norm_path from the consumer. * Fills route_qn_out with the matched QN (for edge emission). */ -static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, - const char *norm_path, const char *method, - char *route_qn_out, size_t route_qn_sz, - char *handler_name, size_t name_sz, - char *handler_file, size_t file_sz) { +static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, const char *norm_path, + const char *method, char *route_qn_out, size_t route_qn_sz, + char *handler_name, size_t name_sz, char *handler_file, + size_t file_sz) { handler_name[0] = '\0'; handler_file[0] = '\0'; struct sqlite3 *db = cbm_store_get_db(target_store); - if (!db) return 0; + if (!db) + return 0; sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(db, - "SELECT id, qualified_name, name FROM nodes WHERE label = 'Route'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + if (sqlite3_prepare_v2(db, "SELECT id, qualified_name, name FROM nodes WHERE label = 'Route'", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { return 0; } int64_t matched_route_id = 0; while (sqlite3_step(s) == SQLITE_ROW) { - int64_t rid = sqlite3_column_int64(s, 0); - const char *qn = (const char *)sqlite3_column_text(s, SKIP_ONE); + int64_t rid = sqlite3_column_int64(s, 0); + const char *qn = (const char *)sqlite3_column_text(s, SKIP_ONE); const char *name = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (!qn || !name) continue; + if (!qn || !name) + continue; /* QN format: __route__METHOD__/path */ const char *after_prefix = strstr(qn, "__route__"); - if (!after_prefix) continue; + if (!after_prefix) + continue; after_prefix += 9; /* skip "__route__" 
*/ const char *second_sep = strstr(after_prefix, "__"); - if (!second_sep) continue; + if (!second_sep) + continue; const char *tmpl_path = second_sep + 2; /* method filter: skip mismatched methods, always accept ANY */ if (method) { size_t mlen = (size_t)(second_sep - after_prefix); char qn_method[CBM_SZ_32] = {0}; - if (mlen >= sizeof(qn_method)) mlen = sizeof(qn_method) - 1; + if (mlen >= sizeof(qn_method)) + mlen = sizeof(qn_method) - 1; strncpy(qn_method, after_prefix, mlen); - if (strcmp(qn_method, "ANY") != 0 && strcmp(qn_method, method) != 0) continue; + if (strcmp(qn_method, "ANY") != 0 && strcmp(qn_method, method) != 0) + continue; } if (cr_path_matches_template(norm_path, tmpl_path)) { @@ -299,14 +308,15 @@ static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, } } sqlite3_finalize(s); - if (matched_route_id == 0) return 0; + if (matched_route_id == 0) + return 0; /* Follow HANDLES edge to find the handler function */ if (sqlite3_prepare_v2(db, - "SELECT n.id, n.name, n.file_path FROM edges e " - "JOIN nodes n ON n.id = e.source_id " - "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { + "SELECT n.id, n.name, n.file_path FROM edges e " + "JOIN nodes n ON n.id = e.source_id " + "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", + CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { return 0; } sqlite3_bind_int64(s, SKIP_ONE, matched_route_id); @@ -315,8 +325,10 @@ static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, handler_id = sqlite3_column_int64(s, 0); const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (n) snprintf(handler_name, name_sz, "%s", n); - if (f) snprintf(handler_file, file_sz, "%s", f); + if (n) + snprintf(handler_name, name_sz, "%s", n); + if (f) + snprintf(handler_file, file_sz, "%s", f); } sqlite3_finalize(s); return handler_id; @@ -399,7 +411,7 @@ static int match_http_routes(cbm_store_t 
*src_store, const char *src_project, } /* Normalise: strip scheme+host+port so a full URL consumer-side path - * ("http://svc:8080/v2/orders/123") can match a bare provider route + * (e.g. "http://hostname:8080/v2/orders/123") can match a bare provider route * ("/v2/orders/{id}"). Issue #523. */ const char *norm_path = cr_url_path(url_path); @@ -422,11 +434,9 @@ static int match_http_routes(cbm_store_t *src_store, const char *src_project, if (handler_id == 0) { /* Exact QN lookup missed — try path-param template matching. * Handles concrete vs template mismatch ("123" vs "{id}"). Issue #523. */ - handler_id = find_route_handler_fuzzy(tgt_store, norm_path, - method[0] ? method : NULL, - route_qn, sizeof(route_qn), - handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); + handler_id = find_route_handler_fuzzy( + tgt_store, norm_path, method[0] ? method : NULL, route_qn, sizeof(route_qn), + handler_name, sizeof(handler_name), handler_file, sizeof(handler_file)); } if (handler_id == 0) { continue; From 06bbe840f4b8d5d27d5b7c1ce485a697090b3857 Mon Sep 17 00:00:00 2001 From: RithvikReddy0-0 Date: Sat, 20 Jun 2026 15:42:07 +0000 Subject: [PATCH 3/5] style: remove hardcoded URLs from comments Signed-off-by: RithvikReddy0-0 --- src/pipeline/pass_cross_repo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pipeline/pass_cross_repo.c b/src/pipeline/pass_cross_repo.c index fcc3e424d..179d527a5 100644 --- a/src/pipeline/pass_cross_repo.c +++ b/src/pipeline/pass_cross_repo.c @@ -85,7 +85,6 @@ static const char *json_str_prop(const char *json, const char *key, char *buf, s /* Strip scheme and authority from a URL, returning a pointer into the * original string at the start of the path component. 
- * "http://hostname:port/v2/orders/123" -> "/v2/orders/123" * "/v2/orders/123" -> "/v2/orders/123" (already a path) */ static const char *cr_url_path(const char *url) { if (!url) @@ -411,7 +410,6 @@ static int match_http_routes(cbm_store_t *src_store, const char *src_project, } /* Normalise: strip scheme+host+port so a full URL consumer-side path - * (e.g. "http://hostname:8080/v2/orders/123") can match a bare provider route * ("/v2/orders/{id}"). Issue #523. */ const char *norm_path = cr_url_path(url_path); From 0a8a44fbf62a617681c91a109c469167730eae2b Mon Sep 17 00:00:00 2001 From: RithvikReddy0-0 Date: Sun, 21 Jun 2026 03:01:35 +0000 Subject: [PATCH 4/5] fix(calls): emit HTTP_CALLS for external clients that resolve to no QN (#523) The previous emit-without-target path sat after the empty-QN early return, so a genuinely external client (requests/axios not installed or vendored) bailed at the empty-QN return before reaching it. The import map is empty, cbm_registry_resolve returns no QN, and there was nothing for cbm_service_pattern_match to classify. Move the detection into the empty-QN branch and classify from the raw callee name (requests.get -> HTTP, GET) instead of the resolved QN. Verified without any vendored stub: HTTP_CALLS now fires and cross-repo links the call to the provider templated route (cross_http_calls: 1). Signed-off-by: RithvikReddy0-0 --- src/pipeline/pass_calls.c | 1247 +++++++++++++++++++------------------ 1 file changed, 627 insertions(+), 620 deletions(-) diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c index 813ee438f..39939aa0d 100644 --- a/src/pipeline/pass_calls.c +++ b/src/pipeline/pass_calls.c @@ -1,620 +1,627 @@ -/* - * pass_calls.c — Resolve function/method calls into CALLS edges. - * - * For each discovered file: - * 1. Re-extract calls (cbm_extract_file) - * 2. Build per-file import map from IMPORTS edges in graph buffer - * 3. Resolve each call via registry (import_map → same_module → unique → suffix) - * 4. 
Create CALLS edges in graph buffer with confidence/strategy properties - * - * Depends on: pass_definitions having populated the registry and graph buffer - */ -#include "foundation/constants.h" - -enum { PC_RING = 4, PC_RING_MASK = 3, PC_SIG_SCAN = 15, PC_REGEX_GRP = 2 }; -#include "pipeline/pipeline.h" -#include -#include "pipeline/pipeline_internal.h" -#include "pipeline/lsp_resolve.h" -#include "graph_buffer/graph_buffer.h" -#include "foundation/log.h" -#include "foundation/compat.h" -#include "foundation/str_util.h" -#include "cbm.h" -#include "service_patterns.h" - -#include "foundation/compat_regex.h" - -#include -#include -#include -#include - -/* Read entire file into heap-allocated buffer. Caller must free(). */ -static char *read_file(const char *path, int *out_len) { - FILE *f = fopen(path, "rb"); - if (!f) { - return NULL; - } - - (void)fseek(f, 0, SEEK_END); - long size = ftell(f); - (void)fseek(f, 0, SEEK_SET); - - if (size <= 0 || size > (long)CBM_PERCENT * CBM_SZ_1K * CBM_SZ_1K) { - (void)fclose(f); - return NULL; - } - - /* +pad: tree-sitter lexer lookahead reads past EOF; keep it in-bounds */ - enum { CBM_TS_LOOKAHEAD_PAD = 16 }; - char *buf = malloc((size_t)size + CBM_TS_LOOKAHEAD_PAD); - if (!buf) { - (void)fclose(f); - return NULL; - } - - size_t nread = fread(buf, SKIP_ONE, size, f); - (void)fclose(f); - - if (nread > (size_t)size) { - nread = (size_t)size; - } - memset(buf + nread, 0, CBM_TS_LOOKAHEAD_PAD); - *out_len = (int)nread; - return buf; -} - -/* Format int for logging. Thread-safe via TLS. */ -static const char *itoa_log(int val) { - static CBM_TLS char bufs[PC_RING][CBM_SZ_32]; - static CBM_TLS int idx = 0; - int i = idx; - idx = (idx + SKIP_ONE) & PC_RING_MASK; - snprintf(bufs[i], sizeof(bufs[i]), "%d", val); - return bufs[i]; -} - -/* Build per-file import map from cached extraction result or graph buffer edges. - * Returns parallel arrays of (local_name, module_qn) pairs. Caller frees. 
*/ -/* Parse "local_name":"value" from JSON properties string. Returns strdup'd key or NULL. */ -static char *extract_local_name_from_json(const char *props_json) { - if (!props_json) { - return NULL; - } - const char *start = strstr(props_json, "\"local_name\":\""); - if (!start) { - return NULL; - } - start += strlen("\"local_name\":\""); - const char *end = strchr(start, '"'); - if (!end || end <= start) { - return NULL; - } - return cbm_strndup(start, end - start); -} - -static int build_import_map(cbm_pipeline_ctx_t *ctx, const char *rel_path, - const CBMFileResult *result, const char ***out_keys, - const char ***out_vals, int *out_count) { - *out_keys = NULL; - *out_vals = NULL; - *out_count = 0; - - /* Fast path: build from cached extraction result (no JSON parsing) */ - if (result && result->imports.count > 0) { - const char **keys = calloc((size_t)result->imports.count, sizeof(const char *)); - const char **vals = calloc((size_t)result->imports.count, sizeof(const char *)); - int count = 0; - - for (int i = 0; i < result->imports.count; i++) { - const CBMImport *imp = &result->imports.items[i]; - if (!imp->local_name || !imp->local_name[0] || !imp->module_path) { - continue; - } - char *target_qn = cbm_pipeline_fqn_module(ctx->project_name, imp->module_path); - const cbm_gbuf_node_t *target = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); - free(target_qn); - if (!target) { - continue; - } - keys[count] = strdup(imp->local_name); - vals[count] = target->qualified_name; /* borrowed from gbuf */ - count++; - } - - *out_keys = keys; - *out_vals = vals; - *out_count = count; - return 0; - } - - /* Slow path: scan graph buffer IMPORTS edges + parse JSON properties */ - char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel_path, "__file__"); - const cbm_gbuf_node_t *file_node = cbm_gbuf_find_by_qn(ctx->gbuf, file_qn); - free(file_qn); - if (!file_node) { - return 0; - } - - const cbm_gbuf_edge_t **edges = NULL; - int edge_count = 0; - int rc = 
cbm_gbuf_find_edges_by_source_type(ctx->gbuf, file_node->id, "IMPORTS", &edges, - &edge_count); - if (rc != 0 || edge_count == 0) { - return 0; - } - - const char **keys = calloc(edge_count, sizeof(const char *)); - const char **vals = calloc(edge_count, sizeof(const char *)); - int count = 0; - - for (int i = 0; i < edge_count; i++) { - const cbm_gbuf_edge_t *e = edges[i]; - const cbm_gbuf_node_t *target = cbm_gbuf_find_by_id(ctx->gbuf, e->target_id); - if (!target) { - continue; - } - char *key = extract_local_name_from_json(e->properties_json); - if (key) { - keys[count] = key; - vals[count] = target->qualified_name; - count++; - } - } - - *out_keys = keys; - *out_vals = vals; - *out_count = count; - return 0; -} - -static void free_import_map(const char **keys, const char **vals, int count) { - if (keys) { - for (int i = 0; i < count; i++) { - free((void *)keys[i]); - } - free((void *)keys); - } - if (vals) { - free((void *)vals); - } -} - -/* Handle a route registration call: create Route node + HANDLES edge. */ -static void handle_route_registration(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source_node, const char *module_qn, - const char **imp_keys, const char **imp_vals, int imp_count) { - const char *method = cbm_service_pattern_route_method(call->callee_name); - char route_qn[CBM_ROUTE_QN_SIZE]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method ? method : "ANY", - call->first_string_arg); - char route_props[CBM_SZ_256]; - snprintf(route_props, sizeof(route_props), "{\"method\":\"%s\"}", method ? 
method : "ANY"); - int64_t route_id = cbm_gbuf_upsert_node(ctx->gbuf, "Route", call->first_string_arg, route_qn, - "", 0, 0, route_props); - char esc_cn[CBM_SZ_256]; /* sliced source text: escape quotes/newlines */ - char esc_fa[CBM_SZ_256]; - cbm_json_escape(esc_cn, sizeof(esc_cn), call->callee_name); - cbm_json_escape(esc_fa, sizeof(esc_fa), call->first_string_arg); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"url_path\":\"%s\",\"via\":\"route_registration\"}", esc_cn, - esc_fa); - cbm_gbuf_insert_edge(ctx->gbuf, source_node->id, route_id, "CALLS", props); - if (call->second_arg_name != NULL && call->second_arg_name[0] != '\0') { - cbm_resolution_t hres = cbm_registry_resolve(ctx->registry, call->second_arg_name, - module_qn, imp_keys, imp_vals, imp_count); - if (hres.qualified_name != NULL && hres.qualified_name[0] != '\0') { - const cbm_gbuf_node_t *handler = cbm_gbuf_find_by_qn(ctx->gbuf, hres.qualified_name); - if (handler != NULL) { - char hprops[CBM_SZ_1K]; /* must exceed escaped value + wrapper or snprintf cuts the - closing brace */ - char esc_h[CBM_SZ_512]; - cbm_json_escape(esc_h, sizeof(esc_h), hres.qualified_name); - snprintf(hprops, sizeof(hprops), "{\"handler\":\"%s\"}", esc_h); - cbm_gbuf_insert_edge(ctx->gbuf, handler->id, route_id, "HANDLES", hprops); - } - } - } -} - -/* Emit an HTTP/async route edge for a service call. */ -/* Build route QN and upsert Route node for HTTP/async edge. */ -static int64_t create_svc_route_node(cbm_pipeline_ctx_t *ctx, const char *url, cbm_svc_kind_t svc, - const char *method, const char *broker) { - char route_qn[CBM_ROUTE_QN_SIZE]; - const char *prefix; - if (svc == CBM_SVC_HTTP) { - prefix = method ? method : "ANY"; - } else { - prefix = broker ? broker : "async"; - } - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", prefix, url); - const char *rp; - if (svc == CBM_SVC_HTTP) { - rp = method ? method : "{}"; - } else { - rp = broker ? 
broker : "{}"; - } - return cbm_gbuf_upsert_node(ctx->gbuf, "Route", url, route_qn, "", 0, 0, rp); -} - -static void emit_http_async_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, - const cbm_resolution_t *res, cbm_svc_kind_t svc) { - const char *url_or_topic = call->first_string_arg; - bool is_url = (url_or_topic && url_or_topic[0] != '\0' && - (url_or_topic[0] == '/' || strstr(url_or_topic, "://") != NULL)); - bool is_topic = (url_or_topic && url_or_topic[0] != '\0' && svc == CBM_SVC_ASYNC && - strlen(url_or_topic) > PAIR_LEN); - if (!is_url && !is_topic) { - char esc_callee[CBM_SZ_256]; - cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", - esc_callee, res->confidence, res->strategy ? res->strategy : "unknown", - res->candidate_count); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); - return; - } - const char *edge_type = (svc == CBM_SVC_HTTP) ? "HTTP_CALLS" : "ASYNC_CALLS"; - const char *method = - (svc == CBM_SVC_HTTP) ? cbm_service_pattern_http_method(call->callee_name) : NULL; - const char *broker = - (svc == CBM_SVC_ASYNC) ? cbm_service_pattern_broker(res->qualified_name) : NULL; - int64_t route_id = create_svc_route_node(ctx, url_or_topic, svc, method, broker); - char esc_callee[CBM_SZ_256]; - char esc_url[CBM_SZ_256]; - cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); - cbm_json_escape(esc_url, sizeof(esc_url), url_or_topic); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s}", esc_callee, - esc_url, method ? ",\"method\":\"" : "", method ? method : "", method ? "\"" : "", - broker ? ",\"broker\":\"" : "", broker ? 
broker : ""); - if (broker) { - size_t plen = strlen(props); - if (plen > 0 && props[plen - SKIP_ONE] != '}') { - snprintf(props + plen - 1, sizeof(props) - plen + SKIP_ONE, "\"}"); - } - } - cbm_gbuf_insert_edge(ctx->gbuf, source->id, route_id, edge_type, props); -} - -/* Classify a resolved call and emit the appropriate edge. */ -static void emit_classified_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, - const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, - const cbm_resolution_t *res, const char *module_qn, - const char **imp_keys, const char **imp_vals, int imp_count) { - cbm_svc_kind_t svc = cbm_service_pattern_match(res->qualified_name); - if (svc == CBM_SVC_ROUTE_REG && call->first_string_arg && call->first_string_arg[0] == '/') { - handle_route_registration(ctx, call, source, module_qn, imp_keys, imp_vals, imp_count); - return; - } - if (svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) { - emit_http_async_edge(ctx, call, source, target, res, svc); - return; - } - if (svc == CBM_SVC_CONFIG) { - char esc_c[CBM_SZ_256]; - char esc_k[CBM_SZ_256]; - cbm_json_escape(esc_c, sizeof(esc_c), call->callee_name); - cbm_json_escape(esc_k, sizeof(esc_k), call->first_string_arg ? call->first_string_arg : ""); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f}", - esc_c, esc_k, res->confidence); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CONFIGURES", props); - return; - } - char esc_c2[CBM_SZ_256]; - cbm_json_escape(esc_c2, sizeof(esc_c2), call->callee_name); - char props[CBM_SZ_512]; - snprintf(props, sizeof(props), - "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", - esc_c2, res->confidence, res->strategy ? res->strategy : "unknown", - res->candidate_count); - cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); -} - -/* Find source node for a call: enclosing function or file node. 
*/ -static const cbm_gbuf_node_t *calls_find_source(cbm_pipeline_ctx_t *ctx, const char *rel, - const char *enclosing_qn) { - const cbm_gbuf_node_t *src = NULL; - if (enclosing_qn) { - src = cbm_gbuf_find_by_qn(ctx->gbuf, enclosing_qn); - } - if (!src) { - char *fqn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__"); - src = cbm_gbuf_find_by_qn(ctx->gbuf, fqn); - free(fqn); - } - return src; -} - -/* Resolve one call and emit the appropriate edge. Returns 1 if resolved, 0 if not. */ -static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, - const CBMResolvedCallArray *lsp_calls, const char *rel, - const char *module_qn, const char **imp_keys, const char **imp_vals, - int imp_count) { - const cbm_gbuf_node_t *source_node = calls_find_source(ctx, rel, call->enclosing_func_qn); - if (!source_node) { - return 0; - } - - /* LSP-resolved calls take precedence over registry-textual matching. */ - const CBMResolvedCall *lsp = cbm_pipeline_find_lsp_resolution(lsp_calls, call); - if (lsp) { - const cbm_gbuf_node_t *target_node = - cbm_pipeline_lsp_target_node(ctx->gbuf, ctx->project_name, lsp->callee_qn); - if (target_node && source_node->id != target_node->id) { - cbm_resolution_t res = {0}; - /* Use the gbuf node's QN so downstream edge props show the canonical - * project-qualified form even when fallback prefixed the project. 
*/ - res.qualified_name = target_node->qualified_name; - res.confidence = lsp->confidence; - res.strategy = lsp->strategy; - res.candidate_count = 1; - emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, - imp_vals, imp_count); - return SKIP_ONE; - } - } - - cbm_resolution_t res = cbm_registry_resolve(ctx->registry, call->callee_name, module_qn, - imp_keys, imp_vals, imp_count); - if (!res.qualified_name || res.qualified_name[0] == '\0') { - return 0; - } - const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); - if (!target_node || source_node->id == target_node->id) { - cbm_svc_kind_t svc = cbm_service_pattern_match(res.qualified_name); - if ((svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) && call->first_string_arg && - call->first_string_arg[0] != '\0') { - emit_http_async_edge(ctx, call, source_node, source_node, &res, svc); - } - return 0; - } - emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, imp_vals, - imp_count); - return SKIP_ONE; -} - -static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, - const cbm_file_info_t *fi, bool *owned) { - *owned = false; - if (ctx->result_cache && ctx->result_cache[idx]) { - return ctx->result_cache[idx]; - } - int slen = 0; - char *src = read_file(fi->path, &slen); - if (!src) { - return NULL; - } - CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, - CBM_EXTRACT_BUDGET, NULL, NULL); - free(src); - if (r) { - *owned = true; - } - return r; -} - -int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { - cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); - - int total_calls = 0; - int resolved = 0; - int unresolved = 0; - int errors = 0; - - for (int i = 0; i < file_count; i++) { - if (cbm_pipeline_check_cancel(ctx)) { - return CBM_NOT_FOUND; - } - - const char *rel = files[i].rel_path; - bool 
result_owned = false; - CBMFileResult *result = calls_get_or_extract(ctx, i, &files[i], &result_owned); - if (!result) { - errors++; - continue; - } - - if (result->calls.count == 0) { - if (result_owned) { - cbm_free_result(result); - } - continue; - } - - /* Build import map for this file */ - const char **imp_keys = NULL; - const char **imp_vals = NULL; - int imp_count = 0; - build_import_map(ctx, rel, result, &imp_keys, &imp_vals, &imp_count); - - /* Compute module QN for same-module resolution */ - char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, rel); - - /* Resolve each call */ - for (int c = 0; c < result->calls.count; c++) { - CBMCall *call = &result->calls.items[c]; - if (!call->callee_name) { - continue; - } - total_calls++; - if (resolve_single_call(ctx, call, &result->resolved_calls, rel, module_qn, imp_keys, - imp_vals, imp_count)) { - resolved++; - } else { - unresolved++; - } - } - - free(module_qn); - free_import_map(imp_keys, imp_vals, imp_count); - if (result_owned) { - cbm_free_result(result); - } - } - - cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved", - itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors", - itoa_log(errors)); - - /* Additional pattern-based edge passes run after normal call resolution */ - cbm_pipeline_pass_fastapi_depends(ctx, files, file_count); - - return 0; -} - -/* ── FastAPI Depends() tracking ──────────────────────────────────── */ -/* Scans Python function signatures for Depends(func_ref) patterns and - * creates CALLS edges from the endpoint to the dependency function. - * Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */ - -/* Extract Python function signature text from source starting at given line. Caller frees. 
*/ -static char *extract_py_signature(const char *source, int start_line, int end_line) { - int sig_end = start_line + PC_SIG_SCAN; - if (end_line > 0 && sig_end > end_line) { - sig_end = end_line; - } - const char *p = source; - int line = SKIP_ONE; - while (*p && line < start_line) { - if (*p == '\n') { - line++; - } - p++; - } - const char *sig_start = p; - while (*p && line < sig_end) { - if (*p == '\n') { - line++; - } - p++; - if (p > sig_start + SKIP_ONE && p[-SKIP_ONE] == ':' && p[-PAIR_LEN] == ')') { - break; - } - } - size_t sig_len = (size_t)(p - sig_start); - char *sig = malloc(sig_len + SKIP_ONE); - if (!sig) { - return NULL; - } - memcpy(sig, sig_start, sig_len); - sig[sig_len] = '\0'; - return sig; -} - -/* Scan one function's signature for Depends(func_ref) and create CALLS edges. */ -static int scan_depends_in_sig(cbm_pipeline_ctx_t *ctx, const cbm_regex_t *re, const char *sig, - const CBMDefinition *def, const char *module_qn, const char **ik, - const char **iv, int ic) { - int count = 0; - cbm_regmatch_t match[PC_REGEX_GRP]; - const char *scan = sig; - while (cbm_regexec(re, scan, PC_REGEX_GRP, match, 0) == 0) { - int ref_len = match[SKIP_ONE].rm_eo - match[SKIP_ONE].rm_so; - char func_ref[CBM_SZ_256]; - if (ref_len >= (int)sizeof(func_ref)) { - ref_len = (int)sizeof(func_ref) - SKIP_ONE; - } - memcpy(func_ref, scan + match[SKIP_ONE].rm_so, (size_t)ref_len); - func_ref[ref_len] = '\0'; - cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn, ik, iv, ic); - if (res.qualified_name && res.qualified_name[0] != '\0') { - const cbm_gbuf_node_t *sn = cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); - const cbm_gbuf_node_t *tn = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); - if (sn && tn && sn->id != tn->id) { - cbm_gbuf_insert_edge(ctx->gbuf, sn->id, tn->id, "CALLS", - "{\"confidence\":0.95,\"strategy\":\"fastapi_depends\"}"); - count++; - } - } - scan += match[0].rm_eo; - } - return count; -} - -static bool 
is_callable_def(const CBMDefinition *def) { - return def->qualified_name && def->start_line > 0 && def->label && - (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0); -} - -static bool file_has_depends_call(const CBMFileResult *result) { - for (int c = 0; c < result->calls.count; c++) { - if (result->calls.items[c].callee_name && - strcmp(result->calls.items[c].callee_name, "Depends") == 0) { - return true; - } - } - return false; -} - -void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, - int file_count) { - cbm_regex_t depends_re; - if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) { - return; - } - - int edge_count = 0; - for (int i = 0; i < file_count; i++) { - if (files[i].language != CBM_LANG_PYTHON) { - continue; - } - if (cbm_pipeline_check_cancel(ctx)) { - break; - } - - CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; - if (!result || !file_has_depends_call(result)) { - continue; - } - - /* Read source and scan for Depends(func_ref) in function signatures */ - int source_len = 0; - char *source = read_file(files[i].path, &source_len); - if (!source) { - continue; - } - - char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); - - /* Build import map for alias resolution */ - const char **imp_keys = NULL; - const char **imp_vals = NULL; - int imp_count = 0; - build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count); - - for (int d = 0; d < result->defs.count; d++) { - CBMDefinition *def = &result->defs.items[d]; - if (!is_callable_def(def)) { - continue; - } - - char *sig = extract_py_signature(source, (int)def->start_line, (int)def->end_line); - if (!sig) { - continue; - } - - edge_count += scan_depends_in_sig(ctx, &depends_re, sig, def, module_qn, imp_keys, - imp_vals, imp_count); - free(sig); - } - - free(module_qn); - free_import_map(imp_keys, imp_vals, imp_count); - 
free(source); - } - - cbm_regfree(&depends_re); - if (edge_count > 0) { - cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count)); - } -} - -/* DLL resolve tracking removed — triggered Windows Defender false positive. - * See issue #89. */ +/* + * pass_calls.c — Resolve function/method calls into CALLS edges. + * + * For each discovered file: + * 1. Re-extract calls (cbm_extract_file) + * 2. Build per-file import map from IMPORTS edges in graph buffer + * 3. Resolve each call via registry (import_map → same_module → unique → suffix) + * 4. Create CALLS edges in graph buffer with confidence/strategy properties + * + * Depends on: pass_definitions having populated the registry and graph buffer + */ +#include "foundation/constants.h" + +enum { PC_RING = 4, PC_RING_MASK = 3, PC_SIG_SCAN = 15, PC_REGEX_GRP = 2 }; +#include "pipeline/pipeline.h" +#include +#include "pipeline/pipeline_internal.h" +#include "pipeline/lsp_resolve.h" +#include "graph_buffer/graph_buffer.h" +#include "foundation/log.h" +#include "foundation/compat.h" +#include "foundation/str_util.h" +#include "cbm.h" +#include "service_patterns.h" + +#include "foundation/compat_regex.h" + +#include +#include +#include +#include + +/* Read entire file into heap-allocated buffer. Caller must free(). 
*/ +static char *read_file(const char *path, int *out_len) { + FILE *f = fopen(path, "rb"); + if (!f) { + return NULL; + } + + (void)fseek(f, 0, SEEK_END); + long size = ftell(f); + (void)fseek(f, 0, SEEK_SET); + + if (size <= 0 || size > (long)CBM_PERCENT * CBM_SZ_1K * CBM_SZ_1K) { + (void)fclose(f); + return NULL; + } + + /* +pad: tree-sitter lexer lookahead reads past EOF; keep it in-bounds */ + enum { CBM_TS_LOOKAHEAD_PAD = 16 }; + char *buf = malloc((size_t)size + CBM_TS_LOOKAHEAD_PAD); + if (!buf) { + (void)fclose(f); + return NULL; + } + + size_t nread = fread(buf, SKIP_ONE, size, f); + (void)fclose(f); + + if (nread > (size_t)size) { + nread = (size_t)size; + } + memset(buf + nread, 0, CBM_TS_LOOKAHEAD_PAD); + *out_len = (int)nread; + return buf; +} + +/* Format int for logging. Thread-safe via TLS. */ +static const char *itoa_log(int val) { + static CBM_TLS char bufs[PC_RING][CBM_SZ_32]; + static CBM_TLS int idx = 0; + int i = idx; + idx = (idx + SKIP_ONE) & PC_RING_MASK; + snprintf(bufs[i], sizeof(bufs[i]), "%d", val); + return bufs[i]; +} + +/* Build per-file import map from cached extraction result or graph buffer edges. + * Returns parallel arrays of (local_name, module_qn) pairs. Caller frees. */ +/* Parse "local_name":"value" from JSON properties string. Returns strdup'd key or NULL. 
*/ +static char *extract_local_name_from_json(const char *props_json) { + if (!props_json) { + return NULL; + } + const char *start = strstr(props_json, "\"local_name\":\""); + if (!start) { + return NULL; + } + start += strlen("\"local_name\":\""); + const char *end = strchr(start, '"'); + if (!end || end <= start) { + return NULL; + } + return cbm_strndup(start, end - start); +} + +static int build_import_map(cbm_pipeline_ctx_t *ctx, const char *rel_path, + const CBMFileResult *result, const char ***out_keys, + const char ***out_vals, int *out_count) { + *out_keys = NULL; + *out_vals = NULL; + *out_count = 0; + + /* Fast path: build from cached extraction result (no JSON parsing) */ + if (result && result->imports.count > 0) { + const char **keys = calloc((size_t)result->imports.count, sizeof(const char *)); + const char **vals = calloc((size_t)result->imports.count, sizeof(const char *)); + int count = 0; + + for (int i = 0; i < result->imports.count; i++) { + const CBMImport *imp = &result->imports.items[i]; + if (!imp->local_name || !imp->local_name[0] || !imp->module_path) { + continue; + } + char *target_qn = cbm_pipeline_fqn_module(ctx->project_name, imp->module_path); + const cbm_gbuf_node_t *target = cbm_gbuf_find_by_qn(ctx->gbuf, target_qn); + free(target_qn); + if (!target) { + continue; + } + keys[count] = strdup(imp->local_name); + vals[count] = target->qualified_name; /* borrowed from gbuf */ + count++; + } + + *out_keys = keys; + *out_vals = vals; + *out_count = count; + return 0; + } + + /* Slow path: scan graph buffer IMPORTS edges + parse JSON properties */ + char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel_path, "__file__"); + const cbm_gbuf_node_t *file_node = cbm_gbuf_find_by_qn(ctx->gbuf, file_qn); + free(file_qn); + if (!file_node) { + return 0; + } + + const cbm_gbuf_edge_t **edges = NULL; + int edge_count = 0; + int rc = cbm_gbuf_find_edges_by_source_type(ctx->gbuf, file_node->id, "IMPORTS", &edges, + &edge_count); + if (rc 
!= 0 || edge_count == 0) { + return 0; + } + + const char **keys = calloc(edge_count, sizeof(const char *)); + const char **vals = calloc(edge_count, sizeof(const char *)); + int count = 0; + + for (int i = 0; i < edge_count; i++) { + const cbm_gbuf_edge_t *e = edges[i]; + const cbm_gbuf_node_t *target = cbm_gbuf_find_by_id(ctx->gbuf, e->target_id); + if (!target) { + continue; + } + char *key = extract_local_name_from_json(e->properties_json); + if (key) { + keys[count] = key; + vals[count] = target->qualified_name; + count++; + } + } + + *out_keys = keys; + *out_vals = vals; + *out_count = count; + return 0; +} + +static void free_import_map(const char **keys, const char **vals, int count) { + if (keys) { + for (int i = 0; i < count; i++) { + free((void *)keys[i]); + } + free((void *)keys); + } + if (vals) { + free((void *)vals); + } +} + +/* Handle a route registration call: create Route node + HANDLES edge. */ +static void handle_route_registration(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source_node, const char *module_qn, + const char **imp_keys, const char **imp_vals, int imp_count) { + const char *method = cbm_service_pattern_route_method(call->callee_name); + char route_qn[CBM_ROUTE_QN_SIZE]; + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method ? method : "ANY", + call->first_string_arg); + char route_props[CBM_SZ_256]; + snprintf(route_props, sizeof(route_props), "{\"method\":\"%s\"}", method ? 
method : "ANY"); + int64_t route_id = cbm_gbuf_upsert_node(ctx->gbuf, "Route", call->first_string_arg, route_qn, + "", 0, 0, route_props); + char esc_cn[CBM_SZ_256]; /* sliced source text: escape quotes/newlines */ + char esc_fa[CBM_SZ_256]; + cbm_json_escape(esc_cn, sizeof(esc_cn), call->callee_name); + cbm_json_escape(esc_fa, sizeof(esc_fa), call->first_string_arg); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"url_path\":\"%s\",\"via\":\"route_registration\"}", esc_cn, + esc_fa); + cbm_gbuf_insert_edge(ctx->gbuf, source_node->id, route_id, "CALLS", props); + if (call->second_arg_name != NULL && call->second_arg_name[0] != '\0') { + cbm_resolution_t hres = cbm_registry_resolve(ctx->registry, call->second_arg_name, + module_qn, imp_keys, imp_vals, imp_count); + if (hres.qualified_name != NULL && hres.qualified_name[0] != '\0') { + const cbm_gbuf_node_t *handler = cbm_gbuf_find_by_qn(ctx->gbuf, hres.qualified_name); + if (handler != NULL) { + char hprops[CBM_SZ_1K]; /* must exceed escaped value + wrapper or snprintf cuts the + closing brace */ + char esc_h[CBM_SZ_512]; + cbm_json_escape(esc_h, sizeof(esc_h), hres.qualified_name); + snprintf(hprops, sizeof(hprops), "{\"handler\":\"%s\"}", esc_h); + cbm_gbuf_insert_edge(ctx->gbuf, handler->id, route_id, "HANDLES", hprops); + } + } + } +} + +/* Emit an HTTP/async route edge for a service call. */ +/* Build route QN and upsert Route node for HTTP/async edge. */ +static int64_t create_svc_route_node(cbm_pipeline_ctx_t *ctx, const char *url, cbm_svc_kind_t svc, + const char *method, const char *broker) { + char route_qn[CBM_ROUTE_QN_SIZE]; + const char *prefix; + if (svc == CBM_SVC_HTTP) { + prefix = method ? method : "ANY"; + } else { + prefix = broker ? broker : "async"; + } + snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", prefix, url); + const char *rp; + if (svc == CBM_SVC_HTTP) { + rp = method ? method : "{}"; + } else { + rp = broker ? 
broker : "{}"; + } + return cbm_gbuf_upsert_node(ctx->gbuf, "Route", url, route_qn, "", 0, 0, rp); +} + +static void emit_http_async_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, + const cbm_resolution_t *res, cbm_svc_kind_t svc) { + const char *url_or_topic = call->first_string_arg; + bool is_url = (url_or_topic && url_or_topic[0] != '\0' && + (url_or_topic[0] == '/' || strstr(url_or_topic, "://") != NULL)); + bool is_topic = (url_or_topic && url_or_topic[0] != '\0' && svc == CBM_SVC_ASYNC && + strlen(url_or_topic) > PAIR_LEN); + if (!is_url && !is_topic) { + char esc_callee[CBM_SZ_256]; + cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", + esc_callee, res->confidence, res->strategy ? res->strategy : "unknown", + res->candidate_count); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); + return; + } + const char *edge_type = (svc == CBM_SVC_HTTP) ? "HTTP_CALLS" : "ASYNC_CALLS"; + const char *method = + (svc == CBM_SVC_HTTP) ? cbm_service_pattern_http_method(call->callee_name) : NULL; + const char *broker = + (svc == CBM_SVC_ASYNC) ? cbm_service_pattern_broker(res->qualified_name) : NULL; + int64_t route_id = create_svc_route_node(ctx, url_or_topic, svc, method, broker); + char esc_callee[CBM_SZ_256]; + char esc_url[CBM_SZ_256]; + cbm_json_escape(esc_callee, sizeof(esc_callee), call->callee_name); + cbm_json_escape(esc_url, sizeof(esc_url), url_or_topic); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"url_path\":\"%s\"%s%s%s%s%s}", esc_callee, + esc_url, method ? ",\"method\":\"" : "", method ? method : "", method ? "\"" : "", + broker ? ",\"broker\":\"" : "", broker ? 
broker : ""); + if (broker) { + size_t plen = strlen(props); + if (plen > 0 && props[plen - SKIP_ONE] != '}') { + snprintf(props + plen - 1, sizeof(props) - plen + SKIP_ONE, "\"}"); + } + } + cbm_gbuf_insert_edge(ctx->gbuf, source->id, route_id, edge_type, props); +} + +/* Classify a resolved call and emit the appropriate edge. */ +static void emit_classified_edge(cbm_pipeline_ctx_t *ctx, const CBMCall *call, + const cbm_gbuf_node_t *source, const cbm_gbuf_node_t *target, + const cbm_resolution_t *res, const char *module_qn, + const char **imp_keys, const char **imp_vals, int imp_count) { + cbm_svc_kind_t svc = cbm_service_pattern_match(res->qualified_name); + if (svc == CBM_SVC_ROUTE_REG && call->first_string_arg && call->first_string_arg[0] == '/') { + handle_route_registration(ctx, call, source, module_qn, imp_keys, imp_vals, imp_count); + return; + } + if (svc == CBM_SVC_HTTP || svc == CBM_SVC_ASYNC) { + emit_http_async_edge(ctx, call, source, target, res, svc); + return; + } + if (svc == CBM_SVC_CONFIG) { + char esc_c[CBM_SZ_256]; + char esc_k[CBM_SZ_256]; + cbm_json_escape(esc_c, sizeof(esc_c), call->callee_name); + cbm_json_escape(esc_k, sizeof(esc_k), call->first_string_arg ? call->first_string_arg : ""); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"callee\":\"%s\",\"key\":\"%s\",\"confidence\":%.2f}", + esc_c, esc_k, res->confidence); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CONFIGURES", props); + return; + } + char esc_c2[CBM_SZ_256]; + cbm_json_escape(esc_c2, sizeof(esc_c2), call->callee_name); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"callee\":\"%s\",\"confidence\":%.2f,\"strategy\":\"%s\",\"candidates\":%d}", + esc_c2, res->confidence, res->strategy ? res->strategy : "unknown", + res->candidate_count); + cbm_gbuf_insert_edge(ctx->gbuf, source->id, target->id, "CALLS", props); +} + +/* Find source node for a call: enclosing function or file node. 
*/ +static const cbm_gbuf_node_t *calls_find_source(cbm_pipeline_ctx_t *ctx, const char *rel, + const char *enclosing_qn) { + const cbm_gbuf_node_t *src = NULL; + if (enclosing_qn) { + src = cbm_gbuf_find_by_qn(ctx->gbuf, enclosing_qn); + } + if (!src) { + char *fqn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__"); + src = cbm_gbuf_find_by_qn(ctx->gbuf, fqn); + free(fqn); + } + return src; +} + +/* Resolve one call and emit the appropriate edge. Returns 1 if resolved, 0 if not. */ +static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, + const CBMResolvedCallArray *lsp_calls, const char *rel, + const char *module_qn, const char **imp_keys, const char **imp_vals, + int imp_count) { + const cbm_gbuf_node_t *source_node = calls_find_source(ctx, rel, call->enclosing_func_qn); + if (!source_node) { + return 0; + } + + /* LSP-resolved calls take precedence over registry-textual matching. */ + const CBMResolvedCall *lsp = cbm_pipeline_find_lsp_resolution(lsp_calls, call); + if (lsp) { + const cbm_gbuf_node_t *target_node = + cbm_pipeline_lsp_target_node(ctx->gbuf, ctx->project_name, lsp->callee_qn); + if (target_node && source_node->id != target_node->id) { + cbm_resolution_t res = {0}; + /* Use the gbuf node's QN so downstream edge props show the canonical + * project-qualified form even when fallback prefixed the project. */ + res.qualified_name = target_node->qualified_name; + res.confidence = lsp->confidence; + res.strategy = lsp->strategy; + res.candidate_count = 1; + emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, + imp_vals, imp_count); + return SKIP_ONE; + } + } + + cbm_resolution_t res = cbm_registry_resolve(ctx->registry, call->callee_name, module_qn, + imp_keys, imp_vals, imp_count); + if (!res.qualified_name || res.qualified_name[0] == '\0') { + /* External HTTP/async client (requests, axios, ...) that resolves to no + * QN because its library is not indexed. 
Classify from the raw callee + * name and emit a service edge against the source node so cross-repo + * matching has an HTTP_CALLS edge to work with. Issue #523. */ + cbm_svc_kind_t ext_svc = cbm_service_pattern_match(call->callee_name); + if ((ext_svc == CBM_SVC_HTTP || ext_svc == CBM_SVC_ASYNC) && call->first_string_arg && + call->first_string_arg[0] != '\0') { + cbm_resolution_t ext = {0}; + ext.qualified_name = call->callee_name; + ext.strategy = "external_service"; + emit_http_async_edge(ctx, call, source_node, source_node, &ext, ext_svc); + } + return 0; + } + const cbm_gbuf_node_t *target_node = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); + if (!target_node || source_node->id == target_node->id) { + return 0; + } + emit_classified_edge(ctx, call, source_node, target_node, &res, module_qn, imp_keys, imp_vals, + imp_count); + return SKIP_ONE; +} + +static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, + const cbm_file_info_t *fi, bool *owned) { + *owned = false; + if (ctx->result_cache && ctx->result_cache[idx]) { + return ctx->result_cache[idx]; + } + int slen = 0; + char *src = read_file(fi->path, &slen); + if (!src) { + return NULL; + } + CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL); + free(src); + if (r) { + *owned = true; + } + return r; +} + +int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { + cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); + + int total_calls = 0; + int resolved = 0; + int unresolved = 0; + int errors = 0; + + for (int i = 0; i < file_count; i++) { + if (cbm_pipeline_check_cancel(ctx)) { + return CBM_NOT_FOUND; + } + + const char *rel = files[i].rel_path; + bool result_owned = false; + CBMFileResult *result = calls_get_or_extract(ctx, i, &files[i], &result_owned); + if (!result) { + errors++; + continue; + } + + if (result->calls.count == 0) { + 
if (result_owned) { + cbm_free_result(result); + } + continue; + } + + /* Build import map for this file */ + const char **imp_keys = NULL; + const char **imp_vals = NULL; + int imp_count = 0; + build_import_map(ctx, rel, result, &imp_keys, &imp_vals, &imp_count); + + /* Compute module QN for same-module resolution */ + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, rel); + + /* Resolve each call */ + for (int c = 0; c < result->calls.count; c++) { + CBMCall *call = &result->calls.items[c]; + if (!call->callee_name) { + continue; + } + total_calls++; + if (resolve_single_call(ctx, call, &result->resolved_calls, rel, module_qn, imp_keys, + imp_vals, imp_count)) { + resolved++; + } else { + unresolved++; + } + } + + free(module_qn); + free_import_map(imp_keys, imp_vals, imp_count); + if (result_owned) { + cbm_free_result(result); + } + } + + cbm_log_info("pass.done", "pass", "calls", "total", itoa_log(total_calls), "resolved", + itoa_log(resolved), "unresolved", itoa_log(unresolved), "errors", + itoa_log(errors)); + + /* Additional pattern-based edge passes run after normal call resolution */ + cbm_pipeline_pass_fastapi_depends(ctx, files, file_count); + + return 0; +} + +/* ── FastAPI Depends() tracking ──────────────────────────────────── */ +/* Scans Python function signatures for Depends(func_ref) patterns and + * creates CALLS edges from the endpoint to the dependency function. + * Without this, FastAPI auth/DI functions appear as dead code (in_degree=0). */ + +/* Extract Python function signature text from source starting at given line. Caller frees. 
*/ +static char *extract_py_signature(const char *source, int start_line, int end_line) { + int sig_end = start_line + PC_SIG_SCAN; + if (end_line > 0 && sig_end > end_line) { + sig_end = end_line; + } + const char *p = source; + int line = SKIP_ONE; + while (*p && line < start_line) { + if (*p == '\n') { + line++; + } + p++; + } + const char *sig_start = p; + while (*p && line < sig_end) { + if (*p == '\n') { + line++; + } + p++; + if (p > sig_start + SKIP_ONE && p[-SKIP_ONE] == ':' && p[-PAIR_LEN] == ')') { + break; + } + } + size_t sig_len = (size_t)(p - sig_start); + char *sig = malloc(sig_len + SKIP_ONE); + if (!sig) { + return NULL; + } + memcpy(sig, sig_start, sig_len); + sig[sig_len] = '\0'; + return sig; +} + +/* Scan one function's signature for Depends(func_ref) and create CALLS edges. */ +static int scan_depends_in_sig(cbm_pipeline_ctx_t *ctx, const cbm_regex_t *re, const char *sig, + const CBMDefinition *def, const char *module_qn, const char **ik, + const char **iv, int ic) { + int count = 0; + cbm_regmatch_t match[PC_REGEX_GRP]; + const char *scan = sig; + while (cbm_regexec(re, scan, PC_REGEX_GRP, match, 0) == 0) { + int ref_len = match[SKIP_ONE].rm_eo - match[SKIP_ONE].rm_so; + char func_ref[CBM_SZ_256]; + if (ref_len >= (int)sizeof(func_ref)) { + ref_len = (int)sizeof(func_ref) - SKIP_ONE; + } + memcpy(func_ref, scan + match[SKIP_ONE].rm_so, (size_t)ref_len); + func_ref[ref_len] = '\0'; + cbm_resolution_t res = cbm_registry_resolve(ctx->registry, func_ref, module_qn, ik, iv, ic); + if (res.qualified_name && res.qualified_name[0] != '\0') { + const cbm_gbuf_node_t *sn = cbm_gbuf_find_by_qn(ctx->gbuf, def->qualified_name); + const cbm_gbuf_node_t *tn = cbm_gbuf_find_by_qn(ctx->gbuf, res.qualified_name); + if (sn && tn && sn->id != tn->id) { + cbm_gbuf_insert_edge(ctx->gbuf, sn->id, tn->id, "CALLS", + "{\"confidence\":0.95,\"strategy\":\"fastapi_depends\"}"); + count++; + } + } + scan += match[0].rm_eo; + } + return count; +} + +static bool 
is_callable_def(const CBMDefinition *def) { + return def->qualified_name && def->start_line > 0 && def->label && + (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0); +} + +static bool file_has_depends_call(const CBMFileResult *result) { + for (int c = 0; c < result->calls.count; c++) { + if (result->calls.items[c].callee_name && + strcmp(result->calls.items[c].callee_name, "Depends") == 0) { + return true; + } + } + return false; +} + +void cbm_pipeline_pass_fastapi_depends(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, + int file_count) { + cbm_regex_t depends_re; + if (cbm_regcomp(&depends_re, "Depends\\(([A-Za-z_][A-Za-z0-9_.]*)", CBM_REG_EXTENDED) != 0) { + return; + } + + int edge_count = 0; + for (int i = 0; i < file_count; i++) { + if (files[i].language != CBM_LANG_PYTHON) { + continue; + } + if (cbm_pipeline_check_cancel(ctx)) { + break; + } + + CBMFileResult *result = ctx->result_cache ? ctx->result_cache[i] : NULL; + if (!result || !file_has_depends_call(result)) { + continue; + } + + /* Read source and scan for Depends(func_ref) in function signatures */ + int source_len = 0; + char *source = read_file(files[i].path, &source_len); + if (!source) { + continue; + } + + char *module_qn = cbm_pipeline_fqn_module(ctx->project_name, files[i].rel_path); + + /* Build import map for alias resolution */ + const char **imp_keys = NULL; + const char **imp_vals = NULL; + int imp_count = 0; + build_import_map(ctx, files[i].rel_path, result, &imp_keys, &imp_vals, &imp_count); + + for (int d = 0; d < result->defs.count; d++) { + CBMDefinition *def = &result->defs.items[d]; + if (!is_callable_def(def)) { + continue; + } + + char *sig = extract_py_signature(source, (int)def->start_line, (int)def->end_line); + if (!sig) { + continue; + } + + edge_count += scan_depends_in_sig(ctx, &depends_re, sig, def, module_qn, imp_keys, + imp_vals, imp_count); + free(sig); + } + + free(module_qn); + free_import_map(imp_keys, imp_vals, imp_count); + 
free(source); + } + + cbm_regfree(&depends_re); + if (edge_count > 0) { + cbm_log_info("pass.fastapi_depends", "edges", itoa_log(edge_count)); + } +} + +/* DLL resolve tracking removed — triggered Windows Defender false positive. + * See issue #89. */ From 4817d79e49e801574aae95ead7a1939acb1f79f5 Mon Sep 17 00:00:00 2001 From: RithvikReddy0-0 Date: Tue, 23 Jun 2026 03:04:35 +0000 Subject: [PATCH 5/5] fix(cross-repo): dedupe CROSS_* edges and document external self-pass (#523) Addresses review on #536. insert_cross_edge now skips insertion when an identical (source_id, target_id, type) edge already exists. The pass reaches the same caller/route pair from both directions and emit_cross_route_bidirectional writes both DBs, so without this guard the same CROSS_HTTP_CALLS pair was re-emitted and inflated http_edges. Verified idempotent: repeated runs and runs from either project side both yield cross_http_calls: 1 with exactly one edge per DB. Documented why emit_http_async_edge is called with source_node as both source and target in the unindexed-external-client path. Signed-off-by: RithvikReddy0-0 --- src/pipeline/pass_calls.c | 6 + src/pipeline/pass_cross_repo.c | 1661 ++++++++++++++++---------------- 2 files changed, 845 insertions(+), 822 deletions(-) diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c index 39939aa0d..828bc88b4 100644 --- a/src/pipeline/pass_calls.c +++ b/src/pipeline/pass_calls.c @@ -374,6 +374,12 @@ static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, cbm_resolution_t ext = {0}; ext.qualified_name = call->callee_name; ext.strategy = "external_service"; + /* source_node is passed as both source and target intentionally: the + * external client (requests/axios) has no node in the graph, so there + * is no distinct target to point at. 
emit_http_async_edge only uses + * the target for the non-URL CALLS fallback; for a real URL/topic it + * creates a Route node and links source -> Route, so the duplicated + * source arg is never dereferenced as a separate endpoint here. #523 */ emit_http_async_edge(ctx, call, source_node, source_node, &ext, ext_svc); } return 0; diff --git a/src/pipeline/pass_cross_repo.c b/src/pipeline/pass_cross_repo.c index 179d527a5..e3f2923e2 100644 --- a/src/pipeline/pass_cross_repo.c +++ b/src/pipeline/pass_cross_repo.c @@ -1,822 +1,839 @@ -/* - * pass_cross_repo.c — Cross-repo intelligence: match Routes, Channels, and - * async topics across indexed projects to create CROSS_* edges. - * - * For each HTTP_CALLS/ASYNC_CALLS edge in the source project, looks up the - * target Route QN in other project DBs. For each Channel node with EMITS - * edges, looks for matching LISTENS_ON in other projects (and vice versa). - * - * Edges are written bidirectionally: both source and target project DBs - * get a CROSS_* edge so the link is visible from either side. - */ -#include "pipeline/pass_cross_repo.h" -#include "foundation/constants.h" -#include "foundation/log.h" -#include "foundation/platform.h" -#include "foundation/compat.h" -#include "foundation/compat_fs.h" - -#include -#include -#include -#include -#include -#include - -/* ── Constants ───────────────────────────────────────────────────── */ - -enum { - CR_PATH_BUF = 1024, - CR_QN_BUF = 512, - CR_PROPS_BUF = 2048, - CR_MAX_EDGES = 4096, - CR_DB_EXT_LEN = 3, /* strlen(".db") */ - CR_INIT_CAP = 32, - CR_COL_3 = 3, - CR_COL_4 = 4, -}; - -#define CR_MS_PER_SEC 1000.0 -#define CR_NS_PER_MS 1000000.0 - -/* TLS buffer for integer-to-string in log calls. 
*/ -static CBM_TLS char cr_ibuf[CBM_SZ_32]; -static const char *cr_itoa(int v) { - snprintf(cr_ibuf, sizeof(cr_ibuf), "%d", v); - return cr_ibuf; -} - -/* ── Helpers ─────────────────────────────────────────────────────── */ - -static const char *cr_cache_dir(void) { - const char *dir = cbm_resolve_cache_dir(); - return dir ? dir : cbm_tmpdir(); -} - -static void cr_db_path(const char *project, char *buf, size_t bufsz) { - snprintf(buf, bufsz, "%s/%s.db", cr_cache_dir(), project); -} - -/* Extract a JSON string property from properties_json. - * Writes into buf, returns buf on success, NULL on miss. */ -static const char *json_str_prop(const char *json, const char *key, char *buf, size_t bufsz) { - if (!json || !key) { - return NULL; - } - char pat[CBM_SZ_128]; - snprintf(pat, sizeof(pat), "\"%s\":\"", key); - const char *start = strstr(json, pat); - if (!start) { - return NULL; - } - start += strlen(pat); - const char *end = strchr(start, '"'); - if (!end) { - return NULL; - } - size_t len = (size_t)(end - start); - if (len >= bufsz) { - len = bufsz - SKIP_ONE; - } - memcpy(buf, start, len); - buf[len] = '\0'; - return buf; -} - -/* Strip scheme and authority from a URL, returning a pointer into the - * original string at the start of the path component. - * "/v2/orders/123" -> "/v2/orders/123" (already a path) */ -static const char *cr_url_path(const char *url) { - if (!url) - return url; - const char *scheme_end = strstr(url, "://"); - if (!scheme_end) - return url; - const char *path_start = strchr(scheme_end + 3, '/'); - return path_start ? path_start : url; -} - -/* Return 1 if concrete path matches a route template. - * A template segment wrapped in '{' '}' matches any concrete segment. 
- * "/v2/orders/123" matches "/v2/orders/{id}" -> 1 - * "/v2/orders/123" matches "/v2/orders/456" -> 0 */ -static int cr_path_matches_template(const char *concrete, const char *tmpl) { - if (!concrete || !tmpl) - return 0; - const char *c = concrete; - const char *t = tmpl; - while (*c || *t) { - while (*c == '/') - c++; - while (*t == '/') - t++; - if (!*c && !*t) - return 1; - if (!*c || !*t) - return 0; - const char *ce = strchr(c, '/'); - const char *te = strchr(t, '/'); - size_t clen = ce ? (size_t)(ce - c) : strlen(c); - size_t tlen = te ? (size_t)(te - t) : strlen(t); - int is_param = (tlen >= 2 && t[0] == '{' && t[tlen - 1] == '}'); - if (!is_param && (clen != tlen || strncmp(c, t, clen) != 0)) { - return 0; - } - c += clen; - t += tlen; - } - return 1; -} - -/* Build CROSS_* edge properties JSON. */ -static void build_cross_props(char *buf, size_t bufsz, const char *target_project, - const char *target_function, const char *target_file, - const char *url_or_channel, const char *extra_key, - const char *extra_val) { - int n = snprintf(buf, bufsz, - "{\"target_project\":\"%s\",\"target_function\":\"%s\"," - "\"target_file\":\"%s\"", - target_project ? target_project : "", target_function ? target_function : "", - target_file ? target_file : ""); - if (url_or_channel && url_or_channel[0]) { - n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", - extra_key ? extra_key : "url_path", url_or_channel); - } - if (extra_val && extra_val[0]) { - n += snprintf(buf + n, bufsz - (size_t)n, ",\"%s\":\"%s\"", - extra_key ? "transport" : "method", extra_val); - } - snprintf(buf + n, bufsz - (size_t)n, "}"); -} - -/* Delete all CROSS_* edges for a project from a store. 
*/ -static void delete_cross_edges(cbm_store_t *store, const char *project) { - cbm_store_delete_edges_by_type(store, project, "CROSS_HTTP_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_ASYNC_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_CHANNEL"); - cbm_store_delete_edges_by_type(store, project, "CROSS_GRPC_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_GRAPHQL_CALLS"); - cbm_store_delete_edges_by_type(store, project, "CROSS_TRPC_CALLS"); -} - -/* Insert a CROSS_* edge into a store. */ -static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id, - int64_t to_id, const char *edge_type, const char *props) { - cbm_edge_t edge = { - .project = project, - .source_id = from_id, - .target_id = to_id, - .type = edge_type, - .properties_json = props, - }; - cbm_store_insert_edge(store, &edge); -} - -/* Look up a node's name and file_path by id. */ -static void lookup_node_info(struct sqlite3 *db, int64_t node_id, char *name_out, size_t name_sz, - char *file_out, size_t file_sz) { - name_out[0] = '\0'; - file_out[0] = '\0'; - sqlite3_stmt *st = NULL; - if (sqlite3_prepare_v2(db, "SELECT name, file_path FROM nodes WHERE id = ?1", CBM_NOT_FOUND, - &st, NULL) != SQLITE_OK) { - return; - } - sqlite3_bind_int64(st, SKIP_ONE, node_id); - if (sqlite3_step(st) == SQLITE_ROW) { - const char *nm = (const char *)sqlite3_column_text(st, 0); - const char *fp = (const char *)sqlite3_column_text(st, SKIP_ONE); - if (nm) { - snprintf(name_out, name_sz, "%s", nm); - } - if (fp) { - snprintf(file_out, file_sz, "%s", fp); - } - } - sqlite3_finalize(st); -} - -/* ── Phase A: HTTP Route matching ────────────────────────────────── */ - -/* Find a Route node in target_store by QN and return the handler function's - * node id, name, and file_path via HANDLES edges. Returns 0 if not found. 
*/ -static int64_t find_route_handler(cbm_store_t *target_store, const char *route_qn, - char *handler_name, size_t name_sz, char *handler_file, - size_t file_sz) { - handler_name[0] = '\0'; - handler_file[0] = '\0'; - struct sqlite3 *db = cbm_store_get_db(target_store); - if (!db) { - return 0; - } - - /* Find Route node by QN */ - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2( - db, "SELECT id FROM nodes WHERE qualified_name = ?1 AND label = 'Route' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); - int64_t route_id = 0; - if (sqlite3_step(s) == SQLITE_ROW) { - route_id = sqlite3_column_int64(s, 0); - } - sqlite3_finalize(s); - if (route_id == 0) { - return 0; - } - - /* Follow HANDLES edge to find the handler function */ - if (sqlite3_prepare_v2(db, - "SELECT n.id, n.name, n.file_path FROM edges e " - "JOIN nodes n ON n.id = e.source_id " - "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_int64(s, SKIP_ONE, route_id); - int64_t handler_id = 0; - if (sqlite3_step(s) == SQLITE_ROW) { - handler_id = sqlite3_column_int64(s, 0); - const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (n) { - snprintf(handler_name, name_sz, "%s", n); - } - if (f) { - snprintf(handler_file, file_sz, "%s", f); - } - } - sqlite3_finalize(s); - return handler_id; -} - -/* Fuzzy route lookup: scan all Route nodes in target and return the first - * whose path template matches the concrete norm_path from the consumer. - * Fills route_qn_out with the matched QN (for edge emission). 
*/ -static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, const char *norm_path, - const char *method, char *route_qn_out, size_t route_qn_sz, - char *handler_name, size_t name_sz, char *handler_file, - size_t file_sz) { - handler_name[0] = '\0'; - handler_file[0] = '\0'; - struct sqlite3 *db = cbm_store_get_db(target_store); - if (!db) - return 0; - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(db, "SELECT id, qualified_name, name FROM nodes WHERE label = 'Route'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - - int64_t matched_route_id = 0; - while (sqlite3_step(s) == SQLITE_ROW) { - int64_t rid = sqlite3_column_int64(s, 0); - const char *qn = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *name = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (!qn || !name) - continue; - - /* QN format: __route__METHOD__/path */ - const char *after_prefix = strstr(qn, "__route__"); - if (!after_prefix) - continue; - after_prefix += 9; /* skip "__route__" */ - const char *second_sep = strstr(after_prefix, "__"); - if (!second_sep) - continue; - const char *tmpl_path = second_sep + 2; - - /* method filter: skip mismatched methods, always accept ANY */ - if (method) { - size_t mlen = (size_t)(second_sep - after_prefix); - char qn_method[CBM_SZ_32] = {0}; - if (mlen >= sizeof(qn_method)) - mlen = sizeof(qn_method) - 1; - strncpy(qn_method, after_prefix, mlen); - if (strcmp(qn_method, "ANY") != 0 && strcmp(qn_method, method) != 0) - continue; - } - - if (cr_path_matches_template(norm_path, tmpl_path)) { - matched_route_id = rid; - snprintf(route_qn_out, route_qn_sz, "%s", qn); - break; - } - } - sqlite3_finalize(s); - if (matched_route_id == 0) - return 0; - - /* Follow HANDLES edge to find the handler function */ - if (sqlite3_prepare_v2(db, - "SELECT n.id, n.name, n.file_path FROM edges e " - "JOIN nodes n ON n.id = e.source_id " - "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { 
- return 0; - } - sqlite3_bind_int64(s, SKIP_ONE, matched_route_id); - int64_t handler_id = 0; - if (sqlite3_step(s) == SQLITE_ROW) { - handler_id = sqlite3_column_int64(s, 0); - const char *n = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *f = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (n) - snprintf(handler_name, name_sz, "%s", n); - if (f) - snprintf(handler_file, file_sz, "%s", f); - } - sqlite3_finalize(s); - return handler_id; -} - -/* Emit CROSS_* edge for a route match: forward into source, reverse into target. */ -static void emit_cross_route_bidirectional(cbm_store_t *src_store, const char *src_project, - struct sqlite3 *src_db, int64_t caller_id, - int64_t local_route_id, cbm_store_t *tgt_store, - const char *tgt_project, int64_t handler_id, - const char *route_qn, const char *handler_name, - const char *handler_file, const char *url_path, - const char *method, const char *edge_type) { - /* Forward: caller → local Route in source DB */ - char fwd[CR_PROPS_BUF]; - build_cross_props(fwd, sizeof(fwd), tgt_project, handler_name, handler_file, url_path, - "url_path", method); - insert_cross_edge(src_store, src_project, caller_id, local_route_id, edge_type, fwd); - - /* Reverse: handler → Route in target DB */ - struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); - if (!tgt_db) { - return; - } - sqlite3_stmt *rq = NULL; - if (sqlite3_prepare_v2(tgt_db, "SELECT id FROM nodes WHERE qualified_name = ?1 LIMIT 1", - CBM_NOT_FOUND, &rq, NULL) != SQLITE_OK) { - return; - } - sqlite3_bind_text(rq, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC); - int64_t tgt_route_id = 0; - if (sqlite3_step(rq) == SQLITE_ROW) { - tgt_route_id = sqlite3_column_int64(rq, 0); - } - sqlite3_finalize(rq); - if (tgt_route_id == 0) { - return; - } - - char caller_name[CBM_SZ_256] = {0}; - char caller_file[CBM_SZ_512] = {0}; - lookup_node_info(src_db, caller_id, caller_name, sizeof(caller_name), caller_file, - sizeof(caller_file)); - - char rev[CR_PROPS_BUF]; - 
build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, url_path, "url_path", - method); - insert_cross_edge(tgt_store, tgt_project, handler_id, tgt_route_id, edge_type, rev); -} - -static int match_http_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - /* Find all HTTP_CALLS edges in source project */ - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = 'HTTP_CALLS'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char url_path[CBM_SZ_256] = {0}; - char method[CBM_SZ_32] = {0}; - json_str_prop(props, "url_path", url_path, sizeof(url_path)); - json_str_prop(props, "method", method, sizeof(method)); - if (!url_path[0]) { - continue; - } - - /* Normalise: strip scheme+host+port so a full URL consumer-side path - * ("/v2/orders/{id}"). Issue #523. */ - const char *norm_path = cr_url_path(url_path); - - /* Build the expected Route QN in the target project */ - char route_qn[CR_QN_BUF]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method[0] ? 
method : "ANY", - norm_path); - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - /* Try without method (ANY) */ - snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", norm_path); - handler_id = find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - } - if (handler_id == 0) { - /* Exact QN lookup missed — try path-param template matching. - * Handles concrete vs template mismatch ("123" vs "{id}"). Issue #523. */ - handler_id = find_route_handler_fuzzy( - tgt_store, norm_path, method[0] ? method : NULL, route_qn, sizeof(route_qn), - handler_name, sizeof(handler_name), handler_file, sizeof(handler_file)); - } - if (handler_id == 0) { - continue; - } - - emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, - tgt_store, tgt_project, handler_id, route_qn, handler_name, - handler_file, url_path, method, "CROSS_HTTP_CALLS"); - - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase B: Async matching ─────────────────────────────────────── */ - -static int match_async_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = 'ASYNC_CALLS'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char 
*props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char url_path[CBM_SZ_256] = {0}; - char broker[CBM_SZ_128] = {0}; - json_str_prop(props, "url_path", url_path, sizeof(url_path)); - json_str_prop(props, "broker", broker, sizeof(broker)); - if (!url_path[0]) { - continue; - } - - char route_qn[CR_QN_BUF]; - snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", broker[0] ? broker : "async", - url_path); - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - continue; - } - - char edge_props[CR_PROPS_BUF]; - build_cross_props(edge_props, sizeof(edge_props), tgt_project, handler_name, handler_file, - url_path, "url_path", broker); - insert_cross_edge(src_store, src_project, caller_id, route_id, "CROSS_ASYNC_CALLS", - edge_props); - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase C: Channel matching ───────────────────────────────────── */ - -/* Try to find a matching listener in target DB for a channel name. 
*/ -static bool try_match_channel_listener(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project, - const char *channel_name, const char *transport, - int64_t emitter_id, int64_t channel_id) { - struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store); - if (!tgt_db) { - return false; - } - sqlite3_stmt *tq = NULL; - if (sqlite3_prepare_v2(tgt_db, - "SELECT n.id, e.source_id, fn.name, fn.file_path FROM nodes n " - "JOIN edges e ON e.target_id = n.id AND e.type = 'LISTENS_ON' " - "JOIN nodes fn ON fn.id = e.source_id " - "WHERE n.project = ?1 AND n.name = ?2 AND n.label = 'Channel' LIMIT 1", - CBM_NOT_FOUND, &tq, NULL) != SQLITE_OK) { - return false; - } - sqlite3_bind_text(tq, SKIP_ONE, tgt_project, CBM_NOT_FOUND, SQLITE_STATIC); - sqlite3_bind_text(tq, PAIR_LEN, channel_name, CBM_NOT_FOUND, SQLITE_STATIC); - - bool matched = false; - if (sqlite3_step(tq) == SQLITE_ROW) { - int64_t tgt_channel_id = sqlite3_column_int64(tq, 0); - int64_t listener_id = sqlite3_column_int64(tq, SKIP_ONE); - const char *listener_name = (const char *)sqlite3_column_text(tq, PAIR_LEN); - const char *listener_file = (const char *)sqlite3_column_text(tq, CR_COL_3); - - /* Forward edge: emitter → local Channel */ - char fwd[CR_PROPS_BUF]; - build_cross_props(fwd, sizeof(fwd), tgt_project, listener_name ? listener_name : "", - listener_file ? 
listener_file : "", channel_name, "channel_name", - transport); - insert_cross_edge(src_store, src_project, emitter_id, channel_id, "CROSS_CHANNEL", fwd); - - /* Reverse edge: listener → target Channel */ - char caller_name[CBM_SZ_256] = {0}; - char caller_file[CBM_SZ_512] = {0}; - lookup_node_info(cbm_store_get_db(src_store), emitter_id, caller_name, sizeof(caller_name), - caller_file, sizeof(caller_file)); - - char rev[CR_PROPS_BUF]; - build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, channel_name, - "channel_name", transport); - insert_cross_edge(tgt_store, tgt_project, listener_id, tgt_channel_id, "CROSS_CHANNEL", - rev); - matched = true; - } - sqlite3_finalize(tq); - return matched; -} - -static int match_channels(cbm_store_t *src_store, const char *src_project, cbm_store_t *tgt_store, - const char *tgt_project) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, - "SELECT DISTINCT n.id, n.name, n.qualified_name, n.properties, " - "e.source_id FROM nodes n " - "JOIN edges e ON e.target_id = n.id AND e.type = 'EMITS' " - "WHERE n.project = ?1 AND n.label = 'Channel'", - CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - const char *channel_name = (const char *)sqlite3_column_text(s, SKIP_ONE); - const char *channel_qn = (const char *)sqlite3_column_text(s, PAIR_LEN); - if (!channel_name || !channel_qn) { - continue; - } - int64_t channel_id = sqlite3_column_int64(s, 0); - const char *channel_props = (const char *)sqlite3_column_text(s, CR_COL_3); - int64_t emitter_id = sqlite3_column_int64(s, CR_COL_4); - - char transport[CBM_SZ_64] = {0}; - json_str_prop(channel_props, "transport", transport, sizeof(transport)); - - if (try_match_channel_listener(src_store, src_project, 
tgt_store, tgt_project, channel_name, - transport, emitter_id, channel_id)) { - count++; - } - } - sqlite3_finalize(s); - return count; -} - -/* ── Phase D: Generic route-type matcher (gRPC, GraphQL, tRPC) ──── */ - -/* Look up a node's qualified_name by id. Returns true if found. */ -static bool lookup_node_qn(struct sqlite3 *db, int64_t node_id, char *out, size_t out_sz) { - out[0] = '\0'; - sqlite3_stmt *st = NULL; - if (sqlite3_prepare_v2(db, "SELECT qualified_name FROM nodes WHERE id = ?1", CBM_NOT_FOUND, &st, - NULL) != SQLITE_OK) { - return false; - } - sqlite3_bind_int64(st, SKIP_ONE, node_id); - bool found = false; - if (sqlite3_step(st) == SQLITE_ROW) { - const char *qn = (const char *)sqlite3_column_text(st, 0); - if (qn) { - snprintf(out, out_sz, "%s", qn); - found = true; - } - } - sqlite3_finalize(st); - return found; -} - -/* Match edges of a given type against Route nodes with a given QN prefix. - * Reuses the same infrastructure as HTTP/async matching. */ -static int match_typed_routes(cbm_store_t *src_store, const char *src_project, - cbm_store_t *tgt_store, const char *tgt_project, - const char *edge_type, const char *svc_key, const char *method_key, - const char *cross_edge_type) { - struct sqlite3 *src_db = cbm_store_get_db(src_store); - if (!src_db) { - return 0; - } - - char sql[CBM_SZ_256]; - snprintf(sql, sizeof(sql), - "SELECT e.source_id, e.target_id, e.properties FROM edges e " - "WHERE e.project = ?1 AND e.type = '%s'", - edge_type); - - sqlite3_stmt *s = NULL; - if (sqlite3_prepare_v2(src_db, sql, CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) { - return 0; - } - sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC); - - int count = 0; - while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) { - int64_t caller_id = sqlite3_column_int64(s, 0); - int64_t route_id = sqlite3_column_int64(s, SKIP_ONE); - const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN); - - char svc_val[CBM_SZ_256] = {0}; - char 
meth_val[CBM_SZ_256] = {0}; - json_str_prop(props, svc_key, svc_val, sizeof(svc_val)); - json_str_prop(props, method_key, meth_val, sizeof(meth_val)); - if (!svc_val[0] && !meth_val[0]) { - continue; - } - - /* Look up the Route QN from the target node (already points to the Route). */ - char route_qn[CR_QN_BUF] = {0}; - if (!lookup_node_qn(src_db, route_id, route_qn, sizeof(route_qn))) { - continue; - } - - char handler_name[CBM_SZ_256] = {0}; - char handler_file[CBM_SZ_512] = {0}; - int64_t handler_id = - find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name), - handler_file, sizeof(handler_file)); - if (handler_id == 0) { - continue; - } - - emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id, - tgt_store, tgt_project, handler_id, route_qn, handler_name, - handler_file, svc_val, svc_key, cross_edge_type); - count++; - } - sqlite3_finalize(s); - return count; -} - -/* ── Collect target projects ─────────────────────────────────────── */ - -/* When target_projects = ["*"], scan the cache directory for all .db files. 
*/ -static int collect_all_projects(char ***out) { - const char *dir = cr_cache_dir(); - cbm_dir_t *d = cbm_opendir(dir); - if (!d) { - *out = NULL; - return 0; - } - - int cap = CR_INIT_CAP; - int count = 0; - char **projects = malloc((size_t)cap * sizeof(char *)); - - cbm_dirent_t *ent; - while ((ent = cbm_readdir(d)) != NULL) { - size_t len = strlen(ent->name); - if (len < CR_COL_4 || strcmp(ent->name + len - CR_DB_EXT_LEN, ".db") != 0) { - continue; - } - if (strstr(ent->name, "_cross_repo") || strstr(ent->name, "_config")) { - continue; - } - if (strstr(ent->name, "-wal") || strstr(ent->name, "-shm")) { - continue; - } - if (count >= cap) { - cap *= PAIR_LEN; - char **tmp = realloc(projects, (size_t)cap * sizeof(char *)); - if (!tmp) { - break; - } - projects = tmp; - } - /* Strip .db extension */ - projects[count] = malloc(len - PAIR_LEN); - memcpy(projects[count], ent->name, len - CR_DB_EXT_LEN); - projects[count][len - CR_DB_EXT_LEN] = '\0'; - count++; - } - cbm_closedir(d); - - *out = projects; - return count; -} - -static void free_project_list(char **projects, int count) { - for (int i = 0; i < count; i++) { - free(projects[i]); - } - free(projects); -} - -/* ── Entry point ─────────────────────────────────────────────────── */ - -cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **target_projects, - int target_count) { - cbm_cross_repo_result_t result = {0}; - struct timespec t0; - clock_gettime(CLOCK_MONOTONIC, &t0); - - /* Open source project store (read-write) */ - char src_path[CR_PATH_BUF]; - cr_db_path(project, src_path, sizeof(src_path)); - cbm_store_t *src_store = cbm_store_open_path(src_path); - if (!src_store) { - return result; - } - - /* Clean existing CROSS_* edges for this project */ - delete_cross_edges(src_store, project); - - /* Resolve target projects */ - char **resolved = NULL; - int resolved_count = 0; - bool own_list = false; - - if (target_count == SKIP_ONE && strcmp(target_projects[0], "*") == 0) { - 
resolved_count = collect_all_projects(&resolved); - own_list = true; - } else { - resolved = (char **)target_projects; - resolved_count = target_count; - } - - /* Match against each target */ - for (int i = 0; i < resolved_count; i++) { - const char *tgt = resolved[i]; - if (strcmp(tgt, project) == 0) { - continue; /* skip self */ - } - - char tgt_path[CR_PATH_BUF]; - cr_db_path(tgt, tgt_path, sizeof(tgt_path)); - - /* Open target store read-write (for bidirectional edge writes) */ - cbm_store_t *tgt_store = cbm_store_open_path(tgt_path); - if (!tgt_store) { - continue; - } - - result.http_edges += match_http_routes(src_store, project, tgt_store, tgt); - result.http_edges += match_http_routes(tgt_store, tgt, src_store, project); - result.async_edges += match_async_routes(src_store, project, tgt_store, tgt); - result.channel_edges += match_channels(src_store, project, tgt_store, tgt); - result.grpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "GRPC_CALLS", - "service", "method", "CROSS_GRPC_CALLS"); - result.graphql_edges += - match_typed_routes(src_store, project, tgt_store, tgt, "GRAPHQL_CALLS", "operation", - "operation", "CROSS_GRAPHQL_CALLS"); - result.trpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "TRPC_CALLS", - "procedure", "procedure", "CROSS_TRPC_CALLS"); - result.projects_scanned++; - - cbm_store_close(tgt_store); - } - - cbm_store_close(src_store); - - if (own_list) { - free_project_list(resolved, resolved_count); - } - - struct timespec t1; - clock_gettime(CLOCK_MONOTONIC, &t1); - result.elapsed_ms = ((double)(t1.tv_sec - t0.tv_sec) * CR_MS_PER_SEC) + - ((double)(t1.tv_nsec - t0.tv_nsec) / CR_NS_PER_MS); - - int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + - result.graphql_edges + result.trpc_edges; - cbm_log_info("cross_repo.done", "project", project, "total", cr_itoa(total)); - - return result; -} +/* + * pass_cross_repo.c — Cross-repo intelligence: match Routes, 
Channels, and + * async topics across indexed projects to create CROSS_* edges. + * + * For each HTTP_CALLS/ASYNC_CALLS edge in the source project, looks up the + * target Route QN in other project DBs. For each Channel node with EMITS + * edges, looks for matching LISTENS_ON in other projects (and vice versa). + * + * Edges are written bidirectionally: both source and target project DBs + * get a CROSS_* edge so the link is visible from either side. + */ +#include "pipeline/pass_cross_repo.h" +#include "foundation/constants.h" +#include "foundation/log.h" +#include "foundation/platform.h" +#include "foundation/compat.h" +#include "foundation/compat_fs.h" + +#include +#include +#include +#include +#include +#include + +/* ── Constants ───────────────────────────────────────────────────── */ + +enum { + CR_PATH_BUF = 1024, + CR_QN_BUF = 512, + CR_PROPS_BUF = 2048, + CR_MAX_EDGES = 4096, + CR_DB_EXT_LEN = 3, /* strlen(".db") */ + CR_INIT_CAP = 32, + CR_COL_3 = 3, + CR_COL_4 = 4, +}; + +#define CR_MS_PER_SEC 1000.0 +#define CR_NS_PER_MS 1000000.0 + +/* TLS buffer for integer-to-string in log calls. */ +static CBM_TLS char cr_ibuf[CBM_SZ_32]; +static const char *cr_itoa(int v) { + snprintf(cr_ibuf, sizeof(cr_ibuf), "%d", v); + return cr_ibuf; +} + +/* ── Helpers ─────────────────────────────────────────────────────── */ + +static const char *cr_cache_dir(void) { + const char *dir = cbm_resolve_cache_dir(); + return dir ? dir : cbm_tmpdir(); +} + +static void cr_db_path(const char *project, char *buf, size_t bufsz) { + snprintf(buf, bufsz, "%s/%s.db", cr_cache_dir(), project); +} + +/* Extract a JSON string property from properties_json. + * Writes into buf, returns buf on success, NULL on miss. 
*/
+static const char *json_str_prop(const char *json, const char *key, char *buf, size_t bufsz) {
+    /* A zero-sized buffer cannot hold even the terminator; reject it before
+     * `bufsz - SKIP_ONE` below can wrap around. */
+    if (!json || !key || !buf || bufsz == 0) {
+        return NULL;
+    }
+    /* Only the compact `"key":"value"` spelling (no whitespace around ':') is
+     * recognised. */
+    char pat[CBM_SZ_128];
+    int pat_len = snprintf(pat, sizeof(pat), "\"%s\":\"", key);
+    if (pat_len < 0 || (size_t)pat_len >= sizeof(pat)) {
+        return NULL; /* a truncated pattern could match a different property */
+    }
+    const char *start = strstr(json, pat);
+    if (!start) {
+        return NULL;
+    }
+    start += (size_t)pat_len;
+
+    /* Find the closing quote, stepping over backslash escapes so an embedded
+     * escaped quote does not end the value early. The value is copied raw
+     * (escape sequences are not decoded). */
+    const char *end = start;
+    while (*end != '\0' && *end != '"') {
+        if (*end == '\\' && end[1] != '\0') {
+            end++;
+        }
+        end++;
+    }
+    if (*end != '"') {
+        return NULL;
+    }
+
+    size_t len = (size_t)(end - start);
+    if (len >= bufsz) {
+        len = bufsz - SKIP_ONE; /* truncate, keeping room for the terminator */
+    }
+    memcpy(buf, start, len);
+    buf[len] = '\0';
+    return buf;
+}
+
+/* Strip scheme and authority from a URL, returning its path component.
+ *   "https://api.example.com:8080/v2/orders/123" -> "/v2/orders/123"
+ *   "/v2/orders/123"                             -> "/v2/orders/123" (already a path)
+ *   "https://api.example.com"                    -> "/" (no path component)
+ * The result points into `url`, except in the no-path case, which returns a
+ * static "/". A NULL input is returned unchanged. */
+static const char *cr_url_path(const char *url) {
+    if (!url) {
+        return url;
+    }
+    const char *scheme_end = strstr(url, "://");
+    /* "://" marks a scheme only when nothing path-like precedes it; otherwise
+     * it belongs to the path or query ("/go?next=https://x/y") and the input
+     * is already a path. */
+    if (!scheme_end || scheme_end == url ||
+        strcspn(url, "/?#") < (size_t)(scheme_end - url)) {
+        return url;
+    }
+    const char *authority = scheme_end + strlen("://");
+    const char *path_start = authority + strcspn(authority, "/?#");
+    return (*path_start == '/') ? path_start : "/";
+}
+
+/* Return 1 if a concrete request path matches a route template, else 0.
+ * Paths are compared segment by segment; empty segments (leading, trailing
+ * or repeated '/') are ignored. A template segment wrapped in '{' '}' matches
+ * any single non-empty concrete segment. The concrete path ends at the first
+ * '?' or '#', so a query string or fragment never takes part in the match.
+ *   "/v2/orders/123"    vs "/v2/orders/{id}" -> 1
+ *   "/v2/orders/123"    vs "/v2/orders/456"  -> 0
+ *   "/v2/orders?page=2" vs "/v2/orders"      -> 1 */
+static int cr_path_matches_template(const char *concrete, const char *tmpl) {
+    if (!concrete || !tmpl) {
+        return 0;
+    }
+    const char *c = concrete;
+    const char *t = tmpl;
+    for (;;) {
+        c += strspn(c, "/");
+        t += strspn(t, "/");
+        int c_done = (*c == '\0' || *c == '?' || *c == '#');
+        int t_done = (*t == '\0');
+        if (c_done || t_done) {
+            return c_done && t_done; /* both sides must run out together */
+        }
+        size_t clen = strcspn(c, "/?#");
+        size_t tlen = strcspn(t, "/");
+        int is_param = (tlen >= 2 && t[0] == '{' && t[tlen - 1] == '}');
+        if (!is_param && (clen != tlen || strncmp(c, t, clen) != 0)) {
+            return 0;
+        }
+        c += clen;
+        t += tlen;
+    }
+}
+
+/* Build CROSS_* edge properties JSON.
*/
+static void build_cross_props(char *buf, size_t bufsz, const char *target_project,
+                              const char *target_function, const char *target_file,
+                              const char *url_or_channel, const char *extra_key,
+                              const char *extra_val) {
+    if (!buf || bufsz == 0) {
+        return;
+    }
+
+    /* snprintf returns the length it WANTED to write, so every call is checked
+     * against the space actually left before the offset is advanced. Adding
+     * the raw return value and then computing `bufsz - n` would wrap around
+     * after a truncation and let the next call write past the buffer.
+     * Values are inserted verbatim (no JSON escaping). */
+    size_t off = 0;
+    int n = snprintf(buf, bufsz,
+                     "{\"target_project\":\"%s\",\"target_function\":\"%s\","
+                     "\"target_file\":\"%s\"",
+                     target_project ? target_project : "", target_function ? target_function : "",
+                     target_file ? target_file : "");
+    bool ok = (n >= 0 && (size_t)n < bufsz);
+    if (ok) {
+        off = (size_t)n;
+    }
+
+    if (ok && url_or_channel && url_or_channel[0]) {
+        n = snprintf(buf + off, bufsz - off, ",\"%s\":\"%s\"",
+                     extra_key ? extra_key : "url_path", url_or_channel);
+        ok = (n >= 0 && (size_t)n < bufsz - off);
+        if (ok) {
+            off += (size_t)n;
+        }
+    }
+
+    /* NOTE(review): the key for extra_val is "transport" whenever extra_key is
+     * non-NULL, and "method" only when extra_key is NULL. Kept as-is because
+     * readers of these properties may depend on the key names; confirm this is
+     * the intended schema for HTTP callers that pass a method here. */
+    if (ok && extra_val && extra_val[0]) {
+        n = snprintf(buf + off, bufsz - off, ",\"%s\":\"%s\"",
+                     extra_key ? "transport" : "method", extra_val);
+        ok = (n >= 0 && (size_t)n < bufsz - off);
+        if (ok) {
+            off += (size_t)n;
+        }
+    }
+
+    if (ok) {
+        n = snprintf(buf + off, bufsz - off, "}");
+        ok = (n >= 0 && (size_t)n < bufsz - off);
+    }
+    if (!ok) {
+        /* Did not fit: store an empty object rather than a cut-off fragment. */
+        snprintf(buf, bufsz, "{}");
+    }
+}
+
+/* Delete all CROSS_* edges for a project from a store, one edge type at a
+ * time. */
+static void delete_cross_edges(cbm_store_t *store, const char *project) {
+    static const char *const cross_types[] = {
+        "CROSS_HTTP_CALLS", "CROSS_ASYNC_CALLS",   "CROSS_CHANNEL",
+        "CROSS_GRPC_CALLS", "CROSS_GRAPHQL_CALLS", "CROSS_TRPC_CALLS",
+    };
+    for (size_t i = 0; i < sizeof(cross_types) / sizeof(cross_types[0]); i++) {
+        cbm_store_delete_edges_by_type(store, project, cross_types[i]);
+    }
+}
+
+/* Insert a CROSS_* edge into a store, skipping if an identical
+ * (source_id, target_id, type) edge already exists. The cross-repo pass can
+ * reach the same caller/route pair from both directions (the entry point
+ * matches src->tgt and tgt->src, and emit_cross_route_bidirectional writes
+ * both DBs), so this guard keeps emission idempotent and prevents duplicate
+ * CROSS_* edges from inflating the counts. Issue #523.
*/
+static void insert_cross_edge(cbm_store_t *store, const char *project, int64_t from_id,
+                              int64_t to_id, const char *edge_type, const char *props) {
+    /* Scan the edges of this type already leaving from_id for one that ends
+     * at to_id. If the lookup itself fails, the insert proceeds. */
+    cbm_edge_t *existing = NULL;
+    int existing_count = 0;
+    bool already_present = false;
+    if (cbm_store_find_edges_by_source_type(store, from_id, edge_type, &existing,
+                                            &existing_count) == 0) {
+        for (int i = 0; i < existing_count && !already_present; i++) {
+            already_present = (existing[i].target_id == to_id);
+        }
+        cbm_store_free_edges(existing, existing_count);
+    }
+    if (already_present) {
+        return; /* already present - do not duplicate */
+    }
+
+    cbm_edge_t edge = {
+        .project = project,
+        .source_id = from_id,
+        .target_id = to_id,
+        .type = edge_type,
+        .properties_json = props,
+    };
+    cbm_store_insert_edge(store, &edge);
+}
+
+/* Look up a node's name and file_path by id.
+ * Both outputs are set to "" first and stay empty when db is NULL, the query
+ * cannot be prepared, or no row has that id. */
+static void lookup_node_info(struct sqlite3 *db, int64_t node_id, char *name_out, size_t name_sz,
+                             char *file_out, size_t file_sz) {
+    name_out[0] = '\0';
+    file_out[0] = '\0';
+    /* Callers may pass an unchecked cbm_store_get_db() result. */
+    if (!db) {
+        return;
+    }
+    sqlite3_stmt *st = NULL;
+    if (sqlite3_prepare_v2(db, "SELECT name, file_path FROM nodes WHERE id = ?1", CBM_NOT_FOUND,
+                           &st, NULL) != SQLITE_OK) {
+        return;
+    }
+    sqlite3_bind_int64(st, SKIP_ONE, node_id);
+    if (sqlite3_step(st) == SQLITE_ROW) {
+        const char *nm = (const char *)sqlite3_column_text(st, 0);
+        const char *fp = (const char *)sqlite3_column_text(st, SKIP_ONE);
+        if (nm) {
+            snprintf(name_out, name_sz, "%s", nm);
+        }
+        if (fp) {
+            snprintf(file_out, file_sz, "%s", fp);
+        }
+    }
+    sqlite3_finalize(st);
+}
+
+/* ── Phase A: HTTP Route matching ────────────────────────────────── */
+
+/* Find a Route node in target_store by QN and return the handler function's
+ * node id, name, and file_path via HANDLES edges. Returns 0 if not found.
*/
+static int64_t find_route_handler(cbm_store_t *target_store, const char *route_qn,
+                                  char *handler_name, size_t name_sz, char *handler_file,
+                                  size_t file_sz) {
+    /* Outputs start empty so they are well-defined on every early return. */
+    handler_name[0] = '\0';
+    handler_file[0] = '\0';
+
+    struct sqlite3 *db = cbm_store_get_db(target_store);
+    if (db == NULL) {
+        return 0;
+    }
+
+    /* Step 1: resolve the Route node id from its qualified name. */
+    sqlite3_stmt *route_stmt = NULL;
+    int rc = sqlite3_prepare_v2(
+        db, "SELECT id FROM nodes WHERE qualified_name = ?1 AND label = 'Route' LIMIT 1",
+        CBM_NOT_FOUND, &route_stmt, NULL);
+    if (rc != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_text(route_stmt, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC);
+    int64_t route_id = 0;
+    if (sqlite3_step(route_stmt) == SQLITE_ROW) {
+        route_id = sqlite3_column_int64(route_stmt, 0);
+    }
+    sqlite3_finalize(route_stmt);
+    if (route_id == 0) {
+        return 0;
+    }
+
+    /* Step 2: follow one HANDLES edge into the route to reach its handler. */
+    sqlite3_stmt *handler_stmt = NULL;
+    rc = sqlite3_prepare_v2(db,
+                            "SELECT n.id, n.name, n.file_path FROM edges e "
+                            "JOIN nodes n ON n.id = e.source_id "
+                            "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1",
+                            CBM_NOT_FOUND, &handler_stmt, NULL);
+    if (rc != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_int64(handler_stmt, SKIP_ONE, route_id);
+
+    int64_t handler_id = 0;
+    if (sqlite3_step(handler_stmt) == SQLITE_ROW) {
+        handler_id = sqlite3_column_int64(handler_stmt, 0);
+        const char *fn_name = (const char *)sqlite3_column_text(handler_stmt, SKIP_ONE);
+        const char *fn_file = (const char *)sqlite3_column_text(handler_stmt, PAIR_LEN);
+        if (fn_name != NULL) {
+            snprintf(handler_name, name_sz, "%s", fn_name);
+        }
+        if (fn_file != NULL) {
+            snprintf(handler_file, file_sz, "%s", fn_file);
+        }
+    }
+    sqlite3_finalize(handler_stmt);
+    return handler_id;
+}
+
+/* Fuzzy route lookup: scan all Route nodes in target and return the first
+ * whose path template matches the concrete norm_path from the consumer.
+ * Fills route_qn_out with the matched QN (for edge emission).
*/
+static int64_t find_route_handler_fuzzy(cbm_store_t *target_store, const char *norm_path,
+                                        const char *method, char *route_qn_out, size_t route_qn_sz,
+                                        char *handler_name, size_t name_sz, char *handler_file,
+                                        size_t file_sz) {
+    handler_name[0] = '\0';
+    handler_file[0] = '\0';
+    struct sqlite3 *db = cbm_store_get_db(target_store);
+    if (!db) {
+        return 0;
+    }
+
+    /* `routes` walks every Route node; `handles` resolves the handler behind
+     * one route and is reset and re-bound for each candidate. */
+    sqlite3_stmt *routes = NULL;
+    if (sqlite3_prepare_v2(db, "SELECT id, qualified_name, name FROM nodes WHERE label = 'Route'",
+                           CBM_NOT_FOUND, &routes, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_stmt *handles = NULL;
+    if (sqlite3_prepare_v2(db,
+                           "SELECT n.id, n.name, n.file_path FROM edges e "
+                           "JOIN nodes n ON n.id = e.source_id "
+                           "WHERE e.target_id = ?1 AND e.type = 'HANDLES' LIMIT 1",
+                           CBM_NOT_FOUND, &handles, NULL) != SQLITE_OK) {
+        sqlite3_finalize(routes);
+        return 0;
+    }
+
+    static const char route_prefix[] = "__route__";
+    static const char qn_sep[] = "__";
+
+    /* Keep scanning until a matching route that actually has a handler is
+     * found: a template match without a HANDLES edge must not hide a later
+     * route that has one. */
+    int64_t handler_id = 0;
+    while (handler_id == 0 && sqlite3_step(routes) == SQLITE_ROW) {
+        int64_t rid = sqlite3_column_int64(routes, 0);
+        const char *qn = (const char *)sqlite3_column_text(routes, SKIP_ONE);
+        const char *name = (const char *)sqlite3_column_text(routes, PAIR_LEN);
+        if (!qn || !name) {
+            continue;
+        }
+
+        /* QN format: __route__METHOD__/path */
+        const char *qn_method = strstr(qn, route_prefix);
+        if (!qn_method) {
+            continue;
+        }
+        qn_method += sizeof(route_prefix) - 1;
+        const char *sep = strstr(qn_method, qn_sep);
+        if (!sep) {
+            continue;
+        }
+        const char *tmpl_path = sep + (sizeof(qn_sep) - 1);
+
+        /* Method filter: skip mismatched methods, always accept ANY. The QN
+         * method is compared in place by length, with no intermediate copy. */
+        if (method) {
+            size_t qn_mlen = (size_t)(sep - qn_method);
+            bool is_any =
+                (qn_mlen == strlen("ANY") && strncmp(qn_method, "ANY", qn_mlen) == 0);
+            bool is_same =
+                (qn_mlen == strlen(method) && strncmp(qn_method, method, qn_mlen) == 0);
+            if (!is_any && !is_same) {
+                continue;
+            }
+        }
+
+        if (!cr_path_matches_template(norm_path, tmpl_path)) {
+            continue;
+        }
+        /* Copy the QN now: `qn` is only valid until `routes` is stepped again. */
+        snprintf(route_qn_out, route_qn_sz, "%s", qn);
+
+        /* Follow HANDLES edge to find the handler function */
+        sqlite3_reset(handles);
+        sqlite3_bind_int64(handles, SKIP_ONE, rid);
+        if (sqlite3_step(handles) == SQLITE_ROW) {
+            handler_id = sqlite3_column_int64(handles, 0);
+            const char *n = (const char *)sqlite3_column_text(handles, SKIP_ONE);
+            const char *f = (const char *)sqlite3_column_text(handles, PAIR_LEN);
+            if (n) {
+                snprintf(handler_name, name_sz, "%s", n);
+            }
+            if (f) {
+                snprintf(handler_file, file_sz, "%s", f);
+            }
+        }
+    }
+    sqlite3_finalize(handles);
+    sqlite3_finalize(routes);
+    return handler_id;
+}
+
+/* Emit CROSS_* edge for a route match: forward into source, reverse into target.
+ * The forward edge (caller -> local Route) is always written to the source
+ * store. The reverse edge (handler -> Route) is written to the target store
+ * only if a node with qualified name route_qn can be found there. */
+static void emit_cross_route_bidirectional(cbm_store_t *src_store, const char *src_project,
+                                           struct sqlite3 *src_db, int64_t caller_id,
+                                           int64_t local_route_id, cbm_store_t *tgt_store,
+                                           const char *tgt_project, int64_t handler_id,
+                                           const char *route_qn, const char *handler_name,
+                                           const char *handler_file, const char *url_path,
+                                           const char *method, const char *edge_type) {
+    /* Forward: caller → local Route in source DB */
+    char fwd[CR_PROPS_BUF];
+    build_cross_props(fwd, sizeof(fwd), tgt_project, handler_name, handler_file, url_path,
+                      "url_path", method);
+    insert_cross_edge(src_store, src_project, caller_id, local_route_id, edge_type, fwd);
+
+    /* Reverse: handler → Route in target DB */
+    struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store);
+    if (!tgt_db) {
+        return;
+    }
+    sqlite3_stmt *rq = NULL;
+    if (sqlite3_prepare_v2(tgt_db, "SELECT id FROM nodes WHERE qualified_name = ?1 LIMIT 1",
+                           CBM_NOT_FOUND, &rq, NULL) != SQLITE_OK) {
+        return;
+    }
+    sqlite3_bind_text(rq, SKIP_ONE, route_qn, CBM_NOT_FOUND, SQLITE_STATIC);
+    int64_t tgt_route_id = 0;
+    if (sqlite3_step(rq) == SQLITE_ROW) {
+        tgt_route_id = sqlite3_column_int64(rq, 0);
+    }
+    sqlite3_finalize(rq);
+    if (tgt_route_id == 0) {
+        return;
+    }
+
+    /* The reverse edge's properties describe the caller on the source side. */
+    char caller_name[CBM_SZ_256] = {0};
+    char caller_file[CBM_SZ_512] = {0};
+    lookup_node_info(src_db, caller_id, caller_name, sizeof(caller_name), caller_file,
+                     sizeof(caller_file));
+
+    char rev[CR_PROPS_BUF];
+    build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, url_path, "url_path",
+                      method);
+    insert_cross_edge(tgt_store, tgt_project, handler_id, tgt_route_id, edge_type, rev);
+}
+
+/* Match every HTTP_CALLS edge of src_project against Route nodes in the
+ * target store and emit CROSS_HTTP_CALLS edges for each hit.
+ * Returns the number of matched edges (capped at CR_MAX_EDGES). */
+static int match_http_routes(cbm_store_t *src_store, const char *src_project,
+                             cbm_store_t *tgt_store, const char *tgt_project) {
+    struct sqlite3 *src_db = cbm_store_get_db(src_store);
+    if (!src_db) {
+        return 0;
+    }
+
+    /* Find all HTTP_CALLS edges in source project */
+    sqlite3_stmt *s = NULL;
+    if (sqlite3_prepare_v2(src_db,
+                           "SELECT e.source_id, e.target_id, e.properties FROM edges e "
+                           "WHERE e.project = ?1 AND e.type = 'HTTP_CALLS'",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC);
+
+    int count = 0;
+    while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) {
+        int64_t caller_id = sqlite3_column_int64(s, 0);
+        int64_t route_id = sqlite3_column_int64(s, SKIP_ONE);
+        const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN);
+
+        char url_path[CBM_SZ_256] = {0};
+        char method[CBM_SZ_32] = {0};
+        json_str_prop(props, "url_path", url_path, sizeof(url_path));
+        json_str_prop(props, "method", method, sizeof(method));
+        if (!url_path[0]) {
+            continue;
+        }
+
+        /* Normalise: strip scheme+host+port so that a full URL recorded on the
+         * consumer side is reduced to its path, which is what provider-side
+         * route QNs are keyed on ("/v2/orders/{id}"). Issue #523. */
+        const char *norm_path = cr_url_path(url_path);
+
+        /* Build the expected Route QN in the target project */
+        char route_qn[CR_QN_BUF];
+        snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", method[0] ? method : "ANY",
+                 norm_path);
+
+        char handler_name[CBM_SZ_256] = {0};
+        char handler_file[CBM_SZ_512] = {0};
+        int64_t handler_id =
+            find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name),
+                               handler_file, sizeof(handler_file));
+        if (handler_id == 0) {
+            /* Try without method (ANY) */
+            snprintf(route_qn, sizeof(route_qn), "__route__ANY__%s", norm_path);
+            handler_id = find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name),
+                                            handler_file, sizeof(handler_file));
+        }
+        if (handler_id == 0) {
+            /* Exact QN lookup missed — try path-param template matching.
+             * Handles concrete vs template mismatch ("123" vs "{id}"). Issue #523.
+             * On success route_qn is overwritten with the matched route's QN. */
+            handler_id = find_route_handler_fuzzy(
+                tgt_store, norm_path, method[0] ? method : NULL, route_qn, sizeof(route_qn),
+                handler_name, sizeof(handler_name), handler_file, sizeof(handler_file));
+        }
+        if (handler_id == 0) {
+            continue;
+        }
+
+        /* The edge properties keep the URL exactly as the consumer wrote it. */
+        emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id,
+                                       tgt_store, tgt_project, handler_id, route_qn, handler_name,
+                                       handler_file, url_path, method, "CROSS_HTTP_CALLS");
+
+        count++;
+    }
+    sqlite3_finalize(s);
+    return count;
+}
+
+/* ── Phase B: Async matching ─────────────────────────────────────── */
+
+/* Match every ASYNC_CALLS edge of src_project against Route nodes in the
+ * target store, keyed as "__route__<broker>__<url_path>" (broker defaults to
+ * "async"). Only a forward CROSS_ASYNC_CALLS edge is written, into the source
+ * store. Returns the number of matched edges (capped at CR_MAX_EDGES). */
+static int match_async_routes(cbm_store_t *src_store, const char *src_project,
+                              cbm_store_t *tgt_store, const char *tgt_project) {
+    struct sqlite3 *src_db = cbm_store_get_db(src_store);
+    if (!src_db) {
+        return 0;
+    }
+
+    sqlite3_stmt *s = NULL;
+    if (sqlite3_prepare_v2(src_db,
+                           "SELECT e.source_id, e.target_id, e.properties FROM edges e "
+                           "WHERE e.project = ?1 AND e.type = 'ASYNC_CALLS'",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC);
+
+    int count = 0;
+    while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) {
+        int64_t caller_id = sqlite3_column_int64(s, 0);
+        int64_t route_id = sqlite3_column_int64(s, SKIP_ONE);
+        const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN);
+
+        char url_path[CBM_SZ_256] = {0};
+        char broker[CBM_SZ_128] = {0};
+        json_str_prop(props, "url_path", url_path, sizeof(url_path));
+        json_str_prop(props, "broker", broker, sizeof(broker));
+        if (!url_path[0]) {
+            continue;
+        }
+
+        char route_qn[CR_QN_BUF];
+        snprintf(route_qn, sizeof(route_qn), "__route__%s__%s", broker[0] ? broker : "async",
+                 url_path);
+
+        char handler_name[CBM_SZ_256] = {0};
+        char handler_file[CBM_SZ_512] = {0};
+        int64_t handler_id =
+            find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name),
+                               handler_file, sizeof(handler_file));
+        if (handler_id == 0) {
+            continue;
+        }
+
+        char edge_props[CR_PROPS_BUF];
+        build_cross_props(edge_props, sizeof(edge_props), tgt_project, handler_name, handler_file,
+                          url_path, "url_path", broker);
+        insert_cross_edge(src_store, src_project, caller_id, route_id, "CROSS_ASYNC_CALLS",
+                          edge_props);
+        count++;
+    }
+    sqlite3_finalize(s);
+    return count;
+}
+
+/* ── Phase C: Channel matching ───────────────────────────────────── */
+
+/* Try to find a matching listener in target DB for a channel name.
*/
+static bool try_match_channel_listener(cbm_store_t *src_store, const char *src_project,
+                                       cbm_store_t *tgt_store, const char *tgt_project,
+                                       const char *channel_name, const char *transport,
+                                       int64_t emitter_id, int64_t channel_id) {
+    struct sqlite3 *tgt_db = cbm_store_get_db(tgt_store);
+    if (!tgt_db) {
+        return false;
+    }
+    /* One row: a Channel node with this name in the target project, joined to
+     * a function that LISTENS_ON it. */
+    sqlite3_stmt *tq = NULL;
+    if (sqlite3_prepare_v2(tgt_db,
+                           "SELECT n.id, e.source_id, fn.name, fn.file_path FROM nodes n "
+                           "JOIN edges e ON e.target_id = n.id AND e.type = 'LISTENS_ON' "
+                           "JOIN nodes fn ON fn.id = e.source_id "
+                           "WHERE n.project = ?1 AND n.name = ?2 AND n.label = 'Channel' LIMIT 1",
+                           CBM_NOT_FOUND, &tq, NULL) != SQLITE_OK) {
+        return false;
+    }
+    sqlite3_bind_text(tq, SKIP_ONE, tgt_project, CBM_NOT_FOUND, SQLITE_STATIC);
+    sqlite3_bind_text(tq, PAIR_LEN, channel_name, CBM_NOT_FOUND, SQLITE_STATIC);
+
+    bool matched = false;
+    if (sqlite3_step(tq) == SQLITE_ROW) {
+        int64_t tgt_channel_id = sqlite3_column_int64(tq, 0);
+        int64_t listener_id = sqlite3_column_int64(tq, SKIP_ONE);
+        const char *listener_name = (const char *)sqlite3_column_text(tq, PAIR_LEN);
+        const char *listener_file = (const char *)sqlite3_column_text(tq, CR_COL_3);
+
+        /* Forward edge: emitter → local Channel */
+        char fwd[CR_PROPS_BUF];
+        build_cross_props(fwd, sizeof(fwd), tgt_project, listener_name ? listener_name : "",
+                          listener_file ? listener_file : "", channel_name, "channel_name",
+                          transport);
+        insert_cross_edge(src_store, src_project, emitter_id, channel_id, "CROSS_CHANNEL", fwd);
+
+        /* Reverse edge: listener → target Channel. The emitter's name and file
+         * stay empty when the source DB handle is unavailable; the handle is
+         * checked here instead of being passed on unverified. */
+        char caller_name[CBM_SZ_256] = {0};
+        char caller_file[CBM_SZ_512] = {0};
+        struct sqlite3 *src_db = cbm_store_get_db(src_store);
+        if (src_db) {
+            lookup_node_info(src_db, emitter_id, caller_name, sizeof(caller_name), caller_file,
+                             sizeof(caller_file));
+        }
+
+        char rev[CR_PROPS_BUF];
+        build_cross_props(rev, sizeof(rev), src_project, caller_name, caller_file, channel_name,
+                          "channel_name", transport);
+        insert_cross_edge(tgt_store, tgt_project, listener_id, tgt_channel_id, "CROSS_CHANNEL",
+                          rev);
+        matched = true;
+    }
+    sqlite3_finalize(tq);
+    return matched;
+}
+
+/* For every (Channel, emitter) pair in src_project, i.e. each Channel node
+ * with an incoming EMITS edge, look for a listener on a same-named channel in
+ * the target project and emit CROSS_CHANNEL edges in both stores.
+ * Returns the number of matched pairs (capped at CR_MAX_EDGES). */
+static int match_channels(cbm_store_t *src_store, const char *src_project, cbm_store_t *tgt_store,
+                          const char *tgt_project) {
+    struct sqlite3 *src_db = cbm_store_get_db(src_store);
+    if (!src_db) {
+        return 0;
+    }
+
+    sqlite3_stmt *s = NULL;
+    if (sqlite3_prepare_v2(src_db,
+                           "SELECT DISTINCT n.id, n.name, n.qualified_name, n.properties, "
+                           "e.source_id FROM nodes n "
+                           "JOIN edges e ON e.target_id = n.id AND e.type = 'EMITS' "
+                           "WHERE n.project = ?1 AND n.label = 'Channel'",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC);
+
+    int count = 0;
+    while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) {
+        const char *channel_name = (const char *)sqlite3_column_text(s, SKIP_ONE);
+        const char *channel_qn = (const char *)sqlite3_column_text(s, PAIR_LEN);
+        /* The QN is only checked for presence; matching is done by name. */
+        if (!channel_name || !channel_qn) {
+            continue;
+        }
+        int64_t channel_id = sqlite3_column_int64(s, 0);
+        const char *channel_props = (const char *)sqlite3_column_text(s, CR_COL_3);
+        int64_t emitter_id = sqlite3_column_int64(s, CR_COL_4);
+
+        char transport[CBM_SZ_64] = {0};
+        json_str_prop(channel_props, "transport", transport, sizeof(transport));
+
+        if (try_match_channel_listener(src_store, src_project, tgt_store, tgt_project, channel_name,
+                                       transport, emitter_id, channel_id)) {
+            count++;
+        }
+    }
+    sqlite3_finalize(s);
+    return count;
+}
+
+/* ── Phase D: Generic route-type matcher (gRPC, GraphQL, tRPC) ──── */
+
+/* Look up a node's qualified_name by id. Returns true if found.
+ * `out` is set to "" first and stays empty on any failure. */
+static bool lookup_node_qn(struct sqlite3 *db, int64_t node_id, char *out, size_t out_sz) {
+    out[0] = '\0';
+    sqlite3_stmt *st = NULL;
+    if (sqlite3_prepare_v2(db, "SELECT qualified_name FROM nodes WHERE id = ?1", CBM_NOT_FOUND, &st,
+                           NULL) != SQLITE_OK) {
+        return false;
+    }
+    sqlite3_bind_int64(st, SKIP_ONE, node_id);
+    bool found = false;
+    if (sqlite3_step(st) == SQLITE_ROW) {
+        const char *qn = (const char *)sqlite3_column_text(st, 0);
+        if (qn) {
+            snprintf(out, out_sz, "%s", qn);
+            found = true;
+        }
+    }
+    sqlite3_finalize(st);
+    return found;
+}
+
+/* Match edges of a given type against Route nodes in the target store.
+ * For each `edge_type` edge of src_project, the QN of the edge's local target
+ * node is looked up as a Route QN in the target store; on a hit,
+ * `cross_edge_type` edges are emitted in both stores.
+ * Reuses the same infrastructure as HTTP/async matching.
+ * Returns the number of matched edges (capped at CR_MAX_EDGES). */
+static int match_typed_routes(cbm_store_t *src_store, const char *src_project,
+                              cbm_store_t *tgt_store, const char *tgt_project,
+                              const char *edge_type, const char *svc_key, const char *method_key,
+                              const char *cross_edge_type) {
+    struct sqlite3 *src_db = cbm_store_get_db(src_store);
+    if (!src_db) {
+        return 0;
+    }
+
+    /* The edge type is bound as a parameter rather than formatted into the
+     * SQL text, so no sized SQL buffer or quoting is involved. */
+    sqlite3_stmt *s = NULL;
+    if (sqlite3_prepare_v2(src_db,
+                           "SELECT e.source_id, e.target_id, e.properties FROM edges e "
+                           "WHERE e.project = ?1 AND e.type = ?2",
+                           CBM_NOT_FOUND, &s, NULL) != SQLITE_OK) {
+        return 0;
+    }
+    sqlite3_bind_text(s, SKIP_ONE, src_project, CBM_NOT_FOUND, SQLITE_STATIC);
+    sqlite3_bind_text(s, PAIR_LEN, edge_type, CBM_NOT_FOUND, SQLITE_STATIC);
+
+    int count = 0;
+    while (sqlite3_step(s) == SQLITE_ROW && count < CR_MAX_EDGES) {
+        int64_t caller_id = sqlite3_column_int64(s, 0);
+        int64_t route_id = sqlite3_column_int64(s, SKIP_ONE);
+        const char *props = (const char *)sqlite3_column_text(s, PAIR_LEN);
+
+        /* Edges carrying neither property are skipped. */
+        char svc_val[CBM_SZ_256] = {0};
+        char meth_val[CBM_SZ_256] = {0};
+        json_str_prop(props, svc_key, svc_val, sizeof(svc_val));
+        json_str_prop(props, method_key, meth_val, sizeof(meth_val));
+        if (!svc_val[0] && !meth_val[0]) {
+            continue;
+        }
+
+        /* Look up the Route QN from the target node (already points to the Route). */
+        char route_qn[CR_QN_BUF] = {0};
+        if (!lookup_node_qn(src_db, route_id, route_qn, sizeof(route_qn))) {
+            continue;
+        }
+
+        char handler_name[CBM_SZ_256] = {0};
+        char handler_file[CBM_SZ_512] = {0};
+        int64_t handler_id =
+            find_route_handler(tgt_store, route_qn, handler_name, sizeof(handler_name),
+                               handler_file, sizeof(handler_file));
+        if (handler_id == 0) {
+            continue;
+        }
+
+        /* NOTE(review): svc_val and svc_key are passed in the url_path and
+         * method positions, so the property NAME (svc_key) is what gets stored
+         * as the "method" argument and meth_val is not stored at all. Confirm
+         * this is intended. */
+        emit_cross_route_bidirectional(src_store, src_project, src_db, caller_id, route_id,
+                                       tgt_store, tgt_project, handler_id, route_qn, handler_name,
+                                       handler_file, svc_val, svc_key, cross_edge_type);
+        count++;
+    }
+    sqlite3_finalize(s);
+    return count;
+}
+
+/* ── Collect target projects ─────────────────────────────────────── */
+
+/* When target_projects = ["*"], scan the cache directory for all .db files.
*/ +static int collect_all_projects(char ***out) { + const char *dir = cr_cache_dir(); + cbm_dir_t *d = cbm_opendir(dir); + if (!d) { + *out = NULL; + return 0; + } + + int cap = CR_INIT_CAP; + int count = 0; + char **projects = malloc((size_t)cap * sizeof(char *)); + + cbm_dirent_t *ent; + while ((ent = cbm_readdir(d)) != NULL) { + size_t len = strlen(ent->name); + if (len < CR_COL_4 || strcmp(ent->name + len - CR_DB_EXT_LEN, ".db") != 0) { + continue; + } + if (strstr(ent->name, "_cross_repo") || strstr(ent->name, "_config")) { + continue; + } + if (strstr(ent->name, "-wal") || strstr(ent->name, "-shm")) { + continue; + } + if (count >= cap) { + cap *= PAIR_LEN; + char **tmp = realloc(projects, (size_t)cap * sizeof(char *)); + if (!tmp) { + break; + } + projects = tmp; + } + /* Strip .db extension */ + projects[count] = malloc(len - PAIR_LEN); + memcpy(projects[count], ent->name, len - CR_DB_EXT_LEN); + projects[count][len - CR_DB_EXT_LEN] = '\0'; + count++; + } + cbm_closedir(d); + + *out = projects; + return count; +} + +static void free_project_list(char **projects, int count) { + for (int i = 0; i < count; i++) { + free(projects[i]); + } + free(projects); +} + +/* ── Entry point ─────────────────────────────────────────────────── */ + +cbm_cross_repo_result_t cbm_cross_repo_match(const char *project, const char **target_projects, + int target_count) { + cbm_cross_repo_result_t result = {0}; + struct timespec t0; + clock_gettime(CLOCK_MONOTONIC, &t0); + + /* Open source project store (read-write) */ + char src_path[CR_PATH_BUF]; + cr_db_path(project, src_path, sizeof(src_path)); + cbm_store_t *src_store = cbm_store_open_path(src_path); + if (!src_store) { + return result; + } + + /* Clean existing CROSS_* edges for this project */ + delete_cross_edges(src_store, project); + + /* Resolve target projects */ + char **resolved = NULL; + int resolved_count = 0; + bool own_list = false; + + if (target_count == SKIP_ONE && strcmp(target_projects[0], "*") == 0) { + 
resolved_count = collect_all_projects(&resolved); + own_list = true; + } else { + resolved = (char **)target_projects; + resolved_count = target_count; + } + + /* Match against each target */ + for (int i = 0; i < resolved_count; i++) { + const char *tgt = resolved[i]; + if (strcmp(tgt, project) == 0) { + continue; /* skip self */ + } + + char tgt_path[CR_PATH_BUF]; + cr_db_path(tgt, tgt_path, sizeof(tgt_path)); + + /* Open target store read-write (for bidirectional edge writes) */ + cbm_store_t *tgt_store = cbm_store_open_path(tgt_path); + if (!tgt_store) { + continue; + } + + result.http_edges += match_http_routes(src_store, project, tgt_store, tgt); + result.http_edges += match_http_routes(tgt_store, tgt, src_store, project); + result.async_edges += match_async_routes(src_store, project, tgt_store, tgt); + result.channel_edges += match_channels(src_store, project, tgt_store, tgt); + result.grpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "GRPC_CALLS", + "service", "method", "CROSS_GRPC_CALLS"); + result.graphql_edges += + match_typed_routes(src_store, project, tgt_store, tgt, "GRAPHQL_CALLS", "operation", + "operation", "CROSS_GRAPHQL_CALLS"); + result.trpc_edges += match_typed_routes(src_store, project, tgt_store, tgt, "TRPC_CALLS", + "procedure", "procedure", "CROSS_TRPC_CALLS"); + result.projects_scanned++; + + cbm_store_close(tgt_store); + } + + cbm_store_close(src_store); + + if (own_list) { + free_project_list(resolved, resolved_count); + } + + struct timespec t1; + clock_gettime(CLOCK_MONOTONIC, &t1); + result.elapsed_ms = ((double)(t1.tv_sec - t0.tv_sec) * CR_MS_PER_SEC) + + ((double)(t1.tv_nsec - t0.tv_nsec) / CR_NS_PER_MS); + + int total = result.http_edges + result.async_edges + result.channel_edges + result.grpc_edges + + result.graphql_edges + result.trpc_edges; + cbm_log_info("cross_repo.done", "project", project, "total", cr_itoa(total)); + + return result; +}