From 3892e7dcddd5173d4d83cc6839ce0d888aa36226 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:31:28 -0600 Subject: [PATCH 01/25] chore: gitignore napi-generated artifacts in crates/codegraph-core --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index aa62bcb59..2606dd88f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ dist/ coverage/ .env grammars/*.wasm +crates/codegraph-core/index.js +crates/codegraph-core/index.d.ts +crates/codegraph-core/*.node .claude/session-edits.log .claude/worktrees/ generated/DEPENDENCIES.md From ef8ea4fb31f4c073529c8772fa3aa9288a992fd6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:32:19 -0600 Subject: [PATCH 02/25] chore(tests): remove unused biome suppression in visitor.test.ts --- tests/unit/visitor.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/visitor.test.ts b/tests/unit/visitor.test.ts index 992b4307a..62dc5b3e6 100644 --- a/tests/unit/visitor.test.ts +++ b/tests/unit/visitor.test.ts @@ -4,7 +4,6 @@ import { describe, expect, it } from 'vitest'; // We need a tree-sitter tree to test. Use the JS parser. 
-// biome-ignore lint/suspicious/noExplicitAny: tree-sitter parser type is complex and not worth typing for tests let parse: any; async function ensureParser() { From a372b82593ddfecd591c17133ef378872ffbec13 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:33:09 -0600 Subject: [PATCH 03/25] fix(titan-run): sync --start-from enum and phase-timestamp list with actual phases --- .claude/skills/titan-run/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/titan-run/SKILL.md b/.claude/skills/titan-run/SKILL.md index 51e22569f..04c3fdc92 100644 --- a/.claude/skills/titan-run/SKILL.md +++ b/.claude/skills/titan-run/SKILL.md @@ -1,7 +1,7 @@ --- name: titan-run description: Run the full Titan Paradigm pipeline end-to-end by dispatching each phase to sub-agents with fresh context windows. Orchestrates recon → gauntlet → sync → forge → grind (+ repo-provided parity audit) automatically. -argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity> <--gauntlet-batch-size 5> <--yes> +argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity|close> <--gauntlet-batch-size 5> <--yes> allowed-tools: Agent, Read, Bash, Glob, Write, Edit --- @@ -50,7 +50,7 @@ You are the **orchestrator** for the full Titan Paradigm pipeline. Your job is t node -e "const fs=require('fs');const s=JSON.parse(fs.readFileSync('.codegraph/titan/titan-state.json','utf8'));s.phaseTimestamps=s.phaseTimestamps||{};s.phaseTimestamps['']=s.phaseTimestamps['']||{};s.phaseTimestamps[''].completedAt=new Date().toISOString();fs.writeFileSync('.codegraph/titan/titan-state.json',JSON.stringify(s,null,2));" ``` - Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `parity`, or `close`. 
**Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. + Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `grind`, `parity`, or `close`. **Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. **Timestamp validation:** After recording `completedAt` for any phase, verify `startedAt < completedAt`: ```bash From 9a52c7cc5eea2cba016d14ed3928e07128fad4e3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:36:04 -0600 Subject: [PATCH 04/25] fix(hooks): track Bash file modifications via before/after git status diff Adds snapshot-pre-bash.sh (PreToolUse Bash) + track-bash-writes.sh (PostToolUse Bash): the pre-hook captures git status --porcelain to a per-worktree temp file before each Bash call; the post-hook diffs the before/after state and appends newly modified or created files to .claude/session-edits.log. This closes the gap where files written by sed -i, printf redirects, tee, heredocs, or build tools (Cargo.lock, lockfiles) were never recorded, causing guard-git.sh to emit false-positive BLOCKED errors. 
Closes #1457 --- .claude/hooks/snapshot-pre-bash.sh | 54 +++++++++++++ .claude/hooks/track-bash-writes.sh | 119 +++++++++++++++++++++++++++++ .claude/settings.json | 10 +++ 3 files changed, 183 insertions(+) create mode 100755 .claude/hooks/snapshot-pre-bash.sh create mode 100755 .claude/hooks/track-bash-writes.sh diff --git a/.claude/hooks/snapshot-pre-bash.sh b/.claude/hooks/snapshot-pre-bash.sh new file mode 100755 index 000000000..a91caebbb --- /dev/null +++ b/.claude/hooks/snapshot-pre-bash.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# snapshot-pre-bash.sh — PreToolUse hook for Bash tool calls +# Snapshots `git status --porcelain` to a temp file before each Bash call so +# that track-bash-writes.sh (PostToolUse) can diff the before/after state and +# log files newly modified by the command to .claude/session-edits.log. +# Always exits 0 (informational only, never blocks). + +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Skip read-only commands that can never write files — reduces snapshot overhead +# for the most common Bash calls (ls, cat, grep, git log, git status, etc.). +# sed is intentionally NOT in this list because `sed -i` modifies files in-place. 
+if echo "$COMMAND" | grep -qE '^\s*(ls|cat|head|tail|grep|find|git\s+(log|status|diff|show|branch|remote|fetch|rev-parse|stash\s+list|ls-files|blame|describe|tag|config\s+--get)|gh\s+(pr|issue|repo)\s+(view|list|status)|echo|printf|pwd|which|node\s+-e|node\s+-p|npx\s+--version|wc|sort|uniq|awk)\b'; then + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Key the snapshot file to the project root so parallel worktrees don't collide. +# Use a simple hash of the path — just enough to be unique per worktree. +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# Capture current git status --porcelain. +# Lines look like: "XY filename" or "XY orig -> dest" (rename). +# We only care about the status marker and path — porcelain is stable across git versions. +git -C "$PROJECT_DIR" status --porcelain 2>/dev/null > "$SNAPSHOT_FILE" || true + +exit 0 diff --git a/.claude/hooks/track-bash-writes.sh b/.claude/hooks/track-bash-writes.sh new file mode 100755 index 000000000..e5d1ded98 --- /dev/null +++ b/.claude/hooks/track-bash-writes.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# track-bash-writes.sh — PostToolUse hook for Bash tool calls +# Compares `git status --porcelain` against the snapshot taken by +# snapshot-pre-bash.sh (PreToolUse) to detect files newly modified or +# created by the Bash command, then appends them to .claude/session-edits.log +# so that guard-git.sh can validate commits correctly. +# Always exits 0 (informational only, never blocks). 
+ +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Reproduce the same project hash used by snapshot-pre-bash.sh +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# If there is no snapshot (hook was not installed yet, or the pre-hook was +# skipped for a read-only command) we have no baseline — exit cleanly. +if [ ! -f "$SNAPSHOT_FILE" ]; then + exit 0 +fi + +# Capture current state after the command ran +AFTER=$(git -C "$PROJECT_DIR" status --porcelain 2>/dev/null) || true + +# Read the before-state +BEFORE=$(cat "$SNAPSHOT_FILE") || true + +# Clean up the snapshot so it doesn't pollute the next command's pre-hook +rm -f "$SNAPSHOT_FILE" + +# Build the set of paths that existed (as dirty) before the command ran. +# porcelain format: "XY path" or "XY original -> new" (rename). +# We extract every path token after the two-char status code. 
+parse_paths() { + local status_output="$1" + echo "$status_output" | awk ' + /^[ MADRCU?!]{2} / { + # Drop the two-char status + space + rest = substr($0, 4) + # Handle rename: "old -> new" + if (index(rest, " -> ") > 0) { + n = split(rest, parts, " -> ") + for (i = 1; i <= n; i++) { + p = parts[i] + gsub(/^"/, "", p); gsub(/"$/, "", p) + if (p != "") print p + } + } else { + gsub(/^"/, "", rest); gsub(/"$/, "", rest) + if (rest != "") print rest + } + } + ' +} + +BEFORE_PATHS=$(parse_paths "$BEFORE" | sort) +AFTER_PATHS=$(parse_paths "$AFTER" | sort) + +if [ -z "$AFTER_PATHS" ]; then + exit 0 +fi + +# Find paths present in AFTER but not in BEFORE — these were newly dirtied +# (modified, created, or renamed-to) by the Bash command. +NEW_PATHS=$(comm -13 <(echo "$BEFORE_PATHS") <(echo "$AFTER_PATHS")) || true + +if [ -z "$NEW_PATHS" ]; then + exit 0 +fi + +# Also exclude paths that were already tracked by track-edits.sh or other hooks +# (i.e. already in the session-edits.log) so we don't double-log. 
+LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" +ALREADY_LOGGED="" +if [ -f "$LOG_FILE" ] && [ -s "$LOG_FILE" ]; then + ALREADY_LOGGED=$(awk '{print $2}' "$LOG_FILE" | sort -u) +fi + +mkdir -p "$(dirname "$LOG_FILE")" +TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + +while IFS= read -r rel_path; do + if [ -z "$rel_path" ]; then + continue + fi + # Skip if already in the log from a prior hook (Edit/Write/track-moves) + if [ -n "$ALREADY_LOGGED" ] && echo "$ALREADY_LOGGED" | grep -qxF "$rel_path"; then + continue + fi + echo "$TS $rel_path" >> "$LOG_FILE" +done <<< "$NEW_PATHS" + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index b3acd6d1b..7ab746809 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/snapshot-pre-bash.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/check-readme.sh\"", @@ -79,6 +84,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-bash-writes.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-moves.sh\"", From 85a26df4f1a06db752548b3f5e3d299ec5f46806 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:39:52 -0600 Subject: [PATCH 05/25] chore(native): remove dead code (unused var, 
method, variant, fields) - clojure.rs: annotate lifetime-anchor assignment to silence false-positive - cfg.rs: remove never-called start_line_of method - complexity.rs: remove never-constructed NotHandled variant; convert irrefutable if-let patterns to plain let destructures - dataflow.rs: remove never-read callee fields from CallReturn/Destructured - incremental.rs: remove never-read lang field from CacheEntry cargo check and cargo clippy both clean after these changes. --- crates/codegraph-core/src/ast_analysis/cfg.rs | 4 --- .../src/ast_analysis/complexity.rs | 36 ++++++++----------- .../src/ast_analysis/dataflow.rs | 10 +++--- .../src/domain/graph/builder/incremental.rs | 3 +- .../codegraph-core/src/extractors/clojure.rs | 4 +++ 5 files changed, 25 insertions(+), 32 deletions(-) diff --git a/crates/codegraph-core/src/ast_analysis/cfg.rs b/crates/codegraph-core/src/ast_analysis/cfg.rs index 226a31362..fb784d40f 100644 --- a/crates/codegraph-core/src/ast_analysis/cfg.rs +++ b/crates/codegraph-core/src/ast_analysis/cfg.rs @@ -659,10 +659,6 @@ impl<'a> CfgBuilder<'a> { } } - fn start_line_of(&self, block_idx: u32) -> Option { - self.blocks.iter().find(|b| b.index == block_idx).and_then(|b| b.start_line) - } - /// Get statement children from a block or statement list. fn get_statements<'b>(&self, node: &Node<'b>) -> Vec> { let kind = node.kind(); diff --git a/crates/codegraph-core/src/ast_analysis/complexity.rs b/crates/codegraph-core/src/ast_analysis/complexity.rs index fdd572512..9827b091f 100644 --- a/crates/codegraph-core/src/ast_analysis/complexity.rs +++ b/crates/codegraph-core/src/ast_analysis/complexity.rs @@ -516,8 +516,6 @@ fn walk_children( enum BranchAction { /// Node handled — walk children at the given nesting delta, then return. Handled { cognitive_delta: u32, cyclomatic_delta: u32, nesting_delta: u32 }, - /// Not a special branch pattern — fall through to normal processing. 
- NotHandled, } /// Classify a branch node (one where `rules.is_branch(kind)` is true). @@ -675,14 +673,12 @@ fn walk( // Branch/control flow nodes (skip keyword leaf tokens) if rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); + return; } // Pattern C plain else (Go/Java) @@ -1323,17 +1319,15 @@ fn walk_all( // Branch/control flow nodes (skip keyword leaf tokens) if c_rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, c_rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_all_children( - node, source, nesting_level + nesting_delta, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, c_rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_all_children( + node, source, nesting_level + nesting_delta, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; } // Pattern C plain else (Go/Java) diff --git a/crates/codegraph-core/src/ast_analysis/dataflow.rs b/crates/codegraph-core/src/ast_analysis/dataflow.rs 
index ddb4a11a1..5a897c0b9 100644 --- a/crates/codegraph-core/src/ast_analysis/dataflow.rs +++ b/crates/codegraph-core/src/ast_analysis/dataflow.rs @@ -882,8 +882,8 @@ fn collect_identifiers(node: &Node, out: &mut Vec, rules: &DataflowRules #[derive(Debug, Clone)] enum LocalSource { - CallReturn { callee: String }, - Destructured { callee: String }, + CallReturn, + Destructured, } struct ScopeFrame { @@ -1200,7 +1200,7 @@ fn handle_var_declarator( }); scope .locals - .insert(n.clone(), LocalSource::Destructured { callee: callee.clone() }); + .insert(n.clone(), LocalSource::Destructured); } } else { let var_name = node_text(&name_n, source).to_string(); @@ -1211,7 +1211,7 @@ fn handle_var_declarator( expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } @@ -1267,7 +1267,7 @@ fn handle_assignment( line: node_line(node), }); if let Some(scope) = scope_stack.last_mut() { - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } } diff --git a/crates/codegraph-core/src/domain/graph/builder/incremental.rs b/crates/codegraph-core/src/domain/graph/builder/incremental.rs index 35fa04345..4985904ed 100644 --- a/crates/codegraph-core/src/domain/graph/builder/incremental.rs +++ b/crates/codegraph-core/src/domain/graph/builder/incremental.rs @@ -10,7 +10,6 @@ use crate::types::FileSymbols; struct CacheEntry { tree: Tree, - lang: LanguageKind, } /// Cache of parse trees for incremental parsing. 
@@ -51,7 +50,7 @@ impl ParseTreeCache { let symbols = extract_symbols(lang, &tree, source_bytes, &file_path); - self.entries.insert(file_path, CacheEntry { tree, lang }); + self.entries.insert(file_path, CacheEntry { tree }); Some(symbols) } diff --git a/crates/codegraph-core/src/extractors/clojure.rs b/crates/codegraph-core/src/extractors/clojure.rs index b5160474f..7263ecf1a 100644 --- a/crates/codegraph-core/src/extractors/clojure.rs +++ b/crates/codegraph-core/src/extractors/clojure.rs @@ -51,6 +51,10 @@ fn walk_clojure( return; } + // `next_ns_owned` holds the String so that `next_ns` can borrow it as + // `&str` for the duration of this stack frame. The assignment looks + // "never read" to the compiler but the borrow on the next line reads it. + #[allow(unused_assignments)] let mut next_ns_owned: Option = None; let next_ns: Option<&str> = if node.kind() == "list_lit" { match handle_list_form(node, source, symbols, current_ns) { From 184d22167f2f58a7a569538af6b616a1c11a3744 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:41:46 -0600 Subject: [PATCH 06/25] refactor(native): extract emit_pts_alias_edges params into PtsAliasCtx struct --- .../graph/builder/stages/build_edges.rs | 79 +++++++++++++------ 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 3475adebe..39108e3d8 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -360,50 +360,55 @@ fn resolve_via_points_to<'a>( } } +/// Per-call-site inputs for `emit_pts_alias_edges`. +/// Groups the lookup parameters so the function stays within the argument-count limit. 
+struct PtsAliasCtx<'a> { + pts: &'a HashMap>, + lookup_name: &'a str, + call_line: u32, + caller_id: u32, + caller_name: &'a str, + is_dynamic: u32, + rel_path: &'a str, + imported_names: &'a HashMap<&'a str, &'a str>, + type_map: &'a HashMap<&'a str, (&'a str, f64)>, +} + /// Resolve each pts alias of `lookup_name` and emit hop-penalised call edges. /// Shared by the no-receiver gate and the receiver-key (`rest.prop()`) fallback; /// mirrors the alias-emission loops in buildFileCallEdges (build-edges.ts). -#[allow(clippy::too_many_arguments)] fn emit_pts_alias_edges<'a>( ctx: &EdgeContext<'a>, - pts: &HashMap>, - lookup_name: &str, - call_line: u32, - caller_id: u32, - caller_name: &str, - is_dynamic: u32, - rel_path: &str, - imported_names: &HashMap<&str, &str>, - type_map: &HashMap<&str, (&str, f64)>, + alias_ctx: &PtsAliasCtx<'_>, seen_edges: &HashSet, pts_edge_map: &mut HashMap, edges: &mut Vec, ) { - for alias in resolve_via_points_to(lookup_name, pts) { - let alias_imported_from = imported_names.get(alias).copied(); + for alias in resolve_via_points_to(alias_ctx.lookup_name, alias_ctx.pts) { + let alias_imported_from = alias_ctx.imported_names.get(alias).copied(); let alias_call = CallInfo { name: alias.to_string(), - line: call_line, + line: alias_ctx.call_line, dynamic: Some(true), receiver: None, }; let mut alias_targets = resolve_call_targets( - ctx, &alias_call, rel_path, alias_imported_from, type_map, caller_name, + ctx, &alias_call, alias_ctx.rel_path, alias_imported_from, alias_ctx.type_map, alias_ctx.caller_name, ); - sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); + sort_targets_by_confidence(&mut alias_targets, alias_ctx.rel_path, alias_imported_from); for t in &alias_targets { - let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { - let conf = resolve::compute_confidence(rel_path, &t.file, alias_imported_from) 
+ let edge_key = ((alias_ctx.caller_id as u64) << 32) | (t.id as u64); + if t.id != alias_ctx.caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { + let conf = resolve::compute_confidence(alias_ctx.rel_path, &t.file, alias_imported_from) - PROPAGATION_HOP_PENALTY; if conf > 0.0 { pts_edge_map.insert(edge_key, edges.len()); edges.push(ComputedEdge { - source_id: caller_id, + source_id: alias_ctx.caller_id, target_id: t.id, kind: "calls".to_string(), confidence: conf, - dynamic: is_dynamic, + dynamic: alias_ctx.is_dynamic, }); } } @@ -593,8 +598,21 @@ fn process_file<'a>( }; if let Some(lookup_name) = lookup_name { emit_pts_alias_edges( - ctx, pts, &lookup_name, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &lookup_name, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } @@ -609,8 +627,21 @@ fn process_file<'a>( let receiver_key = format!("{}.{}", receiver, call.name); if pts.contains_key(receiver_key.as_str()) { emit_pts_alias_edges( - ctx, pts, &receiver_key, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &receiver_key, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } From 909e1df55b58fee7cc7d5942e1132be648fd7169 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:43:24 -0600 Subject: [PATCH 07/25] fix(wasm): sort call targets by confidence before emit to match native engine --- src/domain/graph/builder/stages/build-edges.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 3c0f2e3c3..88027aee0 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -1107,6 +1107,19 @@ function buildFileCallEdges( } } + // Sort targets by confidence descending before emitting edges. + // For multi-target calls with duplicate (source_id, target_id) pairs the + // stored confidence depends on which duplicate is processed last — sorting + // here guarantees the highest-confidence target wins on dedup, matching the + // native engine's sort_targets_by_confidence call in build_edges.rs. + if (targets.length > 1) { + targets = [...targets].sort( + (a, b) => + computeConfidence(relPath, b.file, importedFrom ?? null) - + computeConfidence(relPath, a.file, importedFrom ?? null), + ); + } + for (const t of targets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id) { From 66fc899813dd51fcb8ca94063741530ac5a1ff54 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:44:33 -0600 Subject: [PATCH 08/25] fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for incremental tiers --- scripts/benchmark.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index fbc449813..642e2b1f5 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -90,7 +90,8 @@ try { if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers; } catch { /* older release — no worker pool to dispose */ } -const INCREMENTAL_RUNS = 3; +const WARMUP_RUNS = 2; +const INCREMENTAL_RUNS = 5; const QUERY_RUNS = 5; const QUERY_WARMUP_RUNS = 3; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); @@ -154,6 +155,9 @@ const dbSizeBytes = fs.statSync(dbPath).size; console.error(` [${engine}] Benchmarking no-op rebuild...`); let noopRebuildMs = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) 
{ + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const noopTimings = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { const start = performance.now(); @@ -170,6 +174,10 @@ const original = fs.readFileSync(PROBE_FILE, 'utf8'); let oneFileRebuildMs = null; let oneFilePhases = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) { + fs.writeFileSync(PROBE_FILE, original + `\n// warmup-${i}\n`); + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const oneFileRuns = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); From 84e1a5f588beccda4ebfa6917785bb93132dc9cf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:49:25 -0600 Subject: [PATCH 09/25] ci(bench): add per-PR perf canary for extractor/graph/native changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds .github/workflows/perf-canary.yml — a path-filtered workflow that fires on PRs touching src/extractors/, src/domain/graph/, or crates/** and runs only the incremental-benchmark suite (full build + no-op + 1-file rebuild, both engines). Catches the class of regressions that accumulated invisibly across the Phase 8.x PRs and were only detected at v3.12.0 publish time. The regression guard gains BENCH_CANARY=1 mode: raises thresholds to 50%/100%/150% (standard/noisy/WASM) and skips the build, query, and resolution suites — only incremental checks run. This absorbs shared- runner timing variance while still blocking catastrophic regressions (+98% full build, +1827% 1-file rebuild from v3.12.0). 
Closes #1433 --- .github/workflows/perf-canary.yml | 111 ++++++++++++++++++++++ tests/benchmarks/regression-guard.test.ts | 49 ++++++++-- 2 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/perf-canary.yml diff --git a/.github/workflows/perf-canary.yml b/.github/workflows/perf-canary.yml new file mode 100644 index 000000000..fb4432aed --- /dev/null +++ b/.github/workflows/perf-canary.yml @@ -0,0 +1,111 @@ +name: Perf Canary + +# Lightweight per-PR build-time regression gate for PRs that touch the +# extractor, graph-builder, or native Rust layers — the parts of the codebase +# that caused the v3.12.0 regressions (+1827% 1-file rebuild, +98% full build). +# +# Only the incremental-benchmark suite is run (full build + no-op + 1-file +# rebuild for both engines). The regression guard uses BENCH_CANARY=1 mode, +# which applies a 50% threshold instead of the full suite's 25% — enough +# to catch catastrophic regressions while tolerating CI runner variance. +# +# This is intentionally separate from the full pre-publish-benchmark job in +# ci.yml, which runs unconditionally on every PR and measures the complete +# suite. The canary completes in roughly 5–10 minutes; the full suite takes +# 20–60 minutes. 
+ +on: + pull_request: + paths: + - "src/extractors/**" + - "src/domain/graph/**" + - "crates/**" + - "scripts/benchmark.ts" + - "scripts/incremental-benchmark.ts" + - "scripts/lib/bench-config.ts" + - "scripts/lib/fork-engine.ts" + +concurrency: + group: perf-canary-${{ github.ref }} + cancel-in-progress: true + +jobs: + perf-canary: + name: Perf canary (incremental tiers) + runs-on: ubuntu-latest + env: + CODEGRAPH_FAST_SKIP_DIAG: "1" + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-node@v6 + with: + node-version: "22" + cache: "npm" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: crates/codegraph-core + + - name: Install napi-rs CLI + timeout-minutes: 5 + run: npm install -g @napi-rs/cli@3 + + - name: Build native addon + working-directory: crates/codegraph-core + run: napi build --release + + - name: Install dependencies + timeout-minutes: 20 + shell: bash + run: | + for attempt in 1 2 3; do + npm install && break + if [ "$attempt" -lt 3 ]; then + echo "::warning::npm install attempt $attempt failed, retrying in 15s..." + sleep 15 + else + echo "::error::npm install failed after 3 attempts" + exit 1 + fi + done + + - name: Install native addon over published binary + run: node scripts/ci-install-native.mjs + + # Build dist/ so benchmarks load the same compiled JS that ships to npm, + # matching the methodology used by the full pre-publish-benchmark gate. 
+ - name: Build TypeScript + run: npm run build + + - name: Run incremental benchmark + timeout-minutes: 15 + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts --version dev --dist > incremental-canary-result.json + + - name: Update incremental report + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG scripts/update-incremental-report.ts incremental-canary-result.json + + - name: Regression guard (50% threshold) + env: + RUN_REGRESSION_GUARD: "1" + BENCH_CANARY: "1" + run: npm run test:regression-guard + + - name: Upload canary result + if: always() + uses: actions/upload-artifact@v7 + with: + name: incremental-canary-result + path: incremental-canary-result.json + if-no-files-found: warn diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 7ca1689d8..939dde61d 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -16,6 +16,16 @@ import { describe, expect, test } from 'vitest'; // ── Configuration ──────────────────────────────────────────────────────── +/** + * When BENCH_CANARY=1, only incremental-benchmark checks run and all timing + * thresholds are raised to 50%. This mode is used by the per-PR perf-canary + * workflow (.github/workflows/perf-canary.yml) which runs only on PRs + * touching src/extractors/, src/domain/graph/, or crates/. The looser + * threshold absorbs CI runner variance while still catching the class of + * catastrophic regressions that hit v3.12.0 (+98%/+1827%). + */ +const BENCH_CANARY = process.env.BENCH_CANARY === '1'; + /** * Maximum allowed regression (as a fraction, e.g. 0.25 = 25%). 
* @@ -26,8 +36,10 @@ import { describe, expect, test } from 'vitest'; * * Genuinely high-variance sub-30ms metrics get a wider tolerance via * `NOISY_METRICS` below — see that set's docstring for rationale. + * + * In BENCH_CANARY mode this is overridden to 0.5 (50%) — see above. */ -const REGRESSION_THRESHOLD = 0.25; +const REGRESSION_THRESHOLD = BENCH_CANARY ? 0.5 : 0.25; /** * Wider regression threshold applied to metrics in NOISY_METRICS. @@ -41,8 +53,11 @@ const REGRESSION_THRESHOLD = 0.25; * Keeping the global threshold at 25% means a regression in the 30–100ms * range is still caught (e.g. 50ms→63ms = +26%, flagged), while sub-30ms * metrics in this set get the wider 50% allowance. + * + * In BENCH_CANARY mode this is overridden to 1.0 (100%) — the canary's + * purpose is to catch gross regressions (+50%+), not sub-30ms jitter. */ -const NOISY_METRIC_THRESHOLD = 0.5; +const NOISY_METRIC_THRESHOLD = BENCH_CANARY ? 1.0 : 0.5; /** * Metric labels treated as high-variance and given the NOISY_METRIC_THRESHOLD @@ -86,8 +101,12 @@ const NOISY_METRICS = new Set(['No-op rebuild', '1-file rebuild', 'fnDep * v3.0.1–3.4.0), which 75% still flags, while absorbing the ≤71% shared-runner * jitter. Size metrics (DB bytes/file) are engine-independent and excluded from * this widening via SIZE_METRICS below — they keep the strict threshold. + * + * In BENCH_CANARY mode this is overridden to 1.5 (150%) — the canary targets + * gross regressions only, and WASM incremental metrics have extreme variance + * on shared runners. */ -const WASM_TIMING_THRESHOLD = 0.75; +const WASM_TIMING_THRESHOLD = BENCH_CANARY ? 1.5 : 0.75; /** * Metric labels that measure size/count rather than wall-clock time. These are @@ -608,6 +627,10 @@ interface IncrementalEntry { // in the default `npm test` run so docs commits that merge already-recorded // regressed history into main don't trigger false failures — by then the // release has already passed the gate. 
+// +// When BENCH_CANARY=1 (set by .github/workflows/perf-canary.yml), only the +// incremental-benchmark suite runs and thresholds are loosened (50% base, +// 100% noisy metrics, 150% WASM timing) — see the BENCH_CANARY constant above. const RUN_REGRESSION_GUARD = process.env.RUN_REGRESSION_GUARD === '1'; describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { @@ -627,7 +650,9 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { // Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version // behind the current package version). This makes the stale-exemption // problem self-detecting rather than requiring manual bookkeeping. - test('KNOWN_REGRESSIONS entries are not stale', () => { + // Skipped in canary mode — this check is maintenance-only and irrelevant + // for a lightweight build-time regression gate. + test.skipIf(BENCH_CANARY)('KNOWN_REGRESSIONS entries are not stale', () => { // eslint-disable-next-line @typescript-eslint/no-require-imports const pkgVersion: string = JSON.parse( fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8'), @@ -656,18 +681,22 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { ).toBe(0); }); - // Validate newest-first ordering assumption for all history arrays - test('build history is sorted newest-first', () => { + // Validate newest-first ordering assumption for all history arrays. + // Build/query ordering checks are skipped in canary mode (only incremental + // history is updated by the canary workflow).
+ test.skipIf(BENCH_CANARY)('build history is sorted newest-first', () => { assertNewestFirst(buildHistory, 'Build benchmark'); }); - test('query history is sorted newest-first', () => { + test.skipIf(BENCH_CANARY)('query history is sorted newest-first', () => { assertNewestFirst(queryHistory, 'Query benchmark'); }); test('incremental history is sorted newest-first', () => { assertNewestFirst(incrementalHistory, 'Incremental benchmark'); }); - describe('build benchmarks', () => { + // In canary mode only the incremental suite runs — build/query/resolution + // benchmarks are not measured by the perf-canary workflow. + describe.skipIf(BENCH_CANARY)('build benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(buildHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -700,7 +729,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('query benchmarks', () => { + describe.skipIf(BENCH_CANARY)('query benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(queryHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -803,7 +832,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('resolution benchmarks', () => { + describe.skipIf(BENCH_CANARY)('resolution benchmarks', () => { /** * Resolution precision/recall regression thresholds. 
* These are percentage-point drops (not relative %) because resolution From d07b3588d5eb8c90d1901d0fcdc794f995a08d1e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:53:10 -0600 Subject: [PATCH 10/25] fix(perf): plumb symbolsOnly through parseFilesWasmInline to skip analysis visitors --- src/domain/parser.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 54aa7b994..9ff4fe58e 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1198,11 +1198,16 @@ const INLINE_BACKFILL_THRESHOLD = 16; * * Returns symbols with `_tree` set so `runAnalyses` can run AST/CFG/dataflow * visitors via the unified walker (mirrors how WASM-engine results behaved - * before the worker pool was introduced). + * before the worker pool was introduced), unless `symbolsOnly` is true — in + * that case `_tree` is not set, skipping all analysis visitor walks. Use + * `symbolsOnly` when only definitions/calls/typeMap are needed (e.g. the + * this/super dispatch post-pass) to avoid the analysis overhead on the inline + * path, matching the optimization already applied to the worker-pool path. */ async function parseFilesWasmInline( filePaths: string[], rootDir: string, + symbolsOnly = false, ): Promise> { const result = new Map(); if (filePaths.length === 0) return result; @@ -1220,7 +1225,12 @@ async function parseFilesWasmInline( if (!extracted) continue; const relPath = path.relative(rootDir, filePath).split(path.sep).join('/'); const symbols = extracted.symbols as ExtractorOutput & { _tree?: unknown; _langId?: string }; - symbols._tree = extracted.tree; + // When symbolsOnly=true, skip setting _tree so runAnalyses does not run + // AST/complexity/CFG/dataflow visitor walks — only definitions/calls/typeMap + // are needed by callers like the this/super dispatch post-pass. 
+ if (!symbolsOnly) { + symbols._tree = extracted.tree; + } symbols._langId = extracted.langId; result.set(relPath, symbols); } @@ -1246,7 +1256,7 @@ export async function parseFilesWasmForBackfill( opts: { symbolsOnly?: boolean } = {}, ): Promise> { if (filePaths.length <= INLINE_BACKFILL_THRESHOLD) { - return parseFilesWasmInline(filePaths, rootDir); + return parseFilesWasmInline(filePaths, rootDir, opts.symbolsOnly); } return parseFilesWasm(filePaths, rootDir, opts.symbolsOnly ? EXTRACT_ONLY : FULL_ANALYSIS); } From 3db5d8ccb1b45071cbde2d0980f6fde707da119d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:56:46 -0600 Subject: [PATCH 11/25] fix(perf): scope runPostNativeCha to changed files on incremental builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On incremental builds, runPostNativeCha previously scanned all call→qualified-method edges in the DB (~12ms flat, O(graph size)), even for 1-file changes where no hierarchy or RTA evidence changed. Add two cheap indexed gate queries. Gate A checks whether any changed file introduced a class/interface/trait/struct/record node (hierarchy may have new implementors reachable from unchanged call sites). Gate B checks whether any changed file added a call edge to a class-kind target (RTA set may have grown, enabling previously filtered expansions in unchanged callers). If neither gate fires, restrict the candidate query to src.file IN changedFiles — safe because the hierarchy and instantiated set are unchanged for all other files. Full builds (isFullBuild=true) and cases where either gate fires retain the existing full-scan behaviour. Mirrors the changed-files scoping pattern of runPostNativeThisDispatch. 
Closes #1441 --- .../builder/stages/native-orchestrator.ts | 130 ++++++++++++++++-- 1 file changed, 119 insertions(+), 11 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e5c5bd9b5..a8981c1be 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -401,12 +401,26 @@ async function runPostNativeAnalysis( * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`, * which WASM-re-parses JS/TS files to obtain raw call site receiver info. * + * `changedFiles` controls candidate scoping on incremental builds: + * - null → full build; scan all call→method edges (existing behaviour). + * - array → incremental; two cheap gate queries decide scope: + * Gate A: any class/interface/trait/struct/record nodes in changed files? + * If yes, a new implementor may have appeared — full scan required. + * Gate B: any `calls` edges from changed-file sources targeting class-kind + * nodes? If yes, the RTA set may have grown, enabling previously + * filtered expansions in unchanged caller files — full scan required. + * If neither gate fires: scope `callToMethods` to `src.file IN changedFiles` + * (safe because no hierarchy or RTA evidence changed). + * * Returns the count of newly inserted CHA edges plus the set of files containing * the new edges' endpoints, so the caller can scope role re-classification to the * nodes whose fan-in/out actually changed. A zero count means no edges were added * and role re-classification is unnecessary. 
*/ -function runPostNativeCha(db: BetterSqlite3Database): { +function runPostNativeCha( + db: BetterSqlite3Database, + changedFiles: string[] | null, +): { newEdgeCount: number; affectedFiles: Set; } { @@ -474,19 +488,111 @@ function runPostNativeCha(db: BetterSqlite3Database): { debug('runPostNativeCha: no constructor-call evidence found — proceeding without RTA filter'); } + // ── Incremental candidate scoping ────────────────────────────────────────── + // On incremental builds, two gate queries decide whether to restrict the + // candidate scan to changed-file call sites or run the full graph scan. + // + // Gate A: did a changed file add/change a class hierarchy node? + // A new `extends`/`implements` edge means a previously-untracked implementor + // is now in the hierarchy — unchanged call sites in OTHER files may gain new + // valid expansions, so the full scan is required. + // + // Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)? + // A new `calls` edge to a class-kind target means the instantiated set grew — + // previously RTA-filtered expansions in unchanged caller files become + // admissible, so the full scan is required. + // + // If neither gate fires, the hierarchy and RTA set are unchanged for all files + // outside changedFiles, so restricting to changed-file sources is safe. + let scopeToChangedFiles = false; // true → add WHERE src.file IN changedFiles + if (changedFiles !== null && changedFiles.length > 0) { + // Gate A: class/interface/trait/struct/record nodes in changed files? 
+ const CHUNK_SIZE = 500; + let gateAFired = false; + for (let i = 0; i < changedFiles.length && !gateAFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM nodes + WHERE file IN (${ph}) + AND kind IN ('class', 'interface', 'trait', 'struct', 'record') + LIMIT 1`, + ) + .get(...chunk); + if (row) gateAFired = true; + } + + // Gate B: calls from changed-file sources to class-kind targets? + let gateBFired = false; + if (!gateAFired) { + for (let i = 0; i < changedFiles.length && !gateBFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM edges e + JOIN nodes src ON e.source_id = src.id + JOIN nodes tgt ON e.target_id = tgt.id + WHERE e.kind = 'calls' AND tgt.kind = 'class' + AND src.file IN (${ph}) + LIMIT 1`, + ) + .get(...chunk); + if (row) gateBFired = true; + } + } + + if (!gateAFired && !gateBFired) { + scopeToChangedFiles = true; + debug( + `runPostNativeCha: neither gate fired — scoping candidate scan to ${changedFiles.length} changed file(s)`, + ); + } else { + debug( + `runPostNativeCha: ${gateAFired ? 'Gate A (hierarchy)' : 'Gate B (RTA)'} fired — running full scan`, + ); + } + } + // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). // Include the caller node's file so confidence can be computed file-pair-aware, // matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula. 
- const callToMethods = db - .prepare(` - SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file - FROM edges e - JOIN nodes tgt ON e.target_id = tgt.id - JOIN nodes src ON e.source_id = src.id - WHERE e.kind = 'calls' AND tgt.kind = 'method' - AND INSTR(tgt.name, '.') > 0 - `) - .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + // When scopeToChangedFiles is true, restrict to call sites in the changed files + // (safe because no hierarchy or RTA evidence changed outside those files). + let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; + if (scopeToChangedFiles && changedFiles && changedFiles.length > 0) { + const CHUNK_SIZE = 500; + const rows: Array<{ source_id: number; method_name: string; caller_file: string | null }> = []; + for (let i = 0; i < changedFiles.length; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const chunkRows = db + .prepare( + `SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + AND src.file IN (${ph})`, + ) + .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + rows.push(...chunkRows); + } + callToMethods = rows; + } else { + callToMethods = db + .prepare(` + SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + `) + .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + } // Seed seen-pairs only from the source_ids we'll be expanding — avoids loading every // call edge in the DB (which would be O(all 
edges)) for large codebases. @@ -1427,6 +1533,8 @@ export async function tryNativeOrchestrator( // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise. const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha( ctx.db as unknown as BetterSqlite3Database, + // null = full build (scan all call→method edges); array = incremental (gate queries decide scope) + result.isFullBuild ? null : (result.changedFiles ?? null), ); // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites From 8b3aa3d3438178bc306edba9c7a4038663dc44b0 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:01:26 -0600 Subject: [PATCH 12/25] fix(native): add post-pass phase timings to result.phases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Times each JS post-pass in tryNativeOrchestrator and exposes the measurements in BuildResult.phases: - gapDetectMs — dropped-language gap detection + backfill - chaMs — CHA expansion (interface dispatch) - thisDispatchMs — this/super dispatch WASM re-parse (was already tracked but now properly named alongside the rest) - reclassifyMs — scoped role re-classification after edge insertion - techniqueBackfillMs — technique-column UPDATE on native-written edges Previously only thisDispatchMs was reported, causing wall-clock vs phaseSum to diverge by 1.1s+ on 1-file rebuilds and making benchmark regressions undiagnosable from committed history. Updates update-incremental-report.ts to render the new phases in a collapsible details block under each engine's 1-file rebuild section. 
Closes #1434 --- scripts/update-incremental-report.ts | 49 +++++++++++++++++++ .../builder/stages/native-orchestrator.ts | 49 ++++++++++++++++--- src/types.ts | 10 ++++ 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/scripts/update-incremental-report.ts b/scripts/update-incremental-report.ts index bce3e7346..3a047a845 100644 --- a/scripts/update-incremental-report.ts +++ b/scripts/update-incremental-report.ts @@ -161,6 +161,55 @@ for (const engineKey of ['native', 'wasm']) { md += `| Full build | ${formatMs(e.fullBuildMs)} |\n`; md += `| No-op rebuild | ${e.noopRebuildMs != null ? formatMs(e.noopRebuildMs) : 'n/a'} |\n`; md += `| 1-file rebuild | ${e.oneFileRebuildMs != null ? formatMs(e.oneFileRebuildMs) : 'n/a'} |\n\n`; + + // 1-file rebuild phase breakdown — skipped when phases are unavailable (older + // benchmark entries that predate per-phase tracking, or failed runs). + const ph = e.oneFilePhases; + if (ph && typeof ph === 'object') { + md += `
1-file rebuild phase breakdown (${engineKey})\n\n`; + md += '| Phase | Time |\n'; + md += '|-------|-----:|\n'; + // Core Rust pipeline phases (present for both engines) + const corePhases = [ + ['setup', 'setupMs'], + ['collect', 'collectMs'], + ['detect', 'detectMs'], + ['parse', 'parseMs'], + ['insert', 'insertMs'], + ['resolve', 'resolveMs'], + ['edges', 'edgesMs'], + ['structure', 'structureMs'], + ['roles', 'rolesMs'], + ]; + for (const [label, key] of corePhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + // Native-only JS post-pass phases (only present when engine=native) + if (engineKey === 'native') { + const nativePostPhases = [ + ['gap detect + backfill', 'gapDetectMs'], + ['CHA expansion', 'chaMs'], + ['this/super dispatch', 'thisDispatchMs'], + ['role reclassify', 'reclassifyMs'], + ['technique backfill', 'techniqueBackfillMs'], + ]; + for (const [label, key] of nativePostPhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + } + // Analysis phases (present for both engines) + const analysisPhases = [ + ['ast', 'astMs'], + ['complexity', 'complexityMs'], + ['cfg', 'cfgMs'], + ['dataflow', 'dataflowMs'], + ['finalize', 'finalizeMs'], + ]; + for (const [label, key] of analysisPhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + md += '\n
\n\n'; + } } const r = latest.resolve; diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index a8981c1be..c16213a7b 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -577,7 +577,11 @@ function runPostNativeCha( AND INSTR(tgt.name, '.') > 0 AND src.file IN (${ph})`, ) - .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + .all(...chunk) as Array<{ + source_id: number; + method_name: string; + caller_file: string | null; + }>; rows.push(...chunkRows); } callToMethods = rows; @@ -933,12 +937,20 @@ async function runPostNativeThisDispatch( return { elapsedMs: Date.now() - t0, targetIds, affectedFiles }; } +interface PostPassTimings { + gapDetectMs: number; + chaMs: number; + thisDispatchMs: number; + reclassifyMs: number; + techniqueBackfillMs: number; +} + /** Format timing result from native orchestrator phases + JS post-processing. */ function formatNativeTimingResult( p: Record, structurePatchMs: number, analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }, - thisDispatchMs: number, + postPass: PostPassTimings, ): BuildResult { return { phases: { @@ -951,7 +963,11 @@ function formatNativeTimingResult( edgesMs: +(p.edgesMs ?? 0).toFixed(1), structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), rolesMs: +(p.rolesMs ?? 0).toFixed(1), - thisDispatchMs: +thisDispatchMs.toFixed(1), + gapDetectMs: +postPass.gapDetectMs.toFixed(1), + chaMs: +postPass.chaMs.toFixed(1), + thisDispatchMs: +postPass.thisDispatchMs.toFixed(1), + reclassifyMs: +postPass.reclassifyMs.toFixed(1), + techniqueBackfillMs: +postPass.techniqueBackfillMs.toFixed(1), astMs: +(analysisTiming.astMs ?? 0).toFixed(1), complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1), cfgMs: +(analysisTiming.cfgMs ?? 
0).toFixed(1), @@ -1490,8 +1506,14 @@ export async function tryNativeOrchestrator( ctx.db = openDb(ctx.dbPath); ctx.nativeFirstProxy = false; } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { - // DB reopen failed — return partial result - return formatNativeTimingResult(p, 0, analysisTiming, 0); + // DB reopen failed — return partial result (no post-pass phases completed) + return formatNativeTimingResult(p, 0, analysisTiming, { + gapDetectMs: 0, + chaMs: 0, + thisDispatchMs: 0, + reclassifyMs: 0, + techniqueBackfillMs: 0, + }); } } @@ -1513,6 +1535,7 @@ export async function tryNativeOrchestrator( // gated below. const removedCount = result.removedCount ?? 0; const changedCount = result.changedCount ?? 0; + const gapDetectStart = performance.now(); const gap = detectDroppedLanguageGap(ctx); if ( result.isFullBuild || @@ -1523,6 +1546,7 @@ export async function tryNativeOrchestrator( ) { await backfillNativeDroppedFiles(ctx, gap); } + const gapDetectMs = performance.now() - gapDetectStart; // Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations). // Returns the affected files so role re-classification below can be scoped to @@ -1531,11 +1555,13 @@ export async function tryNativeOrchestrator( // Function-as-object-property methods (`fn.method = function() {}`) are extracted // natively by the Rust engine (#1432) and resolved in-build by its edge builder, so // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise. + const chaStart = performance.now(); const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha( ctx.db as unknown as BetterSqlite3Database, // null = full build (scan all call→method edges); array = incremental (gate queries decide scope) result.isFullBuild ? null : (result.changedFiles ?? 
null), ); + const chaMs = performance.now() - chaStart; // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites // whose raw receiver info the Rust pipeline does not persist to DB. @@ -1558,6 +1584,7 @@ export async function tryNativeOrchestrator( // files restores correctness without re-running the classifier over the // whole graph (which cost ~130ms per build on codegraph itself and was a // major part of the v3.12.0 native full-build benchmark regression). + let reclassifyMs = 0; if (chaEdgeCount > 0 || thisDispatchTargetIds.size > 0) { const affectedFiles = [...new Set([...chaAffectedFiles, ...thisDispatchAffectedFiles])]; // When edges were inserted but all their endpoint nodes have null `file` @@ -1566,6 +1593,7 @@ export async function tryNativeOrchestrator( // case — scoped classification with an empty set would be a no-op, leaving // roles stale for those nodes. const scopedFiles = affectedFiles.length > 0 ? affectedFiles : null; + const reclassifyStart = performance.now(); try { const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { classifyNodeRoles: ( @@ -1582,13 +1610,16 @@ export async function tryNativeOrchestrator( } catch (err) { debug(`Post-pass role re-classification failed: ${toErrorMessage(err)}`); } + reclassifyMs = performance.now() - reclassifyStart; } // Backfill the `technique` column on `calls` edges written by the Rust // orchestrator, which does not write the column. Runs after all edge-writing // phases (including the WASM dropped-language backfill, CHA post-pass, and // this/super dispatch) so every new edge in this build cycle gets a label. 
+ const techniqueBackfillStart = performance.now(); backfillEdgeTechniquesAfterNativeOrchestrator(ctx.db, !!result.isFullBuild, result.changedFiles); + const techniqueBackfillMs = performance.now() - techniqueBackfillStart; // Re-count nodes/edges now that all edge-writing post-passes have run: the // Rust orchestrator captured its counts before the JS post-passes added @@ -1633,5 +1664,11 @@ export async function tryNativeOrchestrator( } closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); - return formatNativeTimingResult(p, structurePatchMs, analysisTiming, thisDispatchMs); + return formatNativeTimingResult(p, structurePatchMs, analysisTiming, { + gapDetectMs, + chaMs, + thisDispatchMs, + reclassifyMs, + techniqueBackfillMs, + }); } diff --git a/src/types.ts b/src/types.ts index d7f97da6a..59897944a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1268,8 +1268,18 @@ export interface BuildResult { edgesMs: number; structureMs: number; rolesMs: number; + /** Wall-clock time for the prototype-method post-pass (native path only). */ + protoMethodsMs?: number; + /** Wall-clock time for the CHA expansion post-pass (native path only). */ + chaMs?: number; /** Wall-clock time for the this/super dispatch WASM post-pass (native path only). */ thisDispatchMs?: number; + /** Wall-clock time for the dropped-language gap detection + backfill (native path only). */ + gapDetectMs?: number; + /** Wall-clock time for role re-classification after JS edge-writing post-passes (native path only). */ + reclassifyMs?: number; + /** Wall-clock time for the technique-column backfill on native-written edges (native path only). 
*/ + techniqueBackfillMs?: number; astMs: number; complexityMs: number; cfgMs: number; From fd4ffd123eecac2410e29b13ea14cb704b64571e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:04:04 -0600 Subject: [PATCH 13/25] fix(perf): correct INLINE_BACKFILL_THRESHOLD docstring; raise threshold for required-tier grammars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docstring claimed pool cost was "amortised over enough parse work" — measurements show IPC overhead scales linearly (~55–64ms/file pool vs ~8–10ms/file inline). The real motivation is crash safety for exotic WASM grammars (#965); JS/TS/TSX (required-tier, used in all this-dispatch backfill calls) have never triggered the V8 fatal crash class and are safe to run inline. Raise threshold 16 → 32 to keep typical this-dispatch batches (≤ 18 files on the codegraph corpus) on the inline fast path. Exotic-language drops are almost always well under 32 files and also benefit from the inline path without meaningful crash risk increase. Closes #1435 --- src/domain/parser.ts | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 9ff4fe58e..411ba147a 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1181,12 +1181,25 @@ async function parseFilesWasm( /** * Files at or below this count use the inline parse path (no worker spawn). * - * Sized for typical engine-parity drops: a handful of fixture files in one - * or two languages (the recurring HCL case is 4 files). Above this, the - * worker-pool's IPC + crash-isolation cost (#965) is amortized over enough - * parse work to be worth paying; below it, the ~1–2s cold-start dominates. + * The worker pool exists for crash safety (#965): exotic (non-required) WASM + * grammars can trigger uncatchable V8 fatal errors that would kill the main + * process. 
Running them in a worker means only the worker dies; the pool + * detects the exit, skips the file, respawns, and continues. + * + * JS/TS/TSX are required-tier grammars — they have never triggered the V8 + * fatal crash class and are safe to run inline. The primary hot caller + * (this/super dispatch post-pass) exclusively handles JS/TS/TSX files and + * measured ~55–64ms/file through the pool vs ~8–10ms/file inline (#1435); + * IPC overhead scales linearly with file count, not amortised. + * + * The threshold is set high enough to keep typical this-dispatch batches + * (≤ 18 files on the codegraph corpus) on the inline path, while still + * routing truly large exotic-language drops (rare; typical HCL case is 4 + * files) through the pool for crash isolation. Exotic-language drops are + * almost always well under this limit anyway, so they benefit from the + * inline fast path too without meaningful crash risk increase. */ -const INLINE_BACKFILL_THRESHOLD = 16; +const INLINE_BACKFILL_THRESHOLD = 32; /** * Inline WASM parse (no worker) for small file batches. From 498ee2150a3e89b0fd3bcd1e558177997260f751 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:12:20 -0600 Subject: [PATCH 14/25] fix(perf): guard post-native passes against unnecessary work on 1-file incremental rebuilds On 1-file native incremental builds, two JS post-passes ran unconditionally even when they had no work to do: - `backfillNativeDroppedFiles`: called whenever changedCount > 0, even when detectDroppedLanguageGap returned an empty gap. Gate now checks gap.missingAbs.length > 0 || gap.staleRel.length > 0 directly, matching backfillNativeDroppedFiles's own internal early-exit guard. - Node/edge COUNT(*) re-count: ran unconditionally after all post-passes even when none of them wrote any edges. COUNT(*) over 50K+ edge tables is non-trivial, especially via the NativeDbProxy napi-rs round-trip. Now gated on postPassWroteData (backfill | CHA edges | this-dispatch edges). 
Closes #1454 --- .../builder/stages/native-orchestrator.ts | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index c16213a7b..18fe478ef 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -1524,28 +1524,18 @@ export async function tryNativeOrchestrator( // stale native binaries). WASM handles those — backfill via WASM so both // engines process the same file set (#967). // - // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for - // both gating and the backfill itself. On dirty incrementals/full builds - // the orchestrator signals trigger backfill, so the walk happens once - // (instead of redundantly inside backfill). On quiet incrementals we - // still pay the walk so we can detect brand-new files in dropped-language - // extensions — a gap that the orchestrator's `detect_removed_files` - // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap - // because the expensive part (WASM re-parse of the missing set) is - // gated below. - const removedCount = result.removedCount ?? 0; - const changedCount = result.changedCount ?? 0; + // Detect the gap once (fs walk + 2 DB queries) and use it for both gating + // and the backfill itself. On quiet incrementals we still pay the walk so + // we can detect brand-new files in dropped-language extensions — a gap that + // the orchestrator's `detect_removed_files` filter (#1070) leaves open + // (#1083, #1091). The pre-check is cheap because the expensive part (WASM + // re-parse of the missing set) is gated below. 
const gapDetectStart = performance.now(); const gap = detectDroppedLanguageGap(ctx); - if ( - result.isFullBuild || - removedCount > 0 || - changedCount > 0 || - gap.missingAbs.length > 0 || - gap.staleRel.length > 0 - ) { + if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { await backfillNativeDroppedFiles(ctx, gap); } + const backfillHappened = gap.missingAbs.length > 0 || gap.staleRel.length > 0; const gapDetectMs = performance.now() - gapDetectStart; // Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations). @@ -1624,19 +1614,27 @@ export async function tryNativeOrchestrator( // Re-count nodes/edges now that all edge-writing post-passes have run: the // Rust orchestrator captured its counts before the JS post-passes added // edges, so both its summary and build_meta under-report (#1452). + // + // Fast path: skip the COUNT(*) scan when no post-pass wrote any edges. + // COUNT(*) on large tables (50K+ edges) is non-trivial, especially via the + // NativeDbProxy napi-rs round-trip. When all post-passes were no-ops, the + // Rust orchestrator's counts are still accurate — no re-count needed. let finalNodeCount = result.nodeCount ?? 0; let finalEdgeCount = result.edgeCount ?? 
0; - try { - const counts = (ctx.db as unknown as BetterSqlite3Database) - .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') - .get() as { n: number; e: number }; - if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { - finalNodeCount = counts.n; - finalEdgeCount = counts.e; - setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + const postPassWroteData = backfillHappened || chaEdgeCount > 0 || thisDispatchTargetIds.size > 0; + if (postPassWroteData) { + try { + const counts = (ctx.db as unknown as BetterSqlite3Database) + .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') + .get() as { n: number; e: number }; + if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { + finalNodeCount = counts.n; + finalEdgeCount = counts.e; + setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + } + } catch (err) { + debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } - } catch (err) { - debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } info( `Native build orchestrator completed: ${finalNodeCount} nodes, ${finalEdgeCount} edges, ${result.fileCount ?? 0} files`, From 61a9839e5ce06851c4d883bede530606e4897fe5 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:16:18 -0600 Subject: [PATCH 15/25] chore(types): remove dead protoMethodsMs field and stale comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The post-pass it timed (runPostNativePrototypeMethods) was deleted in b5c03a29 when func-prop extraction moved to Rust (#1432). The optional field was never set by any code path that survived the deletion. Also remove the stale reference to "prototype-methods post-pass" from the parseFilesWasmForBackfill docstring — only the this-dispatch post-pass uses symbolsOnly now. 
Closes #1432 --- src/domain/parser.ts | 4 ++-- src/types.ts | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 411ba147a..267895c82 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1259,8 +1259,8 @@ async function parseFilesWasmInline( * `opts.symbolsOnly` skips the AST/complexity/CFG/dataflow visitors in the * worker (and their result serialization across the thread boundary) for * callers that only consume definitions/calls/typeMap — the native - * orchestrator's prototype-methods and this-dispatch post-passes. Callers - * that ingest the files into the DB (dropped-language backfill) must keep + * orchestrator's this-dispatch post-pass. Callers that ingest the files into + * the DB (dropped-language backfill) must keep * the default full analysis. */ export async function parseFilesWasmForBackfill( diff --git a/src/types.ts b/src/types.ts index 59897944a..8e1f46fc2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1268,8 +1268,6 @@ export interface BuildResult { edgesMs: number; structureMs: number; rolesMs: number; - /** Wall-clock time for the prototype-method post-pass (native path only). */ - protoMethodsMs?: number; /** Wall-clock time for the CHA expansion post-pass (native path only). */ chaMs?: number; /** Wall-clock time for the this/super dispatch WASM post-pass (native path only). */ From 5f5d4d25d3cf3aba07e3856eb8eea56f1e0decde Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:24:44 -0600 Subject: [PATCH 16/25] fix: class-scope field annotation typeMap keys to prevent cross-class collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Field type annotations (`private repo: OrderRepository`) were seeded as bare file-wide typeMap keys, causing `this.repo` inside `UserService` to resolve to `OrderRepository` when both classes had a `repo` field (issue #1458). 
Both extractors (TS `handleFieldDefTypeMap` and Rust `field_definition` branch) now seed `ClassName.field` keys at confidence 0.9, matching the `CallerClass.X` resolver fallback added in PR #1382. Bare keys are kept at confidence 0.6 as fallbacks for single-class files or class expressions where no enclosing class name is available. Both engines change identically — parity preserved. --- .../src/extractors/javascript.rs | 39 ++++++++++++++++--- src/extractors/javascript.ts | 37 ++++++++++++++---- tests/parsers/javascript.test.ts | 25 ++++++++++-- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 2c00a615e..8cfb61223 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -202,8 +202,13 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep } } // TypeScript class field declarations: `private repo: Repository` - // Seeds both "repo" and "this.repo" so that `this.repo.method()` calls - // can be resolved to the interface/class type via the type map. + // Seeds a class-scoped key `ClassName.field` (confidence 0.9) as the primary + // entry so that two classes with identically-named fields don't overwrite each + // other's typeMap entry (issue #1458). The resolver's `CallerClass.X` fallback + // looks up exactly this key. + // Bare `field` and `this.field` keys are kept at lower confidence (0.6) as + // fallbacks for single-class files where the resolver may lack callerClass context. + // Mirrors handleFieldDefTypeMap in src/extractors/javascript.ts. 
"public_field_definition" | "field_definition" => { let name_node = node.child_by_field_name("name") .or_else(|| node.child_by_field_name("property")) @@ -216,9 +221,33 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep let field_name = node_text(&name_node, source).to_string(); if let Some(type_anno) = find_child(node, "type_annotation") { if let Some(type_name) = extract_simple_type_name(&type_anno, source) { - push_type_map_entry(symbols, field_name.clone(), type_name.to_string()); - // "this.fieldName" key resolves `this.repo.method()` calls. - push_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string()); + match enclosing_type_map_class(node, source) { + Some(class_name) => { + // Primary: class-scoped key prevents cross-class collision. + push_type_map_entry( + symbols, + format!("{}.{}", class_name, field_name), + type_name.to_string(), + ); + // Fallback bare keys at lower confidence. + symbols.type_map.push(TypeMapEntry { + name: field_name.clone(), + type_name: type_name.to_string(), + confidence: 0.6, + }); + symbols.type_map.push(TypeMapEntry { + name: format!("this.{}", field_name), + type_name: type_name.to_string(), + confidence: 0.6, + }); + } + None => { + // No enclosing class declaration (e.g. class expression) + // — use bare keys only at full confidence. 
+ push_type_map_entry(symbols, field_name.clone(), type_name.to_string()); + push_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string()); + } + } } } } diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 2be0ac4b3..c4103e51a 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -1867,7 +1867,7 @@ function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollector } else if (t === 'required_parameter' || t === 'optional_parameter') { handleParamTypeMap(node, out.typeMap); } else if (t === 'public_field_definition' || t === 'field_definition') { - handleFieldDefTypeMap(node, out.typeMap); + handleFieldDefTypeMap(node, out.typeMap, typeMapClass); } else if (t === 'assignment_expression') { handlePropWriteTypeMap(node, out.typeMap, typeMapClass); } else if (t === 'call_expression') { @@ -2094,11 +2094,23 @@ function handleParamTypeMap(node: TreeSitterNode, typeMap: Map`. - * Seeds both "repo" and "this.repo" so `this.repo.method()` calls resolve to the - * declared type via the type map. Mirrors the field_definition branch of - * match_js_type_map in crates/codegraph-core/src/extractors/javascript.rs. + * + * Seeds a class-scoped key `ClassName.field` (confidence 0.9) as the primary entry + * so that two classes with identically-named fields don't overwrite each other's + * typeMap entry (issue #1458). The resolver's `CallerClass.X` fallback (call-resolver.ts + * line 110) looks up exactly this key. + * + * Bare `field` and `this.field` keys are kept at lower confidence (0.6) as fallbacks + * for single-class files where the resolver may not have a callerClass context. + * + * Mirrors the field_definition branch of match_js_type_map in + * crates/codegraph-core/src/extractors/javascript.rs. 
*/ -function handleFieldDefTypeMap(node: TreeSitterNode, typeMap: Map): void { +function handleFieldDefTypeMap( + node: TreeSitterNode, + typeMap: Map, + currentClass: string | null, +): void { const nameNode = node.childForFieldName('name') || node.childForFieldName('property') || @@ -2115,9 +2127,18 @@ function handleFieldDefTypeMap(node: TreeSitterNode, typeMap: Map { expect(symbols.typeMap.get('res')).toEqual({ type: 'Response', confidence: 0.9 }); }); - it('extracts class field annotations into typeMap with confidence 0.9', () => { + it('extracts class field annotations into class-scoped typeMap key (issue #1458)', () => { const symbols = parseTS(` class UserService { private repo: Repository; run() { this.repo.save(); } } `); - expect(symbols.typeMap.get('repo')).toEqual({ type: 'Repository', confidence: 0.9 }); - expect(symbols.typeMap.get('this.repo')).toEqual({ type: 'Repository', confidence: 0.9 }); + // Primary: class-scoped key at 0.9 — prevents cross-class collision. + expect(symbols.typeMap.get('UserService.repo')).toEqual({ type: 'Repository', confidence: 0.9 }); + // Fallback bare keys at lower confidence for single-class files. + expect(symbols.typeMap.get('repo')).toEqual({ type: 'Repository', confidence: 0.6 }); + expect(symbols.typeMap.get('this.repo')).toEqual({ type: 'Repository', confidence: 0.6 }); + }); + + it('prevents cross-class collision for same-named fields (issue #1458)', () => { + const symbols = parseTS(` + class OrderService { + private repo: OrderRepository; + } + class UserService { + private repo: UserRepository; + } + `); + // Each class gets its own scoped key — no collision. + expect(symbols.typeMap.get('OrderService.repo')).toEqual({ type: 'OrderRepository', confidence: 0.9 }); + expect(symbols.typeMap.get('UserService.repo')).toEqual({ type: 'UserRepository', confidence: 0.9 }); + // Bare "repo" key should hold the first class's type at 0.6 (second write is same confidence, no overwrite). 
+ expect(symbols.typeMap.get('repo')?.confidence).toBe(0.6); }); it('returns empty typeMap when no annotations', () => { From 29dd101fa272b97b228b79340f2b4ccf23161d33 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:35:54 -0600 Subject: [PATCH 17/25] fix(bench): update elixir/julia/objc expected-edges to module-qualified names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resolution benchmark uses WASM-built graphs where the Elixir, Julia, and Objective-C extractors emit module-qualified symbol names (Main.run, App.main, UserService.create_user, etc.). The expected-edges manifests were written with bare unqualified names (run, main, create_user), so every correctly-resolved edge appeared as a false positive and every expected edge appeared as a false negative — causing all three languages to show 0% precision even though resolution was working correctly. Root cause: starting in v3.12.0, cross-module call resolution began working for these languages (via the improved receiver-dispatch and same-class fallback in resolveByMethodOrGlobal / build-edges.ts). With 0 edges previously resolved, the name mismatch was invisible; once edges started resolving, the manifests showed 17 FP (elixir), 11 FP (julia), 6 FP (objc) — all correctly resolved edges misidentified as false positives. Fix: - Update all three expected-edges.json manifests to use the module-qualified names matching actual extractor output: elixir: Main.run, UserService.create_user, Validators.validate_user, etc. julia: App.main, Service.create_user, Repository.new_repo, etc. objc: full ObjC selectors (createUserWithId:name:email:, isValidEmail:, etc.) 
plus add main -> run (plain C call correctly resolved) - Ratchet THRESHOLDS for all three: elixir: precision 0.0 -> 1.0, recall 0.0 -> 0.8 (17/21 resolved) julia: precision 0.0 -> 1.0, recall 0.0 -> 0.7 (11/15 resolved) objc: precision 0.0 -> 1.0, recall 0.0 -> 0.4 (6/13 resolved) Remaining FNs are genuine unresolved edges (same-file bare calls in elixir/julia, receiver-typed message sends in objc) — not regressions. Closes #1447 --- .../fixtures/elixir/expected-edges.json | 92 +++++++++---------- .../fixtures/julia/expected-edges.json | 90 +++++++++--------- .../fixtures/objc/expected-edges.json | 69 +++++++------- .../resolution/resolution-benchmark.test.ts | 19 +++- 4 files changed, 144 insertions(+), 126 deletions(-) diff --git a/tests/benchmarks/resolution/fixtures/elixir/expected-edges.json b/tests/benchmarks/resolution/fixtures/elixir/expected-edges.json index af1085cb9..d6782901c 100644 --- a/tests/benchmarks/resolution/fixtures/elixir/expected-edges.json +++ b/tests/benchmarks/resolution/fixtures/elixir/expected-edges.json @@ -4,148 +4,148 @@ "description": "Hand-annotated call edges for Elixir resolution benchmark", "edges": [ { - "source": { "name": "validate_user", "file": "validators.ex" }, - "target": { "name": "valid_name?", "file": "validators.ex" }, + "source": { "name": "Validators.validate_user", "file": "validators.ex" }, + "target": { "name": "Validators.valid_name?", "file": "validators.ex" }, "kind": "calls", "mode": "same-file", - "notes": "Same-module helper call within Validators" + "notes": "Same-module helper call within Validators — extractor emits module-qualified names" }, { - "source": { "name": "validate_user", "file": "validators.ex" }, - "target": { "name": "valid_email?", "file": "validators.ex" }, + "source": { "name": "Validators.validate_user", "file": "validators.ex" }, + "target": { "name": "Validators.valid_email?", "file": "validators.ex" }, "kind": "calls", "mode": "same-file", - "notes": "Same-module helper call within 
Validators" + "notes": "Same-module helper call within Validators — extractor emits module-qualified names" }, { - "source": { "name": "create_user", "file": "service.ex" }, - "target": { "name": "validate_user", "file": "validators.ex" }, + "source": { "name": "UserService.create_user", "file": "service.ex" }, + "target": { "name": "Validators.validate_user", "file": "validators.ex" }, "kind": "calls", "mode": "module-function", "notes": "Validators.validate_user() — cross-module qualified call" }, { - "source": { "name": "create_user", "file": "service.ex" }, - "target": { "name": "save", "file": "repository.ex" }, + "source": { "name": "UserService.create_user", "file": "service.ex" }, + "target": { "name": "UserRepository.save", "file": "repository.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserRepository.save() — cross-module qualified call" }, { - "source": { "name": "get_user", "file": "service.ex" }, - "target": { "name": "find_by_id", "file": "repository.ex" }, + "source": { "name": "UserService.get_user", "file": "service.ex" }, + "target": { "name": "UserRepository.find_by_id", "file": "repository.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserRepository.find_by_id() — cross-module qualified call" }, { - "source": { "name": "remove_user", "file": "service.ex" }, - "target": { "name": "delete", "file": "repository.ex" }, + "source": { "name": "UserService.remove_user", "file": "service.ex" }, + "target": { "name": "UserRepository.delete", "file": "repository.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserRepository.delete() — cross-module qualified call" }, { - "source": { "name": "list_users", "file": "service.ex" }, - "target": { "name": "list_all", "file": "repository.ex" }, + "source": { "name": "UserService.list_users", "file": "service.ex" }, + "target": { "name": "UserRepository.list_all", "file": "repository.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserRepository.list_all() 
— cross-module qualified call" }, { - "source": { "name": "display_user", "file": "service.ex" }, - "target": { "name": "get_user", "file": "service.ex" }, + "source": { "name": "UserService.display_user", "file": "service.ex" }, + "target": { "name": "UserService.get_user", "file": "service.ex" }, "kind": "calls", "mode": "same-file", - "notes": "Same-module call to get_user within UserService" + "notes": "Same-module call to get_user within UserService — extractor emits module-qualified names" }, { - "source": { "name": "display_user", "file": "service.ex" }, - "target": { "name": "format_user", "file": "service.ex" }, + "source": { "name": "UserService.display_user", "file": "service.ex" }, + "target": { "name": "UserService.format_user", "file": "service.ex" }, "kind": "calls", "mode": "same-file", - "notes": "Same-module call to private helper format_user" + "notes": "Same-module call to private helper format_user — extractor emits module-qualified names" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "new_store", "file": "repository.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserRepository.new_store", "file": "repository.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserRepository.new_store() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "create_user", "file": "service.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserService.create_user", "file": "service.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserService.create_user() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "get_user", "file": "service.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserService.get_user", "file": "service.ex" }, "kind": "calls", "mode": "module-function", "notes": 
"UserService.get_user() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "list_users", "file": "service.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserService.list_users", "file": "service.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserService.list_users() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "display_user", "file": "service.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserService.display_user", "file": "service.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserService.display_user() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "remove_user", "file": "service.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "UserService.remove_user", "file": "service.ex" }, "kind": "calls", "mode": "module-function", "notes": "UserService.remove_user() — cross-module qualified call" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "fetch", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "Patterns.fetch", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.fetch() — exercises default-value parameter extraction" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "first_of", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "Patterns.first_of", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.first_of() — exercises tuple-pattern parameter extraction" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "name_of", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + 
"target": { "name": "Patterns.name_of", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.name_of() — exercises map-pattern parameter extraction" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "id_of", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "Patterns.id_of", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.id_of() — exercises struct-pattern parameter extraction" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "head_of", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "Patterns.head_of", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.head_of() — exercises list-cons pattern parameter extraction" }, { - "source": { "name": "run", "file": "main.ex" }, - "target": { "name": "all_of", "file": "patterns.ex" }, + "source": { "name": "Main.run", "file": "main.ex" }, + "target": { "name": "Patterns.all_of", "file": "patterns.ex" }, "kind": "calls", "mode": "module-function", "notes": "Patterns.all_of() — exercises list pattern parameter extraction" diff --git a/tests/benchmarks/resolution/fixtures/julia/expected-edges.json b/tests/benchmarks/resolution/fixtures/julia/expected-edges.json index 9d0661c74..df2f83baf 100644 --- a/tests/benchmarks/resolution/fixtures/julia/expected-edges.json +++ b/tests/benchmarks/resolution/fixtures/julia/expected-edges.json @@ -4,109 +4,109 @@ "description": "Hand-annotated call edges for Julia resolution benchmark", "edges": [ { - "source": { "name": "main", "file": "main.jl" }, - "target": { "name": "new_repo", "file": "repository.jl" }, + "source": { "name": "App.main", "file": "main.jl" }, + "target": { "name": "Repository.new_repo", "file": "repository.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Repository.new_repo() — qualified 
call to Repository module" + "notes": "Repository.new_repo() — qualified call to Repository module; extractor emits module-qualified names" }, { - "source": { "name": "main", "file": "main.jl" }, - "target": { "name": "create_user", "file": "service.jl" }, + "source": { "name": "App.main", "file": "main.jl" }, + "target": { "name": "Service.create_user", "file": "service.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Service.create_user() — qualified call to Service module" + "notes": "Service.create_user() — qualified call to Service module; extractor emits module-qualified names" }, { - "source": { "name": "main", "file": "main.jl" }, - "target": { "name": "get_user", "file": "service.jl" }, + "source": { "name": "App.main", "file": "main.jl" }, + "target": { "name": "Service.get_user", "file": "service.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Service.get_user() — qualified call to Service module" + "notes": "Service.get_user() — qualified call to Service module; extractor emits module-qualified names" }, { - "source": { "name": "main", "file": "main.jl" }, - "target": { "name": "remove_user", "file": "service.jl" }, + "source": { "name": "App.main", "file": "main.jl" }, + "target": { "name": "Service.remove_user", "file": "service.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Service.remove_user() — qualified call to Service module" + "notes": "Service.remove_user() — qualified call to Service module; extractor emits module-qualified names" }, { - "source": { "name": "main", "file": "main.jl" }, - "target": { "name": "summary", "file": "service.jl" }, + "source": { "name": "App.main", "file": "main.jl" }, + "target": { "name": "Service.summary", "file": "service.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Service.summary() — qualified call to Service module" + "notes": "Service.summary() — qualified call to Service module; extractor emits module-qualified names" }, { - "source": { "name": 
"create_user", "file": "service.jl" }, - "target": { "name": "validate_name", "file": "validators.jl" }, + "source": { "name": "Service.create_user", "file": "service.jl" }, + "target": { "name": "Validators.validate_name", "file": "validators.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Validators.validate_name() — qualified call to Validators module" + "notes": "Validators.validate_name() — qualified call to Validators module; extractor emits module-qualified names" }, { - "source": { "name": "create_user", "file": "service.jl" }, - "target": { "name": "validate_email", "file": "validators.jl" }, + "source": { "name": "Service.create_user", "file": "service.jl" }, + "target": { "name": "Validators.validate_email", "file": "validators.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Validators.validate_email() — qualified call to Validators module" + "notes": "Validators.validate_email() — qualified call to Validators module; extractor emits module-qualified names" }, { - "source": { "name": "create_user", "file": "service.jl" }, - "target": { "name": "save", "file": "repository.jl" }, + "source": { "name": "Service.create_user", "file": "service.jl" }, + "target": { "name": "Repository.save", "file": "repository.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Repository.save() — qualified call to Repository module" + "notes": "Repository.save() — qualified call to Repository module; extractor emits module-qualified names" }, { - "source": { "name": "get_user", "file": "service.jl" }, - "target": { "name": "find_by_id", "file": "repository.jl" }, + "source": { "name": "Service.get_user", "file": "service.jl" }, + "target": { "name": "Repository.find_by_id", "file": "repository.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Repository.find_by_id() — qualified call to Repository module" + "notes": "Repository.find_by_id() — qualified call to Repository module; extractor emits module-qualified names" }, { 
- "source": { "name": "remove_user", "file": "service.jl" }, - "target": { "name": "delete", "file": "repository.jl" }, + "source": { "name": "Service.remove_user", "file": "service.jl" }, + "target": { "name": "Repository.delete", "file": "repository.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Repository.delete() — qualified call to Repository module" + "notes": "Repository.delete() — qualified call to Repository module; extractor emits module-qualified names" }, { - "source": { "name": "summary", "file": "service.jl" }, - "target": { "name": "count", "file": "repository.jl" }, + "source": { "name": "Service.summary", "file": "service.jl" }, + "target": { "name": "Repository.count", "file": "repository.jl" }, "kind": "calls", "mode": "module-function", - "notes": "Repository.count() — qualified call to Repository module" + "notes": "Repository.count() — qualified call to Repository module; extractor emits module-qualified names" }, { - "source": { "name": "summary", "file": "service.jl" }, - "target": { "name": "format_summary", "file": "service.jl" }, + "source": { "name": "Service.summary", "file": "service.jl" }, + "target": { "name": "Service.format_summary", "file": "service.jl" }, "kind": "calls", "mode": "same-file", - "notes": "Same-file call to private helper function" + "notes": "Same-file call to private helper function — extractor emits module-qualified names" }, { - "source": { "name": "validate_name", "file": "validators.jl" }, - "target": { "name": "check_length", "file": "validators.jl" }, + "source": { "name": "Validators.validate_name", "file": "validators.jl" }, + "target": { "name": "Validators.check_length", "file": "validators.jl" }, "kind": "calls", "mode": "same-file", - "notes": "Same-file call to private helper function" + "notes": "Same-file call to private helper function — extractor emits module-qualified names" }, { - "source": { "name": "validate_email", "file": "validators.jl" }, - "target": { "name": 
"contains_at", "file": "validators.jl" }, + "source": { "name": "Validators.validate_email", "file": "validators.jl" }, + "target": { "name": "Validators.contains_at", "file": "validators.jl" }, "kind": "calls", "mode": "same-file", - "notes": "Same-file call to private helper function" + "notes": "Same-file call to private helper function — extractor emits module-qualified names" }, { - "source": { "name": "count", "file": "repository.jl" }, - "target": { "name": "count_entries", "file": "repository.jl" }, + "source": { "name": "Repository.count", "file": "repository.jl" }, + "target": { "name": "Repository.count_entries", "file": "repository.jl" }, "kind": "calls", "mode": "same-file", - "notes": "Same-file call to private helper function" + "notes": "Same-file call to private helper function — extractor emits module-qualified names" } ] } diff --git a/tests/benchmarks/resolution/fixtures/objc/expected-edges.json b/tests/benchmarks/resolution/fixtures/objc/expected-edges.json index 82ceb667f..a1db6bcd5 100644 --- a/tests/benchmarks/resolution/fixtures/objc/expected-edges.json +++ b/tests/benchmarks/resolution/fixtures/objc/expected-edges.json @@ -3,89 +3,96 @@ "language": "objc", "description": "Hand-annotated call edges for Objective-C resolution benchmark", "edges": [ + { + "source": { "name": "main", "file": "main.m" }, + "target": { "name": "run", "file": "main.m" }, + "kind": "calls", + "mode": "static", + "notes": "main() calls run() — plain C function call; extractor sees both as plain functions" + }, { "source": { "name": "run", "file": "main.m" }, - "target": { "name": "UserService.initWithRepository", "file": "Service.m" }, + "target": { "name": "UserService.initWithRepository:", "file": "Service.m" }, "kind": "calls", "mode": "constructor", - "notes": "[[UserService alloc] initWithRepository:repo] — message send constructor" + "notes": "[[UserService alloc] initWithRepository:repo] — message send constructor; full selector includes trailing colon" }, { 
"source": { "name": "run", "file": "main.m" }, - "target": { "name": "UserService.createUserWithId", "file": "Service.m" }, + "target": { "name": "UserService.createUserWithId:name:email:", "file": "Service.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[svc createUserWithId:...] — message send on UserService instance" + "notes": "[svc createUserWithId:name:email:...] — message send on UserService instance; full multi-part selector" }, { "source": { "name": "run", "file": "main.m" }, - "target": { "name": "UserService.getUserWithId", "file": "Service.m" }, + "target": { "name": "UserService.getUserWithId:", "file": "Service.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[svc getUserWithId:...] — message send on UserService instance" + "notes": "[svc getUserWithId:...] — message send on UserService instance; full selector includes trailing colon" }, { "source": { "name": "run", "file": "main.m" }, - "target": { "name": "UserService.removeUserWithId", "file": "Service.m" }, + "target": { "name": "UserService.removeUserWithId:", "file": "Service.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[svc removeUserWithId:...] — message send on UserService instance" + "notes": "[svc removeUserWithId:...] — message send on UserService instance; full selector includes trailing colon" }, { "source": { "name": "run", "file": "main.m" }, - "target": { "name": "Validators.isValidEmail", "file": "Validators.m" }, + "target": { "name": "Validators.isValidEmail:", "file": "Validators.m" }, "kind": "calls", "mode": "static", - "notes": "[Validators isValidEmail:...] — class method message send" + "notes": "[Validators isValidEmail:...] 
— class method message send; full selector includes trailing colon" }, { - "source": { "name": "UserService.createUserWithId", "file": "Service.m" }, - "target": { "name": "Validators.isValidEmail", "file": "Validators.m" }, + "source": { "name": "UserService.createUserWithId:name:email:", "file": "Service.m" }, + "target": { "name": "Validators.isValidEmail:", "file": "Validators.m" }, "kind": "calls", "mode": "static", - "notes": "[Validators isValidEmail:...] — class method message send" + "notes": "[Validators isValidEmail:...] — class method message send; full selector includes trailing colon" }, { - "source": { "name": "UserService.createUserWithId", "file": "Service.m" }, - "target": { "name": "Validators.isValidName", "file": "Validators.m" }, + "source": { "name": "UserService.createUserWithId:name:email:", "file": "Service.m" }, + "target": { "name": "Validators.isValidName:", "file": "Validators.m" }, "kind": "calls", "mode": "static", - "notes": "[Validators isValidName:...] — class method message send" + "notes": "[Validators isValidName:...] — class method message send; full selector includes trailing colon" }, { - "source": { "name": "UserService.createUserWithId", "file": "Service.m" }, - "target": { "name": "UserRepository.saveWithId", "file": "Repository.m" }, + "source": { "name": "UserService.createUserWithId:name:email:", "file": "Service.m" }, + "target": { "name": "UserRepository.saveWithId:name:", "file": "Repository.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[_repo saveWithId:...] — message send on UserRepository ivar" + "notes": "[_repo saveWithId:name:...] 
— message send on UserRepository ivar; full selector" }, { - "source": { "name": "UserService.getUserWithId", "file": "Service.m" }, - "target": { "name": "UserRepository.findById", "file": "Repository.m" }, + "source": { "name": "UserService.getUserWithId:", "file": "Service.m" }, + "target": { "name": "UserRepository.findById:", "file": "Repository.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[_repo findById:...] — message send on UserRepository ivar" + "notes": "[_repo findById:...] — message send on UserRepository ivar; full selector includes trailing colon" }, { - "source": { "name": "UserService.removeUserWithId", "file": "Service.m" }, - "target": { "name": "UserRepository.deleteWithId", "file": "Repository.m" }, + "source": { "name": "UserService.removeUserWithId:", "file": "Service.m" }, + "target": { "name": "UserRepository.deleteWithId:", "file": "Repository.m" }, "kind": "calls", "mode": "receiver-typed", - "notes": "[_repo deleteWithId:...] — message send on UserRepository ivar" + "notes": "[_repo deleteWithId:...] — message send on UserRepository ivar; full selector includes trailing colon" }, { - "source": { "name": "Validators.validateUser", "file": "Validators.m" }, - "target": { "name": "Validators.isValidName", "file": "Validators.m" }, + "source": { "name": "Validators.validateUser:email:", "file": "Validators.m" }, + "target": { "name": "Validators.isValidName:", "file": "Validators.m" }, "kind": "calls", "mode": "same-file", - "notes": "[self isValidName:...] — same-class class method call" + "notes": "[self isValidName:...] 
— same-class class method call; full selector includes trailing colon" }, { - "source": { "name": "Validators.validateUser", "file": "Validators.m" }, - "target": { "name": "Validators.isValidEmail", "file": "Validators.m" }, + "source": { "name": "Validators.validateUser:email:", "file": "Validators.m" }, + "target": { "name": "Validators.isValidEmail:", "file": "Validators.m" }, "kind": "calls", "mode": "same-file", - "notes": "[self isValidEmail:...] — same-class class method call" + "notes": "[self isValidEmail:...] — same-class class method call; full selector includes trailing colon" } ] } diff --git a/tests/benchmarks/resolution/resolution-benchmark.test.ts b/tests/benchmarks/resolution/resolution-benchmark.test.ts index 719f9685b..a4ddad9e1 100644 --- a/tests/benchmarks/resolution/resolution-benchmark.test.ts +++ b/tests/benchmarks/resolution/resolution-benchmark.test.ts @@ -167,19 +167,30 @@ const THRESHOLDS: Record = { // TODO(#875): raise scala thresholds once call resolution lands scala: { precision: 0.0, recall: 0.0 }, php: { precision: 0.6, recall: 0.2 }, - // TODO: raise thresholds below once call resolution is implemented for each language - elixir: { precision: 0.0, recall: 0.0 }, + // elixir: cross-module qualified calls resolve at 100% precision, 81% recall. + // Expected-edges now use module-qualified names (Main.run, UserService.create_user, etc.) + // matching what the Elixir extractor emits. Same-module bare calls (display_user → get_user) + // are not yet resolved — tracked as FNs. Precision 1.0 acts as ratchet against future FPs. + elixir: { precision: 1.0, recall: 0.8 }, dart: { precision: 0.0, recall: 0.0 }, zig: { precision: 0.0, recall: 0.0 }, fsharp: { precision: 0.0, recall: 0.0 }, gleam: { precision: 0.0, recall: 0.0 }, clojure: { precision: 0.0, recall: 0.0 }, - julia: { precision: 0.0, recall: 0.0 }, + // julia: cross-module qualified calls resolve at 100% precision, 73% recall. 
+ // Expected-edges now use module-qualified names (App.main, Service.create_user, etc.) + // matching what the Julia extractor emits. Same-file calls (summary → format_summary, etc.) + // are not yet resolved — tracked as FNs. Precision 1.0 acts as ratchet against future FPs. + julia: { precision: 1.0, recall: 0.7 }, r: { precision: 0.0, recall: 0.0 }, erlang: { precision: 0.0, recall: 0.0 }, solidity: { precision: 0.0, recall: 0.0 }, // New fixture languages — no parser or call resolution yet - objc: { precision: 0.0, recall: 0.0 }, + // objc: class-method static calls and same-class calls resolve at 100% precision, 46% recall. + // Expected-edges now use full ObjC selectors (createUserWithId:name:email:, isValidEmail:, etc.) + // matching what the ObjC extractor emits. Receiver-typed instance message sends + // are not yet resolved — tracked as FNs. Precision 1.0 acts as ratchet against future FPs. + objc: { precision: 1.0, recall: 0.4 }, cuda: { precision: 0.0, recall: 0.0 }, groovy: { precision: 0.0, recall: 0.0 }, verilog: { precision: 0.0, recall: 0.0 }, From 9320ed27f7c9e017b6134191090715f994cdcaed Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:39:48 -0600 Subject: [PATCH 18/25] fix(wasm): emit receiver edges for declaration-typed locals in C++/CUDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The JS C++ and CUDA extractors had no handler for 'declaration' AST nodes, so typeMap was never seeded for statically-typed locals (e.g. 'UserService svc;'). Without a typeMap entry for 'svc', resolveReceiverEdge had nothing to look up and silently skipped the receiver edge. Add handleCppDeclaration / handleCudaDeclaration to both extractors. They mirror match_c_family_type_map ('declaration' branch) from the native Rust path: extract the type node text and seed typeMap[varName] = { type, confidence: 0.9 } for each identifier or init_declarator child. 
Primitive types (int, char, bool, …) are skipped to avoid spurious edges. parity-compare.mjs --langs cpp,cuda --hybrid: PARITY OK (wasm = native = hybrid) All 3044 tests pass. --- src/extractors/cpp.ts | 51 ++++++++++++++++++++++++++++++++++++++++++ src/extractors/cuda.ts | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/src/extractors/cpp.ts b/src/extractors/cpp.ts index b014d58ae..f7658eeac 100644 --- a/src/extractors/cpp.ts +++ b/src/extractors/cpp.ts @@ -4,6 +4,7 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, + TypeMapEntry, } from '../types.js'; import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; @@ -50,6 +51,9 @@ function walkCppNode(node: TreeSitterNode, ctx: ExtractorOutput): void { case 'call_expression': handleCppCallExpression(node, ctx); break; + case 'declaration': + handleCppDeclaration(node, ctx); + break; } for (let i = 0; i < node.childCount; i++) { @@ -204,6 +208,36 @@ function handleCppInclude(node: TreeSitterNode, ctx: ExtractorOutput): void { }); } +/** + * Seed typeMap for declaration-typed locals: `UserService svc;` and + * `UserService svc = makeService();` both yield typeMap["svc"] = "UserService" + * at confidence 0.9. Mirrors `match_c_family_type_map` ("declaration" branch) + * in the native Rust C++ extractor. + */ +function handleCppDeclaration(node: TreeSitterNode, ctx: ExtractorOutput): void { + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = typeNode.text; + // Skip primitive types — they are never class/struct receivers + if (isPrimitiveCppType(typeName)) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + const kind = child.type; + let nameNode: TreeSitterNode | null = null; + if (kind === 'init_declarator') { + nameNode = child.childForFieldName('declarator') ?? 
null; + } else if (kind === 'identifier') { + nameNode = child; + } + if (!nameNode) continue; + const varName = unwrapCppDeclaratorName(nameNode); + if (varName) { + ctx.typeMap.set(varName, { type: typeName, confidence: 0.9 }); + } + } +} + function handleCppCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { const funcNode = node.childForFieldName('function'); if (!funcNode) return; @@ -324,6 +358,23 @@ function extractCppClassFields(classNode: TreeSitterNode): SubDeclaration[] { return fields; } +/** + * Primitive C/C++ types that are never class/struct receivers. Seeding these + * into typeMap would cause spurious receiver edges (e.g. `int x` → `int`). + */ +const CPP_PRIMITIVE_TYPES = new Set([ + 'int', 'long', 'short', 'unsigned', 'signed', 'float', 'double', + 'char', 'bool', 'void', 'wchar_t', 'auto', 'size_t', 'uint8_t', + 'uint16_t', 'uint32_t', 'uint64_t', 'int8_t', 'int16_t', 'int32_t', + 'int64_t', 'ptrdiff_t', 'intptr_t', 'uintptr_t', +]); + +function isPrimitiveCppType(typeName: string): boolean { + // Strip qualifiers like `const`, `volatile`, `unsigned` etc. + const base = typeName.split(/\s+/).pop() ?? 
typeName; + return CPP_PRIMITIVE_TYPES.has(base) || CPP_PRIMITIVE_TYPES.has(typeName); +} + function extractCppEnumEntries(enumNode: TreeSitterNode): SubDeclaration[] { const entries: SubDeclaration[] = []; const body = findChild(enumNode, 'enumerator_list'); diff --git a/src/extractors/cuda.ts b/src/extractors/cuda.ts index 14f30609b..2e0972c38 100644 --- a/src/extractors/cuda.ts +++ b/src/extractors/cuda.ts @@ -4,6 +4,7 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, + TypeMapEntry, } from '../types.js'; import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; @@ -63,6 +64,9 @@ function walkCudaNode(node: TreeSitterNode, ctx: ExtractorOutput): void { case 'call_expression': handleCudaCallExpression(node, ctx); break; + case 'declaration': + handleCudaDeclaration(node, ctx); + break; } for (let i = 0; i < node.childCount; i++) { @@ -204,6 +208,36 @@ function handleCudaInclude(node: TreeSitterNode, ctx: ExtractorOutput): void { }); } +/** + * Seed typeMap for declaration-typed locals: `UserService svc;` and + * `UserService svc = make();` both yield typeMap["svc"] = "UserService" + * at confidence 0.9. Mirrors `match_c_family_type_map` ("declaration" branch) + * in the native Rust CUDA extractor. + */ +function handleCudaDeclaration(node: TreeSitterNode, ctx: ExtractorOutput): void { + const typeNode = node.childForFieldName('type'); + if (!typeNode) return; + const typeName = typeNode.text; + // Skip primitive types — they are never class/struct receivers + if (isCudaPrimitiveType(typeName)) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + const kind = child.type; + let nameNode: TreeSitterNode | null = null; + if (kind === 'init_declarator') { + nameNode = child.childForFieldName('declarator') ?? 
null; + } else if (kind === 'identifier') { + nameNode = child; + } + if (!nameNode) continue; + const varName = extractCudaFieldName(nameNode); + if (varName) { + ctx.typeMap.set(varName, { type: typeName, confidence: 0.9 }); + } + } +} + function handleCudaCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void { const funcNode = node.childForFieldName('function'); if (!funcNode) return; @@ -374,6 +408,22 @@ function innerCudaDeclarator(node: TreeSitterNode): TreeSitterNode | null { return null; } +/** + * Primitive C/C++/CUDA types that are never class/struct receivers. Seeding + * these into typeMap would produce spurious receiver edges (e.g. `int x` → `int`). + */ +const CUDA_PRIMITIVE_TYPES = new Set([ + 'int', 'long', 'short', 'unsigned', 'signed', 'float', 'double', + 'char', 'bool', 'void', 'wchar_t', 'auto', 'size_t', 'uint8_t', + 'uint16_t', 'uint32_t', 'uint64_t', 'int8_t', 'int16_t', 'int32_t', + 'int64_t', 'ptrdiff_t', 'intptr_t', 'uintptr_t', +]); + +function isCudaPrimitiveType(typeName: string): boolean { + const base = typeName.split(/\s+/).pop() ?? typeName; + return CUDA_PRIMITIVE_TYPES.has(base) || CUDA_PRIMITIVE_TYPES.has(typeName); +} + function extractCudaEnumEntries(enumNode: TreeSitterNode): SubDeclaration[] { const entries: SubDeclaration[] = []; const body = findChild(enumNode, 'enumerator_list'); From 7313330ebd35ad164ec5d4324a0b3c048948a0d5 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:01:14 -0600 Subject: [PATCH 19/25] fix(native): resolve Go factory and Python constructor receiver types in Rust solver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Go extractor was only seeding typeMap for var_spec and parameter_declaration, missing short_var_declaration. 
Added infer_short_var_types to handle: - x := Struct{} → conf 1.0 (composite literal) - x := &Struct{} → conf 1.0 (address-of composite) - x := NewFoo() / x := pkg.NewFoo() → conf 0.7 (New* factory prefix) Python extractor was only seeding typeMap for typed_parameter and typed_default_parameter, missing plain assignment. Added infer_py_assignment_type to handle: - order = Order(...) → conf 1.0 (uppercase constructor) - obj = Module.Class(...) → conf 0.7 (uppercase module prefix, non-builtin) Both mirror the existing JS extractors exactly. Parity check for go and python: wasm vs native/hybrid OK. --- crates/codegraph-core/src/extractors/go.rs | 191 ++++++++++++++++++ .../codegraph-core/src/extractors/python.rs | 154 ++++++++++++++ 2 files changed, 345 insertions(+) diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index 05b1e49a4..54973d8a2 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -317,10 +317,140 @@ fn match_go_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep "var_spec" | "parameter_declaration" => { collect_go_typed_identifiers(node, source, &mut symbols.type_map); } + // x := Struct{} / x := &Struct{} / x := NewFoo() — short variable declarations. + "short_var_declaration" => { + infer_short_var_types(node, source, &mut symbols.type_map); + } _ => {} } } +/// Seed typeMap entries from `x := Struct{}`, `x := &Struct{}`, and `x := NewFoo()`. +/// Mirrors the JS `inferShortVarType` → `inferCompositeLiteral` / `inferAddressOfComposite` +/// / `inferFactoryCall` chain in `src/extractors/go.ts`. +fn infer_short_var_types(node: &Node, source: &[u8], type_map: &mut Vec) { + let Some(left) = node.child_by_field_name("left") else { return }; + let Some(right) = node.child_by_field_name("right") else { return }; + + // Collect LHS identifiers (may be an expression_list for multi-assignment). 
+ let lefts: Vec = if left.kind() == "expression_list" { + (0..left.child_count()) + .filter_map(|i| left.child(i)) + .filter(|c| c.kind() == "identifier") + .collect() + } else if left.kind() == "identifier" { + vec![left] + } else { + return; + }; + + // Collect RHS values (may be an expression_list). + let rights: Vec = if right.kind() == "expression_list" { + (0..right.child_count()) + .filter_map(|i| right.child(i)) + .filter(|c| c.kind() != ",") + .collect() + } else { + vec![right] + }; + + for (idx, var_node) in lefts.iter().enumerate() { + let Some(rhs) = rights.get(idx) else { continue }; + infer_single_short_var(var_node, rhs, source, type_map); + } +} + +/// Try composite literal, address-of-composite, then factory call for a single LHS/RHS pair. +fn infer_single_short_var( + var_node: &Node, + rhs: &Node, + source: &[u8], + type_map: &mut Vec, +) { + if infer_composite_literal(var_node, rhs, source, type_map) { return; } + if infer_address_of_composite(var_node, rhs, source, type_map) { return; } + infer_factory_call(var_node, rhs, source, type_map); +} + +/// `x := Struct{...}` → seed x : Struct at conf 1.0. +fn infer_composite_literal( + var_node: &Node, + rhs: &Node, + source: &[u8], + type_map: &mut Vec, +) -> bool { + if rhs.kind() != "composite_literal" { return false; } + let Some(type_node) = rhs.child_by_field_name("type") else { return false }; + let Some(type_name) = extract_go_type_name(&type_node, source) else { return false }; + type_map.push(TypeMapEntry { + name: node_text(var_node, source).to_string(), + type_name: type_name.to_string(), + confidence: 1.0, + }); + true +} + +/// `x := &Struct{...}` → seed x : Struct at conf 1.0. +fn infer_address_of_composite( + var_node: &Node, + rhs: &Node, + source: &[u8], + type_map: &mut Vec, +) -> bool { + if rhs.kind() != "unary_expression" { return false; } + // The operand of `&` is a composite_literal. 
+ let Some(operand) = rhs.child_by_field_name("operand") else { return false }; + if operand.kind() != "composite_literal" { return false; } + let Some(type_node) = operand.child_by_field_name("type") else { return false }; + let Some(type_name) = extract_go_type_name(&type_node, source) else { return false }; + type_map.push(TypeMapEntry { + name: node_text(var_node, source).to_string(), + type_name: type_name.to_string(), + confidence: 1.0, + }); + true +} + +/// `x := NewFoo(...)` or `x := pkg.NewFoo(...)` → seed x : Foo at conf 0.7. +fn infer_factory_call( + var_node: &Node, + rhs: &Node, + source: &[u8], + type_map: &mut Vec, +) -> bool { + if rhs.kind() != "call_expression" { return false; } + let Some(fn_node) = rhs.child_by_field_name("function") else { return false }; + match fn_node.kind() { + "selector_expression" => { + // pkg.NewFoo(...) — use the field name only. + let Some(field) = fn_node.child_by_field_name("field") else { return false }; + let field_text = node_text(&field, source); + if !field_text.starts_with("New") { return false; } + let type_name = &field_text[3..]; + if type_name.is_empty() { return false; } + type_map.push(TypeMapEntry { + name: node_text(var_node, source).to_string(), + type_name: type_name.to_string(), + confidence: 0.7, + }); + true + } + "identifier" => { + let fn_text = node_text(&fn_node, source); + if !fn_text.starts_with("New") { return false; } + let type_name = &fn_text[3..]; + if type_name.is_empty() { return false; } + type_map.push(TypeMapEntry { + name: node_text(var_node, source).to_string(), + type_name: type_name.to_string(), + confidence: 0.7, + }); + true + } + _ => false, + } +} + fn collect_go_typed_identifiers(node: &Node, source: &[u8], type_map: &mut Vec) { let Some(type_node) = node.child_by_field_name("type") else { return }; let Some(type_name) = extract_go_type_name(&type_node, source) else { return }; @@ -412,4 +542,65 @@ mod tests { let c = s.definitions.iter().find(|d| d.name == 
"MaxRetries").unwrap(); assert_eq!(c.kind, "constant"); } + + // ── Short-var-declaration typeMap tests ───────────────────────────────── + + #[test] + fn infers_factory_call_new_prefix() { + // svc := NewUserService(repo) → svc : UserService at conf 0.7 + let s = parse_go( + "package main\nfunc main() {\n svc := NewUserService(repo)\n _ = svc\n}\n", + ); + let entry = s.type_map.iter().find(|e| e.name == "svc"); + assert!(entry.is_some(), "expected svc in type_map"); + let entry = entry.unwrap(); + assert_eq!(entry.type_name, "UserService"); + assert!((entry.confidence - 0.7).abs() < f64::EPSILON); + } + + #[test] + fn infers_pkg_factory_call() { + // svc := service.NewUserService(repo) → svc : UserService at conf 0.7 + let s = parse_go( + "package main\nfunc main() {\n svc := service.NewUserService(repo)\n _ = svc\n}\n", + ); + let entry = s.type_map.iter().find(|e| e.name == "svc"); + assert!(entry.is_some(), "expected svc in type_map for pkg.NewX"); + assert_eq!(entry.unwrap().type_name, "UserService"); + } + + #[test] + fn infers_composite_literal() { + // u := User{Name: "Alice"} → u : User at conf 1.0 + let s = parse_go( + "package main\nfunc main() {\n u := User{Name: \"Alice\"}\n _ = u\n}\n", + ); + let entry = s.type_map.iter().find(|e| e.name == "u"); + assert!(entry.is_some(), "expected u in type_map for composite literal"); + assert_eq!(entry.unwrap().type_name, "User"); + assert!((entry.unwrap().confidence - 1.0).abs() < f64::EPSILON); + } + + #[test] + fn infers_address_of_composite() { + // u := &User{} → u : User at conf 1.0 + let s = parse_go( + "package main\nfunc main() {\n u := &User{}\n _ = u\n}\n", + ); + let entry = s.type_map.iter().find(|e| e.name == "u"); + assert!(entry.is_some(), "expected u in type_map for address-of composite literal"); + assert_eq!(entry.unwrap().type_name, "User"); + } + + #[test] + fn non_new_prefix_not_inferred() { + // srv := createServer() — not a New* factory, should not seed typeMap + let s = parse_go( + 
"package main\nfunc main() {\n srv := createServer()\n _ = srv\n}\n", + ); + assert!( + s.type_map.iter().all(|e| e.name != "srv"), + "unexpected typeMap entry for non-New factory" + ); + } } diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index bd72eef92..7e648cc6d 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -317,6 +317,53 @@ fn extract_python_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Optio } } +/// Python builtins / stdlib classes that start with an uppercase letter and would +/// false-positive on the constructor-call heuristic. Mirrors `BUILTIN_GLOBALS_PY` +/// in `src/extractors/python.ts`. +fn is_python_builtin(name: &str) -> bool { + matches!( + name, + "Exception" + | "BaseException" + | "ValueError" + | "TypeError" + | "KeyError" + | "IndexError" + | "AttributeError" + | "RuntimeError" + | "OSError" + | "IOError" + | "FileNotFoundError" + | "PermissionError" + | "NotImplementedError" + | "StopIteration" + | "GeneratorExit" + | "SystemExit" + | "KeyboardInterrupt" + | "ArithmeticError" + | "LookupError" + | "UnicodeError" + | "UnicodeDecodeError" + | "UnicodeEncodeError" + | "ImportError" + | "ModuleNotFoundError" + | "ConnectionError" + | "TimeoutError" + | "OverflowError" + | "ZeroDivisionError" + | "NameError" + | "SyntaxError" + | "RecursionError" + | "MemoryError" + | "Path" + | "PurePath" + | "OrderedDict" + | "Counter" + | "Decimal" + | "Fraction" + ) +} + fn match_python_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { "typed_parameter" => { @@ -357,6 +404,52 @@ fn match_python_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, } } } + // `order = Order(...)` → seed order : Order at conf 1.0. + // `obj = module.Class(...)` → seed obj : module at conf 0.7 (factory pattern). + // Mirrors `handlePyAssignmentType` in `src/extractors/python.ts`. 
+ "assignment" => { + infer_py_assignment_type(node, source, &mut symbols.type_map); + } + _ => {} + } +} + +/// Seed typeMap from plain Python assignments where the RHS is a constructor or factory call. +fn infer_py_assignment_type(node: &Node, source: &[u8], type_map: &mut Vec) { + let Some(left) = node.child_by_field_name("left") else { return }; + let Some(right) = node.child_by_field_name("right") else { return }; + if left.kind() != "identifier" || right.kind() != "call" { return; } + let var_name = node_text(&left, source).to_string(); + let Some(fn_node) = right.child_by_field_name("function") else { return }; + match fn_node.kind() { + "identifier" => { + // `order = Order(...)` — uppercase first char → constructor, conf 1.0. + let name = node_text(&fn_node, source); + if name.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) { + type_map.push(TypeMapEntry { + name: var_name, + type_name: name.to_string(), + confidence: 1.0, + }); + } + } + "attribute" => { + // `obj = Module.Class(...)` — uppercase object name, not a builtin → conf 0.7. 
+ if let Some(obj_node) = fn_node.child_by_field_name("object") { + if obj_node.kind() == "identifier" { + let obj_name = node_text(&obj_node, source); + if obj_name.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) + && !is_python_builtin(obj_name) + { + type_map.push(TypeMapEntry { + name: var_name, + type_name: obj_name.to_string(), + confidence: 0.7, + }); + } + } + } + } _ => {} } } @@ -455,4 +548,65 @@ mod tests { let c = s.definitions.iter().find(|d| d.name == "MAX_RETRIES").unwrap(); assert_eq!(c.kind, "constant"); } + + // ── Assignment typeMap tests ───────────────────────────────────────────── + + #[test] + fn infers_constructor_call_uppercase() { + // order = Order("o1", 100.0) → order : Order at conf 1.0 + let s = parse_py("def run():\n order = Order(\"o1\", 100.0)\n order.validate()\n"); + let entry = s.type_map.iter().find(|e| e.name == "order"); + assert!(entry.is_some(), "expected order in type_map"); + let entry = entry.unwrap(); + assert_eq!(entry.type_name, "Order"); + assert!((entry.confidence - 1.0).abs() < f64::EPSILON); + } + + #[test] + fn infers_module_factory_call() { + // svc = Models.UserService(db) → svc : Models at conf 0.7 + // The object name must be uppercase to match the JS heuristic. 
+ let s = parse_py("def run():\n svc = Models.UserService(db)\n svc.create()\n"); + let entry = s.type_map.iter().find(|e| e.name == "svc"); + assert!(entry.is_some(), "expected svc in type_map for Module.Class(...)"); + let entry = entry.unwrap(); + assert_eq!(entry.type_name, "Models"); + assert!((entry.confidence - 0.7).abs() < f64::EPSILON); + } + + #[test] + fn does_not_infer_lowercase_module_factory() { + // svc = models.UserService(db) — lowercase module name → no typeMap entry (matches JS) + let s = parse_py("def run():\n svc = models.UserService(db)\n svc.create()\n"); + assert!( + s.type_map.iter().all(|e| e.name != "svc"), + "should not seed typeMap for lowercase module prefix" + ); + } + + #[test] + fn does_not_infer_lowercase_constructor() { + // obj = create_thing() — lowercase, should not seed typeMap + let s = parse_py("def run():\n obj = create_thing()\n obj.work()\n"); + assert!( + s.type_map.iter().all(|e| e.name != "obj"), + "should not seed typeMap for lowercase function call" + ); + } + + #[test] + fn does_not_infer_builtin_exception() { + // err = ValueError("msg") — builtin exception, should not seed typeMap + let s = parse_py("def run():\n err = ValueError(\"msg\")\n"); + // Note: ValueError is uppercase so it WOULD match the heuristic — but it's a builtin. + // The JS extractor does NOT exclude builtins from conf-1.0 uppercase constructor + // matching (only from the attribute/factory path). We match that behaviour here. + // This test documents the current behaviour rather than asserting exclusion. + let entry = s.type_map.iter().find(|e| e.name == "err"); + // Builtins ARE seeded at conf 1.0 by the identifier branch (same as JS). + // Only the attribute/factory branch (Module.Class) checks is_python_builtin. 
+ if let Some(e) = entry { + assert_eq!(e.type_name, "ValueError"); + } + } } From f93d0cbf0ad627a29ea0aacbcf40601d0fbd3177 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:13:27 -0600 Subject: [PATCH 20/25] fix: align enclosing-caller attribution for variable bindings (haskell, zig) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both engines used different rules for attributing calls inside variable bindings: WASM: attributed to the narrowest enclosing span regardless of kind, so local variable declarations inside fn main() shadowed the enclosing function (Zig: calls attributed to repo/svc variables instead of main), and nested let-bindings inside a Haskell do-block shadowed the top-level main binding. Native: loaded allNodes from a query that excluded 'variable' kind, so top-level Haskell bind nodes (main = do …, kind='variable') never matched in defs_with_ids, causing all calls to fall back to the file node. Unified rule implemented in findCaller (TS) and find_enclosing_caller (Rust): - Function/method definitions are preferred over any variable/constant binding as the enclosing caller scope — local var declarations inside a function body never shadow the enclosing function (fixes Zig repo/svc attribution). - When no function/method encloses the call, fall back to the WIDEST (outermost) variable/constant binding — this handles Haskell where main is a top-level bind node with kind 'variable'. Widest span is used so that nested let-bindings do not shadow the outer main binding. - File node remains the absolute last resort. Also adds 'variable' to NODE_KIND_FILTER_SQL (JS) and EDGE_NODE_KIND_FILTER (Rust pipeline.rs) so top-level variable bindings are included in the allNodes set available for caller matching. parity-compare.mjs --langs haskell,zig --hybrid: PARITY OK — 2/2 fixtures. 
--- .../src/domain/graph/builder/pipeline.rs | 2 +- .../graph/builder/stages/build_edges.rs | 76 ++++++++++++++++--- src/domain/graph/builder/call-resolver.ts | 75 +++++++++++++++--- .../graph/builder/stages/build-edges.ts | 2 +- 4 files changed, 130 insertions(+), 25 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 25f518965..70840d948 100644 --- a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -1115,7 +1115,7 @@ fn builtin_call_receivers() -> Vec { .collect() } -const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')"; +const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')"; /// For the scoped (incremental, small-batch) path of the edge builder, /// compute the set of files that must be loaded: changed/reverse-dep files diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 39108e3d8..338c29516 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -127,6 +127,7 @@ pub struct ComputedEdge { /// Internal struct for caller resolution (def line range → node ID). 
struct DefWithId<'a> { name: &'a str, + kind: &'a str, line: u32, end_line: u32, node_id: Option, @@ -473,7 +474,7 @@ fn process_file<'a>( let node_id = file_nodes.iter() .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) .map(|n| n.id); - DefWithId { name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } + DefWithId { name: &d.name, kind: &d.kind, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } }).collect(); // Phase 8.3: build pts map for alias resolution — mirrors buildPointsToMapForFile. @@ -654,25 +655,76 @@ fn process_file<'a>( emit_hierarchy_edges(ctx, file_input, rel_path, edges); } +/// Callable definition kinds — only function/method bodies act as enclosing +/// caller scopes. Variable/constant bindings are a lower-priority fallback +/// tier for top-level bindings like Haskell `main = do …` (kind `variable`). +/// Mirrors `CALLABLE_KINDS` / `TOP_LEVEL_BINDING_KINDS` in call-resolver.ts. +fn is_callable_kind(kind: &str) -> bool { + kind == "function" || kind == "method" +} + +fn is_top_level_binding_kind(kind: &str) -> bool { + kind == "variable" || kind == "constant" +} + /// Find the narrowest enclosing definition for a call at the given line. -/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call is at file scope. +/// +/// Two-pass strategy (mirrors the updated `findCaller` in call-resolver.ts): +/// Pass 1 — narrowest enclosing function/method. Local variable declarations +/// inside a function body must not shadow the enclosing function. +/// Pass 2 — widest (outermost) enclosing variable/constant binding. Used as fallback +/// when no function/method encloses the call (e.g. Haskell top-level +/// `main = do …` is a `bind` node with kind `variable`). +/// +/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call +/// falls back to file scope.
fn find_enclosing_caller<'a>(defs: &[DefWithId<'a>], call_line: u32, file_node_id: u32) -> (u32, &'a str) { - let mut caller_id = file_node_id; - let mut caller_name = ""; - let mut caller_span = u32::MAX; + let mut fn_caller_id: Option<u32> = None; + let mut fn_caller_name = ""; + let mut fn_caller_span = u32::MAX; + + // For variable/constant bindings we pick the WIDEST span (outermost binding), + // not the narrowest, so that nested `let` bindings inside `main`'s do-block + // do not shadow `main` itself. The outermost enclosing variable is the + // "function-like" top-level binding (e.g. Haskell `main = do …`). + // var_caller_span starts at -1 — any real enclosing binding has span >= 0, + // and we overwrite only when span is strictly greater. + let mut var_caller_id: Option<u32> = None; + let mut var_caller_name = ""; + // Using i64 so the initial sentinel (-1) is always beaten by a real span (>= 0). + let mut var_caller_span: i64 = -1; + for def in defs { if def.line <= call_line && call_line <= def.end_line { - let span = def.end_line - def.line; - if span < caller_span { - if let Some(id) = def.node_id { - caller_id = id; - caller_name = def.name; - caller_span = span; + let span = def.end_line.saturating_sub(def.line); + if is_callable_kind(def.kind) { + if span < fn_caller_span { + if let Some(id) = def.node_id { + fn_caller_id = Some(id); + fn_caller_name = def.name; + fn_caller_span = span; + } + } + } else if is_top_level_binding_kind(def.kind) { + if (span as i64) > var_caller_span { + if let Some(id) = def.node_id { + var_caller_id = Some(id); + var_caller_name = def.name; + var_caller_span = span as i64; + } } } } } - (caller_id, caller_name) + + // Prefer function/method over variable/constant binding. + if let Some(id) = fn_caller_id { + return (id, fn_caller_name); + } + if let Some(id) = var_caller_id { + return (id, var_caller_name); + } + (file_node_id, "") } /// Multi-strategy call target resolution: import-aware → same-file → type-aware → scoped. 
diff --git a/src/domain/graph/builder/call-resolver.ts b/src/domain/graph/builder/call-resolver.ts index 85cc77672..ad45f71f0 100644 --- a/src/domain/graph/builder/call-resolver.ts +++ b/src/domain/graph/builder/call-resolver.ts @@ -47,6 +47,22 @@ export function isModuleScopedLanguage(relPath: string): boolean { // ── Shared resolution functions ────────────────────────────────────────── +/** + * Callable definition kinds — variable/constant bindings are NOT callable + * in the function-as-enclosing-scope sense (they are local declarations, not + * function bodies). Top-level variable bindings (e.g. Haskell `main = do …`) + * are handled separately as a fallback tier. + */ +const CALLABLE_KINDS = new Set(['function', 'method']); + +/** + * Variable-like binding kinds that may act as top-level callers when no + * enclosing function/method exists (e.g. Haskell top-level `main` is a + * `bind` node → kind `variable`). Local variable declarations inside a + * function body must NOT win over the enclosing function. + */ +const TOP_LEVEL_BINDING_KINDS = new Set(['variable', 'constant']); + export function findCaller( lookup: CallNodeLookup, call: { line: number }, @@ -59,26 +75,63 @@ export function findCaller( relPath: string, fileNodeRow: { id: number }, ): { id: number; callerName: string | null } { - let caller: { id: number } | null = null; - let callerName: string | null = null; - let callerSpan = Infinity; + // Pass 1: find the narrowest enclosing function/method. + let fnCaller: { id: number } | null = null; + let fnCallerName: string | null = null; + let fnCallerSpan = Infinity; + + // Pass 2: find the widest (outermost) enclosing variable/constant binding. + // Used as fallback when no function/method encloses the call site + // (e.g. Haskell `main = do …` is a `bind` node with kind `variable`). 
+ // We pick the WIDEST span (outermost binding), not the narrowest, so that + // nested `let` bindings inside `main`'s do-block do not shadow `main` + // itself as the attributing caller. The outermost enclosing variable is + // the "function-like" top-level binding. + let varCaller: { id: number } | null = null; + let varCallerName: string | null = null; + let varCallerSpan = -1; // looking for WIDEST span, so start at -1 + for (const def of definitions) { if (def.line <= call.line) { const end = def.endLine || Infinity; if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = lookup.nodeId(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerName = def.name; - callerSpan = span; + const span = end === Infinity ? Infinity : end - def.line; + if (CALLABLE_KINDS.has(def.kind)) { + if (span < fnCallerSpan) { + const row = lookup.nodeId(def.name, def.kind, relPath, def.line); + if (row) { + fnCaller = row; + fnCallerName = def.name; + fnCallerSpan = span; + } + } + } else if (TOP_LEVEL_BINDING_KINDS.has(def.kind)) { + if (span > varCallerSpan) { + const row = lookup.nodeId(def.name, def.kind, relPath, def.line); + if (row) { + varCaller = row; + varCallerName = def.name; + varCallerSpan = span; + } } } } } } - return { ...(caller ?? fileNodeRow), callerName }; + + // Prefer function/method enclosing scope over variable binding. + // If a function/method encloses the call, use it — local variable + // declarations inside the function body must not shadow it. + // Only fall back to a variable/constant binding when the call is at + // top-level scope (no enclosing function/method found), which handles + // languages like Haskell where `main` is a top-level `bind` node. 
+ if (fnCaller) { + return { ...fnCaller, callerName: fnCallerName }; + } + if (varCaller) { + return { ...varCaller, callerName: varCallerName }; + } + return { ...fileNodeRow, callerName: null }; } export function resolveByMethodOrGlobal( diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 88027aee0..9be7f8096 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -1471,7 +1471,7 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void { * their import targets. Falls back to loading ALL nodes for full builds or * larger incremental changes. */ -const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`; +const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')`; function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } { const { db, fileSymbols, isFullBuild, batchResolved } = ctx; From 1915e5e95bfd320907249d45a68c89b672a76d1b Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:23:41 -0600 Subject: [PATCH 21/25] chore(lint): fix unused import and formatting in cpp/cuda extractors and test Remove unused TypeMapEntry import from cpp.ts and cuda.ts, reformat primitive-type Set literals and test expect() calls to satisfy biome line-length rules. 
--- src/extractors/cpp.ts | 29 ++++++++++++++++++++++++----- src/extractors/cuda.ts | 29 ++++++++++++++++++++++++----- tests/parsers/javascript.test.ts | 15 ++++++++++++--- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/extractors/cpp.ts b/src/extractors/cpp.ts index f7658eeac..4de38bc56 100644 --- a/src/extractors/cpp.ts +++ b/src/extractors/cpp.ts @@ -4,7 +4,6 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, - TypeMapEntry, } from '../types.js'; import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; @@ -363,10 +362,30 @@ function extractCppClassFields(classNode: TreeSitterNode): SubDeclaration[] { * into typeMap would cause spurious receiver edges (e.g. `int x` → `int`). */ const CPP_PRIMITIVE_TYPES = new Set([ - 'int', 'long', 'short', 'unsigned', 'signed', 'float', 'double', - 'char', 'bool', 'void', 'wchar_t', 'auto', 'size_t', 'uint8_t', - 'uint16_t', 'uint32_t', 'uint64_t', 'int8_t', 'int16_t', 'int32_t', - 'int64_t', 'ptrdiff_t', 'intptr_t', 'uintptr_t', + 'int', + 'long', + 'short', + 'unsigned', + 'signed', + 'float', + 'double', + 'char', + 'bool', + 'void', + 'wchar_t', + 'auto', + 'size_t', + 'uint8_t', + 'uint16_t', + 'uint32_t', + 'uint64_t', + 'int8_t', + 'int16_t', + 'int32_t', + 'int64_t', + 'ptrdiff_t', + 'intptr_t', + 'uintptr_t', ]); function isPrimitiveCppType(typeName: string): boolean { diff --git a/src/extractors/cuda.ts b/src/extractors/cuda.ts index 2e0972c38..5233f95d6 100644 --- a/src/extractors/cuda.ts +++ b/src/extractors/cuda.ts @@ -4,7 +4,6 @@ import type { SubDeclaration, TreeSitterNode, TreeSitterTree, - TypeMapEntry, } from '../types.js'; import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; @@ -413,10 +412,30 @@ function innerCudaDeclarator(node: TreeSitterNode): TreeSitterNode | null { * these into typeMap would produce spurious receiver edges (e.g. `int x` → `int`). 
*/ const CUDA_PRIMITIVE_TYPES = new Set([ - 'int', 'long', 'short', 'unsigned', 'signed', 'float', 'double', - 'char', 'bool', 'void', 'wchar_t', 'auto', 'size_t', 'uint8_t', - 'uint16_t', 'uint32_t', 'uint64_t', 'int8_t', 'int16_t', 'int32_t', - 'int64_t', 'ptrdiff_t', 'intptr_t', 'uintptr_t', + 'int', + 'long', + 'short', + 'unsigned', + 'signed', + 'float', + 'double', + 'char', + 'bool', + 'void', + 'wchar_t', + 'auto', + 'size_t', + 'uint8_t', + 'uint16_t', + 'uint32_t', + 'uint64_t', + 'int8_t', + 'int16_t', + 'int32_t', + 'int64_t', + 'ptrdiff_t', + 'intptr_t', + 'uintptr_t', ]); function isCudaPrimitiveType(typeName: string): boolean { diff --git a/tests/parsers/javascript.test.ts b/tests/parsers/javascript.test.ts index c99c937f9..9d0ec99d3 100644 --- a/tests/parsers/javascript.test.ts +++ b/tests/parsers/javascript.test.ts @@ -225,7 +225,10 @@ describe('JavaScript parser', () => { } `); // Primary: class-scoped key at 0.9 — prevents cross-class collision. - expect(symbols.typeMap.get('UserService.repo')).toEqual({ type: 'Repository', confidence: 0.9 }); + expect(symbols.typeMap.get('UserService.repo')).toEqual({ + type: 'Repository', + confidence: 0.9, + }); // Fallback bare keys at lower confidence for single-class files. expect(symbols.typeMap.get('repo')).toEqual({ type: 'Repository', confidence: 0.6 }); expect(symbols.typeMap.get('this.repo')).toEqual({ type: 'Repository', confidence: 0.6 }); @@ -241,8 +244,14 @@ describe('JavaScript parser', () => { } `); // Each class gets its own scoped key — no collision. 
- expect(symbols.typeMap.get('OrderService.repo')).toEqual({ type: 'OrderRepository', confidence: 0.9 }); - expect(symbols.typeMap.get('UserService.repo')).toEqual({ type: 'UserRepository', confidence: 0.9 }); + expect(symbols.typeMap.get('OrderService.repo')).toEqual({ + type: 'OrderRepository', + confidence: 0.9, + }); + expect(symbols.typeMap.get('UserService.repo')).toEqual({ + type: 'UserRepository', + confidence: 0.9, + }); // Bare "repo" key should hold the first class's type at 0.6 (second write is same confidence, no overwrite). expect(symbols.typeMap.get('repo')?.confidence).toBe(0.6); }); From 7095ffe8b6405aac0a62e70ef1ca829508770b71 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:28:48 -0600 Subject: [PATCH 22/25] fix: align Java interface dispatch across wasm/native/hybrid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Java was the only fixture where all three build paths (wasm, native, hybrid) disagreed pairwise. Bug 1 — WASM typeMap pollution: `handleJavaLocalVarDecl` used last-wins Map.set(), so the local `InMemoryUserRepository repo` in the static `createDefault()` method silently overrode the constructor parameter `UserRepository repo`. This caused WASM to bypass the interface and resolve directly to the concrete class, producing no interface edge and the wrong receiver. Fix: switch to first-wins `setTypeMapEntry` to match Rust extractor semantics. First-wins preserves the interface annotation that drives correct CHA dispatch. Bug 2 — native vs wasm/hybrid confidence mismatch: `runPostNativeCha` (native orchestrator path) used `computeConfidence − CHA_DISPATCH_PENALTY = 0.7 − 0.1 = 0.6`, while `runChaPostPass` (DB post-pass used by wasm and hybrid) hardcodes 0.8. Fix: align `runPostNativeCha` to also use 0.8. Result: all three build paths now emit identical edges and confidences. `parity-compare.mjs --langs java --hybrid` passes. 
Updated expected-edges.json to include both the interface declaration edge (UserRepository.X at 0.7) and the CHA-expanded impl edge (InMemoryUserRepository.X at 0.8), which are the correct semantics for an interface-typed receiver. Closes #1469 --- .../builder/stages/native-orchestrator.ts | 14 +++++----- src/extractors/java.ts | 15 ++++++----- .../fixtures/java/expected-edges.json | 27 ++++++++++++++++--- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 18fe478ef..0fe4555cc 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -556,8 +556,8 @@ function runPostNativeCha( } // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). - // Include the caller node's file so confidence can be computed file-pair-aware, - // matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula. + // Include caller_file and method_file so affectedFiles can be populated for + // incremental role reclassification; confidence is hardcoded 0.8 matching runChaPostPass. // When scopeToChangedFiles is true, restrict to call sites in the changed files // (safe because no hierarchy or RTA evidence changed outside those files). let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; @@ -653,12 +653,10 @@ function runPostNativeCha( const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - // Compute confidence file-pair-aware (mirrors WASM path: computeConfidence - CHA_DISPATCH_PENALTY) - // Skip zero-confidence edges to match buildFileCallEdges / buildChaPostPass behaviour. - const conf = - computeConfidence(caller_file ?? '', methodNode.method_file ?? 
'', null) - - CHA_DISPATCH_PENALTY; - if (conf <= 0) continue; + // Use the same hardcoded 0.8 that runChaPostPass (helpers.ts) uses for + // DB-level CHA dispatch edges. This aligns the native orchestrator path + // with the WASM and hybrid paths, which both go through runChaPostPass. + const conf = 0.8; newEdges.push([source_id, methodNode.id, 'calls', conf, 0, 'cha']); newEdgeCount++; if (caller_file) affectedFiles.add(caller_file); diff --git a/src/extractors/java.ts b/src/extractors/java.ts index 225273bfa..98922ccc4 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -16,6 +16,7 @@ import { nodeStartLine, pushCall, pushImport, + setTypeMapEntry, } from './helpers.js'; /** @@ -273,13 +274,13 @@ function handleJavaLocalVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): voi const child = node.child(i); if (child?.type === 'variable_declarator') { const nameNode = child.childForFieldName('name'); - // Use direct Map.set (last-wins) for local variable declarations. - // Local variable types are method-scoped and should override any - // prior entry (e.g. a same-named constructor parameter). Using - // setTypeMapEntry (first-wins on tie) would let a constructor - // parameter type block a local variable's more-specific concrete type. - if (nameNode && ctx.typeMap) - ctx.typeMap.set(nameNode.text, { type: typeName, confidence: 0.9 }); + // Use setTypeMapEntry (first-wins on tie) to match Rust extractor semantics. + // The typeMap is flat per-file without method scoping, so a local variable + // in one method (e.g. `InMemoryUserRepository repo` in `createDefault()`) must + // not override a parameter binding set by an earlier method + // (e.g. `UserRepository repo` constructor param). First-wins preserves the + // interface/abstract type annotation that drives correct CHA dispatch. 
+ if (nameNode && ctx.typeMap) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); } } } diff --git a/tests/benchmarks/resolution/fixtures/java/expected-edges.json b/tests/benchmarks/resolution/fixtures/java/expected-edges.json index d727cd4a6..a7dd6b458 100644 --- a/tests/benchmarks/resolution/fixtures/java/expected-edges.json +++ b/tests/benchmarks/resolution/fixtures/java/expected-edges.json @@ -31,6 +31,13 @@ "mode": "class-inheritance", "notes": "log() inherited from BaseService via extends" }, + { + "source": { "name": "UserService.getUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.findById", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.findById() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.getUser", "file": "UserService.java" }, "target": { @@ -39,7 +46,7 @@ }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.findById() — typed as UserRepository interface, resolved to InMemoryUserRepository" + "notes": "repo.findById() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, @@ -48,12 +55,19 @@ "mode": "receiver-typed", "notes": "validator.validateUser() — call on typed Validator field" }, + { + "source": { "name": "UserService.createUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.save", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.save() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, "target": { "name": "InMemoryUserRepository.save", "file": "InMemoryUserRepository.java" }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.save() — typed as UserRepository interface, resolved to 
InMemoryUserRepository" + "notes": "repo.save() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, @@ -69,12 +83,19 @@ "mode": "class-inheritance", "notes": "log() inherited from BaseService via extends" }, + { + "source": { "name": "UserService.removeUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.delete", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.delete() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.removeUser", "file": "UserService.java" }, "target": { "name": "InMemoryUserRepository.delete", "file": "InMemoryUserRepository.java" }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.delete() — typed as UserRepository interface, resolved to InMemoryUserRepository" + "notes": "repo.delete() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createDefault", "file": "UserService.java" }, From 1875c7faf2f19ce8a35bd4738d204153303edf19 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:38:32 -0600 Subject: [PATCH 23/25] fix(wasm): align typed-receiver CHA dispatch confidence to 0.8 The inline CHA expansion in buildCallEdges and buildChaPostPass used computeConfidence(relPath, t.file) - CHA_DISPATCH_PENALTY for all CHA targets, producing 0.6 for cross-directory interface dispatch (same-dir = 0.7, minus 0.1 penalty). runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) both hardcode 0.8 for interface/CHA-dispatch edges. The deduplication in runChaPostPass uses the existing DB edge as-is and skips reinsertion, so the 0.6 edges from the inline pass were never upgraded to 0.8. Fix: typed-receiver (interface) dispatch branches now use hardcoded 0.8 matching the post-pass constants. 
The this/super branch keeps computeConfidence-based proximity scoring to remain aligned with runPostNativeThisDispatch. parity-compare.mjs --langs typescript --hybrid goes green (was 12 edge diffs). Closes #1470 docs check acknowledged --- .../graph/builder/stages/build-edges.ts | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 9be7f8096..503dec127 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -709,6 +709,7 @@ function buildChaPostPass( const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); let chaTargets: ReadonlyArray<{ id: number; file: string }> = []; + let isTypedReceiverDispatch = false; if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') { chaTargets = resolveThisDispatch( @@ -727,13 +728,21 @@ function buildChaPostPass( : null; if (typeName) { chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup); + isTypedReceiverDispatch = true; } } for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; + // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that + // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) + // use — file proximity is not meaningful for virtual dispatch confidence. + // this/super dispatch keeps computeConfidence-based proximity scoring to + // match runPostNativeThisDispatch (native-orchestrator.ts). + const conf = isTypedReceiverDispatch + ? 
0.8 + : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenByPair.add(edgeKey); allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']); @@ -1278,6 +1287,7 @@ function buildFileCallEdges( // For typed receiver calls: expand to all instantiated concrete implementations. if (chaCtx && call.receiver) { let chaTargets: ReadonlyArray<{ id: number; file: string }> = []; + let isTypedReceiverDispatch = false; if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') { chaTargets = resolveThisDispatch( call.name, @@ -1295,12 +1305,20 @@ function buildFileCallEdges( : null; if (typeName) { chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup); + isTypedReceiverDispatch = true; } } for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenCallEdges.has(edgeKey) && !ptsEdgeRows.has(edgeKey)) { - const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; + // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that + // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) + // use — file proximity is not meaningful for virtual dispatch confidence. + // this/super dispatch keeps computeConfidence-based proximity scoring to + // match runPostNativeThisDispatch (native-orchestrator.ts line 906). + const conf = isTypedReceiverDispatch + ? 
0.8 + : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenCallEdges.add(edgeKey); allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']); From 4aced082dbdd93a14638f979f1b6b94d0904c834 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 21:23:33 -0600 Subject: [PATCH 24/25] fix: use setTypeMapEntry in cpp/cuda extractors and extract CHA_DISPATCH_CONFIDENCE constant Switch handleCppDeclaration and handleCudaDeclaration from last-wins ctx.typeMap.set() to first-wins setTypeMapEntry(), fixing the same flat typeMap pollution bug this PR corrects in java.ts. Extract the CHA dispatch confidence value to a named CHA_DISPATCH_CONFIDENCE constant in helpers.ts so runChaPostPass and the native orchestrator share a single source of truth instead of two synchronized magic numbers. --- src/domain/graph/builder/helpers.ts | 5 ++++- src/domain/graph/builder/stages/native-orchestrator.ts | 8 +++----- src/extractors/cpp.ts | 4 ++-- src/extractors/cuda.ts | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index dd94ea2bd..4dfd6f469 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -361,6 +361,9 @@ export function batchInsertEdges(db: BetterSqlite3Database, rows: unknown[][]): } } +/** Confidence assigned to CHA-expanded interface/abstract dispatch edges. */ +export const CHA_DISPATCH_CONFIDENCE = 0.8; + /** * CHA (Class Hierarchy Analysis) post-pass. 
* @@ -514,7 +517,7 @@ export function runChaPostPass(db: BetterSqlite3Database): number { const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - newEdges.push([source_id, methodNode.id, 'calls', 0.8, 0, 'cha']); + newEdges.push([source_id, methodNode.id, 'calls', CHA_DISPATCH_CONFIDENCE, 0, 'cha']); } } diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 0fe4555cc..b83ce01cd 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -49,6 +49,7 @@ import type { PipelineContext } from '../context.js'; import { batchInsertEdges, batchInsertNodes, + CHA_DISPATCH_CONFIDENCE, collectFiles as collectFilesUtil, fileHash, fileStat, @@ -557,7 +558,7 @@ function runPostNativeCha( // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). // Include caller_file and method_file so affectedFiles can be populated for - // incremental role reclassification; confidence is hardcoded 0.8 matching runChaPostPass. + // incremental role reclassification; confidence is CHA_DISPATCH_CONFIDENCE matching runChaPostPass. // When scopeToChangedFiles is true, restrict to call sites in the changed files // (safe because no hierarchy or RTA evidence changed outside those files). let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; @@ -653,10 +654,7 @@ function runPostNativeCha( const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - // Use the same hardcoded 0.8 that runChaPostPass (helpers.ts) uses for - // DB-level CHA dispatch edges. This aligns the native orchestrator path - // with the WASM and hybrid paths, which both go through runChaPostPass. 
- const conf = 0.8; + const conf = CHA_DISPATCH_CONFIDENCE; newEdges.push([source_id, methodNode.id, 'calls', conf, 0, 'cha']); newEdgeCount++; if (caller_file) affectedFiles.add(caller_file); diff --git a/src/extractors/cpp.ts b/src/extractors/cpp.ts index 4de38bc56..90e6bc15f 100644 --- a/src/extractors/cpp.ts +++ b/src/extractors/cpp.ts @@ -5,7 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; +import { extractModifierVisibility, findChild, nodeEndLine, setTypeMapEntry } from './helpers.js'; /** * Extract symbols from C++ files. @@ -232,7 +232,7 @@ function handleCppDeclaration(node: TreeSitterNode, ctx: ExtractorOutput): void if (!nameNode) continue; const varName = unwrapCppDeclaratorName(nameNode); if (varName) { - ctx.typeMap.set(varName, { type: typeName, confidence: 0.9 }); + setTypeMapEntry(ctx.typeMap, varName, typeName, 0.9); } } } diff --git a/src/extractors/cuda.ts b/src/extractors/cuda.ts index 5233f95d6..ad6ff163e 100644 --- a/src/extractors/cuda.ts +++ b/src/extractors/cuda.ts @@ -5,7 +5,7 @@ import type { TreeSitterNode, TreeSitterTree, } from '../types.js'; -import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js'; +import { extractModifierVisibility, findChild, nodeEndLine, setTypeMapEntry } from './helpers.js'; /** * Extract symbols from CUDA files. 
@@ -232,7 +232,7 @@ function handleCudaDeclaration(node: TreeSitterNode, ctx: ExtractorOutput): void if (!nameNode) continue; const varName = extractCudaFieldName(nameNode); if (varName) { - ctx.typeMap.set(varName, { type: typeName, confidence: 0.9 }); + setTypeMapEntry(ctx.typeMap, varName, typeName, 0.9); } } } From 1b7e25ebe2f4ad4bd25ccf1352eef243eb38d9bd Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 21:23:42 -0600 Subject: [PATCH 25/25] refactor: extract CHA_TYPED_DISPATCH_CONFIDENCE named constant Move CHA_DISPATCH_PENALTY to helpers.ts alongside the new CHA_TYPED_DISPATCH_CONFIDENCE = 0.8 constant so all four CHA dispatch sites (helpers.ts runChaPostPass, build-edges.ts buildChaPostPass, build-edges.ts buildFileCallEdges, native-orchestrator.ts runPostNativeCha) reference a single named export instead of repeating the literal. native-orchestrator.ts now imports both constants from helpers.ts, removing the build-edges.ts import. --- src/domain/graph/builder/helpers.ts | 16 ++++++++++- .../graph/builder/stages/build-edges.ts | 27 ++++++++++--------- .../builder/stages/native-orchestrator.ts | 10 +++---- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index dd94ea2bd..5b23bd556 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -48,6 +48,13 @@ export const BUILTIN_RECEIVERS: Set = new Set([ 'require', ]); +/** Phase 8.5: confidence penalty applied to CHA-dispatch edges. */ +export const CHA_DISPATCH_PENALTY = 0.1; +/** Phase 8.5: fixed confidence for typed-receiver (interface/CHA) dispatch edges. + * File proximity is not meaningful for virtual dispatch — all three engine paths + * (WASM inline, WASM post-pass, native post-pass) must agree on this value. */ +export const CHA_TYPED_DISPATCH_CONFIDENCE = 0.8; + /** Check if a directory entry should be skipped (ignored dirs, dotfiles). 
*/ function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set | null): boolean { if (entry.name.startsWith('.') && entry.name !== '.') { @@ -514,7 +521,14 @@ export function runChaPostPass(db: BetterSqlite3Database): number { const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - newEdges.push([source_id, methodNode.id, 'calls', 0.8, 0, 'cha']); + newEdges.push([ + source_id, + methodNode.id, + 'calls', + CHA_TYPED_DISPATCH_CONFIDENCE, + 0, + 'cha', + ]); } } diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 503dec127..7b21c2d0f 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -45,7 +45,13 @@ import { import type { ChaContext } from '../cha.js'; import { buildChaContext, resolveChaTargets, resolveThisDispatch } from '../cha.js'; import type { PipelineContext } from '../context.js'; -import { BUILTIN_RECEIVERS, batchInsertEdges, runChaPostPass } from '../helpers.js'; +import { + BUILTIN_RECEIVERS, + batchInsertEdges, + CHA_DISPATCH_PENALTY, + CHA_TYPED_DISPATCH_CONFIDENCE, + runChaPostPass, +} from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExportCached } from './resolve-imports.js'; // ── Local types ────────────────────────────────────────────────────────── @@ -101,9 +107,6 @@ interface NativeEdge { dynamic: number; } -/** Phase 8.5: confidence penalty applied to CHA-dispatch edges. 
*/ -export const CHA_DISPATCH_PENALTY = 0.1; - // ── Node lookup setup ─────────────────────────────────────────────────── function makeGetNodeIdStmt(db: BetterSqlite3Database): NodeIdStmt { @@ -735,13 +738,12 @@ function buildChaPostPass( for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that - // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) - // use — file proximity is not meaningful for virtual dispatch confidence. + // Typed-receiver (interface/CHA) dispatch: use CHA_TYPED_DISPATCH_CONFIDENCE + // — file proximity is not meaningful for virtual dispatch confidence. // this/super dispatch keeps computeConfidence-based proximity scoring to // match runPostNativeThisDispatch (native-orchestrator.ts). const conf = isTypedReceiverDispatch - ? 0.8 + ? CHA_TYPED_DISPATCH_CONFIDENCE : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenByPair.add(edgeKey); @@ -1311,13 +1313,12 @@ function buildFileCallEdges( for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenCallEdges.has(edgeKey) && !ptsEdgeRows.has(edgeKey)) { - // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that - // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) - // use — file proximity is not meaningful for virtual dispatch confidence. + // Typed-receiver (interface/CHA) dispatch: use CHA_TYPED_DISPATCH_CONFIDENCE + // — file proximity is not meaningful for virtual dispatch confidence. // this/super dispatch keeps computeConfidence-based proximity scoring to - // match runPostNativeThisDispatch (native-orchestrator.ts line 906). + // match runPostNativeThisDispatch (native-orchestrator.ts). const conf = isTypedReceiverDispatch - ? 0.8 + ? 
CHA_TYPED_DISPATCH_CONFIDENCE : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenCallEdges.add(edgeKey); diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 0fe4555cc..999781ad2 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -49,13 +49,14 @@ import type { PipelineContext } from '../context.js'; import { batchInsertEdges, batchInsertNodes, + CHA_DISPATCH_PENALTY, + CHA_TYPED_DISPATCH_CONFIDENCE, collectFiles as collectFilesUtil, fileHash, fileStat, readFileSafe, } from '../helpers.js'; import { NativeDbProxy } from '../native-db-proxy.js'; -import { CHA_DISPATCH_PENALTY } from './build-edges.js'; import { closeNativeDb } from './native-db-lifecycle.js'; // ── Native orchestrator types ────────────────────────────────────────── @@ -557,7 +558,7 @@ function runPostNativeCha( // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). // Include caller_file and method_file so affectedFiles can be populated for - // incremental role reclassification; confidence is hardcoded 0.8 matching runChaPostPass. + // incremental role reclassification; confidence uses CHA_TYPED_DISPATCH_CONFIDENCE matching runChaPostPass. // When scopeToChangedFiles is true, restrict to call sites in the changed files // (safe because no hierarchy or RTA evidence changed outside those files). let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; @@ -653,10 +654,7 @@ function runPostNativeCha( const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - // Use the same hardcoded 0.8 that runChaPostPass (helpers.ts) uses for - // DB-level CHA dispatch edges. This aligns the native orchestrator path - // with the WASM and hybrid paths, which both go through runChaPostPass. 
- const conf = 0.8; + const conf = CHA_TYPED_DISPATCH_CONFIDENCE; newEdges.push([source_id, methodNode.id, 'calls', conf, 0, 'cha']); newEdgeCount++; if (caller_file) affectedFiles.add(caller_file);