Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live

**Configuration:** All tunable behavioral constants live in `DEFAULTS` in `src/infrastructure/config.ts`, grouped by concern (`analysis`, `risk`, `search`, `display`, `community`, `structure`, `mcp`, `check`, `coChange`, `manifesto`). Users override via `.codegraphrc.json` — `mergeConfig` deep-merges recursively so partial overrides preserve sibling keys. Env vars override LLM settings (`CODEGRAPH_LLM_*`). When adding new behavioral constants, **always add them to `DEFAULTS`** and wire them through config — never introduce new hardcoded magic numbers in individual modules. Category F values (safety boundaries, standard formulas, platform concerns) are the only exception.

**Database:** SQLite at `.codegraph/graph.db` with tables: `nodes`, `edges`, `metadata`, `embeddings`, `function_complexity`
**Database:** SQLite at `.codegraph/graph.db` with tables: `nodes`, `edges`, `metadata`, `embeddings`, `function_complexity`, `ast_nodes` (stored `new`/`throw`/`await`/`string`/`regex` literals queryable via `codegraph ast`). Both engines must extract `ast_nodes` for every language they parse — per-language node-type maps live in `src/ast-analysis/rules/index.ts` (`AST_TYPE_MAPS`, `AST_STRING_CONFIGS`) and mirror the native `LangAstConfig` constants in `crates/codegraph-core/src/extractors/helpers.rs`. Adding a new language requires a matching entry in both.

## Test Structure

Expand Down
7 changes: 6 additions & 1 deletion crates/codegraph-core/src/extractors/javascript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,12 @@ fn walk_ast_nodes_depth(node: &Node, source: &[u8], ast_nodes: &mut Vec<AstNode>
let content = raw
.trim_start_matches(|c| c == '\'' || c == '"' || c == '`')
.trim_end_matches(|c| c == '\'' || c == '"' || c == '`');
if content.len() < 2 {
// Count Unicode code points, not UTF-8 bytes, so the filter matches
// helpers.rs `build_string_node` and the WASM visitor — a single non-
// ASCII glyph like `─` (3 bytes / 1 code point) must be treated as one
// character, otherwise we emit "excess" string nodes the WASM engine
// skips (see parity issue #1010).
if content.chars().count() < 2 {
// Still recurse children (template_string may have nested expressions)
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
Expand Down
12 changes: 11 additions & 1 deletion src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ import type {
} from '../types.js';
import { computeLOCMetrics, computeMaintainabilityIndex } from './metrics.js';
import {
AST_STRING_CONFIGS,
AST_TYPE_MAPS,
astStopRecurseKinds,
CFG_RULES,
COMPLEXITY_RULES,
DATAFLOW_RULES,
Expand Down Expand Up @@ -458,7 +460,15 @@ function setupAstVisitor(
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
return createAstStoreVisitor(astTypeMap, symbols.definitions || [], relPath, nodeIdMap);
const stringConfig = AST_STRING_CONFIGS.get(langId);
return createAstStoreVisitor(
astTypeMap,
symbols.definitions || [],
relPath,
nodeIdMap,
stringConfig,
astStopRecurseKinds(langId),
);
}

/** Set up complexity visitor if any definitions need WASM complexity analysis. */
Expand Down
9 changes: 8 additions & 1 deletion src/ast-analysis/rules/csharp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,11 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_statement: 'throw',
throw_expression: 'throw',
await_expression: 'await',
string_literal: 'string',
verbatim_string_literal: 'string',
};
5 changes: 4 additions & 1 deletion src/ast-analysis/rules/go.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
interpreted_string_literal: 'string',
raw_string_literal: 'string',
};
185 changes: 181 additions & 4 deletions src/ast-analysis/rules/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,187 @@ export const DATAFLOW_RULES: Map<string, DataflowRulesConfig> = new Map([
['ruby', ruby.dataflow],
]);

// ─── AST Type Maps ───────────────────────────────────────────────────────
// ─── AST Node Type Maps ──────────────────────────────────────────────────
//
// These mirror the per-language `LangAstConfig` constants in the native Rust
// engine (`crates/codegraph-core/src/extractors/helpers.rs`). WASM and native
// must agree on which tree-sitter node types to emit as `ast_nodes` rows.
// Languages without a dedicated rules/*.ts file have their maps inlined here.

const JS_AST_TYPES = javascript.astTypes as Record<string, string>;
const PY_AST_TYPES = python.astTypes as Record<string, string>;
const GO_AST_TYPES = go.astTypes as Record<string, string>;
const RS_AST_TYPES = rust.astTypes as Record<string, string>;
const JAVA_AST_TYPES = java.astTypes as Record<string, string>;
const CS_AST_TYPES = csharp.astTypes as Record<string, string>;
const RB_AST_TYPES = ruby.astTypes as Record<string, string>;
const PHP_AST_TYPES = php.astTypes as Record<string, string>;

const C_AST_TYPES: Record<string, string> = {
string_literal: 'string',
};

const CPP_AST_TYPES: Record<string, string> = {
new_expression: 'new',
throw_statement: 'throw',
co_await_expression: 'await',
string_literal: 'string',
raw_string_literal: 'string',
};

const KOTLIN_AST_TYPES: Record<string, string> = {
throw_expression: 'throw',
string_literal: 'string',
};

const SWIFT_AST_TYPES: Record<string, string> = {
throw_statement: 'throw',
await_expression: 'await',
string_literal: 'string',
};

const SCALA_AST_TYPES: Record<string, string> = {
object_creation_expression: 'new',
throw_expression: 'throw',
string_literal: 'string',
};

const BASH_AST_TYPES: Record<string, string> = {
string: 'string',
expansion: 'string',
};

const ELIXIR_AST_TYPES: Record<string, string> = {
string: 'string',
sigil: 'regex',
};

const LUA_AST_TYPES: Record<string, string> = {
string: 'string',
};

const DART_AST_TYPES: Record<string, string> = {
new_expression: 'new',
constructor_invocation: 'new',
throw_expression: 'throw',
await_expression: 'await',
string_literal: 'string',
};

const ZIG_AST_TYPES: Record<string, string> = {
string_literal: 'string',
};

const HASKELL_AST_TYPES: Record<string, string> = {
string: 'string',
char: 'string',
};

const OCAML_AST_TYPES: Record<string, string> = {
string: 'string',
};

export const AST_TYPE_MAPS: Map<string, Record<string, string>> = new Map([
['javascript', javascript.astTypes as Record<string, string>],
['typescript', javascript.astTypes as Record<string, string>],
['tsx', javascript.astTypes as Record<string, string>],
['javascript', JS_AST_TYPES],
['typescript', JS_AST_TYPES],
['tsx', JS_AST_TYPES],
['python', PY_AST_TYPES],
['go', GO_AST_TYPES],
['rust', RS_AST_TYPES],
['java', JAVA_AST_TYPES],
['csharp', CS_AST_TYPES],
['ruby', RB_AST_TYPES],
['php', PHP_AST_TYPES],
['c', C_AST_TYPES],
['cpp', CPP_AST_TYPES],
['kotlin', KOTLIN_AST_TYPES],
['swift', SWIFT_AST_TYPES],
['scala', SCALA_AST_TYPES],
['bash', BASH_AST_TYPES],
['elixir', ELIXIR_AST_TYPES],
['lua', LUA_AST_TYPES],
['dart', DART_AST_TYPES],
['zig', ZIG_AST_TYPES],
['haskell', HASKELL_AST_TYPES],
['ocaml', OCAML_AST_TYPES],
['ocaml-interface', OCAML_AST_TYPES],
]);

// ─── Per-language string-extraction config ───────────────────────────────
//
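// Mirrors `quote_chars` + `string_prefixes` in the native `LangAstConfig`.
// Used by the AST-store visitor to strip quote characters and language-
// specific prefix characters (e.g. Python `r"..."`, C++ `L"..."`) when
// computing string content for the `name` column.
// NOTE(review): only the Python and C++ entries below declare any
// `stringPrefixes`; C# verbatim `@"..."` and Rust raw `r#"..."#` have empty
// prefix sets here — confirm that this matches the native config.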

/**
 * Per-language settings for turning a string-literal node into stored content.
 *
 * Both fields are written as a run of single characters in the tables in this
 * file (e.g. quote characters `'`, `"` and a backtick for the JS family);
 * presumably the consumer treats each character as one set member — verify
 * against the AST-store visitor.
 */
export interface AstStringConfig {
/** Quote delimiter characters for the language, concatenated into one string. */
quoteChars: string;
/** Prefix characters that may precede the opening quote; empty when the language declares none. */
stringPrefixes: string;
}

const JS_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"`', stringPrefixes: '' };
const PY_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: 'rbfuRBFU' };
const GO_STRING_CONFIG: AstStringConfig = { quoteChars: '"`', stringPrefixes: '' };
const RS_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const JAVA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const CS_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const RB_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
const PHP_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
const C_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const CPP_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: 'LuUR' };
const KOTLIN_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const SWIFT_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const SCALA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const BASH_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' };
const ELIXIR_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const LUA_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' };
const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' };
const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };

/**
 * Lookup from language id to its {@link AstStringConfig}.
 *
 * `javascript`, `typescript` and `tsx` share a single config object, as do
 * `ocaml` and `ocaml-interface`. A language id that is absent from this map
 * yields `undefined` from `.get()`, so callers must tolerate a missing config.
 */
export const AST_STRING_CONFIGS: Map<string, AstStringConfig> = new Map([
['javascript', JS_STRING_CONFIG],
['typescript', JS_STRING_CONFIG],
['tsx', JS_STRING_CONFIG],
['python', PY_STRING_CONFIG],
['go', GO_STRING_CONFIG],
['rust', RS_STRING_CONFIG],
['java', JAVA_STRING_CONFIG],
['csharp', CS_STRING_CONFIG],
['ruby', RB_STRING_CONFIG],
['php', PHP_STRING_CONFIG],
['c', C_STRING_CONFIG],
['cpp', CPP_STRING_CONFIG],
['kotlin', KOTLIN_STRING_CONFIG],
['swift', SWIFT_STRING_CONFIG],
['scala', SCALA_STRING_CONFIG],
['bash', BASH_STRING_CONFIG],
['elixir', ELIXIR_STRING_CONFIG],
['lua', LUA_STRING_CONFIG],
['dart', DART_STRING_CONFIG],
['zig', ZIG_STRING_CONFIG],
['haskell', HASKELL_STRING_CONFIG],
['ocaml', OCAML_STRING_CONFIG],
['ocaml-interface', OCAML_STRING_CONFIG],
]);

// ─── Per-language "stop-after-collect" kinds ─────────────────────────────
//
// Mirrors the subtle difference between the native JS walker
// (`extractors/javascript.rs::walk_ast_nodes_depth`) — which *returns* after
// collecting `new_expression` and `throw_statement` to avoid double-counting
// the wrapped expression — and the generic walker (`helpers.rs::walk_ast_
// nodes_with_config_depth`), which always recurses. For WASM/native parity
// the JS family must skip recursion on `new` and `throw`; every other
// language recurses normally.

/** Kinds after which the JS-family walker does not descend into children. */
const JS_STOP_RECURSE: ReadonlySet<string> = new Set<string>(['new', 'throw']);
/** Shared empty set handed out for every language that always recurses. */
const EMPTY_STOP_RECURSE: ReadonlySet<string> = new Set<string>();

/**
 * Select the set of collected AST kinds that end descent for a language.
 *
 * @param langId - Language id as used for the rule-map keys (e.g. `'tsx'`).
 * @returns The shared `new`/`throw` set for `javascript`, `typescript` and
 *   `tsx`; the shared empty set for any other id. The same set instance is
 *   returned on every call, so callers must not mutate it.
 */
export function astStopRecurseKinds(langId: string): ReadonlySet<string> {
  switch (langId) {
    case 'javascript':
    case 'typescript':
    case 'tsx':
      return JS_STOP_RECURSE;
    default:
      return EMPTY_STOP_RECURSE;
  }
}
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_statement: 'throw',
string_literal: 'string',
};
7 changes: 6 additions & 1 deletion src/ast-analysis/rules/php.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,4 +218,9 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_expression: 'throw',
string: 'string',
encapsed_string: 'string',
};
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
raise_statement: 'throw',
await: 'await',
string: 'string',
};
5 changes: 4 additions & 1 deletion src/ast-analysis/rules/ruby.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
string: 'string',
regex: 'regex',
};
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/rust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
export const astTypes: Record<string, string> | null = {
await_expression: 'await',
string_literal: 'string',
raw_string_literal: 'string',
};
Loading
Loading