diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index b08408a4..6cbcb333 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3722,3 +3722,161 @@ class Svc { expect(decoratedNode?.name).toBe('method'); }); }); + +describe('Julia Extraction', () => { + describe('Language detection', () => { + it('should detect Julia files', () => { + expect(detectLanguage('main.jl')).toBe('julia'); + expect(detectLanguage('src/utils.jl')).toBe('julia'); + }); + + it('should report Julia as supported', () => { + expect(isLanguageSupported('julia')).toBe(true); + expect(getSupportedLanguages()).toContain('julia'); + }); + }); + + describe('Function extraction', () => { + it('should extract top-level function definitions', () => { + const code = ` +function greet(name::String) + println("Hello, \$name!") +end + +function add(a::Int, b::Int)::Int + return a + b +end +`; + const result = extractFromSource('utils.jl', code); + const fns = result.nodes.filter((n) => n.kind === 'function'); + expect(fns.find((f) => f.name === 'greet')).toBeDefined(); + expect(fns.find((f) => f.name === 'add')).toBeDefined(); + }); + + it('should extract function signature', () => { + const code = ` +function process(x::Int, y::Float64)::String + return string(x + y) +end +`; + const result = extractFromSource('process.jl', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'process'); + expect(fn).toBeDefined(); + expect(fn?.signature).toContain('x::Int'); + }); + + it('should extract zero-argument functions', () => { + const code = ` +function hello() + println("Hello!") +end +`; + const result = extractFromSource('hello.jl', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'hello'); + expect(fn).toBeDefined(); + }); + + it('should extract macro definitions', () => { + const code = ` +macro mytime(expr) + return :(@elapsed \$expr) +end +`; + const result = extractFromSource('macros.jl', code); 
+ const macro = result.nodes.find((n) => n.kind === 'function' && n.name === 'mytime'); + expect(macro).toBeDefined(); + }); + }); + + describe('Struct extraction', () => { + it('should extract struct definitions', () => { + const code = ` +struct Point + x::Float64 + y::Float64 +end + +mutable struct Counter + value::Int +end +`; + const result = extractFromSource('types.jl', code); + const structs = result.nodes.filter((n) => n.kind === 'struct'); + expect(structs.find((s) => s.name === 'Point')).toBeDefined(); + expect(structs.find((s) => s.name === 'Counter')).toBeDefined(); + }); + + it('should extract parametric struct definitions', () => { + const code = ` +struct Vector2D{T<:Number} + x::T + y::T +end +`; + const result = extractFromSource('vector.jl', code); + const struct_ = result.nodes.find((n) => n.kind === 'struct'); + expect(struct_).toBeDefined(); + // Name may include type parameters (tree-sitter includes full type_head text) + expect(struct_?.name).toContain('Vector2D'); + }); + }); + + describe('Abstract type extraction', () => { + it('should extract abstract type definitions', () => { + const code = ` +abstract type Animal end +abstract type Shape end +`; + const result = extractFromSource('abstract.jl', code); + const abstracts = result.nodes.filter((n) => n.kind === 'interface'); + expect(abstracts.find((a) => a.name === 'Animal')).toBeDefined(); + expect(abstracts.find((a) => a.name === 'Shape')).toBeDefined(); + }); + }); + + describe('Module extraction', () => { + it('should extract module definitions', () => { + const code = ` +module MyModule + export greet + + function greet(name::String) + println("Hello, \$name!") + end +end +`; + const result = extractFromSource('mymodule.jl', code); + const fns = result.nodes.filter((n) => n.kind === 'function'); + expect(fns.find((f) => f.name === 'greet')).toBeDefined(); + }); + }); + + describe('Import extraction', () => { + it('should extract import statements', () => { + const code = ` +import 
LinearAlgebra +import Base.Math: sin, cos +using Statistics +using DataFrames: DataFrame, groupby +`; + const result = extractFromSource('imports.jl', code); + const imports = result.nodes.filter((n) => n.kind === 'import'); + expect(imports.length).toBeGreaterThan(0); + }); + }); + + describe('Call extraction', () => { + it('should extract function calls', () => { + const code = ` +function main() + x = sqrt(2.0) + println(x) + y = sin(x) + cos(x) +end +`; + const result = extractFromSource('main.jl', code); + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.length).toBeGreaterThan(0); + }); + }); +}); diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index d1540424..91b75ca7 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -35,6 +35,7 @@ const WASM_GRAMMAR_FILES: Record = { dart: 'tree-sitter-dart.wasm', pascal: 'tree-sitter-pascal.wasm', scala: 'tree-sitter-scala.wasm', + julia: 'tree-sitter-julia.wasm', }; /** @@ -78,6 +79,7 @@ export const EXTENSION_MAP: Record = { '.fmx': 'pascal', '.scala': 'scala', '.sc': 'scala', + '.jl': 'julia', }; /** @@ -126,7 +128,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise> = { typescript: typescriptExtractor, @@ -43,4 +44,5 @@ export const EXTRACTORS: Partial> = { dart: dartExtractor, pascal: pascalExtractor, scala: scalaExtractor, + julia: juliaExtractor, }; diff --git a/src/extraction/languages/julia.ts b/src/extraction/languages/julia.ts new file mode 100644 index 00000000..70327a69 --- /dev/null +++ b/src/extraction/languages/julia.ts @@ -0,0 +1,237 @@ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { getNodeText } from '../tree-sitter-helpers'; +import type { LanguageExtractor } from '../tree-sitter-types'; + +/** + * Extract the name identifier from a Julia function signature node. 
+ *
+ * Wrapper nodes are peeled off until the node that carries the name is reached.
+ * Shapes handled:
+ *   signature         → grammar wrapper around any of the forms below
+ *   identifier        → `function foo end`
+ *   call_expression   → `function foo(args...) end`
+ *   typed_expression  → `function foo(x)::T end` (return type annotation on sig)
+ *   where_expression  → `function foo(x::T) where T end`
+ *
+ * Any other node type, and any wrapper or call without named children, yields
+ * the node's own source text.
+ */
+function extractFunctionName(signatureNode: SyntaxNode, source: string): string | null {
+  const wrapperTypes = ['signature', 'typed_expression', 'where_expression'];
+
+  // Descend through wrappers: the name-bearing expression is taken from their first named child.
+  let target = signatureNode;
+  while (wrapperTypes.includes(target.type)) {
+    const inner = target.namedChild(0);
+    if (!inner) break; // childless wrapper: fall back to its own text below
+    target = inner;
+  }
+
+  if (target.type === 'call_expression') {
+    // The callee comes first (identifier or field_expression); the arguments follow it.
+    const callee = target.namedChild(0);
+    if (callee) return getNodeText(callee, source);
+  }
+
+  // Bare identifier, or a shape not recognised above: use the node text as-is.
+  return getNodeText(target, source);
+}
+
+/**
+ * Extract a readable signature (parameter list + optional return type) from
+ * the Julia function signature node.
+ *
+ * The result is the argument-list text, followed by the `::ReturnType`
+ * annotation and the ` where …` clause(s) when the signature has them.
+ * Chained clauses such as `f(x::T, y::S) where T where S` can nest one
+ * where_expression inside the left operand of another; every level is
+ * unwrapped, and clauses found deeper are placed first so the emitted text
+ * keeps source order.
+ *
+ * @param signatureNode - The `signature` wrapper node or the expression inside it.
+ * @param source - Source text the node was parsed from.
+ * @returns The readable signature, or `undefined` when no call_expression with a
+ *   second named child (the argument list) is found, e.g. for `function foo end`.
+ */
+function extractFunctionSignature(signatureNode: SyntaxNode, source: string): string | undefined {
+  // Unwrap the tree-sitter 'signature' wrapper node
+  if (signatureNode.type === 'signature') {
+    const inner = signatureNode.namedChild(0);
+    if (!inner) return undefined;
+    return extractFunctionSignature(inner, source);
+  }
+
+  // Peel off every where_expression layer, outermost first. Unwrapping only one
+  // level would leave `sig` as a where_expression for chained clauses and the
+  // whole signature would be dropped.
+  let sig = signatureNode;
+  let whereClause = '';
+  while (sig.type === 'where_expression') {
+    const whereType = sig.namedChild(1);
+    // Prepend: the outer clause is visited first but appears last in the source.
+    if (whereType) whereClause = ' where ' + getNodeText(whereType, source) + whereClause;
+    const left = sig.namedChild(0);
+    if (!left) break; // no left operand: stop instead of looping on the same node
+    sig = left;
+  }
+
+  // Unwrap return type annotation
+  let returnType = '';
+  if (sig.type === 'typed_expression') {
+    const retNode = sig.namedChild(1);
+    if (retNode) returnType = '::' + getNodeText(retNode, source);
+    const left = sig.namedChild(0);
+    if (left) sig = left;
+  }
+
+  // Extract argument list from call_expression
+  if (sig.type === 'call_expression') {
+    const argsNode = sig.namedChild(1); // argument_list
+    if (argsNode) {
+      return getNodeText(argsNode, source) + returnType + whereClause;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Extract the name from a Julia type_head node (used in struct/abstract definitions).
+ * type_head can be: identifier, call_expression (for parametric types), binary_expression
+ * (for subtype declarations like `Foo <: Bar`), etc.
+ *
+ * A `type_head` wrapper node is unwrapped first, so the bare type name is
+ * returned rather than the whole head text (`Vector2D{T<:Number}` → `Vector2D`,
+ * `Dog <: Animal` → `Dog`). Node types not recognised below fall back to the
+ * node's full source text.
+ */
+function extractTypeName(typeHeadNode: SyntaxNode, source: string): string | null {
+  if (typeHeadNode.type === 'type_head') {
+    // Wrapper around the actual head expression — handled the same way as the
+    // `signature` wrapper in extractFunctionName. Without this, every head would
+    // hit the full-text fallback at the bottom.
+    const inner = typeHeadNode.namedChild(0);
+    if (inner) return extractTypeName(inner, source);
+  }
+  if (typeHeadNode.type === 'identifier') {
+    return getNodeText(typeHeadNode, source);
+  }
+  if (typeHeadNode.type === 'call_expression' || typeHeadNode.type === 'parametrized_type_expression') {
+    // Parametric type: Foo{T, U} — first named child is the name
+    const first = typeHeadNode.namedChild(0);
+    if (first) return getNodeText(first, source);
+  }
+  if (typeHeadNode.type === 'binary_expression') {
+    // Subtype: `Foo <: Bar` — first named child is the name
+    const first = typeHeadNode.namedChild(0);
+    if (first) return extractTypeName(first, source);
+  }
+  if (typeHeadNode.type === 'where_expression') {
+    const expr = typeHeadNode.namedChild(0);
+    if (expr) return extractTypeName(expr, source);
+  }
+  // Fallback: use full text
+  return getNodeText(typeHeadNode, source);
+}
+
+/**
+ * Return the first named child of `node` whose type is not `block`, or null when
+ * there is none. Used to locate the header (signature / type_head) of a definition.
+ */
+function firstNonBlockChild(node: SyntaxNode): SyntaxNode | null {
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (child && child.type !== 'block') return child;
+  }
+  return null;
+}
+
+/**
+ * Extractor configuration for Julia: node-type tables plus hooks for name,
+ * signature, body and import extraction.
+ */
+export const juliaExtractor: LanguageExtractor = {
+  functionTypes: ['function_definition', 'macro_definition'],
+  classTypes: [],
+  methodTypes: ['function_definition'], // methods are just multiple-dispatch functions
+  interfaceTypes: ['abstract_definition'],
+  structTypes: ['struct_definition'],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: ['import_statement', 'using_statement'],
+  callTypes: ['call_expression'],
+  variableTypes: ['const_statement'],
+  interfaceKind: 'interface',
+
+  nameField: 'name', // not used directly — overridden in getName below
+  bodyField: 'body',
+  paramsField: 'signature',
+  returnField: undefined,
+
+  /**
+   * Extract the name from a Julia AST node.
+   * Returns null for node types without custom handling (and when no header or
+   * name child is found), which lets the caller use its default field-based lookup.
+   */
+  getName: (node, source) => {
+    if (node.type === 'function_definition' || node.type === 'macro_definition') {
+      // The signature is the first named child that is not the body block
+      const signature = firstNonBlockChild(node);
+      return signature ? extractFunctionName(signature, source) : null;
+    }
+
+    if (node.type === 'struct_definition') {
+      // The type_head is the first named child that is not a block
+      const typeHead = firstNonBlockChild(node);
+      return typeHead ? extractTypeName(typeHead, source) : null;
+    }
+
+    if (node.type === 'abstract_definition') {
+      // abstract type <type_head> end — first named child is type_head
+      const typeHead = node.namedChild(0);
+      if (typeHead) return extractTypeName(typeHead, source);
+      return null;
+    }
+
+    if (node.type === 'module_definition') {
+      const nameNode = node.childForFieldName('name');
+      if (nameNode) return getNodeText(nameNode, source);
+      return null;
+    }
+
+    return null;
+  },
+
+  /**
+   * Build the readable signature for function and macro definitions; every other
+   * node type (and a definition without a header child) yields undefined.
+   */
+  getSignature: (node, source) => {
+    if (node.type === 'function_definition' || node.type === 'macro_definition') {
+      const signature = firstNonBlockChild(node);
+      if (signature) return extractFunctionSignature(signature, source);
+    }
+    return undefined;
+  },
+
+  isAsync: (_node) => false, // Julia has @async macro, not a keyword modifier
+
+  /**
+   * Julia doesn't use `field('body', ...)` in the grammar; bodies are plain
+   * named `block` children. Find the first `block` child on the node.
+   * Returns null when the node has no `block` child.
+   */
+  resolveBody: (node, _bodyField) => {
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child?.type === 'block') return child;
+    }
+    return null;
+  },
+
+  /**
+   * Describe an `import` / `using` statement.
+   * `signature` is always the trimmed statement text; `moduleName` is derived from
+   * the statement's first named child (or is the whole statement text when there
+   * is no named child).
+   */
+  extractImport: (node, source) => {
+    const importText = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Extract the module name from `import Foo` / `import Foo.Bar` / `using Foo`
+    // The first named child is typically an identifier or import_path or selected_import
+    const firstChild = node.namedChild(0);
+    if (!firstChild) return { moduleName: importText, signature: importText };
+
+    // selected_import: `using Foo: bar, baz` → module is `Foo`
+    if (firstChild.type === 'selected_import') {
+      const pathNode = firstChild.namedChild(0);
+      if (pathNode) {
+        return {
+          moduleName: getNodeText(pathNode, source),
+          signature: importText,
+        };
+      }
+    }
+
+    // import_path: `.Foo` (relative import)
+    if (firstChild.type === 'import_path') {
+      return { moduleName: getNodeText(firstChild, source), signature: importText };
+    }
+
+    // import_alias: `Foo as F`
+    if (firstChild.type === 'import_alias') {
+      const pathNode = firstChild.namedChild(0);
+      if (pathNode) {
+        return { moduleName: getNodeText(pathNode, source), signature: importText };
+      }
+    }
+
+    // Scoped identifier: `Foo.Bar.Baz` — take first part
+    const text = getNodeText(firstChild, source);
+    const topModule = text.split('.')[0] ?? text;
+    return { moduleName: topModule, signature: importText };
+  },
+};
diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts
index c3a6b94e..a059a53c 100644
--- a/src/extraction/tree-sitter-types.ts
+++ b/src/extraction/tree-sitter-types.ts
@@ -120,6 +120,13 @@ export interface LanguageExtractor {
 
   // --- Existing hooks ---
 
+  /**
+   * Override name extraction for languages where the name is not a direct field child.
+   * When provided, replaces the default `nameField`-based lookup in `extractName`.
+ * Return null to fall back to the default logic. + */ + getName?: (node: SyntaxNode, source: string) => string | null; + /** Extract signature from node */ getSignature?: (node: SyntaxNode, source: string) => string | undefined; /** Extract visibility from node */ diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 00830ab8..ce0484c9 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -35,6 +35,12 @@ export { generateNodeId } from './tree-sitter-helpers'; * Extract the name from a node based on language */ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string { + // Language-specific name extraction hook + if (extractor.getName) { + const customName = extractor.getName(node, source); + if (customName !== null) return customName || ''; + } + // Try field name first const nameNode = getChildByField(node, extractor.nameField); if (nameNode) { @@ -752,7 +758,8 @@ export class TreeSitterExtractor { if (!this.extractor) return; // Skip forward declarations and type references (no body = not a definition) - const body = getChildByField(node, this.extractor.bodyField); + const body = this.extractor.resolveBody?.(node, this.extractor.bodyField) + ?? getChildByField(node, this.extractor.bodyField); if (!body) return; const name = extractName(node, this.source, this.extractor); diff --git a/src/extraction/wasm/tree-sitter-julia.wasm b/src/extraction/wasm/tree-sitter-julia.wasm new file mode 100644 index 00000000..904f26a7 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-julia.wasm differ diff --git a/src/types.ts b/src/types.ts index 328f7432..bf7c0969 100644 --- a/src/types.ts +++ b/src/types.ts @@ -85,6 +85,7 @@ export const LANGUAGES = [ 'liquid', 'pascal', 'scala', + 'julia', 'unknown', ] as const; @@ -545,6 +546,8 @@ export const DEFAULT_CONFIG: CodeGraphConfig = { // Scala '**/*.scala', '**/*.sc', + // Julia + '**/*.jl', ], exclude: [ // Version control