From 6442af8be1c8418115d0539767c46c740b75ee9a Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Tue, 10 Mar 2026 10:48:47 +0200 Subject: [PATCH 1/4] Add Kotlin language support Migrated from FalkorDB/code-graph-backend PR #95. Original issue: FalkorDB/code-graph-backend#93 Resolves #531 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/analyzer.py | 4 +- api/analyzers/java/analyzer.py | 2 +- api/analyzers/kotlin/__init__.py | 0 api/analyzers/kotlin/analyzer.py | 153 +++++++++++++++++++++++++++++++ api/analyzers/python/analyzer.py | 2 +- api/analyzers/source_analyzer.py | 31 ++++--- api/entities/entity.py | 22 ++--- pyproject.toml | 1 + 8 files changed, 188 insertions(+), 27 deletions(-) create mode 100644 api/analyzers/kotlin/__init__.py create mode 100644 api/analyzers/kotlin/analyzer.py diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py index 64d49004..57e20095 100644 --- a/api/analyzers/analyzer.py +++ b/api/analyzers/analyzer.py @@ -133,7 +133,7 @@ def add_symbols(self, entity: Entity) -> None: pass @abstractmethod - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: """ Resolve a symbol to an entity. @@ -144,7 +144,7 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ symbol (Node): The symbol node. Returns: - list[Entity]: The resolved entities. + Entity: The entity. 
""" pass diff --git a/api/analyzers/java/analyzer.py b/api/analyzers/java/analyzer.py index 5269d698..a3bcce18 100644 --- a/api/analyzers/java/analyzer.py +++ b/api/analyzers/java/analyzer.py @@ -120,7 +120,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ res.append(file.entities[method_dec]) return res - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, symbol) elif key in ["call"]: diff --git a/api/analyzers/kotlin/__init__.py b/api/analyzers/kotlin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py new file mode 100644 index 00000000..cd275765 --- /dev/null +++ b/api/analyzers/kotlin/analyzer.py @@ -0,0 +1,153 @@ +from pathlib import Path +from ...entities import * +from typing import Optional +from ..analyzer import AbstractAnalyzer + +from multilspy import SyncLanguageServer + +import tree_sitter_kotlin as tskotlin +from tree_sitter import Language, Node + +import logging +logger = logging.getLogger('code_graph') + +class KotlinAnalyzer(AbstractAnalyzer): + def __init__(self) -> None: + super().__init__(Language(tskotlin.language())) + + def add_dependencies(self, path: Path, files: list[Path]): + # For now, we skip dependency resolution for Kotlin + # In the future, this could parse build.gradle or pom.xml for Kotlin projects + pass + + def get_entity_label(self, node: Node) -> str: + if node.type == 'class_declaration': + # Check if it's an interface by looking for interface keyword + for child in node.children: + if child.type == 'interface': + return 
"Interface" + return "Class" + elif node.type == 'object_declaration': + return "Object" + elif node.type == 'function_declaration': + # Check if this is a method (inside a class) or a top-level function + parent = node.parent + if parent and parent.type == 'class_body': + return "Method" + return "Function" + raise ValueError(f"Unknown entity type: {node.type}") + + def get_entity_name(self, node: Node) -> str: + if node.type in ['class_declaration', 'object_declaration']: + # Find the type_identifier child + for child in node.children: + if child.type == 'type_identifier': + return child.text.decode('utf-8') + elif node.type == 'function_declaration': + # Find the simple_identifier child + for child in node.children: + if child.type == 'simple_identifier': + return child.text.decode('utf-8') + raise ValueError(f"Cannot extract name from entity type: {node.type}") + + def get_entity_docstring(self, node: Node) -> Optional[str]: + if node.type in ['class_declaration', 'object_declaration', 'function_declaration']: + # Check for KDoc comment (/** ... 
*/) before the node + if node.prev_sibling and node.prev_sibling.type == "multiline_comment": + comment_text = node.prev_sibling.text.decode('utf-8') + # Only return if it's a KDoc comment (starts with /**) + if comment_text.startswith('/**'): + return comment_text + return None + raise ValueError(f"Unknown entity type: {node.type}") + + def get_entity_types(self) -> list[str]: + return ['class_declaration', 'object_declaration', 'function_declaration'] + + def add_symbols(self, entity: Entity) -> None: + if entity.node.type == 'class_declaration': + # Find superclass (extends) + superclass_query = self.language.query("(delegation_specifier (user_type (type_identifier) @superclass))") + superclass_captures = superclass_query.captures(entity.node) + if 'superclass' in superclass_captures: + for superclass in superclass_captures['superclass']: + entity.add_symbol("base_class", superclass) + + # Find interfaces (implements) + # In Kotlin, both inheritance and interface implementation use the same syntax + # We'll treat all as interfaces for now since Kotlin can only extend one class + interface_query = self.language.query("(delegation_specifier (user_type (type_identifier) @interface))") + interface_captures = interface_query.captures(entity.node) + if 'interface' in interface_captures: + for interface in interface_captures['interface']: + entity.add_symbol("implement_interface", interface) + + elif entity.node.type == 'object_declaration': + # Objects can also have delegation specifiers + interface_query = self.language.query("(delegation_specifier (user_type (type_identifier) @interface))") + interface_captures = interface_query.captures(entity.node) + if 'interface' in interface_captures: + for interface in interface_captures['interface']: + entity.add_symbol("implement_interface", interface) + + elif entity.node.type == 'function_declaration': + # Find function calls + query = self.language.query("(call_expression) @reference.call") + captures = 
query.captures(entity.node) + if 'reference.call' in captures: + for caller in captures['reference.call']: + entity.add_symbol("call", caller) + + # Find parameters with types + param_query = self.language.query("(parameter type: (user_type (type_identifier) @parameter))") + param_captures = param_query.captures(entity.node) + if 'parameter' in param_captures: + for parameter in param_captures['parameter']: + entity.add_symbol("parameters", parameter) + + # Find return type + return_type_query = self.language.query("(function_declaration type: (user_type (type_identifier) @return_type))") + return_type_captures = return_type_query.captures(entity.node) + if 'return_type' in return_type_captures: + for return_type in return_type_captures['return_type']: + entity.add_symbol("return_type", return_type) + + def is_dependency(self, file_path: str) -> bool: + # Check if file is in a dependency directory (e.g., build, .gradle cache) + return "build/" in file_path or ".gradle/" in file_path or "/cache/" in file_path + + def resolve_path(self, file_path: str, path: Path) -> str: + # For Kotlin, just return the file path as-is for now + return file_path + + def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + res = [] + for file, resolved_node in self.resolve(files, lsp, file_path, path, node): + type_dec = self.find_parent(resolved_node, ['class_declaration', 'object_declaration']) + if type_dec in file.entities: + res.append(file.entities[type_dec]) + return res + + def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]: + res = [] + # For call expressions, we need to extract the function name + if node.type == 'call_expression': + # Find the identifier being called + for child in node.children: + if child.type in ['simple_identifier', 'navigation_expression']: + for file, resolved_node in self.resolve(files, lsp, 
file_path, path, child): + method_dec = self.find_parent(resolved_node, ['function_declaration', 'class_declaration', 'object_declaration']) + if method_dec and method_dec.type in ['class_declaration', 'object_declaration']: + continue + if method_dec in file.entities: + res.append(file.entities[method_dec]) + break + return res + + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: + if key in ["implement_interface", "base_class", "parameters", "return_type"]: + return self.resolve_type(files, lsp, file_path, path, symbol) + elif key in ["call"]: + return self.resolve_method(files, lsp, file_path, path, symbol) + else: + raise ValueError(f"Unknown key {key}") diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index 7a991202..25b4ba6c 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -115,7 +115,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ res.append(file.entities[method_dec]) return res - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: if key in ["base_class", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, symbol) elif key in ["call"]: diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 9631e603..44f080c6 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -9,6 +9,7 @@ from .analyzer import AbstractAnalyzer # from .c.analyzer import CAnalyzer from .java.analyzer import JavaAnalyzer +from .kotlin.analyzer import KotlinAnalyzer from .python.analyzer import PythonAnalyzer from .csharp.analyzer import CSharpAnalyzer from 
.javascript.analyzer import JavaScriptAnalyzer @@ -28,7 +29,9 @@ '.py': PythonAnalyzer(), '.java': JavaAnalyzer(), '.cs': CSharpAnalyzer(), - '.js': JavaScriptAnalyzer()} + '.js': JavaScriptAnalyzer(), + '.kt': KotlinAnalyzer(), + '.kts': KotlinAnalyzer()} class NullLanguageServer: def start_server(self): @@ -140,13 +143,20 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: lsps[".py"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".py"] = NullLanguageServer() + if any(path.rglob('*.kt')) or any(path.rglob('*.kts')): + # For now, use NullLanguageServer for Kotlin as we need to set up kotlin-language-server + lsps[".kt"] = NullLanguageServer() + lsps[".kts"] = NullLanguageServer() + else: + lsps[".kt"] = NullLanguageServer() + lsps[".kts"] = NullLanguageServer() if any(path.rglob('*.cs')): config = MultilspyConfig.from_dict({"code_language": "csharp"}) lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".cs"] = NullLanguageServer() lsps[".js"] = NullLanguageServer() - with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(): + with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(), lsps[".kt"].start_server(), lsps[".kts"].start_server(): files_len = len(self.files) for i, file_path in enumerate(files): if file_path not in self.files: @@ -160,21 +170,18 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) for key, symbols in entity.symbols.items(): for symbol in symbols: - if len(symbol.resolved_symbol) == 0: - continue - resolved_symbol = next(iter(symbol.resolved_symbol)) if key == "base_class": - graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id) + graph.connect_entities("EXTENDS", 
entity.id, symbol.id) elif key == "implement_interface": - graph.connect_entities("IMPLEMENTS", entity.id, resolved_symbol.id) + graph.connect_entities("IMPLEMENTS", entity.id, symbol.id) elif key == "extend_interface": - graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id) + graph.connect_entities("EXTENDS", entity.id, symbol.id) elif key == "call": - graph.connect_entities("CALLS", entity.id, resolved_symbol.id, {"line": symbol.symbol.start_point.row, "text": symbol.symbol.text.decode("utf-8")}) + graph.connect_entities("CALLS", entity.id, symbol.id) elif key == "return_type": - graph.connect_entities("RETURNS", entity.id, resolved_symbol.id) + graph.connect_entities("RETURNS", entity.id, symbol.id) elif key == "parameters": - graph.connect_entities("PARAMETERS", entity.id, resolved_symbol.id) + graph.connect_entities("PARAMETERS", entity.id, symbol.id) def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: self.first_pass(path, files, [], graph) @@ -182,7 +189,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None: path = path.resolve() - files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts] + files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts] + list(path.rglob("*.kt")) + list(path.rglob("*.kts")) # First pass analysis of the source code self.first_pass(path, files, ignore, graph) diff --git a/api/entities/entity.py b/api/entities/entity.py index 77f1cc97..e2716934 100644 --- a/api/entities/entity.py +++ b/api/entities/entity.py @@ -1,24 +1,23 @@ from typing import Callable, Self from tree_sitter import Node -class Symbol: - def __init__(self, symbol: Node): - self.symbol = symbol - self.resolved_symbol = set() - - def 
add_resolve_symbol(self, resolved_symbol): - self.resolved_symbol.add(resolved_symbol) class Entity: def __init__(self, node: Node): self.node = node - self.symbols: dict[str, list[Symbol]] = {} + self.symbols: dict[str, list[Node]] = {} + self.resolved_symbols: dict[str, set[Self]] = {} self.children: dict[Node, Self] = {} def add_symbol(self, key: str, symbol: Node): if key not in self.symbols: self.symbols[key] = [] - self.symbols[key].append(Symbol(symbol)) + self.symbols[key].append(symbol) + + def add_resolved_symbol(self, key: str, symbol: Self): + if key not in self.resolved_symbols: + self.resolved_symbols[key] = set() + self.resolved_symbols[key].add(symbol) def add_child(self, child: Self): child.parent = self @@ -26,6 +25,7 @@ def add_child(self, child: Self): def resolved_symbol(self, f: Callable[[str, Node], list[Self]]): for key, symbols in self.symbols.items(): + self.resolved_symbols[key] = set() for symbol in symbols: - for resolved_symbol in f(key, symbol.symbol): - symbol.add_resolve_symbol(resolved_symbol) \ No newline at end of file + for resolved_symbol in f(key, symbol): + self.resolved_symbols[key].add(resolved_symbol) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 67fcf521..d16cf31a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "tree-sitter-python>=0.25.0,<0.26.0", "tree-sitter-java>=0.23.5,<0.24.0", "tree-sitter-javascript>=0.23.0,<0.24.0", + "tree-sitter-kotlin>=1.1.0,<2.0.0", "tree-sitter-c-sharp>=0.23.1,<0.24.0", "fastapi>=0.115.0,<1.0.0", "uvicorn[standard]>=0.34.0,<1.0.0", From 20d093bef6b7c9d10eb2812af0b47c0d623341b0 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 13:22:22 +0200 Subject: [PATCH 2/4] fix: address review feedback for Kotlin analyzer - Replace wildcard import with explicit Entity and File imports - Fix tree-sitter queries: Kotlin grammar uses 'identifier' not 'type_identifier' - Fix get_entity_name: use 'identifier' for all entity 
types - Separate superclass/interface in add_symbols: first delegation specifier is base_class, rest are implement_interface - Use self._captures() instead of direct query.captures() calls - Handle constructor_invocation in delegation specifiers (e.g. Shape(...)) - Fix source_analyzer second_pass: use entity.resolved_symbols instead of iterating raw symbol nodes, so graph edges use resolved entity IDs - Fix resolve_method: use 'identifier' instead of 'simple_identifier' - Add unit tests and Kotlin fixture (11 tests, all passing) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/kotlin/analyzer.py | 83 ++++++++++---------- api/analyzers/source_analyzer.py | 16 ++-- tests/source_files/kotlin/sample.kt | 39 ++++++++++ tests/test_kotlin_analyzer.py | 116 ++++++++++++++++++++++++++++ uv.lock | 17 ++++ 5 files changed, 222 insertions(+), 49 deletions(-) create mode 100644 tests/source_files/kotlin/sample.kt create mode 100644 tests/test_kotlin_analyzer.py diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py index cd275765..44fd4606 100644 --- a/api/analyzers/kotlin/analyzer.py +++ b/api/analyzers/kotlin/analyzer.py @@ -1,5 +1,6 @@ from pathlib import Path -from ...entities import * +from ...entities.entity import Entity +from ...entities.file import File from typing import Optional from ..analyzer import AbstractAnalyzer @@ -38,15 +39,9 @@ def get_entity_label(self, node: Node) -> str: raise ValueError(f"Unknown entity type: {node.type}") def get_entity_name(self, node: Node) -> str: - if node.type in ['class_declaration', 'object_declaration']: - # Find the type_identifier child - for child in node.children: - if child.type == 'type_identifier': - return child.text.decode('utf-8') - elif node.type == 'function_declaration': - # Find the simple_identifier child + if node.type in ['class_declaration', 'object_declaration', 'function_declaration']: for child in node.children: - if child.type == 
'simple_identifier': + if child.type == 'identifier': return child.text.decode('utf-8') raise ValueError(f"Cannot extract name from entity type: {node.type}") @@ -64,52 +59,58 @@ def get_entity_docstring(self, node: Node) -> Optional[str]: def get_entity_types(self) -> list[str]: return ['class_declaration', 'object_declaration', 'function_declaration'] + def _get_delegation_types(self, entity: Entity) -> list: + """Extract type identifiers from delegation specifiers in order.""" + types = [] + for child in entity.node.children: + if child.type == 'delegation_specifiers': + for spec in child.children: + if spec.type == 'delegation_specifier': + for sub in spec.children: + if sub.type == 'constructor_invocation': + for s in sub.children: + if s.type == 'user_type': + for id_node in s.children: + if id_node.type == 'identifier': + types.append(id_node) + elif sub.type == 'user_type': + for id_node in sub.children: + if id_node.type == 'identifier': + types.append(id_node) + return types + def add_symbols(self, entity: Entity) -> None: if entity.node.type == 'class_declaration': - # Find superclass (extends) - superclass_query = self.language.query("(delegation_specifier (user_type (type_identifier) @superclass))") - superclass_captures = superclass_query.captures(entity.node) - if 'superclass' in superclass_captures: - for superclass in superclass_captures['superclass']: - entity.add_symbol("base_class", superclass) - - # Find interfaces (implements) - # In Kotlin, both inheritance and interface implementation use the same syntax - # We'll treat all as interfaces for now since Kotlin can only extend one class - interface_query = self.language.query("(delegation_specifier (user_type (type_identifier) @interface))") - interface_captures = interface_query.captures(entity.node) - if 'interface' in interface_captures: - for interface in interface_captures['interface']: - entity.add_symbol("implement_interface", interface) + types = self._get_delegation_types(entity) + if 
types: + # First one is the superclass (base_class) + entity.add_symbol("base_class", types[0]) + # Remaining are interfaces + for iface in types[1:]: + entity.add_symbol("implement_interface", iface) elif entity.node.type == 'object_declaration': - # Objects can also have delegation specifiers - interface_query = self.language.query("(delegation_specifier (user_type (type_identifier) @interface))") - interface_captures = interface_query.captures(entity.node) - if 'interface' in interface_captures: - for interface in interface_captures['interface']: - entity.add_symbol("implement_interface", interface) + types = self._get_delegation_types(entity) + for t in types: + entity.add_symbol("implement_interface", t) elif entity.node.type == 'function_declaration': # Find function calls - query = self.language.query("(call_expression) @reference.call") - captures = query.captures(entity.node) + captures = self._captures("(call_expression) @reference.call", entity.node) if 'reference.call' in captures: for caller in captures['reference.call']: entity.add_symbol("call", caller) # Find parameters with types - param_query = self.language.query("(parameter type: (user_type (type_identifier) @parameter))") - param_captures = param_query.captures(entity.node) - if 'parameter' in param_captures: - for parameter in param_captures['parameter']: + captures = self._captures("(parameter (user_type (identifier) @parameter))", entity.node) + if 'parameter' in captures: + for parameter in captures['parameter']: entity.add_symbol("parameters", parameter) # Find return type - return_type_query = self.language.query("(function_declaration type: (user_type (type_identifier) @return_type))") - return_type_captures = return_type_query.captures(entity.node) - if 'return_type' in return_type_captures: - for return_type in return_type_captures['return_type']: + captures = self._captures("(function_declaration (user_type (identifier) @return_type))", entity.node) + if 'return_type' in captures: + 
for return_type in captures['return_type']: entity.add_symbol("return_type", return_type) def is_dependency(self, file_path: str) -> bool: @@ -134,7 +135,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ if node.type == 'call_expression': # Find the identifier being called for child in node.children: - if child.type in ['simple_identifier', 'navigation_expression']: + if child.type in ['identifier', 'navigation_expression']: for file, resolved_node in self.resolve(files, lsp, file_path, path, child): method_dec = self.find_parent(resolved_node, ['function_declaration', 'class_declaration', 'object_declaration']) if method_dec and method_dec.type in ['class_declaration', 'object_declaration']: diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 44f080c6..4ecf86d9 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -168,20 +168,20 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) - for key, symbols in entity.symbols.items(): - for symbol in symbols: + for key, resolved_set in entity.resolved_symbols.items(): + for resolved in resolved_set: if key == "base_class": - graph.connect_entities("EXTENDS", entity.id, symbol.id) + graph.connect_entities("EXTENDS", entity.id, resolved.id) elif key == "implement_interface": - graph.connect_entities("IMPLEMENTS", entity.id, symbol.id) + graph.connect_entities("IMPLEMENTS", entity.id, resolved.id) elif key == "extend_interface": - graph.connect_entities("EXTENDS", entity.id, symbol.id) + graph.connect_entities("EXTENDS", entity.id, resolved.id) elif key == "call": - graph.connect_entities("CALLS", entity.id, symbol.id) + 
graph.connect_entities("CALLS", entity.id, resolved.id) elif key == "return_type": - graph.connect_entities("RETURNS", entity.id, symbol.id) + graph.connect_entities("RETURNS", entity.id, resolved.id) elif key == "parameters": - graph.connect_entities("PARAMETERS", entity.id, symbol.id) + graph.connect_entities("PARAMETERS", entity.id, resolved.id) def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: self.first_pass(path, files, [], graph) diff --git a/tests/source_files/kotlin/sample.kt b/tests/source_files/kotlin/sample.kt new file mode 100644 index 00000000..3b325f68 --- /dev/null +++ b/tests/source_files/kotlin/sample.kt @@ -0,0 +1,39 @@ +/** + * A base interface for logging + */ +interface Logger { + fun log(message: String) +} + +/** + * Base class for shapes + */ +open class Shape(val name: String) { + open fun area(): Double = 0.0 +} + +class Circle(val radius: Double) : Shape("circle"), Logger { + override fun area(): Double { + return Math.PI * radius * radius + } + + override fun log(message: String) { + println(message) + } +} + +fun calculateTotal(shapes: List<Shape>): Double { + var total = 0.0 + for (shape in shapes) { + total += shape.area() + } + return total +} + +object AppConfig : Logger { + val version = "1.0" + + override fun log(message: String) { + println("[$version] $message") + } +} diff --git a/tests/test_kotlin_analyzer.py b/tests/test_kotlin_analyzer.py new file mode 100644 index 00000000..89d84d80 --- /dev/null +++ b/tests/test_kotlin_analyzer.py @@ -0,0 +1,116 @@ +"""Tests for the Kotlin analyzer - extraction only (no DB required).""" + +import unittest +from pathlib import Path + +from api.analyzers.kotlin.analyzer import KotlinAnalyzer +from api.entities.entity import Entity +from api.entities.file import File + + +def _entity_name(analyzer, entity): + """Get the name of an entity using the analyzer.""" + return analyzer.get_entity_name(entity.node) + + +class TestKotlinAnalyzer(unittest.TestCase): + @classmethod 
+ def setUpClass(cls): + cls.analyzer = KotlinAnalyzer() + source_dir = Path(__file__).parent / "source_files" / "kotlin" + cls.sample_path = source_dir / "sample.kt" + source = cls.sample_path.read_bytes() + tree = cls.analyzer.parser.parse(source) + cls.file = File(cls.sample_path, tree) + + # Walk AST and extract entities + types = cls.analyzer.get_entity_types() + stack = [tree.root_node] + while stack: + node = stack.pop() + if node.type in types: + entity = Entity(node) + cls.analyzer.add_symbols(entity) + cls.file.add_entity(entity) + stack.extend(node.children) + else: + stack.extend(node.children) + + def _entity_names(self): + return [_entity_name(self.analyzer, e) for e in self.file.entities.values()] + + def test_entity_types(self): + """Analyzer should recognise Kotlin entity types.""" + self.assertEqual( + self.analyzer.get_entity_types(), + ['class_declaration', 'object_declaration', 'function_declaration'], + ) + + def test_class_extraction(self): + """Classes should be extracted.""" + names = self._entity_names() + self.assertIn("Shape", names) + self.assertIn("Circle", names) + + def test_interface_extraction(self): + """Interfaces should be extracted.""" + names = self._entity_names() + self.assertIn("Logger", names) + + def test_object_extraction(self): + """Object declarations should be extracted.""" + names = self._entity_names() + self.assertIn("AppConfig", names) + + def test_function_extraction(self): + """Top-level functions should be extracted.""" + names = self._entity_names() + self.assertIn("calculateTotal", names) + + def test_class_label(self): + """Classes should get the 'Class' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) in ("Shape", "Circle"): + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Class") + + def test_interface_label(self): + """Interfaces should get the 'Interface' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, 
entity) == "Logger": + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Interface") + + def test_object_label(self): + """Object declarations should get the 'Object' label.""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "AppConfig": + self.assertEqual(self.analyzer.get_entity_label(entity.node), "Object") + + def test_base_class_symbol(self): + """Circle should have Shape as base_class (first delegation specifier).""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Circle": + base_names = [ + s.text.decode("utf-8") + for s in entity.symbols.get("base_class", []) + ] + self.assertIn("Shape", base_names) + + def test_interface_implementation(self): + """Circle should implement Logger (second delegation specifier).""" + for entity in self.file.entities.values(): + if _entity_name(self.analyzer, entity) == "Circle": + iface_names = [ + s.text.decode("utf-8") + for s in entity.symbols.get("implement_interface", []) + ] + self.assertIn("Logger", iface_names) + + def test_is_dependency(self): + """Build/gradle paths should be flagged as dependencies.""" + self.assertTrue(self.analyzer.is_dependency("project/build/classes/Main.kt")) + self.assertTrue(self.analyzer.is_dependency("project/.gradle/cache/lib.kt")) + self.assertFalse(self.analyzer.is_dependency("src/main/kotlin/App.kt")) + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index 349ffd28..c6136092 100644 --- a/uv.lock +++ b/uv.lock @@ -264,6 +264,7 @@ dependencies = [ { name = "tree-sitter-c-sharp" }, { name = "tree-sitter-java" }, { name = "tree-sitter-javascript" }, + { name = "tree-sitter-kotlin" }, { name = "tree-sitter-python" }, { name = "uvicorn", extra = ["standard"] }, { name = "validators" }, @@ -293,6 +294,7 @@ requires-dist = [ { name = "tree-sitter-c-sharp", specifier = ">=0.23.1,<0.24.0" }, { name = "tree-sitter-java", specifier = ">=0.23.5,<0.24.0" }, { name = 
"tree-sitter-javascript", specifier = ">=0.23.0,<0.24.0" }, + { name = "tree-sitter-kotlin", specifier = ">=1.1.0,<2.0.0" }, { name = "tree-sitter-python", specifier = ">=0.25.0,<0.26.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0,<1.0.0" }, { name = "validators", specifier = ">=0.35.0,<0.36.0" }, @@ -1644,6 +1646,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/79/ceb21988e6de615355a63eebcf806cd2a0fe875bec27b429d58b63e7fb5f/tree_sitter_javascript-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:eb28130cd2fb30d702d614cbf61ef44d1c7f6869e7d864a9cc17111e370be8f7", size = 57027, upload-time = "2024-11-10T05:40:40.841Z" }, ] +[[package]] +name = "tree-sitter-kotlin" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/bb/bdab3665eeca21246130eec79c76e42456cfa72d59606266ecdbf37f9a96/tree_sitter_kotlin-1.1.0.tar.gz", hash = "sha256:322a35bdae75e25ae64dae6027be609c5422fab282084117816c4ebcda6168da", size = 1095728, upload-time = "2025-01-09T19:02:18.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/a5/ce5a2ba7b97db8d90c89516674f5c46e2d41503e00dd743ba7aad4661097/tree_sitter_kotlin-1.1.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6cca5ef06d090e8494ac1d9f0aac71ed32207d412766b5df7da00d94334181a2", size = 312883, upload-time = "2025-01-09T19:02:02.931Z" }, + { url = "https://files.pythonhosted.org/packages/7d/20/66105b6e94d062440955d374e64d030c3173cf4f592f6a6a3c426b3c94d0/tree_sitter_kotlin-1.1.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:910b41a580dae00d319e555075f3886a41386d1067931b14c7de504eeae3ae2a", size = 337016, upload-time = "2025-01-09T19:02:04.174Z" }, + { url = "https://files.pythonhosted.org/packages/f7/4c/e1ef38fe412fa9851403fc75a653f2b69bbe1e11e2e7faf219631ebe7e4a/tree_sitter_kotlin-1.1.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:906e5444ebb01db439cb3ad65913598a4ea957b0e068aa973265926a17eb00e0", size = 359927, upload-time = "2025-01-09T19:02:06.312Z" }, + { url = "https://files.pythonhosted.org/packages/65/bd/0f3aac45eb88b6b3173ac9c23bc41d8865943cbbe1caaafc001cd1b73c90/tree_sitter_kotlin-1.1.0-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92afe24b634cf914c5812af0f5c53184b1c18bdf6ee5505c83afac81f6bf6c", size = 339269, upload-time = "2025-01-09T19:02:08.644Z" }, + { url = "https://files.pythonhosted.org/packages/08/dc/4944abf3a8bc630262e93e0857bd7044d521995c1f6af50650e4fe1fdde0/tree_sitter_kotlin-1.1.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5960034a5c5bcc7ccb21dc7a29e4267ac4f0ef37884f39d75695eac7f004deff", size = 328921, upload-time = "2025-01-09T19:02:10.346Z" }, + { url = "https://files.pythonhosted.org/packages/24/c9/5cca0a44db41224f7f10992450af17ff432c1a336852efb312246d5705e5/tree_sitter_kotlin-1.1.0-cp39-abi3-win_amd64.whl", hash = "sha256:d4d3f330f515ba8b91da04a5335eb9ff3ce071c7b7855958912f2560f6e14976", size = 315933, upload-time = "2025-01-09T19:02:12.637Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b9/12fa97f63d2b7517c6f5d16938f0c5bfe84d925c652c75ff1c5e29bf6a44/tree_sitter_kotlin-1.1.0-cp39-abi3-win_arm64.whl", hash = "sha256:e030f127a7d07952907adb9070248bd42fb86dc76fd92744727551b50e131ee7", size = 310414, upload-time = "2025-01-09T19:02:16.23Z" }, +] + [[package]] name = "tree-sitter-python" version = "0.25.0" From 692fd06b9fcc1dc5045aade27b9bb9db34eea423 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 21 Mar 2026 23:31:23 +0200 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20fix?= =?UTF-8?q?=20return=20types,=20indentation,=20path=20normalization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix resolve_symbol return type annotation: Entity -> list[Entity] in abstract and Kotlin implementations to match 
actual behavior - Fix pyproject.toml indentation for tree-sitter-kotlin dependency - Add path.resolve() in analyze_sources for LSP compatibility - Remove unused exception variable Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/analyzer.py | 8 ++++---- api/analyzers/kotlin/analyzer.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py index 57e20095..33ca5a2b 100644 --- a/api/analyzers/analyzer.py +++ b/api/analyzers/analyzer.py @@ -56,7 +56,7 @@ def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: P try: locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column) return [(files[Path(self.resolve_path(location['absolutePath'], path))], files[Path(self.resolve_path(location['absolutePath'], path))].tree.root_node.descendant_for_point_range(Point(location['range']['start']['line'], location['range']['start']['character']), Point(location['range']['end']['line'], location['range']['end']['character']))) for location in locations if location and Path(self.resolve_path(location['absolutePath'], path)) in files] - except Exception as e: + except Exception: return [] @abstractmethod @@ -133,9 +133,9 @@ def add_symbols(self, entity: Entity) -> None: pass @abstractmethod - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: """ - Resolve a symbol to an entity. + Resolve a symbol to entities. Args: lsp (SyncLanguageServer): The language server. @@ -144,7 +144,7 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ symbol (Node): The symbol node. Returns: - Entity: The entity. + list[Entity]: The resolved entities. 
""" pass diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py index 44fd4606..29aa151b 100644 --- a/api/analyzers/kotlin/analyzer.py +++ b/api/analyzers/kotlin/analyzer.py @@ -145,7 +145,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ break return res - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: if key in ["implement_interface", "base_class", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, symbol) elif key in ["call"]: From e5eda92e53c9af57fd9d3c5b3364cbb653fb3f69 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 22 Mar 2026 17:20:50 +0200 Subject: [PATCH 4/4] fix(analyzers): restore C# support, fix return types, improve delegation heuristic - Restore C# analyzer registration in SourceAnalyzer (removed unintentionally) - Fix resolve_symbol return type annotation in Java/Python analyzers (-> list[Entity]) - Improve Kotlin delegation specifier handling: use constructor_invocation as class signal instead of blindly treating first specifier as base_class Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/java/analyzer.py | 2 +- api/analyzers/kotlin/analyzer.py | 27 +++++++++++++++------------ api/analyzers/python/analyzer.py | 2 +- api/analyzers/source_analyzer.py | 14 +++++--------- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/api/analyzers/java/analyzer.py b/api/analyzers/java/analyzer.py index a3bcce18..5269d698 100644 --- a/api/analyzers/java/analyzer.py +++ b/api/analyzers/java/analyzer.py @@ -120,7 +120,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ res.append(file.entities[method_dec]) return res - def resolve_symbol(self, 
files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, symbol) elif key in ["call"]: diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py index 29aa151b..3758c302 100644 --- a/api/analyzers/kotlin/analyzer.py +++ b/api/analyzers/kotlin/analyzer.py @@ -59,8 +59,12 @@ def get_entity_docstring(self, node: Node) -> Optional[str]: def get_entity_types(self) -> list[str]: return ['class_declaration', 'object_declaration', 'function_declaration'] - def _get_delegation_types(self, entity: Entity) -> list: - """Extract type identifiers from delegation specifiers in order.""" + def _get_delegation_types(self, entity: Entity) -> list[tuple]: + """Extract type identifiers from delegation specifiers in order. + + Returns list of (node, is_constructor_invocation) tuples. + constructor_invocation indicates a superclass; plain user_type indicates an interface. 
+ """ types = [] for child in entity.node.children: if child.type == 'delegation_specifiers': @@ -72,27 +76,26 @@ def _get_delegation_types(self, entity: Entity) -> list: if s.type == 'user_type': for id_node in s.children: if id_node.type == 'identifier': - types.append(id_node) + types.append((id_node, True)) elif sub.type == 'user_type': for id_node in sub.children: if id_node.type == 'identifier': - types.append(id_node) + types.append((id_node, False)) return types def add_symbols(self, entity: Entity) -> None: if entity.node.type == 'class_declaration': types = self._get_delegation_types(entity) - if types: - # First one is the superclass (base_class) - entity.add_symbol("base_class", types[0]) - # Remaining are interfaces - for iface in types[1:]: - entity.add_symbol("implement_interface", iface) + for node, is_class in types: + if is_class: + entity.add_symbol("base_class", node) + else: + entity.add_symbol("implement_interface", node) elif entity.node.type == 'object_declaration': types = self._get_delegation_types(entity) - for t in types: - entity.add_symbol("implement_interface", t) + for node, _ in types: + entity.add_symbol("implement_interface", node) elif entity.node.type == 'function_declaration': # Find function calls diff --git a/api/analyzers/python/analyzer.py b/api/analyzers/python/analyzer.py index 25b4ba6c..7a991202 100644 --- a/api/analyzers/python/analyzer.py +++ b/api/analyzers/python/analyzer.py @@ -115,7 +115,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ res.append(file.entities[method_dec]) return res - def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity: + def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: if key in ["base_class", "parameters", "return_type"]: return self.resolve_type(files, lsp, file_path, path, 
symbol) elif key in ["call"]: diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 4ecf86d9..9046abcf 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -8,11 +8,11 @@ from ..graph import Graph from .analyzer import AbstractAnalyzer # from .c.analyzer import CAnalyzer +from .csharp.analyzer import CSharpAnalyzer from .java.analyzer import JavaAnalyzer +from .javascript.analyzer import JavaScriptAnalyzer from .kotlin.analyzer import KotlinAnalyzer from .python.analyzer import PythonAnalyzer -from .csharp.analyzer import CSharpAnalyzer -from .javascript.analyzer import JavaScriptAnalyzer from multilspy import SyncLanguageServer from multilspy.multilspy_config import MultilspyConfig @@ -143,18 +143,14 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: lsps[".py"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".py"] = NullLanguageServer() - if any(path.rglob('*.kt')) or any(path.rglob('*.kts')): - # For now, use NullLanguageServer for Kotlin as we need to set up kotlin-language-server - lsps[".kt"] = NullLanguageServer() - lsps[".kts"] = NullLanguageServer() - else: - lsps[".kt"] = NullLanguageServer() - lsps[".kts"] = NullLanguageServer() if any(path.rglob('*.cs')): config = MultilspyConfig.from_dict({"code_language": "csharp"}) lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".cs"] = NullLanguageServer() + # For now, use NullLanguageServer for Kotlin as kotlin-language-server setup is not yet integrated + lsps[".kt"] = NullLanguageServer() + lsps[".kts"] = NullLanguageServer() lsps[".js"] = NullLanguageServer() with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(), lsps[".kt"].start_server(), lsps[".kts"].start_server(): files_len = len(self.files)