diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py index 64d49004..33ca5a2b 100644 --- a/api/analyzers/analyzer.py +++ b/api/analyzers/analyzer.py @@ -56,7 +56,7 @@ def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: P try: locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column) return [(files[Path(self.resolve_path(location['absolutePath'], path))], files[Path(self.resolve_path(location['absolutePath'], path))].tree.root_node.descendant_for_point_range(Point(location['range']['start']['line'], location['range']['start']['character']), Point(location['range']['end']['line'], location['range']['end']['character']))) for location in locations if location and Path(self.resolve_path(location['absolutePath'], path)) in files] - except Exception as e: + except Exception: return [] @abstractmethod @@ -135,7 +135,7 @@ def add_symbols(self, entity: Entity) -> None: @abstractmethod def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]: """ - Resolve a symbol to an entity. + Resolve a symbol to entities. Args: lsp (SyncLanguageServer): The language server. 
diff --git a/api/analyzers/kotlin/__init__.py b/api/analyzers/kotlin/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/api/analyzers/kotlin/analyzer.py b/api/analyzers/kotlin/analyzer.py
new file mode 100644
index 00000000..3758c302
--- /dev/null
+++ b/api/analyzers/kotlin/analyzer.py
@@ -0,0 +1,190 @@
+from pathlib import Path
+from ...entities.entity import Entity
+from ...entities.file import File
+from typing import Optional
+from ..analyzer import AbstractAnalyzer
+
+from multilspy import SyncLanguageServer
+
+import tree_sitter_kotlin as tskotlin
+from tree_sitter import Language, Node
+
+import logging
+logger = logging.getLogger('code_graph')
+
+class KotlinAnalyzer(AbstractAnalyzer):
+    """Analyzer that extracts Kotlin classes, interfaces, objects and functions."""
+
+    # Directory names whose contents are treated as dependencies or build output.
+    _DEPENDENCY_DIRS = frozenset({'build', '.gradle', 'cache'})
+
+    def __init__(self) -> None:
+        super().__init__(Language(tskotlin.language()))
+
+    def add_dependencies(self, path: Path, files: list[Path]):
+        """Collect dependency sources for the project (currently a no-op)."""
+        # For now, we skip dependency resolution for Kotlin
+        # In the future, this could parse build.gradle or pom.xml for Kotlin projects
+        pass
+
+    def get_entity_label(self, node: Node) -> str:
+        """Return the graph label for an entity node.
+
+        Raises:
+            ValueError: If the node type is not a supported entity type.
+        """
+        if node.type == 'class_declaration':
+            # Interfaces are class_declaration nodes carrying an `interface` keyword child
+            if any(child.type == 'interface' for child in node.children):
+                return "Interface"
+            return "Class"
+        if node.type == 'object_declaration':
+            return "Object"
+        if node.type == 'function_declaration':
+            # A function directly inside a class body is a method; otherwise a plain function
+            parent = node.parent
+            if parent and parent.type == 'class_body':
+                return "Method"
+            return "Function"
+        raise ValueError(f"Unknown entity type: {node.type}")
+
+    def get_entity_name(self, node: Node) -> str:
+        """Return the text of the first `identifier` child of an entity node.
+
+        Raises:
+            ValueError: If the node type is unsupported or has no identifier child.
+        """
+        if node.type in ['class_declaration', 'object_declaration', 'function_declaration']:
+            for child in node.children:
+                if child.type == 'identifier':
+                    return child.text.decode('utf-8')
+        raise ValueError(f"Cannot extract name from entity type: {node.type}")
+
+    def get_entity_docstring(self, node: Node) -> Optional[str]:
+        """Return the KDoc comment immediately preceding an entity, if any.
+
+        Raises:
+            ValueError: If the node type is not a supported entity type.
+        """
+        if node.type not in ['class_declaration', 'object_declaration', 'function_declaration']:
+            raise ValueError(f"Unknown entity type: {node.type}")
+        previous = node.prev_sibling
+        if previous and previous.type == "multiline_comment":
+            comment_text = previous.text.decode('utf-8')
+            # Only KDoc comments (/** ... */) count as documentation
+            if comment_text.startswith('/**'):
+                return comment_text
+        return None
+
+    def get_entity_types(self) -> list[str]:
+        """Return the tree-sitter node types that are extracted as entities."""
+        return ['class_declaration', 'object_declaration', 'function_declaration']
+
+    @staticmethod
+    def _identifiers(user_type: Node) -> list[Node]:
+        """Return the direct `identifier` children of a `user_type` node."""
+        return [child for child in user_type.children if child.type == 'identifier']
+
+    def _get_delegation_types(self, entity: Entity) -> list[tuple[Node, bool]]:
+        """Extract type identifiers from delegation specifiers in order.
+
+        Returns list of (node, is_constructor_invocation) tuples.
+        constructor_invocation indicates a superclass; plain user_type indicates an interface.
+        """
+        types: list[tuple[Node, bool]] = []
+        for child in entity.node.children:
+            if child.type != 'delegation_specifiers':
+                continue
+            for spec in child.children:
+                if spec.type != 'delegation_specifier':
+                    continue
+                for sub in spec.children:
+                    if sub.type == 'constructor_invocation':
+                        for user_type in sub.children:
+                            if user_type.type == 'user_type':
+                                types.extend((n, True) for n in self._identifiers(user_type))
+                    elif sub.type == 'user_type':
+                        types.extend((n, False) for n in self._identifiers(sub))
+        return types
+
+    def add_symbols(self, entity: Entity) -> None:
+        """Record the unresolved symbols (supertypes, calls, parameter and return types) of an entity."""
+        node_type = entity.node.type
+        if node_type == 'class_declaration':
+            for node, is_class in self._get_delegation_types(entity):
+                entity.add_symbol("base_class" if is_class else "implement_interface", node)
+
+        elif node_type == 'object_declaration':
+            # Every supertype of an object is recorded as an implemented interface
+            for node, _ in self._get_delegation_types(entity):
+                entity.add_symbol("implement_interface", node)
+
+        elif node_type == 'function_declaration':
+            # (query, capture name, symbol key) for calls, typed parameters and the return type
+            queries = [
+                ("(call_expression) @reference.call", 'reference.call', "call"),
+                ("(parameter (user_type (identifier) @parameter))", 'parameter', "parameters"),
+                ("(function_declaration (user_type (identifier) @return_type))", 'return_type', "return_type"),
+            ]
+            for query, capture, key in queries:
+                captures = self._captures(query, entity.node)
+                if capture in captures:
+                    for captured in captures[capture]:
+                        entity.add_symbol(key, captured)
+
+    def is_dependency(self, file_path: str) -> bool:
+        """Return True when the file lives under a build or dependency-cache directory.
+
+        Directory names are compared as whole path components, so a source
+        directory such as ``rebuild/`` is not mistaken for ``build/``. Both
+        ``/`` and ``\\`` are accepted as separators.
+        """
+        directories = file_path.replace('\\', '/').split('/')[:-1]
+        return any(name in self._DEPENDENCY_DIRS for name in directories)
+
+    def resolve_path(self, file_path: str, path: Path) -> str:
+        """Return the path reported by the language server unchanged."""
+        # For Kotlin, just return the file path as-is for now
+        return file_path
+
+    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
+        """Resolve a type reference to the class or object entities that declare it."""
+        res = []
+        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
+            type_dec = self.find_parent(resolved_node, ['class_declaration', 'object_declaration'])
+            if type_dec in file.entities:
+                res.append(file.entities[type_dec])
+        return res
+
+    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
+        """Resolve a call expression to the function entities it may invoke."""
+        res = []
+        # Only call expressions carry a callee that can be resolved
+        if node.type != 'call_expression':
+            return res
+        for child in node.children:
+            if child.type not in ['identifier', 'navigation_expression']:
+                continue
+            for file, resolved_node in self.resolve(files, lsp, file_path, path, child):
+                method_dec = self.find_parent(resolved_node, ['function_declaration', 'class_declaration', 'object_declaration'])
+                # A definition whose nearest enclosing declaration is a type is not a function
+                if method_dec and method_dec.type in ['class_declaration', 'object_declaration']:
+                    continue
+                if method_dec in file.entities:
+                    res.append(file.entities[method_dec])
+            # Only the first callee child is resolved
+            break
+        return res
+
+    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
+        """Resolve a symbol recorded by add_symbols to entities.
+
+        Raises:
+            ValueError: If ``key`` is not a known symbol kind.
+        """
+        if key in ["implement_interface", "base_class", "parameters", "return_type"]:
+            return self.resolve_type(files, lsp, file_path, path, symbol)
+        elif key in ["call"]:
+            return self.resolve_method(files, lsp, file_path, path, symbol)
+        else:
+            raise ValueError(f"Unknown key {key}")
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
index 9631e603..9046abcf 100644
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -8,10 +8,11 @@ from ..graph import Graph
 
 from .analyzer import AbstractAnalyzer
 # from .c.analyzer import CAnalyzer
-from .java.analyzer import JavaAnalyzer
-from .python.analyzer import PythonAnalyzer
 from .csharp.analyzer import CSharpAnalyzer
+from .java.analyzer import JavaAnalyzer
 from .javascript.analyzer import JavaScriptAnalyzer
+from .kotlin.analyzer import KotlinAnalyzer
+from .python.analyzer import PythonAnalyzer
 
 from multilspy import SyncLanguageServer
 from multilspy.multilspy_config import MultilspyConfig
@@ -28,7 +29,9 @@
     '.py': PythonAnalyzer(),
     '.java': JavaAnalyzer(),
     '.cs': CSharpAnalyzer(),
-    '.js': JavaScriptAnalyzer()}
+    '.js': JavaScriptAnalyzer(),
+    '.kt': KotlinAnalyzer(),
+    '.kts': KotlinAnalyzer()}
 
 class NullLanguageServer:
     def start_server(self):
@@ -145,8 +148,11 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
             lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
         else:
             lsps[".cs"] = NullLanguageServer()
+        # For now, use NullLanguageServer for Kotlin as kotlin-language-server setup is not yet integrated
+        lsps[".kt"] = NullLanguageServer()
+        lsps[".kts"] = NullLanguageServer()
         lsps[".js"] = NullLanguageServer()
-        with lsps[".java"].start_server(),
lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(): + with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server(), lsps[".kt"].start_server(), lsps[".kts"].start_server(): files_len = len(self.files) for i, file_path in enumerate(files): if file_path not in self.files: @@ -158,23 +164,20 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) - for key, symbols in entity.symbols.items(): - for symbol in symbols: - if len(symbol.resolved_symbol) == 0: - continue - resolved_symbol = next(iter(symbol.resolved_symbol)) + for key, resolved_set in entity.resolved_symbols.items(): + for resolved in resolved_set: if key == "base_class": - graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id) + graph.connect_entities("EXTENDS", entity.id, resolved.id) elif key == "implement_interface": - graph.connect_entities("IMPLEMENTS", entity.id, resolved_symbol.id) + graph.connect_entities("IMPLEMENTS", entity.id, resolved.id) elif key == "extend_interface": - graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id) + graph.connect_entities("EXTENDS", entity.id, resolved.id) elif key == "call": - graph.connect_entities("CALLS", entity.id, resolved_symbol.id, {"line": symbol.symbol.start_point.row, "text": symbol.symbol.text.decode("utf-8")}) + graph.connect_entities("CALLS", entity.id, resolved.id) elif key == "return_type": - graph.connect_entities("RETURNS", entity.id, resolved_symbol.id) + graph.connect_entities("RETURNS", entity.id, resolved.id) elif key == "parameters": - graph.connect_entities("PARAMETERS", entity.id, resolved_symbol.id) + graph.connect_entities("PARAMETERS", entity.id, resolved.id) 
def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: self.first_pass(path, files, [], graph) @@ -182,7 +185,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None: def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None: path = path.resolve() - files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts] + files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + [f for f in path.rglob("*.js") if "node_modules" not in f.parts] + list(path.rglob("*.kt")) + list(path.rglob("*.kts")) # First pass analysis of the source code self.first_pass(path, files, ignore, graph) diff --git a/api/entities/entity.py b/api/entities/entity.py index 77f1cc97..e2716934 100644 --- a/api/entities/entity.py +++ b/api/entities/entity.py @@ -1,24 +1,23 @@ from typing import Callable, Self from tree_sitter import Node -class Symbol: - def __init__(self, symbol: Node): - self.symbol = symbol - self.resolved_symbol = set() - - def add_resolve_symbol(self, resolved_symbol): - self.resolved_symbol.add(resolved_symbol) class Entity: def __init__(self, node: Node): self.node = node - self.symbols: dict[str, list[Symbol]] = {} + self.symbols: dict[str, list[Node]] = {} + self.resolved_symbols: dict[str, set[Self]] = {} self.children: dict[Node, Self] = {} def add_symbol(self, key: str, symbol: Node): if key not in self.symbols: self.symbols[key] = [] - self.symbols[key].append(Symbol(symbol)) + self.symbols[key].append(symbol) + + def add_resolved_symbol(self, key: str, symbol: Self): + if key not in self.resolved_symbols: + self.resolved_symbols[key] = set() + self.resolved_symbols[key].add(symbol) def add_child(self, child: Self): child.parent = self @@ -26,6 +25,7 @@ def add_child(self, child: Self): def resolved_symbol(self, f: Callable[[str, Node], list[Self]]): for key, symbols in 
self.symbols.items():
+            self.resolved_symbols[key] = set()
             for symbol in symbols:
-                for resolved_symbol in f(key, symbol.symbol):
-                    symbol.add_resolve_symbol(resolved_symbol)
\ No newline at end of file
+                for resolved_symbol in f(key, symbol):
+                    self.resolved_symbols[key].add(resolved_symbol)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 67fcf521..d16cf31a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "tree-sitter-python>=0.25.0,<0.26.0",
     "tree-sitter-java>=0.23.5,<0.24.0",
     "tree-sitter-javascript>=0.23.0,<0.24.0",
+    "tree-sitter-kotlin>=1.1.0,<2.0.0",
     "tree-sitter-c-sharp>=0.23.1,<0.24.0",
     "fastapi>=0.115.0,<1.0.0",
     "uvicorn[standard]>=0.34.0,<1.0.0",
diff --git a/tests/source_files/kotlin/sample.kt b/tests/source_files/kotlin/sample.kt
new file mode 100644
index 00000000..3b325f68
--- /dev/null
+++ b/tests/source_files/kotlin/sample.kt
@@ -0,0 +1,39 @@
+/**
+ * A base interface for logging
+ */
+interface Logger {
+    fun log(message: String)
+}
+
+/**
+ * Base class for shapes
+ */
+open class Shape(val name: String) {
+    open fun area(): Double = 0.0
+}
+
+class Circle(val radius: Double) : Shape("circle"), Logger {
+    override fun area(): Double {
+        return Math.PI * radius * radius
+    }
+
+    override fun log(message: String) {
+        println(message)
+    }
+}
+
+fun calculateTotal(shapes: List<Shape>): Double {
+    var total = 0.0
+    for (shape in shapes) {
+        total += shape.area()
+    }
+    return total
+}
+
+object AppConfig : Logger {
+    val version = "1.0"
+
+    override fun log(message: String) {
+        println("[$version] $message")
+    }
+}
diff --git a/tests/test_kotlin_analyzer.py b/tests/test_kotlin_analyzer.py
new file mode 100644
index 00000000..89d84d80
--- /dev/null
+++ b/tests/test_kotlin_analyzer.py
@@ -0,0 +1,120 @@
+"""Tests for the Kotlin analyzer - extraction only (no DB required)."""
+
+import unittest
+from pathlib import Path
+
+from api.analyzers.kotlin.analyzer import KotlinAnalyzer
+from api.entities.entity import Entity
+from api.entities.file import File
+
+
+def _entity_name(analyzer, entity):
+    """Get the name of an entity using the analyzer."""
+    return analyzer.get_entity_name(entity.node)
+
+
+class TestKotlinAnalyzer(unittest.TestCase):
+    """Entity, label and symbol extraction checks against the sample.kt fixture."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.analyzer = KotlinAnalyzer()
+        source_dir = Path(__file__).parent / "source_files" / "kotlin"
+        cls.sample_path = source_dir / "sample.kt"
+        source = cls.sample_path.read_bytes()
+        tree = cls.analyzer.parser.parse(source)
+        cls.file = File(cls.sample_path, tree)
+
+        # Walk AST and extract entities
+        types = cls.analyzer.get_entity_types()
+        stack = [tree.root_node]
+        while stack:
+            node = stack.pop()
+            if node.type in types:
+                entity = Entity(node)
+                cls.analyzer.add_symbols(entity)
+                cls.file.add_entity(entity)
+            # Always descend: entities nest (e.g. methods inside classes)
+            stack.extend(node.children)
+
+    def _entity_names(self):
+        return [_entity_name(self.analyzer, e) for e in self.file.entities.values()]
+
+    def _entities_named(self, name):
+        """Return the entities called `name`, failing the test if none were extracted."""
+        found = [
+            e for e in self.file.entities.values()
+            if _entity_name(self.analyzer, e) == name
+        ]
+        self.assertTrue(found, f"no entity named {name!r} was extracted")
+        return found
+
+    def _symbol_texts(self, entity_name, key):
+        """Return the source text of every `key` symbol recorded on the named entities."""
+        return [
+            symbol.text.decode("utf-8")
+            for entity in self._entities_named(entity_name)
+            for symbol in entity.symbols.get(key, [])
+        ]
+
+    def test_entity_types(self):
+        """Analyzer should recognise Kotlin entity types."""
+        self.assertEqual(
+            self.analyzer.get_entity_types(),
+            ['class_declaration', 'object_declaration', 'function_declaration'],
+        )
+
+    def test_class_extraction(self):
+        """Classes should be extracted."""
+        names = self._entity_names()
+        self.assertIn("Shape", names)
+        self.assertIn("Circle", names)
+
+    def test_interface_extraction(self):
+        """Interfaces should be extracted."""
+        names = self._entity_names()
+        self.assertIn("Logger", names)
+
+    def test_object_extraction(self):
+        """Object declarations should be extracted."""
+        names = self._entity_names()
+        self.assertIn("AppConfig", names)
+
+    def test_function_extraction(self):
+        """Top-level functions should be extracted."""
+        names = self._entity_names()
+        self.assertIn("calculateTotal", names)
+
+    def test_class_label(self):
+        """Classes should get the 'Class' label."""
+        for name in ("Shape", "Circle"):
+            for entity in self._entities_named(name):
+                self.assertEqual(self.analyzer.get_entity_label(entity.node), "Class")
+
+    def test_interface_label(self):
+        """Interfaces should get the 'Interface' label."""
+        for entity in self._entities_named("Logger"):
+            self.assertEqual(self.analyzer.get_entity_label(entity.node), "Interface")
+
+    def test_object_label(self):
+        """Object declarations should get the 'Object' label."""
+        for entity in self._entities_named("AppConfig"):
+            self.assertEqual(self.analyzer.get_entity_label(entity.node), "Object")
+
+    def test_base_class_symbol(self):
+        """Circle should have Shape as base_class (first delegation specifier)."""
+        self.assertIn("Shape", self._symbol_texts("Circle", "base_class"))
+
+    def test_interface_implementation(self):
+        """Circle should implement Logger (second delegation specifier)."""
+        self.assertIn("Logger", self._symbol_texts("Circle", "implement_interface"))
+
+    def test_is_dependency(self):
+        """Build/gradle paths should be flagged as dependencies."""
+        self.assertTrue(self.analyzer.is_dependency("project/build/classes/Main.kt"))
+        self.assertTrue(self.analyzer.is_dependency("project/.gradle/cache/lib.kt"))
+        self.assertFalse(self.analyzer.is_dependency("src/main/kotlin/App.kt"))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/uv.lock b/uv.lock
index 349ffd28..c6136092 100644
--- a/uv.lock
+++ b/uv.lock
@@ -264,6 +264,7 @@ dependencies = [
     { name = "tree-sitter-c-sharp" },
     { name = "tree-sitter-java" },
     { name = "tree-sitter-javascript" },
+    { name =
"tree-sitter-kotlin" }, { name = "tree-sitter-python" }, { name = "uvicorn", extra = ["standard"] }, { name = "validators" }, @@ -293,6 +294,7 @@ requires-dist = [ { name = "tree-sitter-c-sharp", specifier = ">=0.23.1,<0.24.0" }, { name = "tree-sitter-java", specifier = ">=0.23.5,<0.24.0" }, { name = "tree-sitter-javascript", specifier = ">=0.23.0,<0.24.0" }, + { name = "tree-sitter-kotlin", specifier = ">=1.1.0,<2.0.0" }, { name = "tree-sitter-python", specifier = ">=0.25.0,<0.26.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0,<1.0.0" }, { name = "validators", specifier = ">=0.35.0,<0.36.0" }, @@ -1644,6 +1646,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/79/ceb21988e6de615355a63eebcf806cd2a0fe875bec27b429d58b63e7fb5f/tree_sitter_javascript-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:eb28130cd2fb30d702d614cbf61ef44d1c7f6869e7d864a9cc17111e370be8f7", size = 57027, upload-time = "2024-11-10T05:40:40.841Z" }, ] +[[package]] +name = "tree-sitter-kotlin" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/bb/bdab3665eeca21246130eec79c76e42456cfa72d59606266ecdbf37f9a96/tree_sitter_kotlin-1.1.0.tar.gz", hash = "sha256:322a35bdae75e25ae64dae6027be609c5422fab282084117816c4ebcda6168da", size = 1095728, upload-time = "2025-01-09T19:02:18.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/a5/ce5a2ba7b97db8d90c89516674f5c46e2d41503e00dd743ba7aad4661097/tree_sitter_kotlin-1.1.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6cca5ef06d090e8494ac1d9f0aac71ed32207d412766b5df7da00d94334181a2", size = 312883, upload-time = "2025-01-09T19:02:02.931Z" }, + { url = "https://files.pythonhosted.org/packages/7d/20/66105b6e94d062440955d374e64d030c3173cf4f592f6a6a3c426b3c94d0/tree_sitter_kotlin-1.1.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:910b41a580dae00d319e555075f3886a41386d1067931b14c7de504eeae3ae2a", size = 337016, 
upload-time = "2025-01-09T19:02:04.174Z" }, + { url = "https://files.pythonhosted.org/packages/f7/4c/e1ef38fe412fa9851403fc75a653f2b69bbe1e11e2e7faf219631ebe7e4a/tree_sitter_kotlin-1.1.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:906e5444ebb01db439cb3ad65913598a4ea957b0e068aa973265926a17eb00e0", size = 359927, upload-time = "2025-01-09T19:02:06.312Z" }, + { url = "https://files.pythonhosted.org/packages/65/bd/0f3aac45eb88b6b3173ac9c23bc41d8865943cbbe1caaafc001cd1b73c90/tree_sitter_kotlin-1.1.0-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92afe24b634cf914c5812af0f5c53184b1c18bdf6ee5505c83afac81f6bf6c", size = 339269, upload-time = "2025-01-09T19:02:08.644Z" }, + { url = "https://files.pythonhosted.org/packages/08/dc/4944abf3a8bc630262e93e0857bd7044d521995c1f6af50650e4fe1fdde0/tree_sitter_kotlin-1.1.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5960034a5c5bcc7ccb21dc7a29e4267ac4f0ef37884f39d75695eac7f004deff", size = 328921, upload-time = "2025-01-09T19:02:10.346Z" }, + { url = "https://files.pythonhosted.org/packages/24/c9/5cca0a44db41224f7f10992450af17ff432c1a336852efb312246d5705e5/tree_sitter_kotlin-1.1.0-cp39-abi3-win_amd64.whl", hash = "sha256:d4d3f330f515ba8b91da04a5335eb9ff3ce071c7b7855958912f2560f6e14976", size = 315933, upload-time = "2025-01-09T19:02:12.637Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b9/12fa97f63d2b7517c6f5d16938f0c5bfe84d925c652c75ff1c5e29bf6a44/tree_sitter_kotlin-1.1.0-cp39-abi3-win_arm64.whl", hash = "sha256:e030f127a7d07952907adb9070248bd42fb86dc76fd92744727551b50e131ee7", size = 310414, upload-time = "2025-01-09T19:02:16.23Z" }, +] + [[package]] name = "tree-sitter-python" version = "0.25.0"