-
Notifications
You must be signed in to change notification settings - Fork 9
fix(embed): resolve source files from DB root, not cwd #992
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d82461c
9f20bda
e48690c
f99f10e
080437a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -335,6 +335,126 @@ describe('absolute file paths in DB (#752)', () => { | |
| }); | ||
| }); | ||
|
|
||
| describe('embed resolves source files from DB root, not cwd (#983)', () => { | ||
| let repoDir: string, otherDir: string, repoDbPath: string; | ||
| let originalCwd: string; | ||
|
|
||
| beforeAll(() => { | ||
| // Repo that was built (files live here) | ||
| repoDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed983-repo-')); | ||
| // Unrelated directory we'll cd into when running embed | ||
| otherDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed983-other-')); | ||
|
|
||
| fs.writeFileSync( | ||
| path.join(repoDir, 'a.js'), | ||
| 'export function alpha() { return 1; }\nexport function beta() { return alpha(); }\n', | ||
| ); | ||
|
|
||
| const dbDir = path.join(repoDir, '.codegraph'); | ||
| fs.mkdirSync(dbDir, { recursive: true }); | ||
| repoDbPath = path.join(dbDir, 'graph.db'); | ||
|
|
||
| const db = new Database(repoDbPath); | ||
| db.pragma('journal_mode = WAL'); | ||
| initSchema(db); | ||
| // DB stores *relative* file paths (typical of WASM-engine builds) | ||
| insertNode(db, 'alpha', 'function', 'a.js', 1, 1); | ||
| insertNode(db, 'beta', 'function', 'a.js', 2, 2); | ||
| // Persist the repo root as the build pipeline would | ||
| db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)').run( | ||
| 'root_dir', | ||
| path.resolve(repoDir), | ||
| ); | ||
| db.close(); | ||
|
|
||
| originalCwd = process.cwd(); | ||
| }); | ||
|
|
||
| afterAll(() => { | ||
| try { | ||
| process.chdir(originalCwd); | ||
| } catch { | ||
| /* ignore */ | ||
| } | ||
| if (repoDir) fs.rmSync(repoDir, { recursive: true, force: true }); | ||
| if (otherDir) fs.rmSync(otherDir, { recursive: true, force: true }); | ||
| }); | ||
|
|
||
| test('uses root_dir metadata when embed is invoked from unrelated cwd', async () => { | ||
| EMBEDDED_TEXTS.length = 0; | ||
| process.chdir(otherDir); | ||
|
|
||
| // Simulate the CLI: positional dir defaults to cwd (here: otherDir), DB path is absolute | ||
| await buildEmbeddings(process.cwd(), 'minilm', repoDbPath); | ||
|
|
||
| expect(EMBEDDED_TEXTS.length).toBe(2); | ||
|
|
||
| const db = new Database(repoDbPath, { readonly: true }); | ||
| const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c; | ||
| db.close(); | ||
| expect(count).toBe(2); | ||
| }); | ||
|
|
||
| test('falls back to <dbPath>/../.. when root_dir metadata is missing', async () => { | ||
| // Build a fresh DB without root_dir metadata | ||
| const legacyRepo = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed983-legacy-')); | ||
| try { | ||
| fs.writeFileSync(path.join(legacyRepo, 'b.js'), 'export function gamma() { return 42; }\n'); | ||
| const legacyDbDir = path.join(legacyRepo, '.codegraph'); | ||
| fs.mkdirSync(legacyDbDir, { recursive: true }); | ||
| const legacyDb = path.join(legacyDbDir, 'graph.db'); | ||
|
|
||
| const db = new Database(legacyDb); | ||
| db.pragma('journal_mode = WAL'); | ||
| initSchema(db); | ||
| insertNode(db, 'gamma', 'function', 'b.js', 1, 1); | ||
| // Deliberately NOT writing root_dir — simulates DB built before #983 fix | ||
| db.close(); | ||
|
|
||
| EMBEDDED_TEXTS.length = 0; | ||
| process.chdir(otherDir); | ||
| await buildEmbeddings(process.cwd(), 'minilm', legacyDb); | ||
|
|
||
| expect(EMBEDDED_TEXTS.length).toBe(1); | ||
| } finally { | ||
| fs.rmSync(legacyRepo, { recursive: true, force: true }); | ||
| } | ||
| }); | ||
|
|
||
| test('exits non-zero (throws) when no source files can be read', async () => { | ||
| // Build a DB pointing at files that no longer exist | ||
| const ghostRepo = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed983-ghost-')); | ||
| try { | ||
| const ghostDbDir = path.join(ghostRepo, '.codegraph'); | ||
| fs.mkdirSync(ghostDbDir, { recursive: true }); | ||
| const ghostDb = path.join(ghostDbDir, 'graph.db'); | ||
|
|
||
| const db = new Database(ghostDb); | ||
| db.pragma('journal_mode = WAL'); | ||
| initSchema(db); | ||
| insertNode(db, 'missing', 'function', 'does-not-exist.js', 1, 1); | ||
| db.prepare('INSERT OR REPLACE INTO build_meta (key, value) VALUES (?, ?)').run( | ||
| 'root_dir', | ||
| path.resolve(ghostRepo), | ||
| ); | ||
| db.close(); | ||
|
|
||
| EMBEDDED_TEXTS.length = 0; | ||
| await expect(buildEmbeddings(ghostRepo, 'minilm', ghostDb)).rejects.toThrow( | ||
| /could not read any of the .* source files/, | ||
| ); | ||
|
|
||
| // No embeddings were persisted (they would have been overwritten via DELETE) | ||
| const readDb = new Database(ghostDb, { readonly: true }); | ||
| const count = readDb.prepare('SELECT COUNT(*) as c FROM embeddings').get().c; | ||
| readDb.close(); | ||
| expect(count).toBe(0); | ||
| } finally { | ||
| fs.rmSync(ghostRepo, { recursive: true, force: true }); | ||
| } | ||
| }); | ||
|
Comment on lines
+424
to
+455
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in f99f10e. Wrapped both |
||
| }); | ||
|
|
||
| describe('context window overflow detection', () => { | ||
| let bigDir: string, bigDbPath: string; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
root_dirwritesThe Rust
finalize_buildpersistsroot_dirusingstd::fs::canonicalize(resolves symlinks), but the JSpersistBuildMetadatainfinalize.tsthen overwrites the same key withpath.resolve(ctx.rootDir)(which does not resolve symlinks) for any full build using the native engine. On systems where the project root is behind a symlink, these two values can differ — and the JS write wins for full builds, potentially re-introducing the wrong path just after Rust correctly wrote the canonical one.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in e48690c. Good catch — for native full builds the JS
persistBuildMetadataruns afterfinalize_buildand would overwrite the canonical Rust value. Switched the JS write tofs.realpathSync(path.resolve(ctx.rootDir))so both engines agree on a symlink-resolved canonical path; kept a safe fallback topath.resolveifrealpaththrows (e.g. path removed mid-build).