Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 36 additions & 5 deletions src/services/code-index/__tests__/orchestrator.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ describe("CodeIndexOrchestrator - error path cleanup gating", () => {
expect(lastCall[0]).toBe("Error")
})

it("should call clearCollection() and clear cache when an error occurs after initialize() succeeds (indexing started)", async () => {
// Arrange: initialize succeeds; fail soon after to enter error path with indexingStarted=true
vectorStore.initialize.mockResolvedValue(false) // existing collection
it("should preserve existing data when an error occurs on an existing collection (collectionCreated=false)", async () => {
// Arrange: initialize succeeds with existing collection; fail soon after
vectorStore.initialize.mockResolvedValue(false) // existing collection, NOT newly created
vectorStore.hasIndexedData.mockResolvedValue(false) // force full scan path
vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure"))

Expand All @@ -149,9 +149,40 @@ describe("CodeIndexOrchestrator - error path cleanup gating", () => {
// Act
await orchestrator.startIndexing()

// Assert: cleanup gated behind indexingStarted should have happened
// Assert: should NOT clear existing collection data on error (preserves user's index)
expect(vectorStore.clearCollection).not.toHaveBeenCalled()
// Should flush (persist) cache rather than clearing it
expect(cacheManager.flush).toHaveBeenCalledTimes(1)
expect(cacheManager.clearCacheFile).not.toHaveBeenCalled()

// Error state should be set
expect(stateManager.setSystemState).toHaveBeenCalled()
const lastCall = stateManager.setSystemState.mock.calls[stateManager.setSystemState.mock.calls.length - 1]
expect(lastCall[0]).toBe("Error")
})

it("should clear collection and cache when an error occurs on a newly created collection (collectionCreated=true)", async () => {
// Arrange: initialize creates a new collection; fail soon after
vectorStore.initialize.mockResolvedValue(true) // newly created collection
vectorStore.hasIndexedData.mockResolvedValue(false) // new collection has no data
vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure"))

const orchestrator = new CodeIndexOrchestrator(
configManager,
stateManager,
workspacePath,
cacheManager,
vectorStore,
scanner,
fileWatcher,
)

// Act
await orchestrator.startIndexing()

// Assert: should clear data since the collection was just created (no pre-existing data to preserve)
expect(vectorStore.clearCollection).toHaveBeenCalledTimes(1)
expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(1)
expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(2) // once in try block (collectionCreated), once in catch

// Error state should be set
expect(stateManager.setSystemState).toHaveBeenCalled()
Expand Down
54 changes: 34 additions & 20 deletions src/services/code-index/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,12 @@ export class CodeIndexOrchestrator {
// Track whether we successfully connected to Qdrant and started indexing
// This helps us decide whether to preserve cache on error
let indexingStarted = false
// Track whether a new collection was created (vs reusing existing one)
// This helps us decide whether to clear data on error
let collectionCreated = false

try {
const collectionCreated = await this.vectorStore.initialize()
collectionCreated = await this.vectorStore.initialize()

// Successfully connected to Qdrant
indexingStarted = true
Expand Down Expand Up @@ -316,27 +319,38 @@ export class CodeIndexOrchestrator {
stack: error instanceof Error ? error.stack : undefined,
location: "startIndexing",
})
if (indexingStarted) {
try {
await this.vectorStore.clearCollection()
} catch (cleanupError) {
console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
location: "startIndexing.cleanup",
})
}
}

// Only clear cache if indexing had started (Qdrant connection succeeded)
// If we never connected to Qdrant, preserve cache for incremental scan when it comes back
// Determine if this is a connection error (never reached Qdrant) vs a mid-indexing failure.
// Only wipe collection + cache when indexing actually started AND data was written,
// since clearing on transient errors destroys a perfectly valid existing index.
if (indexingStarted) {
// Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch
await this.cacheManager.clearCacheFile()
console.log(
"[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.",
)
// Indexing started — but only clear data if a new collection was just created
// (meaning there's no pre-existing data to preserve). If we were doing an
// incremental scan on an existing collection, preserve the data so the user
// doesn't lose their entire index due to a transient embedding API error.
if (collectionCreated) {
try {
await this.vectorStore.clearCollection()
} catch (cleanupError) {
console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
location: "startIndexing.cleanup",
})
}
await this.cacheManager.clearCacheFile()
console.log(
"[CodeIndexOrchestrator] Indexing failed on a newly created collection. Clearing cache to avoid inconsistency.",
)
} else {
// Pre-existing collection — flush (persist) the cache rather than clearing it
// so the next startup can resume incrementally from where we left off.
await this.cacheManager.flush()
console.log(
"[CodeIndexOrchestrator] Indexing failed on existing collection. Preserving existing data and cache for recovery.",
)
}
} else {
// Never connected to Qdrant - preserve cache for future incremental scan
console.log(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -647,23 +647,21 @@ describe("QdrantVectorStore", () => {
expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(6)
;(console.warn as any).mockRestore() // Restore console.warn
})
it("should log warning for non-404 errors but still create collection", async () => {
it("should throw on non-404 errors instead of creating a new collection", async () => {
const genericError = new Error("Generic Qdrant Error")
mockQdrantClientInstance.getCollection.mockRejectedValue(genericError)
vitest.spyOn(console, "warn").mockImplementation(() => {}) // Suppress console.warn
vitest.spyOn(console, "error").mockImplementation(() => {}) // Suppress console.error

const result = await vectorStore.initialize()
// Non-404 errors should propagate up as a connection failure, NOT create a new collection
await expect(vectorStore.initialize()).rejects.toThrow(
/Failed to connect to Qdrant vector database|vectorStore\.qdrantConnectionFailed/,
)

expect(result).toBe(true) // Collection was created
expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1)
expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1)
// Should NOT have tried to create a collection - the error is a connectivity issue, not a missing collection
expect(mockQdrantClientInstance.createCollection).not.toHaveBeenCalled()
expect(mockQdrantClientInstance.deleteCollection).not.toHaveBeenCalled()
expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(6)
expect(console.warn).toHaveBeenCalledWith(
expect.stringContaining(`Warning during getCollectionInfo for "${expectedCollectionName}"`),
genericError.message,
)
;(console.warn as any).mockRestore()
;(console.error as any).mockRestore()
})
it("should re-throw error from createCollection when no collection initially exists", async () => {
mockQdrantClientInstance.getCollection.mockRejectedValue({
Expand Down Expand Up @@ -1007,20 +1005,16 @@ describe("QdrantVectorStore", () => {
expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledWith(expectedCollectionName)
})

it("should return false and log warning for non-404 errors", async () => {
it("should throw for non-404 errors instead of returning false", async () => {
const genericError = new Error("Network error")
mockQdrantClientInstance.getCollection.mockRejectedValue(genericError)
vitest.spyOn(console, "warn").mockImplementation(() => {})
vitest.spyOn(console, "error").mockImplementation(() => {})

const result = await vectorStore.collectionExists()
// Non-404 errors should propagate so callers know Qdrant is unreachable
await expect(vectorStore.collectionExists()).rejects.toThrow("Network error")

expect(result).toBe(false)
expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1)
expect(console.warn).toHaveBeenCalledWith(
expect.stringContaining(`Warning during getCollectionInfo for "${expectedCollectionName}"`),
genericError.message,
)
;(console.warn as any).mockRestore()
;(console.error as any).mockRestore()
})
describe("deleteCollection", () => {
it("should delete collection when it exists", async () => {
Expand Down
115 changes: 76 additions & 39 deletions src/services/code-index/vector-store/qdrant-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,18 +127,56 @@ export class QdrantVectorStore implements IVectorStore {
}
}

/**
* Checks if an error from the Qdrant client indicates a "not found" (404) response.
* Qdrant client errors may have a `response.status` property or a `status` property.
*/
private isNotFoundError(error: unknown): boolean {
if (error && typeof error === "object") {
const err = error as Record<string, any>
// Check for response.status (common Qdrant client error shape)
if (err.response?.status === 404) {
return true
}
// Check for top-level status property
if (err.status === 404) {
return true
}
// Check for error message patterns indicating not found
if (err.message && typeof err.message === "string") {
const msg = err.message.toLowerCase()
if (msg.includes("not found") || msg.includes("doesn't exist") || msg.includes("does not exist")) {
return true
}
}
}
return false
}

/**
* Retrieves collection info from Qdrant.
* Returns null ONLY when the collection does not exist (404).
* Throws for other errors (connection failures, timeouts, etc.) so callers
* can distinguish "collection missing" from "Qdrant unreachable".
*/
private async getCollectionInfo(): Promise<Schemas["CollectionInfo"] | null> {
try {
const collectionInfo = await this.client.getCollection(this.collectionName)
return collectionInfo
} catch (error: unknown) {
if (error instanceof Error) {
console.warn(
`[QdrantVectorStore] Warning during getCollectionInfo for "${this.collectionName}". Collection may not exist or another error occurred:`,
error.message,
if (this.isNotFoundError(error)) {
console.log(
`[QdrantVectorStore] Collection "${this.collectionName}" not found (404). Will create a new one.`,
)
return null
}
return null
// For non-404 errors (connection failures, timeouts, etc.), propagate the error
// so callers don't mistakenly assume the collection doesn't exist.
const message = error instanceof Error ? error.message : String(error)
console.error(
`[QdrantVectorStore] Error retrieving collection "${this.collectionName}" (not a 404): ${message}`,
)
throw error
}
}

Expand Down Expand Up @@ -572,52 +610,51 @@ export class QdrantVectorStore implements IVectorStore {
}

/**
* Checks if the collection exists
* @returns Promise resolving to boolean indicating if the collection exists
* Checks if the collection exists.
* Returns false for 404 (not found). Throws for connection/other errors.
*/
async collectionExists(): Promise<boolean> {
const collectionInfo = await this.getCollectionInfo()
return collectionInfo !== null
}

/**
* Checks if the collection exists and has indexed points
* @returns Promise resolving to boolean indicating if the collection exists and has points
* Checks if the collection exists and has indexed points.
* Returns false when the collection doesn't exist (404).
* Throws for connection errors so callers can distinguish
* "no data" from "can't reach Qdrant".
*/
async hasIndexedData(): Promise<boolean> {
try {
const collectionInfo = await this.getCollectionInfo()
if (!collectionInfo) {
return false
}
// Check if the collection has any points indexed
const pointsCount = collectionInfo.points_count ?? 0
if (pointsCount === 0) {
return false
}

// Check if the indexing completion marker exists
// Use a deterministic UUID generated from a constant string
const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
const metadataPoints = await this.client.retrieve(this.collectionName, {
ids: [metadataId],
})
// getCollectionInfo() now throws on non-404 errors, so connection
// failures will propagate to the caller instead of returning false.
const collectionInfo = await this.getCollectionInfo()
if (!collectionInfo) {
return false
}
// Check if the collection has any points indexed
const pointsCount = collectionInfo.points_count ?? 0
if (pointsCount === 0) {
return false
}

// If marker exists, use it to determine completion status
if (metadataPoints.length > 0) {
return metadataPoints[0].payload?.indexing_complete === true
}
// Check if the indexing completion marker exists
// Use a deterministic UUID generated from a constant string
const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
const metadataPoints = await this.client.retrieve(this.collectionName, {
ids: [metadataId],
})

// Backward compatibility: No marker exists (old index or pre-marker version)
// Fall back to old logic - assume complete if collection has points
console.log(
"[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).",
)
return pointsCount > 0
} catch (error) {
console.warn("[QdrantVectorStore] Failed to check if collection has data:", error)
return false
// If marker exists, use it to determine completion status
if (metadataPoints.length > 0) {
return metadataPoints[0].payload?.indexing_complete === true
}

// Backward compatibility: No marker exists (old index or pre-marker version)
// Fall back to old logic - assume complete if collection has points
console.log(
"[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).",
)
return pointsCount > 0
}

/**
Expand Down
Loading