diff --git a/src/services/code-index/__tests__/orchestrator.spec.ts b/src/services/code-index/__tests__/orchestrator.spec.ts index e940ea04c24..963ec72d24c 100644 --- a/src/services/code-index/__tests__/orchestrator.spec.ts +++ b/src/services/code-index/__tests__/orchestrator.spec.ts @@ -130,9 +130,9 @@ describe("CodeIndexOrchestrator - error path cleanup gating", () => { expect(lastCall[0]).toBe("Error") }) - it("should call clearCollection() and clear cache when an error occurs after initialize() succeeds (indexing started)", async () => { - // Arrange: initialize succeeds; fail soon after to enter error path with indexingStarted=true - vectorStore.initialize.mockResolvedValue(false) // existing collection + it("should preserve existing data when an error occurs on an existing collection (collectionCreated=false)", async () => { + // Arrange: initialize succeeds with existing collection; fail soon after + vectorStore.initialize.mockResolvedValue(false) // existing collection, NOT newly created vectorStore.hasIndexedData.mockResolvedValue(false) // force full scan path vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure")) @@ -149,9 +149,40 @@ describe("CodeIndexOrchestrator - error path cleanup gating", () => { // Act await orchestrator.startIndexing() - // Assert: cleanup gated behind indexingStarted should have happened + // Assert: should NOT clear existing collection data on error (preserves user's index) + expect(vectorStore.clearCollection).not.toHaveBeenCalled() + // Should flush (persist) cache rather than clearing it + expect(cacheManager.flush).toHaveBeenCalledTimes(1) + expect(cacheManager.clearCacheFile).not.toHaveBeenCalled() + + // Error state should be set + expect(stateManager.setSystemState).toHaveBeenCalled() + const lastCall = stateManager.setSystemState.mock.calls[stateManager.setSystemState.mock.calls.length - 1] + expect(lastCall[0]).toBe("Error") + }) + + it("should clear collection and cache when an error occurs on a newly created collection (collectionCreated=true)", async () => { + // Arrange: initialize creates a new collection; fail soon after + vectorStore.initialize.mockResolvedValue(true) // newly created collection + vectorStore.hasIndexedData.mockResolvedValue(false) // new collection has no data + vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure")) + + const orchestrator = new CodeIndexOrchestrator( + configManager, + stateManager, + workspacePath, + cacheManager, + vectorStore, + scanner, + fileWatcher, + ) + + // Act + await orchestrator.startIndexing() + + // Assert: should clear data since the collection was just created (no pre-existing data to preserve) expect(vectorStore.clearCollection).toHaveBeenCalledTimes(1) - expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(1) + expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(2) // once in try block (collectionCreated), once in catch // Error state should be set expect(stateManager.setSystemState).toHaveBeenCalled() diff --git a/src/services/code-index/orchestrator.ts b/src/services/code-index/orchestrator.ts index cd65fceb5ef..4019df0e1b0 100644 --- a/src/services/code-index/orchestrator.ts +++ b/src/services/code-index/orchestrator.ts @@ -129,9 +129,12 @@ export class CodeIndexOrchestrator { // Track whether we successfully connected to Qdrant and started indexing // This helps us decide whether to preserve cache on error let indexingStarted = false + // Track whether a new collection was created (vs reusing existing one) + // This helps us decide whether to clear data on error + let collectionCreated = false try { - const collectionCreated = await this.vectorStore.initialize() + collectionCreated = await this.vectorStore.initialize() // Successfully connected to Qdrant indexingStarted = true @@ -316,27 +319,38 @@ export class CodeIndexOrchestrator { stack: error instanceof Error ? error.stack : undefined, location: "startIndexing", }) - if (indexingStarted) { - try { - await this.vectorStore.clearCollection() - } catch (cleanupError) { - console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError) - TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { - error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError), - stack: cleanupError instanceof Error ? cleanupError.stack : undefined, - location: "startIndexing.cleanup", - }) - } - } - // Only clear cache if indexing had started (Qdrant connection succeeded) - // If we never connected to Qdrant, preserve cache for incremental scan when it comes back + // Determine if this is a connection error (never reached Qdrant) vs a mid-indexing failure. + // Only wipe collection + cache when indexing actually started AND data was written, + // since clearing on transient errors destroys a perfectly valid existing index. if (indexingStarted) { - // Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch - await this.cacheManager.clearCacheFile() - console.log( - "[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.", - ) + // Indexing started — but only clear data if a new collection was just created + // (meaning there's no pre-existing data to preserve). If we were doing an + // incremental scan on an existing collection, preserve the data so the user + // doesn't lose their entire index due to a transient embedding API error. + if (collectionCreated) { + try { + await this.vectorStore.clearCollection() + } catch (cleanupError) { + console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError) + TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { + error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError), + stack: cleanupError instanceof Error ? cleanupError.stack : undefined, + location: "startIndexing.cleanup", + }) + } + await this.cacheManager.clearCacheFile() + console.log( + "[CodeIndexOrchestrator] Indexing failed on a newly created collection. Clearing cache to avoid inconsistency.", + ) + } else { + // Pre-existing collection — flush (persist) the cache rather than clearing it + // so the next startup can resume incrementally from where we left off. + await this.cacheManager.flush() + console.log( + "[CodeIndexOrchestrator] Indexing failed on existing collection. Preserving existing data and cache for recovery.", + ) + } } else { // Never connected to Qdrant - preserve cache for future incremental scan console.log( diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts index ab7b15783e3..febc2f2e690 100644 --- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts +++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts @@ -647,23 +647,21 @@ describe("QdrantVectorStore", () => { expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(6) ;(console.warn as any).mockRestore() // Restore console.warn }) - it("should log warning for non-404 errors but still create collection", async () => { + it("should throw on non-404 errors instead of creating a new collection", async () => { const genericError = new Error("Generic Qdrant Error") mockQdrantClientInstance.getCollection.mockRejectedValue(genericError) - vitest.spyOn(console, "warn").mockImplementation(() => {}) // Suppress console.warn + vitest.spyOn(console, "error").mockImplementation(() => {}) // Suppress console.error - const result = await vectorStore.initialize() + // Non-404 errors should propagate up as a connection failure, NOT create a new collection + await expect(vectorStore.initialize()).rejects.toThrow( + /Failed to connect to Qdrant vector database|vectorStore\.qdrantConnectionFailed/, + ) - expect(result).toBe(true) // Collection was created expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1) - expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1) + // Should NOT have tried to create a collection - the error is a connectivity issue, not a missing collection + expect(mockQdrantClientInstance.createCollection).not.toHaveBeenCalled() expect(mockQdrantClientInstance.deleteCollection).not.toHaveBeenCalled() - expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(6) - expect(console.warn).toHaveBeenCalledWith( - expect.stringContaining(`Warning during getCollectionInfo for "${expectedCollectionName}"`), - genericError.message, - ) - ;(console.warn as any).mockRestore() + ;(console.error as any).mockRestore() }) it("should re-throw error from createCollection when no collection initially exists", async () => { mockQdrantClientInstance.getCollection.mockRejectedValue({ @@ -1007,20 +1005,16 @@ describe("QdrantVectorStore", () => { expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledWith(expectedCollectionName) }) - it("should return false and log warning for non-404 errors", async () => { + it("should throw for non-404 errors instead of returning false", async () => { const genericError = new Error("Network error") mockQdrantClientInstance.getCollection.mockRejectedValue(genericError) - vitest.spyOn(console, "warn").mockImplementation(() => {}) + vitest.spyOn(console, "error").mockImplementation(() => {}) - const result = await vectorStore.collectionExists() + // Non-404 errors should propagate so callers know Qdrant is unreachable + await expect(vectorStore.collectionExists()).rejects.toThrow("Network error") - expect(result).toBe(false) expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1) - expect(console.warn).toHaveBeenCalledWith( - expect.stringContaining(`Warning during getCollectionInfo for "${expectedCollectionName}"`), - genericError.message, - ) - ;(console.warn as any).mockRestore() + ;(console.error as any).mockRestore() }) describe("deleteCollection", () => { it("should delete collection when it exists", async () => { diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index ba62afc5f81..6fa62f00b00 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -127,18 +127,56 @@ export class QdrantVectorStore implements IVectorStore { } } + /** + * Checks if an error from the Qdrant client indicates a "not found" (404) response. + * Qdrant client errors may have a `response.status` property or a `status` property. + */ + private isNotFoundError(error: unknown): boolean { + if (error && typeof error === "object") { + const err = error as Record + // Check for response.status (common Qdrant client error shape) + if (err.response?.status === 404) { + return true + } + // Check for top-level status property + if (err.status === 404) { + return true + } + // Check for error message patterns indicating not found + if (err.message && typeof err.message === "string") { + const msg = err.message.toLowerCase() + if (msg.includes("not found") || msg.includes("doesn't exist") || msg.includes("does not exist")) { + return true + } + } + } + return false + } + + /** + * Retrieves collection info from Qdrant. + * Returns null ONLY when the collection does not exist (404). + * Throws for other errors (connection failures, timeouts, etc.) so callers + * can distinguish "collection missing" from "Qdrant unreachable". + */ private async getCollectionInfo(): Promise { try { const collectionInfo = await this.client.getCollection(this.collectionName) return collectionInfo } catch (error: unknown) { - if (error instanceof Error) { - console.warn( - `[QdrantVectorStore] Warning during getCollectionInfo for "${this.collectionName}". Collection may not exist or another error occurred:`, - error.message, + if (this.isNotFoundError(error)) { + console.log( + `[QdrantVectorStore] Collection "${this.collectionName}" not found (404). Will create a new one.`, ) + return null } - return null + // For non-404 errors (connection failures, timeouts, etc.), propagate the error + // so callers don't mistakenly assume the collection doesn't exist. + const message = error instanceof Error ? error.message : String(error) + console.error( + `[QdrantVectorStore] Error retrieving collection "${this.collectionName}" (not a 404): ${message}`, + ) + throw error } } @@ -572,8 +610,8 @@ export class QdrantVectorStore implements IVectorStore { } /** - * Checks if the collection exists - * @returns Promise resolving to boolean indicating if the collection exists + * Checks if the collection exists. + * Returns false for 404 (not found). Throws for connection/other errors. */ async collectionExists(): Promise { const collectionInfo = await this.getCollectionInfo() @@ -581,43 +619,42 @@ export class QdrantVectorStore implements IVectorStore { } /** - * Checks if the collection exists and has indexed points - * @returns Promise resolving to boolean indicating if the collection exists and has points + * Checks if the collection exists and has indexed points. + * Returns false when the collection doesn't exist (404). + * Throws for connection errors so callers can distinguish + * "no data" from "can't reach Qdrant". */ async hasIndexedData(): Promise { - try { - const collectionInfo = await this.getCollectionInfo() - if (!collectionInfo) { - return false - } - // Check if the collection has any points indexed - const pointsCount = collectionInfo.points_count ?? 0 - if (pointsCount === 0) { - return false - } - - // Check if the indexing completion marker exists - // Use a deterministic UUID generated from a constant string - const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE) - const metadataPoints = await this.client.retrieve(this.collectionName, { - ids: [metadataId], - }) + // getCollectionInfo() now throws on non-404 errors, so connection + // failures will propagate to the caller instead of returning false. + const collectionInfo = await this.getCollectionInfo() + if (!collectionInfo) { + return false + } + // Check if the collection has any points indexed + const pointsCount = collectionInfo.points_count ?? 0 + if (pointsCount === 0) { + return false + } - // If marker exists, use it to determine completion status - if (metadataPoints.length > 0) { - return metadataPoints[0].payload?.indexing_complete === true - } + // Check if the indexing completion marker exists + // Use a deterministic UUID generated from a constant string + const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE) + const metadataPoints = await this.client.retrieve(this.collectionName, { + ids: [metadataId], + }) - // Backward compatibility: No marker exists (old index or pre-marker version) - // Fall back to old logic - assume complete if collection has points - console.log( - "[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).", - ) - return pointsCount > 0 - } catch (error) { - console.warn("[QdrantVectorStore] Failed to check if collection has data:", error) - return false + // If marker exists, use it to determine completion status + if (metadataPoints.length > 0) { + return metadataPoints[0].payload?.indexing_complete === true } + + // Backward compatibility: No marker exists (old index or pre-marker version) + // Fall back to old logic - assume complete if collection has points + console.log( + "[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).", + ) + return pointsCount > 0 } /**