Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/constants/codebase-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export const INDEX_META_FILENAME = 'index-meta.json' as const;

export const MEMORY_FILENAME = 'memory.json' as const;
export const INTELLIGENCE_FILENAME = 'intelligence.json' as const;
export const HEALTH_FILENAME = 'health.json' as const;
export const KEYWORD_INDEX_FILENAME = 'index.json' as const;
export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const;
export const VECTOR_DB_DIRNAME = 'index' as const;
Expand Down
60 changes: 60 additions & 0 deletions src/core/index-meta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { z } from 'zod';

import {
CODEBASE_CONTEXT_DIRNAME,
HEALTH_FILENAME,
INDEX_FORMAT_VERSION,
INDEX_META_FILENAME,
INDEX_META_VERSION,
Expand Down Expand Up @@ -41,6 +42,30 @@ const RelationshipsFileSchema = z
})
.passthrough();

// Non-negative integer counter shared by every field of the health summary.
const HealthCountSchema = z.number().int().nonnegative();

// Aggregate counts reported at the top of health.json; unknown keys are kept.
const HealthSummarySchema = z
  .object({
    files: HealthCountSchema,
    highRiskFiles: HealthCountSchema,
    mediumRiskFiles: HealthCountSchema,
    lowRiskFiles: HealthCountSchema
  })
  .passthrough();

// A single per-file health record; unknown keys are kept.
const HealthFileEntrySchema = z
  .object({
    file: z.string().min(1),
    level: z.enum(['low', 'medium', 'high']),
    score: z.number().nonnegative(),
    reasons: z.array(z.string()),
    signals: z.record(z.string(), z.number()).optional()
  })
  .passthrough();

/**
 * Shape of the health sidecar (health.json): an artifact header, an ISO
 * datetime string, a summary of risk counts and the list of per-file records.
 */
const HealthFileSchema = z.object({
  header: ArtifactHeaderSchema,
  generatedAt: z.string().datetime(),
  summary: HealthSummarySchema,
  files: z.array(HealthFileEntrySchema)
});

export const IndexMetaSchema = z.object({
metaVersion: z.number().int().positive(),
formatVersion: z.number().int().nonnegative(),
Expand All @@ -59,6 +84,11 @@ export const IndexMetaSchema = z.object({
embeddingModel: z.string().optional()
}),
intelligence: z
.object({
path: z.string().min(1)
})
.optional(),
health: z
.object({
path: z.string().min(1)
})
Expand Down Expand Up @@ -270,4 +300,34 @@ export async function validateIndexArtifacts(rootDir: string, meta: IndexMeta):
throw asIndexCorrupted('Relationships sidecar corrupted (rebuild required)', error);
}
}

// Optional health sidecar: validate if present, but do not require.
const healthPath = path.join(contextDir, HEALTH_FILENAME);
if (await pathExists(healthPath)) {
try {
const raw = await fs.readFile(healthPath, 'utf-8');
const json = JSON.parse(raw);
const parsed = HealthFileSchema.safeParse(json);
if (!parsed.success) {
throw new IndexCorruptedError(
`Health schema mismatch (rebuild required): ${parsed.error.message}`
);
}

const { buildId, formatVersion } = parsed.data.header;
if (formatVersion !== meta.formatVersion) {
throw new IndexCorruptedError(
`Health formatVersion mismatch (rebuild required): meta=${meta.formatVersion}, health.json=${formatVersion}`
);
}
if (buildId !== meta.buildId) {
throw new IndexCorruptedError(
`Health buildId mismatch (rebuild required): meta=${meta.buildId}, health.json=${buildId}`
);
}
} catch (error) {
if (error instanceof IndexCorruptedError) throw error;
throw asIndexCorrupted('Health sidecar corrupted (rebuild required)', error);
}
}
}
20 changes: 19 additions & 1 deletion src/core/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import { getFileCommitDates } from '../utils/git-dates.js';
import {
CODEBASE_CONTEXT_DIRNAME,
EXCLUDED_GLOB_PATTERNS,
HEALTH_FILENAME,
INDEX_FORMAT_VERSION,
INDEXING_STATS_FILENAME,
INDEX_META_FILENAME,
Expand All @@ -52,6 +53,7 @@ import {
RELATIONSHIPS_FILENAME,
VECTOR_DB_DIRNAME
} from '../constants/codebase-context.js';
import { deriveCodebaseHealth } from '../health/derive.js';

const STAGING_DIRNAME = '.staging';
const PREVIOUS_DIRNAME = '.previous';
Expand Down Expand Up @@ -104,6 +106,7 @@ async function atomicSwapStagingToActive(
const activeManifestPath = path.join(contextDir, MANIFEST_FILENAME);
const activeStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
const activeRelationshipsPath = path.join(contextDir, RELATIONSHIPS_FILENAME);
const activeHealthPath = path.join(contextDir, HEALTH_FILENAME);

const stagingMetaPath = path.join(stagingDir, INDEX_META_FILENAME);
const stagingIndexPath = path.join(stagingDir, KEYWORD_INDEX_FILENAME);
Expand All @@ -112,6 +115,7 @@ async function atomicSwapStagingToActive(
const stagingManifestPath = path.join(stagingDir, MANIFEST_FILENAME);
const stagingStatsPath = path.join(stagingDir, INDEXING_STATS_FILENAME);
const stagingRelationshipsPath = path.join(stagingDir, RELATIONSHIPS_FILENAME);
const stagingHealthPath = path.join(stagingDir, HEALTH_FILENAME);

// Step 1: Create .previous directory and move current active there
await fs.mkdir(previousDir, { recursive: true });
Expand Down Expand Up @@ -149,6 +153,7 @@ async function atomicSwapStagingToActive(
await moveIfExists(activeManifestPath, path.join(previousDir, MANIFEST_FILENAME));
await moveIfExists(activeStatsPath, path.join(previousDir, INDEXING_STATS_FILENAME));
await moveIfExists(activeRelationshipsPath, path.join(previousDir, RELATIONSHIPS_FILENAME));
await moveIfExists(activeHealthPath, path.join(previousDir, HEALTH_FILENAME));
await moveDirIfExists(activeVectorDir, path.join(previousDir, VECTOR_DB_DIRNAME));

// Step 2: Move staging artifacts to active location
Expand All @@ -159,6 +164,7 @@ async function atomicSwapStagingToActive(
await moveIfExists(stagingManifestPath, activeManifestPath);
await moveIfExists(stagingStatsPath, activeStatsPath);
await moveIfExists(stagingRelationshipsPath, activeRelationshipsPath);
await moveIfExists(stagingHealthPath, activeHealthPath);
await moveDirIfExists(stagingVectorDir, activeVectorDir);

// Step 3: Clean up .previous and staging directories
Expand Down Expand Up @@ -188,6 +194,7 @@ async function atomicSwapStagingToActive(
await moveIfExists(path.join(previousDir, MANIFEST_FILENAME), activeManifestPath);
await moveIfExists(path.join(previousDir, INDEXING_STATS_FILENAME), activeStatsPath);
await moveIfExists(path.join(previousDir, RELATIONSHIPS_FILENAME), activeRelationshipsPath);
await moveIfExists(path.join(previousDir, HEALTH_FILENAME), activeHealthPath);
await moveDirIfExists(path.join(previousDir, VECTOR_DB_DIRNAME), activeVectorDir);
console.error('Rollback successful');
} catch (rollbackError) {
Expand Down Expand Up @@ -980,6 +987,16 @@ export class CodebaseIndexer {
};
await fs.writeFile(relationshipsPath, JSON.stringify(relationships, null, 2));

const healthPath = path.join(activeContextDir, HEALTH_FILENAME);
const health = deriveCodebaseHealth({
buildId,
formatVersion: INDEX_FORMAT_VERSION,
generatedAt,
chunks: allChunks,
graph: internalFileGraph
});
await fs.writeFile(healthPath, JSON.stringify(health, null, 2));

// Write manifest (both full and incremental)
// For full rebuild, write to staging; for incremental, write to active
const activeManifestPath = path.join(activeContextDir, MANIFEST_FILENAME);
Expand Down Expand Up @@ -1021,7 +1038,8 @@ export class CodebaseIndexer {
intelligence: { path: INTELLIGENCE_FILENAME },
manifest: { path: MANIFEST_FILENAME },
indexingStats: { path: INDEXING_STATS_FILENAME },
relationships: { path: RELATIONSHIPS_FILENAME }
relationships: { path: RELATIONSHIPS_FILENAME },
health: { path: HEALTH_FILENAME }
}
},
null,
Expand Down
207 changes: 207 additions & 0 deletions src/health/derive.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import type { CodeChunk, CodebaseHealthArtifact, CodebaseHealthFile } from '../types/index.js';
import { InternalFileGraph } from '../utils/usage-tracker.js';

/** Inputs consumed by `deriveCodebaseHealth`. */
interface DeriveCodebaseHealthParams {
/** Build identifier copied verbatim into the artifact header. */
buildId: string;
/** Index format version copied verbatim into the artifact header. */
formatVersion: number;
/** Timestamp string copied verbatim into the artifact's `generatedAt` field. */
generatedAt: string;
/** Indexed chunks; their complexity metadata feeds the per-file metrics. */
chunks: CodeChunk[];
/** Import graph supplying import edges (`toJSON().imports`) and cycles (`findCycles()`). */
graph: InternalFileGraph;
}

/** Structural metrics gathered for one file by `collectFileMetrics`. */
interface FileMetrics {
/** Length of the file's dependency list in the import graph. */
importCount: number;
/** Number of distinct files whose dependency list names this file. */
importerCount: number;
/** Number of reported cycles that list this file (each cycle's last entry is ignored). */
cycleCount: number;
/** Largest complexity value found on any of the file's chunks; 0 when none carries one. */
maxCyclomaticComplexity: number;
/**
 * 1-based position when files are ordered by `importCount + importerCount`
 * descending (ties broken by path). Left unset for files whose sum is 0.
 */
hotspotRank?: number;
}

/** Per-file metrics keyed by normalized file path (see `normalizePathLike`). */
type FileMetricsMap = Map<string, FileMetrics>;

/**
 * Canonicalises a path-like string so the same file always yields one key.
 *
 * Every backslash becomes a forward slash, then a single leading `./` is
 * dropped. Nothing else (e.g. `../`, repeated `./`) is touched.
 *
 * @param filePath - Path as reported by the graph or a chunk.
 * @returns The slash-normalised path without one leading `./`.
 */
function normalizePathLike(filePath: string): string {
  const forwardSlashed = filePath.split('\\').join('/');
  return forwardSlashed.startsWith('./') ? forwardSlashed.slice(2) : forwardSlashed;
}

/**
 * Gathers per-file structural metrics from the import graph and the chunks.
 *
 * Steps, in order:
 *  1. import counts and a reverse (importer) index from `graph.toJSON().imports`;
 *  2. importer counts from that reverse index;
 *  3. the maximum chunk complexity per file;
 *  4. hotspot ranks by combined import + importer count;
 *  5. cycle membership counts from `graph.findCycles()`.
 *
 * All keys are passed through `normalizePathLike`.
 *
 * @param chunks - Indexed chunks; `metadata.cyclomaticComplexity` is preferred,
 *   `metadata.complexity` is the fallback, and a missing value counts as 0.
 * @param graph - Import graph to read edges and cycles from.
 * @returns A map from normalized file path to its metrics.
 */
function collectFileMetrics(chunks: CodeChunk[], graph: InternalFileGraph): FileMetricsMap {
  const metricsByFile = new Map<string, FileMetrics>();

  // Returns the entry for a file, registering a zeroed one on first sight.
  const entryFor = (file: string): FileMetrics => {
    let entry = metricsByFile.get(file);
    if (!entry) {
      entry = { importCount: 0, importerCount: 0, cycleCount: 0, maxCyclomaticComplexity: 0 };
      metricsByFile.set(file, entry);
    }
    return entry;
  };

  // Outgoing edges: record import counts and build the importer index.
  const serializedGraph = graph.toJSON();
  const importersByTarget = new Map<string, Set<string>>();
  for (const [rawFile, rawDeps] of Object.entries(serializedGraph.imports)) {
    const importer = normalizePathLike(rawFile);
    entryFor(importer).importCount = rawDeps.length;

    for (const rawDep of rawDeps) {
      const target = normalizePathLike(rawDep);
      let importers = importersByTarget.get(target);
      if (!importers) {
        importers = new Set<string>();
        importersByTarget.set(target, importers);
      }
      importers.add(importer);
    }
  }

  // Incoming edges: the set size is the number of distinct importers.
  for (const [target, importers] of importersByTarget) {
    entryFor(target).importerCount = importers.size;
  }

  // Complexity: keep the largest value seen across a file's chunks.
  for (const chunk of chunks) {
    // relativePath wins; filePath is used only when relativePath is falsy.
    const entry = entryFor(normalizePathLike(chunk.relativePath || chunk.filePath));

    let complexity = 0;
    if (typeof chunk.metadata?.cyclomaticComplexity === 'number') {
      complexity = chunk.metadata.cyclomaticComplexity;
    } else if (typeof chunk.metadata?.complexity === 'number') {
      complexity = chunk.metadata.complexity;
    }

    entry.maxCyclomaticComplexity = Math.max(entry.maxCyclomaticComplexity, complexity);
  }

  // Hotspots: only files with at least one edge are ranked, busiest first.
  const ranked: Array<{ file: string; combined: number; entry: FileMetrics }> = [];
  for (const [file, entry] of metricsByFile) {
    const combined = entry.importCount + entry.importerCount;
    if (combined > 0) {
      ranked.push({ file, combined, entry });
    }
  }
  ranked.sort((a, b) => b.combined - a.combined || a.file.localeCompare(b.file));
  for (let position = 0; position < ranked.length; position += 1) {
    ranked[position].entry.hotspotRank = position + 1;
  }

  // Cycles: the final entry of each cycle is skipped.
  // NOTE(review): presumably it repeats the first file to close the loop; confirm against findCycles().
  for (const cycle of graph.findCycles()) {
    for (const member of cycle.files.slice(0, -1)) {
      entryFor(normalizePathLike(member)).cycleCount += 1;
    }
  }

  return metricsByFile;
}

/**
 * Scores one file's structural risk and buckets it into low / medium / high.
 *
 * Points per signal: +3 for any cycle membership; +2 / +1 for an importer
 * count of at least 8 / 4; +2 / +1 for a hotspot rank within the top 5 / 10;
 * +2 / +1 for a max cyclomatic complexity of at least 18 / 10. A total of 4
 * or more is `high`, 2 or more is `medium`, anything lower is `low`.
 *
 * @param fileMetrics - Metrics gathered for a single file.
 * @returns A health record with an empty `file` placeholder (the path is not
 *   known here), at most the first three reasons, and `signals` limited to
 *   metrics that are present and non-zero.
 */
function getHealthLevel(fileMetrics: FileMetrics): CodebaseHealthFile {
  const { cycleCount, importerCount, importCount, maxCyclomaticComplexity } = fileMetrics;
  const rank = fileMetrics.hotspotRank;

  const findings: string[] = [];
  let total = 0;
  // Adds points to the running score and remembers why.
  const record = (points: number, reason: string): void => {
    total += points;
    findings.push(reason);
  };

  if (cycleCount > 0) {
    record(3, `Participates in ${cycleCount} circular dependenc${cycleCount === 1 ? 'y' : 'ies'}`);
  }

  if (importerCount >= 8) {
    record(2, `High fan-in: ${importerCount} files depend on it`);
  } else if (importerCount >= 4) {
    record(1, `Shared dependency for ${importerCount} files`);
  }

  // An absent (or zero) rank means the file was never ranked.
  if (rank) {
    if (rank <= 5) {
      record(2, `Hotspot rank #${rank} by graph centrality`);
    } else if (rank <= 10) {
      record(1, 'Top-10 hotspot by graph centrality');
    }
  }

  if (maxCyclomaticComplexity >= 18) {
    record(2, `Complex implementation (cyclomatic ${maxCyclomaticComplexity})`);
  } else if (maxCyclomaticComplexity >= 10) {
    record(1, `Moderate code complexity (cyclomatic ${maxCyclomaticComplexity})`);
  }

  let level: 'low' | 'medium' | 'high' = 'low';
  if (total >= 4) {
    level = 'high';
  } else if (total >= 2) {
    level = 'medium';
  }

  // Keys are inserted in a fixed order so the serialized artifact stays stable.
  const signals: {
    hotspotRank?: number;
    importerCount?: number;
    importCount?: number;
    cycleCount?: number;
    maxCyclomaticComplexity?: number;
  } = {};
  if (rank) signals.hotspotRank = rank;
  if (importerCount > 0) signals.importerCount = importerCount;
  if (importCount > 0) signals.importCount = importCount;
  if (cycleCount > 0) signals.cycleCount = cycleCount;
  if (maxCyclomaticComplexity > 0) signals.maxCyclomaticComplexity = maxCyclomaticComplexity;

  return {
    file: '',
    level,
    score: total,
    reasons: findings.slice(0, 3),
    signals
  };
}

/**
 * Produces the codebase health artifact for one index build.
 *
 * Every file known to the import graph or the chunk list gets a health
 * record. Records are ordered by level (high, medium, low), then by score
 * descending, then by path. The summary counts records per level.
 *
 * @param params - Build identity (`buildId`, `formatVersion`), the
 *   `generatedAt` timestamp, and the `chunks` / `graph` to analyse.
 * @returns The artifact: header, timestamp, summary counts and sorted records.
 */
export function deriveCodebaseHealth({
  buildId,
  formatVersion,
  generatedAt,
  chunks,
  graph
}: DeriveCodebaseHealthParams): CodebaseHealthArtifact {
  // Lower value sorts first.
  const levelOrder = { high: 0, medium: 1, low: 2 };

  // The per-file scorer leaves `file` blank; fill in the real path here.
  const files = Array.from(collectFileMetrics(chunks, graph), ([file, metrics]) => ({
    ...getHealthLevel(metrics),
    file
  }));

  files.sort((left, right) => {
    if (left.level !== right.level) {
      return levelOrder[left.level] - levelOrder[right.level];
    }
    return right.score - left.score || left.file.localeCompare(right.file);
  });

  let highRiskFiles = 0;
  let mediumRiskFiles = 0;
  for (const entry of files) {
    if (entry.level === 'high') {
      highRiskFiles += 1;
    } else if (entry.level === 'medium') {
      mediumRiskFiles += 1;
    }
  }
  // Whatever is neither high nor medium is counted as low.
  const lowRiskFiles = files.length - highRiskFiles - mediumRiskFiles;

  return {
    header: { buildId, formatVersion },
    generatedAt,
    summary: {
      files: files.length,
      highRiskFiles,
      mediumRiskFiles,
      lowRiskFiles
    },
    files
  };
}
Loading
Loading