Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import path from 'node:path';
import {
DEFAULT_THRESHOLD,
type EvaluationResult,
type ResultsExportConfig,
type ResultsConfig,
type ResultsRepoStatus,
directPushResults,
directorySizeBytes,
Expand Down Expand Up @@ -59,7 +59,7 @@ function getStatusMessage(error: unknown): string {
return error instanceof Error ? error.message : String(error);
}

function normalizeResultsExportConfig(config: ResultsExportConfig): Required<ResultsExportConfig> {
function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
return {
repo: config.repo,
path: config.path,
Expand Down Expand Up @@ -107,13 +107,13 @@ async function maybeWarnLargeArtifact(runDir: string): Promise<void> {

async function loadNormalizedResultsConfig(
cwd: string,
): Promise<Required<ResultsExportConfig> | undefined> {
): Promise<Required<ResultsConfig> | undefined> {
const repoRoot = (await findRepoRoot(cwd)) ?? cwd;
const config = await loadConfig(path.join(cwd, '_'), repoRoot);
if (!config?.results?.export) {
if (!config?.results) {
return undefined;
}
return normalizeResultsExportConfig(config.results.export);
return normalizeResultsConfig(config.results);
}

export function encodeRemoteRunId(filename: string): string {
Expand Down
10 changes: 4 additions & 6 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,8 @@ describe('serve app', () => {
writeFileSync(
path.join(tempDir, '.agentv', 'config.yaml'),
`results:
export:
repo: EntityProcess/agentv-evals
path: autopilot-dev/runs
repo: EntityProcess/agentv-evals
path: autopilot-dev/runs
`,
);

Expand Down Expand Up @@ -586,9 +585,8 @@ describe('serve app', () => {
writeFileSync(
path.join(tempDir, '.agentv', 'config.yaml'),
`results:
export:
repo: EntityProcess/agentv-evals
path: autopilot-dev/runs
repo: EntityProcess/agentv-evals
path: autopilot-dev/runs
`,
);

Expand Down
2 changes: 1 addition & 1 deletion apps/studio/src/components/RunSourceToolbar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ export function RunSourceToolbar({
) : filter === 'all' ? (
<p className="text-sm text-gray-500">
Remote results are not configured. Add{' '}
<code className="rounded bg-gray-800 px-1 text-gray-400">results.export</code> to{' '}
<code className="rounded bg-gray-800 px-1 text-gray-400">results</code> to{' '}
<code className="rounded bg-gray-800 px-1 text-gray-400">.agentv/config.yaml</code> to
enable.
</p>
Expand Down
5 changes: 1 addition & 4 deletions apps/studio/src/routes/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,7 @@ function RunsTabContent({
<>
<p className="text-lg text-gray-400">Remote results are not configured.</p>
<p className="mt-2 text-sm text-gray-500">
Add{' '}
<code className="rounded bg-gray-800 px-2 py-1 text-cyan-400">
results.export
</code>{' '}
Add <code className="rounded bg-gray-800 px-2 py-1 text-cyan-400">results</code>{' '}
to{' '}
<code className="rounded bg-gray-800 px-2 py-1 text-cyan-400">
.agentv/config.yaml
Expand Down
9 changes: 4 additions & 5 deletions apps/web/src/content/docs/docs/tools/studio.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,13 @@ Studio can display runs pushed to a remote git repository by other machines or C

### Configuration

Add a `results.export` block to `.agentv/config.yaml`:
Add a `results` block to `.agentv/config.yaml`:

```yaml
results:
export:
repo: EntityProcess/agentv-evals # GitHub repo (owner/repo or full URL)
path: runs # Directory within the repo
auto_push: true # Push directly to base branch after every eval run
repo: EntityProcess/agentv-evals # GitHub repo (owner/repo or full URL)
path: runs # Directory within the repo
auto_push: true # Push directly to base branch after every eval run
```

With `auto_push: true`, every `agentv eval` or `agentv pipeline bench` pushes results directly to the configured repo's base branch (e.g., `main`). Results appear immediately in Studio without requiring PR merges.
Expand Down
48 changes: 10 additions & 38 deletions packages/core/src/evaluation/loaders/config-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export type ExecutionDefaults = {
readonly pool_slots?: number;
};

export type ResultsExportConfig = {
export type ResultsConfig = {
readonly repo: string;
readonly path: string;
readonly auto_push?: boolean;
Expand All @@ -52,9 +52,7 @@ export type AgentVConfig = {
readonly required_version?: string;
readonly eval_patterns?: readonly string[];
readonly execution?: ExecutionDefaults;
readonly results?: {
readonly export?: ResultsExportConfig;
};
readonly results?: ResultsConfig;
readonly hooks?: HooksConfig;
};

Expand Down Expand Up @@ -560,10 +558,7 @@ export function parseExecutionDefaults(
return Object.keys(result).length > 0 ? (result as ExecutionDefaults) : undefined;
}

export function parseResultsConfig(
raw: unknown,
configPath: string,
): AgentVConfig['results'] | undefined {
export function parseResultsConfig(raw: unknown, configPath: string): ResultsConfig | undefined {
if (raw === undefined || raw === null) {
return undefined;
}
Expand All @@ -572,60 +567,37 @@ export function parseResultsConfig(
return undefined;
}

const obj = raw as Record<string, unknown>;
const exportConfig = parseResultsExportConfig(obj.export, configPath);
if (!exportConfig) {
return undefined;
}

return { export: exportConfig };
}

export function parseResultsExportConfig(
raw: unknown,
configPath: string,
): ResultsExportConfig | undefined {
if (raw === undefined || raw === null) {
return undefined;
}
if (typeof raw !== 'object' || Array.isArray(raw)) {
logWarning(`Invalid results.export in ${configPath}, expected object`);
return undefined;
}

const obj = raw as Record<string, unknown>;
const repo = typeof obj.repo === 'string' ? obj.repo.trim() : '';
const exportPath = typeof obj.path === 'string' ? obj.path.trim() : '';
const resultsPath = typeof obj.path === 'string' ? obj.path.trim() : '';

if (!repo) {
logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
return undefined;
}

if (!exportPath) {
logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
if (!resultsPath) {
logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
return undefined;
}

if (obj.auto_push !== undefined && typeof obj.auto_push !== 'boolean') {
logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
logWarning(`Invalid results.auto_push in ${configPath}, expected boolean`);
return undefined;
}

let branchPrefix: string | undefined;
if (obj.branch_prefix !== undefined) {
if (typeof obj.branch_prefix !== 'string' || obj.branch_prefix.trim().length === 0) {
logWarning(
`Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`,
);
logWarning(`Invalid results.branch_prefix in ${configPath}, expected non-empty string`);
return undefined;
}
branchPrefix = obj.branch_prefix.trim();
}

return {
repo,
path: exportPath,
path: resultsPath,
...(typeof obj.auto_push === 'boolean' && { auto_push: obj.auto_push }),
...(branchPrefix && { branch_prefix: branchPrefix }),
};
Expand Down
40 changes: 19 additions & 21 deletions packages/core/src/evaluation/results-repo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import path from 'node:path';
import { promisify } from 'node:util';

import { getAgentvHome } from '../paths.js';
import type { ResultsExportConfig } from './loaders/config-loader.js';
import type { ResultsConfig } from './loaders/config-loader.js';

const execFileAsync = promisify(execFile);

Expand Down Expand Up @@ -61,9 +61,7 @@ function withFriendlyGitHubAuthError(error: unknown): Error {
return new Error(message);
}

export function normalizeResultsExportConfig(
config: ResultsExportConfig,
): Required<ResultsExportConfig> {
export function normalizeResultsConfig(config: ResultsConfig): Required<ResultsConfig> {
return {
repo: config.repo.trim(),
path: config.path.trim().replace(/^\/+|\/+$/g, ''),
Expand Down Expand Up @@ -172,7 +170,7 @@ async function updateCacheRepo(repoDir: string, baseBranch: string): Promise<voi
await runGit(['pull', '--ff-only', 'origin', baseBranch], { cwd: repoDir });
}

function updateStatusFile(config: ResultsExportConfig, patch: PersistedStatus): void {
function updateStatusFile(config: ResultsConfig, patch: PersistedStatus): void {
const cachePaths = getResultsRepoCachePaths(config.repo);
const current = readPersistedStatus(cachePaths.statusFile);
writePersistedStatus(cachePaths.statusFile, {
Expand All @@ -181,8 +179,8 @@ function updateStatusFile(config: ResultsExportConfig, patch: PersistedStatus):
});
}

export async function ensureResultsRepoClone(config: ResultsExportConfig): Promise<string> {
const normalized = normalizeResultsExportConfig(config);
export async function ensureResultsRepoClone(config: ResultsConfig): Promise<string> {
const normalized = normalizeResultsConfig(config);
const cachePaths = getResultsRepoCachePaths(normalized.repo);
mkdirSync(cachePaths.rootDir, { recursive: true });

Expand All @@ -208,7 +206,7 @@ export async function ensureResultsRepoClone(config: ResultsExportConfig): Promi
return cachePaths.repoDir;
}

export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoStatus {
export function getResultsRepoStatus(config?: ResultsConfig): ResultsRepoStatus {
if (!config) {
return {
configured: false,
Expand All @@ -218,7 +216,7 @@ export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoS
};
}

const normalized = normalizeResultsExportConfig(config);
const normalized = normalizeResultsConfig(config);
const cachePaths = getResultsRepoCachePaths(normalized.repo);
const persisted = readPersistedStatus(cachePaths.statusFile);

Expand All @@ -235,8 +233,8 @@ export function getResultsRepoStatus(config?: ResultsExportConfig): ResultsRepoS
};
}

export async function syncResultsRepo(config: ResultsExportConfig): Promise<ResultsRepoStatus> {
const normalized = normalizeResultsExportConfig(config);
export async function syncResultsRepo(config: ResultsConfig): Promise<ResultsRepoStatus> {
const normalized = normalizeResultsConfig(config);

try {
const repoDir = await ensureResultsRepoClone(normalized);
Expand All @@ -257,10 +255,10 @@ export async function syncResultsRepo(config: ResultsExportConfig): Promise<Resu
}

export async function checkoutResultsRepoBranch(
config: ResultsExportConfig,
config: ResultsConfig,
branchName: string,
): Promise<CheckedOutResultsRepoBranch> {
const normalized = normalizeResultsExportConfig(config);
const normalized = normalizeResultsConfig(config);
const repoDir = await ensureResultsRepoClone(normalized);
const baseBranch = await resolveDefaultBranch(repoDir);
await updateCacheRepo(repoDir, baseBranch);
Expand All @@ -274,10 +272,10 @@ export async function checkoutResultsRepoBranch(
}

export async function prepareResultsRepoBranch(
config: ResultsExportConfig,
config: ResultsConfig,
branchName: string,
): Promise<PreparedResultsRepoBranch> {
const normalized = normalizeResultsExportConfig(config);
const normalized = normalizeResultsConfig(config);
const cloneDir = await ensureResultsRepoClone(normalized);
const baseBranch = await resolveDefaultBranch(cloneDir);
await updateCacheRepo(cloneDir, baseBranch);
Expand Down Expand Up @@ -312,8 +310,8 @@ export async function stageResultsArtifacts(params: {
await cp(params.sourceDir, params.destinationDir, { recursive: true });
}

export function resolveResultsRepoRunsDir(config: ResultsExportConfig): string {
const normalized = normalizeResultsExportConfig(config);
export function resolveResultsRepoRunsDir(config: ResultsConfig): string {
const normalized = normalizeResultsConfig(config);
return path.join(
getResultsRepoCachePaths(normalized.repo).repoDir,
...normalized.path.split('/'),
Expand Down Expand Up @@ -354,11 +352,11 @@ export async function commitAndPushResultsBranch(params: {
}

export async function pushResultsRepoBranch(
config: ResultsExportConfig,
config: ResultsConfig,
branchName: string,
cwd?: string,
): Promise<void> {
const normalized = normalizeResultsExportConfig(config);
const normalized = normalizeResultsConfig(config);
await runGit(['push', '-u', 'origin', branchName], {
cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir,
});
Expand Down Expand Up @@ -405,12 +403,12 @@ const DIRECT_PUSH_MAX_RETRIES = 3;
* Returns true if artifacts were pushed, false if no changes were detected.
*/
export async function directPushResults(params: {
readonly config: ResultsExportConfig;
readonly config: ResultsConfig;
readonly sourceDir: string;
readonly destinationPath: string;
readonly commitMessage: string;
}): Promise<boolean> {
const normalized = normalizeResultsExportConfig(params.config);
const normalized = normalizeResultsConfig(params.config);
const repoDir = await ensureResultsRepoClone(normalized);
const baseBranch = await resolveDefaultBranch(repoDir);
await updateCacheRepo(repoDir, baseBranch);
Expand Down
Loading
Loading