Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions scripts/integration-progress-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,11 @@ function summarizeProviderPressure(files) {
pattern:
/\bLinuxToolProvider\b|\blinuxToolProvider\b|\brunCommand\b|\bwhichCommand\b|\bxdotool\b|\bydotool\b|\bxclip\b|\bscrot\b|\bgrim\b|\bwmctrl\b|\bpkill\b/g,
},
{
name: 'Web semantic provider',
pattern:
/\bWebProvider\b|\bwebProvider\b|\bwithWebProvider\b|\bresolveWebProvider\b|\['web'/g,
},
{
name: 'Recording provider',
pattern: /\bRecordingProvider\b|\brecordingProvider\b|\bstartRecording\b/g,
Expand Down
9 changes: 9 additions & 0 deletions test/integration/provider-scenarios/fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ export const PROVIDER_SCENARIO_LINUX: DeviceInfo = {
booted: true,
};

/**
 * Device inventory entry for the web (browser) target used by the provider scenarios.
 *
 * The web desktop scenario lists devices with `platform: 'web'` and asserts on this
 * entry's `platform`, `id` and `target` values.
 */
export const PROVIDER_SCENARIO_WEB: DeviceInfo = {
platform: 'web',
id: 'agent-browser-chrome',
name: 'Agent Browser Chrome',
kind: 'device',
target: 'desktop',
booted: true,
};

export function createDemoIosApp(prefix: string): { tempRoot: string; appPath: string } {
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
const appPath = path.join(tempRoot, 'Demo.app');
Expand Down
161 changes: 161 additions & 0 deletions test/integration/provider-scenarios/web-desktop.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import { test } from 'vitest';
import { assertFlatToolCall, assertPngFile } from './assertions.ts';
import { PROVIDER_SCENARIO_WEB } from './fixtures.ts';
import { createProviderScenarioTempPath, withProviderScenarioResource } from './harness.ts';
import { runProviderScenario } from './scenario.ts';
import { createWebDesktopWorld } from './web-world.ts';

// Target passed to the `open` and `close` commands; it is also expected back as the
// label of the first node in the interactive snapshot.
const WEB_URL = 'https://example.test/dashboard';

// End-to-end provider scenario for the web platform. It drives the daemon through
// open -> snapshot -> get/find/is/wait -> click -> fill -> type -> scroll -> screenshot -> close
// against the in-memory web world, then verifies both the actions recorded on the session
// and the flattened calls the fake web provider received. The trailing 10_000 is the
// per-test timeout in milliseconds.
test('Provider-backed integration web desktop flow uses semantic web provider calls', async () => {
await withProviderScenarioResource(createWebDesktopWorld, async ({ daemon, semanticCalls }) => {
const screenshotPath = createProviderScenarioTempPath(
'agent-device-provider-scenario-web',
'png',
);

try {
// The inventory is expected to expose exactly one web device: the shared fixture.
const devices = await daemon.client().devices.list({ platform: 'web' });
assert.equal(devices.length, 1);
assert.equal(devices[0]?.platform, 'web');
assert.equal(devices[0]?.id, PROVIDER_SCENARIO_WEB.id);
assert.equal(devices[0]?.target, 'desktop');

await runProviderScenario(daemon, [
{
name: 'open web URL',
command: 'open',
positionals: [WEB_URL],
flags: { platform: 'web' },
},
{
name: 'capture interactive web snapshot',
command: 'snapshot',
flags: { snapshotInteractiveOnly: true },
assert: (snapshot) => {
const labels = snapshot.json?.result?.data?.nodes?.map(
(node: { label?: string }) => node.label,
);
// Expected order: opened URL, heading, input, button, status text, trailing text.
// The status text is still 'Ready' and the trailing text is still 'Below the fold'
// because no click or scroll has been issued yet.
assert.deepEqual(labels, [
WEB_URL,
'Ready',
'Email',
'Submit order',
'Ready',
'Below the fold',
]);
},
},
{
name: 'read snapshot ref text',
command: 'get',
positionals: ['text', '@e2'],
expectData: { text: 'Ready' },
},
{
name: 'find visible text',
command: 'find',
positionals: ['text', 'Submit order', 'exists'],
expectData: { found: true },
},
{
name: 'assert visible text',
command: 'is',
positionals: ['visible', 'label="Submit order"'],
expectData: { pass: true },
},
{
name: 'wait for text',
command: 'wait',
positionals: ['text', 'Ready', '100'],
expectData: { text: 'Ready' },
},
{
name: 'click submit ref',
command: 'click',
positionals: ['@e4'],
// (84, 166) is the center of the 120x36 button rect at (24, 148) in web-world.ts.
expectData: { x: 84, y: 166 },
},
{
name: 'fill email ref',
command: 'fill',
positionals: ['@e3', 'qa@example.test'],
flags: { delayMs: 1 },
expectData: { text: 'qa@example.test' },
},
{
name: 'type suffix',
command: 'type',
positionals: [' ok'],
expectData: { text: ' ok' },
},
{
name: 'scroll by pixels',
command: 'scroll',
positionals: ['down'],
flags: { pixels: 240 },
expectData: { pixels: 240 },
},
{
name: 'capture web screenshot artifact',
command: 'screenshot',
positionals: [screenshotPath],
flags: {
screenshotFullscreen: true,
screenshotNoStabilize: true,
},
expectData: { path: screenshotPath },
assert: () => {
assertPngFile(screenshotPath);
},
},
]);

// The session should have recorded the ref-based commands with their original
// positionals (refs, not resolved coordinates) and flags.
const actions = daemon.session()?.actions ?? [];
assert.ok(
actions.some(
(action) => action.command === 'click' && action.positionals.join(' ') === '@e4',
),
'Expected ref click action to be recorded on the session',
);
assert.ok(
actions.some(
(action) =>
action.command === 'fill' &&
action.positionals.join(' ') === '@e3 qa@example.test' &&
action.flags.delayMs === 1,
),
'Expected ref fill action to be recorded on the session',
);
assert.ok(
actions.some(
(action) => action.command === 'type' && action.positionals.join(' ') === ' ok',
),
'Expected type action to be recorded on the session',
);

const close = await daemon.callCommand('close', [WEB_URL]);
assert.equal(close.statusCode, 200, JSON.stringify(close.json));

// Provider-level expectations: each entry mirrors the flattened call shape pushed by
// the fake provider in web-world.ts (missing options are recorded as '').
assertFlatToolCall(semanticCalls, ['web', 'open', WEB_URL, '']);
assertFlatToolCall(semanticCalls, ['web', 'snapshot', 'true', '']);
assertFlatToolCall(semanticCalls, ['web', 'click', '84', '166']);
// (144, 114) is the center of the 240x36 input rect at (24, 96) in web-world.ts.
assertFlatToolCall(semanticCalls, ['web', 'fill', '144', '114', 'qa@example.test', '1']);
assertFlatToolCall(semanticCalls, ['web', 'type', ' ok', '0']);
assertFlatToolCall(semanticCalls, ['web', 'scroll', 'down', '', '240']);
// Expected option values are fullscreen 'true', stabilize 'false' and surface 'app'.
// NOTE(review): 'app' is presumably the daemon's default surface; it is not set by
// this test's flags, so confirm against the screenshot handler.
assertFlatToolCall(semanticCalls, [
'web',
'screenshot',
screenshotPath,
'true',
'false',
'app',
]);
assertFlatToolCall(semanticCalls, ['web', 'close', WEB_URL]);
} finally {
// Always remove the screenshot artifact; `force` tolerates the file never being written.
fs.rmSync(screenshotPath, { force: true });
}
});
}, 10_000);
192 changes: 192 additions & 0 deletions test/integration/provider-scenarios/web-world.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import fs from 'node:fs';
import type { WebProvider } from '../../../src/platforms/web/provider.ts';
import type { RawSnapshotNode } from '../../../src/utils/snapshot.ts';
import { validPng } from './assertions.ts';
import { PROVIDER_SCENARIO_WEB } from './fixtures.ts';
import { createProviderScenarioHarness, type ProviderScenarioHarness } from './harness.ts';
import type { FlatToolCall } from './providers.ts';

// Bounds of the 'Email' text field node; fills whose point lands inside update the input value.
const INPUT_RECT = { x: 24, y: 96, width: 240, height: 36 };
// Bounds of the 'Submit order' button node; clicks whose point lands inside flip the status text.
const BUTTON_RECT = { x: 24, y: 148, width: 120, height: 36 };

/** Mutable state of the fake page that the provider callbacks update and snapshots read. */
type WebPageState = {
// Last target passed to `open`; used as the label of the document node.
openedTarget: string;
// Current value of the 'Email' text field.
inputValue: string;
// Label of the status text node ('Ready' after open, 'Submitted' after a button click).
statusText: string;
// Set once any scroll call is received; switches the label of the last snapshot node.
scrolled: boolean;
};

/** Resources handed to a scenario: the daemon harness, the provider call log, and a disposer. */
export type WebDesktopWorld = {
daemon: ProviderScenarioHarness;
// Flattened record of every call made on the fake web provider, in call order.
semanticCalls: FlatToolCall[];
// Closes the daemon harness; repeated calls are no-ops.
close: () => Promise<void>;
};

/**
 * Creates an in-memory web "world" for provider scenarios.
 *
 * A fake `WebProvider` logs every call as a flattened `['web', operation, ...args]`
 * entry in `semanticCalls` and updates a small page model that backs the snapshots.
 * The provider and a single-device inventory are wired into a scenario harness.
 *
 * @returns The harness, the shared call log, and a `close` that shuts the harness
 *   down at most once.
 */
export async function createWebDesktopWorld(): Promise<WebDesktopWorld> {
  const semanticCalls: FlatToolCall[] = [];
  const page: WebPageState = {
    openedTarget: 'about:blank',
    inputValue: '',
    statusText: 'Ready',
    scrolled: false,
  };

  // Appends one flattened provider call to the shared log.
  const record = (operation: string, ...args: string[]): void => {
    semanticCalls.push(['web', operation, ...args]);
  };

  const provider: WebProvider = {
    async open(target, options) {
      record('open', target, options?.url ?? '');
      page.openedTarget = target;
      page.statusText = 'Ready';
    },

    async close(target) {
      record('close', target ?? '');
    },

    async snapshot(options) {
      record('snapshot', String(options?.interactiveOnly ?? ''), String(options?.surface ?? ''));
      return { nodes: webSnapshotNodes(page), truncated: false };
    },

    async screenshot(outPath, options) {
      record(
        'screenshot',
        outPath,
        String(options?.fullscreen ?? ''),
        String(options?.stabilize ?? ''),
        String(options?.surface ?? ''),
      );
      // Produce a real PNG so file-level assertions on the artifact can succeed.
      fs.writeFileSync(outPath, validPng());
    },

    async click(x, y) {
      record('click', String(x), String(y));
      // Only a click that lands on the button changes the page status.
      if (pointInRect(x, y, BUTTON_RECT)) {
        page.statusText = 'Submitted';
      }
    },

    async fill(x, y, text, options) {
      record('fill', String(x), String(y), text, String(options?.delayMs ?? 0));
      // Only a fill that lands on the input replaces its value.
      if (pointInRect(x, y, INPUT_RECT)) {
        page.inputValue = text;
      }
    },

    async typeText(text, options) {
      record('type', text, String(options?.delayMs ?? 0));
      page.inputValue += text;
    },

    async scroll(direction, options) {
      record('scroll', direction, String(options?.amount ?? ''), String(options?.pixels ?? ''));
      page.scrolled = true;
    },
  };

  const daemon = await createProviderScenarioHarness({
    webProvider: () => provider,
    deviceInventoryProvider: async () => [PROVIDER_SCENARIO_WEB],
  });

  // Guard so the harness is closed at most once, however often `close` is called.
  let shutDown = false;
  const close = async (): Promise<void> => {
    if (!shutDown) {
      shutDown = true;
      await daemon.close();
    }
  };

  return { daemon, semanticCalls, close };
}

/**
 * Builds the six-node snapshot of the fake page from its current state.
 *
 * Node 0 is the document (labelled with the opened target); nodes 1-5 are its direct
 * children: a static 'Ready' text, the 'Email' field, the 'Submit order' button, the
 * status text, and a trailing text whose label depends on whether a scroll happened.
 */
function webSnapshotNodes(state: WebPageState): RawSnapshotNode[] {
  // Flags that are identical on every node of the fake page.
  const interactable = { enabled: true, hittable: true, visibleToUser: true };
  // Every non-root node hangs directly off the document node at index 0.
  const underDocument = { ...interactable, depth: 1, parentIndex: 0 };
  const trailingLabel = state.scrolled ? 'Scrolled section' : 'Below the fold';

  const documentNode: RawSnapshotNode = {
    index: 0,
    role: 'document',
    label: state.openedTarget,
    rect: { x: 0, y: 0, width: 390, height: 720 },
    ...interactable,
    depth: 0,
  };
  const headingNode: RawSnapshotNode = {
    index: 1,
    role: 'static text',
    label: 'Ready',
    rect: { x: 24, y: 32, width: 160, height: 28 },
    ...underDocument,
  };
  const emailNode: RawSnapshotNode = {
    index: 2,
    role: 'text field',
    label: 'Email',
    value: state.inputValue,
    rect: INPUT_RECT,
    ...underDocument,
  };
  const submitNode: RawSnapshotNode = {
    index: 3,
    role: 'button',
    label: 'Submit order',
    rect: BUTTON_RECT,
    ...underDocument,
  };
  const statusNode: RawSnapshotNode = {
    index: 4,
    role: 'static text',
    label: state.statusText,
    rect: { x: 24, y: 204, width: 180, height: 28 },
    ...underDocument,
  };
  const trailingNode: RawSnapshotNode = {
    index: 5,
    role: 'static text',
    label: trailingLabel,
    rect: { x: 24, y: 620, width: 180, height: 28 },
    ...underDocument,
  };

  return [documentNode, headingNode, emailNode, submitNode, statusNode, trailingNode];
}

/**
 * Reports whether the point (x, y) lies within `rect`.
 *
 * All four edges count as inside, so a point exactly on the right or bottom border
 * still matches.
 */
function pointInRect(
  x: number,
  y: number,
  rect: { x: number; y: number; width: number; height: number },
): boolean {
  const rightEdge = rect.x + rect.width;
  const bottomEdge = rect.y + rect.height;
  const withinColumns = rect.x <= x && x <= rightEdge;
  const withinRows = rect.y <= y && y <= bottomEdge;
  return withinColumns && withinRows;
}
Loading