From 7c25a42d6e2ddb92f0917e17a6e1740d759575cd Mon Sep 17 00:00:00 2001 From: Cristian Pufu Date: Tue, 24 Feb 2026 09:54:18 +0200 Subject: [PATCH] feat: add evaluations UI with eval sets, runs, evaluators - Add eval data models, service layer, and REST/WS routes - Add frontend eval pages with I/O, evaluators, and eval run results tabs - Add evaluators management with create/edit forms and card-based layout - Restructure sidebar with shared header and activity bar - Add resizable split-pane layout with animations - Route-driven item selection for eval runs - Auto-select latest run or first eval set - Bump version to 0.0.61 Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 4 +- src/uipath/dev/models/eval_data.py | 132 ++ src/uipath/dev/server/__init__.py | 116 +- src/uipath/dev/server/app.py | 8 + .../dev/server/frontend/package-lock.json | 72 + src/uipath/dev/server/frontend/package.json | 1 + src/uipath/dev/server/frontend/src/App.tsx | 392 +++- .../server/frontend/src/api/agent-client.ts | 20 + .../server/frontend/src/api/eval-client.ts | 126 ++ .../frontend/src/api/explorer-client.ts | 37 + .../dev/server/frontend/src/api/websocket.ts | 21 + .../src/components/agent/AgentChatSidebar.tsx | 388 ++++ .../src/components/agent/AgentMessage.tsx | 247 ++ .../src/components/chat/ChatInput.tsx | 5 +- .../src/components/chat/ChatInterrupt.tsx | 21 +- .../src/components/chat/ChatMessage.tsx | 6 +- .../src/components/debug/DebugControls.tsx | 6 +- .../components/evals/CreateEvalSetView.tsx | 165 ++ .../src/components/evals/EvalRunResults.tsx | 710 ++++++ .../src/components/evals/EvalSetDetail.tsx | 518 +++++ .../src/components/evals/EvalsSidebar.tsx | 141 ++ .../evaluators/CreateEvaluatorView.tsx | 272 +++ .../components/evaluators/EvaluatorDetail.tsx | 559 +++++ .../evaluators/EvaluatorsSidebar.tsx | 107 + .../components/explorer/ExplorerSidebar.tsx | 147 ++ .../src/components/explorer/FileEditor.tsx | 381 ++++ .../src/components/graph/GraphPanel.tsx | 18 +- .../src/components/layout/ActivityBar.tsx | 108 + .../src/components/layout/DebugSidebar.tsx | 63 + .../src/components/layout/SidePanel.tsx | 23 + .../src/components/layout/Sidebar.tsx | 4 +- .../src/components/layout/StatusBar.tsx | 14 +- .../src/components/runs/AddToEvalModal.tsx | 442 ++++ .../src/components/runs/NewRunPanel.tsx | 8 +- .../src/components/runs/RunDetailsPanel.tsx | 96 +- .../src/components/runs/RunHistoryItem.tsx | 133 +- .../src/components/runs/SetupView.tsx | 24 +- .../src/components/shared/DataSection.tsx | 48 + .../src/components/shared/ToastContainer.tsx | 58 + .../src/components/traces/SpanDetails.tsx | 16 +- .../src/components/traces/TraceTree.tsx | 35 +- .../server/frontend/src/hooks/useHashRoute.ts | 128 +- .../frontend/src/store/useAgentStore.ts | 228 ++ .../server/frontend/src/store/useEvalStore.ts | 109 + .../frontend/src/store/useExplorerStore.ts | 89 + .../frontend/src/store/useToastStore.ts | 30 + .../server/frontend/src/store/useWebSocket.ts | 100 +- .../dev/server/frontend/src/styles/global.css | 90 +- .../dev/server/frontend/src/types/agent.ts | 37 + .../dev/server/frontend/src/types/eval.ts | 71 + .../dev/server/frontend/src/types/explorer.ts | 12 + .../dev/server/frontend/src/types/ws.ts | 25 +- .../dev/server/frontend/tsconfig.tsbuildinfo | 2 +- src/uipath/dev/server/frontend/vite.config.ts | 1 + src/uipath/dev/server/routes/agent.py | 65 + src/uipath/dev/server/routes/evals.py | 150 ++ src/uipath/dev/server/routes/evaluators.py | 327 +++ src/uipath/dev/server/routes/files.py | 195 ++ src/uipath/dev/server/serializers.py | 29 +- .../static/assets/ChatPanel-CeP3-CFA.js | 36 - .../static/assets/ChatPanel-CiK6YSHu.js | 2 + .../server/static/assets/index-BL2n_TWc.css | 1 - .../server/static/assets/index-BhpA3bEW.css | 1 + .../server/static/assets/index-CQlfl4ed.js | 42 - .../server/static/assets/index-Gpw0SLbu.js | 106 + ...elk-CTNP4r_q.js => vendor-elk-CzxJ-xdZ.js} | 2 +- .../static/assets/vendor-react-BN_uQvcy.js | 59 + .../static/assets/vendor-react-BVoutfaX.js | 49 - ...21rT8r.js => vendor-reactflow-BP_V7ttx.js} | 2 +- src/uipath/dev/server/static/index.html | 8 +- src/uipath/dev/server/watcher.py | 13 +- src/uipath/dev/server/ws/handler.py | 25 + src/uipath/dev/server/ws/manager.py | 136 ++ src/uipath/dev/server/ws/protocol.py | 14 + src/uipath/dev/services/agent_service.py | 789 +++++++ src/uipath/dev/services/eval_service.py | 459 ++++ src/uipath/dev/services/run_service.py | 35 +- src/uipath/dev/services/skill_service.py | 112 + src/uipath/dev/skills/uipath/SKILL.md | 173 ++ .../uipath/references/creating-agents.md | 95 + .../skills/uipath/references/evaluations.md | 1999 +++++++++++++++++ .../skills/uipath/references/evaluators.md | 1701 ++++++++++++++ .../uipath/references/running-agents.md | 99 + .../dev/skills/uipath/references/tracing.md | 239 ++ uv.lock | 1115 ++++++++- 85 files changed, 14209 insertions(+), 483 deletions(-) create mode 100644 src/uipath/dev/models/eval_data.py create mode 100644 src/uipath/dev/server/frontend/src/api/agent-client.ts create mode 100644 src/uipath/dev/server/frontend/src/api/eval-client.ts create mode 100644 src/uipath/dev/server/frontend/src/api/explorer-client.ts create mode 100644 src/uipath/dev/server/frontend/src/components/agent/AgentChatSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/agent/AgentMessage.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/CreateEvalSetView.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/EvaluatorDetail.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/EvaluatorsSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/explorer/ExplorerSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/explorer/FileEditor.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/ActivityBar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/DebugSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/SidePanel.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/runs/AddToEvalModal.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/shared/DataSection.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/shared/ToastContainer.tsx create mode 100644 src/uipath/dev/server/frontend/src/store/useAgentStore.ts create mode 100644 src/uipath/dev/server/frontend/src/store/useEvalStore.ts create mode 100644 src/uipath/dev/server/frontend/src/store/useExplorerStore.ts create mode 100644 src/uipath/dev/server/frontend/src/store/useToastStore.ts create mode 100644 src/uipath/dev/server/frontend/src/types/agent.ts create mode 100644 src/uipath/dev/server/frontend/src/types/eval.ts create mode 100644 src/uipath/dev/server/frontend/src/types/explorer.ts create mode 100644 src/uipath/dev/server/routes/agent.py create mode 100644 src/uipath/dev/server/routes/evals.py create mode 100644 src/uipath/dev/server/routes/evaluators.py create mode 100644 src/uipath/dev/server/routes/files.py delete mode 100644 src/uipath/dev/server/static/assets/ChatPanel-CeP3-CFA.js create mode 100644 src/uipath/dev/server/static/assets/ChatPanel-CiK6YSHu.js delete mode 100644 src/uipath/dev/server/static/assets/index-BL2n_TWc.css create mode 100644 src/uipath/dev/server/static/assets/index-BhpA3bEW.css delete mode 100644 src/uipath/dev/server/static/assets/index-CQlfl4ed.js create mode 100644 src/uipath/dev/server/static/assets/index-Gpw0SLbu.js rename src/uipath/dev/server/static/assets/{vendor-elk-CTNP4r_q.js => vendor-elk-CzxJ-xdZ.js} (99%) create mode 100644 src/uipath/dev/server/static/assets/vendor-react-BN_uQvcy.js delete mode 100644 src/uipath/dev/server/static/assets/vendor-react-BVoutfaX.js rename src/uipath/dev/server/static/assets/{vendor-reactflow-mU21rT8r.js => vendor-reactflow-BP_V7ttx.js} (99%) create mode 100644 src/uipath/dev/services/agent_service.py create mode 100644 src/uipath/dev/services/eval_service.py create mode 100644 src/uipath/dev/services/skill_service.py create mode 100644 src/uipath/dev/skills/uipath/SKILL.md create mode 100644 src/uipath/dev/skills/uipath/references/creating-agents.md create mode 100644 src/uipath/dev/skills/uipath/references/evaluations.md create mode 100644 src/uipath/dev/skills/uipath/references/evaluators.md create mode 100644 src/uipath/dev/skills/uipath/references/running-agents.md create mode 100644 src/uipath/dev/skills/uipath/references/tracing.md diff --git a/pyproject.toml b/pyproject.toml index b33bfdf..4408913 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath-dev" -version = "0.0.60" +version = "0.0.61" description = "UiPath Developer Console" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" @@ -10,6 +10,8 @@ dependencies = [ "pyperclip>=1.11.0, <2.0.0", "fastapi>=0.128.8", "uvicorn[standard]>=0.40.0", + "uipath>=2.10.0, <2.11.0", + "openai", ] classifiers = [ "Intended Audience :: Developers", diff --git a/src/uipath/dev/models/eval_data.py b/src/uipath/dev/models/eval_data.py new file mode 100644 index 0000000..8fda331 --- /dev/null +++ b/src/uipath/dev/models/eval_data.py @@ -0,0 +1,132 @@ +"""Data models for evaluation runs.""" + +from __future__ import annotations + +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + + +@dataclass +class EvalSetInfo: + """Summary of a discovered evaluation set.""" + + id: str + name: str + eval_count: int + evaluator_ids: list[str] + + +@dataclass +class EvalItemResult: + """Result of evaluating a single item.""" + + name: str + inputs: dict[str, Any] = field(default_factory=dict) + expected_output: Any = None + scores: dict[str, float] = field(default_factory=dict) + overall_score: float = 0.0 + output: Any = None + justifications: dict[str, str] = field(default_factory=dict) + duration_ms: float | None = None + status: str = "pending" # pending | running | completed | failed + error: str | None = None + traces: list[dict[str, Any]] = field(default_factory=list) + + +@dataclass +class EvalRunState: + """Full state of an eval run.""" + + id: str = field(default_factory=lambda: str(uuid.uuid4())) + eval_set_id: str = "" + eval_set_name: str = "" + status: str = "pending" # pending | running | completed | failed + progress_completed: int = 0 + progress_total: int = 0 + overall_score: float | None = None + evaluator_scores: dict[str, float] = field(default_factory=dict) + results: list[EvalItemResult] = field(default_factory=list) + start_time: datetime | None = None + end_time: datetime | None = None + + def to_summary(self) -> dict[str, Any]: + """Serialize to summary dict (no per-item results).""" + return { + "id": self.id, + "eval_set_id": self.eval_set_id, + "eval_set_name": self.eval_set_name, + "status": self.status, + "progress_completed": self.progress_completed, + "progress_total": self.progress_total, + "overall_score": self.overall_score, + "evaluator_scores": self.evaluator_scores, + "start_time": self.start_time.isoformat() if self.start_time else None, + "end_time": self.end_time.isoformat() if self.end_time else None, + } + + def to_detail(self) -> dict[str, Any]: + """Serialize to detail dict (includes per-item results).""" + base = self.to_summary() + base["results"] = [ + { + "name": r.name, + "inputs": r.inputs, + "expected_output": r.expected_output, + "scores": r.scores, + "overall_score": r.overall_score, + "output": str(r.output) + if isinstance(r.output, Exception) + else r.output, + "justifications": r.justifications, + "duration_ms": r.duration_ms, + "status": r.status, + "error": r.error, + "traces": r.traces, + } + for r in self.results + ] + return base + + def start(self) -> None: + """Mark run as started.""" + self.status = "running" + self.start_time = datetime.now(timezone.utc) + + def complete(self) -> None: + """Mark run as completed, computing final scores.""" + self.status = "completed" + self.end_time = datetime.now(timezone.utc) + self._compute_scores() + + def fail(self) -> None: + """Mark run as failed.""" + self.status = "failed" + self.end_time = datetime.now(timezone.utc) + + def _compute_scores(self) -> None: + """Compute overall and per-evaluator scores from item results.""" + scored = [r for r in self.results if r.status in ("completed", "failed")] + if not scored: + self.overall_score = 0.0 + return + + # Per-evaluator averages (failed items count as 0 for each evaluator) + evaluator_totals: dict[str, list[float]] = {} + for r in scored: + for ev_id, score in r.scores.items(): + evaluator_totals.setdefault(ev_id, []).append(score) + + failed = [r for r in scored if r.status == "failed"] + for ev_id in evaluator_totals: + for _ in failed: + evaluator_totals[ev_id].append(0.0) + + self.evaluator_scores = { + ev_id: sum(scores) / len(scores) + for ev_id, scores in evaluator_totals.items() + } + + # Overall = average of item overall_scores + self.overall_score = sum(r.overall_score for r in scored) / len(scored) diff --git a/src/uipath/dev/server/__init__.py b/src/uipath/dev/server/__init__.py index 2e3ba31..ad80bc7 100644 --- a/src/uipath/dev/server/__init__.py +++ b/src/uipath/dev/server/__init__.py @@ -24,9 +24,13 @@ StateData, TraceData, ) +from uipath.dev.models.eval_data import EvalItemResult, EvalRunState from uipath.dev.models.execution import ExecutionRun from uipath.dev.server.debug_bridge import WebDebugBridge +from uipath.dev.services.agent_service import AgentService +from uipath.dev.services.eval_service import EvalService from uipath.dev.services.run_service import RunService +from uipath.dev.services.skill_service import SkillService logger = logging.getLogger(__name__) @@ -86,6 +90,27 @@ def __init__( on_run_removed=self.connection_manager.remove_run_subscriptions, ) + self.eval_service = EvalService( + runtime_factory=self.runtime_factory, + trace_manager=self.trace_manager, + on_eval_run_created=self._on_eval_run_created, + on_eval_run_progress=self._on_eval_run_progress, + on_eval_run_completed=self._on_eval_run_completed, + ) + + self.skill_service = SkillService() + + self.agent_service = AgentService( + skill_service=self.skill_service, + on_status=self._on_agent_status, + on_text=self._on_agent_text, + on_plan=self._on_agent_plan, + on_tool_use=self._on_agent_tool_use, + on_tool_result=self._on_agent_tool_result, + on_tool_approval=self._on_agent_tool_approval, + on_error=self._on_agent_error, + ) + def create_app(self) -> Any: """Create and return a FastAPI application.""" from uipath.dev.server.app import create_app @@ -111,9 +136,8 @@ async def run_async(self) -> None: daemon=True, ).start() - # Start file watcher if factory_creator is available - if self.factory_creator is not None: - self._start_watcher() + # Start file watcher for editor auto-refresh and factory hot-reload + self._start_watcher() config = uvicorn.Config( app, @@ -180,11 +204,11 @@ async def reload_factory(self) -> None: def _start_watcher(self) -> None: """Start the file watcher background task.""" - from uipath.dev.server.watcher import watch_python_files + from uipath.dev.server.watcher import watch_project_files self._watcher_stop = asyncio.Event() self._watcher_task = asyncio.create_task( - watch_python_files( + watch_project_files( on_change=self._on_files_changed, stop_event=self._watcher_stop, ) @@ -200,8 +224,24 @@ def _stop_watcher(self) -> None: def _on_files_changed(self, changed_files: list[str]) -> None: """Handle file change events from the watcher.""" - self.reload_pending = True - self.connection_manager.broadcast_reload(changed_files) + # Convert to relative paths with forward slashes for frontend + cwd = os.getcwd() + relative_files = [] + for f in changed_files: + try: + relative_files.append(os.path.relpath(f, cwd).replace("\\", "/")) + except ValueError: + continue # different drive on Windows + + # Broadcast files.changed for editor auto-refresh + if relative_files: + self.connection_manager.broadcast_files_changed(relative_files) + + # Factory hot-reload for Python files only + py_files = [f for f in changed_files if f.endswith((".py", ".pyx"))] + if py_files and self.factory_creator is not None: + self.reload_pending = True + self.connection_manager.broadcast_reload(py_files) # ------------------------------------------------------------------ # Internal callbacks @@ -231,6 +271,68 @@ def _on_state(self, state_data: StateData) -> None: """Broadcast state transition to subscribed WebSocket clients.""" self.connection_manager.broadcast_state(state_data) + def _on_eval_run_created(self, run: EvalRunState) -> None: + """Broadcast eval run created to all connected clients.""" + self.connection_manager.broadcast_eval_run_created(run) + + def _on_eval_run_progress( + self, + run_id: str, + completed: int, + total: int, + item_result: EvalItemResult | None, + ) -> None: + """Broadcast eval run progress to all connected clients.""" + self.connection_manager.broadcast_eval_run_progress( + run_id, completed, total, item_result + ) + + def _on_eval_run_completed(self, run: EvalRunState) -> None: + """Broadcast eval run completed to all connected clients.""" + self.connection_manager.broadcast_eval_run_completed(run) + + def _on_agent_status(self, session_id: str, status: str) -> None: + """Broadcast agent status to all connected clients.""" + self.connection_manager.broadcast_agent_status(session_id, status) + + def _on_agent_text(self, session_id: str, content: str, done: bool) -> None: + """Broadcast agent text to all connected clients.""" + self.connection_manager.broadcast_agent_text(session_id, content, done) + + def _on_agent_plan(self, session_id: str, items: list[dict[str, str]]) -> None: + """Broadcast agent plan to all connected clients.""" + self.connection_manager.broadcast_agent_plan(session_id, items) + + def _on_agent_tool_use( + self, session_id: str, tool: str, args: dict[str, Any] + ) -> None: + """Broadcast agent tool use to all connected clients.""" + self.connection_manager.broadcast_agent_tool_use(session_id, tool, args) + + def _on_agent_tool_result( + self, session_id: str, tool: str, result: str, is_error: bool + ) -> None: + """Broadcast agent tool result to all connected clients.""" + self.connection_manager.broadcast_agent_tool_result( + session_id, tool, result, is_error + ) + + def _on_agent_tool_approval( + self, + session_id: str, + tool_call_id: str, + tool: str, + args: dict[str, Any], + ) -> None: + """Broadcast agent tool approval request to all connected clients.""" + self.connection_manager.broadcast_agent_tool_approval( + session_id, tool_call_id, tool, args + ) + + def _on_agent_error(self, session_id: str, message: str) -> None: + """Broadcast agent error to all connected clients.""" + self.connection_manager.broadcast_agent_error(session_id, message) + @staticmethod def _find_free_port(host: str, start_port: int, max_attempts: int = 100) -> int: """Find a free port starting from *start_port*. diff --git a/src/uipath/dev/server/app.py b/src/uipath/dev/server/app.py index d608338..9b2f8e8 100644 --- a/src/uipath/dev/server/app.py +++ b/src/uipath/dev/server/app.py @@ -149,7 +149,11 @@ async def _config(): return {"auth_enabled": auth_enabled, **_user_project} # Register routes + from uipath.dev.server.routes.agent import router as agent_router from uipath.dev.server.routes.entrypoints import router as entrypoints_router + from uipath.dev.server.routes.evals import router as evals_router + from uipath.dev.server.routes.evaluators import router as evaluators_router + from uipath.dev.server.routes.files import router as files_router from uipath.dev.server.routes.graph import router as graph_router from uipath.dev.server.routes.reload import router as reload_router from uipath.dev.server.routes.runs import router as runs_router @@ -166,6 +170,10 @@ async def _config(): app.include_router(runs_router, prefix="/api") app.include_router(graph_router, prefix="/api") app.include_router(reload_router, prefix="/api") + app.include_router(evaluators_router, prefix="/api") + app.include_router(evals_router, prefix="/api") + app.include_router(agent_router, prefix="/api") + app.include_router(files_router, prefix="/api") app.include_router(ws_router) # Auto-build frontend if source is available and build is stale diff --git a/src/uipath/dev/server/frontend/package-lock.json b/src/uipath/dev/server/frontend/package-lock.json index 30693ab..32e99fe 100644 --- a/src/uipath/dev/server/frontend/package-lock.json +++ b/src/uipath/dev/server/frontend/package-lock.json @@ -8,6 +8,7 @@ "name": "uipath-dev-frontend", "version": "0.1.0", "dependencies": { + "@monaco-editor/react": "^4.7.0", "elkjs": "^0.11.0", "react": "^19.0.0", "react-dom": "^19.0.0", @@ -801,6 +802,29 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@monaco-editor/loader": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/@monaco-editor/loader/-/loader-1.7.0.tgz", + "integrity": "sha512-gIwR1HrJrrx+vfyOhYmCZ0/JcWqG5kbfG7+d3f/C1LXk2EvzAbHSg3MQ5lO2sMlo9izoAZ04shohfKLVT6crVA==", + "license": "MIT", + "dependencies": { + "state-local": "^1.0.6" + } + }, + "node_modules/@monaco-editor/react": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/@monaco-editor/react/-/react-4.7.0.tgz", + "integrity": "sha512-cyzXQCtO47ydzxpQtCGSQGOC8Gk3ZUeBXFAxD+CWXYFo5OqZyZUonFl0DwUlTyAfRHntBfw2p3w4s9R6oe1eCA==", + "license": "MIT", + "dependencies": { + "@monaco-editor/loader": "^1.5.0" + }, + "peerDependencies": { + "monaco-editor": ">= 0.25.0 < 1", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/@reactflow/background": { "version": "11.3.14", "resolved": "https://registry.npmjs.org/@reactflow/background/-/background-11.3.14.tgz", @@ -2071,6 +2095,14 @@ "@types/react": "^19.2.0" } }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", @@ -2425,6 +2457,16 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/dompurify": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.7.tgz", + "integrity": "sha512-WhL/YuveyGXJaerVlMYGWhvQswa7myDG17P7Vu65EWC05o8vfeNbvNf4d/BOvH99+ZW+LlQsc1GDKMa1vNK6dw==", + "license": "(MPL-2.0 OR Apache-2.0)", + "peer": true, + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, "node_modules/electron-to-chromium": { "version": "1.5.286", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.286.tgz", @@ -3091,6 +3133,19 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/marked": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/marked/-/marked-14.0.0.tgz", + "integrity": "sha512-uIj4+faQ+MgHgwUW1l2PsPglZLOLOT1uErt06dAPtx2kjteLAkbsd/0FiYg/MGS+i7ZKLb7w2WClxHkzOOuryQ==", + "license": "MIT", + "peer": true, + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/mdast-util-find-and-replace": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz", @@ -3924,6 +3979,17 @@ ], "license": "MIT" }, + "node_modules/monaco-editor": { + "version": "0.55.1", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.55.1.tgz", + "integrity": "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A==", + "license": "MIT", + "peer": true, + "dependencies": { + "dompurify": "3.2.7", + "marked": "14.0.0" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -4280,6 +4346,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/state-local": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/state-local/-/state-local-1.0.7.tgz", + "integrity": "sha512-HTEHMNieakEnoe33shBYcZ7NX83ACUjCu8c40iOGEZsngj9zRnkqS9j1pqQPXwobB0ZcVTk27REb7COQ0UR59w==", + "license": "MIT" + }, "node_modules/stringify-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", diff --git a/src/uipath/dev/server/frontend/package.json b/src/uipath/dev/server/frontend/package.json index 9638678..a4cea6a 100644 --- a/src/uipath/dev/server/frontend/package.json +++ b/src/uipath/dev/server/frontend/package.json @@ -16,6 +16,7 @@ "reactflow": "^11.11.4", "rehype-highlight": "^7.0.2", "remark-gfm": "^4.0.1", + "@monaco-editor/react": "^4.7.0", "zustand": "^5.0.0" }, "devDependencies": { diff --git a/src/uipath/dev/server/frontend/src/App.tsx b/src/uipath/dev/server/frontend/src/App.tsx index 8249df3..af87a67 100644 --- a/src/uipath/dev/server/frontend/src/App.tsx +++ b/src/uipath/dev/server/frontend/src/App.tsx @@ -6,18 +6,38 @@ import { useWebSocket } from "./store/useWebSocket"; import { listRuns, listEntrypoints, getRun } from "./api/client"; import type { RunDetail } from "./types/run"; import { useHashRoute } from "./hooks/useHashRoute"; +import type { Section } from "./hooks/useHashRoute"; import { useIsMobile } from "./hooks/useIsMobile"; -import Sidebar from "./components/layout/Sidebar"; +import ActivityBar from "./components/layout/ActivityBar"; +import DebugSidebar from "./components/layout/DebugSidebar"; import StatusBar from "./components/layout/StatusBar"; import NewRunPanel from "./components/runs/NewRunPanel"; import SetupView from "./components/runs/SetupView"; import RunDetailsPanel from "./components/runs/RunDetailsPanel"; import ReloadToast from "./components/shared/ReloadToast"; +import ToastContainer from "./components/shared/ToastContainer"; +import { useEvalStore } from "./store/useEvalStore"; +import { listEvalSets, listEvaluators, listEvalRuns, listLocalEvaluators } from "./api/eval-client"; +import EvalsSidebar from "./components/evals/EvalsSidebar"; +import EvalSetDetail from "./components/evals/EvalSetDetail"; +import EvalRunResults from "./components/evals/EvalRunResults"; +import CreateEvalSetView from "./components/evals/CreateEvalSetView"; +import EvaluatorsSidebar from "./components/evaluators/EvaluatorsSidebar"; +import EvaluatorsView from "./components/evaluators/EvaluatorDetail"; +import CreateEvaluatorView from "./components/evaluators/CreateEvaluatorView"; +import ExplorerSidebar from "./components/explorer/ExplorerSidebar"; +import FileEditor from "./components/explorer/FileEditor"; +import AgentChatSidebar from "./components/agent/AgentChatSidebar"; +import { useExplorerStore } from "./store/useExplorerStore"; export default function App() { const ws = useWebSocket(); const isMobile = useIsMobile(); const [sidebarOpen, setSidebarOpen] = useState(false); + const [sidebarWidth, setSidebarWidth] = useState(248); + const [isDraggingSidebar, setIsDraggingSidebar] = useState(false); + const [agentWidth, setAgentWidth] = useState(380); + const [isDraggingAgent, setIsDraggingAgent] = useState(false); const { runs, selectedRunId, @@ -33,14 +53,31 @@ export default function App() { setActiveNode, removeActiveNode, } = useRunStore(); - const { view, runId: routeRunId, setupEntrypoint, setupMode, navigate } = useHashRoute(); + const { + section, + view, + runId: routeRunId, + setupEntrypoint, + setupMode, + evalCreating, + evalSetId, + evalRunId, + evalRunItemName, + evaluatorCreateType, + evaluatorId, + evaluatorFilter, + explorerFile, + navigate, + } = useHashRoute(); + + const { setEvalSets, setEvaluators, setLocalEvaluators, setEvalRuns } = useEvalStore(); // Sync route runId → store selection useEffect(() => { - if (view === "details" && routeRunId && routeRunId !== selectedRunId) { + if (section === "debug" && view === "details" && routeRunId && routeRunId !== selectedRunId) { selectRun(routeRunId); } - }, [view, routeRunId, selectedRunId, selectRun]); + }, [section, view, routeRunId, selectedRunId, selectRun]); // Load existing runs, entrypoints, auth status, and config on mount const initAuth = useAuthStore((s) => s.init); @@ -54,6 +91,49 @@ export default function App() { initConfig(); }, [setRuns, setEntrypoints, initAuth, initConfig]); + // Load eval data when switching to evals/evaluators section + useEffect(() => { + if (section === "evals") { + listEvalSets().then((sets) => setEvalSets(sets)).catch(console.error); + listEvalRuns().then((runs) => setEvalRuns(runs)).catch(console.error); + } + if (section === "evals" || section === "evaluators") { + listEvaluators().then((evs) => setEvaluators(evs)).catch(console.error); + listLocalEvaluators().then((evs) => setLocalEvaluators(evs)).catch(console.error); + } + }, [section, setEvalSets, setEvaluators, setLocalEvaluators, setEvalRuns]); + + // Auto-select latest run or first eval set when navigating to evals with no selection + const evalSets = useEvalStore((s) => s.evalSets); + const evalRuns = useEvalStore((s) => s.evalRuns); + useEffect(() => { + if (section !== "evals" || evalCreating || evalSetId || evalRunId) return; + // Pick latest run by start_time + const runs = Object.values(evalRuns).sort( + (a, b) => new Date(b.start_time ?? 0).getTime() - new Date(a.start_time ?? 0).getTime(), + ); + if (runs.length > 0) { + navigate(`#/evals/runs/${runs[0].id}`); + return; + } + // Fallback: first eval set + const sets = Object.values(evalSets); + if (sets.length > 0) { + navigate(`#/evals/sets/${sets[0].id}`); + } + }, [section, evalCreating, evalSetId, evalRunId, evalRuns, evalSets, navigate]); + + // Keyboard shortcuts + useEffect(() => { + const onKeyDown = (e: KeyboardEvent) => { + if (e.key === "Escape" && sidebarOpen) { + setSidebarOpen(false); + } + }; + window.addEventListener("keydown", onKeyDown); + return () => window.removeEventListener("keydown", onKeyDown); + }, [sidebarOpen]); + const selectedRun = selectedRunId ? runs[selectedRunId] : null; // Shared helper: apply a full run detail response to the store @@ -169,70 +249,288 @@ export default function App() { }, [selectedRunId, selectedRun?.status, applyRunDetail]); const handleRunCreated = (runId: string) => { - navigate(`#/runs/${runId}/traces`); + navigate(`#/debug/runs/${runId}/traces`); selectRun(runId); setSidebarOpen(false); }; const handleSelectRun = (runId: string) => { - navigate(`#/runs/${runId}/traces`); + navigate(`#/debug/runs/${runId}/traces`); selectRun(runId); setSidebarOpen(false); }; const handleNewRun = () => { - navigate("#/new"); + navigate("#/debug/new"); setSidebarOpen(false); }; - return ( -
-
- {/* Mobile hamburger button */} - {isMobile && !sidebarOpen && ( - - )} - { + if (s === "debug") navigate("#/debug/new"); + else if (s === "evals") navigate("#/evals"); + else if (s === "evaluators") navigate("#/evaluators"); + else if (s === "explorer") navigate("#/explorer"); + }; + + // --- Sidebar col resize --- + const onSidebarResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + setIsDraggingSidebar(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = sidebarWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const clientX = "touches" in ev ? ev.touches[0].clientX : ev.clientX; + const newW = Math.max(200, Math.min(480, startW + (clientX - startX))); + setSidebarWidth(newW); + }; + + const onUp = () => { + setIsDraggingSidebar(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [sidebarWidth]); + + // --- Agent panel col resize --- + const onAgentResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + setIsDraggingAgent(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = agentWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const clientX = "touches" in ev ? ev.touches[0].clientX : ev.clientX; + // Dragging left increases width (panel is on the right) + const newW = Math.max(280, Math.min(500, startW - (clientX - startX))); + setAgentWidth(newW); + }; + + const onUp = () => { + setIsDraggingAgent(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [agentWidth]); + + const explorerTabs = useExplorerStore((s) => s.openTabs); + + // --- Render main content based on section --- + const renderMainContent = () => { + if (section === "explorer") { + if (explorerTabs.length > 0 || explorerFile) return ; + return ( +
+ Select a file to view +
+ ); + } + + if (section === "evals") { + if (evalCreating) return ; + if (evalRunId) return ; + if (evalSetId) return ; + return ; + } + + if (section === "evaluators") { + if (evaluatorCreateType) { + return ; + } + return ; + } + + // Debug section + if (view === "new") { + return ; + } + if (view === "setup" && setupEntrypoint && setupMode) { + return ( + setSidebarOpen(false)} /> -
- {view === "new" ? ( - - ) : view === "setup" && setupEntrypoint && setupMode ? ( - - ) : selectedRun ? ( - - ) : ( -
- Select a run or create a new one + ); + } + if (selectedRun) { + return ; + } + return ( +
+ Select a run or create a new one +
+ ); + }; + + // --- Mobile layout --- + if (isMobile) { + return ( +
+
+ {!sidebarOpen && ( + + )} + {sidebarOpen && ( + <> +
setSidebarOpen(false)} + /> + + + )} +
+ {renderMainContent()} +
+
+ + + +
+ ); + } + + // --- Desktop layout --- + return ( +
+
+ {/* Left aside: shared header + ActivityBar + section sidebar */} + +
+
+
+ {renderMainContent()} +
+ {section === "explorer" && ( + <> +
+
+ +
+ )}
+
); } diff --git a/src/uipath/dev/server/frontend/src/api/agent-client.ts b/src/uipath/dev/server/frontend/src/api/agent-client.ts new file mode 100644 index 0000000..318b247 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/api/agent-client.ts @@ -0,0 +1,20 @@ +import type { AgentModel, AgentSkill } from "../types/agent"; + +const BASE = "/api"; + +export async function listAgentModels(): Promise { + const res = await fetch(`${BASE}/agent/models`); + if (!res.ok) { + if (res.status === 401) return []; + throw new Error(`HTTP ${res.status}`); + } + return res.json(); +} + +export async function listAgentSkills(): Promise { + const res = await fetch(`${BASE}/agent/skills`); + if (!res.ok) { + throw new Error(`HTTP ${res.status}`); + } + return res.json(); +} diff --git a/src/uipath/dev/server/frontend/src/api/eval-client.ts b/src/uipath/dev/server/frontend/src/api/eval-client.ts new file mode 100644 index 0000000..eb6baa3 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/api/eval-client.ts @@ -0,0 +1,126 @@ +import type { EvaluatorInfo, LocalEvaluator, EvalSetSummary, EvalSetDetail, EvalItem, EvalRunSummary, EvalRunDetail } from "../types/eval"; + +const BASE = "/api"; + +async function fetchJson(url: string, options?: RequestInit): Promise { + const res = await fetch(url, options); + if (!res.ok) { + let errorDetail; + try { + const body = await res.json(); + errorDetail = body.detail || res.statusText; + } catch { + errorDetail = res.statusText; + } + const error = new Error(`HTTP ${res.status}`); + (error as any).detail = errorDetail; + (error as any).status = res.status; + throw error; + } + return res.json(); +} + +export async function listEvaluators(): Promise { + return fetchJson(`${BASE}/evaluators`); +} + +export async function listEvalSets(): Promise { + return fetchJson(`${BASE}/eval-sets`); +} + +export async function createEvalSet(body: { + name: string; + evaluator_refs: string[]; +}): Promise { + return fetchJson(`${BASE}/eval-sets`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} + +export async function addEvalItem( + evalSetId: string, + item: { + name: string; + inputs: Record; + expected_output: unknown; + evaluation_criterias?: Record>; + }, +): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/items`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(item), + }); +} + +export async function deleteEvalItem( + evalSetId: string, + itemName: string, +): Promise { + await fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/items/${encodeURIComponent(itemName)}`, { + method: "DELETE", + }); +} + +export async function getEvalSet(id: string): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(id)}`); +} + +export async function startEvalRun(evalSetId: string): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/runs`, { + method: "POST", + }); +} + +export async function listEvalRuns(): Promise { + return fetchJson(`${BASE}/eval-runs`); +} + +export async function getEvalRun(id: string): Promise { + return fetchJson(`${BASE}/eval-runs/${encodeURIComponent(id)}`); +} + +export async function listLocalEvaluators(): Promise { + return fetchJson(`${BASE}/local-evaluators`); +} + +export async function createLocalEvaluator(body: { + name: string; + description: string; + evaluator_type_id: string; + config: Record; +}): Promise { + return fetchJson(`${BASE}/local-evaluators`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} + +export async function updateEvalSetEvaluators( + evalSetId: string, + evaluatorRefs: string[], +): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/evaluators`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ evaluator_refs: evaluatorRefs }), + }); +} + +export async function updateLocalEvaluator( + id: string, + body: { + description?: string; + evaluator_type_id?: string; + config?: Record; + }, +): Promise { + return fetchJson(`${BASE}/local-evaluators/${encodeURIComponent(id)}`, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} diff --git a/src/uipath/dev/server/frontend/src/api/explorer-client.ts b/src/uipath/dev/server/frontend/src/api/explorer-client.ts new file mode 100644 index 0000000..e178222 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/api/explorer-client.ts @@ -0,0 +1,37 @@ +import type { FileEntry, FileContent } from "../types/explorer"; + +const BASE = "/api"; + +async function fetchJson(url: string, options?: RequestInit): Promise { + const res = await fetch(url, options); + if (!res.ok) { + let errorDetail; + try { + const body = await res.json(); + errorDetail = body.detail || res.statusText; + } catch { + errorDetail = res.statusText; + } + const error = new Error(`HTTP ${res.status}`); + (error as any).detail = errorDetail; + (error as any).status = res.status; + throw error; + } + return res.json(); +} + +export async function listDirectory(path: string): Promise { + return fetchJson(`${BASE}/files/tree?path=${encodeURIComponent(path)}`); +} + +export async function readFile(path: string): Promise { + return fetchJson(`${BASE}/files/content?path=${encodeURIComponent(path)}`); +} + +export async function saveFile(path: string, content: string): Promise { + await fetchJson(`${BASE}/files/content?path=${encodeURIComponent(path)}`, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ content }), + }); +} diff --git a/src/uipath/dev/server/frontend/src/api/websocket.ts b/src/uipath/dev/server/frontend/src/api/websocket.ts index 64bc1a7..b0012fe 100644 --- a/src/uipath/dev/server/frontend/src/api/websocket.ts +++ b/src/uipath/dev/server/frontend/src/api/websocket.ts @@ -123,4 +123,25 @@ export class WsClient { setBreakpoints(runId: string, breakpoints: string[]): void { this.send("debug.set_breakpoints", { run_id: runId, breakpoints }); } + + sendAgentMessage(text: string, model: string, sessionId?: string | null, skillIds?: string[]): void { + this.send("agent.message", { + text, + model, + session_id: sessionId ?? undefined, + skill_ids: skillIds && skillIds.length > 0 ? skillIds : undefined, + }); + } + + sendAgentStop(sessionId: string): void { + this.send("agent.stop", { session_id: sessionId }); + } + + sendToolApproval(sessionId: string, toolCallId: string, approved: boolean): void { + this.send("agent.tool_response", { + session_id: sessionId, + tool_call_id: toolCallId, + approved, + }); + } } diff --git a/src/uipath/dev/server/frontend/src/components/agent/AgentChatSidebar.tsx b/src/uipath/dev/server/frontend/src/components/agent/AgentChatSidebar.tsx new file mode 100644 index 0000000..863546b --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/agent/AgentChatSidebar.tsx @@ -0,0 +1,388 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { useAgentStore } from "../../store/useAgentStore"; +import { useAuthStore } from "../../store/useAuthStore"; +import { listAgentModels, listAgentSkills } from "../../api/agent-client"; +import { getWs } from "../../store/useWebSocket"; +import AgentMessageComponent from "./AgentMessage"; +import type { AgentSkill } from "../../types/agent"; + +export default function AgentChatSidebar() { + const ws = useRef(getWs()).current; + const [input, setInput] = useState(""); + const scrollRef = useRef(null); + const stickToBottom = useRef(true); + + const authEnabled = useAuthStore((s) => s.enabled); + const authStatus = useAuthStore((s) => s.status); + const isAuthenticated = !authEnabled || authStatus === "authenticated"; + + const { + sessionId, + status, + messages, + models, + selectedModel, + modelsLoading, + skills, + selectedSkillIds, + skillsLoading, + setModels, + setSelectedModel, + setModelsLoading, + setSkills, + setSelectedSkillIds, + toggleSkill, + setSkillsLoading, + addUserMessage, + clearSession, + } = useAgentStore(); + + // Load models on mount if authenticated + useEffect(() => { + if (!isAuthenticated) return; + if (models.length > 0) return; + setModelsLoading(true); + listAgentModels() + .then((m) => { + setModels(m); + if (m.length > 0 && !selectedModel) { + const claude = m.find((x) => x.model_name.includes("claude")); + setSelectedModel(claude ? claude.model_name : m[0].model_name); + } + }) + .catch(console.error) + .finally(() => setModelsLoading(false)); + }, [isAuthenticated, models.length, selectedModel, setModels, setSelectedModel, setModelsLoading]); + + // Load skills on mount + useEffect(() => { + if (skills.length > 0) return; + setSkillsLoading(true); + listAgentSkills() + .then((s) => { + setSkills(s); + setSelectedSkillIds(s.map((sk) => sk.id)); + }) + .catch(console.error) + .finally(() => setSkillsLoading(false)); + }, [skills.length, setSkills, setSelectedSkillIds, setSkillsLoading]); + + const [showScrollTop, setShowScrollTop] = useState(false); + + const handleScroll = () => { + const el = scrollRef.current; + if (!el) return; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight < 40; + stickToBottom.current = atBottom; + setShowScrollTop(el.scrollTop > 100); + }; + + // Auto-scroll on any message content change (streaming tokens) + useEffect(() => { + if (stickToBottom.current && scrollRef.current) { + scrollRef.current.scrollTop = scrollRef.current.scrollHeight; + } + }); + + const isBusy = status === "thinking" || status === "executing" || status === "planning"; + + const handleSend = useCallback(() => { + const text = input.trim(); + if (!text || !selectedModel || isBusy) return; + stickToBottom.current = true; + addUserMessage(text); + ws.sendAgentMessage(text, selectedModel, sessionId, selectedSkillIds); + setInput(""); + }, [input, selectedModel, isBusy, sessionId, selectedSkillIds, addUserMessage, ws]); + + const handleStop = useCallback(() => { + if (sessionId) ws.sendAgentStop(sessionId); + }, [sessionId, ws]); + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + handleSend(); + } + }; + + const canSend = !isBusy && !!selectedModel && input.trim().length > 0; + + // Auth gate + if (!isAuthenticated) { + return ( +
+
+
+
+ + + +

Sign in to use Agent

+

Authentication is required to access the coding agent.

+
+
+
+ ); + } + + return ( +
+
0} + isBusy={isBusy} + /> + + {/* Messages */} +
+
+ {messages.length === 0 && ( +

+ No messages yet +

+ )} + {messages.map((msg) => ( + + ))} + {isBusy && ( +
+
+
+ + {status === "thinking" ? "Thinking..." : status === "executing" ? "Executing..." : "Planning..."} + +
+
+ )} +
+ {showScrollTop && ( + + )} +
+ + {/* Input — matches ChatInput from debug view */} +
+ setInput(e.target.value)} + onKeyDown={handleKeyDown} + disabled={isBusy || !selectedModel} + placeholder={isBusy ? "Waiting for response..." : "Message..."} + className="flex-1 bg-transparent text-sm py-1 disabled:opacity-40 placeholder:text-[var(--text-muted)]" + style={{ color: "var(--text-primary)" }} + /> + +
+
+ ); +} + +function Header({ + selectedModel, + models, + modelsLoading, + onModelChange, + skills, + selectedSkillIds, + skillsLoading, + onToggleSkill, + onClear, + onStop, + hasMessages, + isBusy, +}: { + selectedModel: string | null; + models: { model_name: string; vendor: string | null }[]; + modelsLoading: boolean; + onModelChange: (model: string) => void; + skills: AgentSkill[]; + selectedSkillIds: string[]; + skillsLoading: boolean; + onToggleSkill: (id: string) => void; + onClear: () => void; + onStop: () => void; + hasMessages: boolean; + isBusy: boolean; +}) { + const [skillsOpen, setSkillsOpen] = useState(false); + const dropdownRef = useRef(null); + + // Close dropdown when clicking outside + useEffect(() => { + if (!skillsOpen) return; + const handler = (e: MouseEvent) => { + if (dropdownRef.current && !dropdownRef.current.contains(e.target as Node)) { + setSkillsOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, [skillsOpen]); + + return ( +
+ + Agent + + + {/* Skills dropdown */} + {!skillsLoading && skills.length > 0 && ( +
+ + {skillsOpen && ( +
+ {skills.map((s) => ( + + ))} +
+ )} +
+ )} + {isBusy && ( + + )} + {hasMessages && !isBusy && ( + + )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/agent/AgentMessage.tsx b/src/uipath/dev/server/frontend/src/components/agent/AgentMessage.tsx new file mode 100644 index 0000000..a389e57 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/agent/AgentMessage.tsx @@ -0,0 +1,247 @@ +import { useState } from "react"; +import Markdown from "react-markdown"; +import rehypeHighlight from "rehype-highlight"; +import remarkGfm from "remark-gfm"; +import type { AgentMessage as AgentMessageType, AgentToolCall } from "../../types/agent"; +import { useAgentStore } from "../../store/useAgentStore"; +import { getWs } from "../../store/useWebSocket"; + +interface Props { + message: AgentMessageType; +} + +const ROLE_CONFIG: Record = { + user: { label: "You", color: "var(--info)" }, + assistant: { label: "AI", color: "var(--success)" }, + tool: { label: "Tool", color: "var(--warning)" }, + plan: { label: "Plan", color: "var(--accent)" }, +}; + +function PlanCard({ message }: Props) { + const items = message.planItems ?? []; + return ( +
+
+
+ Plan +
+
+ {items.map((item, i) => ( +
+ {item.status === "completed" ? ( + + ) : item.status === "in_progress" ? ( + + + + ) : ( + + + + )} + + {item.title} + +
+ ))} +
+
+ ); +} + +function SingleToolCall({ tc }: { tc: AgentToolCall }) { + const isPending = tc.status === "pending"; + const isDenied = tc.status === "denied"; + const [expanded, setExpanded] = useState(false); + const hasResult = tc.result !== undefined; + + const handleApproval = (approved: boolean) => { + if (!tc.tool_call_id) return; + const sessionId = useAgentStore.getState().sessionId; + if (!sessionId) return; + useAgentStore.getState().resolveToolApproval(tc.tool_call_id, approved); + getWs().sendToolApproval(sessionId, tc.tool_call_id, approved); + }; + + /* ── Pending: card layout matching ChatInterrupt ── */ + if (isPending) { + return ( +
+
+ + Action Required + + + {tc.tool} + +
+ + {tc.args != null && ( +
+            {JSON.stringify(tc.args, null, 2)}
+          
+ )} + +
+ + +
+
+ ); + } + + /* ── Resolved / completed: compact inline style ── */ + const statusColor = isDenied + ? "var(--error)" + : hasResult + ? tc.is_error + ? "var(--error)" + : "var(--success)" + : "var(--text-muted)"; + + const statusIcon = isDenied ? "\u2717" : hasResult ? (tc.is_error ? "\u2717" : "\u2713") : "\u2022"; + + return ( +
+
+ +
+ {expanded && ( +
+
+
Arguments
+
+              {JSON.stringify(tc.args, null, 2)}
+            
+
+ {hasResult && ( +
+
+ {tc.is_error ? "Error" : "Result"} +
+
+                {tc.result}
+              
+
+ )} +
+ )} +
+ ); +} + +function ToolCard({ message }: Props) { + const calls = message.toolCalls ?? (message.toolCall ? [message.toolCall] : []); + if (calls.length === 0) return null; + + return ( +
+
+
+ + {calls.length === 1 ? "Tool" : `Tools (${calls.length})`} + +
+
+ {calls.map((tc, i) => ( + + ))} +
+
+ ); +} + +export default function AgentMessageComponent({ message }: Props) { + if (message.role === "plan") return ; + if (message.role === "tool") return ; + + const roleKey = message.role === "user" ? "user" : "assistant"; + const role = ROLE_CONFIG[roleKey]; + + return ( +
+
+
+ + {role.label} + +
+ {message.content && ( + message.role === "user" ? ( +
+ {message.content} +
+ ) : ( +
+ {message.content} +
+ ) + )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx b/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx index 55a3d72..95ec9e5 100644 --- a/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx +++ b/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx @@ -36,13 +36,14 @@ export default function ChatInput({ onSend, disabled, placeholder }: Props) { onKeyDown={handleKeyDown} disabled={disabled} placeholder={placeholder ?? "Message..."} - className="flex-1 bg-transparent text-sm py-1 focus:outline-none disabled:opacity-40 placeholder:text-[var(--text-muted)]" + className="flex-1 bg-transparent text-sm py-1 disabled:opacity-40 placeholder:text-[var(--text-muted)]" style={{ color: "var(--text-primary)" }} /> +

+ ) : ( +
+ {localEvaluators.map((ev) => ( + + ))} +
+ )} +
+ + {/* Error */} + {error && ( +

{error}

+ )} + + {/* Create button */} + +
+
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx new file mode 100644 index 0000000..d8b9131 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx @@ -0,0 +1,710 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { getEvalRun } from "../../api/eval-client"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import type { EvalRunDetail, EvalItemResult } from "../../types/eval"; +import TraceTree from "../traces/TraceTree"; +import JsonHighlight from "../shared/JsonHighlight"; +import DataSection from "../shared/DataSection"; + +interface Props { + evalRunId: string; + itemName?: string | null; +} + +function formatScore(score: number | null): string { + if (score === null) return "-"; + return `${Math.round(score * 100)}%`; +} + +function scoreColor(score: number | null): string { + if (score === null) return "var(--text-muted)"; + const pct = score * 100; + if (pct >= 80) return "var(--success)"; + if (pct >= 50) return "var(--warning)"; + return "var(--error)"; +} + +function formatDuration(startTime: string | null, endTime: string | null): string { + if (!startTime) return "-"; + const start = new Date(startTime).getTime(); + const end = endTime ? new Date(endTime).getTime() : Date.now(); + const secs = Math.round((end - start) / 1000); + if (secs < 60) return `${secs}s`; + return `${Math.floor(secs / 60)}m ${secs % 60}s`; +} + +function stripEvaluatorSuffix(name: string): string { + return name.replace(/\s*Evaluator$/i, ""); +} + +const statusStyles: Record = { + pending: { color: "var(--text-muted)", bg: "var(--bg-tertiary)", label: "Pending" }, + running: { color: "var(--info)", bg: "rgba(59,130,246,0.1)", label: "Running" }, + completed: { color: "var(--success)", bg: "rgba(34,197,94,0.1)", label: "Completed" }, + failed: { color: "var(--error)", bg: "rgba(239,68,68,0.1)", label: "Failed" }, +}; + +export default function EvalRunResults({ evalRunId, itemName }: Props) { + const [detail, setDetail] = useState(null); + const [loading, setLoading] = useState(true); + const { navigate } = useHashRoute(); + + const selectedItemName = itemName ?? null; + + // Item list height (top panel, resizable like GraphPanel) + const [itemListHeight, setItemListHeight] = useState(220); + const containerRef = useRef(null); + const draggingRow = useRef(false); + + // Sidebar width (right panel, resizable like ChatPanel) + const [sidebarWidth, setSidebarWidth] = useState(() => { + const saved = localStorage.getItem("evalSidebarWidth"); + return saved ? parseInt(saved, 10) : 320; + }); + const [isDragging, setIsDragging] = useState(false); + const outerRef = useRef(null); + + useEffect(() => { + localStorage.setItem("evalSidebarWidth", String(sidebarWidth)); + }, [sidebarWidth]); + + const storeRun = useEvalStore((s) => s.evalRuns[evalRunId]); + const evaluators = useEvalStore((s) => s.evaluators); + + useEffect(() => { + setLoading(true); + getEvalRun(evalRunId) + .then((d) => { + setDetail(d); + // Auto-navigate to first item if none selected + if (!itemName) { + const first = d.results.find((r) => r.status === "completed") ?? d.results[0]; + if (first) navigate(`#/evals/runs/${evalRunId}/${encodeURIComponent(first.name)}`); + } + }) + .catch(console.error) + .finally(() => setLoading(false)); + }, [evalRunId]); + + // Re-fetch when store run status reaches terminal + useEffect(() => { + if (storeRun?.status === "completed" || storeRun?.status === "failed") { + getEvalRun(evalRunId).then(setDetail).catch(console.error); + } + }, [storeRun?.status, evalRunId]); + + // Auto-select first completed item as results come in (when no item is in route) + useEffect(() => { + if (itemName || !detail?.results) return; + const first = detail.results.find((r) => r.status === "completed") ?? detail.results[0]; + if (first) navigate(`#/evals/runs/${evalRunId}/${encodeURIComponent(first.name)}`); + }, [detail?.results]); + + // --- Row resize (item list height) --- + const onRowResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + draggingRow.current = true; + + const startY = "touches" in e ? e.touches[0].clientY : e.clientY; + const startH = itemListHeight; + + const onMove = (ev: MouseEvent | TouchEvent) => { + if (!draggingRow.current) return; + const container = containerRef.current; + if (!container) return; + const clientY = "touches" in ev ? ev.touches[0].clientY : ev.clientY; + const maxH = container.clientHeight - 100; + const newH = Math.max(80, Math.min(maxH, startH + (clientY - startY))); + setItemListHeight(newH); + }; + + const onUp = () => { + draggingRow.current = false; + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "row-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [itemListHeight]); + + // --- Sidebar col resize --- + const onSidebarResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + setIsDragging(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = sidebarWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const container = outerRef.current; + if (!container) return; + const clientX = "touches" in ev ? ev.touches[0].clientX : ev.clientX; + const maxW = container.clientWidth - 300; + const newW = Math.max(280, Math.min(maxW, startW + (startX - clientX))); + setSidebarWidth(newW); + }; + + const onUp = () => { + setIsDragging(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [sidebarWidth]); + + if (loading) { + return ( +
+ Loading... +
+ ); + } + + if (!detail) { + return ( +
+ Eval run not found +
+ ); + } + + const run = storeRun ?? detail; + const status = statusStyles[run.status] ?? statusStyles.pending; + const isRunning = run.status === "running"; + const evaluatorIds = Object.keys(run.evaluator_scores ?? {}); + const selectedItem = detail.results.find((r) => r.name === selectedItemName) ?? null; + const selectedTraces = (selectedItem?.traces ?? []).map((t) => ({ ...t, run_id: "" })); + + return ( +
+ {/* Main content: item list (top) + traces (bottom) */} +
+ {/* Header bar */} +
+

+ {run.eval_set_name} +

+ + {status.label} + + + {formatScore(run.overall_score)} + + + {formatDuration(run.start_time, run.end_time)} + + {isRunning && ( +
+
+
0 ? (run.progress_completed / run.progress_total) * 100 : 0}%`, + background: "var(--info)", + }} + /> +
+ + {run.progress_completed}/{run.progress_total} + +
+ )} + {/* Per-evaluator scores inline */} + {evaluatorIds.length > 0 && ( +
+ {evaluatorIds.map((id) => { + const ev = evaluators.find((e) => e.id === id); + const score = run.evaluator_scores[id]; + return ( +
+ + {stripEvaluatorSuffix(ev?.name ?? id)} + +
+
+
+ + {formatScore(score)} + +
+ ); + })} +
+ )} +
+ + {/* Item list (resizable height, like GraphPanel) */} +
+ {/* Table header */} +
+ + Name + Score + {evaluatorIds.map((id) => { + const ev = evaluators.find((e) => e.id === id); + return ( + {stripEvaluatorSuffix(ev?.name ?? id)} + ); + })} + Time +
+ {/* Scrollable item rows */} +
+ {detail.results.map((item: EvalItemResult) => { + const isPending = item.status === "pending"; + const isFailed = item.status === "failed"; + const isSelected = item.name === selectedItemName; + return ( + + ); + })} + {detail.results.length === 0 && ( +
+ {isRunning ? "Waiting for results..." : "No results"} +
+ )} +
+
+ + {/* Row drag handle */} +
+ + {/* Trace tree (bottom, flex-1, like TraceTree in debug) */} +
+ {selectedItem && selectedTraces.length > 0 ? ( + + ) : ( +
+ {selectedItem?.status === "pending" ? "Pending..." : "No traces available"} +
+ )} +
+
+ + {/* Sidebar drag handle */} +
+ + {/* Right sidebar */} + +
+ ); +} + +type DetailTab = "score" | "io" | "logs"; + +const detailTabs: { id: DetailTab; label: string }[] = [ + { id: "score", label: "Score" }, + { id: "io", label: "I/O" }, + { id: "logs", label: "Logs" }, +]; + +function DetailsSidebar({ + width, + item, + evaluators, + isRunning, + isDragging, +}: { + width: number; + item: EvalItemResult | null; + evaluators: { id: string; name: string }[]; + isRunning: boolean; + isDragging: boolean; +}) { + const [tab, setTab] = useState("score"); + + const showSidebar = !!item; + + return ( +
+ {/* Tab bar */} +
+ {detailTabs.map((t) => ( + + ))} + {isRunning && ( + + Running... + + )} +
+ + {/* Content */} +
+ {!item ? null : item.status === "pending" ? ( +
+ Pending... +
+ ) : ( + <> + {item.status === "failed" && ( +
+
+ + Evaluator error +
+ {item.error && ( +
+ {item.error} +
+ )} +
+ )} + {tab === "score" ? ( + + ) : tab === "io" ? ( + + ) : ( +
+ Logs coming soon +
+ )} + + )} +
+
+ ); +} + +function ScoreTab({ item, evaluators }: { item: EvalItemResult; evaluators: { id: string; name: string }[] }) { + const evalIds = Object.keys(item.scores); + + return ( +
+ {/* Overall */} +
+
+ + Overall + +
+
+
+
+ + {formatScore(item.overall_score)} + +
+
+
+ + {/* Failed: no evaluator scores */} + {item.status === "failed" && evalIds.length === 0 && ( +
+ All evaluators failed — no scores available +
+ )} + + {/* Per-evaluator scores */} + {evalIds.map((evId) => { + const ev = evaluators.find((e) => e.id === evId); + const score = item.scores[evId]; + const justification = item.justifications[evId]; + return ( +
+
+ + {ev?.name ?? evId} + +
+
+
+
+ + {formatScore(score)} + +
+
+ {justification && ( + + )} +
+ ); + })} +
+ ); +} + +function IOTab({ item }: { item: EvalItemResult }) { + const inputJson = JSON.stringify(item.inputs, null, 2); + const outputJson = typeof item.output === "string" ? item.output : JSON.stringify(item.output, null, 2); + const expectedJson = item.expected_output != null + ? (typeof item.expected_output === "string" ? item.expected_output : JSON.stringify(item.expected_output, null, 2)) + : null; + + return ( +
+ + + + + {expectedJson && ( + + + + )} + + + {(item.duration_ms / 1000).toFixed(2)}s + + ) : undefined} + > + + +
+ ); +} + +/** Try to parse `expected="..." actual="..."` from justification text into structured blocks. */ +function parseExpectedActual(text: string): { expected: string; actual: string; meta: Record } | null { + const m = text.match(/expected="(.+?)"\s+actual="(.+?)"(.*)/s); + if (!m) return null; + // Parse trailing key=value pairs like matched_leaves=1.0 total_leaves=1.0 + const meta: Record = {}; + const rest = m[3]?.trim() ?? ""; + if (rest) { + for (const pair of rest.match(/(\w+)=([\S]+)/g) ?? []) { + const eq = pair.indexOf("="); + meta[pair.slice(0, eq)] = pair.slice(eq + 1); + } + } + return { expected: m[1], actual: m[2], meta }; +} + +function tryFormatValue(raw: string): string { + // Python dict repr → JSON: single quotes → double quotes, True/False/None → JSON equivalents + try { + const jsonLike = raw + .replace(/'/g, '"') + .replace(/\bTrue\b/g, "true") + .replace(/\bFalse\b/g, "false") + .replace(/\bNone\b/g, "null"); + const parsed = JSON.parse(jsonLike); + return JSON.stringify(parsed, null, 2); + } catch { + return raw; + } +} + +function JustificationBlock({ text }: { text: string }) { + const parsed = parseExpectedActual(text); + + if (!parsed) { + // Fallback: plain text + return ( +
+
+ {text} +
+
+ ); + } + + const expected = tryFormatValue(parsed.expected); + const actual = tryFormatValue(parsed.actual); + const match = expected === actual; + + return ( +
+
+ {/* Expected */} +
+
+ Expected +
+
+            {expected}
+          
+
+ {/* Actual */} +
+
+ + Actual + + +
+
+            {actual}
+          
+
+
+ {Object.keys(parsed.meta).length > 0 && ( +
+ {Object.entries(parsed.meta).map(([k, v]) => ( + + {k.replace(/_/g, " ")}{" "} + {v} + + ))} +
+ )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx new file mode 100644 index 0000000..2d322be --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx @@ -0,0 +1,518 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { getEvalSet, startEvalRun, updateEvalSetEvaluators, deleteEvalItem } from "../../api/eval-client"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import type { EvalSetDetail as EvalSetDetailType, EvalItem } from "../../types/eval"; +import JsonHighlight from "../shared/JsonHighlight"; +import DataSection from "../shared/DataSection"; + +interface Props { + evalSetId: string; +} + +function truncateJson(val: unknown, max = 60): string { + const s = typeof val === "string" ? val : JSON.stringify(val); + if (!s || s === "null") return "-"; + return s.length > max ? s.slice(0, max) + "..." : s; +} + +type SidebarTab = "io" | "evaluators"; + +export default function EvalSetDetail({ evalSetId }: Props) { + const [detail, setDetail] = useState(null); + const [loading, setLoading] = useState(true); + const [selectedItemName, setSelectedItemName] = useState(null); + const [runLoading, setRunLoading] = useState(false); + const [sidebarTab, setSidebarTab] = useState("io"); + const evaluators = useEvalStore((s) => s.evaluators); + const localEvaluators = useEvalStore((s) => s.localEvaluators); + const storeUpdateEvaluators = useEvalStore((s) => s.updateEvalSetEvaluators); + const incrementEvalSetCount = useEvalStore((s) => s.incrementEvalSetCount); + const upsertEvalRun = useEvalStore((s) => s.upsertEvalRun); + const { navigate } = useHashRoute(); + + // Evaluator edit popover state + const [editOpen, setEditOpen] = useState(false); + const [editRefs, setEditRefs] = useState>(new Set()); + const [editSaving, setEditSaving] = useState(false); + const popoverRef = useRef(null); + + // Sidebar width (resizable) + const [sidebarWidth, setSidebarWidth] = useState(() => { + const saved = localStorage.getItem("evalSetSidebarWidth"); + return saved ? parseInt(saved, 10) : 320; + }); + const [isDragging, setIsDragging] = useState(false); + const outerRef = useRef(null); + + useEffect(() => { + localStorage.setItem("evalSetSidebarWidth", String(sidebarWidth)); + }, [sidebarWidth]); + + useEffect(() => { + setLoading(true); + setSelectedItemName(null); + getEvalSet(evalSetId) + .then((d) => { + setDetail(d); + if (d.items.length > 0) setSelectedItemName(d.items[0].name); + }) + .catch(console.error) + .finally(() => setLoading(false)); + }, [evalSetId]); + + const handleRun = async () => { + setRunLoading(true); + try { + const run = await startEvalRun(evalSetId); + upsertEvalRun(run); + navigate(`#/evals/runs/${run.id}`); + } catch (err) { + console.error(err); + } finally { + setRunLoading(false); + } + }; + + const handleDeleteItem = async (itemName: string) => { + if (!detail) return; + try { + await deleteEvalItem(evalSetId, itemName); + setDetail((prev) => { + if (!prev) return prev; + const items = prev.items.filter((i) => i.name !== itemName); + return { ...prev, items, eval_count: items.length }; + }); + incrementEvalSetCount(evalSetId, -1); + if (selectedItemName === itemName) setSelectedItemName(null); + } catch (err) { + console.error(err); + } + }; + + // --- Evaluator edit popover --- + const openEditPopover = useCallback(() => { + if (detail) { + setEditRefs(new Set(detail.evaluator_ids)); + } + setEditOpen(true); + }, [detail]); + + const toggleEditRef = (id: string) => { + setEditRefs((prev) => { + const next = new Set(prev); + if (next.has(id)) next.delete(id); + else next.add(id); + return next; + }); + }; + + const handleSaveEvaluators = async () => { + if (!detail) return; + setEditSaving(true); + try { + const updated = await updateEvalSetEvaluators(evalSetId, Array.from(editRefs)); + setDetail(updated); + storeUpdateEvaluators(evalSetId, updated.evaluator_ids); + setEditOpen(false); + } catch (err) { + console.error(err); + } finally { + setEditSaving(false); + } + }; + + // Click-outside to close popover + useEffect(() => { + if (!editOpen) return; + const handler = (e: MouseEvent) => { + if (popoverRef.current && !popoverRef.current.contains(e.target as Node)) { + setEditOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, [editOpen]); + + // --- Sidebar col resize --- + const onSidebarResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + setIsDragging(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = sidebarWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const container = outerRef.current; + if (!container) return; + const clientX = "touches" in ev ? ev.touches[0].clientX : ev.clientX; + const maxW = container.clientWidth - 300; + const newW = Math.max(280, Math.min(maxW, startW + (startX - clientX))); + setSidebarWidth(newW); + }; + + const onUp = () => { + setIsDragging(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [sidebarWidth]); + + if (loading) { + return ( +
+ Loading... +
+ ); + } + + if (!detail) { + return ( +
+ Eval set not found +
+ ); + } + + const selectedItem = detail.items.find((i) => i.name === selectedItemName) ?? null; + + return ( +
+ {/* Main content: header + item grid */} +
+ {/* Header bar */} +
+

+ {detail.name} +

+ + {detail.eval_count} items + +
+ + {detail.evaluator_ids.map((id) => { + const ev = evaluators.find((e) => e.id === id); + return ( + + {ev?.name ?? id} + + ); + })} + {editOpen && ( +
+
+ Evaluators +
+
+ {localEvaluators.length === 0 ? ( +
+ No evaluators available +
+ ) : ( + localEvaluators.map((ev) => ( + + )) + )} +
+
+ +
+
+ )} +
+ +
+ + {/* Table header */} +
+ Name + Input + Expected Behavior + Expected Output + Simulation Instr. + +
+ + {/* Scrollable item rows */} +
+ {detail.items.map((item: EvalItem) => { + const isSelected = item.name === selectedItemName; + return ( + + ); + })} + {detail.items.length === 0 && ( +
+ No items in this eval set +
+ )} +
+
+ + {/* Sidebar drag handle */} +
+ + {/* Right sidebar */} +
+ {/* Tab bar */} +
+ {(["io", "evaluators"] as const).map((tab) => { + const active = sidebarTab === tab; + const label = tab === "io" ? "I/O" : "Evaluators"; + return ( + + ); + })} +
+ + {/* Content */} +
+ {selectedItem ? ( + sidebarTab === "io" ? ( + + ) : ( + + ) + ) : null} +
+
+
+ ); +} + +function ItemIOView({ item }: { item: EvalItem }) { + const inputJson = JSON.stringify(item.inputs, null, 2); + const expectedOutputJson = item.expected_output != null + ? (typeof item.expected_output === "string" ? item.expected_output : JSON.stringify(item.expected_output, null, 2)) + : null; + + return ( +
+ + + + + {item.expected_behavior && ( + +
+ {item.expected_behavior} +
+
+ )} + + {expectedOutputJson && ( + + + + )} + + {item.simulation_instructions && ( + +
+ {item.simulation_instructions} +
+
+ )} +
+ ); +} + +function ItemEvaluatorsView({ item, evaluators }: { item: EvalItem; evaluators: { id: string; name: string }[] }) { + return ( +
+ {item.evaluator_ids.length > 0 ? ( + <> + {item.evaluator_ids.map((evId) => { + const ev = evaluators.find((e) => e.id === evId); + const criteria = item.evaluation_criterias?.[evId]; + return ( +
+
+ + {ev?.name ?? evId} + + + {criteria ? "Custom criteria" : "Default criteria"} + +
+ {criteria && ( +
+                    {JSON.stringify(criteria, null, 2)}
+                  
+ )} +
+ ); + })} + + ) : ( +
+ No evaluators configured for this item +
+ )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx new file mode 100644 index 0000000..48844d4 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx @@ -0,0 +1,141 @@ +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; + +function formatScore(score: number | null): string { + if (score === null) return "-"; + return `${Math.round(score * 100)}%`; +} + +function scoreColor(score: number | null): string { + if (score === null) return "var(--text-muted)"; + const pct = score * 100; + if (pct >= 80) return "var(--success)"; + if (pct >= 50) return "var(--warning)"; + return "var(--error)"; +} + +const statusIcons: Record = { + pending: { color: "var(--text-muted)", label: "Pending" }, + running: { color: "var(--info)", label: "Running" }, + completed: { color: "var(--success)", label: "Completed" }, + failed: { color: "var(--error)", label: "Failed" }, +}; + +export default function EvalsSidebar() { + const evalSets = useEvalStore((s) => s.evalSets); + const evalRuns = useEvalStore((s) => s.evalRuns); + const { evalSetId, evalRunId, navigate } = useHashRoute(); + + const sets = Object.values(evalSets); + const runs = Object.values(evalRuns).sort( + (a, b) => new Date(b.start_time ?? 0).getTime() - new Date(a.start_time ?? 0).getTime(), + ); + + return ( +
+ {/* New Eval Set */} + + + {/* Eval Sets */} +
+ Eval Sets +
+ {sets.map((es) => { + const active = evalSetId === es.id; + return ( + + ); + })} + {sets.length === 0 && ( +

+ No eval sets yet +

+ )} + + {/* Run History */} +
+ History +
+ {runs.map((run) => { + const active = evalRunId === run.id; + const si = statusIcons[run.status] ?? statusIcons.pending; + return ( + + ); + })} + {runs.length === 0 && ( +

+ No eval runs yet +

+ )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx b/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx new file mode 100644 index 0000000..9a4c76c --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx @@ -0,0 +1,272 @@ +import { useEffect, useState } from "react"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import { createLocalEvaluator } from "../../api/eval-client"; +import { typesByCategory, typeDefaults, getTypeFields, categoryLabel } from "./EvaluatorDetail"; + +const allCategories = ["deterministic", "llm", "tool"] as const; + +interface Props { + category: string; +} + +export default function CreateEvaluatorView({ category: initialCategory }: Props) { + const addLocalEvaluator = useEvalStore((s) => s.addLocalEvaluator); + const { navigate } = useHashRoute(); + + const isFixed = initialCategory !== "any"; + const [category, setCategory] = useState(isFixed ? initialCategory : "deterministic"); + const types = typesByCategory[category] ?? []; + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [typeId, setTypeId] = useState(types[0]?.id ?? ""); + const [targetOutputKey, setTargetOutputKey] = useState("*"); + const [prompt, setPrompt] = useState(""); + const [saving, setSaving] = useState(false); + const [error, setError] = useState(null); + const [descriptionTouched, setDescriptionTouched] = useState(false); + const [promptTouched, setPromptTouched] = useState(false); + + // Reset form when initial category prop changes + useEffect(() => { + const cat = isFixed ? initialCategory : "deterministic"; + setCategory(cat); + const t = typesByCategory[cat] ?? []; + const firstId = t[0]?.id ?? ""; + const defaults = typeDefaults[firstId]; + setName(""); + setDescription(defaults?.description ?? ""); + setTypeId(firstId); + setTargetOutputKey("*"); + setPrompt(defaults?.prompt ?? ""); + setError(null); + setDescriptionTouched(false); + setPromptTouched(false); + }, [initialCategory, isFixed]); + + const handleCategoryChange = (newCat: string) => { + setCategory(newCat); + const t = typesByCategory[newCat] ?? []; + const firstId = t[0]?.id ?? ""; + const defaults = typeDefaults[firstId]; + setTypeId(firstId); + if (!descriptionTouched) setDescription(defaults?.description ?? ""); + if (!promptTouched) setPrompt(defaults?.prompt ?? ""); + }; + + const handleTypeChange = (newTypeId: string) => { + setTypeId(newTypeId); + const defaults = typeDefaults[newTypeId]; + if (defaults) { + if (!descriptionTouched) setDescription(defaults.description); + if (!promptTouched) setPrompt(defaults.prompt); + } + }; + + const fields = getTypeFields(typeId); + + const handleSubmit = async () => { + if (!name.trim()) { + setError("Name is required"); + return; + } + setSaving(true); + setError(null); + try { + const config: Record = {}; + if (fields.targetOutputKey) config.targetOutputKey = targetOutputKey; + if (fields.prompt && prompt.trim()) config.prompt = prompt; + + const result = await createLocalEvaluator({ + name: name.trim(), + description: description.trim(), + evaluator_type_id: typeId, + config, + }); + addLocalEvaluator(result); + navigate("#/evaluators"); + } catch (err: unknown) { + const detail = (err as { detail?: string })?.detail; + setError(detail ?? "Failed to create evaluator"); + } finally { + setSaving(false); + } + }; + + const inputStyle = { + background: "var(--bg-secondary)", + border: "1px solid var(--border)", + color: "var(--text-primary)", + }; + + return ( +
+
+
+ {/* Header */} +
+
+
+ + New Evaluator + +
+

+ Create an evaluator to score agent outputs +

+
+ + {/* Name */} +
+ + setName(e.target.value)} + placeholder="e.g. MyEvaluator" + className="w-full rounded-md px-3 py-2 text-xs" + style={inputStyle} + onKeyDown={(e) => { + if (e.key === "Enter" && name.trim()) handleSubmit(); + }} + /> +
+ + {/* Category */} +
+ + {isFixed ? ( +
+ {categoryLabel[category] ?? category} +
+ ) : ( + + )} +
+ + {/* Type */} +
+ + +
+ + {/* Description */} +
+ +