Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d733a5e
feat: extract uipath-eval as standalone package
rakesh-uipath Apr 21, 2026
8c47132
refactor(uipath-eval): finalize package setup — tests, CI, mypy, pypr…
rakesh-uipath Apr 21, 2026
509096f
fix(uipath-eval): resolve critical issues from code review
rakesh-uipath Apr 22, 2026
94fae8f
fix(uipath-eval): remove duplicate track_evaluation_metrics and dead …
rakesh-uipath Apr 22, 2026
29f100c
fix: guard parent_dir in finally block against UnboundLocalError
rakesh-uipath Apr 22, 2026
a5333d4
fix(uipath-eval): deduplicate track_evaluation_metrics and fix _helpe…
rakesh-uipath Apr 22, 2026
6eceb4a
fix: catch worker exceptions in execute_parallel, surface aggregated …
rakesh-uipath Apr 22, 2026
cb2b42c
ci: remove continue-on-error from test-uipath job
rakesh-uipath Apr 22, 2026
06957fa
fix(uipath-eval): add type annotation to extract_selected_evals eval_…
rakesh-uipath Apr 22, 2026
8d26bf2
fix(uipath): regenerate uv.lock to include uipath-eval editable dep
rakesh-uipath Apr 22, 2026
ab6f3f5
chore: resolve merge conflict with origin/main
rakesh-uipath Apr 22, 2026
39725da
chore: remove unused COMMUNITY_agents_SUFFIX from evaluators_helpers
rakesh-uipath Apr 22, 2026
7c93200
refactor(uipath): convert eval package to re-export from uipath-eval
rakesh-uipath Apr 22, 2026
9c6784c
fix: clean up split-brain BaseEvaluator and dead legacy_evaluator_uti…
rakesh-uipath Apr 23, 2026
1182ec6
fix(uipath): use extended local LegacyExactMatchEvaluator and LegacyJ…
rakesh-uipath Apr 23, 2026
bfc6fa0
fix(eval): resolve critical hierarchy split and dead import from review
rakesh-uipath Apr 23, 2026
b8fbd1b
fix(uipath): make OutputEvaluationCriteria a re-export from uipath-eval
rakesh-uipath Apr 23, 2026
adf1fca
chore: merge main into feat/uipath-eval-package and resolve version c…
rakesh-uipath Apr 23, 2026
62062cc
fix(uipath): restore line_by_line support in ExactMatchEvaluator
rakesh-uipath Apr 23, 2026
586e8cc
chore(uipath): bump version to 2.10.55 for PyPI uniqueness check
rakesh-uipath Apr 23, 2026
d0c7704
fix(eval): resolve mypy errors and lint in uipath evaluators
rakesh-uipath Apr 23, 2026
457cd88
style: fix ruff formatting violations in eval packages
rakesh-uipath Apr 23, 2026
c62e4e2
ci: add uipath-eval wheel build to llamaindex test workflow
rakesh-uipath Apr 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/detect_changed_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# handled separately via labeler.yml auto-labels.
DEPENDENTS: dict[str, list[str]] = {
    "uipath-core": ["uipath-platform", "uipath"],
    # NOTE(review): presumably listed because `uipath` consumes uipath-eval,
    # so a change to uipath-eval also selects `uipath` - confirm against the
    # code that reads this mapping.
    "uipath-eval": ["uipath"],
    "uipath-platform": ["uipath"],
}

Expand Down
59 changes: 57 additions & 2 deletions .github/workflows/lint-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,17 +198,72 @@ jobs:
working-directory: packages/uipath
run: uv run python scripts/lint_httpx_client.py

lint-uipath-eval:
  name: Lint uipath-eval
  needs: detect-changed-packages
  runs-on: ubuntu-latest
  steps:
    # Sets `skip` to "false" when "uipath-eval" is an element of the
    # packages JSON array, "true" otherwise. Every later step is guarded
    # on this output, so the job still completes when nothing changed.
    - name: Check if package changed
      id: check
      env:
        # Passed through the environment rather than interpolated into the
        # script text, so a quote or shell metacharacter in the value
        # cannot alter the command being run.
        PACKAGES: ${{ needs.detect-changed-packages.outputs.packages }}
      run: |
        if echo "$PACKAGES" | jq -e 'index("uipath-eval")' > /dev/null; then
          echo "skip=false" >> "$GITHUB_OUTPUT"
        else
          echo "skip=true" >> "$GITHUB_OUTPUT"
        fi

    - name: Skip
      if: steps.check.outputs.skip == 'true'
      run: echo "Skipping - no changes to uipath-eval"

    - name: Checkout
      if: steps.check.outputs.skip != 'true'
      uses: actions/checkout@v4

    - name: Setup uv
      if: steps.check.outputs.skip != 'true'
      uses: astral-sh/setup-uv@v5
      with:
        enable-cache: true

    # Linting runs on the single interpreter pinned in the package's
    # .python-version file.
    - name: Setup Python
      if: steps.check.outputs.skip != 'true'
      uses: actions/setup-python@v5
      with:
        python-version-file: "packages/uipath-eval/.python-version"

    # --locked fails if uv.lock is out of date; --no-sources ignores
    # [tool.uv.sources] overrides when resolving.
    - name: Install dependencies
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv sync --locked --no-sources --all-extras

    - name: Check static types
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv run mypy --config-file pyproject.toml .

    - name: Check linting
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv run ruff check .

    - name: Check formatting
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv run ruff format --check .

lint-gate:
name: Lint
needs: [lint-uipath-core, lint-uipath-platform, lint-uipath]
needs: [lint-uipath-core, lint-uipath-platform, lint-uipath, lint-uipath-eval]
runs-on: ubuntu-latest
if: always()
steps:
- name: Check lint results
run: |
if [[ "${{ needs.lint-uipath-core.result }}" == "failure" || \
"${{ needs.lint-uipath-platform.result }}" == "failure" || \
"${{ needs.lint-uipath.result }}" == "failure" ]]; then
"${{ needs.lint-uipath.result }}" == "failure" || \
"${{ needs.lint-uipath-eval.result }}" == "failure" ]]; then
echo "Lint failed"
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,4 @@ jobs:
- name: Publish
run: uv publish --index testpypi
env:
UV_PUBLISH_TOKEN: ${{ matrix.package == 'uipath' && secrets.TESTPYPI_TOKEN || matrix.package == 'uipath-platform' && secrets.TESTPYPI_TOKEN_PLATFORM || secrets.TESTPYPI_TOKEN_CORE }}
UV_PUBLISH_TOKEN: ${{ matrix.package == 'uipath' && secrets.TESTPYPI_TOKEN || matrix.package == 'uipath-platform' && secrets.TESTPYPI_TOKEN_PLATFORM || matrix.package == 'uipath-eval' && secrets.TESTPYPI_TOKEN_EVAL || secrets.TESTPYPI_TOKEN_CORE }}
54 changes: 51 additions & 3 deletions .github/workflows/test-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,67 @@ jobs:
working-directory: packages/uipath
run: uv run pytest

continue-on-error: true
test-uipath-eval:
  name: Test (uipath-eval, ${{ matrix.python-version }}, ${{ matrix.os }})
  needs: detect-changed-packages
  runs-on: ${{ matrix.os }}
  strategy:
    # Let every matrix cell finish so one failing combination does not
    # hide the results of the others.
    fail-fast: false
    matrix:
      python-version: ["3.11", "3.12", "3.13"]
      os: [ubuntu-latest, windows-latest]
  steps:
    # Sets `skip` to "false" when "uipath-eval" is an element of the
    # packages JSON array, "true" otherwise. `shell: bash` is explicit
    # because the matrix includes Windows runners.
    - name: Check if package changed
      id: check
      shell: bash
      env:
        # Passed through the environment rather than interpolated into the
        # script text, so a quote or shell metacharacter in the value
        # cannot alter the command being run.
        PACKAGES: ${{ needs.detect-changed-packages.outputs.packages }}
      run: |
        if echo "$PACKAGES" | jq -e 'index("uipath-eval")' > /dev/null; then
          echo "skip=false" >> "$GITHUB_OUTPUT"
        else
          echo "skip=true" >> "$GITHUB_OUTPUT"
        fi

    - name: Skip
      if: steps.check.outputs.skip == 'true'
      shell: bash
      run: echo "Skipping - no changes to uipath-eval"

    - name: Checkout
      if: steps.check.outputs.skip != 'true'
      uses: actions/checkout@v4

    - name: Setup uv
      if: steps.check.outputs.skip != 'true'
      uses: astral-sh/setup-uv@v5

    - name: Setup Python
      if: steps.check.outputs.skip != 'true'
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv sync --all-extras --python ${{ matrix.python-version }}

    - name: Run tests
      if: steps.check.outputs.skip != 'true'
      working-directory: packages/uipath-eval
      run: uv run pytest

test-gate:
name: Test
needs: [test-uipath-core, test-uipath-platform, test-uipath, e2e-uipath-platform]
needs: [test-uipath-core, test-uipath-platform, test-uipath, test-uipath-eval, e2e-uipath-platform]
runs-on: ubuntu-latest
if: always()
steps:
- name: Check test results
run: |
if [[ "${{ needs.test-uipath-core.result }}" == "failure" || \
"${{ needs.test-uipath-platform.result }}" == "failure" || \
"${{ needs.test-uipath.result }}" == "failure" ]]; then
"${{ needs.test-uipath.result }}" == "failure" || \
"${{ needs.test-uipath-eval.result }}" == "failure" ]]; then
echo "Tests failed"
exit 1
fi
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/test-uipath-langchain.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
working-directory: packages/uipath-platform
run: uv build

- name: Build uipath-eval package
working-directory: packages/uipath-eval
run: uv build

- name: Build uipath package
working-directory: packages/uipath
run: uv build
Expand Down Expand Up @@ -78,6 +82,7 @@ jobs:
run: |
uv add ../wheels/uipath-core/dist/*.whl --dev
uv add ../wheels/uipath-platform/dist/*.whl --dev
uv add ../wheels/uipath-eval/dist/*.whl --dev
uv add ../wheels/uipath/dist/*.whl --dev

- name: Run uipath-langchain tests
Expand Down Expand Up @@ -152,6 +157,7 @@ jobs:
run: |
uv add ../wheels/uipath-core/dist/*.whl
uv add ../wheels/uipath-platform/dist/*.whl
uv add ../wheels/uipath-eval/dist/*.whl
uv add ../wheels/uipath/dist/*.whl

- name: Install dependencies
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/test-uipath-llamaindex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
working-directory: packages/uipath-platform
run: uv build

- name: Build uipath-eval package
working-directory: packages/uipath-eval
run: uv build

- name: Build uipath package
working-directory: packages/uipath
run: uv build
Expand Down Expand Up @@ -78,6 +82,7 @@ jobs:
run: |
uv add ../../../wheels/uipath-core/dist/*.whl --dev
uv add ../../../wheels/uipath-platform/dist/*.whl --dev
uv add ../../../wheels/uipath-eval/dist/*.whl --dev
uv add ../../../wheels/uipath/dist/*.whl --dev

- name: Run uipath-llamaindex tests
Expand Down Expand Up @@ -151,6 +156,7 @@ jobs:
run: |
uv add ../../../wheels/uipath-core/dist/*.whl
uv add ../../../wheels/uipath-platform/dist/*.whl
uv add ../../../wheels/uipath-eval/dist/*.whl
uv add ../../../wheels/uipath/dist/*.whl

- name: Install dependencies
Expand Down
1 change: 1 addition & 0 deletions packages/uipath-eval/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.11
38 changes: 38 additions & 0 deletions packages/uipath-eval/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# uipath-eval

Standalone evaluator logic extracted from the `uipath` SDK.

Use this package in `python-eval-workers` and other services that need
evaluator logic without the full UiPath SDK overhead.

## Install

```bash
pip install uipath-eval
```

For LLM-based evaluators (llm-as-judge, trajectory):

```bash
pip install "uipath-eval[llm]"
```

## Usage

```python
from uipath_eval import ExactMatchEvaluator
from uipath.eval import LLMJudgeOutputEvaluator # LLM evaluators stay in uipath.eval
from uipath_eval.models import EvaluationResult
```

## What's here

- `uipath_eval.evaluators` — evaluator implementations (LLM-based evaluators such as `LLMJudgeOutputEvaluator` are imported from `uipath.eval`; see Usage)
- `uipath_eval.models` — evaluation data models
- `uipath_eval.runtime` — pure asyncio/stdlib runtime utilities

## What's NOT here

`UiPathEvalRuntime`, `UiPathEvalContext`, and `evaluate()` depend on
`uipath.runtime` and stay in `uipath.eval`. Use `uipath` if you need
the full eval pipeline with runtime integration.
99 changes: 99 additions & 0 deletions packages/uipath-eval/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
[project]
name = "uipath-eval"
version = "0.1.0"
description = "UiPath evaluator logic as a standalone package — for use in python-eval-workers without the full UiPath SDK."
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
# Runtime requirements. uipath-core is held below the next minor release
# (<0.6.0); httpx is the only entry without an upper bound.
dependencies = [
    "uipath-core>=0.5.8, <0.6.0",
    "opentelemetry-sdk>=1.39.0, <2.0.0",
    "httpx>=0.28.1",
    "pydantic>=2.12.5, <3.0.0",
]
# The Python-version classifiers mirror the `requires-python` floor above.
classifiers = [
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Build Tools",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
maintainers = [
    { name = "Marius Cosareanu", email = "marius.cosareanu@uipath.com" },
    { name = "Cristian Pufu", email = "cristian.pufu@uipath.com" },
]

[project.optional-dependencies]
# Installed with `pip install "uipath-eval[llm]"`.
llm = [
    "langchain-core>=0.3",
    "openai>=1.0",
]

# Links published in the package metadata.
[project.urls]
Homepage = "https://uipath.com"
Repository = "https://github.com/UiPath/uipath-python"
Documentation = "https://uipath.github.io/uipath-python/"

# Built with hatchling (PEP 517 backend).
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# src-layout: the wheel ships the `uipath_eval` import package found
# under src/.
[tool.hatch.build.targets.wheel]
packages = ["src/uipath_eval"]

[dependency-groups]
# Development-only tooling (PEP 735 group); not part of the published
# package's requirements.
dev = [
    "bandit>=1.8.2",
    "mypy>=1.14.1",
    "ruff>=0.9.4",
    "rust-just>=1.39.0",
    "pytest>=7.4.0",
    "pytest-asyncio>=1.0.0",
    "pytest-httpx>=0.35.0",
    "pytest-cov>=4.1.0",
    "pytest-mock>=3.11.1",
    "pre-commit>=4.5.1",
]

[tool.ruff]
line-length = 88
indent-width = 4

[tool.ruff.lint]
# E: pycodestyle errors, F: Pyflakes, B: flake8-bugbear, I: isort,
# D: pydocstyle.
select = ["E", "F", "B", "I", "D"]
# E501 (line too long) is ignored for every file here, so it needs no
# per-file entry. D417 (undocumented argument) is also switched off.
ignore = ["D417", "E501"]

[tool.ruff.lint.per-file-ignores]
# Test code is exempt from the docstring rules.
"tests/**" = ["D"]
"*_test.py" = ["D"]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.mypy]
plugins = ["pydantic.mypy"]
# Resolve the package from the src/ layout.
mypy_path = "src"
explicit_package_bases = true
namespace_packages = true
follow_imports = "silent"
warn_redundant_casts = true
warn_unused_ignores = true
disallow_any_generics = true
# Bodies of unannotated functions are still checked, but annotations
# themselves are not mandatory.
check_untyped_defs = true
no_implicit_reexport = true
disallow_untyped_defs = false

# Type errors in the test suite are not reported.
[[tool.mypy.overrides]]
module = ["tests.*"]
ignore_errors = true

# Settings for the pydantic mypy plugin enabled in [tool.mypy].
[tool.pydantic-mypy]
# Synthesized __init__ rejects unknown keyword arguments.
init_forbid_extra = true
# Synthesized __init__ parameters use the declared field types.
init_typed = true
warn_required_dynamic_aliases = true

[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async test functions are collected as
# asyncio tests without an explicit marker.
asyncio_mode = "auto"
testpaths = ["tests"]
Loading
Loading