diff --git a/pyproject.toml b/pyproject.toml
index 4fd106947..16a6ac5c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.10.1"
+version = "2.10.2"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py
index 0c32ad6e9..ed1154516 100644
--- a/src/uipath/_cli/cli_eval.py
+++ b/src/uipath/_cli/cli_eval.py
@@ -205,6 +205,12 @@ def _resolve_model_settings_override(
     default=False,
     help="Resume execution from a previous suspended state",
 )
+@click.option(
+    "--verbose",
+    is_flag=True,
+    default=False,
+    help="Include agent execution output (trace, result) in the output file",
+)
 def eval(
     entrypoint: str | None,
     eval_set: str | None,
@@ -220,6 +226,7 @@ def eval(
     max_llm_concurrency: int,
     input_overrides: dict[str, Any],
     resume: bool,
+    verbose: bool,
 ) -> None:
     """Run an evaluation set against the agent.

@@ -272,6 +279,7 @@
     eval_context.report_coverage = report_coverage
     eval_context.input_overrides = input_overrides
     eval_context.resume = resume
+    eval_context.verbose = verbose

     try:
diff --git a/src/uipath/eval/runtime/_types.py b/src/uipath/eval/runtime/_types.py
index 8e1345824..cccc59a0d 100644
--- a/src/uipath/eval/runtime/_types.py
+++ b/src/uipath/eval/runtime/_types.py
@@ -55,7 +55,7 @@ class EvaluationResultDto(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

     score: float
-    details: str | BaseModel | None = None
+    details: str | dict[str, Any] | None = None
     evaluation_time: float | None = None

     @model_serializer(mode="wrap")
@@ -82,9 +82,17 @@ def from_evaluation_result(
         else:
             score = evaluation_result.score

+        # Convert BaseModel details to dict so Pydantic doesn't lose subclass fields
+        if isinstance(evaluation_result.details, BaseModel):
+            details: str | dict[str, Any] | None = (
+                evaluation_result.details.model_dump()
+            )
+        else:
+            details = evaluation_result.details
+
         return cls(
             score=score,
-            details=evaluation_result.details,
+            details=details,
             evaluation_time=evaluation_result.evaluation_time,
         )
diff --git a/tests/cli/eval/test_evaluate.py b/tests/cli/eval/test_evaluate.py
index c7ce64386..39f516263 100644
--- a/tests/cli/eval/test_evaluate.py
+++ b/tests/cli/eval/test_evaluate.py
@@ -120,7 +120,8 @@ async def dispose(self) -> None:
     )

     # Assert that the output is json-serializable
-    UiPathEvalOutput.model_validate(result.output).model_dump_json()
+    eval_output = UiPathEvalOutput.model_validate(result.output)
+    eval_output.model_dump_json()
     assert result.output
     output_dict = (
         result.output.model_dump()
@@ -128,16 +129,16 @@
         else result.output
     )
     assert isinstance(output_dict, dict)
-    assert (
-        output_dict["evaluationSetResults"][0]["evaluationRunResults"][0]["result"][
-            "score"
-        ]
-        == 1.0
-    )
-    assert (
-        output_dict["evaluationSetResults"][0]["evaluationRunResults"][0]["evaluatorId"]
-        == "ExactMatchEvaluator"
-    )
+    first_result = output_dict["evaluationSetResults"][0]["evaluationRunResults"][0]
+    assert first_result["result"]["score"] == 1.0
+    assert first_result["evaluatorId"] == "ExactMatchEvaluator"
+    # Verify details are properly serialized (not empty dict)
+    details = first_result["result"].get("details")
+    if details is not None:
+        assert details != {}, (
+            "details should not be an empty dict - BaseModel serialization bug"
+        )
+        assert isinstance(details, (str, dict))


 async def test_eval_runtime_generates_uuid_when_no_custom_id():
diff --git a/uv.lock b/uv.lock
index d5b170efa..0757c5b0f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2531,7 +2531,7 @@ wheels = [

 [[package]]
 name = "uipath"
-version = "2.10.1"
+version = "2.10.2"
 source = { editable = "." }
 dependencies = [
     { name = "applicationinsights" },
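
Note on the `EvaluationResultDto` change above: in Pydantic v2, a field annotated with a base class is serialized against the declared type, so fields that exist only on a subclass are silently dropped (Pydantic emits a serializer warning when this happens), which is why `from_evaluation_result` now calls `model_dump()` before assignment. A minimal sketch of the behavior, using hypothetical `DetailsDto`/`OldDto`/`NewDto` models that stand in for the SDK's actual types:

```python
from typing import Any

from pydantic import BaseModel


class DetailsDto(BaseModel):
    """Hypothetical subclass standing in for an evaluator's details payload."""

    reason: str = "exact match"


class OldDto(BaseModel):
    # Field declared as the base class, like the old `str | BaseModel | None`.
    details: BaseModel | None = None


class NewDto(BaseModel):
    # Field declared as a plain dict, like the new `str | dict[str, Any] | None`.
    details: dict[str, Any] | None = None


payload = DetailsDto()

# Serialized through the declared `BaseModel` annotation, the subclass's
# fields are dropped, leaving the empty dict the new test guards against.
print(OldDto(details=payload).model_dump())  # {'details': {}}

# Dumping the subclass to a dict first preserves its fields.
print(NewDto(details=payload.model_dump()).model_dump())
# {'details': {'reason': 'exact match'}}
```

Pydantic's `SerializeAsAny` annotation is the other common way to serialize by runtime type; converting to a `dict` inside `from_evaluation_result` instead keeps the DTO's declared schema explicit.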