From e3384b70d4f2f4baf5190e34adbecfa2d5afffe8 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:22:39 +0200 Subject: [PATCH 1/8] Move GET /datasets migration tests to the endpoint test file --- tests/routers/openml/datasets_get_test.py | 147 +++++++++++++++++- .../migration/datasets_migration_test.py | 141 ----------------- 2 files changed, 146 insertions(+), 142 deletions(-) diff --git a/tests/routers/openml/datasets_get_test.py b/tests/routers/openml/datasets_get_test.py index fe67abe..4b9fb33 100644 --- a/tests/routers/openml/datasets_get_test.py +++ b/tests/routers/openml/datasets_get_test.py @@ -1,5 +1,7 @@ """Tests for the GET /datasets/{dataset_id} endpoint.""" +import asyncio +import json import re from http import HTTPStatus @@ -8,11 +10,12 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection +import tests.constants from core.errors import DatasetNoAccessError, DatasetNotFoundError from database.users import User from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata -from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER +from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None: @@ -140,3 +143,145 @@ async def test_private_dataset_access( expdb_db=expdb_test, ) assert isinstance(dataset, DatasetMetadata) + + +# -- Migration Tests -- + + +@pytest.mark.parametrize( + "dataset_id", + range(1, 132), +) +async def test_dataset_response_is_identical( # noqa: C901, PLR0912 + dataset_id: int, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get(f"/datasets/{dataset_id}"), + php_api.get(f"/data/{dataset_id}"), + ) + + if py_response.status_code == HTTPStatus.FORBIDDEN: + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + else: + assert py_response.status_code == 
php_response.status_code + + if py_response.status_code != HTTPStatus.OK: + # RFC 9457: Python API now returns problem+json format + assert py_response.headers["content-type"] == "application/problem+json" + # Both APIs should return error responses in the same cases + assert py_response.json()["code"] == php_response.json()["error"]["code"] + old_error_message = php_response.json()["error"]["message"] + assert py_response.json()["detail"].startswith(old_error_message) + return + + try: + php_json = php_response.json()["data_set_description"] + except json.decoder.JSONDecodeError: + pytest.skip("A PHP error occurred on the test server.") + + if "div" in php_json: + pytest.skip("A PHP error occurred on the test server.") + + # There are a few changes between the old API and the new API, so we convert here: + # The new API has normalized `format` field: + php_json["format"] = php_json["format"].lower() + + # Pydantic HttpURL serialization omits port 80 for HTTP urls. + php_json["url"] = php_json["url"].replace(":80", "") + + # There is odd behavior in the live server that I don't want to recreate: + # when the creator is a list of csv names, it can either be a str or a list + # depending on whether the names are quoted. 
E.g.: + # '"Alice", "Bob"' -> ["Alice", "Bob"] + # 'Alice, Bob' -> 'Alice, Bob' + if ( + "creator" in php_json + and isinstance(php_json["creator"], str) + and len(php_json["creator"].split(",")) > 1 + ): + php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")] + + py_json = py_response.json() + if processing_data := py_json.get("processing_date"): + py_json["processing_date"] = str(processing_data).replace("T", " ") + + manual = [] + # ref test.openml.org/d/33 (contributor) and d/34 (creator) + # contributor/creator in database is '""' + # json content is [] + for field in ["contributor", "creator"]: + if py_json[field] == [""]: + py_json[field] = [] + manual.append(field) + + if isinstance(py_json["original_data_url"], list): + py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"]) + + for field, value in list(py_json.items()): + if field in manual: + continue + if isinstance(value, int): + py_json[field] = str(value) + elif isinstance(value, list) and len(value) == 1: + py_json[field] = str(value[0]) + if not py_json[field]: + del py_json[field] + + if "description" not in py_json: + py_json["description"] = [] + + assert py_json == php_json + + +@pytest.mark.parametrize( + "dataset_id", + [-1, 138, 100_000], +) +async def test_error_unknown_dataset( + dataset_id: int, + py_api: httpx.AsyncClient, +) -> None: + response = await py_api.get(f"/datasets/{dataset_id}") + + # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED" + assert response.status_code == HTTPStatus.NOT_FOUND + # RFC 9457: Python API now returns problem+json format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["code"] == "111" + # instead of 'Unknown dataset' + assert error["detail"].startswith("No dataset") + + +async def test_private_dataset_no_user_no_access( + py_api: httpx.AsyncClient, +) -> None: + response = await py_api.get("/datasets/130") + + # 
New response is 403: Forbidden instead of 412: PRECONDITION FAILED + assert response.status_code == HTTPStatus.FORBIDDEN + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["code"] == "112" + assert error["detail"].startswith("No access granted") + + +@pytest.mark.parametrize( + "api_key", + [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], +) +async def test_private_dataset_owner_access( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, + api_key: str, +) -> None: + [private_dataset] = tests.constants.PRIVATE_DATASET_ID + py_response, php_response = await asyncio.gather( + py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"), + php_api.get(f"/data/{private_dataset}?api_key={api_key}"), + ) + assert php_response.status_code == HTTPStatus.OK + assert py_response.status_code == php_response.status_code + assert py_response.json()["id"] == private_dataset diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 718c06f..b73e505 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -1,155 +1,14 @@ import asyncio -import json import re from http import HTTPStatus import httpx import pytest -import tests.constants from core.conversions import nested_remove_single_element_list from tests.users import ApiKey -@pytest.mark.parametrize( - "dataset_id", - range(1, 132), -) -async def test_dataset_response_is_identical( # noqa: C901, PLR0912 - dataset_id: int, - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get(f"/datasets/{dataset_id}"), - php_api.get(f"/data/{dataset_id}"), - ) - - if py_response.status_code == HTTPStatus.FORBIDDEN: - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - else: - assert py_response.status_code == 
php_response.status_code - - if py_response.status_code != HTTPStatus.OK: - # RFC 9457: Python API now returns problem+json format - assert py_response.headers["content-type"] == "application/problem+json" - # Both APIs should return error responses in the same cases - assert py_response.json()["code"] == php_response.json()["error"]["code"] - old_error_message = php_response.json()["error"]["message"] - assert py_response.json()["detail"].startswith(old_error_message) - return - - try: - php_json = php_response.json()["data_set_description"] - except json.decoder.JSONDecodeError: - pytest.skip("A PHP error occurred on the test server.") - - if "div" in php_json: - pytest.skip("A PHP error occurred on the test server.") - - # There are a few changes between the old API and the new API, so we convert here: - # The new API has normalized `format` field: - php_json["format"] = php_json["format"].lower() - - # Pydantic HttpURL serialization omits port 80 for HTTP urls. - php_json["url"] = php_json["url"].replace(":80", "") - - # There is odd behavior in the live server that I don't want to recreate: - # when the creator is a list of csv names, it can either be a str or a list - # depending on whether the names are quoted. 
E.g.: - # '"Alice", "Bob"' -> ["Alice", "Bob"] - # 'Alice, Bob' -> 'Alice, Bob' - if ( - "creator" in php_json - and isinstance(php_json["creator"], str) - and len(php_json["creator"].split(",")) > 1 - ): - php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")] - - py_json = py_response.json() - if processing_data := py_json.get("processing_date"): - py_json["processing_date"] = str(processing_data).replace("T", " ") - - manual = [] - # ref test.openml.org/d/33 (contributor) and d/34 (creator) - # contributor/creator in database is '""' - # json content is [] - for field in ["contributor", "creator"]: - if py_json[field] == [""]: - py_json[field] = [] - manual.append(field) - - if isinstance(py_json["original_data_url"], list): - py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"]) - - for field, value in list(py_json.items()): - if field in manual: - continue - if isinstance(value, int): - py_json[field] = str(value) - elif isinstance(value, list) and len(value) == 1: - py_json[field] = str(value[0]) - if not py_json[field]: - del py_json[field] - - if "description" not in py_json: - py_json["description"] = [] - - assert py_json == php_json - - -@pytest.mark.parametrize( - "dataset_id", - [-1, 138, 100_000], -) -async def test_error_unknown_dataset( - dataset_id: int, - py_api: httpx.AsyncClient, -) -> None: - response = await py_api.get(f"/datasets/{dataset_id}") - - # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED" - assert response.status_code == HTTPStatus.NOT_FOUND - # RFC 9457: Python API now returns problem+json format - assert response.headers["content-type"] == "application/problem+json" - error = response.json() - assert error["code"] == "111" - # instead of 'Unknown dataset' - assert error["detail"].startswith("No dataset") - - -async def test_private_dataset_no_user_no_access( - py_api: httpx.AsyncClient, -) -> None: - response = await py_api.get("/datasets/130") - - # 
New response is 403: Forbidden instead of 412: PRECONDITION FAILED - assert response.status_code == HTTPStatus.FORBIDDEN - assert response.headers["content-type"] == "application/problem+json" - error = response.json() - assert error["code"] == "112" - assert error["detail"].startswith("No access granted") - - -@pytest.mark.parametrize( - "api_key", - [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], -) -async def test_private_dataset_owner_access( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, - api_key: str, -) -> None: - [private_dataset] = tests.constants.PRIVATE_DATASET_ID - py_response, php_response = await asyncio.gather( - py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"), - php_api.get(f"/data/{private_dataset}?api_key={api_key}"), - ) - assert php_response.status_code == HTTPStatus.OK - assert py_response.status_code == php_response.status_code - assert py_response.json()["id"] == private_dataset - - @pytest.mark.mut @pytest.mark.parametrize( "dataset_id", From 3ccf010bc8e0e573dc793194cffc903c580adafe Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:27:31 +0200 Subject: [PATCH 2/8] Move migration tag tests to dataset tag test file --- tests/routers/openml/dataset_tag_test.py | 76 +++++++++++++++++++ .../migration/datasets_migration_test.py | 74 ------------------ 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index d11fc96..f1eab3c 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -1,9 +1,11 @@ +import re from http import HTTPStatus import httpx import pytest from sqlalchemy.ext.asyncio import AsyncConnection +from core.conversions import nested_remove_single_element_list from core.errors import TagAlreadyExistsError from database.datasets import get_tags_for from database.users import User @@ -92,3 +94,77 @@ async def test_dataset_tag_fails_if_tag_exists(expdb_test: 
AsyncConnection) -> N ) assert str(dataset_id) in e.value.detail assert tag in e.value.detail + + +# -- migration tests -- + + +@pytest.mark.mut +@pytest.mark.parametrize( + "dataset_id", + [*range(1, 10), 101, 131], +) +@pytest.mark.parametrize( + "api_key", + [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER], + ids=["Administrator", "regular user", "possible owner"], +) +@pytest.mark.parametrize( + "tag", + ["study_14", "totally_new_tag_for_migration_testing"], + ids=["typically existing tag", "new tag"], +) +async def test_dataset_tag_response_is_identical( + dataset_id: int, + tag: str, + api_key: str, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + # PHP request must happen first to check state, can't parallelize + php_response = await php_api.post( + "/data/tag", + data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, + ) + already_tagged = ( + php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + and "already tagged" in php_response.json()["error"]["message"] + ) + if not already_tagged: + # undo the tag, because we don't want to persist this change to the database + # Sometimes a change is already committed to the database even if an error occurs. + await php_api.post( + "/data/untag", + data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, + ) + if ( + php_response.status_code != HTTPStatus.OK + and php_response.json()["error"]["message"] == "An Elastic Search Exception occured." 
+ ): + pytest.skip("Encountered Elastic Search error.") + py_response = await py_api.post( + f"/datasets/tag?api_key={api_key}", + json={"data_id": dataset_id, "tag": tag}, + ) + + # RFC 9457: Tag conflict now returns 409 instead of 500 + if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: + assert py_response.status_code == HTTPStatus.CONFLICT + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." + assert re.match( + pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.", + string=py_response.json()["detail"], + ) + return + + assert py_response.status_code == php_response.status_code, php_response.json() + if py_response.status_code != HTTPStatus.OK: + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert py_response.json()["detail"] == php_response.json()["error"]["message"] + return + + php_json = php_response.json() + py_json = py_response.json() + py_json = nested_remove_single_element_list(py_json) + assert py_json == php_json diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index b73e505..65f4d23 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -5,80 +5,6 @@ import httpx import pytest -from core.conversions import nested_remove_single_element_list -from tests.users import ApiKey - - -@pytest.mark.mut -@pytest.mark.parametrize( - "dataset_id", - [*range(1, 10), 101, 131], -) -@pytest.mark.parametrize( - "api_key", - [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER], - ids=["Administrator", "regular user", "possible owner"], -) -@pytest.mark.parametrize( - "tag", - ["study_14", "totally_new_tag_for_migration_testing"], - ids=["typically existing tag", "new tag"], -) -async def test_dataset_tag_response_is_identical( - 
dataset_id: int, - tag: str, - api_key: str, - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - # PHP request must happen first to check state, can't parallelize - php_response = await php_api.post( - "/data/tag", - data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, - ) - already_tagged = ( - php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - and "already tagged" in php_response.json()["error"]["message"] - ) - if not already_tagged: - # undo the tag, because we don't want to persist this change to the database - # Sometimes a change is already committed to the database even if an error occurs. - await php_api.post( - "/data/untag", - data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, - ) - if ( - php_response.status_code != HTTPStatus.OK - and php_response.json()["error"]["message"] == "An Elastic Search Exception occured." - ): - pytest.skip("Encountered Elastic Search error.") - py_response = await py_api.post( - f"/datasets/tag?api_key={api_key}", - json={"data_id": dataset_id, "tag": tag}, - ) - - # RFC 9457: Tag conflict now returns 409 instead of 500 - if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: - assert py_response.status_code == HTTPStatus.CONFLICT - assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." 
- assert re.match( - pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.", - string=py_response.json()["detail"], - ) - return - - assert py_response.status_code == php_response.status_code, php_response.json() - if py_response.status_code != HTTPStatus.OK: - assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert py_response.json()["detail"] == php_response.json()["error"]["message"] - return - - php_json = php_response.json() - py_json = py_response.json() - py_json = nested_remove_single_element_list(py_json) - assert py_json == php_json - @pytest.mark.parametrize( "data_id", From c51b2fade8e01319865c289e94f2fe400a4999d4 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:30:13 +0200 Subject: [PATCH 3/8] Move dataset feature migration tests to its endpoint test file --- .../routers/openml/datasets_features_test.py | 49 ++++++++++++++++++ .../migration/datasets_migration_test.py | 50 ------------------- 2 files changed, 49 insertions(+), 50 deletions(-) delete mode 100644 tests/routers/openml/migration/datasets_migration_test.py diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py index 193b0f3..1fd8985 100644 --- a/tests/routers/openml/datasets_features_test.py +++ b/tests/routers/openml/datasets_features_test.py @@ -1,5 +1,7 @@ """Tests for the GET /datasets/features/{dataset_id} endpoint.""" +import asyncio +import re from http import HTTPStatus import httpx @@ -102,3 +104,50 @@ async def test_dataset_features_with_processing_error(expdb_test: AsyncConnectio async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None: with pytest.raises(DatasetNotFoundError): await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test) + + +# -- migration tests -- + + +@pytest.mark.parametrize( + "data_id", + list(range(1, 130)), +) +async def test_datasets_feature_is_identical( + data_id: int, + py_api: httpx.AsyncClient, + 
php_api: httpx.AsyncClient, +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get(f"/datasets/features/{data_id}"), + php_api.get(f"/data/features/{data_id}"), + ) + assert py_response.status_code == php_response.status_code + + if py_response.status_code != HTTPStatus.OK: + error = php_response.json()["error"] + assert py_response.json()["code"] == error["code"] + if error["message"] == "No features found. Additionally, dataset processed with error": + pattern = r"No features found. Additionally, dataset \d+ processed with error\." + assert re.match(pattern, py_response.json()["detail"]) + else: + assert py_response.json()["detail"] == error["message"] + return + + py_json = py_response.json() + for feature in py_json: + for key, value in list(feature.items()): + if key == "nominal_values": + # The old API uses `nominal_value` instead of `nominal_values` + values = feature.pop(key) + # The old API returns a str if there is only a single element + feature["nominal_value"] = values if len(values) > 1 else values[0] + elif key == "ontology": + # The old API returns a str if there is only a single element + values = feature.pop(key) + feature["ontology"] = values if len(values) > 1 else values[0] + else: + # The old API formats bool as string in lower-case + feature[key] = str(value) if not isinstance(value, bool) else str(value).lower() + php_features = php_response.json()["data_features"]["feature"] + assert py_json == php_features diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py deleted file mode 100644 index 65f4d23..0000000 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ /dev/null @@ -1,50 +0,0 @@ -import asyncio -import re -from http import HTTPStatus - -import httpx -import pytest - - -@pytest.mark.parametrize( - "data_id", - list(range(1, 130)), -) -async def test_datasets_feature_is_identical( - data_id: int, - py_api: 
httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get(f"/datasets/features/{data_id}"), - php_api.get(f"/data/features/{data_id}"), - ) - assert py_response.status_code == php_response.status_code - - if py_response.status_code != HTTPStatus.OK: - error = php_response.json()["error"] - assert py_response.json()["code"] == error["code"] - if error["message"] == "No features found. Additionally, dataset processed with error": - pattern = r"No features found. Additionally, dataset \d+ processed with error\." - assert re.match(pattern, py_response.json()["detail"]) - else: - assert py_response.json()["detail"] == error["message"] - return - - py_json = py_response.json() - for feature in py_json: - for key, value in list(feature.items()): - if key == "nominal_values": - # The old API uses `nominal_value` instead of `nominal_values` - values = feature.pop(key) - # The old API returns a str if there is only a single element - feature["nominal_value"] = values if len(values) > 1 else values[0] - elif key == "ontology": - # The old API returns a str if there is only a single element - values = feature.pop(key) - feature["ontology"] = values if len(values) > 1 else values[0] - else: - # The old API formats bool as string in lower-case - feature[key] = str(value) if not isinstance(value, bool) else str(value).lower() - php_features = php_response.json()["data_features"]["feature"] - assert py_json == php_features From 52a50c709e94a491710b7b66a151985133a9c179 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:34:13 +0200 Subject: [PATCH 4/8] Move migration test to endpoint test file --- .../routers/openml/evaluationmeasures_test.py | 36 +++++++++++++++++++ .../migration/evaluations_migration_test.py | 35 ------------------ 2 files changed, 36 insertions(+), 35 deletions(-) delete mode 100644 tests/routers/openml/migration/evaluations_migration_test.py diff --git 
a/tests/routers/openml/evaluationmeasures_test.py b/tests/routers/openml/evaluationmeasures_test.py index 29cef16..d09bd9a 100644 --- a/tests/routers/openml/evaluationmeasures_test.py +++ b/tests/routers/openml/evaluationmeasures_test.py @@ -1,4 +1,6 @@ +import asyncio from http import HTTPStatus +from typing import Any import httpx @@ -387,3 +389,37 @@ async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None: "stratified_sampling": True, }, ] + + +# -- migration test -- + + +async def test_evaluationmeasure_list_migration( + py_api: httpx.AsyncClient, php_api: httpx.AsyncClient +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get("/evaluationmeasure/list"), + php_api.get("/evaluationmeasure/list"), + ) + assert py_response.status_code == php_response.status_code + assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"] + + +async def test_estimation_procedure_list_migration( + py_api: httpx.AsyncClient, php_api: httpx.AsyncClient +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get("/estimationprocedure/list"), + php_api.get("/estimationprocedure/list"), + ) + assert py_response.status_code == php_response.status_code + expected = php_response.json()["estimationprocedures"]["estimationprocedure"] + + def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]: + procedure = {k: str(v) for k, v in procedure.items()} + if "stratified_sampling" in procedure: + procedure["stratified_sampling"] = procedure["stratified_sampling"].lower() + procedure["ttid"] = procedure.pop("task_type_id") + return procedure + + assert [py_to_php(procedure) for procedure in py_response.json()] == expected diff --git a/tests/routers/openml/migration/evaluations_migration_test.py b/tests/routers/openml/migration/evaluations_migration_test.py deleted file mode 100644 index 08fb75f..0000000 --- a/tests/routers/openml/migration/evaluations_migration_test.py +++ /dev/null @@ -1,35 +0,0 @@ 
-import asyncio -from typing import Any - -import httpx - - -async def test_evaluationmeasure_list( - py_api: httpx.AsyncClient, php_api: httpx.AsyncClient -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get("/evaluationmeasure/list"), - php_api.get("/evaluationmeasure/list"), - ) - assert py_response.status_code == php_response.status_code - assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"] - - -async def test_estimation_procedure_list( - py_api: httpx.AsyncClient, php_api: httpx.AsyncClient -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get("/estimationprocedure/list"), - php_api.get("/estimationprocedure/list"), - ) - assert py_response.status_code == php_response.status_code - expected = php_response.json()["estimationprocedures"]["estimationprocedure"] - - def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]: - procedure = {k: str(v) for k, v in procedure.items()} - if "stratified_sampling" in procedure: - procedure["stratified_sampling"] = procedure["stratified_sampling"].lower() - procedure["ttid"] = procedure.pop("task_type_id") - return procedure - - assert [py_to_php(procedure) for procedure in py_response.json()] == expected From 09ba85b35fb00df000013449d7b18710cd26d1e4 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:36:34 +0200 Subject: [PATCH 5/8] Separate out tests for different endpoints to different files --- ..._test.py => estimation_procedcure_test.py} | 89 ----------------- .../routers/openml/evaluation_measure_test.py | 96 +++++++++++++++++++ 2 files changed, 96 insertions(+), 89 deletions(-) rename tests/routers/openml/{evaluationmeasures_test.py => estimation_procedcure_test.py} (77%) create mode 100644 tests/routers/openml/evaluation_measure_test.py diff --git a/tests/routers/openml/evaluationmeasures_test.py b/tests/routers/openml/estimation_procedcure_test.py similarity index 77% rename from 
tests/routers/openml/evaluationmeasures_test.py rename to tests/routers/openml/estimation_procedcure_test.py index d09bd9a..a05b34d 100644 --- a/tests/routers/openml/evaluationmeasures_test.py +++ b/tests/routers/openml/estimation_procedcure_test.py @@ -5,84 +5,6 @@ import httpx -async def test_evaluationmeasure_list(py_api: httpx.AsyncClient) -> None: - response = await py_api.get("/evaluationmeasure/list") - assert response.status_code == HTTPStatus.OK - assert response.json() == [ - "area_under_roc_curve", - "average_cost", - "binominal_test", - "build_cpu_time", - "build_memory", - "c_index", - "chi-squared", - "class_complexity", - "class_complexity_gain", - "confusion_matrix", - "correlation_coefficient", - "cortana_quality", - "coverage", - "f_measure", - "information_gain", - "jaccard", - "kappa", - "kb_relative_information_score", - "kohavi_wolpert_bias_squared", - "kohavi_wolpert_error", - "kohavi_wolpert_sigma_squared", - "kohavi_wolpert_variance", - "kononenko_bratko_information_score", - "matthews_correlation_coefficient", - "mean_absolute_error", - "mean_class_complexity", - "mean_class_complexity_gain", - "mean_f_measure", - "mean_kononenko_bratko_information_score", - "mean_precision", - "mean_prior_absolute_error", - "mean_prior_class_complexity", - "mean_recall", - "mean_weighted_area_under_roc_curve", - "mean_weighted_f_measure", - "mean_weighted_precision", - "weighted_recall", - "number_of_instances", - "os_information", - "positives", - "precision", - "predictive_accuracy", - "prior_class_complexity", - "prior_entropy", - "probability", - "quality", - "ram_hours", - "recall", - "relative_absolute_error", - "root_mean_prior_squared_error", - "root_mean_squared_error", - "root_relative_squared_error", - "run_cpu_time", - "run_memory", - "run_virtual_memory", - "scimark_benchmark", - "single_point_area_under_roc_curve", - "total_cost", - "unclassified_instance_count", - "usercpu_time_millis", - "usercpu_time_millis_testing", - 
"usercpu_time_millis_training", - "webb_bias", - "webb_error", - "webb_variance", - "joint_entropy", - "pattern_team_auroc10", - "wall_clock_time_millis", - "wall_clock_time_millis_training", - "wall_clock_time_millis_testing", - "unweighted_recall", - ] - - async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None: response = await py_api.get("/estimationprocedure/list") assert response.status_code == HTTPStatus.OK @@ -394,17 +316,6 @@ async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None: # -- migration test -- -async def test_evaluationmeasure_list_migration( - py_api: httpx.AsyncClient, php_api: httpx.AsyncClient -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get("/evaluationmeasure/list"), - php_api.get("/evaluationmeasure/list"), - ) - assert py_response.status_code == php_response.status_code - assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"] - - async def test_estimation_procedure_list_migration( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient ) -> None: diff --git a/tests/routers/openml/evaluation_measure_test.py b/tests/routers/openml/evaluation_measure_test.py new file mode 100644 index 0000000..2df2483 --- /dev/null +++ b/tests/routers/openml/evaluation_measure_test.py @@ -0,0 +1,96 @@ +import asyncio +from http import HTTPStatus + +import httpx + + +async def test_evaluationmeasure_list(py_api: httpx.AsyncClient) -> None: + response = await py_api.get("/evaluationmeasure/list") + assert response.status_code == HTTPStatus.OK + assert response.json() == [ + "area_under_roc_curve", + "average_cost", + "binominal_test", + "build_cpu_time", + "build_memory", + "c_index", + "chi-squared", + "class_complexity", + "class_complexity_gain", + "confusion_matrix", + "correlation_coefficient", + "cortana_quality", + "coverage", + "f_measure", + "information_gain", + "jaccard", + "kappa", + "kb_relative_information_score", + 
"kohavi_wolpert_bias_squared", + "kohavi_wolpert_error", + "kohavi_wolpert_sigma_squared", + "kohavi_wolpert_variance", + "kononenko_bratko_information_score", + "matthews_correlation_coefficient", + "mean_absolute_error", + "mean_class_complexity", + "mean_class_complexity_gain", + "mean_f_measure", + "mean_kononenko_bratko_information_score", + "mean_precision", + "mean_prior_absolute_error", + "mean_prior_class_complexity", + "mean_recall", + "mean_weighted_area_under_roc_curve", + "mean_weighted_f_measure", + "mean_weighted_precision", + "weighted_recall", + "number_of_instances", + "os_information", + "positives", + "precision", + "predictive_accuracy", + "prior_class_complexity", + "prior_entropy", + "probability", + "quality", + "ram_hours", + "recall", + "relative_absolute_error", + "root_mean_prior_squared_error", + "root_mean_squared_error", + "root_relative_squared_error", + "run_cpu_time", + "run_memory", + "run_virtual_memory", + "scimark_benchmark", + "single_point_area_under_roc_curve", + "total_cost", + "unclassified_instance_count", + "usercpu_time_millis", + "usercpu_time_millis_testing", + "usercpu_time_millis_training", + "webb_bias", + "webb_error", + "webb_variance", + "joint_entropy", + "pattern_team_auroc10", + "wall_clock_time_millis", + "wall_clock_time_millis_training", + "wall_clock_time_millis_testing", + "unweighted_recall", + ] + + +# -- migration test -- + + +async def test_evaluationmeasure_list_migration( + py_api: httpx.AsyncClient, php_api: httpx.AsyncClient +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get("/evaluationmeasure/list"), + php_api.get("/evaluationmeasure/list"), + ) + assert py_response.status_code == php_response.status_code + assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"] From a8e4506899b5899a0e549c79b652f3b7ce144e51 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 11:52:46 +0200 Subject: [PATCH 6/8] Move flow migration tests 
to respective endpoint files --- tests/routers/openml/flows_exists_test.py | 46 ++++++++ tests/routers/openml/flows_get_test.py | 58 ++++++++++ .../openml/migration/flows_migration_test.py | 102 ------------------ 3 files changed, 104 insertions(+), 102 deletions(-) delete mode 100644 tests/routers/openml/migration/flows_migration_test.py diff --git a/tests/routers/openml/flows_exists_test.py b/tests/routers/openml/flows_exists_test.py index d767b9a..bb09edd 100644 --- a/tests/routers/openml/flows_exists_test.py +++ b/tests/routers/openml/flows_exists_test.py @@ -1,3 +1,5 @@ +import asyncio +import re from http import HTTPStatus import httpx @@ -79,3 +81,47 @@ async def test_flow_exists_handles_flow_not_found( await flow_exists("foo", "bar", expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND assert error.value.uri == FlowNotFoundError.uri + + +# -- migration tests -- + + +async def test_flow_exists_not( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + path = "exists/foo/bar" + py_response, php_response = await asyncio.gather( + py_api.get(f"/flows/{path}"), + php_api.get(f"/flow/{path}"), + ) + + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.status_code == HTTPStatus.OK + + assert php_response.json() == {"flow_exists": {"exists": "false", "id": str(-1)}} + # RFC 9457: Python API now returns problem+json format + error = py_response.json() + assert re.match( + pattern=r"Flow with name \S+ and external version \S+ not found.", + string=error["detail"], + ) + + +@pytest.mark.mut +async def test_flow_exists_migration( + persisted_flow: Flow, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + path = f"exists/{persisted_flow.name}/{persisted_flow.external_version}" + py_response, php_response = await asyncio.gather( + py_api.get(f"/flows/{path}"), + php_api.get(f"/flow/{path}"), + ) + + assert py_response.status_code == php_response.status_code, php_response.content + + 
expect_php = {"flow_exists": {"exists": "true", "id": str(persisted_flow.id)}} + assert php_response.json() == expect_php + assert py_response.json() == {"flow_id": persisted_flow.id} diff --git a/tests/routers/openml/flows_get_test.py b/tests/routers/openml/flows_get_test.py index e24e705..17bbfcc 100644 --- a/tests/routers/openml/flows_get_test.py +++ b/tests/routers/openml/flows_get_test.py @@ -1,7 +1,15 @@ +import asyncio from http import HTTPStatus +from typing import Any import deepdiff.diff import httpx +import pytest + +from core.conversions import ( + nested_remove_single_element_list, + nested_str_to_num, +) async def test_get_flow_no_subflow(py_api: httpx.AsyncClient) -> None: @@ -302,3 +310,53 @@ async def test_get_flow_with_subflow(py_api: httpx.AsyncClient) -> None: } difference = deepdiff.diff.DeepDiff(response.json(), expected, ignore_order=True) assert not difference + + +# -- migration test -- + + +@pytest.mark.parametrize( + "flow_id", + range(1, 16), +) +async def test_get_flow_equal( + flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get(f"/flows/{flow_id}"), + php_api.get(f"/flow/{flow_id}"), + ) + assert py_response.status_code == HTTPStatus.OK + + py_json = py_response.json() + + # PHP sets parameter default value to [], None is more appropriate, omission is considered + # Similar for the default "identifier" of subflows. 
+ # Subflow field (old: component) is omitted if empty + def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]: + for parameter in flow["parameter"]: + if parameter["default_value"] is None: + parameter["default_value"] = [] + for subflow in flow["subflows"]: + subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"]) + if subflow["identifier"] is None: + subflow["identifier"] = [] + flow["component"] = flow.pop("subflows") + if flow["component"] == []: + flow.pop("component") + return flow + + py_json = convert_flow_naming_and_defaults(py_json) + py_json = nested_remove_single_element_list(py_json) + + php_json = php_response.json()["flow"] + # The reason we don't transform py_json to str is that it becomes harder to ignore numeric type + # differences (e.g., '1.0' vs '1') + php_json = nested_str_to_num(php_json) + difference = deepdiff.diff.DeepDiff( + py_json, + php_json, + ignore_order=True, + ignore_numeric_type_changes=True, + ) + assert not difference diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py deleted file mode 100644 index 2ef7da1..0000000 --- a/tests/routers/openml/migration/flows_migration_test.py +++ /dev/null @@ -1,102 +0,0 @@ -import asyncio -import re -from http import HTTPStatus -from typing import Any - -import deepdiff -import httpx -import pytest - -from core.conversions import ( - nested_remove_single_element_list, - nested_str_to_num, -) -from tests.conftest import Flow - - -async def test_flow_exists_not( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - path = "exists/foo/bar" - py_response, php_response = await asyncio.gather( - py_api.get(f"/flows/{path}"), - php_api.get(f"/flow/{path}"), - ) - - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert php_response.status_code == HTTPStatus.OK - - assert php_response.json() == {"flow_exists": {"exists": "false", "id": str(-1)}} - # RFC 9457: Python 
API now returns problem+json format - error = py_response.json() - assert re.match( - pattern=r"Flow with name \S+ and external version \S+ not found.", - string=error["detail"], - ) - - -@pytest.mark.mut -async def test_flow_exists( - persisted_flow: Flow, - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - path = f"exists/{persisted_flow.name}/{persisted_flow.external_version}" - py_response, php_response = await asyncio.gather( - py_api.get(f"/flows/{path}"), - php_api.get(f"/flow/{path}"), - ) - - assert py_response.status_code == php_response.status_code, php_response.content - - expect_php = {"flow_exists": {"exists": "true", "id": str(persisted_flow.id)}} - assert php_response.json() == expect_php - assert py_response.json() == {"flow_id": persisted_flow.id} - - -@pytest.mark.parametrize( - "flow_id", - range(1, 16), -) -async def test_get_flow_equal( - flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get(f"/flows/{flow_id}"), - php_api.get(f"/flow/{flow_id}"), - ) - assert py_response.status_code == HTTPStatus.OK - - py_json = py_response.json() - - # PHP sets parameter default value to [], None is more appropriate, omission is considered - # Similar for the default "identifier" of subflows. 
- # Subflow field (old: component) is omitted if empty - def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]: - for parameter in flow["parameter"]: - if parameter["default_value"] is None: - parameter["default_value"] = [] - for subflow in flow["subflows"]: - subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"]) - if subflow["identifier"] is None: - subflow["identifier"] = [] - flow["component"] = flow.pop("subflows") - if flow["component"] == []: - flow.pop("component") - return flow - - py_json = convert_flow_naming_and_defaults(py_json) - py_json = nested_remove_single_element_list(py_json) - - php_json = php_response.json()["flow"] - # The reason we don't transform py_json to str is that it becomes harder to ignore numeric type - # differences (e.g., '1.0' vs '1') - php_json = nested_str_to_num(php_json) - difference = deepdiff.diff.DeepDiff( - py_json, - php_json, - ignore_order=True, - ignore_numeric_type_changes=True, - ) - assert not difference From 2c46fb6d2ee217c83597fc36285801c81902c62d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 14:32:09 +0200 Subject: [PATCH 7/8] Move other migration tests to respective files --- tests/routers/openml/migration/__init__.py | 0 .../openml/migration/runs_migration_test.py | 80 ----- .../openml/migration/setups_migration_test.py | 329 ------------------ .../migration/studies_migration_test.py | 38 -- .../openml/migration/tasks_migration_test.py | 226 ------------ tests/routers/openml/runs_trace_test.py | 74 ++++ tests/routers/openml/setups_get_test.py | 54 +++ tests/routers/openml/setups_tag_test.py | 153 +++++++- tests/routers/openml/setups_untag_test.py | 123 +++++++ tests/routers/openml/study_get_test.py | 36 ++ tests/routers/openml/task_get_test.py | 57 +++ tests/routers/openml/task_list_test.py | 166 ++++++++- 12 files changed, 660 insertions(+), 676 deletions(-) delete mode 100644 tests/routers/openml/migration/__init__.py delete mode 100644 
tests/routers/openml/migration/runs_migration_test.py delete mode 100644 tests/routers/openml/migration/setups_migration_test.py delete mode 100644 tests/routers/openml/migration/studies_migration_test.py delete mode 100644 tests/routers/openml/migration/tasks_migration_test.py diff --git a/tests/routers/openml/migration/__init__.py b/tests/routers/openml/migration/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py deleted file mode 100644 index 826aa18..0000000 --- a/tests/routers/openml/migration/runs_migration_test.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Migration tests comparing PHP and Python API responses for run trace endpoints.""" - -import asyncio -from http import HTTPStatus -from typing import Any - -import deepdiff -import httpx -import pytest - -from core.conversions import nested_num_to_str - -_SERVER_RUNS = [*range(24, 40), *range(134, 140), 999_999_999] - - -@pytest.mark.parametrize("run_id", _SERVER_RUNS) -async def test_get_run_trace_equal( - run_id: int, - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - """Test that Python and PHP run trace responses are equivalent after normalization.""" - py_response, php_response = await asyncio.gather( - py_api.get(f"/run/trace/{run_id}"), - php_api.get(f"/run/trace/{run_id}"), - ) - if php_response.status_code == HTTPStatus.OK: - _assert_trace_response_success(py_response, php_response) - return - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - - php_error = php_response.json()["error"] - py_error = py_response.json() - assert py_error["code"] == php_error["code"] - if php_error["code"] == "571": - assert php_error["message"] == "Run not found." - assert py_error["detail"] == f"Run {run_id} not found." 
- elif php_error["code"] == "572": - assert php_error["message"] == "No successful trace associated with this run." - assert py_error["detail"] == f"No trace found for run {run_id}." - else: - msg = f"Unknown error code {php_error['code']} for run {run_id}." - raise AssertionError(msg) - - -def _assert_trace_response_success( - py_response: httpx.Response, php_response: httpx.Response -) -> None: - assert py_response.status_code == HTTPStatus.OK - assert php_response.status_code == HTTPStatus.OK - - py_json = py_response.json() - - # PHP nests response under "trace" key — match that structure - py_json = {"trace": py_json} - - # PHP uses "trace_iteration" key, Python uses "trace" - py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace") - - # PHP returns all numeric values as strings — normalize Python response - py_json = nested_num_to_str(py_json) - - def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]: - """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison.""" - copied = payload.copy() - copied["trace"] = copied["trace"].copy() - copied["trace"]["trace_iteration"] = sorted( - copied["trace"]["trace_iteration"], - key=lambda row: (int(row["repeat"]), int(row["fold"]), int(row["iteration"])), - ) - return copied - - differences = deepdiff.diff.DeepDiff( - _sort_trace(py_json), - _sort_trace(php_response.json()), - ignore_order=False, - ) - assert not differences diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py deleted file mode 100644 index 34613fc..0000000 --- a/tests/routers/openml/migration/setups_migration_test.py +++ /dev/null @@ -1,329 +0,0 @@ -import asyncio -import contextlib -import re -from collections.abc import AsyncIterator, Callable, Iterable -from contextlib import AbstractAsyncContextManager -from http import HTTPStatus - -import httpx -import pytest -from sqlalchemy import text -from sqlalchemy.ext.asyncio import 
AsyncConnection - -from core.conversions import nested_remove_values, nested_str_to_num -from tests.conftest import temporary_records -from tests.users import OWNER_USER, ApiKey - - -@pytest.fixture -def temporary_tags( - expdb_test: AsyncConnection, -) -> Callable[..., AbstractAsyncContextManager[None]]: - @contextlib.asynccontextmanager - async def _temporary_tags( - tags: Iterable[str], setup_id: int, *, persist: bool = False - ) -> AsyncIterator[None]: - insert_queries = [ - ( - "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);", - {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id}, - ) - for tag in tags - ] - delete_queries = [ - ( - "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag", - {"setup_id": setup_id, "tag": tag}, - ) - for tag in tags - ] - async with temporary_records( - connection=expdb_test, - insert_queries=insert_queries, - delete_queries=delete_queries, - persist=persist, - ): - yield - - return _temporary_tags - - -@pytest.mark.mut -@pytest.mark.parametrize( - "api_key", - [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER], - ids=["Administrator", "non-owner", "tag owner"], -) -@pytest.mark.parametrize( - "other_tags", - [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]], - ids=["none", "one tag", "two tags"], -) -async def test_setup_untag_response_is_identical_when_tag_exists( - api_key: str, - other_tags: list[str], - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, - temporary_tags: Callable[..., AbstractAsyncContextManager[None]], -) -> None: - setup_id = 1 - tag = "totally_new_tag_for_migration_testing" - - all_tags = [tag, *other_tags] - async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True): - php_response = await php_api.post( - "/setup/untag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ) - - # expdb_test transaction shared with Python API, - # no commit needed and rolled back at the end of the test - async with 
temporary_tags(tags=all_tags, setup_id=setup_id): - py_response = await py_api.post( - f"/setup/untag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ) - - if py_response.status_code == HTTPStatus.OK: - assert py_response.status_code == php_response.status_code - php_untag = php_response.json()["setup_untag"] - py_untag = py_response.json()["setup_untag"] - assert py_untag["id"] == php_untag["id"] - if tags := php_untag.get("tag"): - if isinstance(tags, str): - assert py_untag["tag"][0] == tags - else: - assert py_untag["tag"] == tags - else: - assert py_untag["tag"] == [] - return - - code, message = php_response.json()["error"].values() - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.FORBIDDEN - assert py_response.json()["code"] == code - assert message == "Tag is not owned by you" - assert re.match( - r"You may not remove tag \S+ of setup \d+ because it was not created by you.", - py_response.json()["detail"], - ) - - -async def test_setup_untag_response_is_identical_setup_doesnt_exist( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - setup_id = 999999 - tag = "totally_new_tag_for_migration_testing" - api_key = ApiKey.SOME_USER - - php_response, py_response = await asyncio.gather( - php_api.post( - "/setup/untag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ), - py_api.post( - f"/setup/untag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ), - ) - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert php_response.json()["error"]["message"] == "Entity not found." 
- assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert re.match( - r"Setup \d+ not found.", - py_response.json()["detail"], - ) - - -async def test_setup_untag_response_is_identical_tag_doesnt_exist( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - setup_id = 1 - tag = "totally_new_tag_for_migration_testing" - api_key = ApiKey.SOME_USER - - php_response, py_response = await asyncio.gather( - php_api.post( - "/setup/untag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ), - py_api.post( - f"/setup/untag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ), - ) - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert php_response.json()["error"]["message"] == "Tag not found." - assert re.match( - r"Setup \d+ does not have tag '\S+'.", - py_response.json()["detail"], - ) - - -@pytest.mark.mut -@pytest.mark.parametrize( - "api_key", - [ApiKey.ADMIN, ApiKey.SOME_USER], - ids=["Administrator", "non-owner"], -) -@pytest.mark.parametrize( - "other_tags", - [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]], - ids=["none", "one tag", "two tags"], -) -async def test_setup_tag_response_is_identical_when_tag_doesnt_exist( # noqa: PLR0913 - api_key: str, - other_tags: list[str], - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, - expdb_test: AsyncConnection, - temporary_tags: Callable[..., AbstractAsyncContextManager[None]], -) -> None: - setup_id = 1 - tag = "totally_new_tag_for_migration_testing" - - async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True): - php_response = await php_api.post( - "/setup/tag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ) - - await expdb_test.execute( - text("DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag"), - 
parameters={"setup_id": setup_id, "tag": tag}, - ) - await expdb_test.commit() - - async with temporary_tags(tags=other_tags, setup_id=setup_id): - py_response = await py_api.post( - f"/setup/tag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ) - - assert py_response.status_code == HTTPStatus.OK - assert py_response.status_code == php_response.status_code - php_tag = php_response.json()["setup_tag"] - py_tag = py_response.json()["setup_tag"] - assert py_tag["id"] == php_tag["id"] - if tags := php_tag.get("tag"): - if isinstance(tags, str): - assert py_tag["tag"][0] == tags - else: - assert set(py_tag["tag"]) == set(tags) - else: - assert py_tag["tag"] == [] - - -async def test_setup_tag_response_is_identical_setup_doesnt_exist( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - setup_id = 999999 - tag = "totally_new_tag_for_migration_testing" - api_key = ApiKey.SOME_USER - - php_response, py_response = await asyncio.gather( - php_api.post( - "/setup/tag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ), - py_api.post( - f"/setup/tag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ), - ) - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert php_response.json()["error"]["message"] == "Entity not found." 
- assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert re.match( - r"Setup \d+ not found.", - py_response.json()["detail"], - ) - - -@pytest.mark.mut -async def test_setup_tag_response_is_identical_tag_already_exists( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, - temporary_tags: Callable[..., AbstractAsyncContextManager[None]], -) -> None: - setup_id = 1 - tag = "totally_new_tag_for_migration_testing" - api_key = ApiKey.SOME_USER - - async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True): - # Both APIs can be tested in parallel since the tag is already persisted - php_response, py_response = await asyncio.gather( - php_api.post( - "/setup/tag", - data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, - ), - py_api.post( - f"/setup/tag?api_key={api_key}", - json={"setup_id": setup_id, "tag": tag}, - ), - ) - - assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - assert py_response.status_code == HTTPStatus.CONFLICT - assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." - assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." - - -async def test_get_setup_response_is_identical_setup_doesnt_exist( - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - setup_id = 999999 - - php_response, py_response = await asyncio.gather( - php_api.get(f"/setup/{setup_id}"), - py_api.get(f"/setup/{setup_id}"), - ) - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert php_response.json()["error"]["message"] == "Unknown setup" - assert py_response.json()["code"] == php_response.json()["error"]["code"] - assert py_response.json()["detail"] == f"Setup {setup_id} not found." 
- - -@pytest.mark.parametrize("setup_id", range(1, 125)) -async def test_get_setup_response_is_identical( - setup_id: int, - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - php_response, py_response = await asyncio.gather( - php_api.get(f"/setup/{setup_id}"), - py_api.get(f"/setup/{setup_id}"), - ) - - if php_response.status_code == HTTPStatus.PRECONDITION_FAILED: - assert py_response.status_code == HTTPStatus.NOT_FOUND - return - - assert php_response.status_code == HTTPStatus.OK - assert py_response.status_code == HTTPStatus.OK - - php_json = php_response.json() - - # PHP returns integer fields as strings. To compare, we recursively convert string digits - # to integers. - # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. - php_json = nested_str_to_num(php_json) - php_json = nested_remove_values(php_json, values=[[], None]) - - py_json = nested_str_to_num(py_response.json()) - py_json = nested_remove_values(py_json, values=[[], None]) - - assert py_json == php_json diff --git a/tests/routers/openml/migration/studies_migration_test.py b/tests/routers/openml/migration/studies_migration_test.py deleted file mode 100644 index fc1340c..0000000 --- a/tests/routers/openml/migration/studies_migration_test.py +++ /dev/null @@ -1,38 +0,0 @@ -import asyncio - -import deepdiff -import httpx - -from core.conversions import nested_num_to_str, nested_remove_values - - -async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None: - py_response, php_response = await asyncio.gather( - py_api.get("/studies/1"), - php_api.get("/study/1"), - ) - assert py_response.status_code == php_response.status_code - - py_json = py_response.json() - # New implementation is typed - py_json = nested_num_to_str(py_json) - # New implementation has same fields even if empty - py_json = nested_remove_values(py_json, values=[None]) - py_json["tasks"] = {"task_id": py_json.pop("task_ids")} - 
py_json["data"] = {"data_id": py_json.pop("data_ids")} - if runs := py_json.pop("run_ids", None): - py_json["runs"] = {"run_id": runs} - if flows := py_json.pop("flow_ids", None): - py_json["flows"] = {"flow_id": flows} - if setups := py_json.pop("setup_ids", None): - py_json["setup"] = {"setup_id": setups} - - # New implementation is not nested - py_json = {"study": py_json} - difference = deepdiff.diff.DeepDiff( - py_json, - php_response.json(), - ignore_order=True, - ignore_numeric_type_changes=True, - ) - assert not difference diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py deleted file mode 100644 index ea3226b..0000000 --- a/tests/routers/openml/migration/tasks_migration_test.py +++ /dev/null @@ -1,226 +0,0 @@ -import asyncio -from http import HTTPStatus -from typing import Any, cast - -import deepdiff -import httpx -import pytest - -from core.conversions import ( - nested_num_to_str, - nested_remove_single_element_list, - nested_remove_values, -) -from routers.dependencies import LIMIT_MAX - - -@pytest.mark.parametrize( - "task_id", - range(1, 1306), -) -async def test_get_task_equal( - task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient -) -> None: - py_response, php_response = await asyncio.gather( - py_api.get(f"/tasks/{task_id}"), - php_api.get(f"/task/{task_id}"), - ) - assert py_response.status_code == HTTPStatus.OK - assert php_response.status_code == HTTPStatus.OK - - py_json = py_response.json() - # Some fields are renamed (old = tag, new = tags) - py_json["tag"] = py_json.pop("tags") - py_json["task_id"] = py_json.pop("id") - py_json["task_name"] = py_json.pop("name") - # PHP is not typed *and* automatically removes None values - py_json = nested_remove_values(py_json, values=[None]) - py_json = nested_num_to_str(py_json) - # It also removes "value" entries for parameters if the list is empty, - # it does not remove *all* empty lists, e.g., for cost_matrix 
input they are kept - estimation_procedure = next( - v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v - ) - if "parameter" in estimation_procedure: - estimation_procedure["parameter"] = [ - {k: v for k, v in parameter.items() if v != []} - for parameter in estimation_procedure["parameter"] - ] - # Fields that may return in a list now always return a list - py_json = nested_remove_single_element_list(py_json) - # Tags are not returned if they are an empty list: - if py_json["tag"] == []: - py_json.pop("tag") - - # The response is no longer nested - py_json = {"task": py_json} - - differences = deepdiff.diff.DeepDiff( - py_json, - php_response.json(), - ignore_order=True, - ) - assert not differences - - -# Task list no-results error code is 482 (unlike datasets which uses 372). -_TASK_LIST_NO_RESULTS_CODE = "482" - - -def _build_php_task_list_path(php_params: dict[str, Any]) -> str: - """Build a PHP-style path for /task/list with path-encoded filter parameters.""" - if not php_params: - return "/task/list" - parts = "/".join(f"{k}/{v}" for k, v in php_params.items()) - return f"/task/list/{parts}" - - -def _normalize_py_task(task: dict[str, Any]) -> dict[str, Any]: - """Normalize a single Python task list entry to match PHP format. - - PHP (XML-to-JSON) returns single-element arrays as plain values, not lists. - PHP returns task_id, task_type_id, and did as integers (same for Python). - and completely omits the "tag" field for all tasks in the list endpoint. - """ - t = nested_remove_single_element_list(task.copy()) - - # PHP's list endpoint does not return tags AT ALL - t.pop("tag", None) - - # PHP omits qualities where value is None string - if "quality" in t: - t["quality"] = [q for q in t["quality"] if q.get("value") != "None"] - - return cast("dict[str, Any]", t) - - -# Filter combos: (php_path_params, python_body_extras) -# PHP uses path-based filter keys (e.g. "type"), Python uses JSON body keys (e.g. 
"task_type_id") -_FILTER_COMBOS: list[tuple[dict[str, Any], dict[str, Any]]] = [ - ({"type": 1}, {"task_type_id": 1}), # by task type - ({"tag": "OpenML100"}, {"tag": "OpenML100"}), # by tag - ({"type": 1, "tag": "OpenML100"}, {"task_type_id": 1, "tag": "OpenML100"}), # combined - ({"data_name": "iris"}, {"data_name": "iris"}), # by dataset name - ({"data_id": 61}, {"data_id": [61]}), # by dataset id - ({"data_tag": "study_14"}, {"data_tag": "study_14"}), # by dataset tag - ({"number_instances": "150"}, {"number_instances": "150"}), # quality filter - ( - {"data_id": 61, "number_instances": "150"}, - {"data_id": [61], "number_instances": "150"}, - ), -] - -_FILTER_IDS = [ - "type", - "tag", - "type_and_tag", - "data_name", - "data_id", - "data_tag", - "number_instances", - "data_and_quality", -] - - -@pytest.mark.parametrize( - ("php_params", "py_extra"), - _FILTER_COMBOS, - ids=_FILTER_IDS, -) -async def test_list_tasks_equal( - php_params: dict[str, Any], - py_extra: dict[str, Any], - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - """Python and PHP task list responses contain the same tasks for the same filters. - - Known differences documented here: - - PHP wraps response in {"tasks": {"task": [...]}}, Python returns a flat list. - - PHP uses XML-to-JSON which collapses single-element arrays into plain values. - - PHP omits the "tag" key when a task has no tags; Python returns "tag": []. - - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND. 
- """ - php_path = _build_php_task_list_path(php_params) - py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}} - py_response, php_response = await asyncio.gather( - py_api.post("/tasks/list", json=py_body), - php_api.get(php_path), - ) - - # Error case: no results — PHP returns 412, Python returns 404 - if php_response.status_code == HTTPStatus.PRECONDITION_FAILED: - assert py_response.status_code == HTTPStatus.NOT_FOUND - assert py_response.headers["content-type"] == "application/problem+json" - assert php_response.json()["error"]["code"] == _TASK_LIST_NO_RESULTS_CODE - assert py_response.json()["code"] == _TASK_LIST_NO_RESULTS_CODE - return - - assert php_response.status_code == HTTPStatus.OK - assert py_response.status_code == HTTPStatus.OK - - php_tasks_raw = php_response.json()["tasks"]["task"] - php_tasks: list[dict[str, Any]] = ( - php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw] - ) - php_tasks = php_tasks[:LIMIT_MAX] - py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()] - - php_ids = {int(t["task_id"]) for t in php_tasks} - py_ids = {int(t["task_id"]) for t in py_tasks} - - assert py_ids == php_ids, ( - f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}" - ) - - # Compare only the tasks PHP returned — per-task deepdiff for clear error messages - py_by_id = {int(t["task_id"]): t for t in py_tasks} - php_by_id = {int(t["task_id"]): t for t in php_tasks} - for task_id in php_ids: - differences = deepdiff.diff.DeepDiff( - py_by_id[task_id], - php_by_id[task_id], - ignore_order=True, - ) - assert not differences, f"Differences for task {task_id}: {differences}" - - -@pytest.mark.parametrize( - ("php_params", "py_extra"), - [ - ({"tag": "nonexistent_tag_xyz_abc"}, {"tag": "nonexistent_tag_xyz_abc"}), - ({"type": 9999}, {"task_type_id": 9999}), - ({"data_name": "nonexistent_dataset_xyz"}, {"data_name": "nonexistent_dataset_xyz"}), - ], - ids=["bad_tag", "bad_type", 
"bad_data_name"], -) -async def test_list_tasks_no_results_matches_php( - php_params: dict[str, Any], - py_extra: dict[str, Any], - py_api: httpx.AsyncClient, - php_api: httpx.AsyncClient, -) -> None: - """Both APIs return a "no results" error for filters matching nothing. - - Documented differences: - - PHP returns 412 PRECONDITION_FAILED; Python returns 404 NOT_FOUND. - - PHP message: "No results"; Python detail: "No tasks match the search criteria." - """ - php_path = _build_php_task_list_path(php_params) - py_response, php_response = await asyncio.gather( - py_api.post("/tasks/list", json=py_extra), - php_api.get(php_path), - ) - - assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED - assert py_response.status_code == HTTPStatus.NOT_FOUND - - php_error = php_response.json()["error"] - py_error = py_response.json() - - # Error codes should be the same - assert php_error["code"] == _TASK_LIST_NO_RESULTS_CODE - assert py_error["code"] == _TASK_LIST_NO_RESULTS_CODE - assert php_error["message"] == "No results" - assert py_error["detail"] == "No tasks match the search criteria." 
- assert py_response.headers["content-type"] == "application/problem+json" diff --git a/tests/routers/openml/runs_trace_test.py b/tests/routers/openml/runs_trace_test.py index 75b8f01..11fd10a 100644 --- a/tests/routers/openml/runs_trace_test.py +++ b/tests/routers/openml/runs_trace_test.py @@ -1,10 +1,14 @@ """Tests for the GET /run/trace/{run_id} endpoint.""" +import asyncio from http import HTTPStatus +from typing import Any +import deepdiff import httpx import pytest +from core.conversions import nested_num_to_str from core.errors import RunNotFoundError, RunTraceNotFoundError @@ -47,3 +51,73 @@ async def test_get_run_trace_run_not_found(run_id: int, py_api: httpx.AsyncClien assert body["type"] == RunNotFoundError.uri assert body["title"] == RunNotFoundError.title assert body["status"] == HTTPStatus.NOT_FOUND + + +_SERVER_RUNS = [*range(24, 40), *range(134, 140), 999_999_999] + + +@pytest.mark.parametrize("run_id", _SERVER_RUNS) +async def test_get_run_trace_equal( + run_id: int, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + """Test that Python and PHP run trace responses are equivalent after normalization.""" + py_response, php_response = await asyncio.gather( + py_api.get(f"/run/trace/{run_id}"), + php_api.get(f"/run/trace/{run_id}"), + ) + if php_response.status_code == HTTPStatus.OK: + _assert_trace_response_success(py_response, php_response) + return + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + + php_error = php_response.json()["error"] + py_error = py_response.json() + assert py_error["code"] == php_error["code"] + if php_error["code"] == "571": + assert php_error["message"] == "Run not found." + assert py_error["detail"] == f"Run {run_id} not found." + elif php_error["code"] == "572": + assert php_error["message"] == "No successful trace associated with this run." + assert py_error["detail"] == f"No trace found for run {run_id}." 
+ else: + msg = f"Unknown error code {php_error['code']} for run {run_id}." + raise AssertionError(msg) + + +def _assert_trace_response_success( + py_response: httpx.Response, php_response: httpx.Response +) -> None: + assert py_response.status_code == HTTPStatus.OK + assert php_response.status_code == HTTPStatus.OK + + py_json = py_response.json() + + # PHP nests response under "trace" key — match that structure + py_json = {"trace": py_json} + + # PHP uses "trace_iteration" key, Python uses "trace" + py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace") + + # PHP returns all numeric values as strings — normalize Python response + py_json = nested_num_to_str(py_json) + + def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]: + """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison.""" + copied = payload.copy() + copied["trace"] = copied["trace"].copy() + copied["trace"]["trace_iteration"] = sorted( + copied["trace"]["trace_iteration"], + key=lambda row: (int(row["repeat"]), int(row["fold"]), int(row["iteration"])), + ) + return copied + + differences = deepdiff.diff.DeepDiff( + _sort_trace(py_json), + _sort_trace(php_response.json()), + ignore_order=False, + ) + assert not differences diff --git a/tests/routers/openml/setups_get_test.py b/tests/routers/openml/setups_get_test.py index 90094ac..6762714 100644 --- a/tests/routers/openml/setups_get_test.py +++ b/tests/routers/openml/setups_get_test.py @@ -1,7 +1,11 @@ +import asyncio import re from http import HTTPStatus import httpx +import pytest + +from core.conversions import nested_remove_values, nested_str_to_num async def test_get_setup_unknown(py_api: httpx.AsyncClient) -> None: @@ -16,3 +20,53 @@ async def test_get_setup_success(py_api: httpx.AsyncClient) -> None: data = response.json()["setup_parameters"] assert data["setup_id"] == 1 assert "parameter" in data + + +async def test_get_setup_response_is_identical_setup_doesnt_exist( + py_api: httpx.AsyncClient, 
+ php_api: httpx.AsyncClient, +) -> None: + setup_id = 999999 + + php_response, py_response = await asyncio.gather( + php_api.get(f"/setup/{setup_id}"), + py_api.get(f"/setup/{setup_id}"), + ) + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Unknown setup" + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert py_response.json()["detail"] == f"Setup {setup_id} not found." + + +@pytest.mark.parametrize("setup_id", range(1, 125)) +async def test_get_setup_response_is_identical( + setup_id: int, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + php_response, py_response = await asyncio.gather( + php_api.get(f"/setup/{setup_id}"), + py_api.get(f"/setup/{setup_id}"), + ) + + if php_response.status_code == HTTPStatus.PRECONDITION_FAILED: + assert py_response.status_code == HTTPStatus.NOT_FOUND + return + + assert php_response.status_code == HTTPStatus.OK + assert py_response.status_code == HTTPStatus.OK + + php_json = php_response.json() + + # PHP returns integer fields as strings. To compare, we recursively convert string digits + # to integers. + # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. 
+ php_json = nested_str_to_num(php_json) + php_json = nested_remove_values(php_json, values=[[], None]) + + py_json = nested_str_to_num(py_response.json()) + py_json = nested_remove_values(py_json, values=[[], None]) + + assert py_json == php_json diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py index db629bc..b4f704d 100644 --- a/tests/routers/openml/setups_tag_test.py +++ b/tests/routers/openml/setups_tag_test.py @@ -1,3 +1,8 @@ +import asyncio +import contextlib +import re +from collections.abc import AsyncIterator, Callable, Iterable +from contextlib import AbstractAsyncContextManager from http import HTTPStatus import httpx @@ -7,7 +12,8 @@ from core.errors import SetupNotFoundError, TagAlreadyExistsError from routers.openml.setups import tag_setup -from tests.users import SOME_USER, ApiKey +from tests.conftest import temporary_records +from tests.users import OWNER_USER, SOME_USER, ApiKey async def test_setup_tag_missing_auth(py_api: httpx.AsyncClient) -> None: @@ -83,3 +89,148 @@ async def test_setup_tag_direct_success(expdb_test: AsyncConnection) -> None: parameters={"tag": tag}, ) assert len(rows.all()) == 1 + + +@pytest.mark.mut +@pytest.mark.parametrize( + "api_key", + [ApiKey.ADMIN, ApiKey.SOME_USER], + ids=["Administrator", "non-owner"], +) +@pytest.mark.parametrize( + "other_tags", + [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]], + ids=["none", "one tag", "two tags"], +) +async def test_setup_tag_response_is_identical_when_tag_doesnt_exist( # noqa: PLR0913 + api_key: str, + other_tags: list[str], + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, + expdb_test: AsyncConnection, + temporary_tags: Callable[..., AbstractAsyncContextManager[None]], +) -> None: + setup_id = 1 + tag = "totally_new_tag_for_migration_testing" + + async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True): + php_response = await php_api.post( + "/setup/tag", + data={"api_key": 
api_key, "tag": tag, "setup_id": setup_id}, + ) + + await expdb_test.execute( + text("DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag"), + parameters={"setup_id": setup_id, "tag": tag}, + ) + await expdb_test.commit() + + async with temporary_tags(tags=other_tags, setup_id=setup_id): + py_response = await py_api.post( + f"/setup/tag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ) + + assert py_response.status_code == HTTPStatus.OK + assert py_response.status_code == php_response.status_code + php_tag = php_response.json()["setup_tag"] + py_tag = py_response.json()["setup_tag"] + assert py_tag["id"] == php_tag["id"] + if tags := php_tag.get("tag"): + if isinstance(tags, str): + assert py_tag["tag"][0] == tags + else: + assert set(py_tag["tag"]) == set(tags) + else: + assert py_tag["tag"] == [] + + +async def test_setup_tag_response_is_identical_setup_doesnt_exist( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + setup_id = 999999 + tag = "totally_new_tag_for_migration_testing" + api_key = ApiKey.SOME_USER + + php_response, py_response = await asyncio.gather( + php_api.post( + "/setup/tag", + data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, + ), + py_api.post( + f"/setup/tag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ), + ) + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Entity not found." 
+ assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert re.match( + r"Setup \d+ not found.", + py_response.json()["detail"], + ) + + +@pytest.mark.mut +async def test_setup_tag_response_is_identical_tag_already_exists( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, + temporary_tags: Callable[..., AbstractAsyncContextManager[None]], +) -> None: + setup_id = 1 + tag = "totally_new_tag_for_migration_testing" + api_key = ApiKey.SOME_USER + + async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True): + # Both APIs can be tested in parallel since the tag is already persisted + php_response, py_response = await asyncio.gather( + php_api.post( + "/setup/tag", + data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, + ), + py_api.post( + f"/setup/tag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ), + ) + + assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + assert py_response.status_code == HTTPStatus.CONFLICT + assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." + assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." 
+ + +@pytest.fixture +def temporary_tags( + expdb_test: AsyncConnection, +) -> Callable[..., AbstractAsyncContextManager[None]]: + @contextlib.asynccontextmanager + async def _temporary_tags( + tags: Iterable[str], setup_id: int, *, persist: bool = False + ) -> AsyncIterator[None]: + insert_queries = [ + ( + "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);", + {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id}, + ) + for tag in tags + ] + delete_queries = [ + ( + "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag", + {"setup_id": setup_id, "tag": tag}, + ) + for tag in tags + ] + async with temporary_records( + connection=expdb_test, + insert_queries=insert_queries, + delete_queries=delete_queries, + persist=persist, + ): + yield + + return _temporary_tags diff --git a/tests/routers/openml/setups_untag_test.py b/tests/routers/openml/setups_untag_test.py index b96671e..1ed7b42 100644 --- a/tests/routers/openml/setups_untag_test.py +++ b/tests/routers/openml/setups_untag_test.py @@ -1,3 +1,7 @@ +import asyncio +import re +from collections.abc import Callable +from contextlib import AbstractAsyncContextManager from http import HTTPStatus import httpx @@ -116,3 +120,122 @@ async def test_setup_untag_admin_removes_tag_uploaded_by_another_user( parameters={"tag": tag}, ) assert len(rows.all()) == 0 + + +@pytest.mark.mut +@pytest.mark.parametrize( + "api_key", + [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER], + ids=["Administrator", "non-owner", "tag owner"], +) +@pytest.mark.parametrize( + "other_tags", + [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]], + ids=["none", "one tag", "two tags"], +) +async def test_setup_untag_response_is_identical_when_tag_exists( + api_key: str, + other_tags: list[str], + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, + temporary_tags: Callable[..., AbstractAsyncContextManager[None]], +) -> None: + setup_id = 1 + tag = 
"totally_new_tag_for_migration_testing" + + all_tags = [tag, *other_tags] + async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True): + php_response = await php_api.post( + "/setup/untag", + data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, + ) + + # expdb_test transaction shared with Python API, + # no commit needed and rolled back at the end of the test + async with temporary_tags(tags=all_tags, setup_id=setup_id): + py_response = await py_api.post( + f"/setup/untag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ) + + if py_response.status_code == HTTPStatus.OK: + assert py_response.status_code == php_response.status_code + php_untag = php_response.json()["setup_untag"] + py_untag = py_response.json()["setup_untag"] + assert py_untag["id"] == php_untag["id"] + if tags := php_untag.get("tag"): + if isinstance(tags, str): + assert py_untag["tag"][0] == tags + else: + assert py_untag["tag"] == tags + else: + assert py_untag["tag"] == [] + return + + code, message = php_response.json()["error"].values() + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.FORBIDDEN + assert py_response.json()["code"] == code + assert message == "Tag is not owned by you" + assert re.match( + r"You may not remove tag \S+ of setup \d+ because it was not created by you.", + py_response.json()["detail"], + ) + + +async def test_setup_untag_response_is_identical_setup_doesnt_exist( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + setup_id = 999999 + tag = "totally_new_tag_for_migration_testing" + api_key = ApiKey.SOME_USER + + php_response, py_response = await asyncio.gather( + php_api.post( + "/setup/untag", + data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, + ), + py_api.post( + f"/setup/untag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ), + ) + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert 
py_response.status_code == HTTPStatus.NOT_FOUND + assert php_response.json()["error"]["message"] == "Entity not found." + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert re.match( + r"Setup \d+ not found.", + py_response.json()["detail"], + ) + + +async def test_setup_untag_response_is_identical_tag_doesnt_exist( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + setup_id = 1 + tag = "totally_new_tag_for_migration_testing" + api_key = ApiKey.SOME_USER + + php_response, py_response = await asyncio.gather( + php_api.post( + "/setup/untag", + data={"api_key": api_key, "tag": tag, "setup_id": setup_id}, + ), + py_api.post( + f"/setup/untag?api_key={api_key}", + json={"setup_id": setup_id, "tag": tag}, + ), + ) + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert py_response.json()["code"] == php_response.json()["error"]["code"] + assert php_response.json()["error"]["message"] == "Tag not found." 
+ assert re.match( + r"Setup \d+ does not have tag '\S+'.", + py_response.json()["detail"], + ) diff --git a/tests/routers/openml/study_get_test.py b/tests/routers/openml/study_get_test.py index 92b79b1..1ef2cff 100644 --- a/tests/routers/openml/study_get_test.py +++ b/tests/routers/openml/study_get_test.py @@ -1,7 +1,11 @@ +import asyncio from http import HTTPStatus +import deepdiff import httpx +from core.conversions import nested_num_to_str, nested_remove_values + async def test_get_task_study_by_id(py_api: httpx.AsyncClient) -> None: response = await py_api.get("/studies/1") @@ -449,3 +453,35 @@ async def test_get_task_study_by_alias(py_api: httpx.AsyncClient) -> None: "setup_ids": [], } assert response.json() == expected + + +async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None: + py_response, php_response = await asyncio.gather( + py_api.get("/studies/1"), + php_api.get("/study/1"), + ) + assert py_response.status_code == php_response.status_code + + py_json = py_response.json() + # New implementation is typed + py_json = nested_num_to_str(py_json) + # New implementation has same fields even if empty + py_json = nested_remove_values(py_json, values=[None]) + py_json["tasks"] = {"task_id": py_json.pop("task_ids")} + py_json["data"] = {"data_id": py_json.pop("data_ids")} + if runs := py_json.pop("run_ids", None): + py_json["runs"] = {"run_id": runs} + if flows := py_json.pop("flow_ids", None): + py_json["flows"] = {"flow_id": flows} + if setups := py_json.pop("setup_ids", None): + py_json["setup"] = {"setup_id": setups} + + # New implementation is not nested + py_json = {"study": py_json} + difference = deepdiff.diff.DeepDiff( + py_json, + php_response.json(), + ignore_order=True, + ignore_numeric_type_changes=True, + ) + assert not difference diff --git a/tests/routers/openml/task_get_test.py b/tests/routers/openml/task_get_test.py index e78bba8..955a7b8 100644 --- a/tests/routers/openml/task_get_test.py +++ 
b/tests/routers/openml/task_get_test.py @@ -1,7 +1,15 @@ +import asyncio from http import HTTPStatus import deepdiff import httpx +import pytest + +from core.conversions import ( + nested_num_to_str, + nested_remove_single_element_list, + nested_remove_values, +) async def test_get_task(py_api: httpx.AsyncClient) -> None: @@ -50,3 +58,52 @@ async def test_get_task(py_api: httpx.AsyncClient) -> None: } differences = deepdiff.diff.DeepDiff(response.json(), expected, ignore_order=True) assert not differences + + +@pytest.mark.parametrize( + "task_id", + range(1, 1306), +) +async def test_get_task_equal( + task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient +) -> None: + py_response, php_response = await asyncio.gather( + py_api.get(f"/tasks/{task_id}"), + php_api.get(f"/task/{task_id}"), + ) + assert py_response.status_code == HTTPStatus.OK + assert php_response.status_code == HTTPStatus.OK + + py_json = py_response.json() + # Some fields are renamed (old = tag, new = tags) + py_json["tag"] = py_json.pop("tags") + py_json["task_id"] = py_json.pop("id") + py_json["task_name"] = py_json.pop("name") + # PHP is not typed *and* automatically removes None values + py_json = nested_remove_values(py_json, values=[None]) + py_json = nested_num_to_str(py_json) + # It also removes "value" entries for parameters if the list is empty, + # it does not remove *all* empty lists, e.g., for cost_matrix input they are kept + estimation_procedure = next( + v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v + ) + if "parameter" in estimation_procedure: + estimation_procedure["parameter"] = [ + {k: v for k, v in parameter.items() if v != []} + for parameter in estimation_procedure["parameter"] + ] + # Fields that may return in a list now always return a list + py_json = nested_remove_single_element_list(py_json) + # Tags are not returned if they are an empty list: + if py_json["tag"] == []: + py_json.pop("tag") + + # The response is no longer 
nested + py_json = {"task": py_json} + + differences = deepdiff.diff.DeepDiff( + py_json, + php_response.json(), + ignore_order=True, + ) + assert not differences diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index 78eb5ec..45404d1 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -1,12 +1,15 @@ +import asyncio from http import HTTPStatus -from typing import Any +from typing import Any, cast +import deepdiff import httpx import pytest from sqlalchemy.ext.asyncio import AsyncConnection +from core.conversions import nested_remove_single_element_list from core.errors import NoResultsError -from routers.dependencies import Pagination +from routers.dependencies import LIMIT_MAX, Pagination from routers.openml.tasks import TaskStatusFilter, list_tasks @@ -250,3 +253,162 @@ async def test_list_tasks_no_results(payload: dict[str, Any], expdb_test: AsyncC """Filters matching nothing return 404 NoResultsError.""" with pytest.raises(NoResultsError): await list_tasks(pagination=Pagination(), expdb=expdb_test, **payload) + + +_TASK_LIST_NO_RESULTS_CODE = "482" + + +def _build_php_task_list_path(php_params: dict[str, Any]) -> str: + """Build a PHP-style path for /task/list with path-encoded filter parameters.""" + if not php_params: + return "/task/list" + parts = "/".join(f"{k}/{v}" for k, v in php_params.items()) + return f"/task/list/{parts}" + + +def _normalize_py_task(task: dict[str, Any]) -> dict[str, Any]: + """Normalize a single Python task list entry to match PHP format. + + PHP (XML-to-JSON) returns single-element arrays as plain values, not lists. + PHP returns task_id, task_type_id, and did as integers (same for Python). + and completely omits the "tag" field for all tasks in the list endpoint. 
+ """ + t = nested_remove_single_element_list(task.copy()) + + # PHP's list endpoint does not return tags AT ALL + t.pop("tag", None) + + # PHP omits qualities where value is None string + if "quality" in t: + t["quality"] = [q for q in t["quality"] if q.get("value") != "None"] + + return cast("dict[str, Any]", t) + + +_FILTER_COMBOS: list[tuple[dict[str, Any], dict[str, Any]]] = [ + ({"type": 1}, {"task_type_id": 1}), # by task type + ({"tag": "OpenML100"}, {"tag": "OpenML100"}), # by tag + ({"type": 1, "tag": "OpenML100"}, {"task_type_id": 1, "tag": "OpenML100"}), # combined + ({"data_name": "iris"}, {"data_name": "iris"}), # by dataset name + ({"data_id": 61}, {"data_id": [61]}), # by dataset id + ({"data_tag": "study_14"}, {"data_tag": "study_14"}), # by dataset tag + ({"number_instances": "150"}, {"number_instances": "150"}), # quality filter + ( + {"data_id": 61, "number_instances": "150"}, + {"data_id": [61], "number_instances": "150"}, + ), +] +_FILTER_IDS = [ + "type", + "tag", + "type_and_tag", + "data_name", + "data_id", + "data_tag", + "number_instances", + "data_and_quality", +] + + +@pytest.mark.parametrize( + ("php_params", "py_extra"), + _FILTER_COMBOS, + ids=_FILTER_IDS, +) +async def test_list_tasks_equal( + php_params: dict[str, Any], + py_extra: dict[str, Any], + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + """Python and PHP task list responses contain the same tasks for the same filters. + + Known differences documented here: + - PHP wraps response in {"tasks": {"task": [...]}}, Python returns a flat list. + - PHP uses XML-to-JSON which collapses single-element arrays into plain values. + - PHP omits the "tag" key when a task has no tags; Python returns "tag": []. + - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND. 
+ """ + php_path = _build_php_task_list_path(php_params) + py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}} + py_response, php_response = await asyncio.gather( + py_api.post("/tasks/list", json=py_body), + php_api.get(php_path), + ) + + # Error case: no results — PHP returns 412, Python returns 404 + if php_response.status_code == HTTPStatus.PRECONDITION_FAILED: + assert py_response.status_code == HTTPStatus.NOT_FOUND + assert py_response.headers["content-type"] == "application/problem+json" + assert php_response.json()["error"]["code"] == _TASK_LIST_NO_RESULTS_CODE + assert py_response.json()["code"] == _TASK_LIST_NO_RESULTS_CODE + return + + assert php_response.status_code == HTTPStatus.OK + assert py_response.status_code == HTTPStatus.OK + + php_tasks_raw = php_response.json()["tasks"]["task"] + php_tasks: list[dict[str, Any]] = ( + php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw] + ) + php_tasks = php_tasks[:LIMIT_MAX] + py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()] + + php_ids = {int(t["task_id"]) for t in php_tasks} + py_ids = {int(t["task_id"]) for t in py_tasks} + + assert py_ids == php_ids, ( + f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}" + ) + + # Compare only the tasks PHP returned — per-task deepdiff for clear error messages + py_by_id = {int(t["task_id"]): t for t in py_tasks} + php_by_id = {int(t["task_id"]): t for t in php_tasks} + for task_id in php_ids: + differences = deepdiff.diff.DeepDiff( + py_by_id[task_id], + php_by_id[task_id], + ignore_order=True, + ) + assert not differences, f"Differences for task {task_id}: {differences}" + + +@pytest.mark.parametrize( + ("php_params", "py_extra"), + [ + ({"tag": "nonexistent_tag_xyz_abc"}, {"tag": "nonexistent_tag_xyz_abc"}), + ({"type": 9999}, {"task_type_id": 9999}), + ({"data_name": "nonexistent_dataset_xyz"}, {"data_name": "nonexistent_dataset_xyz"}), + ], + ids=["bad_tag", "bad_type", 
"bad_data_name"], +) +async def test_list_tasks_no_results_matches_php( + php_params: dict[str, Any], + py_extra: dict[str, Any], + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + """Both APIs return a "no results" error for filters matching nothing. + + Documented differences: + - PHP returns 412 PRECONDITION_FAILED; Python returns 404 NOT_FOUND. + - PHP message: "No results"; Python detail: "No tasks match the search criteria." + """ + php_path = _build_php_task_list_path(php_params) + py_response, php_response = await asyncio.gather( + py_api.post("/tasks/list", json=py_extra), + php_api.get(php_path), + ) + + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED + assert py_response.status_code == HTTPStatus.NOT_FOUND + + php_error = php_response.json()["error"] + py_error = py_response.json() + + # Error codes should be the same + assert php_error["code"] == _TASK_LIST_NO_RESULTS_CODE + assert py_error["code"] == _TASK_LIST_NO_RESULTS_CODE + assert php_error["message"] == "No results" + assert py_error["detail"] == "No tasks match the search criteria." 
+ assert py_response.headers["content-type"] == "application/problem+json" From c7fb2c504626e69beba9ef79dfaf6ff14b7fface Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 15 Apr 2026 14:48:09 +0200 Subject: [PATCH 8/8] move temporary tags to conftest --- tests/conftest.py | 36 ++++++++++++++++++++++- tests/routers/openml/setups_tag_test.py | 39 ++----------------------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ad86ce4..368b789 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import contextlib import json -from collections.abc import AsyncIterator, Iterable, Iterator +from collections.abc import AsyncIterator, Callable, Iterable, Iterator from pathlib import Path from typing import Any, NamedTuple @@ -17,6 +17,7 @@ from database.setup import expdb_database, user_database from main import create_api from routers.dependencies import expdb_connection, userdb_connection +from tests.users import OWNER_USER PHP_API_URL = "http://php-api:80/api/v1/json" @@ -168,6 +169,39 @@ async def persisted_flow(flow: Flow, expdb_test: AsyncConnection) -> AsyncIterat await expdb_test.commit() +@pytest.fixture +def temporary_tags( + expdb_test: AsyncConnection, +) -> Callable[..., contextlib.AbstractAsyncContextManager[None]]: + @contextlib.asynccontextmanager + async def _temporary_tags( + tags: Iterable[str], setup_id: int, *, persist: bool = False + ) -> AsyncIterator[None]: + insert_queries = [ + ( + "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);", + {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id}, + ) + for tag in tags + ] + delete_queries = [ + ( + "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag", + {"setup_id": setup_id, "tag": tag}, + ) + for tag in tags + ] + async with temporary_records( + connection=expdb_test, + insert_queries=insert_queries, + delete_queries=delete_queries, + persist=persist, + ): + yield + + return 
_temporary_tags + + def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None: # noqa: ARG001 for test_item in items: for fixture in test_item.fixturenames: # type: ignore[attr-defined] diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py index b4f704d..ad9659f 100644 --- a/tests/routers/openml/setups_tag_test.py +++ b/tests/routers/openml/setups_tag_test.py @@ -1,7 +1,6 @@ import asyncio -import contextlib import re -from collections.abc import AsyncIterator, Callable, Iterable +from collections.abc import Callable from contextlib import AbstractAsyncContextManager from http import HTTPStatus @@ -12,8 +11,7 @@ from core.errors import SetupNotFoundError, TagAlreadyExistsError from routers.openml.setups import tag_setup -from tests.conftest import temporary_records -from tests.users import OWNER_USER, SOME_USER, ApiKey +from tests.users import SOME_USER, ApiKey async def test_setup_tag_missing_auth(py_api: httpx.AsyncClient) -> None: @@ -201,36 +199,3 @@ async def test_setup_tag_response_is_identical_tag_already_exists( assert py_response.status_code == HTTPStatus.CONFLICT assert php_response.json()["error"]["message"] == "Entity already tagged by this tag." assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." 
- - -@pytest.fixture -def temporary_tags( - expdb_test: AsyncConnection, -) -> Callable[..., AbstractAsyncContextManager[None]]: - @contextlib.asynccontextmanager - async def _temporary_tags( - tags: Iterable[str], setup_id: int, *, persist: bool = False - ) -> AsyncIterator[None]: - insert_queries = [ - ( - "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);", - {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id}, - ) - for tag in tags - ] - delete_queries = [ - ( - "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag", - {"setup_id": setup_id, "tag": tag}, - ) - for tag in tags - ] - async with temporary_records( - connection=expdb_test, - insert_queries=insert_queries, - delete_queries=delete_queries, - persist=persist, - ): - yield - - return _temporary_tags