From e3384b70d4f2f4baf5190e34adbecfa2d5afffe8 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:22:39 +0200
Subject: [PATCH 1/8] Move GET /datasets migration tests to the endpoint test
 file

---
 tests/routers/openml/datasets_get_test.py     | 147 +++++++++++++++++-
 .../migration/datasets_migration_test.py      | 141 -----------------
 2 files changed, 146 insertions(+), 142 deletions(-)

diff --git a/tests/routers/openml/datasets_get_test.py b/tests/routers/openml/datasets_get_test.py
index fe67abe..4b9fb33 100644
--- a/tests/routers/openml/datasets_get_test.py
+++ b/tests/routers/openml/datasets_get_test.py
@@ -1,5 +1,7 @@
 """Tests for the GET /datasets/{dataset_id} endpoint."""
 
+import asyncio
+import json
 import re
 from http import HTTPStatus
 
@@ -8,11 +10,12 @@
 from sqlalchemy import text
 from sqlalchemy.ext.asyncio import AsyncConnection
 
+import tests.constants
 from core.errors import DatasetNoAccessError, DatasetNotFoundError
 from database.users import User
 from routers.openml.datasets import get_dataset
 from schemas.datasets.openml import DatasetMetadata
-from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
 
 
 async def test_get_dataset_via_api(py_api: httpx.AsyncClient) -> None:
@@ -140,3 +143,145 @@ async def test_private_dataset_access(
         expdb_db=expdb_test,
     )
     assert isinstance(dataset, DatasetMetadata)
+
+
+# -- Migration Tests --
+
+
+@pytest.mark.parametrize(
+    "dataset_id",
+    range(1, 132),
+)
+async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
+    dataset_id: int,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/datasets/{dataset_id}"),
+        php_api.get(f"/data/{dataset_id}"),
+    )
+
+    if py_response.status_code == HTTPStatus.FORBIDDEN:
+        assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    else:
+        assert py_response.status_code == php_response.status_code
+
+    if py_response.status_code != HTTPStatus.OK:
+        # RFC 9457: Python API now returns problem+json format
+        assert py_response.headers["content-type"] == "application/problem+json"
+        # Both APIs should return error responses in the same cases
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        old_error_message = php_response.json()["error"]["message"]
+        assert py_response.json()["detail"].startswith(old_error_message)
+        return
+
+    try:
+        php_json = php_response.json()["data_set_description"]
+    except json.decoder.JSONDecodeError:
+        pytest.skip("A PHP error occurred on the test server.")
+
+    if "div" in php_json:
+        pytest.skip("A PHP error occurred on the test server.")
+
+    # There are a few changes between the old API and the new API, so we convert here:
+    # The new API has normalized `format` field:
+    php_json["format"] = php_json["format"].lower()
+
+    # Pydantic HttpURL serialization omits port 80 for HTTP urls.
+    php_json["url"] = php_json["url"].replace(":80", "")
+
+    # There is odd behavior in the live server that I don't want to recreate:
+    # when the creator is a list of csv names, it can either be a str or a list
+    # depending on whether the names are quoted. E.g.:
+    # '"Alice", "Bob"' -> ["Alice", "Bob"]
+    # 'Alice, Bob' -> 'Alice, Bob'
+    if (
+        "creator" in php_json
+        and isinstance(php_json["creator"], str)
+        and len(php_json["creator"].split(",")) > 1
+    ):
+        php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")]
+
+    py_json = py_response.json()
+    if processing_data := py_json.get("processing_date"):
+        py_json["processing_date"] = str(processing_data).replace("T", " ")
+
+    manual = []
+    # ref test.openml.org/d/33 (contributor) and d/34 (creator)
+    #   contributor/creator in database is '""'
+    #   json content is []
+    for field in ["contributor", "creator"]:
+        if py_json[field] == [""]:
+            py_json[field] = []
+            manual.append(field)
+
+    if isinstance(py_json["original_data_url"], list):
+        py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"])
+
+    for field, value in list(py_json.items()):
+        if field in manual:
+            continue
+        if isinstance(value, int):
+            py_json[field] = str(value)
+        elif isinstance(value, list) and len(value) == 1:
+            py_json[field] = str(value[0])
+        if not py_json[field]:
+            del py_json[field]
+
+    if "description" not in py_json:
+        py_json["description"] = []
+
+    assert py_json == php_json
+
+
+@pytest.mark.parametrize(
+    "dataset_id",
+    [-1, 138, 100_000],
+)
+async def test_error_unknown_dataset(
+    dataset_id: int,
+    py_api: httpx.AsyncClient,
+) -> None:
+    response = await py_api.get(f"/datasets/{dataset_id}")
+
+    # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED"
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    # RFC 9457: Python API now returns problem+json format
+    assert response.headers["content-type"] == "application/problem+json"
+    error = response.json()
+    assert error["code"] == "111"
+    # instead of 'Unknown dataset'
+    assert error["detail"].startswith("No dataset")
+
+
+async def test_private_dataset_no_user_no_access(
+    py_api: httpx.AsyncClient,
+) -> None:
+    response = await py_api.get("/datasets/130")
+
+    # New response is 403: Forbidden instead of 412: PRECONDITION FAILED
+    assert response.status_code == HTTPStatus.FORBIDDEN
+    assert response.headers["content-type"] == "application/problem+json"
+    error = response.json()
+    assert error["code"] == "112"
+    assert error["detail"].startswith("No access granted")
+
+
+@pytest.mark.parametrize(
+    "api_key",
+    [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
+)
+async def test_private_dataset_owner_access(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+    api_key: str,
+) -> None:
+    [private_dataset] = tests.constants.PRIVATE_DATASET_ID
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"),
+        php_api.get(f"/data/{private_dataset}?api_key={api_key}"),
+    )
+    assert php_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json()["id"] == private_dataset
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 718c06f..b73e505 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -1,155 +1,14 @@
 import asyncio
-import json
 import re
 from http import HTTPStatus
 
 import httpx
 import pytest
 
-import tests.constants
 from core.conversions import nested_remove_single_element_list
 from tests.users import ApiKey
 
 
-@pytest.mark.parametrize(
-    "dataset_id",
-    range(1, 132),
-)
-async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
-    dataset_id: int,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/datasets/{dataset_id}"),
-        php_api.get(f"/data/{dataset_id}"),
-    )
-
-    if py_response.status_code == HTTPStatus.FORBIDDEN:
-        assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    else:
-        assert py_response.status_code == php_response.status_code
-
-    if py_response.status_code != HTTPStatus.OK:
-        # RFC 9457: Python API now returns problem+json format
-        assert py_response.headers["content-type"] == "application/problem+json"
-        # Both APIs should return error responses in the same cases
-        assert py_response.json()["code"] == php_response.json()["error"]["code"]
-        old_error_message = php_response.json()["error"]["message"]
-        assert py_response.json()["detail"].startswith(old_error_message)
-        return
-
-    try:
-        php_json = php_response.json()["data_set_description"]
-    except json.decoder.JSONDecodeError:
-        pytest.skip("A PHP error occurred on the test server.")
-
-    if "div" in php_json:
-        pytest.skip("A PHP error occurred on the test server.")
-
-    # There are a few changes between the old API and the new API, so we convert here:
-    # The new API has normalized `format` field:
-    php_json["format"] = php_json["format"].lower()
-
-    # Pydantic HttpURL serialization omits port 80 for HTTP urls.
-    php_json["url"] = php_json["url"].replace(":80", "")
-
-    # There is odd behavior in the live server that I don't want to recreate:
-    # when the creator is a list of csv names, it can either be a str or a list
-    # depending on whether the names are quoted. E.g.:
-    # '"Alice", "Bob"' -> ["Alice", "Bob"]
-    # 'Alice, Bob' -> 'Alice, Bob'
-    if (
-        "creator" in php_json
-        and isinstance(php_json["creator"], str)
-        and len(php_json["creator"].split(",")) > 1
-    ):
-        php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")]
-
-    py_json = py_response.json()
-    if processing_data := py_json.get("processing_date"):
-        py_json["processing_date"] = str(processing_data).replace("T", " ")
-
-    manual = []
-    # ref test.openml.org/d/33 (contributor) and d/34 (creator)
-    #   contributor/creator in database is '""'
-    #   json content is []
-    for field in ["contributor", "creator"]:
-        if py_json[field] == [""]:
-            py_json[field] = []
-            manual.append(field)
-
-    if isinstance(py_json["original_data_url"], list):
-        py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"])
-
-    for field, value in list(py_json.items()):
-        if field in manual:
-            continue
-        if isinstance(value, int):
-            py_json[field] = str(value)
-        elif isinstance(value, list) and len(value) == 1:
-            py_json[field] = str(value[0])
-        if not py_json[field]:
-            del py_json[field]
-
-    if "description" not in py_json:
-        py_json["description"] = []
-
-    assert py_json == php_json
-
-
-@pytest.mark.parametrize(
-    "dataset_id",
-    [-1, 138, 100_000],
-)
-async def test_error_unknown_dataset(
-    dataset_id: int,
-    py_api: httpx.AsyncClient,
-) -> None:
-    response = await py_api.get(f"/datasets/{dataset_id}")
-
-    # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED"
-    assert response.status_code == HTTPStatus.NOT_FOUND
-    # RFC 9457: Python API now returns problem+json format
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["code"] == "111"
-    # instead of 'Unknown dataset'
-    assert error["detail"].startswith("No dataset")
-
-
-async def test_private_dataset_no_user_no_access(
-    py_api: httpx.AsyncClient,
-) -> None:
-    response = await py_api.get("/datasets/130")
-
-    # New response is 403: Forbidden instead of 412: PRECONDITION FAILED
-    assert response.status_code == HTTPStatus.FORBIDDEN
-    assert response.headers["content-type"] == "application/problem+json"
-    error = response.json()
-    assert error["code"] == "112"
-    assert error["detail"].startswith("No access granted")
-
-
-@pytest.mark.parametrize(
-    "api_key",
-    [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
-)
-async def test_private_dataset_owner_access(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-    api_key: str,
-) -> None:
-    [private_dataset] = tests.constants.PRIVATE_DATASET_ID
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"),
-        php_api.get(f"/data/{private_dataset}?api_key={api_key}"),
-    )
-    assert php_response.status_code == HTTPStatus.OK
-    assert py_response.status_code == php_response.status_code
-    assert py_response.json()["id"] == private_dataset
-
-
 @pytest.mark.mut
 @pytest.mark.parametrize(
     "dataset_id",

From 3ccf010bc8e0e573dc793194cffc903c580adafe Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:27:31 +0200
Subject: [PATCH 2/8] Move migration tag tests to dataset tag test file

---
 tests/routers/openml/dataset_tag_test.py      | 76 +++++++++++++++++++
 .../migration/datasets_migration_test.py      | 74 ------------------
 2 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index d11fc96..f1eab3c 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -1,9 +1,11 @@
+import re
 from http import HTTPStatus
 
 import httpx
 import pytest
 from sqlalchemy.ext.asyncio import AsyncConnection
 
+from core.conversions import nested_remove_single_element_list
 from core.errors import TagAlreadyExistsError
 from database.datasets import get_tags_for
 from database.users import User
@@ -92,3 +94,77 @@ async def test_dataset_tag_fails_if_tag_exists(expdb_test: AsyncConnection) -> N
         )
     assert str(dataset_id) in e.value.detail
     assert tag in e.value.detail
+
+
+# -- migration tests --
+
+
+@pytest.mark.mut
+@pytest.mark.parametrize(
+    "dataset_id",
+    [*range(1, 10), 101, 131],
+)
+@pytest.mark.parametrize(
+    "api_key",
+    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+    ids=["Administrator", "regular user", "possible owner"],
+)
+@pytest.mark.parametrize(
+    "tag",
+    ["study_14", "totally_new_tag_for_migration_testing"],
+    ids=["typically existing tag", "new tag"],
+)
+async def test_dataset_tag_response_is_identical(
+    dataset_id: int,
+    tag: str,
+    api_key: str,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    # PHP request must happen first to check state, can't parallelize
+    php_response = await php_api.post(
+        "/data/tag",
+        data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
+    )
+    already_tagged = (
+        php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+        and "already tagged" in php_response.json()["error"]["message"]
+    )
+    if not already_tagged:
+        # undo the tag, because we don't want to persist this change to the database
+        # Sometimes a change is already committed to the database even if an error occurs.
+        await php_api.post(
+            "/data/untag",
+            data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
+        )
+    if (
+        php_response.status_code != HTTPStatus.OK
+        and php_response.json()["error"]["message"] == "An Elastic Search Exception occured."
+    ):
+        pytest.skip("Encountered Elastic Search error.")
+    py_response = await py_api.post(
+        f"/datasets/tag?api_key={api_key}",
+        json={"data_id": dataset_id, "tag": tag},
+    )
+
+    # RFC 9457: Tag conflict now returns 409 instead of 500
+    if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
+        assert py_response.status_code == HTTPStatus.CONFLICT
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
+        assert re.match(
+            pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.",
+            string=py_response.json()["detail"],
+        )
+        return
+
+    assert py_response.status_code == php_response.status_code, php_response.json()
+    if py_response.status_code != HTTPStatus.OK:
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        assert py_response.json()["detail"] == php_response.json()["error"]["message"]
+        return
+
+    php_json = php_response.json()
+    py_json = py_response.json()
+    py_json = nested_remove_single_element_list(py_json)
+    assert py_json == php_json
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index b73e505..65f4d23 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -5,80 +5,6 @@
 import httpx
 import pytest
 
-from core.conversions import nested_remove_single_element_list
-from tests.users import ApiKey
-
-
-@pytest.mark.mut
-@pytest.mark.parametrize(
-    "dataset_id",
-    [*range(1, 10), 101, 131],
-)
-@pytest.mark.parametrize(
-    "api_key",
-    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
-    ids=["Administrator", "regular user", "possible owner"],
-)
-@pytest.mark.parametrize(
-    "tag",
-    ["study_14", "totally_new_tag_for_migration_testing"],
-    ids=["typically existing tag", "new tag"],
-)
-async def test_dataset_tag_response_is_identical(
-    dataset_id: int,
-    tag: str,
-    api_key: str,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    # PHP request must happen first to check state, can't parallelize
-    php_response = await php_api.post(
-        "/data/tag",
-        data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
-    )
-    already_tagged = (
-        php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
-        and "already tagged" in php_response.json()["error"]["message"]
-    )
-    if not already_tagged:
-        # undo the tag, because we don't want to persist this change to the database
-        # Sometimes a change is already committed to the database even if an error occurs.
-        await php_api.post(
-            "/data/untag",
-            data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
-        )
-    if (
-        php_response.status_code != HTTPStatus.OK
-        and php_response.json()["error"]["message"] == "An Elastic Search Exception occured."
-    ):
-        pytest.skip("Encountered Elastic Search error.")
-    py_response = await py_api.post(
-        f"/datasets/tag?api_key={api_key}",
-        json={"data_id": dataset_id, "tag": tag},
-    )
-
-    # RFC 9457: Tag conflict now returns 409 instead of 500
-    if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
-        assert py_response.status_code == HTTPStatus.CONFLICT
-        assert py_response.json()["code"] == php_response.json()["error"]["code"]
-        assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
-        assert re.match(
-            pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.",
-            string=py_response.json()["detail"],
-        )
-        return
-
-    assert py_response.status_code == php_response.status_code, php_response.json()
-    if py_response.status_code != HTTPStatus.OK:
-        assert py_response.json()["code"] == php_response.json()["error"]["code"]
-        assert py_response.json()["detail"] == php_response.json()["error"]["message"]
-        return
-
-    php_json = php_response.json()
-    py_json = py_response.json()
-    py_json = nested_remove_single_element_list(py_json)
-    assert py_json == php_json
-
 
 @pytest.mark.parametrize(
     "data_id",

From c51b2fade8e01319865c289e94f2fe400a4999d4 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:30:13 +0200
Subject: [PATCH 3/8] Move dataset feature migration tests to its endpoint test
 file

---
 .../routers/openml/datasets_features_test.py  | 49 ++++++++++++++++++
 .../migration/datasets_migration_test.py      | 50 -------------------
 2 files changed, 49 insertions(+), 50 deletions(-)
 delete mode 100644 tests/routers/openml/migration/datasets_migration_test.py

diff --git a/tests/routers/openml/datasets_features_test.py b/tests/routers/openml/datasets_features_test.py
index 193b0f3..1fd8985 100644
--- a/tests/routers/openml/datasets_features_test.py
+++ b/tests/routers/openml/datasets_features_test.py
@@ -1,5 +1,7 @@
 """Tests for the GET /datasets/features/{dataset_id} endpoint."""
 
+import asyncio
+import re
 from http import HTTPStatus
 
 import httpx
@@ -102,3 +104,50 @@ async def test_dataset_features_with_processing_error(expdb_test: AsyncConnectio
 async def test_dataset_features_dataset_does_not_exist(expdb_test: AsyncConnection) -> None:
     with pytest.raises(DatasetNotFoundError):
         await get_dataset_features(dataset_id=1000, user=None, expdb=expdb_test)
+
+
+# -- migration tests --
+
+
+@pytest.mark.parametrize(
+    "data_id",
+    list(range(1, 130)),
+)
+async def test_datasets_feature_is_identical(
+    data_id: int,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/datasets/features/{data_id}"),
+        php_api.get(f"/data/features/{data_id}"),
+    )
+    assert py_response.status_code == php_response.status_code
+
+    if py_response.status_code != HTTPStatus.OK:
+        error = php_response.json()["error"]
+        assert py_response.json()["code"] == error["code"]
+        if error["message"] == "No features found. Additionally, dataset processed with error":
+            pattern = r"No features found. Additionally, dataset \d+ processed with error\."
+            assert re.match(pattern, py_response.json()["detail"])
+        else:
+            assert py_response.json()["detail"] == error["message"]
+        return
+
+    py_json = py_response.json()
+    for feature in py_json:
+        for key, value in list(feature.items()):
+            if key == "nominal_values":
+                # The old API uses `nominal_value` instead of `nominal_values`
+                values = feature.pop(key)
+                # The old API returns a str if there is only a single element
+                feature["nominal_value"] = values if len(values) > 1 else values[0]
+            elif key == "ontology":
+                # The old API returns a str if there is only a single element
+                values = feature.pop(key)
+                feature["ontology"] = values if len(values) > 1 else values[0]
+            else:
+                # The old API formats bool as string in lower-case
+                feature[key] = str(value) if not isinstance(value, bool) else str(value).lower()
+    php_features = php_response.json()["data_features"]["feature"]
+    assert py_json == php_features
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
deleted file mode 100644
index 65f4d23..0000000
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import asyncio
-import re
-from http import HTTPStatus
-
-import httpx
-import pytest
-
-
-@pytest.mark.parametrize(
-    "data_id",
-    list(range(1, 130)),
-)
-async def test_datasets_feature_is_identical(
-    data_id: int,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/datasets/features/{data_id}"),
-        php_api.get(f"/data/features/{data_id}"),
-    )
-    assert py_response.status_code == php_response.status_code
-
-    if py_response.status_code != HTTPStatus.OK:
-        error = php_response.json()["error"]
-        assert py_response.json()["code"] == error["code"]
-        if error["message"] == "No features found. Additionally, dataset processed with error":
-            pattern = r"No features found. Additionally, dataset \d+ processed with error\."
-            assert re.match(pattern, py_response.json()["detail"])
-        else:
-            assert py_response.json()["detail"] == error["message"]
-        return
-
-    py_json = py_response.json()
-    for feature in py_json:
-        for key, value in list(feature.items()):
-            if key == "nominal_values":
-                # The old API uses `nominal_value` instead of `nominal_values`
-                values = feature.pop(key)
-                # The old API returns a str if there is only a single element
-                feature["nominal_value"] = values if len(values) > 1 else values[0]
-            elif key == "ontology":
-                # The old API returns a str if there is only a single element
-                values = feature.pop(key)
-                feature["ontology"] = values if len(values) > 1 else values[0]
-            else:
-                # The old API formats bool as string in lower-case
-                feature[key] = str(value) if not isinstance(value, bool) else str(value).lower()
-    php_features = php_response.json()["data_features"]["feature"]
-    assert py_json == php_features

From 52a50c709e94a491710b7b66a151985133a9c179 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:34:13 +0200
Subject: [PATCH 4/8] Move migration test to endpoint test file

---
 .../routers/openml/evaluationmeasures_test.py | 36 +++++++++++++++++++
 .../migration/evaluations_migration_test.py   | 35 ------------------
 2 files changed, 36 insertions(+), 35 deletions(-)
 delete mode 100644 tests/routers/openml/migration/evaluations_migration_test.py

diff --git a/tests/routers/openml/evaluationmeasures_test.py b/tests/routers/openml/evaluationmeasures_test.py
index 29cef16..d09bd9a 100644
--- a/tests/routers/openml/evaluationmeasures_test.py
+++ b/tests/routers/openml/evaluationmeasures_test.py
@@ -1,4 +1,6 @@
+import asyncio
 from http import HTTPStatus
+from typing import Any
 
 import httpx
 
@@ -387,3 +389,37 @@ async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None:
             "stratified_sampling": True,
         },
     ]
+
+
+# -- migration test --
+
+
+async def test_evaluationmeasure_list_migration(
+    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
+) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get("/evaluationmeasure/list"),
+        php_api.get("/evaluationmeasure/list"),
+    )
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"]
+
+
+async def test_estimation_procedure_list_migration(
+    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
+) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get("/estimationprocedure/list"),
+        php_api.get("/estimationprocedure/list"),
+    )
+    assert py_response.status_code == php_response.status_code
+    expected = php_response.json()["estimationprocedures"]["estimationprocedure"]
+
+    def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]:
+        procedure = {k: str(v) for k, v in procedure.items()}
+        if "stratified_sampling" in procedure:
+            procedure["stratified_sampling"] = procedure["stratified_sampling"].lower()
+        procedure["ttid"] = procedure.pop("task_type_id")
+        return procedure
+
+    assert [py_to_php(procedure) for procedure in py_response.json()] == expected
diff --git a/tests/routers/openml/migration/evaluations_migration_test.py b/tests/routers/openml/migration/evaluations_migration_test.py
deleted file mode 100644
index 08fb75f..0000000
--- a/tests/routers/openml/migration/evaluations_migration_test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import asyncio
-from typing import Any
-
-import httpx
-
-
-async def test_evaluationmeasure_list(
-    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get("/evaluationmeasure/list"),
-        php_api.get("/evaluationmeasure/list"),
-    )
-    assert py_response.status_code == php_response.status_code
-    assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"]
-
-
-async def test_estimation_procedure_list(
-    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get("/estimationprocedure/list"),
-        php_api.get("/estimationprocedure/list"),
-    )
-    assert py_response.status_code == php_response.status_code
-    expected = php_response.json()["estimationprocedures"]["estimationprocedure"]
-
-    def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]:
-        procedure = {k: str(v) for k, v in procedure.items()}
-        if "stratified_sampling" in procedure:
-            procedure["stratified_sampling"] = procedure["stratified_sampling"].lower()
-        procedure["ttid"] = procedure.pop("task_type_id")
-        return procedure
-
-    assert [py_to_php(procedure) for procedure in py_response.json()] == expected

From 09ba85b35fb00df000013449d7b18710cd26d1e4 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:36:34 +0200
Subject: [PATCH 5/8] Separate out tests for different endpoints to different
 files

---
 ..._test.py => estimation_procedcure_test.py} | 89 -----------------
 .../routers/openml/evaluation_measure_test.py | 96 +++++++++++++++++++
 2 files changed, 96 insertions(+), 89 deletions(-)
 rename tests/routers/openml/{evaluationmeasures_test.py => estimation_procedcure_test.py} (77%)
 create mode 100644 tests/routers/openml/evaluation_measure_test.py

diff --git a/tests/routers/openml/evaluationmeasures_test.py b/tests/routers/openml/estimation_procedcure_test.py
similarity index 77%
rename from tests/routers/openml/evaluationmeasures_test.py
rename to tests/routers/openml/estimation_procedcure_test.py
index d09bd9a..a05b34d 100644
--- a/tests/routers/openml/evaluationmeasures_test.py
+++ b/tests/routers/openml/estimation_procedcure_test.py
@@ -5,84 +5,6 @@
 import httpx
 
 
-async def test_evaluationmeasure_list(py_api: httpx.AsyncClient) -> None:
-    response = await py_api.get("/evaluationmeasure/list")
-    assert response.status_code == HTTPStatus.OK
-    assert response.json() == [
-        "area_under_roc_curve",
-        "average_cost",
-        "binominal_test",
-        "build_cpu_time",
-        "build_memory",
-        "c_index",
-        "chi-squared",
-        "class_complexity",
-        "class_complexity_gain",
-        "confusion_matrix",
-        "correlation_coefficient",
-        "cortana_quality",
-        "coverage",
-        "f_measure",
-        "information_gain",
-        "jaccard",
-        "kappa",
-        "kb_relative_information_score",
-        "kohavi_wolpert_bias_squared",
-        "kohavi_wolpert_error",
-        "kohavi_wolpert_sigma_squared",
-        "kohavi_wolpert_variance",
-        "kononenko_bratko_information_score",
-        "matthews_correlation_coefficient",
-        "mean_absolute_error",
-        "mean_class_complexity",
-        "mean_class_complexity_gain",
-        "mean_f_measure",
-        "mean_kononenko_bratko_information_score",
-        "mean_precision",
-        "mean_prior_absolute_error",
-        "mean_prior_class_complexity",
-        "mean_recall",
-        "mean_weighted_area_under_roc_curve",
-        "mean_weighted_f_measure",
-        "mean_weighted_precision",
-        "weighted_recall",
-        "number_of_instances",
-        "os_information",
-        "positives",
-        "precision",
-        "predictive_accuracy",
-        "prior_class_complexity",
-        "prior_entropy",
-        "probability",
-        "quality",
-        "ram_hours",
-        "recall",
-        "relative_absolute_error",
-        "root_mean_prior_squared_error",
-        "root_mean_squared_error",
-        "root_relative_squared_error",
-        "run_cpu_time",
-        "run_memory",
-        "run_virtual_memory",
-        "scimark_benchmark",
-        "single_point_area_under_roc_curve",
-        "total_cost",
-        "unclassified_instance_count",
-        "usercpu_time_millis",
-        "usercpu_time_millis_testing",
-        "usercpu_time_millis_training",
-        "webb_bias",
-        "webb_error",
-        "webb_variance",
-        "joint_entropy",
-        "pattern_team_auroc10",
-        "wall_clock_time_millis",
-        "wall_clock_time_millis_training",
-        "wall_clock_time_millis_testing",
-        "unweighted_recall",
-    ]
-
-
 async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None:
     response = await py_api.get("/estimationprocedure/list")
     assert response.status_code == HTTPStatus.OK
@@ -394,17 +316,6 @@ async def test_estimation_procedure_list(py_api: httpx.AsyncClient) -> None:
 # -- migration test --
 
 
-async def test_evaluationmeasure_list_migration(
-    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get("/evaluationmeasure/list"),
-        php_api.get("/evaluationmeasure/list"),
-    )
-    assert py_response.status_code == php_response.status_code
-    assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"]
-
-
 async def test_estimation_procedure_list_migration(
     py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
diff --git a/tests/routers/openml/evaluation_measure_test.py b/tests/routers/openml/evaluation_measure_test.py
new file mode 100644
index 0000000..2df2483
--- /dev/null
+++ b/tests/routers/openml/evaluation_measure_test.py
@@ -0,0 +1,96 @@
+import asyncio
+from http import HTTPStatus
+
+import httpx
+
+
+async def test_evaluationmeasure_list(py_api: httpx.AsyncClient) -> None:
+    response = await py_api.get("/evaluationmeasure/list")  # Python API only, no PHP comparison
+    assert response.status_code == HTTPStatus.OK
+    assert response.json() == [  # list equality: the names and their order must both match
+        "area_under_roc_curve",
+        "average_cost",
+        "binominal_test",
+        "build_cpu_time",
+        "build_memory",
+        "c_index",
+        "chi-squared",
+        "class_complexity",
+        "class_complexity_gain",
+        "confusion_matrix",
+        "correlation_coefficient",
+        "cortana_quality",
+        "coverage",
+        "f_measure",
+        "information_gain",
+        "jaccard",
+        "kappa",
+        "kb_relative_information_score",
+        "kohavi_wolpert_bias_squared",
+        "kohavi_wolpert_error",
+        "kohavi_wolpert_sigma_squared",
+        "kohavi_wolpert_variance",
+        "kononenko_bratko_information_score",
+        "matthews_correlation_coefficient",
+        "mean_absolute_error",
+        "mean_class_complexity",
+        "mean_class_complexity_gain",
+        "mean_f_measure",
+        "mean_kononenko_bratko_information_score",
+        "mean_precision",
+        "mean_prior_absolute_error",
+        "mean_prior_class_complexity",
+        "mean_recall",
+        "mean_weighted_area_under_roc_curve",
+        "mean_weighted_f_measure",
+        "mean_weighted_precision",
+        "weighted_recall",
+        "number_of_instances",
+        "os_information",
+        "positives",
+        "precision",
+        "predictive_accuracy",
+        "prior_class_complexity",
+        "prior_entropy",
+        "probability",
+        "quality",
+        "ram_hours",
+        "recall",
+        "relative_absolute_error",
+        "root_mean_prior_squared_error",
+        "root_mean_squared_error",
+        "root_relative_squared_error",
+        "run_cpu_time",
+        "run_memory",
+        "run_virtual_memory",
+        "scimark_benchmark",
+        "single_point_area_under_roc_curve",
+        "total_cost",
+        "unclassified_instance_count",
+        "usercpu_time_millis",
+        "usercpu_time_millis_testing",
+        "usercpu_time_millis_training",
+        "webb_bias",
+        "webb_error",
+        "webb_variance",
+        "joint_entropy",
+        "pattern_team_auroc10",
+        "wall_clock_time_millis",
+        "wall_clock_time_millis_training",
+        "wall_clock_time_millis_testing",
+        "unweighted_recall",
+    ]
+
+
+# -- migration test --
+
+
+async def test_evaluationmeasure_list_migration(
+    py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
+) -> None:
+    """Both APIs must answer with the same status and the same list of measures."""
+    endpoint = "/evaluationmeasure/list"
+    py_reply, php_reply = await asyncio.gather(py_api.get(endpoint), php_api.get(endpoint))
+    assert py_reply.status_code == php_reply.status_code
+    # PHP wraps the list as evaluation_measures -> measures -> measure; Python returns it bare.
+    assert py_reply.json() == php_reply.json()["evaluation_measures"]["measures"]["measure"]

From a8e4506899b5899a0e549c79b652f3b7ce144e51 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 11:52:46 +0200
Subject: [PATCH 6/8] Move flow migration tests to respective endpoint files

---
 tests/routers/openml/flows_exists_test.py     |  46 ++++++++
 tests/routers/openml/flows_get_test.py        |  58 ++++++++++
 .../openml/migration/flows_migration_test.py  | 102 ------------------
 3 files changed, 104 insertions(+), 102 deletions(-)
 delete mode 100644 tests/routers/openml/migration/flows_migration_test.py

diff --git a/tests/routers/openml/flows_exists_test.py b/tests/routers/openml/flows_exists_test.py
index d767b9a..bb09edd 100644
--- a/tests/routers/openml/flows_exists_test.py
+++ b/tests/routers/openml/flows_exists_test.py
@@ -1,3 +1,5 @@
+import asyncio
+import re
 from http import HTTPStatus
 
 import httpx
@@ -79,3 +81,47 @@ async def test_flow_exists_handles_flow_not_found(
         await flow_exists("foo", "bar", expdb_test)
     assert error.value.status_code == HTTPStatus.NOT_FOUND
     assert error.value.uri == FlowNotFoundError.uri
+
+
+# -- migration tests --
+
+
+async def test_flow_exists_not(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """A missing flow is a 200 with id "-1" in PHP but a 404 problem response in Python."""
+    path = "exists/foo/bar"
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/flows/{path}"),
+        php_api.get(f"/flow/{path}"),
+    )
+
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.status_code == HTTPStatus.OK
+    assert php_response.json() == {"flow_exists": {"exists": "false", "id": "-1"}}
+
+    # RFC 9457 problem+json body; the final dot is escaped so only a literal "." matches.
+    assert re.match(
+        pattern=r"Flow with name \S+ and external version \S+ not found\.",
+        string=py_response.json()["detail"],
+    )
+
+
+@pytest.mark.mut
+async def test_flow_exists_migration(
+    persisted_flow: Flow,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """Both APIs must find the persisted flow, each answering in its own format."""
+    flow_id = persisted_flow.id
+    suffix = f"exists/{persisted_flow.name}/{persisted_flow.external_version}"
+    py_request = py_api.get(f"/flows/{suffix}")
+    php_request = php_api.get(f"/flow/{suffix}")
+    py_reply, php_reply = await asyncio.gather(py_request, php_request)
+
+    # The raw PHP body is attached so a status mismatch is easy to diagnose.
+    assert py_reply.status_code == php_reply.status_code, php_reply.content
+    assert php_reply.json() == {"flow_exists": {"exists": "true", "id": str(flow_id)}}
+    assert py_reply.json() == {"flow_id": flow_id}
diff --git a/tests/routers/openml/flows_get_test.py b/tests/routers/openml/flows_get_test.py
index e24e705..17bbfcc 100644
--- a/tests/routers/openml/flows_get_test.py
+++ b/tests/routers/openml/flows_get_test.py
@@ -1,7 +1,15 @@
+import asyncio
 from http import HTTPStatus
+from typing import Any
 
 import deepdiff.diff
 import httpx
+import pytest
+
+from core.conversions import (
+    nested_remove_single_element_list,
+    nested_str_to_num,
+)
 
 
 async def test_get_flow_no_subflow(py_api: httpx.AsyncClient) -> None:
@@ -302,3 +310,53 @@ async def test_get_flow_with_subflow(py_api: httpx.AsyncClient) -> None:
     }
     difference = deepdiff.diff.DeepDiff(response.json(), expected, ignore_order=True)
     assert not difference
+
+
+# -- migration test --
+
+
+@pytest.mark.parametrize(
+    "flow_id",
+    range(1, 16),
+)
+async def test_get_flow_equal(
+    flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
+) -> None:
+    """The Python flow must equal the PHP flow once known differences are normalised."""
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/flows/{flow_id}"),
+        php_api.get(f"/flow/{flow_id}"),
+    )
+    # Require PHP to succeed too; its failure would otherwise surface later as a missing "flow".
+    assert py_response.status_code == php_response.status_code == HTTPStatus.OK
+
+    # PHP sets parameter default value to [], None is more appropriate, omission is considered
+    # Similar for the default "identifier" of subflows.
+    # Subflow field (old: component) is omitted if empty
+    def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
+        for parameter in flow["parameter"]:
+            if parameter["default_value"] is None:
+                parameter["default_value"] = []
+        for subflow in flow["subflows"]:
+            subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
+            if subflow["identifier"] is None:
+                subflow["identifier"] = []
+        flow["component"] = flow.pop("subflows")
+        if flow["component"] == []:
+            flow.pop("component")
+        return flow
+
+    py_json = convert_flow_naming_and_defaults(py_response.json())
+    py_json = nested_remove_single_element_list(py_json)
+
+    php_json = php_response.json()["flow"]
+    # The reason we don't transform py_json to str is that it becomes harder to ignore numeric type
+    # differences (e.g., '1.0' vs '1')
+    php_json = nested_str_to_num(php_json)
+    difference = deepdiff.diff.DeepDiff(
+        py_json,
+        php_json,
+        ignore_order=True,
+        ignore_numeric_type_changes=True,
+    )
+    assert not difference
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
deleted file mode 100644
index 2ef7da1..0000000
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import asyncio
-import re
-from http import HTTPStatus
-from typing import Any
-
-import deepdiff
-import httpx
-import pytest
-
-from core.conversions import (
-    nested_remove_single_element_list,
-    nested_str_to_num,
-)
-from tests.conftest import Flow
-
-
-async def test_flow_exists_not(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    path = "exists/foo/bar"
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/flows/{path}"),
-        php_api.get(f"/flow/{path}"),
-    )
-
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-    assert php_response.status_code == HTTPStatus.OK
-
-    assert php_response.json() == {"flow_exists": {"exists": "false", "id": str(-1)}}
-    # RFC 9457: Python API now returns problem+json format
-    error = py_response.json()
-    assert re.match(
-        pattern=r"Flow with name \S+ and external version \S+ not found.",
-        string=error["detail"],
-    )
-
-
-@pytest.mark.mut
-async def test_flow_exists(
-    persisted_flow: Flow,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    path = f"exists/{persisted_flow.name}/{persisted_flow.external_version}"
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/flows/{path}"),
-        php_api.get(f"/flow/{path}"),
-    )
-
-    assert py_response.status_code == php_response.status_code, php_response.content
-
-    expect_php = {"flow_exists": {"exists": "true", "id": str(persisted_flow.id)}}
-    assert php_response.json() == expect_php
-    assert py_response.json() == {"flow_id": persisted_flow.id}
-
-
-@pytest.mark.parametrize(
-    "flow_id",
-    range(1, 16),
-)
-async def test_get_flow_equal(
-    flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/flows/{flow_id}"),
-        php_api.get(f"/flow/{flow_id}"),
-    )
-    assert py_response.status_code == HTTPStatus.OK
-
-    py_json = py_response.json()
-
-    # PHP sets parameter default value to [], None is more appropriate, omission is considered
-    # Similar for the default "identifier" of subflows.
-    # Subflow field (old: component) is omitted if empty
-    def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
-        for parameter in flow["parameter"]:
-            if parameter["default_value"] is None:
-                parameter["default_value"] = []
-        for subflow in flow["subflows"]:
-            subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
-            if subflow["identifier"] is None:
-                subflow["identifier"] = []
-        flow["component"] = flow.pop("subflows")
-        if flow["component"] == []:
-            flow.pop("component")
-        return flow
-
-    py_json = convert_flow_naming_and_defaults(py_json)
-    py_json = nested_remove_single_element_list(py_json)
-
-    php_json = php_response.json()["flow"]
-    # The reason we don't transform py_json to str is that it becomes harder to ignore numeric type
-    # differences (e.g., '1.0' vs '1')
-    php_json = nested_str_to_num(php_json)
-    difference = deepdiff.diff.DeepDiff(
-        py_json,
-        php_json,
-        ignore_order=True,
-        ignore_numeric_type_changes=True,
-    )
-    assert not difference

From 2c46fb6d2ee217c83597fc36285801c81902c62d Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 14:32:09 +0200
Subject: [PATCH 7/8] Move other migration tests to respective files

---
 tests/routers/openml/migration/__init__.py    |   0
 .../openml/migration/runs_migration_test.py   |  80 -----
 .../openml/migration/setups_migration_test.py | 329 ------------------
 .../migration/studies_migration_test.py       |  38 --
 .../openml/migration/tasks_migration_test.py  | 226 ------------
 tests/routers/openml/runs_trace_test.py       |  74 ++++
 tests/routers/openml/setups_get_test.py       |  54 +++
 tests/routers/openml/setups_tag_test.py       | 153 +++++++-
 tests/routers/openml/setups_untag_test.py     | 123 +++++++
 tests/routers/openml/study_get_test.py        |  36 ++
 tests/routers/openml/task_get_test.py         |  57 +++
 tests/routers/openml/task_list_test.py        | 166 ++++++++-
 12 files changed, 660 insertions(+), 676 deletions(-)
 delete mode 100644 tests/routers/openml/migration/__init__.py
 delete mode 100644 tests/routers/openml/migration/runs_migration_test.py
 delete mode 100644 tests/routers/openml/migration/setups_migration_test.py
 delete mode 100644 tests/routers/openml/migration/studies_migration_test.py
 delete mode 100644 tests/routers/openml/migration/tasks_migration_test.py

diff --git a/tests/routers/openml/migration/__init__.py b/tests/routers/openml/migration/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py
deleted file mode 100644
index 826aa18..0000000
--- a/tests/routers/openml/migration/runs_migration_test.py
+++ /dev/null
@@ -1,80 +0,0 @@
-"""Migration tests comparing PHP and Python API responses for run trace endpoints."""
-
-import asyncio
-from http import HTTPStatus
-from typing import Any
-
-import deepdiff
-import httpx
-import pytest
-
-from core.conversions import nested_num_to_str
-
-_SERVER_RUNS = [*range(24, 40), *range(134, 140), 999_999_999]
-
-
-@pytest.mark.parametrize("run_id", _SERVER_RUNS)
-async def test_get_run_trace_equal(
-    run_id: int,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    """Test that Python and PHP run trace responses are equivalent after normalization."""
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/run/trace/{run_id}"),
-        php_api.get(f"/run/trace/{run_id}"),
-    )
-    if php_response.status_code == HTTPStatus.OK:
-        _assert_trace_response_success(py_response, php_response)
-        return
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-
-    php_error = php_response.json()["error"]
-    py_error = py_response.json()
-    assert py_error["code"] == php_error["code"]
-    if php_error["code"] == "571":
-        assert php_error["message"] == "Run not found."
-        assert py_error["detail"] == f"Run {run_id} not found."
-    elif php_error["code"] == "572":
-        assert php_error["message"] == "No successful trace associated with this run."
-        assert py_error["detail"] == f"No trace found for run {run_id}."
-    else:
-        msg = f"Unknown error code {php_error['code']} for run {run_id}."
-        raise AssertionError(msg)
-
-
-def _assert_trace_response_success(
-    py_response: httpx.Response, php_response: httpx.Response
-) -> None:
-    assert py_response.status_code == HTTPStatus.OK
-    assert php_response.status_code == HTTPStatus.OK
-
-    py_json = py_response.json()
-
-    # PHP nests response under "trace" key — match that structure
-    py_json = {"trace": py_json}
-
-    # PHP uses "trace_iteration" key, Python uses "trace"
-    py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace")
-
-    # PHP returns all numeric values as strings — normalize Python response
-    py_json = nested_num_to_str(py_json)
-
-    def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]:
-        """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison."""
-        copied = payload.copy()
-        copied["trace"] = copied["trace"].copy()
-        copied["trace"]["trace_iteration"] = sorted(
-            copied["trace"]["trace_iteration"],
-            key=lambda row: (int(row["repeat"]), int(row["fold"]), int(row["iteration"])),
-        )
-        return copied
-
-    differences = deepdiff.diff.DeepDiff(
-        _sort_trace(py_json),
-        _sort_trace(php_response.json()),
-        ignore_order=False,
-    )
-    assert not differences
diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py
deleted file mode 100644
index 34613fc..0000000
--- a/tests/routers/openml/migration/setups_migration_test.py
+++ /dev/null
@@ -1,329 +0,0 @@
-import asyncio
-import contextlib
-import re
-from collections.abc import AsyncIterator, Callable, Iterable
-from contextlib import AbstractAsyncContextManager
-from http import HTTPStatus
-
-import httpx
-import pytest
-from sqlalchemy import text
-from sqlalchemy.ext.asyncio import AsyncConnection
-
-from core.conversions import nested_remove_values, nested_str_to_num
-from tests.conftest import temporary_records
-from tests.users import OWNER_USER, ApiKey
-
-
-@pytest.fixture
-def temporary_tags(
-    expdb_test: AsyncConnection,
-) -> Callable[..., AbstractAsyncContextManager[None]]:
-    @contextlib.asynccontextmanager
-    async def _temporary_tags(
-        tags: Iterable[str], setup_id: int, *, persist: bool = False
-    ) -> AsyncIterator[None]:
-        insert_queries = [
-            (
-                "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);",
-                {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id},
-            )
-            for tag in tags
-        ]
-        delete_queries = [
-            (
-                "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag",
-                {"setup_id": setup_id, "tag": tag},
-            )
-            for tag in tags
-        ]
-        async with temporary_records(
-            connection=expdb_test,
-            insert_queries=insert_queries,
-            delete_queries=delete_queries,
-            persist=persist,
-        ):
-            yield
-
-    return _temporary_tags
-
-
-@pytest.mark.mut
-@pytest.mark.parametrize(
-    "api_key",
-    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
-    ids=["Administrator", "non-owner", "tag owner"],
-)
-@pytest.mark.parametrize(
-    "other_tags",
-    [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]],
-    ids=["none", "one tag", "two tags"],
-)
-async def test_setup_untag_response_is_identical_when_tag_exists(
-    api_key: str,
-    other_tags: list[str],
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
-) -> None:
-    setup_id = 1
-    tag = "totally_new_tag_for_migration_testing"
-
-    all_tags = [tag, *other_tags]
-    async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True):
-        php_response = await php_api.post(
-            "/setup/untag",
-            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-        )
-
-    # expdb_test transaction shared with Python API,
-    # no commit needed and rolled back at the end of the test
-    async with temporary_tags(tags=all_tags, setup_id=setup_id):
-        py_response = await py_api.post(
-            f"/setup/untag?api_key={api_key}",
-            json={"setup_id": setup_id, "tag": tag},
-        )
-
-    if py_response.status_code == HTTPStatus.OK:
-        assert py_response.status_code == php_response.status_code
-        php_untag = php_response.json()["setup_untag"]
-        py_untag = py_response.json()["setup_untag"]
-        assert py_untag["id"] == php_untag["id"]
-        if tags := php_untag.get("tag"):
-            if isinstance(tags, str):
-                assert py_untag["tag"][0] == tags
-            else:
-                assert py_untag["tag"] == tags
-        else:
-            assert py_untag["tag"] == []
-        return
-
-    code, message = php_response.json()["error"].values()
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.FORBIDDEN
-    assert py_response.json()["code"] == code
-    assert message == "Tag is not owned by you"
-    assert re.match(
-        r"You may not remove tag \S+ of setup \d+ because it was not created by you.",
-        py_response.json()["detail"],
-    )
-
-
-async def test_setup_untag_response_is_identical_setup_doesnt_exist(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    setup_id = 999999
-    tag = "totally_new_tag_for_migration_testing"
-    api_key = ApiKey.SOME_USER
-
-    php_response, py_response = await asyncio.gather(
-        php_api.post(
-            "/setup/untag",
-            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-        ),
-        py_api.post(
-            f"/setup/untag?api_key={api_key}",
-            json={"setup_id": setup_id, "tag": tag},
-        ),
-    )
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-    assert php_response.json()["error"]["message"] == "Entity not found."
-    assert py_response.json()["code"] == php_response.json()["error"]["code"]
-    assert re.match(
-        r"Setup \d+ not found.",
-        py_response.json()["detail"],
-    )
-
-
-async def test_setup_untag_response_is_identical_tag_doesnt_exist(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    setup_id = 1
-    tag = "totally_new_tag_for_migration_testing"
-    api_key = ApiKey.SOME_USER
-
-    php_response, py_response = await asyncio.gather(
-        php_api.post(
-            "/setup/untag",
-            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-        ),
-        py_api.post(
-            f"/setup/untag?api_key={api_key}",
-            json={"setup_id": setup_id, "tag": tag},
-        ),
-    )
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-    assert py_response.json()["code"] == php_response.json()["error"]["code"]
-    assert php_response.json()["error"]["message"] == "Tag not found."
-    assert re.match(
-        r"Setup \d+ does not have tag '\S+'.",
-        py_response.json()["detail"],
-    )
-
-
-@pytest.mark.mut
-@pytest.mark.parametrize(
-    "api_key",
-    [ApiKey.ADMIN, ApiKey.SOME_USER],
-    ids=["Administrator", "non-owner"],
-)
-@pytest.mark.parametrize(
-    "other_tags",
-    [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]],
-    ids=["none", "one tag", "two tags"],
-)
-async def test_setup_tag_response_is_identical_when_tag_doesnt_exist(  # noqa: PLR0913
-    api_key: str,
-    other_tags: list[str],
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-    expdb_test: AsyncConnection,
-    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
-) -> None:
-    setup_id = 1
-    tag = "totally_new_tag_for_migration_testing"
-
-    async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True):
-        php_response = await php_api.post(
-            "/setup/tag",
-            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-        )
-
-        await expdb_test.execute(
-            text("DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag"),
-            parameters={"setup_id": setup_id, "tag": tag},
-        )
-        await expdb_test.commit()
-
-    async with temporary_tags(tags=other_tags, setup_id=setup_id):
-        py_response = await py_api.post(
-            f"/setup/tag?api_key={api_key}",
-            json={"setup_id": setup_id, "tag": tag},
-        )
-
-    assert py_response.status_code == HTTPStatus.OK
-    assert py_response.status_code == php_response.status_code
-    php_tag = php_response.json()["setup_tag"]
-    py_tag = py_response.json()["setup_tag"]
-    assert py_tag["id"] == php_tag["id"]
-    if tags := php_tag.get("tag"):
-        if isinstance(tags, str):
-            assert py_tag["tag"][0] == tags
-        else:
-            assert set(py_tag["tag"]) == set(tags)
-    else:
-        assert py_tag["tag"] == []
-
-
-async def test_setup_tag_response_is_identical_setup_doesnt_exist(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    setup_id = 999999
-    tag = "totally_new_tag_for_migration_testing"
-    api_key = ApiKey.SOME_USER
-
-    php_response, py_response = await asyncio.gather(
-        php_api.post(
-            "/setup/tag",
-            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-        ),
-        py_api.post(
-            f"/setup/tag?api_key={api_key}",
-            json={"setup_id": setup_id, "tag": tag},
-        ),
-    )
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-    assert php_response.json()["error"]["message"] == "Entity not found."
-    assert py_response.json()["code"] == php_response.json()["error"]["code"]
-    assert re.match(
-        r"Setup \d+ not found.",
-        py_response.json()["detail"],
-    )
-
-
-@pytest.mark.mut
-async def test_setup_tag_response_is_identical_tag_already_exists(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
-) -> None:
-    setup_id = 1
-    tag = "totally_new_tag_for_migration_testing"
-    api_key = ApiKey.SOME_USER
-
-    async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True):
-        # Both APIs can be tested in parallel since the tag is already persisted
-        php_response, py_response = await asyncio.gather(
-            php_api.post(
-                "/setup/tag",
-                data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
-            ),
-            py_api.post(
-                f"/setup/tag?api_key={api_key}",
-                json={"setup_id": setup_id, "tag": tag},
-            ),
-        )
-
-    assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
-    assert py_response.status_code == HTTPStatus.CONFLICT
-    assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
-    assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}."
-
-
-async def test_get_setup_response_is_identical_setup_doesnt_exist(
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    setup_id = 999999
-
-    php_response, py_response = await asyncio.gather(
-        php_api.get(f"/setup/{setup_id}"),
-        py_api.get(f"/setup/{setup_id}"),
-    )
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-    assert php_response.json()["error"]["message"] == "Unknown setup"
-    assert py_response.json()["code"] == php_response.json()["error"]["code"]
-    assert py_response.json()["detail"] == f"Setup {setup_id} not found."
-
-
-@pytest.mark.parametrize("setup_id", range(1, 125))
-async def test_get_setup_response_is_identical(
-    setup_id: int,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    php_response, py_response = await asyncio.gather(
-        php_api.get(f"/setup/{setup_id}"),
-        py_api.get(f"/setup/{setup_id}"),
-    )
-
-    if php_response.status_code == HTTPStatus.PRECONDITION_FAILED:
-        assert py_response.status_code == HTTPStatus.NOT_FOUND
-        return
-
-    assert php_response.status_code == HTTPStatus.OK
-    assert py_response.status_code == HTTPStatus.OK
-
-    php_json = php_response.json()
-
-    # PHP returns integer fields as strings. To compare, we recursively convert string digits
-    # to integers.
-    # PHP also returns `[]` instead of null for empty string optional fields, which Python omits.
-    php_json = nested_str_to_num(php_json)
-    php_json = nested_remove_values(php_json, values=[[], None])
-
-    py_json = nested_str_to_num(py_response.json())
-    py_json = nested_remove_values(py_json, values=[[], None])
-
-    assert py_json == php_json
diff --git a/tests/routers/openml/migration/studies_migration_test.py b/tests/routers/openml/migration/studies_migration_test.py
deleted file mode 100644
index fc1340c..0000000
--- a/tests/routers/openml/migration/studies_migration_test.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import asyncio
-
-import deepdiff
-import httpx
-
-from core.conversions import nested_num_to_str, nested_remove_values
-
-
-async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get("/studies/1"),
-        php_api.get("/study/1"),
-    )
-    assert py_response.status_code == php_response.status_code
-
-    py_json = py_response.json()
-    # New implementation is typed
-    py_json = nested_num_to_str(py_json)
-    # New implementation has same fields even if empty
-    py_json = nested_remove_values(py_json, values=[None])
-    py_json["tasks"] = {"task_id": py_json.pop("task_ids")}
-    py_json["data"] = {"data_id": py_json.pop("data_ids")}
-    if runs := py_json.pop("run_ids", None):
-        py_json["runs"] = {"run_id": runs}
-    if flows := py_json.pop("flow_ids", None):
-        py_json["flows"] = {"flow_id": flows}
-    if setups := py_json.pop("setup_ids", None):
-        py_json["setup"] = {"setup_id": setups}
-
-    # New implementation is not nested
-    py_json = {"study": py_json}
-    difference = deepdiff.diff.DeepDiff(
-        py_json,
-        php_response.json(),
-        ignore_order=True,
-        ignore_numeric_type_changes=True,
-    )
-    assert not difference
diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py
deleted file mode 100644
index ea3226b..0000000
--- a/tests/routers/openml/migration/tasks_migration_test.py
+++ /dev/null
@@ -1,226 +0,0 @@
-import asyncio
-from http import HTTPStatus
-from typing import Any, cast
-
-import deepdiff
-import httpx
-import pytest
-
-from core.conversions import (
-    nested_num_to_str,
-    nested_remove_single_element_list,
-    nested_remove_values,
-)
-from routers.dependencies import LIMIT_MAX
-
-
-@pytest.mark.parametrize(
-    "task_id",
-    range(1, 1306),
-)
-async def test_get_task_equal(
-    task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
-) -> None:
-    py_response, php_response = await asyncio.gather(
-        py_api.get(f"/tasks/{task_id}"),
-        php_api.get(f"/task/{task_id}"),
-    )
-    assert py_response.status_code == HTTPStatus.OK
-    assert php_response.status_code == HTTPStatus.OK
-
-    py_json = py_response.json()
-    # Some fields are renamed (old = tag, new = tags)
-    py_json["tag"] = py_json.pop("tags")
-    py_json["task_id"] = py_json.pop("id")
-    py_json["task_name"] = py_json.pop("name")
-    # PHP is not typed *and* automatically removes None values
-    py_json = nested_remove_values(py_json, values=[None])
-    py_json = nested_num_to_str(py_json)
-    # It also removes "value" entries for parameters if the list is empty,
-    # it does not remove *all* empty lists, e.g., for cost_matrix input they are kept
-    estimation_procedure = next(
-        v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v
-    )
-    if "parameter" in estimation_procedure:
-        estimation_procedure["parameter"] = [
-            {k: v for k, v in parameter.items() if v != []}
-            for parameter in estimation_procedure["parameter"]
-        ]
-    # Fields that may return in a list now always return a list
-    py_json = nested_remove_single_element_list(py_json)
-    # Tags are not returned if they are an empty list:
-    if py_json["tag"] == []:
-        py_json.pop("tag")
-
-    # The response is no longer nested
-    py_json = {"task": py_json}
-
-    differences = deepdiff.diff.DeepDiff(
-        py_json,
-        php_response.json(),
-        ignore_order=True,
-    )
-    assert not differences
-
-
-# Task list no-results error code is 482 (unlike datasets which uses 372).
-_TASK_LIST_NO_RESULTS_CODE = "482"
-
-
-def _build_php_task_list_path(php_params: dict[str, Any]) -> str:
-    """Build a PHP-style path for /task/list with path-encoded filter parameters."""
-    if not php_params:
-        return "/task/list"
-    parts = "/".join(f"{k}/{v}" for k, v in php_params.items())
-    return f"/task/list/{parts}"
-
-
-def _normalize_py_task(task: dict[str, Any]) -> dict[str, Any]:
-    """Normalize a single Python task list entry to match PHP format.
-
-    PHP (XML-to-JSON) returns single-element arrays as plain values, not lists.
-    PHP returns task_id, task_type_id, and did as integers (same for Python).
-    and completely omits the "tag" field for all tasks in the list endpoint.
-    """
-    t = nested_remove_single_element_list(task.copy())
-
-    # PHP's list endpoint does not return tags AT ALL
-    t.pop("tag", None)
-
-    # PHP omits qualities where value is None string
-    if "quality" in t:
-        t["quality"] = [q for q in t["quality"] if q.get("value") != "None"]
-
-    return cast("dict[str, Any]", t)
-
-
-# Filter combos: (php_path_params, python_body_extras)
-# PHP uses path-based filter keys (e.g. "type"), Python uses JSON body keys (e.g. "task_type_id")
-_FILTER_COMBOS: list[tuple[dict[str, Any], dict[str, Any]]] = [
-    ({"type": 1}, {"task_type_id": 1}),  # by task type
-    ({"tag": "OpenML100"}, {"tag": "OpenML100"}),  # by tag
-    ({"type": 1, "tag": "OpenML100"}, {"task_type_id": 1, "tag": "OpenML100"}),  # combined
-    ({"data_name": "iris"}, {"data_name": "iris"}),  # by dataset name
-    ({"data_id": 61}, {"data_id": [61]}),  # by dataset id
-    ({"data_tag": "study_14"}, {"data_tag": "study_14"}),  # by dataset tag
-    ({"number_instances": "150"}, {"number_instances": "150"}),  # quality filter
-    (
-        {"data_id": 61, "number_instances": "150"},
-        {"data_id": [61], "number_instances": "150"},
-    ),
-]
-
-_FILTER_IDS = [
-    "type",
-    "tag",
-    "type_and_tag",
-    "data_name",
-    "data_id",
-    "data_tag",
-    "number_instances",
-    "data_and_quality",
-]
-
-
-@pytest.mark.parametrize(
-    ("php_params", "py_extra"),
-    _FILTER_COMBOS,
-    ids=_FILTER_IDS,
-)
-async def test_list_tasks_equal(
-    php_params: dict[str, Any],
-    py_extra: dict[str, Any],
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    """Python and PHP task list responses contain the same tasks for the same filters.
-
-    Known differences documented here:
-    - PHP wraps response in {"tasks": {"task": [...]}}, Python returns a flat list.
-    - PHP uses XML-to-JSON which collapses single-element arrays into plain values.
-    - PHP omits the "tag" key when a task has no tags; Python returns "tag": [].
-    - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND.
-    """
-    php_path = _build_php_task_list_path(php_params)
-    py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}}
-    py_response, php_response = await asyncio.gather(
-        py_api.post("/tasks/list", json=py_body),
-        php_api.get(php_path),
-    )
-
-    # Error case: no results — PHP returns 412, Python returns 404
-    if php_response.status_code == HTTPStatus.PRECONDITION_FAILED:
-        assert py_response.status_code == HTTPStatus.NOT_FOUND
-        assert py_response.headers["content-type"] == "application/problem+json"
-        assert php_response.json()["error"]["code"] == _TASK_LIST_NO_RESULTS_CODE
-        assert py_response.json()["code"] == _TASK_LIST_NO_RESULTS_CODE
-        return
-
-    assert php_response.status_code == HTTPStatus.OK
-    assert py_response.status_code == HTTPStatus.OK
-
-    php_tasks_raw = php_response.json()["tasks"]["task"]
-    php_tasks: list[dict[str, Any]] = (
-        php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw]
-    )
-    php_tasks = php_tasks[:LIMIT_MAX]
-    py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()]
-
-    php_ids = {int(t["task_id"]) for t in php_tasks}
-    py_ids = {int(t["task_id"]) for t in py_tasks}
-
-    assert py_ids == php_ids, (
-        f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}"
-    )
-
-    # Compare only the tasks PHP returned — per-task deepdiff for clear error messages
-    py_by_id = {int(t["task_id"]): t for t in py_tasks}
-    php_by_id = {int(t["task_id"]): t for t in php_tasks}
-    for task_id in php_ids:
-        differences = deepdiff.diff.DeepDiff(
-            py_by_id[task_id],
-            php_by_id[task_id],
-            ignore_order=True,
-        )
-        assert not differences, f"Differences for task {task_id}: {differences}"
-
-
-@pytest.mark.parametrize(
-    ("php_params", "py_extra"),
-    [
-        ({"tag": "nonexistent_tag_xyz_abc"}, {"tag": "nonexistent_tag_xyz_abc"}),
-        ({"type": 9999}, {"task_type_id": 9999}),
-        ({"data_name": "nonexistent_dataset_xyz"}, {"data_name": "nonexistent_dataset_xyz"}),
-    ],
-    ids=["bad_tag", "bad_type", "bad_data_name"],
-)
-async def test_list_tasks_no_results_matches_php(
-    php_params: dict[str, Any],
-    py_extra: dict[str, Any],
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
-) -> None:
-    """Both APIs return a "no results" error for filters matching nothing.
-
-    Documented differences:
-    - PHP returns 412 PRECONDITION_FAILED; Python returns 404 NOT_FOUND.
-    - PHP message: "No results"; Python detail: "No tasks match the search criteria."
-    """
-    php_path = _build_php_task_list_path(php_params)
-    py_response, php_response = await asyncio.gather(
-        py_api.post("/tasks/list", json=py_extra),
-        php_api.get(php_path),
-    )
-
-    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert py_response.status_code == HTTPStatus.NOT_FOUND
-
-    php_error = php_response.json()["error"]
-    py_error = py_response.json()
-
-    # Error codes should be the same
-    assert php_error["code"] == _TASK_LIST_NO_RESULTS_CODE
-    assert py_error["code"] == _TASK_LIST_NO_RESULTS_CODE
-    assert php_error["message"] == "No results"
-    assert py_error["detail"] == "No tasks match the search criteria."
-    assert py_response.headers["content-type"] == "application/problem+json"
diff --git a/tests/routers/openml/runs_trace_test.py b/tests/routers/openml/runs_trace_test.py
index 75b8f01..11fd10a 100644
--- a/tests/routers/openml/runs_trace_test.py
+++ b/tests/routers/openml/runs_trace_test.py
@@ -1,10 +1,14 @@
 """Tests for the GET /run/trace/{run_id} endpoint."""
 
+import asyncio
 from http import HTTPStatus
+from typing import Any
 
+import deepdiff
 import httpx
 import pytest
 
+from core.conversions import nested_num_to_str
 from core.errors import RunNotFoundError, RunTraceNotFoundError
 
 
@@ -47,3 +51,73 @@ async def test_get_run_trace_run_not_found(run_id: int, py_api: httpx.AsyncClien
     assert body["type"] == RunNotFoundError.uri
     assert body["title"] == RunNotFoundError.title
     assert body["status"] == HTTPStatus.NOT_FOUND
+
+
+_SERVER_RUNS = [*range(24, 40), *range(134, 140), 999_999_999]
+
+
+@pytest.mark.parametrize("run_id", _SERVER_RUNS)
+async def test_get_run_trace_equal(
+    run_id: int,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """Test that Python and PHP run trace responses are equivalent after normalization."""
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/run/trace/{run_id}"),
+        php_api.get(f"/run/trace/{run_id}"),
+    )
+    if php_response.status_code == HTTPStatus.OK:
+        _assert_trace_response_success(py_response, php_response)
+        return
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+
+    php_error = php_response.json()["error"]
+    py_error = py_response.json()
+    assert py_error["code"] == php_error["code"]
+    if php_error["code"] == "571":
+        assert php_error["message"] == "Run not found."
+        assert py_error["detail"] == f"Run {run_id} not found."
+    elif php_error["code"] == "572":
+        assert php_error["message"] == "No successful trace associated with this run."
+        assert py_error["detail"] == f"No trace found for run {run_id}."
+    else:
+        msg = f"Unknown error code {php_error['code']} for run {run_id}."
+        raise AssertionError(msg)
+
+
+def _assert_trace_response_success(
+    py_response: httpx.Response, php_response: httpx.Response
+) -> None:
+    assert py_response.status_code == HTTPStatus.OK
+    assert php_response.status_code == HTTPStatus.OK
+
+    py_json = py_response.json()
+
+    # PHP nests response under "trace" key — match that structure
+    py_json = {"trace": py_json}
+
+    # PHP uses "trace_iteration" key, Python uses "trace"
+    py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace")
+
+    # PHP returns all numeric values as strings — normalize Python response
+    py_json = nested_num_to_str(py_json)
+
+    def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]:
+        """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison."""
+        copied = payload.copy()
+        copied["trace"] = copied["trace"].copy()
+        copied["trace"]["trace_iteration"] = sorted(
+            copied["trace"]["trace_iteration"],
+            key=lambda row: (int(row["repeat"]), int(row["fold"]), int(row["iteration"])),
+        )
+        return copied
+
+    differences = deepdiff.diff.DeepDiff(
+        _sort_trace(py_json),
+        _sort_trace(php_response.json()),
+        ignore_order=False,
+    )
+    assert not differences
diff --git a/tests/routers/openml/setups_get_test.py b/tests/routers/openml/setups_get_test.py
index 90094ac..6762714 100644
--- a/tests/routers/openml/setups_get_test.py
+++ b/tests/routers/openml/setups_get_test.py
@@ -1,7 +1,11 @@
+import asyncio
 import re
 from http import HTTPStatus
 
 import httpx
+import pytest
+
+from core.conversions import nested_remove_values, nested_str_to_num
 
 
 async def test_get_setup_unknown(py_api: httpx.AsyncClient) -> None:
@@ -16,3 +20,53 @@ async def test_get_setup_success(py_api: httpx.AsyncClient) -> None:
     data = response.json()["setup_parameters"]
     assert data["setup_id"] == 1
     assert "parameter" in data
+
+
+async def test_get_setup_response_is_identical_setup_doesnt_exist(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    setup_id = 999999
+
+    php_response, py_response = await asyncio.gather(
+        php_api.get(f"/setup/{setup_id}"),
+        py_api.get(f"/setup/{setup_id}"),
+    )
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Unknown setup"
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert py_response.json()["detail"] == f"Setup {setup_id} not found."
+
+
+@pytest.mark.parametrize("setup_id", range(1, 125))
+async def test_get_setup_response_is_identical(
+    setup_id: int,
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    php_response, py_response = await asyncio.gather(
+        php_api.get(f"/setup/{setup_id}"),
+        py_api.get(f"/setup/{setup_id}"),
+    )
+
+    if php_response.status_code == HTTPStatus.PRECONDITION_FAILED:
+        assert py_response.status_code == HTTPStatus.NOT_FOUND
+        return
+
+    assert php_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == HTTPStatus.OK
+
+    php_json = php_response.json()
+
+    # PHP returns integer fields as strings. To compare, we recursively convert string digits
+    # to integers.
+    # PHP also returns `[]` instead of null for empty string optional fields, which Python omits.
+    php_json = nested_str_to_num(php_json)
+    php_json = nested_remove_values(php_json, values=[[], None])
+
+    py_json = nested_str_to_num(py_response.json())
+    py_json = nested_remove_values(py_json, values=[[], None])
+
+    assert py_json == php_json
diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py
index db629bc..b4f704d 100644
--- a/tests/routers/openml/setups_tag_test.py
+++ b/tests/routers/openml/setups_tag_test.py
@@ -1,3 +1,8 @@
+import asyncio
+import contextlib
+import re
+from collections.abc import AsyncIterator, Callable, Iterable
+from contextlib import AbstractAsyncContextManager
 from http import HTTPStatus
 
 import httpx
@@ -7,7 +12,8 @@
 
 from core.errors import SetupNotFoundError, TagAlreadyExistsError
 from routers.openml.setups import tag_setup
-from tests.users import SOME_USER, ApiKey
+from tests.conftest import temporary_records
+from tests.users import OWNER_USER, SOME_USER, ApiKey
 
 
 async def test_setup_tag_missing_auth(py_api: httpx.AsyncClient) -> None:
@@ -83,3 +89,148 @@ async def test_setup_tag_direct_success(expdb_test: AsyncConnection) -> None:
         parameters={"tag": tag},
     )
     assert len(rows.all()) == 1
+
+
+@pytest.mark.mut
+@pytest.mark.parametrize(
+    "api_key",
+    [ApiKey.ADMIN, ApiKey.SOME_USER],
+    ids=["Administrator", "non-owner"],
+)
+@pytest.mark.parametrize(
+    "other_tags",
+    [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]],
+    ids=["none", "one tag", "two tags"],
+)
+async def test_setup_tag_response_is_identical_when_tag_doesnt_exist(  # noqa: PLR0913
+    api_key: str,
+    other_tags: list[str],
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+    expdb_test: AsyncConnection,
+    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
+) -> None:
+    setup_id = 1
+    tag = "totally_new_tag_for_migration_testing"
+
+    async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True):
+        php_response = await php_api.post(
+            "/setup/tag",
+            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+        )
+
+        await expdb_test.execute(
+            text("DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag"),
+            parameters={"setup_id": setup_id, "tag": tag},
+        )
+        await expdb_test.commit()
+
+    async with temporary_tags(tags=other_tags, setup_id=setup_id):
+        py_response = await py_api.post(
+            f"/setup/tag?api_key={api_key}",
+            json={"setup_id": setup_id, "tag": tag},
+        )
+
+    assert py_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == php_response.status_code
+    php_tag = php_response.json()["setup_tag"]
+    py_tag = py_response.json()["setup_tag"]
+    assert py_tag["id"] == php_tag["id"]
+    if tags := php_tag.get("tag"):
+        if isinstance(tags, str):
+            assert py_tag["tag"][0] == tags
+        else:
+            assert set(py_tag["tag"]) == set(tags)
+    else:
+        assert py_tag["tag"] == []
+
+
+async def test_setup_tag_response_is_identical_setup_doesnt_exist(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    setup_id = 999999
+    tag = "totally_new_tag_for_migration_testing"
+    api_key = ApiKey.SOME_USER
+
+    php_response, py_response = await asyncio.gather(
+        php_api.post(
+            "/setup/tag",
+            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+        ),
+        py_api.post(
+            f"/setup/tag?api_key={api_key}",
+            json={"setup_id": setup_id, "tag": tag},
+        ),
+    )
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Entity not found."
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert re.match(
+        r"Setup \d+ not found.",
+        py_response.json()["detail"],
+    )
+
+
+@pytest.mark.mut
+async def test_setup_tag_response_is_identical_tag_already_exists(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
+) -> None:
+    setup_id = 1
+    tag = "totally_new_tag_for_migration_testing"
+    api_key = ApiKey.SOME_USER
+
+    async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True):
+        # Both APIs can be tested in parallel since the tag is already persisted
+        php_response, py_response = await asyncio.gather(
+            php_api.post(
+                "/setup/tag",
+                data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+            ),
+            py_api.post(
+                f"/setup/tag?api_key={api_key}",
+                json={"setup_id": setup_id, "tag": tag},
+            ),
+        )
+
+    assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+    assert py_response.status_code == HTTPStatus.CONFLICT
+    assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
+    assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}."
+
+
+@pytest.fixture
+def temporary_tags(
+    expdb_test: AsyncConnection,
+) -> Callable[..., AbstractAsyncContextManager[None]]:
+    @contextlib.asynccontextmanager
+    async def _temporary_tags(
+        tags: Iterable[str], setup_id: int, *, persist: bool = False
+    ) -> AsyncIterator[None]:
+        insert_queries = [
+            (
+                "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);",
+                {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id},
+            )
+            for tag in tags
+        ]
+        delete_queries = [
+            (
+                "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag",
+                {"setup_id": setup_id, "tag": tag},
+            )
+            for tag in tags
+        ]
+        async with temporary_records(
+            connection=expdb_test,
+            insert_queries=insert_queries,
+            delete_queries=delete_queries,
+            persist=persist,
+        ):
+            yield
+
+    return _temporary_tags
diff --git a/tests/routers/openml/setups_untag_test.py b/tests/routers/openml/setups_untag_test.py
index b96671e..1ed7b42 100644
--- a/tests/routers/openml/setups_untag_test.py
+++ b/tests/routers/openml/setups_untag_test.py
@@ -1,3 +1,7 @@
+import asyncio
+import re
+from collections.abc import Callable
+from contextlib import AbstractAsyncContextManager
 from http import HTTPStatus
 
 import httpx
@@ -116,3 +120,122 @@ async def test_setup_untag_admin_removes_tag_uploaded_by_another_user(
         parameters={"tag": tag},
     )
     assert len(rows.all()) == 0
+
+
+@pytest.mark.mut
+@pytest.mark.parametrize(
+    "api_key",
+    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+    ids=["Administrator", "non-owner", "tag owner"],
+)
+@pytest.mark.parametrize(
+    "other_tags",
+    [[], ["some_other_tag"], ["foo_some_other_tag", "bar_some_other_tag"]],
+    ids=["none", "one tag", "two tags"],
+)
+async def test_setup_untag_response_is_identical_when_tag_exists(
+    api_key: str,
+    other_tags: list[str],
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+    temporary_tags: Callable[..., AbstractAsyncContextManager[None]],
+) -> None:
+    setup_id = 1
+    tag = "totally_new_tag_for_migration_testing"
+
+    all_tags = [tag, *other_tags]
+    async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True):
+        php_response = await php_api.post(
+            "/setup/untag",
+            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+        )
+
+    # The expdb_test transaction is shared with the Python API, so no commit
+    # is needed; it is rolled back at the end of the test.
+    async with temporary_tags(tags=all_tags, setup_id=setup_id):
+        py_response = await py_api.post(
+            f"/setup/untag?api_key={api_key}",
+            json={"setup_id": setup_id, "tag": tag},
+        )
+
+    if py_response.status_code == HTTPStatus.OK:
+        assert py_response.status_code == php_response.status_code
+        php_untag = php_response.json()["setup_untag"]
+        py_untag = py_response.json()["setup_untag"]
+        assert py_untag["id"] == php_untag["id"]
+        if tags := php_untag.get("tag"):
+            if isinstance(tags, str):
+                assert py_untag["tag"][0] == tags
+            else:
+                assert py_untag["tag"] == tags
+        else:
+            assert py_untag["tag"] == []
+        return
+
+    code, message = php_response.json()["error"].values()
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.FORBIDDEN
+    assert py_response.json()["code"] == code
+    assert message == "Tag is not owned by you"
+    assert re.match(
+        r"You may not remove tag \S+ of setup \d+ because it was not created by you.",
+        py_response.json()["detail"],
+    )
+
+
+async def test_setup_untag_response_is_identical_setup_doesnt_exist(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    setup_id = 999999
+    tag = "totally_new_tag_for_migration_testing"
+    api_key = ApiKey.SOME_USER
+
+    php_response, py_response = await asyncio.gather(
+        php_api.post(
+            "/setup/untag",
+            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+        ),
+        py_api.post(
+            f"/setup/untag?api_key={api_key}",
+            json={"setup_id": setup_id, "tag": tag},
+        ),
+    )
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Entity not found."
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert re.match(
+        r"Setup \d+ not found.",
+        py_response.json()["detail"],
+    )
+
+
+async def test_setup_untag_response_is_identical_tag_doesnt_exist(
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    setup_id = 1
+    tag = "totally_new_tag_for_migration_testing"
+    api_key = ApiKey.SOME_USER
+
+    php_response, py_response = await asyncio.gather(
+        php_api.post(
+            "/setup/untag",
+            data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
+        ),
+        py_api.post(
+            f"/setup/untag?api_key={api_key}",
+            json={"setup_id": setup_id, "tag": tag},
+        ),
+    )
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert php_response.json()["error"]["message"] == "Tag not found."
+    assert re.match(
+        r"Setup \d+ does not have tag '\S+'.",
+        py_response.json()["detail"],
+    )
diff --git a/tests/routers/openml/study_get_test.py b/tests/routers/openml/study_get_test.py
index 92b79b1..1ef2cff 100644
--- a/tests/routers/openml/study_get_test.py
+++ b/tests/routers/openml/study_get_test.py
@@ -1,7 +1,11 @@
+import asyncio
 from http import HTTPStatus
 
+import deepdiff
 import httpx
 
+from core.conversions import nested_num_to_str, nested_remove_values
+
 
 async def test_get_task_study_by_id(py_api: httpx.AsyncClient) -> None:
     response = await py_api.get("/studies/1")
@@ -449,3 +453,35 @@ async def test_get_task_study_by_alias(py_api: httpx.AsyncClient) -> None:
         "setup_ids": [],
     }
     assert response.json() == expected
+
+
+async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get("/studies/1"),
+        php_api.get("/study/1"),
+    )
+    assert py_response.status_code == php_response.status_code
+
+    py_json = py_response.json()
+    # New implementation is typed
+    py_json = nested_num_to_str(py_json)
+    # New implementation has same fields even if empty
+    py_json = nested_remove_values(py_json, values=[None])
+    py_json["tasks"] = {"task_id": py_json.pop("task_ids")}
+    py_json["data"] = {"data_id": py_json.pop("data_ids")}
+    if runs := py_json.pop("run_ids", None):
+        py_json["runs"] = {"run_id": runs}
+    if flows := py_json.pop("flow_ids", None):
+        py_json["flows"] = {"flow_id": flows}
+    if setups := py_json.pop("setup_ids", None):
+        py_json["setup"] = {"setup_id": setups}
+
+    # New implementation is not nested
+    py_json = {"study": py_json}
+    difference = deepdiff.diff.DeepDiff(
+        py_json,
+        php_response.json(),
+        ignore_order=True,
+        ignore_numeric_type_changes=True,
+    )
+    assert not difference
diff --git a/tests/routers/openml/task_get_test.py b/tests/routers/openml/task_get_test.py
index e78bba8..955a7b8 100644
--- a/tests/routers/openml/task_get_test.py
+++ b/tests/routers/openml/task_get_test.py
@@ -1,7 +1,15 @@
+import asyncio
 from http import HTTPStatus
 
 import deepdiff
 import httpx
+import pytest
+
+from core.conversions import (
+    nested_num_to_str,
+    nested_remove_single_element_list,
+    nested_remove_values,
+)
 
 
 async def test_get_task(py_api: httpx.AsyncClient) -> None:
@@ -50,3 +58,52 @@ async def test_get_task(py_api: httpx.AsyncClient) -> None:
     }
     differences = deepdiff.diff.DeepDiff(response.json(), expected, ignore_order=True)
     assert not differences
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    range(1, 1306),
+)
+async def test_get_task_equal(
+    task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
+) -> None:
+    py_response, php_response = await asyncio.gather(
+        py_api.get(f"/tasks/{task_id}"),
+        php_api.get(f"/task/{task_id}"),
+    )
+    assert py_response.status_code == HTTPStatus.OK
+    assert php_response.status_code == HTTPStatus.OK
+
+    py_json = py_response.json()
+    # Some fields are renamed (old = tag, new = tags)
+    py_json["tag"] = py_json.pop("tags")
+    py_json["task_id"] = py_json.pop("id")
+    py_json["task_name"] = py_json.pop("name")
+    # PHP is not typed *and* automatically removes None values
+    py_json = nested_remove_values(py_json, values=[None])
+    py_json = nested_num_to_str(py_json)
+    # It also removes "value" entries for parameters if the list is empty,
+    # but it does not remove *all* empty lists, e.g., for cost_matrix input they are kept
+    estimation_procedure = next(
+        v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v
+    )
+    if "parameter" in estimation_procedure:
+        estimation_procedure["parameter"] = [
+            {k: v for k, v in parameter.items() if v != []}
+            for parameter in estimation_procedure["parameter"]
+        ]
+    # Fields that may return in a list now always return a list
+    py_json = nested_remove_single_element_list(py_json)
+    # Tags are not returned if they are an empty list:
+    if py_json["tag"] == []:
+        py_json.pop("tag")
+
+    # The response is no longer nested
+    py_json = {"task": py_json}
+
+    differences = deepdiff.diff.DeepDiff(
+        py_json,
+        php_response.json(),
+        ignore_order=True,
+    )
+    assert not differences
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 78eb5ec..45404d1 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -1,12 +1,15 @@
+import asyncio
 from http import HTTPStatus
-from typing import Any
+from typing import Any, cast
 
+import deepdiff
 import httpx
 import pytest
 from sqlalchemy.ext.asyncio import AsyncConnection
 
+from core.conversions import nested_remove_single_element_list
 from core.errors import NoResultsError
-from routers.dependencies import Pagination
+from routers.dependencies import LIMIT_MAX, Pagination
 from routers.openml.tasks import TaskStatusFilter, list_tasks
 
 
@@ -250,3 +253,162 @@ async def test_list_tasks_no_results(payload: dict[str, Any], expdb_test: AsyncC
     """Filters matching nothing return 404 NoResultsError."""
     with pytest.raises(NoResultsError):
         await list_tasks(pagination=Pagination(), expdb=expdb_test, **payload)
+
+
+_TASK_LIST_NO_RESULTS_CODE = "482"
+
+
+def _build_php_task_list_path(php_params: dict[str, Any]) -> str:
+    """Build a PHP-style path for /task/list with path-encoded filter parameters."""
+    if not php_params:
+        return "/task/list"
+    parts = "/".join(f"{k}/{v}" for k, v in php_params.items())
+    return f"/task/list/{parts}"
+
+
+def _normalize_py_task(task: dict[str, Any]) -> dict[str, Any]:
+    """Normalize a single Python task list entry to match PHP format.
+
+    PHP (XML-to-JSON) returns single-element arrays as plain values, not lists.
+    PHP returns task_id, task_type_id, and did as integers (same as Python),
+    and completely omits the "tag" field for all tasks in the list endpoint.
+    """
+    t = nested_remove_single_element_list(task.copy())
+
+    # PHP's list endpoint does not return tags AT ALL
+    t.pop("tag", None)
+
+    # PHP omits qualities whose value is the string "None"
+    if "quality" in t:
+        t["quality"] = [q for q in t["quality"] if q.get("value") != "None"]
+
+    return cast("dict[str, Any]", t)
+
+
+_FILTER_COMBOS: list[tuple[dict[str, Any], dict[str, Any]]] = [
+    ({"type": 1}, {"task_type_id": 1}),  # by task type
+    ({"tag": "OpenML100"}, {"tag": "OpenML100"}),  # by tag
+    ({"type": 1, "tag": "OpenML100"}, {"task_type_id": 1, "tag": "OpenML100"}),  # combined
+    ({"data_name": "iris"}, {"data_name": "iris"}),  # by dataset name
+    ({"data_id": 61}, {"data_id": [61]}),  # by dataset id
+    ({"data_tag": "study_14"}, {"data_tag": "study_14"}),  # by dataset tag
+    ({"number_instances": "150"}, {"number_instances": "150"}),  # quality filter
+    (
+        {"data_id": 61, "number_instances": "150"},
+        {"data_id": [61], "number_instances": "150"},
+    ),
+]
+_FILTER_IDS = [
+    "type",
+    "tag",
+    "type_and_tag",
+    "data_name",
+    "data_id",
+    "data_tag",
+    "number_instances",
+    "data_and_quality",
+]
+
+
+@pytest.mark.parametrize(
+    ("php_params", "py_extra"),
+    _FILTER_COMBOS,
+    ids=_FILTER_IDS,
+)
+async def test_list_tasks_equal(
+    php_params: dict[str, Any],
+    py_extra: dict[str, Any],
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """Python and PHP task list responses contain the same tasks for the same filters.
+
+    Known differences documented here:
+    - PHP wraps response in {"tasks": {"task": [...]}}, Python returns a flat list.
+    - PHP uses XML-to-JSON which collapses single-element arrays into plain values.
+    - PHP's list endpoint omits the "tag" key; it is dropped from each Python task before comparing.
+    - PHP error status is 412 PRECONDITION_FAILED; Python uses 404 NOT_FOUND.
+    """
+    php_path = _build_php_task_list_path(php_params)
+    py_body = {**py_extra, "pagination": {"limit": LIMIT_MAX, "offset": 0}}
+    py_response, php_response = await asyncio.gather(
+        py_api.post("/tasks/list", json=py_body),
+        php_api.get(php_path),
+    )
+
+    # Error case: no results — PHP returns 412, Python returns 404
+    if php_response.status_code == HTTPStatus.PRECONDITION_FAILED:
+        assert py_response.status_code == HTTPStatus.NOT_FOUND
+        assert py_response.headers["content-type"] == "application/problem+json"
+        assert php_response.json()["error"]["code"] == _TASK_LIST_NO_RESULTS_CODE
+        assert py_response.json()["code"] == _TASK_LIST_NO_RESULTS_CODE
+        return
+
+    assert php_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == HTTPStatus.OK
+
+    php_tasks_raw = php_response.json()["tasks"]["task"]
+    php_tasks: list[dict[str, Any]] = (
+        php_tasks_raw if isinstance(php_tasks_raw, list) else [php_tasks_raw]
+    )
+    php_tasks = php_tasks[:LIMIT_MAX]
+    py_tasks: list[dict[str, Any]] = [_normalize_py_task(t) for t in py_response.json()]
+
+    php_ids = {int(t["task_id"]) for t in php_tasks}
+    py_ids = {int(t["task_id"]) for t in py_tasks}
+
+    assert py_ids == php_ids, (
+        f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}"
+    )
+
+    # Compare only the tasks PHP returned — per-task deepdiff for clear error messages
+    py_by_id = {int(t["task_id"]): t for t in py_tasks}
+    php_by_id = {int(t["task_id"]): t for t in php_tasks}
+    for task_id in php_ids:
+        differences = deepdiff.diff.DeepDiff(
+            py_by_id[task_id],
+            php_by_id[task_id],
+            ignore_order=True,
+        )
+        assert not differences, f"Differences for task {task_id}: {differences}"
+
+
+@pytest.mark.parametrize(
+    ("php_params", "py_extra"),
+    [
+        ({"tag": "nonexistent_tag_xyz_abc"}, {"tag": "nonexistent_tag_xyz_abc"}),
+        ({"type": 9999}, {"task_type_id": 9999}),
+        ({"data_name": "nonexistent_dataset_xyz"}, {"data_name": "nonexistent_dataset_xyz"}),
+    ],
+    ids=["bad_tag", "bad_type", "bad_data_name"],
+)
+async def test_list_tasks_no_results_matches_php(
+    php_params: dict[str, Any],
+    py_extra: dict[str, Any],
+    py_api: httpx.AsyncClient,
+    php_api: httpx.AsyncClient,
+) -> None:
+    """Both APIs return a "no results" error for filters matching nothing.
+
+    Documented differences:
+    - PHP returns 412 PRECONDITION_FAILED; Python returns 404 NOT_FOUND.
+    - PHP message: "No results"; Python detail: "No tasks match the search criteria."
+    """
+    php_path = _build_php_task_list_path(php_params)
+    py_response, php_response = await asyncio.gather(
+        py_api.post("/tasks/list", json=py_extra),
+        php_api.get(php_path),
+    )
+
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+
+    php_error = php_response.json()["error"]
+    py_error = py_response.json()
+
+    # Error codes should be the same
+    assert php_error["code"] == _TASK_LIST_NO_RESULTS_CODE
+    assert py_error["code"] == _TASK_LIST_NO_RESULTS_CODE
+    assert php_error["message"] == "No results"
+    assert py_error["detail"] == "No tasks match the search criteria."
+    assert py_response.headers["content-type"] == "application/problem+json"

From c7fb2c504626e69beba9ef79dfaf6ff14b7fface Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 15 Apr 2026 14:48:09 +0200
Subject: [PATCH 8/8] move temporary tags to conftest

---
 tests/conftest.py                       | 36 ++++++++++++++++++++++-
 tests/routers/openml/setups_tag_test.py | 39 ++-----------------------
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index ad86ce4..368b789 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 import contextlib
 import json
-from collections.abc import AsyncIterator, Iterable, Iterator
+from collections.abc import AsyncIterator, Callable, Iterable, Iterator
 from pathlib import Path
 from typing import Any, NamedTuple
 
@@ -17,6 +17,7 @@
 from database.setup import expdb_database, user_database
 from main import create_api
 from routers.dependencies import expdb_connection, userdb_connection
+from tests.users import OWNER_USER
 
 PHP_API_URL = "http://php-api:80/api/v1/json"
 
@@ -168,6 +169,39 @@ async def persisted_flow(flow: Flow, expdb_test: AsyncConnection) -> AsyncIterat
     await expdb_test.commit()
 
 
+@pytest.fixture
+def temporary_tags(
+    expdb_test: AsyncConnection,
+) -> Callable[..., contextlib.AbstractAsyncContextManager[None]]:
+    @contextlib.asynccontextmanager
+    async def _temporary_tags(
+        tags: Iterable[str], setup_id: int, *, persist: bool = False
+    ) -> AsyncIterator[None]:
+        insert_queries = [
+            (
+                "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);",
+                {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id},
+            )
+            for tag in tags
+        ]
+        delete_queries = [
+            (
+                "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag",
+                {"setup_id": setup_id, "tag": tag},
+            )
+            for tag in tags
+        ]
+        async with temporary_records(
+            connection=expdb_test,
+            insert_queries=insert_queries,
+            delete_queries=delete_queries,
+            persist=persist,
+        ):
+            yield
+
+    return _temporary_tags
+
+
 def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None:  # noqa: ARG001
     for test_item in items:
         for fixture in test_item.fixturenames:  # type: ignore[attr-defined]
diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py
index b4f704d..ad9659f 100644
--- a/tests/routers/openml/setups_tag_test.py
+++ b/tests/routers/openml/setups_tag_test.py
@@ -1,7 +1,6 @@
 import asyncio
-import contextlib
 import re
-from collections.abc import AsyncIterator, Callable, Iterable
+from collections.abc import Callable
 from contextlib import AbstractAsyncContextManager
 from http import HTTPStatus
 
@@ -12,8 +11,7 @@
 
 from core.errors import SetupNotFoundError, TagAlreadyExistsError
 from routers.openml.setups import tag_setup
-from tests.conftest import temporary_records
-from tests.users import OWNER_USER, SOME_USER, ApiKey
+from tests.users import SOME_USER, ApiKey
 
 
 async def test_setup_tag_missing_auth(py_api: httpx.AsyncClient) -> None:
@@ -201,36 +199,3 @@ async def test_setup_tag_response_is_identical_tag_already_exists(
     assert py_response.status_code == HTTPStatus.CONFLICT
     assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
     assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}."
-
-
-@pytest.fixture
-def temporary_tags(
-    expdb_test: AsyncConnection,
-) -> Callable[..., AbstractAsyncContextManager[None]]:
-    @contextlib.asynccontextmanager
-    async def _temporary_tags(
-        tags: Iterable[str], setup_id: int, *, persist: bool = False
-    ) -> AsyncIterator[None]:
-        insert_queries = [
-            (
-                "INSERT INTO setup_tag(`id`,`tag`,`uploader`) VALUES (:setup_id, :tag, :user_id);",
-                {"setup_id": setup_id, "tag": tag, "user_id": OWNER_USER.user_id},
-            )
-            for tag in tags
-        ]
-        delete_queries = [
-            (
-                "DELETE FROM setup_tag WHERE `id`=:setup_id AND `tag`=:tag",
-                {"setup_id": setup_id, "tag": tag},
-            )
-            for tag in tags
-        ]
-        async with temporary_records(
-            connection=expdb_test,
-            insert_queries=insert_queries,
-            delete_queries=delete_queries,
-            persist=persist,
-        ):
-            yield
-
-    return _temporary_tags