From 7355149b2c27332697e27d39f53917a8689db2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Vokr=C3=A1=C4=8Dko?= Date: Wed, 29 Apr 2026 14:10:37 +0200 Subject: [PATCH 1/3] build: measure coverage on source only via coverage run The previous setup relied on pytest-cov (``--cov pytest_split --cov tests`` in addopts) which has a long-standing limitation when measuring a pytest plugin: the plugin imports happen during pytest's plugin discovery, *before* pytest-cov starts tracing. Anything that runs at import time (class bodies, decorators, type aliases, enum definitions) is reported as uncovered even though it obviously executed - the plugin wouldn't load otherwise. The ``CoverageWarning: Module pytest_split was previously imported, but not measured`` made that explicit. The cookiecutter template the project was generated from worked around this by adding ``--cov tests`` to the addopts, inflating the denominator with 100%-covered test files so the average reached 90%. That pulled the number up but stopped reporting the actual source coverage. Switch to ``coverage run -m pytest`` followed by ``coverage report``. Running ``coverage`` from the outside means tracing starts before Python imports anything at all, so plugin import-time code is counted. Real source coverage jumps from ~74% to 99%; threshold goes from 90 to 95 to reflect that. --- .github/workflows/test.yml | 3 ++- pyproject.toml | 12 ++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f2dbbdd..505b3ad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,7 +44,8 @@ jobs: - uses: ./.github/actions/python-poetry-env with: python-version: ${{ matrix.python-version }} - - run: poetry run pytest + - run: poetry run coverage run -m pytest + - run: poetry run coverage report docs: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index be14d7d..164e854 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,16 +63,12 @@ pytest-split = "pytest_split.plugin" target-version = ["py310", "py311", "py312", "py313", "py314"] include = '\.pyi?$' -[tool.pytest.ini_options] -addopts = """\ - --cov pytest_split \ - --cov tests \ - --cov-report term-missing \ - --no-cov-on-fail \ -""" +[tool.coverage.run] +source = ["pytest_split"] [tool.coverage.report] -fail_under = 90 +fail_under = 95 +show_missing = true exclude_lines = [ 'if TYPE_CHECKING:', 'pragma: no cover' From 93b830b152d467b11fe00905017d95fa8a0b4974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Vokr=C3=A1=C4=8Dko?= Date: Wed, 29 Apr 2026 10:01:47 +0200 Subject: [PATCH 2/3] refactor: assert TestGroup directly in algorithm tests Tests previously asserted each TestGroup field separately (`first.selected`, `first.deselected`, `first.duration`). Consolidate to a single equality check against an expected TestGroup, with concrete values for every field rather than only ``selected``. --- tests/test_algorithms.py | 142 +++++++++++++++++++++++++++++---------- 1 file changed, 105 insertions(+), 37 deletions(-) diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py index a02b6ed..5b388fd 100644 --- a/tests/test_algorithms.py +++ b/tests/test_algorithms.py @@ -10,6 +10,7 @@ from pytest_split.algorithms import ( AlgorithmBase, Algorithms, + TestGroup, ) item = namedtuple("item", "nodeid") # noqa: PYI024 @@ -21,20 +22,25 @@ def test__split_test(self, algo_name): durations = {"a": 1, "b": 1, "c": 1} items = [item(x) for x in durations] algo = Algorithms[algo_name].value + first, second, third = algo(splits=3, items=items, durations=durations) # each split should have one test - assert first.selected == [item("a")] - assert first.deselected == [item("b"), item("c")] - assert first.duration == 1 - - assert second.selected == [item("b")] - assert second.deselected == [item("a"), item("c")] - assert second.duration == 1 - - assert third.selected == [item("c")] - assert third.deselected == [item("a"), item("b")] - assert third.duration == 1 + assert first == TestGroup( + selected=[item("a")], + deselected=[item("b"), item("c")], + duration=1, + ) + assert second == TestGroup( + selected=[item("b")], + deselected=[item("a"), item("c")], + duration=1, + ) + assert third == TestGroup( + selected=[item("c")], + deselected=[item("a"), item("b")], + duration=1, + ) @pytest.mark.parametrize("algo_name", Algorithms.names()) def test__split_tests_handles_tests_in_durations_but_missing_from_items( @@ -43,39 +49,83 @@ def test__split_tests_handles_tests_in_durations_but_missing_from_items( durations = {"a": 1, "b": 1} items = [item(x) for x in ["a"]] algo = Algorithms[algo_name].value - splits = algo(splits=2, items=items, durations=durations) - first, second = splits - assert first.selected == [item("a")] - assert second.selected == [] + first, second = algo(splits=2, items=items, durations=durations) + + assert first == TestGroup( + selected=[item("a")], deselected=[], duration=1 + ) + assert second == TestGroup( + selected=[], deselected=[item("a")], duration=0 + ) @pytest.mark.parametrize("algo_name", Algorithms.names()) def test__split_tests_handles_tests_with_missing_durations(self, algo_name): durations = {"a": 1} items = [item(x) for x in ["a", "b"]] algo = Algorithms[algo_name].value - splits = algo(splits=2, items=items, durations=durations) - first, second = splits - assert first.selected == [item("a")] - assert second.selected == [item("b")] + first, second = algo(splits=2, items=items, durations=durations) + + assert first == TestGroup( + selected=[item("a")], deselected=[item("b")], duration=1 + ) + assert second == TestGroup( + selected=[item("b")], deselected=[item("a")], duration=1 + ) def test__split_test_handles_large_duration_at_end(self): """NOTE: only least_duration does this correctly""" durations = {"a": 1, "b": 1, "c": 1, "d": 3} items = [item(x) for x in ["a", "b", "c", "d"]] algo = Algorithms["least_duration"].value - splits = algo(splits=2, items=items, durations=durations) - first, second = splits - assert first.selected == [item("d")] - assert second.selected == [item(x) for x in ["a", "b", "c"]] + first, second = algo(splits=2, items=items, durations=durations) + + assert first == TestGroup( + selected=[item("d")], + deselected=[item("a"), item("b"), item("c")], + duration=3, + ) + assert second == TestGroup( + selected=[item("a"), item("b"), item("c")], + deselected=[item("d")], + duration=3, + ) @pytest.mark.parametrize( ("algo_name", "expected"), [ - ("duration_based_chunks", [[item("a"), item("b")], [item("c"), item("d")]]), - ("least_duration", [[item("a"), item("c")], [item("b"), item("d")]]), + ( + "duration_based_chunks", + [ + TestGroup( + selected=[item("a"), item("b")], + deselected=[item("c"), item("d")], + duration=2, + ), + TestGroup( + selected=[item("c"), item("d")], + deselected=[item("a"), item("b")], + duration=2, + ), + ], + ), + ( + "least_duration", + [ + TestGroup( + selected=[item("a"), item("c")], + deselected=[item("b"), item("d")], + duration=2, + ), + TestGroup( + selected=[item("b"), item("d")], + deselected=[item("a"), item("c")], + duration=2, + ), + ], + ), ], ) def test__split_tests_calculates_avg_test_duration_only_on_present_tests( @@ -88,23 +138,43 @@ def test__split_tests_calculates_avg_test_duration_only_on_present_tests( durations = {"b": 1, "c": 1, "d": 1, "e": 10000} items = [item(x) for x in ["a", "b", "c", "d"]] algo = Algorithms[algo_name].value - splits = algo(splits=2, items=items, durations=durations) - first, second = splits - expected_first, expected_second = expected - assert first.selected == expected_first - assert second.selected == expected_second + groups = algo(splits=2, items=items, durations=durations) + + assert groups == expected @pytest.mark.parametrize( ("algo_name", "expected"), [ ( "duration_based_chunks", - [[item("a"), item("b"), item("c"), item("d"), item("e")], []], + [ + TestGroup( + selected=[item(x) for x in "abcde"], + deselected=[], + duration=10014, + ), + TestGroup( + selected=[], + deselected=[item(x) for x in "abcde"], + duration=0, + ), + ], ), ( "least_duration", - [[item("e")], [item("a"), item("b"), item("c"), item("d")]], + [ + TestGroup( + selected=[item("e")], + deselected=[item(x) for x in "dcba"], + duration=10000, + ), + TestGroup( + selected=[item(x) for x in "abcd"], + deselected=[item("e")], + duration=14, + ), + ], ), ], ) @@ -112,12 +182,10 @@ def test__split_tests_maintains_relative_order_of_tests(self, algo_name, expecte durations = {"a": 2, "b": 3, "c": 4, "d": 5, "e": 10000} items = [item(x) for x in ["a", "b", "c", "d", "e"]] algo = Algorithms[algo_name].value - splits = algo(splits=2, items=items, durations=durations) - first, second = splits - expected_first, expected_second = expected - assert first.selected == expected_first - assert second.selected == expected_second + groups = algo(splits=2, items=items, durations=durations) + + assert groups == expected def test__split_tests_same_set_regardless_of_order(self): """NOTE: only least_duration does this correctly""" From 347e578948e9083c22d4cd679f7999bd2f6bbdb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Vokr=C3=A1=C4=8Dko?= Date: Tue, 28 Apr 2026 16:56:24 +0200 Subject: [PATCH 3/3] refactor: pass durations dict into algorithms Tighten the algorithm contract so each one receives a single data argument: a dict mapping pytest items to their durations. Promote the helper that builds the dict to a public name (compute_durations) and inline the two helpers that only it used. The plugin builds the dict once and passes it to the algorithm. Algorithm bodies otherwise consume the same (item, duration) stream they did before, so behaviour is unchanged. Sets up follow-ups that separate group membership from within-group ordering. --- CHANGELOG.md | 7 ++++ src/pytest_split/algorithms.py | 67 ++++++++++++++-------------------- src/pytest_split/plugin.py | 3 +- tests/test_algorithms.py | 47 +++++++++++++++++++----- tests/test_ipynb.py | 4 +- 5 files changed, 77 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1391dff..a38d764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +- `AlgorithmBase.__call__` now takes a single `durations: dict[Item, float]` + argument instead of separate `items` and `durations` arguments. Custom + algorithm subclasses must update their signature. Use the new public + `pytest_split.algorithms.compute_durations(items, cached_durations)` helper to + build the dict the same way the plugin does. + ### Fixed - Fix malformed bullet points rendering in GitHub Pages documentation diff --git a/src/pytest_split/algorithms.py b/src/pytest_split/algorithms.py index 83223b3..803e19f 100644 --- a/src/pytest_split/algorithms.py +++ b/src/pytest_split/algorithms.py @@ -1,7 +1,6 @@ import enum import heapq from abc import ABC, abstractmethod -from operator import itemgetter from typing import TYPE_CHECKING, NamedTuple if TYPE_CHECKING: @@ -19,7 +18,7 @@ class AlgorithmBase(ABC): @abstractmethod def __call__( - self, splits: int, items: "list[nodes.Item]", durations: "dict[str, float]" + self, splits: int, durations: "dict[nodes.Item, float]" ) -> "list[TestGroup]": pass @@ -43,25 +42,22 @@ class LeastDurationAlgorithm(AlgorithmBase): that use this plugin. Due to issue #25 this might not always be the case. :param splits: How many groups we're splitting in. - :param items: Test items passed down by Pytest. - :param durations: Our cached test runtimes. Assumes contains timings only of relevant tests + :param durations: Mapping from each test item to its duration. Build it with :func:`compute_durations`. :return: List of groups """ def __call__( - self, splits: int, items: "list[nodes.Item]", durations: "dict[str, float]" + self, splits: int, durations: "dict[nodes.Item, float]" ) -> "list[TestGroup]": - items_with_durations = _get_items_with_durations(items, durations) - # add index of item in list items_with_durations_indexed = [ - (*tup, i) for i, tup in enumerate(items_with_durations) + (item, dur, i) for i, (item, dur) in enumerate(durations.items()) ] # Sort by name to ensure it's always the same order items_with_durations_indexed = sorted( - items_with_durations_indexed, key=lambda tup: str(tup[0]) + items_with_durations_indexed, key=lambda tup: tup[0].nodeid ) # sort in ascending order @@ -114,23 +110,21 @@ class DurationBasedChunksAlgorithm(AlgorithmBase): and creating group_1 = items[0:i_0], group_2 = items[i_0, i_1], group_3 = items[i_1, i_2], ... :param splits: How many groups we're splitting in. - :param items: Test items passed down by Pytest. - :param durations: Our cached test runtimes. Assumes contains timings only of relevant tests + :param durations: Mapping from each test item to its duration. Build it with :func:`compute_durations`. :return: List of TestGroup """ def __call__( - self, splits: int, items: "list[nodes.Item]", durations: "dict[str, float]" + self, splits: int, durations: "dict[nodes.Item, float]" ) -> "list[TestGroup]": - items_with_durations = _get_items_with_durations(items, durations) - time_per_group = sum(map(itemgetter(1), items_with_durations)) / splits + time_per_group = sum(durations.values()) / splits selected: list[list[nodes.Item]] = [[] for i in range(splits)] deselected: list[list[nodes.Item]] = [[] for i in range(splits)] duration: list[float] = [0 for i in range(splits)] group_idx = 0 - for item, item_duration in items_with_durations: + for item, item_duration in durations.items(): if duration[group_idx] >= time_per_group: group_idx += 1 @@ -148,33 +142,28 @@ def __call__( ] -def _get_items_with_durations( - items: "list[nodes.Item]", durations: "dict[str, float]" -) -> "list[tuple[nodes.Item, float]]": - durations = _remove_irrelevant_durations(items, durations) - avg_duration_per_test = _get_avg_duration_per_test(durations) - items_with_durations = [ - (item, durations.get(item.nodeid, avg_duration_per_test)) for item in items - ] - return items_with_durations - +def compute_durations( + items: "list[nodes.Item]", cached_durations: "dict[str, float]" +) -> "dict[nodes.Item, float]": + """ + Build the splitting input from collected items and their cached durations. -def _get_avg_duration_per_test(durations: "dict[str, float]") -> float: - if durations: - avg_duration_per_test = sum(durations.values()) / len(durations) + Items missing from ``cached_durations`` get the average duration of the + cached entries that are relevant to this suite; with no cached data at + all, every item gets ``1`` as a placeholder. + """ + # Filtering down durations to relevant ones ensures the avg isn't skewed by irrelevant data + relevant = { + item.nodeid: cached_durations[item.nodeid] + for item in items + if item.nodeid in cached_durations + } + if relevant: + avg = sum(relevant.values()) / len(relevant) else: # If there are no durations, give every test the same arbitrary value - avg_duration_per_test = 1 - return avg_duration_per_test - - -def _remove_irrelevant_durations( - items: "list[nodes.Item]", durations: "dict[str, float]" -) -> "dict[str, float]": - # Filtering down durations to relevant ones ensures the avg isn't skewed by irrelevant data - test_ids = [item.nodeid for item in items] - durations = {name: durations[name] for name in test_ids if name in durations} - return durations + avg = 1 + return {item: relevant.get(item.nodeid, avg) for item in items} class Algorithms(enum.Enum): diff --git a/src/pytest_split/plugin.py b/src/pytest_split/plugin.py index 9140936..4868d77 100644 --- a/src/pytest_split/plugin.py +++ b/src/pytest_split/plugin.py @@ -160,7 +160,8 @@ def pytest_collection_modifyitems( group_idx: int = config.option.group algo = algorithms.Algorithms[config.option.splitting_algorithm].value - groups = algo(splits, items, self.cached_durations) + durations = algorithms.compute_durations(items, self.cached_durations) + groups = algo(splits, durations) group = groups[group_idx - 1] ensure_ipynb_compatibility(group, items) diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py index 5b388fd..20293d1 100644 --- a/tests/test_algorithms.py +++ b/tests/test_algorithms.py @@ -11,6 +11,7 @@ AlgorithmBase, Algorithms, TestGroup, + compute_durations, ) item = namedtuple("item", "nodeid") # noqa: PYI024 @@ -23,7 +24,9 @@ def test__split_test(self, algo_name): items = [item(x) for x in durations] algo = Algorithms[algo_name].value - first, second, third = algo(splits=3, items=items, durations=durations) + first, second, third = algo( + splits=3, durations=compute_durations(items, durations) + ) # each split should have one test assert first == TestGroup( @@ -50,7 +53,7 @@ def test__split_tests_handles_tests_in_durations_but_missing_from_items( items = [item(x) for x in ["a"]] algo = Algorithms[algo_name].value - first, second = algo(splits=2, items=items, durations=durations) + first, second = algo(splits=2, durations=compute_durations(items, durations)) assert first == TestGroup( selected=[item("a")], deselected=[], duration=1 @@ -65,7 +68,7 @@ def test__split_tests_handles_tests_with_missing_durations(self, algo_name): items = [item(x) for x in ["a", "b"]] algo = Algorithms[algo_name].value - first, second = algo(splits=2, items=items, durations=durations) + first, second = algo(splits=2, durations=compute_durations(items, durations)) assert first == TestGroup( selected=[item("a")], deselected=[item("b")], duration=1 @@ -80,7 +83,7 @@ def test__split_test_handles_large_duration_at_end(self): items = [item(x) for x in ["a", "b", "c", "d"]] algo = Algorithms["least_duration"].value - first, second = algo(splits=2, items=items, durations=durations) + first, second = algo(splits=2, durations=compute_durations(items, durations)) assert first == TestGroup( selected=[item("d")], @@ -139,7 +142,7 @@ def test__split_tests_calculates_avg_test_duration_only_on_present_tests( items = [item(x) for x in ["a", "b", "c", "d"]] algo = Algorithms[algo_name].value - groups = algo(splits=2, items=items, durations=durations) + groups = algo(splits=2, durations=compute_durations(items, durations)) assert groups == expected @@ -183,7 +186,7 @@ def test__split_tests_maintains_relative_order_of_tests(self, algo_name, expecte items = [item(x) for x in ["a", "b", "c", "d", "e"]] algo = Algorithms[algo_name].value - groups = algo(splits=2, items=items, durations=durations) + groups = algo(splits=2, durations=compute_durations(items, durations)) assert groups == expected @@ -196,7 +199,10 @@ def test__split_tests_same_set_regardless_of_order(self): for n in (2, 3, 4): selected_each: list[set[Item]] = [set() for _ in range(n)] for order in itertools.permutations(items): - splits = algo(splits=n, items=order, durations=durations) + splits = algo( + splits=n, + durations=compute_durations(list(order), durations), + ) for i, group in enumerate(splits): if not selected_each[i]: selected_each[i] = set(group.selected) @@ -207,13 +213,36 @@ def test__algorithms_members_derived_correctly(self): assert issubclass(Algorithms[a].value.__class__, AlgorithmBase) +class TestComputeDurations: + def test_uses_real_durations_avg_fills_missing_ignores_irrelevant(self): + # "ghost" isn't in the suite so it's excluded from the avg, and "c" + # gets the avg of "a" and "b": (2.0 + 4.0) / 2 = 3.0. + items = [item("a"), item("b"), item("c")] + cached = {"a": 2.0, "b": 4.0, "ghost": 10000.0} + assert compute_durations(items, cached) == { + item("a"): 2.0, + item("b"): 4.0, + item("c"): 3.0, + } + + def test_falls_back_to_one_when_no_relevant_durations(self): + assert compute_durations([item("a"), item("b")], {}) == { + item("a"): 1, + item("b"): 1, + } + + def test_returned_dict_iterates_in_input_order(self): + items = [item("c"), item("a"), item("b")] + assert list(compute_durations(items, {"a": 1, "b": 2, "c": 3})) == items + + class MyAlgorithm(AlgorithmBase): - def __call__(self, a, b, c): + def __call__(self, a, b): """no-op""" class MyOtherAlgorithm(AlgorithmBase): - def __call__(self, a, b, c): + def __call__(self, a, b): """no-op""" diff --git a/tests/test_ipynb.py b/tests/test_ipynb.py index 9bc2000..c60aa11 100644 --- a/tests/test_ipynb.py +++ b/tests/test_ipynb.py @@ -1,7 +1,7 @@ from collections import namedtuple import pytest -from pytest_split.algorithms import Algorithms +from pytest_split.algorithms import Algorithms, compute_durations from pytest_split.ipynb_compatibility import ensure_ipynb_compatibility item = namedtuple("item", "nodeid") # noqa: PYI024 @@ -29,7 +29,7 @@ def test_ensure_ipynb_compatibility(self, algo_name): } items = [item(x) for x in durations] algo = Algorithms[algo_name].value - groups = algo(splits=3, items=items, durations=durations) + groups = algo(splits=3, durations=compute_durations(items, durations)) assert groups[0].selected == [ item(nodeid="temp/nbs/test_1.ipynb::Cell 0"),