From 9c70417835a924513116bf2ca5780ec888f8bd96 Mon Sep 17 00:00:00 2001
From: "congxiao.wxx"
Date: Wed, 27 May 2026 22:26:29 +0800
Subject: [PATCH 1/2] Require test evidence for SDK source changes

Coverage thresholds do not see omitted modules such as integration and
server, so SDK source changes need an explicit per-change evidence gate.
Wire the gate into coverage and a PR workflow, requiring matching unit
and e2e test changes for each changed agentrun source file.

Constraint: GitHub Python SDK repository uses repository-root paths, not monorepo python/ paths.
Rejected: Relying on global or incremental coverage alone | coverage omits important SDK directories and is not per-file enforcement.
Confidence: high
Scope-risk: moderate
Directive: Keep coverage thresholds and this evidence gate separate; omitted coverage files still need unit and e2e evidence.
Tested: uv run --python 3.10 --all-extras isort scripts/check_test_evidence.py tests/unittests/test_quality_gate.py
Tested: uv run --python 3.10 --all-extras pyink --config pyproject.toml scripts/check_test_evidence.py tests/unittests/test_quality_gate.py
Tested: uv run --python 3.10 --all-extras pytest tests/unittests/test_quality_gate.py
Tested: uv run python scripts/check_test_evidence.py simulated missing/unit-only/e2e-only/unit-plus-e2e cases
Change-Id: I6241b508960ee114f344ca2de5302c3fbdd80b92
Not-tested: Full make coverage on the GitHub checkout; the change was validated with targeted gate tests and simulations.
Signed-off-by: congxiao.wxx --- .github/workflows/python-quality.yml | 40 ++++ Makefile | 7 +- README.md | 22 ++ scripts/check_test_evidence.py | 309 +++++++++++++++++++++++++++ tests/unittests/test_quality_gate.py | 140 ++++++++++++ 5 files changed, 516 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/python-quality.yml create mode 100644 scripts/check_test_evidence.py create mode 100644 tests/unittests/test_quality_gate.py diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml new file mode 100644 index 0000000..fde05af --- /dev/null +++ b/.github/workflows/python-quality.yml @@ -0,0 +1,40 @@ +name: Python Quality + +on: + pull_request: + paths: + - 'Makefile' + - 'agentrun/**' + - 'tests/**' + - 'scripts/check_test_evidence.py' + - '.github/workflows/python-quality.yml' + workflow_dispatch: + +permissions: + contents: read + +jobs: + quality-gate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install uv + run: python -m pip install uv + + - name: Install dependencies + run: uv sync --python 3.10 --dev --all-extras + + - name: Run SDK test evidence gate + run: uv run python scripts/check_test_evidence.py --base-ref "origin/${{ github.base_ref || 'main' }}" + + - name: Run quality gate tests + run: uv run pytest tests/unittests/test_quality_gate.py diff --git a/Makefile b/Makefile index 7015393..fc25b94 100644 --- a/Makefile +++ b/Makefile @@ -142,12 +142,15 @@ test-unit: ## 运行单元测试 test-e2e: ## 运行端到端测试 @uv run pytest tests/e2e/ +.PHONY: quality-gate +quality-gate: ## 运行 SDK 增量质量门禁 + @uv run --python ${PYTHON_VERSION} --all-extras python scripts/check_test_evidence.py + .PHONY: mypy-check mypy-check: ## 运行 mypy 类型检查 @uv run mypy --config-file mypy.ini . 
.PHONY: coverage -coverage: ## 运行测试并显示覆盖率报告(全量代码 + 增量代码) +coverage: quality-gate ## 运行测试并显示覆盖率报告(全量代码 + 增量代码) @echo "📊 运行覆盖率测试..." @uv run --python ${PYTHON_VERSION} --all-extras python scripts/check_coverage.py $(COVERAGE_ARGS) - diff --git a/README.md b/README.md index 4e6695c..dca64c3 100644 --- a/README.md +++ b/README.md @@ -223,3 +223,25 @@ SDK 会自动读取以下环境变量: | `AGENTRUN_DATA_ENDPOINT` | 数据端点 | - | | `AGENTRUN_SDK_DEBUG` | 开启 DEBUG 日志 | - | +## 开发质量门禁 + +PR 中修改 `agentrun/**/*.py` 时,必须在同一个变更里提供匹配的单元测试 +证据和 E2E 测试证据。这个门禁独立于 coverage 统计,即使文件位于 +coverage omit 目录(例如 `agentrun/integration/*` 或 +`agentrun/server/*`),也会被检查。 + +本地运行: + +```bash +make quality-gate +``` + +匹配规则示例: + +- `agentrun/integration/...` 变更需要 `tests/unittests/integration/` + 或匹配的单测文件变更,同时需要 `tests/e2e/test_integration.py` 等 E2E + 文件变更。 +- `agentrun/server/...` 变更需要 `tests/unittests/server/`、 + `tests/unittests/**/test_server.py` 等单测变更,同时需要 + `tests/e2e/test_server.py` 等 E2E 变更。 +- 非 SDK 源码路径(如 `examples/`、`docs/`、`codegen/`)不触发此门禁。 diff --git a/scripts/check_test_evidence.py b/scripts/check_test_evidence.py new file mode 100644 index 0000000..b9e613c --- /dev/null +++ b/scripts/check_test_evidence.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +"""Require test evidence for changed SDK source files.""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from pathlib import PurePosixPath +import subprocess +import sys +from typing import Iterable, Sequence + +SDK_ROOT = PurePosixPath("agentrun") +UNITTEST_ROOT = PurePosixPath("tests/unittests") +E2E_ROOT = PurePosixPath("tests/e2e") +DEFAULT_BASE_REF = "origin/main" + + +@dataclass(frozen=True) +class ChangedFile: + status: str + path: str + + +@dataclass(frozen=True) +class MissingTestEvidence: + source_path: str + evidence_type: str + expected_patterns: tuple[str, ...] 
+ + +def is_sdk_source(path: str) -> bool: + posix_path = PurePosixPath(path) + return ( + path.endswith(".py") + and posix_path.is_relative_to(SDK_ROOT) + and "__pycache__" not in posix_path.parts + ) + + +def is_unit_test_evidence(path: str) -> bool: + return is_test_evidence(path, UNITTEST_ROOT) + + +def is_e2e_test_evidence(path: str) -> bool: + return is_test_evidence(path, E2E_ROOT) + + +def is_test_evidence(path: str, test_root: PurePosixPath) -> bool: + posix_path = PurePosixPath(path) + return path.endswith(".py") and posix_path.is_relative_to(test_root) + + +def _module_parts(source_path: str) -> tuple[str, ...]: + relative = PurePosixPath(source_path).relative_to(SDK_ROOT) + if relative.name == "__init__.py": + return relative.parent.parts + return relative.with_suffix("").parts + + +def expected_test_patterns( + source_path: str, test_root: PurePosixPath +) -> tuple[str, ...]: + module_parts = _module_parts(source_path) + if not module_parts: + return (f"{test_root}/test_agentrun.py",) + + top_level = module_parts[0] + leaf = module_parts[-1] + nearest_package = module_parts[-2] if len(module_parts) > 1 else top_level + patterns = [ + f"{test_root}/{top_level}/", + f"{test_root}/test_{top_level}.py", + f"{test_root}/**/test_{top_level}.py", + f"{test_root}/**/test_{nearest_package}.py", + f"{test_root}/**/test_{leaf}.py", + ] + return tuple(dict.fromkeys(patterns)) + + +def has_matching_test_evidence( + source_path: str, + changed_test_paths: Iterable[str], + test_root: PurePosixPath, +) -> bool: + module_parts = _module_parts(source_path) + if not module_parts: + return any( + is_test_evidence(path, test_root) for path in changed_test_paths + ) + + top_level = module_parts[0] + leaf = module_parts[-1] + nearest_package = module_parts[-2] if len(module_parts) > 1 else top_level + acceptable_names = { + f"test_{top_level}.py", + f"test_{nearest_package}.py", + f"test_{leaf}.py", + } + + for path in changed_test_paths: + posix_path = PurePosixPath(path) + 
if not is_test_evidence(path, test_root): + continue + if posix_path.name in acceptable_names: + return True + if posix_path.is_relative_to(test_root / top_level): + return True + return False + + +def find_missing_test_evidence( + changed_files: Sequence[ChangedFile], +) -> list[MissingTestEvidence]: + changed_sources = [ + changed.path + for changed in changed_files + if changed.status != "D" and is_sdk_source(changed.path) + ] + changed_tests = [ + changed.path + for changed in changed_files + if changed.status != "D" + and ( + is_unit_test_evidence(changed.path) + or is_e2e_test_evidence(changed.path) + ) + ] + + missing: list[MissingTestEvidence] = [] + required_evidence = ( + ("unit", UNITTEST_ROOT), + ("e2e", E2E_ROOT), + ) + for source_path in changed_sources: + for evidence_type, test_root in required_evidence: + if has_matching_test_evidence( + source_path, changed_tests, test_root + ): + continue + missing.append( + MissingTestEvidence( + source_path=source_path, + evidence_type=evidence_type, + expected_patterns=expected_test_patterns( + source_path, test_root + ), + ) + ) + return missing + + +def parse_changed_file_specs(specs: Sequence[str]) -> list[ChangedFile]: + changed_files: list[ChangedFile] = [] + for spec in specs: + if not spec: + continue + if ":" in spec and spec.split(":", 1)[0] in { + "A", + "C", + "D", + "M", + "R", + "T", + }: + status, path = spec.split(":", 1) + else: + status, path = "M", spec + changed_files.append(ChangedFile(status=status[0], path=path.strip())) + return changed_files + + +def _run_git(args: Sequence[str]) -> str: + result = subprocess.run( + ["git", *args], + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if result.returncode != 0: + raise RuntimeError(result.stderr.strip() or "git command failed") + return result.stdout + + +def _parse_git_name_status(output: str) -> list[ChangedFile]: + changed_files: list[ChangedFile] = [] + for line in output.splitlines(): + if not 
line.strip(): + continue + fields = line.split("\t") + status = fields[0][0] + path = fields[-1] + changed_files.append(ChangedFile(status=status, path=path)) + return changed_files + + +def git_ref_exists(ref: str) -> bool: + result = subprocess.run( + ["git", "rev-parse", "--verify", "--quiet", ref], + check=False, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + return result.returncode == 0 + + +def get_changed_files(base_ref: str) -> list[ChangedFile]: + changed: list[ChangedFile] = [] + if not git_ref_exists(base_ref): + raise RuntimeError( + f"Base ref {base_ref!r} was not found. Fetch the PR base ref or " + "pass explicit --changed-file entries." + ) + base = _run_git(["merge-base", base_ref, "HEAD"]).strip() + changed.extend( + _parse_git_name_status( + _run_git([ + "diff", + "--name-status", + "--diff-filter=ACMRT", + base, + "HEAD", + ]) + ) + ) + + changed.extend( + _parse_git_name_status( + _run_git( + ["diff", "--name-status", "--diff-filter=ACMRT", "HEAD", "--"] + ) + ) + ) + + deduped: dict[str, ChangedFile] = {} + for changed_file in changed: + deduped[changed_file.path] = changed_file + return list(deduped.values()) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=( + "Fail when changed agentrun source files do not have " + "matching unit and e2e test evidence in the same change." + ) + ) + parser.add_argument( + "--base-ref", + default=DEFAULT_BASE_REF, + help=( + "Git base ref for committed PR changes. Default:" + f" {DEFAULT_BASE_REF}" + ), + ) + parser.add_argument( + "--changed-file", + action="append", + default=[], + metavar="[STATUS:]PATH", + help=( + "Explicit changed file for tests or custom CI integrations. " + "Status defaults to M." 
+ ), + ) + return parser + + +def main(argv: Sequence[str] | None = None) -> int: + args = build_parser().parse_args(argv) + try: + changed_files = ( + parse_changed_file_specs(args.changed_file) + if args.changed_file + else get_changed_files(args.base_ref) + ) + except RuntimeError as exc: + print( + f"SDK test evidence gate could not inspect git diff: {exc}", + file=sys.stderr, + ) + return 2 + + missing = find_missing_test_evidence(changed_files) + if not missing: + print("SDK test evidence gate passed.") + return 0 + + print("SDK test evidence gate failed.", file=sys.stderr) + print( + "Every changed agentrun/**/*.py file must include matching " + "tests/unittests and tests/e2e evidence.", + file=sys.stderr, + ) + for item in missing: + print( + f"\nMissing {item.evidence_type} test evidence for:" + f" {item.source_path}", + file=sys.stderr, + ) + print("Expected one of:", file=sys.stderr) + for pattern in item.expected_patterns: + print(f" - {pattern}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/unittests/test_quality_gate.py b/tests/unittests/test_quality_gate.py new file mode 100644 index 0000000..e566859 --- /dev/null +++ b/tests/unittests/test_quality_gate.py @@ -0,0 +1,140 @@ +import pytest + +from scripts.check_test_evidence import ( + ChangedFile, + find_missing_test_evidence, + get_changed_files, + main, +) + + +def changed(path, status="M"): + return ChangedFile(status=status, path=path) + + +def test_integration_source_change_requires_matching_test_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/integration/langchain/model_adapter.py"), + ]) + + assert {item.evidence_type for item in missing} == {"unit", "e2e"} + by_type = {item.evidence_type: item for item in missing} + assert ( + by_type["unit"].source_path + == "agentrun/integration/langchain/model_adapter.py" + ) + assert "tests/unittests/integration/" in by_type["unit"].expected_patterns + assert 
"tests/e2e/test_integration.py" in by_type["e2e"].expected_patterns + + +def test_integration_source_change_rejects_unittest_only_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/integration/langchain/model_adapter.py"), + changed("tests/unittests/integration/test_integration.py"), + ]) + + assert [item.evidence_type for item in missing] == ["e2e"] + + +def test_integration_source_change_accepts_unit_and_e2e_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/integration/langchain/model_adapter.py"), + changed("tests/unittests/integration/test_integration.py"), + changed("tests/e2e/test_integration.py"), + ]) + + assert missing == [] + + +def test_server_source_change_rejects_e2e_only_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/server/server.py"), + changed("tests/e2e/test_server.py"), + ]) + + assert len(missing) == 1 + assert missing[0].source_path == "agentrun/server/server.py" + assert missing[0].evidence_type == "unit" + + +def test_server_source_change_rejects_unittest_only_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/server/server.py"), + changed("tests/unittests/server/test_server.py"), + ]) + + assert len(missing) == 1 + assert missing[0].source_path == "agentrun/server/server.py" + assert missing[0].evidence_type == "e2e" + + +def test_server_source_change_accepts_unit_and_e2e_evidence(): + missing = find_missing_test_evidence([ + changed("agentrun/server/server.py"), + changed("tests/unittests/server/test_server.py"), + changed("tests/e2e/test_server.py"), + ]) + + assert missing == [] + + +def test_non_sdk_changes_do_not_require_test_evidence(): + missing = find_missing_test_evidence([ + changed("examples/integration_examples.py"), + changed("docs/docs/quick-start.md"), + ]) + + assert missing == [] + + +def test_deleted_source_is_ignored(): + missing = find_missing_test_evidence([ + changed("agentrun/integration/langchain/model_adapter.py", "D"), + ]) 
+ + assert missing == [] + + +def test_missing_base_ref_fails_closed(monkeypatch): + monkeypatch.setattr( + "scripts.check_test_evidence.git_ref_exists", lambda ref: False + ) + + with pytest.raises(RuntimeError, match="Base ref 'origin/main'"): + get_changed_files("origin/main") + + +def test_main_returns_failure_for_missing_evidence(capsys): + exit_code = main([ + "--changed-file", + "agentrun/integration/langchain/model_adapter.py", + ]) + + captured = capsys.readouterr() + assert exit_code == 1 + assert "SDK test evidence gate failed" in captured.err + assert "tests/unittests/integration/" in captured.err + assert "tests/e2e/test_integration.py" in captured.err + + +@pytest.mark.parametrize( + ("source_path", "test_path"), + [ + ( + "agentrun/toolset/api/openapi.py", + "tests/unittests/toolset/test_openapi.py", + ), + ( + "agentrun/utils/helper.py", + "tests/unittests/utils/test_helper.py", + ), + ], +) +def test_matching_leaf_test_evidence_passes(source_path, test_path): + missing = find_missing_test_evidence([ + changed(source_path), + changed(test_path), + changed(test_path.replace("tests/unittests", "tests/e2e")), + ]) + + assert missing == [] From 1b9122e4ffbc07a0ee59dd9909092926408c0621 Mon Sep 17 00:00:00 2001 From: "congxiao.wxx" Date: Wed, 27 May 2026 23:18:05 +0800 Subject: [PATCH 2/2] Keep quality gate script visible to mypy The CI mypy step scans the repository root and the new tests import the gate as scripts.check_test_evidence. Without a package marker, mypy sees scripts/check_test_evidence.py under two module names and exits before coverage can run. Constraint: Existing CI runs make mypy-check before make coverage on every push. Rejected: Excluding scripts from mypy | the quality gate code should remain type-checkable with the rest of the repository. Confidence: high Scope-risk: narrow Directive: Keep scripts/__init__.py if tests import repository scripts as package modules. 
Tested: make mypy-check
Tested: uv run pytest tests/unittests/test_quality_gate.py
Tested: make coverage
Change-Id: I5b894dbaa5ae23181833827ab277d68be380f39d
Not-tested: GitHub-hosted CI rerun is pending after push.
Signed-off-by: congxiao.wxx
---
 scripts/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 scripts/__init__.py

diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..bc5786c
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+"""Repository scripts package."""