From 9c70417835a924513116bf2ca5780ec888f8bd96 Mon Sep 17 00:00:00 2001
From: "congxiao.wxx" <congxiao.wxx@alibaba-inc.com>
Date: Wed, 27 May 2026 22:26:29 +0800
Subject: [PATCH 1/2] Require test evidence for SDK source changes

Coverage thresholds cannot see modules excluded through the coverage omit setting, such as agentrun/integration and agentrun/server, so SDK source changes need an explicit per-change evidence gate. Wire the gate into `make coverage` and a pull-request workflow, requiring matching unit and e2e test changes for each changed agentrun source file.

Constraint: GitHub Python SDK repository uses repository-root paths, not monorepo python/ paths.

Rejected: Relying on global or incremental coverage alone | coverage omits important SDK directories and is not per-file enforcement.

Confidence: high

Scope-risk: moderate

Directive: Keep coverage thresholds and this evidence gate separate; omitted coverage files still need unit and e2e evidence.

Tested: uv run --python 3.10 --all-extras isort scripts/check_test_evidence.py tests/unittests/test_quality_gate.py

Tested: uv run --python 3.10 --all-extras pyink --config pyproject.toml scripts/check_test_evidence.py tests/unittests/test_quality_gate.py

Tested: uv run --python 3.10 --all-extras pytest tests/unittests/test_quality_gate.py

Tested: uv run python scripts/check_test_evidence.py against simulated missing, unit-only, e2e-only, and unit-plus-e2e cases

Change-Id: I6241b508960ee114f344ca2de5302c3fbdd80b92
Not-tested: Full make coverage on the GitHub checkout; the change was validated with targeted gate tests and simulations.
Signed-off-by: congxiao.wxx <congxiao.wxx@alibaba-inc.com>
---
 .github/workflows/python-quality.yml |  40 ++++
 Makefile                             |   7 +-
 README.md                            |  22 ++
 scripts/check_test_evidence.py       | 309 +++++++++++++++++++++++++++
 tests/unittests/test_quality_gate.py | 140 ++++++++++++
 5 files changed, 516 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/python-quality.yml
 create mode 100644 scripts/check_test_evidence.py
 create mode 100644 tests/unittests/test_quality_gate.py

diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml
new file mode 100644
index 0000000..fde05af
--- /dev/null
+++ b/.github/workflows/python-quality.yml
@@ -0,0 +1,40 @@
+name: Python Quality
+
+on:
+  pull_request:
+    paths:
+      - 'Makefile'
+      - 'agentrun/**'
+      - 'tests/**'
+      - 'scripts/check_test_evidence.py'
+      - '.github/workflows/python-quality.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  quality-gate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install uv
+        run: python -m pip install uv
+
+      - name: Install dependencies
+        run: uv sync --python 3.10 --dev --all-extras
+
+      - name: Run SDK test evidence gate
+        run: uv run python scripts/check_test_evidence.py --base-ref "origin/${{ github.base_ref || 'main' }}"
+
+      - name: Run quality gate tests
+        run: uv run pytest tests/unittests/test_quality_gate.py
diff --git a/Makefile b/Makefile
index 7015393..fc25b94 100644
--- a/Makefile
+++ b/Makefile
@@ -142,12 +142,15 @@ test-unit: ## 运行单元测试
 test-e2e: ## 运行端到端测试
 	@uv run pytest tests/e2e/
 
+.PHONY: quality-gate
+quality-gate: ## 运行 SDK 增量质量门禁
+	@uv run --python ${PYTHON_VERSION} --all-extras python scripts/check_test_evidence.py
+
 .PHONY: mypy-check
 mypy-check: ## 运行 mypy 类型检查
 	@uv run mypy --config-file mypy.ini .
 
 .PHONY: coverage
-coverage: ## 运行测试并显示覆盖率报告（全量代码 + 增量代码）
+coverage: quality-gate ## 运行测试并显示覆盖率报告（全量代码 + 增量代码）
 	@echo "📊 运行覆盖率测试..."
 	@uv run --python ${PYTHON_VERSION} --all-extras python scripts/check_coverage.py $(COVERAGE_ARGS)
-
diff --git a/README.md b/README.md
index 4e6695c..dca64c3 100644
--- a/README.md
+++ b/README.md
@@ -223,3 +223,25 @@ SDK 会自动读取以下环境变量：
 | `AGENTRUN_DATA_ENDPOINT` | 数据端点 | - |
 | `AGENTRUN_SDK_DEBUG` | 开启 DEBUG 日志 | - |
 
+## 开发质量门禁
+
+PR 中修改 `agentrun/**/*.py` 时，必须在同一个变更里提供匹配的单元测试
+证据和 E2E 测试证据。这个门禁独立于 coverage 统计，即使文件位于
+coverage omit 目录（例如 `agentrun/integration/*` 或
+`agentrun/server/*`），也会被检查。
+
+本地运行：
+
+```bash
+make quality-gate
+```
+
+匹配规则示例：
+
+- `agentrun/integration/...` 变更需要 `tests/unittests/integration/`
+  或匹配的单测文件变更，同时需要 `tests/e2e/test_integration.py` 等 E2E
+  文件变更。
+- `agentrun/server/...` 变更需要 `tests/unittests/server/`、
+  `tests/unittests/**/test_server.py` 等单测变更，同时需要
+  `tests/e2e/test_server.py` 等 E2E 变更。
+- 非 SDK 源码路径（如 `examples/`、`docs/`、`codegen/`）不触发此门禁。
diff --git a/scripts/check_test_evidence.py b/scripts/check_test_evidence.py
new file mode 100644
index 0000000..b9e613c
--- /dev/null
+++ b/scripts/check_test_evidence.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+"""Require test evidence for changed SDK source files."""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from pathlib import PurePosixPath
+import subprocess
+import sys
+from typing import Iterable, Sequence
+
+SDK_ROOT = PurePosixPath("agentrun")
+UNITTEST_ROOT = PurePosixPath("tests/unittests")
+E2E_ROOT = PurePosixPath("tests/e2e")
+DEFAULT_BASE_REF = "origin/main"
+
+
+@dataclass(frozen=True)
+class ChangedFile:
+    status: str
+    path: str
+
+
+@dataclass(frozen=True)
+class MissingTestEvidence:
+    source_path: str
+    evidence_type: str
+    expected_patterns: tuple[str, ...]
+
+
+def is_sdk_source(path: str) -> bool:
+    posix_path = PurePosixPath(path)
+    return (
+        path.endswith(".py")
+        and posix_path.is_relative_to(SDK_ROOT)
+        and "__pycache__" not in posix_path.parts
+    )
+
+
+def is_unit_test_evidence(path: str) -> bool:
+    return is_test_evidence(path, UNITTEST_ROOT)
+
+
+def is_e2e_test_evidence(path: str) -> bool:
+    return is_test_evidence(path, E2E_ROOT)
+
+
+def is_test_evidence(path: str, test_root: PurePosixPath) -> bool:
+    posix_path = PurePosixPath(path)
+    return path.endswith(".py") and posix_path.is_relative_to(test_root)
+
+
+def _module_parts(source_path: str) -> tuple[str, ...]:
+    relative = PurePosixPath(source_path).relative_to(SDK_ROOT)
+    if relative.name == "__init__.py":
+        return relative.parent.parts
+    return relative.with_suffix("").parts
+
+
+def expected_test_patterns(
+    source_path: str, test_root: PurePosixPath
+) -> tuple[str, ...]:
+    module_parts = _module_parts(source_path)
+    if not module_parts:
+        return (f"{test_root}/test_agentrun.py",)
+
+    top_level = module_parts[0]
+    leaf = module_parts[-1]
+    nearest_package = module_parts[-2] if len(module_parts) > 1 else top_level
+    patterns = [
+        f"{test_root}/{top_level}/",
+        f"{test_root}/test_{top_level}.py",
+        f"{test_root}/**/test_{top_level}.py",
+        f"{test_root}/**/test_{nearest_package}.py",
+        f"{test_root}/**/test_{leaf}.py",
+    ]
+    return tuple(dict.fromkeys(patterns))
+
+
+def has_matching_test_evidence(
+    source_path: str,
+    changed_test_paths: Iterable[str],
+    test_root: PurePosixPath,
+) -> bool:
+    module_parts = _module_parts(source_path)
+    if not module_parts:
+        return any(
+            is_test_evidence(path, test_root) for path in changed_test_paths
+        )
+
+    top_level = module_parts[0]
+    leaf = module_parts[-1]
+    nearest_package = module_parts[-2] if len(module_parts) > 1 else top_level
+    acceptable_names = {
+        f"test_{top_level}.py",
+        f"test_{nearest_package}.py",
+        f"test_{leaf}.py",
+    }
+
+    for path in changed_test_paths:
+        posix_path = PurePosixPath(path)
+        if not is_test_evidence(path, test_root):
+            continue
+        if posix_path.name in acceptable_names:
+            return True
+        if posix_path.is_relative_to(test_root / top_level):
+            return True
+    return False
+
+
+def find_missing_test_evidence(
+    changed_files: Sequence[ChangedFile],
+) -> list[MissingTestEvidence]:
+    changed_sources = [
+        changed.path
+        for changed in changed_files
+        if changed.status != "D" and is_sdk_source(changed.path)
+    ]
+    changed_tests = [
+        changed.path
+        for changed in changed_files
+        if changed.status != "D"
+        and (
+            is_unit_test_evidence(changed.path)
+            or is_e2e_test_evidence(changed.path)
+        )
+    ]
+
+    missing: list[MissingTestEvidence] = []
+    required_evidence = (
+        ("unit", UNITTEST_ROOT),
+        ("e2e", E2E_ROOT),
+    )
+    for source_path in changed_sources:
+        for evidence_type, test_root in required_evidence:
+            if has_matching_test_evidence(
+                source_path, changed_tests, test_root
+            ):
+                continue
+            missing.append(
+                MissingTestEvidence(
+                    source_path=source_path,
+                    evidence_type=evidence_type,
+                    expected_patterns=expected_test_patterns(
+                        source_path, test_root
+                    ),
+                )
+            )
+    return missing
+
+
+def parse_changed_file_specs(specs: Sequence[str]) -> list[ChangedFile]:
+    changed_files: list[ChangedFile] = []
+    for spec in specs:
+        if not spec:
+            continue
+        if ":" in spec and spec.split(":", 1)[0] in {
+            "A",
+            "C",
+            "D",
+            "M",
+            "R",
+            "T",
+        }:
+            status, path = spec.split(":", 1)
+        else:
+            status, path = "M", spec
+        changed_files.append(ChangedFile(status=status[0], path=path.strip()))
+    return changed_files
+
+
+def _run_git(args: Sequence[str]) -> str:
+    result = subprocess.run(
+        ["git", *args],
+        check=False,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    if result.returncode != 0:
+        raise RuntimeError(result.stderr.strip() or "git command failed")
+    return result.stdout
+
+
+def _parse_git_name_status(output: str) -> list[ChangedFile]:
+    changed_files: list[ChangedFile] = []
+    for line in output.splitlines():
+        if not line.strip():
+            continue
+        fields = line.split("\t")
+        status = fields[0][0]
+        path = fields[-1]
+        changed_files.append(ChangedFile(status=status, path=path))
+    return changed_files
+
+
+def git_ref_exists(ref: str) -> bool:
+    result = subprocess.run(
+        ["git", "rev-parse", "--verify", "--quiet", ref],
+        check=False,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    return result.returncode == 0
+
+
+def get_changed_files(base_ref: str) -> list[ChangedFile]:
+    changed: list[ChangedFile] = []
+    if not git_ref_exists(base_ref):
+        raise RuntimeError(
+            f"Base ref {base_ref!r} was not found. Fetch the PR base ref or "
+            "pass explicit --changed-file entries."
+        )
+    base = _run_git(["merge-base", base_ref, "HEAD"]).strip()
+    changed.extend(
+        _parse_git_name_status(
+            _run_git([
+                "diff",
+                "--name-status",
+                "--diff-filter=ACMRT",
+                base,
+                "HEAD",
+            ])
+        )
+    )
+
+    changed.extend(
+        _parse_git_name_status(
+            _run_git(
+                ["diff", "--name-status", "--diff-filter=ACMRT", "HEAD", "--"]
+            )
+        )
+    )
+
+    deduped: dict[str, ChangedFile] = {}
+    for changed_file in changed:
+        deduped[changed_file.path] = changed_file
+    return list(deduped.values())
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Fail when changed agentrun source files do not have "
+            "matching unit and e2e test evidence in the same change."
+        )
+    )
+    parser.add_argument(
+        "--base-ref",
+        default=DEFAULT_BASE_REF,
+        help=(
+            "Git base ref for committed PR changes. Default:"
+            f" {DEFAULT_BASE_REF}"
+        ),
+    )
+    parser.add_argument(
+        "--changed-file",
+        action="append",
+        default=[],
+        metavar="[STATUS:]PATH",
+        help=(
+            "Explicit changed file for tests or custom CI integrations. "
+            "Status defaults to M."
+        ),
+    )
+    return parser
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    args = build_parser().parse_args(argv)
+    try:
+        changed_files = (
+            parse_changed_file_specs(args.changed_file)
+            if args.changed_file
+            else get_changed_files(args.base_ref)
+        )
+    except RuntimeError as exc:
+        print(
+            f"SDK test evidence gate could not inspect git diff: {exc}",
+            file=sys.stderr,
+        )
+        return 2
+
+    missing = find_missing_test_evidence(changed_files)
+    if not missing:
+        print("SDK test evidence gate passed.")
+        return 0
+
+    print("SDK test evidence gate failed.", file=sys.stderr)
+    print(
+        "Every changed agentrun/**/*.py file must include matching "
+        "tests/unittests and tests/e2e evidence.",
+        file=sys.stderr,
+    )
+    for item in missing:
+        print(
+            f"\nMissing {item.evidence_type} test evidence for:"
+            f" {item.source_path}",
+            file=sys.stderr,
+        )
+        print("Expected one of:", file=sys.stderr)
+        for pattern in item.expected_patterns:
+            print(f"  - {pattern}", file=sys.stderr)
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/unittests/test_quality_gate.py b/tests/unittests/test_quality_gate.py
new file mode 100644
index 0000000..e566859
--- /dev/null
+++ b/tests/unittests/test_quality_gate.py
@@ -0,0 +1,140 @@
+import pytest
+
+from scripts.check_test_evidence import (
+    ChangedFile,
+    find_missing_test_evidence,
+    get_changed_files,
+    main,
+)
+
+
+def changed(path, status="M"):
+    return ChangedFile(status=status, path=path)
+
+
+def test_integration_source_change_requires_matching_test_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/integration/langchain/model_adapter.py"),
+    ])
+
+    assert {item.evidence_type for item in missing} == {"unit", "e2e"}
+    by_type = {item.evidence_type: item for item in missing}
+    assert (
+        by_type["unit"].source_path
+        == "agentrun/integration/langchain/model_adapter.py"
+    )
+    assert "tests/unittests/integration/" in by_type["unit"].expected_patterns
+    assert "tests/e2e/test_integration.py" in by_type["e2e"].expected_patterns
+
+
+def test_integration_source_change_rejects_unittest_only_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/integration/langchain/model_adapter.py"),
+        changed("tests/unittests/integration/test_integration.py"),
+    ])
+
+    assert [item.evidence_type for item in missing] == ["e2e"]
+
+
+def test_integration_source_change_accepts_unit_and_e2e_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/integration/langchain/model_adapter.py"),
+        changed("tests/unittests/integration/test_integration.py"),
+        changed("tests/e2e/test_integration.py"),
+    ])
+
+    assert missing == []
+
+
+def test_server_source_change_rejects_e2e_only_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/server/server.py"),
+        changed("tests/e2e/test_server.py"),
+    ])
+
+    assert len(missing) == 1
+    assert missing[0].source_path == "agentrun/server/server.py"
+    assert missing[0].evidence_type == "unit"
+
+
+def test_server_source_change_rejects_unittest_only_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/server/server.py"),
+        changed("tests/unittests/server/test_server.py"),
+    ])
+
+    assert len(missing) == 1
+    assert missing[0].source_path == "agentrun/server/server.py"
+    assert missing[0].evidence_type == "e2e"
+
+
+def test_server_source_change_accepts_unit_and_e2e_evidence():
+    missing = find_missing_test_evidence([
+        changed("agentrun/server/server.py"),
+        changed("tests/unittests/server/test_server.py"),
+        changed("tests/e2e/test_server.py"),
+    ])
+
+    assert missing == []
+
+
+def test_non_sdk_changes_do_not_require_test_evidence():
+    missing = find_missing_test_evidence([
+        changed("examples/integration_examples.py"),
+        changed("docs/docs/quick-start.md"),
+    ])
+
+    assert missing == []
+
+
+def test_deleted_source_is_ignored():
+    missing = find_missing_test_evidence([
+        changed("agentrun/integration/langchain/model_adapter.py", "D"),
+    ])
+
+    assert missing == []
+
+
+def test_missing_base_ref_fails_closed(monkeypatch):
+    monkeypatch.setattr(
+        "scripts.check_test_evidence.git_ref_exists", lambda ref: False
+    )
+
+    with pytest.raises(RuntimeError, match="Base ref 'origin/main'"):
+        get_changed_files("origin/main")
+
+
+def test_main_returns_failure_for_missing_evidence(capsys):
+    exit_code = main([
+        "--changed-file",
+        "agentrun/integration/langchain/model_adapter.py",
+    ])
+
+    captured = capsys.readouterr()
+    assert exit_code == 1
+    assert "SDK test evidence gate failed" in captured.err
+    assert "tests/unittests/integration/" in captured.err
+    assert "tests/e2e/test_integration.py" in captured.err
+
+
+@pytest.mark.parametrize(
+    ("source_path", "test_path"),
+    [
+        (
+            "agentrun/toolset/api/openapi.py",
+            "tests/unittests/toolset/test_openapi.py",
+        ),
+        (
+            "agentrun/utils/helper.py",
+            "tests/unittests/utils/test_helper.py",
+        ),
+    ],
+)
+def test_matching_leaf_test_evidence_passes(source_path, test_path):
+    missing = find_missing_test_evidence([
+        changed(source_path),
+        changed(test_path),
+        changed(test_path.replace("tests/unittests", "tests/e2e")),
+    ])
+
+    assert missing == []

From 1b9122e4ffbc07a0ee59dd9909092926408c0621 Mon Sep 17 00:00:00 2001
From: "congxiao.wxx" <congxiao.wxx@alibaba-inc.com>
Date: Wed, 27 May 2026 23:18:05 +0800
Subject: [PATCH 2/2] Keep quality gate script visible to mypy

The CI mypy step scans the repository root, and the new tests import the gate as scripts.check_test_evidence. Without a package marker (scripts/__init__.py), mypy sees scripts/check_test_evidence.py under two module names (check_test_evidence and scripts.check_test_evidence) and exits with an error before coverage can run.

Constraint: Existing CI runs make mypy-check before make coverage on every push.

Rejected: Excluding scripts from mypy | the quality gate code should remain type-checkable with the rest of the repository.

Confidence: high

Scope-risk: narrow

Directive: Keep scripts/__init__.py if tests import repository scripts as package modules.

Tested: make mypy-check

Tested: uv run pytest tests/unittests/test_quality_gate.py

Tested: make coverage

Change-Id: I5b894dbaa5ae23181833827ab277d68be380f39d
Not-tested: GitHub-hosted CI rerun is pending after push.
Signed-off-by: congxiao.wxx <congxiao.wxx@alibaba-inc.com>
---
 scripts/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 scripts/__init__.py

diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..bc5786c
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+"""Repository scripts package."""