From 64bbbc6ee54421bcd02ee7c73168c350c8d7a78f Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:41:38 -0400 Subject: [PATCH 01/11] Add 'PRs in Release' table to report --- .../ci_run_report.html.jinja | 9 + .../create_workflow_report.py | 311 +++++++++++++++--- 2 files changed, 279 insertions(+), 41 deletions(-) diff --git a/.github/actions/create_workflow_report/ci_run_report.html.jinja b/.github/actions/create_workflow_report/ci_run_report.html.jinja index 4c94465a16c6..a6e0df17c49f 100644 --- a/.github/actions/create_workflow_report/ci_run_report.html.jinja +++ b/.github/actions/create_workflow_report/ci_run_report.html.jinja @@ -168,6 +168,7 @@ {% endif %}
Some PRs are missing verification.
+ {% endif %} + {{ prs_in_release_html }} + {%- endif %} + {%- if pr_number != 0 -%}Compared with base sha {{ base_sha }}
diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index 8daf0fc24f31..a4f43e172459 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -6,10 +6,12 @@ from itertools import combinations import json from datetime import datetime +from datetime import timezone from functools import lru_cache from glob import glob import urllib.parse import re +import subprocess import pandas as pd from jinja2 import Environment, FileSystemLoader @@ -169,6 +171,196 @@ def get_run_details(run_id: str) -> dict: return response.json() +def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: + if len(df) == 0: + return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + if not GITHUB_TOKEN: + raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + rows = [] + missing_verification = False + for pr_number in df["pr_number"].tolist(): + response = requests.get( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"Failed to fetch pull request info: {response.status_code} {response.text}" + ) + pr = response.json() + label_names = [l["name"] for l in pr.get("labels", [])] + if any(ln.lower() == "cicd" for ln in label_names): + continue + lowered = {ln.lower() for ln in label_names} + if "verified" not in lowered and "verified-with-issue" not in lowered: + missing_verification = True + rows.append( + { + "pr_number": pr_number, + "pr_name": pr.get("title", ""), + "labels": ", ".join(sorted(label_names)), + } + ) + return pd.DataFrame(rows), missing_verification + + +def _git_rev_parse(ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + ["git", "rev-parse", "--verify", ref], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + return p.stdout.strip() + + +def _git_is_ancestor(ancestor: str, descendant: str, cwd: str | None) -> bool: + p = subprocess.run( + ["git", "merge-base", "--is-ancestor", ancestor, descendant], + cwd=cwd, + capture_output=True, + ) + return p.returncode == 0 + + +def _git_log_merge_prs( + baseline: str, branch_ref: str, cwd: str | None, repo: str +) -> pd.DataFrame: + p = subprocess.run( + [ + "git", + "-c", + "core.quotepath=false", + "log", + f"{baseline}..{branch_ref}", + "--merges", + "--format=%H%x09%s", + ], + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + rows = [] + for line in p.stdout.splitlines(): + if not line.strip(): + continue + sha, subject = line.split("\t", 1) + m = re.match( + r"Merge pull request #(\d+) from ([^/\s]+)/", subject, re.IGNORECASE + ) + if not m: + continue + pr_number, head_owner = int(m.group(1)), m.group(2) + if head_owner.lower() != repo.split("/")[0].lower(): + continue + rows.append( + { + "pr_number": pr_number, + "merge_commit_sha": sha, + "merge_subject": subject, + } + ) + if not rows: + return pd.DataFrame(columns=["pr_number", "merge_commit_sha", "merge_subject"]) + df = pd.DataFrame(rows) + df = df.drop_duplicates(subset=["pr_number"], keep="first") + return df + + +def _find_release_baseline( + branch_ref: str, repo: str, cwd: str | None +) -> tuple[str | None, str | None]: + if not GITHUB_TOKEN: + return None, None + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get( + f"https://api.github.com/repos/{repo}/releases?per_page=100", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"GitHub API request failed: {response.status_code} {response.text}" + ) + for rel in response.json(): + if rel.get("draft"): + continue + tag_name = rel.get("tag_name") + if not tag_name: + continue + tag_sha = _git_rev_parse(tag_name, cwd) + if not tag_sha: + continue + if not _git_is_ancestor(tag_sha, branch_ref, cwd): + continue + return tag_name, tag_sha + return None, None + + +def _find_rebase_baseline(branch_ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + [ + "git", + "log", + branch_ref, + "--reverse", + "--merges", + "-i", + "--grep=rebase-cicd", + "--grep=rebase/", + "--format=%H", + ], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + lines = [ln for ln in p.stdout.splitlines() if ln.strip()] + if not lines: + return None + return lines[0] + + +def get_prs_in_release_dataframe( + branch_ref: str = "HEAD", + *, + repo: str = GITHUB_REPO, + cwd: str | None = None, +) -> tuple[pd.DataFrame, bool]: + """ + PRs merged into branch_ref that belong in the next release notes: after the latest GitHub + Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. + Only merge commits whose subject has fromNothing to report
" - results.columns = [col.replace("_", " ").title() for col in results.columns] + + results = results.copy() + + def format_col_name(col_name: str) -> str: + return col_name.replace("_", " ").title().replace("Pr ", "PR ") + + results.columns = [format_col_name(col) for col in results.columns] + + formatters = { + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + "Severity": lambda s: ( + f'{s}' + ), + "PR Number": lambda n: url_to_html_link( + f"https://github.com/{GITHUB_REPO}/pull/{n}" + ), + } + html = results.to_html( index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - "Test Status": format_test_status, - "Job Status": format_test_status, - "Status": format_test_status, - "Message": lambda m: m.replace("\n", " "), - "Identifier": lambda i: url_to_html_link( - "https://nvd.nist.gov/vuln/detail/" + i - ), - "Severity": lambda s: ( - f'{s}' - ), - }, + formatters=formatters, escape=False, border=0, classes=["test-results-table"], @@ -816,7 +1023,9 @@ def create_workflow_report( settings={"use_numpy": True}, ) - fail_results = { + prs_in_release_missing_verification = False + results_dfs = { + "prs_in_release": [], "job_statuses": get_commit_statuses(commit_sha), "checks_fails": get_checks_fails(db_client, commit_sha, branch_name), "checks_known_fails": [], @@ -826,8 +1035,17 @@ def create_workflow_report( "docker_images_cves": [], } + if pr_number == 0 and not mark_preview: + try: + prs_df, prs_in_release_missing_verification = get_prs_in_release_dataframe( + branch_name, cwd=os.getcwd() + ) + results_dfs["prs_in_release"] = prs_df + except Exception as e: + print(f"Error in get_prs_in_release_dataframe: {e}") + try: - fail_results["docker_images_cves"] = ( + results_dfs["docker_images_cves"] = ( [] if not check_cves else get_cves(pr_number, commit_sha, branch_name) ) except Exception as e: @@ -835,7 +1053,7 @@ def create_workflow_report( # get_cves returns ... in the case where no Grype result files were found. # This might occur when run in preview mode. - cves_not_checked = not check_cves or fail_results["docker_images_cves"] is ... + cves_not_checked = not check_cves or results_dfs["docker_images_cves"] is ... if known_fails_file_path: if not os.path.exists(known_fails_file_path): @@ -843,7 +1061,7 @@ def create_workflow_report( else: known_fails = get_broken_tests_rules(known_fails_file_path) - fail_results["checks_known_fails"] = get_checks_known_fails( + results_dfs["checks_known_fails"] = get_checks_known_fails( db_client, commit_sha, branch_name, known_fails ) @@ -855,24 +1073,24 @@ def create_workflow_report( pr_info_html = f""" #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} """ - fail_results["pr_new_fails"] = get_new_fails_this_pr( + results_dfs["pr_new_fails"] = get_new_fails_this_pr( db_client, pr_info, - fail_results["checks_fails"], - fail_results["regression_fails"], + results_dfs["checks_fails"], + results_dfs["regression_fails"], ) except Exception as e: pr_info_html = e pr_info = {} - fail_results["job_statuses"] = backfill_skipped_statuses( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"] = backfill_skipped_statuses( + results_dfs["job_statuses"], pr_number, branch_name, commit_sha ) high_cve_count = 0 - if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0: + if not cves_not_checked and len(results_dfs["docker_images_cves"]) > 0: high_cve_count = ( - fail_results["docker_images_cves"]["severity"] + results_dfs["docker_images_cves"]["severity"] .str.lower() .isin(("high", "critical")) .sum() @@ -893,43 +1111,54 @@ def create_workflow_report( "workflow_id": run_id, "commit_sha": commit_sha, "base_sha": "" if pr_number == 0 else pr_info.get("base", {}).get("sha"), - "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC", + "date": f"{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC", "is_preview": mark_preview, + "prs_in_release_missing_verification": prs_in_release_missing_verification, "counts": { - "jobs_status": f"{sum(fail_results['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", - "checks_errors": len(fail_results["checks_errors"]), - "checks_new_fails": len(fail_results["checks_fails"]), - "regression_new_fails": len(fail_results["regression_fails"]), + "jobs_status": f"{sum(results_dfs['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", + "checks_errors": len(results_dfs["checks_errors"]), + "checks_new_fails": len(results_dfs["checks_fails"]), + "regression_new_fails": len(results_dfs["regression_fails"]), "cves": "N/A" if cves_not_checked else f"{high_cve_count} high/critical", "checks_known_fails": ( - "N/A" if not known_fails else len(fail_results["checks_known_fails"]) + "N/A" if not known_fails else len(results_dfs["checks_known_fails"]) + ), + "pr_new_fails": len(results_dfs["pr_new_fails"]), + "prs_in_release": ( + "N/A" + if mark_preview or pr_number != 0 + else len(results_dfs["prs_in_release"]) ), - "pr_new_fails": len(fail_results["pr_new_fails"]), }, "build_report_links": get_build_report_links( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"], pr_number, branch_name, commit_sha + ), + "prs_in_release_html": ( + "PR details are not loaded during preview.
" + if mark_preview or pr_number != 0 + else format_results_as_html_table(results_dfs["prs_in_release"]) ), "ci_jobs_status_html": format_results_as_html_table( - fail_results["job_statuses"] + results_dfs["job_statuses"] ), "checks_errors_html": format_results_as_html_table( - fail_results["checks_errors"] + results_dfs["checks_errors"] ), - "checks_fails_html": format_results_as_html_table(fail_results["checks_fails"]), + "checks_fails_html": format_results_as_html_table(results_dfs["checks_fails"]), "regression_fails_html": format_results_as_html_table( - fail_results["regression_fails"] + results_dfs["regression_fails"] ), "docker_images_cves_html": ( "Not Checked
" if cves_not_checked - else format_results_as_html_table(fail_results["docker_images_cves"]) + else format_results_as_html_table(results_dfs["docker_images_cves"]) ), "checks_known_fails_html": ( "Not Checked
" if not known_fails - else format_results_as_html_table(fail_results["checks_known_fails"]) + else format_results_as_html_table(results_dfs["checks_known_fails"]) ), - "new_fails_html": format_results_as_html_table(fail_results["pr_new_fails"]), + "new_fails_html": format_results_as_html_table(results_dfs["pr_new_fails"]), } # Render the template with the context From d4a39c82a6e8387a3c9c1d1670a3fdd118aa086e Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:37:14 -0400 Subject: [PATCH 02/11] highlight labels for unverified PRs --- .../create_workflow_report.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index a4f43e172459..505872d6260b 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -32,6 +32,8 @@ CVE_SEVERITY_ORDER = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} +PR_LABELS_VERIFIED = {"verified", "verified-with-issue", "no-verification-needed"} + def _is_clickhouse_memory_limit_error(exc: BaseException) -> bool: if isinstance(exc, ServerException) and getattr(exc, "code", None) == 241: return True @@ -173,7 +175,7 @@ def get_run_details(run_id: str) -> dict: def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: if len(df) == 0: - return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]), False if not GITHUB_TOKEN: raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") headers = { @@ -193,16 +195,15 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da ) pr = response.json() label_names = [l["name"] for l in pr.get("labels", [])] - if any(ln.lower() == "cicd" for ln in label_names): + if "cicd" in label_names: continue - lowered = {ln.lower() for ln in label_names} - if "verified" not in lowered and "verified-with-issue" not in lowered: + if not PR_LABELS_VERIFIED.intersection(label_names): missing_verification = True rows.append( { "pr_number": pr_number, "pr_name": pr.get("title", ""), - "labels": ", ".join(sorted(label_names)), + "pr_labels": ", ".join(sorted(label_names)), } ) return pd.DataFrame(rows), missing_verification @@ -337,12 +338,12 @@ def get_prs_in_release_dataframe( repo: str = GITHUB_REPO, cwd: str | None = None, ) -> tuple[pd.DataFrame, bool]: - """ + f""" PRs merged into branch_ref that belong in the next release notes: after the latest GitHub Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. Only merge commits whose subject has fromSome PRs are missing verification.
- {% endif %} {{ prs_in_release_html }} {%- endif %} diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index 1eddb30b83eb..8e03badf464c 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -33,7 +33,6 @@ CVE_SEVERITY_ORDER = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} -PR_LABELS_VERIFIED = {"verified", "verified-with-issue"} def _is_clickhouse_memory_limit_error(exc: BaseException) -> bool: if isinstance(exc, ServerException) and getattr(exc, "code", None) == 241: @@ -174,9 +173,9 @@ def get_run_details(run_id: str) -> dict: return response.json() -def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: +def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> pd.DataFrame: if len(df) == 0: - return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]), False + return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]) if not GITHUB_TOKEN: raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") headers = { @@ -184,7 +183,6 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da "Accept": "application/vnd.github.v3+json", } rows = [] - missing_verification = False for pr_number in df["pr_number"].tolist(): response = requests.get( f"https://api.github.com/repos/{repo}/pulls/{pr_number}", @@ -196,8 +194,6 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da ) pr = response.json() label_names = [l["name"] for l in pr.get("labels", [])] - if not PR_LABELS_VERIFIED.intersection(label_names): - missing_verification = True rows.append( { "pr_number": pr_number, @@ -205,7 +201,7 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da "pr_labels": ", ".join(sorted(label_names)), } ) - return pd.DataFrame(rows), missing_verification + return pd.DataFrame(rows) def _git_rev_parse(ref: str, cwd: str | None) -> str | None: @@ -337,13 +333,12 @@ def get_prs_in_release_dataframe( *, repo: str = GITHUB_REPO, cwd: str, -) -> tuple[pd.DataFrame, bool]: +) -> pd.DataFrame: f""" PRs merged into branch_ref that belong in the next release notes: after the latest GitHub Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. Only merge commits whose subject has fromCompared with base sha {{ base_sha }}
- {{ new_fails_html }} + {%- if pr_number != 0 %} +Compared with base sha {{ base_sha }}
+ {{ new_fails_html }} +
+ Fail reason conventions:
+ KNOWN - Accepted fail and fix is not planned
+ INVESTIGATE - We don't know why it fails
+ NEEDSFIX - Investigation done and a fix is needed to make it pass
+
- Fail reason conventions:
- KNOWN - Accepted fail and fix is not planned
- INVESTIGATE - We don't know why it fails
- NEEDSFIX - Investigation done and a fix is needed to make it pass
-