diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d4f93614..002f050c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,9 +17,9 @@ jobs:
           python-version: "3.11"
           cache: pip
       - name: Install dependencies
-        run: python -m pip install .[test]
-      - name: Run lints
-        run: ./lint.sh
+        run: python -m pip install .[test] pre-commit
+      - name: Run pre-commit
+        run: pre-commit run --all-files
 
   test:
     runs-on: ubuntu-latest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..3e08e1fa
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: e05c5c0818279e5ac248ac9e954431ba58865e61 # frozen: v0.15.7
+    hooks:
+      - id: ruff-check
+        args: [--exit-non-zero-on-fix]
+      - id: ruff-format
+        args: [--exit-non-zero-on-fix]
+
+  - repo: local
+    hooks:
+      - id: pyright
+        name: pyright
+        entry: python -m pyright bench_runner
+        language: system
+        pass_filenames: false
+        types: [python]
+        stages: [pre-commit]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe37dfc3..1a5a0c13 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 ## Unreleased
 
+### Migrate to ruff and add pre-commit
+
+`black` and `flake8` have been replaced with `ruff` (lint + format). A
+`.pre-commit-config.yaml` is now included so contributors can install
+pre-commit hooks to catch lint, format, and `pyright` type errors before
+pushing. CI runs the same checks via `pre-commit run --all-files`.
+
+To set up locally:
+
+```sh
+pip install pre-commit
+pre-commit install
+```
+
 ### Bugfixes
 
 #### Use PGO on weekly builds
diff --git a/bench_runner/__main__.py b/bench_runner/__main__.py
index 9b191221..89a00cb1 100644
--- a/bench_runner/__main__.py
+++ b/bench_runner/__main__.py
@@ -33,7 +33,9 @@
         init()
     except Exception:
-        sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")
+        sys.stderr.write(
+            "pytest-cov: Failed to setup subprocess coverage."
+ ) command = len(sys.argv) >= 2 and sys.argv[1] or "" diff --git a/bench_runner/config.py b/bench_runner/config.py index 7a8989bd..c9740dbc 100644 --- a/bench_runner/config.py +++ b/bench_runner/config.py @@ -65,12 +65,16 @@ def __post_init__(self): class Config: bases: Bases runners: dict[str, mrunners.Runner] - publish_mirror: PublishMirror = dataclasses.field(default_factory=PublishMirror) + publish_mirror: PublishMirror = dataclasses.field( + default_factory=PublishMirror + ) benchmarks: Benchmarks = dataclasses.field(default_factory=Benchmarks) notify: Notify = dataclasses.field(default_factory=Notify) longitudinal_plot: mplot.LongitudinalPlotConfig | None = None flag_effect_plot: mplot.FlagEffectPlotConfig | None = None - benchmark_longitudinal_plot: mplot.BenchmarkLongitudinalPlotConfig | None = None + benchmark_longitudinal_plot: ( + mplot.BenchmarkLongitudinalPlotConfig | None + ) = None weekly: dict[str, Weekly] = dataclasses.field(default_factory=dict) def __post_init__(self): @@ -82,7 +86,8 @@ def __post_init__(self): ) self.runners = { name: mrunners.Runner( - nickname=name, **runner # pyright: ignore[reportCallIssue] + nickname=name, + **runner, # pyright: ignore[reportCallIssue] ) for name, runner in self.runners.items() } @@ -97,13 +102,19 @@ def __post_init__(self): **self.longitudinal_plot ) if isinstance(self.flag_effect_plot, dict): - self.flag_effect_plot = mplot.FlagEffectPlotConfig(**self.flag_effect_plot) + self.flag_effect_plot = mplot.FlagEffectPlotConfig( + **self.flag_effect_plot + ) if isinstance(self.benchmark_longitudinal_plot, dict): - self.benchmark_longitudinal_plot = mplot.BenchmarkLongitudinalPlotConfig( - **self.benchmark_longitudinal_plot + self.benchmark_longitudinal_plot = ( + mplot.BenchmarkLongitudinalPlotConfig( + **self.benchmark_longitudinal_plot + ) ) if len(self.weekly) == 0: - self.weekly = {"default": Weekly(runners=list(self.runners.keys()))} + self.weekly = { + "default": Weekly(runners=list(self.runners.keys())) + } else: self.weekly = { name: Weekly(**weekly) # pyright: ignore[reportCallIssue] diff --git a/bench_runner/flags.py b/bench_runner/flags.py index b87ba603..bc37a727 100644 --- a/bench_runner/flags.py +++ b/bench_runner/flags.py @@ -27,7 +27,9 @@ class Flag: def parse_flags(flag_str: str | None) -> list[str]: if flag_str is None: return [] - flags = [flag.strip() for flag in flag_str.split(",") if flag.strip() != ""] + flags = [ + flag.strip() for flag in flag_str.split(",") if flag.strip() != "" + ] internal_flags = [] for flag in flags: if flag not in FLAG_MAPPING: diff --git a/bench_runner/gh.py b/bench_runner/gh.py index e9d0e1d5..6977a4ee 100644 --- a/bench_runner/gh.py +++ b/bench_runner/gh.py @@ -49,7 +49,9 @@ def benchmark( raise ValueError(f"machine must be one of {machines}") if not (benchmark_base is None or isinstance(benchmark_base, bool)): - raise TypeError(f"benchmark_base must be bool, got {type(benchmark_base)}") + raise TypeError( + f"benchmark_base must be bool, got {type(benchmark_base)}" + ) if flags is None: flags = [] diff --git a/bench_runner/git.py b/bench_runner/git.py index 0df312fc..7c81c5fb 100644 --- a/bench_runner/git.py +++ b/bench_runner/git.py @@ -37,7 +37,14 @@ def get_log( n_args = [] if n < 1 else ["-n", str(n)] return subprocess.check_output( - ["git", "log", f"--pretty=format:{format}", *n_args, *ref_args, *extra], + [ + "git", + "log", + f"--pretty=format:{format}", + *n_args, + *ref_args, + *extra, + ], encoding="utf-8", cwd=dirname, ).strip() @@ -144,7 +151,11 @@ def clone( dirname = 
Path(dirname) if dirname.is_dir(): - if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch: + if ( + is_hash + and (dirname / ".git").is_dir() + and get_git_hash(dirname) == branch + ): # This is a git repo, and the hash matches return util.smart_rmtree(dirname) diff --git a/bench_runner/hpt.py b/bench_runner/hpt.py index cc77ced9..d7c6cb83 100644 --- a/bench_runner/hpt.py +++ b/bench_runner/hpt.py @@ -342,6 +342,8 @@ def make_report(ref: PathLike, head: PathLike, alpha=0.1): for reli in [0.9, 0.95, 0.99]: ret = maxspeedup(reli, better, alpha, mtx_a, mtx_b) if ret > 0: - result.write(f"- {reli:.0%} likely to have a {effect} of {ret:.2f}x\n") + result.write( + f"- {reli:.0%} likely to have a {effect} of {ret:.2f}x\n" + ) return result.getvalue() diff --git a/bench_runner/plot.py b/bench_runner/plot.py index ef22d68d..05c8f0b9 100644 --- a/bench_runner/plot.py +++ b/bench_runner/plot.py @@ -142,7 +142,9 @@ def plot_diff( names.append("ALL") axs.set_yticks(np.arange(len(names)) + 1, names) axs.set_ylim(0, len(names) + 1) - axs.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=True) + axs.tick_params( + axis="x", bottom=True, top=True, labelbottom=True, labeltop=True + ) axs.xaxis.set_major_formatter(formatter) xlim = axs.get_xlim() if xlim[0] > 0.75 and xlim[1] < 1.25: @@ -226,7 +228,9 @@ class Subplot: def __post_init__(self): if not util.valid_version(self.base): - raise RuntimeError(f"Invalid base '{self.base}' in `longitudinal_plot`") + raise RuntimeError( + f"Invalid base '{self.base}' in `longitudinal_plot`" + ) if ( not util.valid_version(self.version) or len(self.version.split(".")) != 2 @@ -248,9 +252,9 @@ def __post_init__(self): def longitudinal_plot( results: Iterable[result.Result], output_filename: PathLike, - getter: Callable[ - [result.BenchmarkComparison], float | None - ] = lambda r: r.geometric_mean_float, + getter: Callable[[result.BenchmarkComparison], float | None] = lambda r: ( + r.geometric_mean_float + ), differences: tuple[str, str] = ("slower", "faster"), title="Performance improvement by configuration", ): @@ -297,7 +301,9 @@ def get_comparison_value(ref, r, base): for subcfg, ax in zip(all_cfg, axs): version = [int(x) for x in subcfg.version.split(".")] ver_results = [ - r for r in results if list(r.parsed_version.release[0:2]) == version + r + for r in results + if list(r.parsed_version.release[0:2]) == version ] if subcfg.runners: cfg_runners = [r for r in runners if r.nickname in subcfg.runners] @@ -333,14 +339,17 @@ def get_comparison_value(ref, r, base): continue runner_results.sort( - key=lambda x: datetime.datetime.fromisoformat(x.commit_datetime) + key=lambda x: datetime.datetime.fromisoformat( + x.commit_datetime + ) ) dates = [ datetime.datetime.fromisoformat(x.commit_datetime) for x in runner_results ] changes = [ - get_comparison_value(ref, r, subcfg.base) for r in runner_results + get_comparison_value(ref, r, subcfg.base) + for r in runner_results ] if any(x is not None for x in changes): @@ -359,7 +368,9 @@ def get_comparison_value(ref, r, base): annotations = set() for r, date, change in zip(runner_results, dates, changes): micro = get_micro_version(r.version) - if micro not in annotations and not r.version.endswith("+"): + if micro not in annotations and not r.version.endswith( + "+" + ): annotations.add(micro) text = ax.annotate( micro, @@ -368,7 +379,9 @@ def get_comparison_value(ref, r, base): xytext=(-3, 15), textcoords="offset points", rotation=90, - arrowprops=dict(arrowstyle="-", 
connectionstyle="arc"), + arrowprops=dict( + arrowstyle="-", connectionstyle="arc" + ), ) text.set_color("#888") text.set_size(8) @@ -442,9 +455,9 @@ def __post_init__(self): def flag_effect_plot( results: Iterable[result.Result], output_filename: PathLike, - getter: Callable[ - [result.BenchmarkComparison], float | None - ] = lambda r: r.geometric_mean_float, + getter: Callable[[result.BenchmarkComparison], float | None] = lambda r: ( + r.geometric_mean_float + ), differences: tuple[str, str] = ("slower", "faster"), title="Performance improvement by configuration", ): @@ -465,7 +478,9 @@ def get_comparison_value(ref, r, force_valid): return data[key] else: value = getter( - result.BenchmarkComparison(ref, r, "default", force_valid=force_valid) + result.BenchmarkComparison( + ref, r, "default", force_valid=force_valid + ) ) data[key] = value return value @@ -496,11 +511,10 @@ def get_comparison_value(ref, r, force_valid): ] = r for subplot, ax in zip(subplots, axs): - ax.set_title(f"Effect of {subplot.name}") version = tuple(int(x) for x in subplot.version.split(".")) assert len(version) == 2, ( - "Version config in {subplot.name}" " should only be major.minor" + "Version config in {subplot.name} should only be major.minor" ) for runner in cfg.runners.values(): @@ -573,7 +587,10 @@ def __post_init__(self): raise RuntimeError( f"Invalid base '{self.base}' in `benchmark_longitudinal_plot`" ) - if not util.valid_version(self.version) or len(self.version.split(".")) != 2: + if ( + not util.valid_version(self.version) + or len(self.version.split(".")) != 2 + ): raise RuntimeError( f"Invalid version '{self.version}' in `benchmark_longitudinal_plot`" ) @@ -599,7 +616,9 @@ def benchmark_longitudinal_plot( else: cache = {} - results = [r for r in results if r.fork == "python" and r.nickname in cfg.runners] + results = [ + r for r in results if r.fork == "python" and r.nickname in cfg.runners + ] base = None for r in results: @@ -638,7 +657,9 @@ def benchmark_longitudinal_plot( # Exclude any benchmarks where we don't have enough data to make a # meaningful plot by_benchmark = { - k: v for k, v in by_benchmark.items() if any(len(x) > 2 for x in v.values()) + k: v + for k, v in by_benchmark.items() + if any(len(x) > 2 for x in v.values()) } fig, axs = plt.subplots( @@ -650,7 +671,9 @@ def benchmark_longitudinal_plot( if len(by_benchmark) == 1: axs = [axs] - plt.suptitle(f"Performance change by benchmark on {cfg.version} vs. {cfg.base}") + plt.suptitle( + f"Performance change by benchmark on {cfg.version} vs. {cfg.base}" + ) first = True for (benchmark, runners), ax in zip(sorted(by_benchmark.items()), axs): diff --git a/bench_runner/result.py b/bench_runner/result.py index f561126a..510a52bc 100644 --- a/bench_runner/result.py +++ b/bench_runner/result.py @@ -68,7 +68,8 @@ def _get_platform_value(python: PathLike, item: str) -> str: Get a value from the platform module of the given Python interpreter. 
""" output = subprocess.check_output( - [python, "-c", f"import platform; print(platform.{item}())"], encoding="utf-8" + [python, "-c", f"import platform; print(platform.{item}())"], + encoding="utf-8", ) return output.strip().lower() @@ -77,13 +78,19 @@ def _get_architecture(python: PathLike) -> str: machine = _get_platform_value(python, "machine") bits = eval(_get_platform_value(python, "architecture"))[0] if bits == "32bit": - return {"x86_64": "x86", "amd64": "x86", "arm64": "arm32"}.get(machine, machine) + return {"x86_64": "x86", "amd64": "x86", "arm64": "arm32"}.get( + machine, machine + ) return machine class Comparison: def __init__( - self, ref: "Result", head: "Result", base: str, force_valid: bool = False + self, + ref: "Result", + head: "Result", + base: str, + force_valid: bool = False, ): self.ref = ref self.head = head @@ -127,7 +134,9 @@ def _contents(self) -> str | None: return None if self.base_filename.with_suffix(".md").is_file(): - with self.base_filename.with_suffix(".md").open(encoding="utf-8") as fd: + with self.base_filename.with_suffix(".md").open( + encoding="utf-8" + ) as fd: return fd.read() else: return self._generate_contents() @@ -203,9 +212,13 @@ def remove_outliers(values, m=2): abs(values - np.mean(values)) < np.multiply(m, np.std(values)) ] - def calculate_diffs(ref_values, head_values) -> tuple[np.ndarray | None, float]: + def calculate_diffs( + ref_values, head_values + ) -> tuple[np.ndarray | None, float]: if len(ref_values) > 3 and len(head_values) > 3: - sig, t_score = pyperf._utils.is_significant(ref_values, head_values) + sig, t_score = pyperf._utils.is_significant( + ref_values, head_values + ) if not sig: return None, 0.0 else: @@ -280,7 +293,7 @@ def _calculate_geometric_mean(self) -> str: elif gm > 1.0: return f"{gm:.03f}x faster" else: - return f"{1.0+(1.0-gm):.03f}x slower" + return f"{1.0 + (1.0 - gm):.03f}x slower" @property def geometric_mean(self) -> str: @@ -419,7 +432,10 @@ def write_pystats_diff(self, filename: PathLike) -> None: contents = subprocess.check_output( [ sys.executable, - Path("cpython") / "Tools" / "scripts" / "summarize_stats.py", + Path("cpython") + / "Tools" + / "scripts" + / "summarize_stats.py", self.ref.filename, self.head.filename, ], @@ -515,7 +531,9 @@ def from_arbitrary_filename(cls, filename: PathLike) -> "Result": fork="unknown", ref=filename.stem, version="unknown", - cpython_hash=content.get("metadata", {}).get("commit_id", "unknown"), + cpython_hash=content.get("metadata", {}).get( + "commit_id", "unknown" + ), extra=[], suffix=filename.suffix, flags=[], @@ -560,7 +578,9 @@ def filename(self) -> Path: flags = [] self._filename = ( Path("results") - / "-".join(["bm", date, self.version, self.cpython_hash, *flags]) + / "-".join( + ["bm", date, self.version, self.cpython_hash, *flags] + ) / ( "-".join( [ @@ -656,7 +676,9 @@ def is_windows(self) -> bool: return self.system == "windows" else: return ( - self.metadata.get("platform", "unknown").lower().startswith("windows") + self.metadata.get("platform", "unknown") + .lower() + .startswith("windows") ) @property @@ -679,7 +701,10 @@ def github_action_url(self) -> str | None: def hash_and_flags(self) -> str: # A representation for the user that combines the commit hash and other flags return " ".join( - [self.cpython_hash, *(f"({x})" for x in mflags.flags_to_human(self.flags))] + [ + self.cpython_hash, + *(f"({x})" for x in mflags.flags_to_human(self.flags)), + ] ) @functools.cached_property @@ -791,7 +816,9 @@ def has_result( def match_to_bases( - results: 
Iterable[Result], bases: Sequence[str] | None, progress: bool = True + results: Iterable[Result], + bases: Sequence[str] | None, + progress: bool = True, ): def find_match(result, candidates, base, func): # Try for an exact match (same benchmark_hash) first, @@ -863,7 +890,8 @@ def track(it, *_args, **_kwargs): candidates, "default_base_vs_" + flag, lambda ref: ( - _merge_base.startswith(ref.cpython_hash) and ref.flags == [] + _merge_base.startswith(ref.cpython_hash) + and ref.flags == [] ), ) found_base = found_base or found_default_base diff --git a/bench_runner/runners.py b/bench_runner/runners.py index 826a43fe..19c6d4d9 100644 --- a/bench_runner/runners.py +++ b/bench_runner/runners.py @@ -64,10 +64,14 @@ def display_name(self) -> str: return f"{self.os} {self.arch} ({self.nickname})" -unknown_runner = Runner("unknown", "unknown", "unknown", "unknown", False, {}, None) +unknown_runner = Runner( + "unknown", "unknown", "unknown", "unknown", False, {}, None +) -def get_runners_by_hostname(cfgpath: PathLike | None = None) -> dict[str, Runner]: +def get_runners_by_hostname( + cfgpath: PathLike | None = None, +) -> dict[str, Runner]: from . import config return {x.hostname: x for x in config.get_config(cfgpath).runners.values()} @@ -83,7 +87,9 @@ def get_nickname_for_hostname( return get_runner_for_hostname(hostname, cfgpath).nickname -def get_runner_by_nickname(nickname: str, cfgpath: PathLike | None = None) -> Runner: +def get_runner_by_nickname( + nickname: str, cfgpath: PathLike | None = None +) -> Runner: from . import config return config.get_config(cfgpath).runners.get(nickname, unknown_runner) diff --git a/bench_runner/scripts/backfill.py b/bench_runner/scripts/backfill.py index 4347ef2f..ec7cca2a 100644 --- a/bench_runner/scripts/backfill.py +++ b/bench_runner/scripts/backfill.py @@ -30,7 +30,9 @@ @functools.cache -def _get_hash_and_date(cpython: PathLike, ref: str) -> tuple[str, datetime.datetime]: +def _get_hash_and_date( + cpython: PathLike, ref: str +) -> tuple[str, datetime.datetime]: hash, date = git.get_log("%H %cI", cpython, ref).split() return hash, datetime.datetime.fromisoformat(date) @@ -82,7 +84,9 @@ def get_latest_with_prefix( commits = [] for tag in tags: if tag.startswith(prefix): - commits.append(Commit(cpython, tag, f"--latest-with-prefix {prefix}")) + commits.append( + Commit(cpython, tag, f"--latest-with-prefix {prefix}") + ) commits.sort(key=attrgetter("date")) if len(commits) > 0: @@ -127,7 +131,9 @@ def get_weekly_since(cpython: PathLike, start_date: str) -> Iterable[Commit]: def get_bisect(cpython: PathLike, refs: Sequence[str]) -> Iterable[Commit]: if len(refs) != 2: - raise ValueError(f"Each --bisect entry must contain 2 refs, got {len(refs)}") + raise ValueError( + f"Each --bisect entry must contain 2 refs, got {len(refs)}" + ) yield Commit( cpython, @@ -138,7 +144,9 @@ def get_bisect(cpython: PathLike, refs: Sequence[str]) -> Iterable[Commit]: def match_machine(a: str, b: str) -> bool: return ( - (a == "amd64" and b == "x86_64") or (a == "x86_64" and b == "amd64") or (a == b) + (a == "amd64" and b == "x86_64") + or (a == "x86_64" and b == "amd64") + or (a == b) ) @@ -167,8 +175,9 @@ def remove_existing( commit.runners = [] for runner in runners: for result in results: - if result.nickname == runner.nickname and commit.hash.startswith( - result.cpython_hash + if ( + result.nickname == runner.nickname + and commit.hash.startswith(result.cpython_hash) ): break else: @@ -213,7 +222,9 @@ def deduplicate_commits( yield first_commit else: yield Commit( - 
cpython, first_commit.ref, ", ".join(x.source for x in commit_set) + cpython, + first_commit.ref, + ", ".join(x.source for x in commit_set), ) @@ -248,7 +259,12 @@ def _main( tags = git.get_tags(cpython) commits = get_commits( - cpython, tags, all_with_prefix, latest_with_prefix, weekly_since, bisect + cpython, + tags, + all_with_prefix, + latest_with_prefix, + weekly_since, + bisect, ) commits = deduplicate_commits(cpython, commits) @@ -256,7 +272,9 @@ def _main( commits = sorted(commits, key=attrgetter("date")) - rich.print(f"runners: {', '.join(f'[blue]{x.nickname}[/blue]' for x in runners)}") + rich.print( + f"runners: {', '.join(f'[blue]{x.nickname}[/blue]' for x in runners)}" + ) rich.print() table = rich.table.Table(title="Benchmark runs") @@ -284,7 +302,9 @@ def _main( if runs == 0: return - if rich.prompt.Confirm.ask("Are you sure you want to run them all?", default=False): + if rich.prompt.Confirm.ask( + "Are you sure you want to run them all?", default=False + ): for commit in commits: if len(commit.runners) == len(all_runners): gh.benchmark(ref=commit.hash, machine="all") @@ -350,7 +370,9 @@ def main(): f"from {', '.join(flag.gha_variable for flag in mflags.FLAGS)}" ), ) - parser.add_argument("cpython", type=Path, help="The path to a checkout of CPython") + parser.add_argument( + "cpython", type=Path, help="The path to a checkout of CPython" + ) args = parser.parse_args() diff --git a/bench_runner/scripts/bisect.py b/bench_runner/scripts/bisect.py index d0dca21a..e680beb1 100644 --- a/bench_runner/scripts/bisect.py +++ b/bench_runner/scripts/bisect.py @@ -81,16 +81,25 @@ def get_result( # first time in case the *last* bisect run used pgo. subprocess.run(["make", "clean"], cwd=cpython) - workflow.compile_unix(cpython, parsed_flags, pgo, pystats is not None, reconfigure) + workflow.compile_unix( + cpython, parsed_flags, pgo, pystats is not None, reconfigure + ) if pystats is None: run_benchmarks.run_benchmarks(python, benchmark) return parse_timing_result(run_benchmarks.BENCHMARK_JSON) else: run_benchmarks.collect_pystats(python, benchmark) - summarize_stats_path = cpython / "Tools" / "scripts" / "summarize_stats.py" + summarize_stats_path = ( + cpython / "Tools" / "scripts" / "summarize_stats.py" + ) subprocess.check_output( - [str(python), str(summarize_stats_path), "--json-output", "pystats.json"] + [ + str(python), + str(summarize_stats_path), + "--json-output", + "pystats.json", + ] ) with open("pystats.json", "r") as f: contents = json.load(f) @@ -140,7 +149,10 @@ def _main( if not cpython.is_dir(): git.clone( - cpython, "https://github.com/python/cpython.git", branch="main", depth=None + cpython, + "https://github.com/python/cpython.git", + branch="main", + depth=None, ) git.checkout(cpython, good) @@ -287,7 +299,9 @@ def main(): # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values, # and > 1.0 outside of that. - confidence = abs((result - mid_point) / ((args.bad_val - args.good_val) / 2.0)) + confidence = abs( + (result - mid_point) / ((args.bad_val - args.good_val) / 2.0) + ) with contextlib.chdir(repo): if im * result > im * mid_point: diff --git a/bench_runner/scripts/compare.py b/bench_runner/scripts/compare.py index f5f490ad..0c26ba48 100644 --- a/bench_runner/scripts/compare.py +++ b/bench_runner/scripts/compare.py @@ -61,7 +61,7 @@ def compare_pair( output_dir = Path(output_dir) rich.print( - f"Comparing {counter[0]+1: 2}/{counter[1]: 2}: {head_name} vs. {ref_name}" + f"Comparing {counter[0] + 1: 2}/{counter[1]: 2}: {head_name} vs. 
{ref_name}" ) counter[0] += 1 @@ -75,7 +75,9 @@ def compare_pair( for func, suffix, file_type in comparison.get_files(): output_filename = util.apply_suffix(output_dir / name, suffix) func(output_filename) - entry.append(f"[{util.TYPE_TO_ICON[file_type]}]({output_filename.name})") + entry.append( + f"[{util.TYPE_TO_ICON[file_type]}]({output_filename.name})" + ) return "".join(entry) @@ -94,7 +96,9 @@ def get_first_result_for_machine( results: Iterable[mod_result.Result], machine: str | None ) -> mod_result.Result: return next( - result for result in results if machine is None or result.nickname == machine + result + for result in results + if machine is None or result.nickname == machine ) @@ -112,7 +116,13 @@ def do_one_to_many( for hash, _, name, results in parsed_commits[1:]: result = get_first_result_for_machine(results, machine) link = compare_pair( - output_dir, machine, first_name, first_result, name, result, counter + output_dir, + machine, + first_name, + first_result, + name, + result, + counter, ) write_row(fd, [name_and_hash(name, hash), link]) @@ -135,7 +145,13 @@ def do_many_to_many( else: result2 = get_first_result_for_machine(results2, machine) link = compare_pair( - output_dir, machine, name1, result1, name2, result2, counter + output_dir, + machine, + name1, + result1, + name2, + result2, + counter, ) columns.append(link) write_row(fd, columns) @@ -144,7 +160,9 @@ def do_many_to_many( def _main_with_hashes( - commits: Sequence[str], output_dir: Path, comparison_type: Literal["1:n", "n:n"] + commits: Sequence[str], + output_dir: Path, + comparison_type: Literal["1:n", "n:n"], ): cfg = config.get_config() @@ -161,7 +179,8 @@ def _main_with_hashes( subresults = [ result for result in results - if result.cpython_hash.startswith(commit_hash) and result.flags == flags + if result.cpython_hash.startswith(commit_hash) + and result.flags == flags ] if len(subresults) == 0: @@ -185,7 +204,9 @@ def _main_with_hashes( total = (len(parsed_commits) - 1) * len(machines) func = do_one_to_many case "n:n": - total = ((len(parsed_commits) ** 2) - len(parsed_commits)) * len(machines) + total = ((len(parsed_commits) ** 2) - len(parsed_commits)) * len( + machines + ) func = do_many_to_many case _: raise ValueError(f"Unknown comparison type {comparison_type}") @@ -199,7 +220,9 @@ def _main_with_hashes( rich.print() -def _main_with_files(commits: Sequence[str], output_dir: Path, comparison_type: str): +def _main_with_files( + commits: Sequence[str], output_dir: Path, comparison_type: str +): parsed_results = [] for commit in commits: if "," in commit: @@ -209,7 +232,12 @@ def _main_with_files(commits: Sequence[str], output_dir: Path, comparison_type: commit_path = Path(commit) name = commit_path.stem parsed_results.append( - (name, [], name, [mod_result.Result.from_arbitrary_filename(commit_path)]) + ( + name, + [], + name, + [mod_result.Result.from_arbitrary_filename(commit_path)], + ) ) match comparison_type: @@ -231,7 +259,9 @@ def _main_with_files(commits: Sequence[str], output_dir: Path, comparison_type: def _main( - commits: Sequence[str], output_dir: PathLike, comparison_type: Literal["1:n", "n:n"] + commits: Sequence[str], + output_dir: PathLike, + comparison_type: Literal["1:n", "n:n"], ): if len(commits) < 2: raise ValueError("Must provide at least 2 commits") diff --git a/bench_runner/scripts/find_failures.py b/bench_runner/scripts/find_failures.py index 77ae4738..475f1975 100644 --- a/bench_runner/scripts/find_failures.py +++ b/bench_runner/scripts/find_failures.py @@ -12,7 +12,9 @@ 
from bench_runner import table -Failures: TypeAlias = Mapping[str, Mapping[str, Mapping[str, tuple[str, list[str]]]]] +Failures: TypeAlias = Mapping[ + str, Mapping[str, Mapping[str, tuple[str, list[str]]]] +] def iter_lines(log: str) -> Iterator[tuple[str, str, str]]: @@ -24,7 +26,9 @@ def iter_lines(log: str) -> Iterator[tuple[str, str, str]]: if " / " in parts[0]: config, machine = parts[0].split(" / ") if machine.startswith("benchmark-"): - machine = machine[machine.find("-") + 1 : machine.rfind("-")] + machine = machine[ + machine.find("-") + 1 : machine.rfind("-") + ] config = config.split("-")[-1] if config == "weekly": config = "default" @@ -42,16 +46,24 @@ def iter_configs(configs: Mapping[str, Any]) -> Iterator[tuple[str, Any]]: def get_last_weekly_run_id() -> str: output = subprocess.check_output( - ["gh", "run", "list", "--workflow", "_weekly.yml", "--json", "databaseId"] + [ + "gh", + "run", + "list", + "--workflow", + "_weekly.yml", + "--json", + "databaseId", + ] ) content = json.loads(output) return content[0]["databaseId"] def get_log(run_id: str) -> str: - return subprocess.check_output(["gh", "run", "view", "--log", str(run_id)]).decode( - "utf-8" - ) + return subprocess.check_output( + ["gh", "run", "view", "--log", str(run_id)] + ).decode("utf-8") def parse_log(content: str) -> Failures: @@ -63,7 +75,9 @@ def parse_log(content: str) -> Failures: current_benchmark_build = None current_benchmark_run = None for machine, config, line in iter: - if match := re.match(r"\(.+\) creating venv for benchmark \((.+)\)", line): + if match := re.match( + r"\(.+\) creating venv for benchmark \((.+)\)", line + ): current_benchmark_build = match.groups()[0] collected_lines = [] elif line.startswith("(benchmark will be skipped)"): @@ -75,7 +89,9 @@ def parse_log(content: str) -> Failures: elif match := re.match(r"\[.+\] (.+)\.\.\.", line): current_benchmark_run = match.groups()[0] collected_lines = [] - elif match := re.match(r"ERROR: Benchmark (.+) failed: Benchmark died", line): + elif match := re.match( + r"ERROR: Benchmark (.+) failed: Benchmark died", line + ): assert current_benchmark_run failures[current_benchmark_run][machine][config] = ( "run", diff --git a/bench_runner/scripts/generate_results.py b/bench_runner/scripts/generate_results.py index d2d07ddf..d1b1ed2d 100644 --- a/bench_runner/scripts/generate_results.py +++ b/bench_runner/scripts/generate_results.py @@ -27,7 +27,9 @@ from bench_runner.util import PathLike -def _tuple_to_nested_dicts(entries: Iterable[tuple], d: dict | None = None) -> dict: +def _tuple_to_nested_dicts( + entries: Iterable[tuple], d: dict | None = None +) -> dict: def recurse(entry: tuple, d: dict): if len(entry) == 2: d.setdefault(entry[0], []) @@ -46,7 +48,9 @@ def recurse(entry: tuple, d: dict): return d -def save_generated_results(results: Iterable[Result], force: bool = False) -> None: +def save_generated_results( + results: Iterable[Result], force: bool = False +) -> None: """ Write out the comparison tables and plots for every result. @@ -77,14 +81,19 @@ def save_generated_results(results: Iterable[Result], force: bool = False) -> No def output_results_index( - fd: TextIO, bases: Iterable[str], results: Iterable[Result], filename: PathLike + fd: TextIO, + bases: Iterable[str], + results: Iterable[Result], + filename: PathLike, ): """ Outputs a results index table. """ bases = [*bases, "base"] - head = ["date", "fork/ref", "hash/flags"] + [f"vs. {base}:" for base in bases] + head = ["date", "fork/ref", "hash/flags"] + [ + f"vs. 
{base}:" for base in bases + ] rows = [] for result in results: @@ -97,7 +106,11 @@ def output_results_index( entry.append( table.md_link( util.TYPE_TO_ICON[file_type], - str(util.apply_suffix(compare.base_filename, suffix)), + str( + util.apply_suffix( + compare.base_filename, suffix + ) + ), filename, ) ) @@ -161,7 +174,9 @@ def summarize_results( """ results = list(results) new_results = [] - earliest = (datetime.date.today() - datetime.timedelta(days=days)).isoformat() + earliest = ( + datetime.date.today() - datetime.timedelta(days=days) + ).isoformat() for i, result in enumerate(results): if i < n_recent or result.run_date >= earliest: new_results.append(result) @@ -241,10 +256,14 @@ def generate_indices( results_file = repo_dir / "RESULTS.md" if not results_file.is_file(): results_file = repo_dir / "results" / "README.md" - generate_index(results_file, bases, all_results, benchmarking_results, False) + generate_index( + results_file, bases, all_results, benchmarking_results, False + ) -def find_different_benchmarks(head: Result, ref: Result) -> tuple[list[str], list[str]]: +def find_different_benchmarks( + head: Result, ref: Result +) -> tuple[list[str], list[str]]: head_benchmarks = head.benchmark_names base_benchmarks = ref.benchmark_names return ( @@ -264,14 +283,21 @@ def get_directory_indices_entries( dirpaths.add(dirpath) refs[dirpath].add(result.ref) entries.append( - (dirpath, None, None, f"fork: {unquote(result.fork)}/{unquote(result.ref)}") + ( + dirpath, + None, + None, + f"fork: {unquote(result.fork)}/{unquote(result.ref)}", + ) ) entries.append((dirpath, None, None, f"version: {result.version}")) config = ",".join(mflags.flags_to_human(result.flags)) entries.append((dirpath, None, None, f"config: {config}")) link = table.link_to_hash(result.cpython_hash, result.fork) entries.append((dirpath, None, None, f"commit hash: {link}")) - entries.append((dirpath, None, None, f"commit date: {result.commit_datetime}")) + entries.append( + (dirpath, None, None, f"commit date: {result.commit_datetime}") + ) if result.commit_merge_base is not None: link = table.link_to_hash(result.commit_merge_base, "python") entries.append((dirpath, None, None, f"commit merge base: {link}")) @@ -280,14 +306,25 @@ def get_directory_indices_entries( entries.append((dirpath, result.runner, None, link)) entries.append( - (dirpath, result.runner, None, f"cpu model: {result.cpu_model_name}") + ( + dirpath, + result.runner, + None, + f"cpu model: {result.cpu_model_name}", + ) + ) + entries.append( + (dirpath, result.runner, None, f"platform: {result.platform}") ) - entries.append((dirpath, result.runner, None, f"platform: {result.platform}")) if result.result_info[0] == "raw results": for base, compare in result.bases.items(): - entries.append((dirpath, result.runner, base, compare.long_summary)) - entries.append((dirpath, result.runner, base, compare.memory_summary)) + entries.append( + (dirpath, result.runner, base, compare.long_summary) + ) + entries.append( + (dirpath, result.runner, base, compare.memory_summary) + ) missing_benchmarks, new_benchmarks = find_different_benchmarks( result, compare.ref ) @@ -329,7 +366,8 @@ def get_directory_indices_entries( result.runner, base, table.md_link( - util.TYPE_TO_ICON.get(type, "") + type, result.filename.name + util.TYPE_TO_ICON.get(type, "") + type, + result.filename.name, ), ) ) @@ -351,7 +389,9 @@ def generate_directory_indices(results: Iterable[Result]) -> None: entries = get_directory_indices_entries(results) structure = _tuple_to_nested_dicts(entries) - 
for dirpath, dirresults in util.track(structure.items(), "Generating indices"): + for dirpath, dirresults in util.track( + structure.items(), "Generating indices" + ): with (dirpath / "README.md").open("w") as fd: fd.write("# Results\n\n") table.write_md_list(fd, dirresults[None][None]) @@ -372,7 +412,9 @@ def filter_broken_memory_results(results): return [r for r in results if r.nickname != "darwin"] -def _main(repo_dir: PathLike, force: bool = False, bases: Sequence[str] | None = None): +def _main( + repo_dir: PathLike, force: bool = False, bases: Sequence[str] | None = None +): repo_dir = Path(repo_dir) results_dir = repo_dir / "results" if bases is None: @@ -383,11 +425,15 @@ def _main(repo_dir: PathLike, force: bool = False, bases: Sequence[str] | None = results = load_all_results(bases, results_dir) rich.print(f"Found {len(results)} results") save_generated_results(results, force=force) - benchmarking_results = [r for r in results if r.result_info[0] == "raw results"] + benchmarking_results = [ + r for r in results if r.result_info[0] == "raw results" + ] generate_indices(bases, results, benchmarking_results, repo_dir) generate_directory_indices(benchmarking_results) - memory_benchmarking_results = filter_broken_memory_results(benchmarking_results) + memory_benchmarking_results = filter_broken_memory_results( + benchmarking_results + ) for plot_func, args, kwargs in util.track( [ diff --git a/bench_runner/scripts/install.py b/bench_runner/scripts/install.py index 3e78e7f7..2c1fa632 100644 --- a/bench_runner/scripts/install.py +++ b/bench_runner/scripts/install.py @@ -41,7 +41,9 @@ def fail_check(dst: PathLike): sys.exit(1) -def write_and_check(dst: PathLike, writer: Callable[[TextIO], None], check: bool): +def write_and_check( + dst: PathLike, writer: Callable[[TextIO], None], check: bool +): """ Call `writer` with a file descriptor to write the contents to `dst`. 
@@ -134,7 +136,9 @@ def add_flag_env(jobs: dict[str, Any]): job["env"]["flags"] = flag_value for step in job["steps"]: if "run" in step: - step["run"] = step["run"].replace("${{ env.flags }}", flag_value) + step["run"] = step["run"].replace( + "${{ env.flags }}", flag_value + ) def generate__benchmark(src: Any) -> Any: @@ -170,7 +174,8 @@ def generate__benchmark(src: Any) -> Any: "name": "Setup environment", "run": LiteralScalarString( "\n".join( - f'echo "{key}={val}" >> {github_env}' for key, val in vars.items() + f'echo "{key}={val}" >> {github_env}' + for key, val in vars.items() ) ), } @@ -190,7 +195,9 @@ def generate__benchmark(src: Any) -> Any: add_flag_env(dst["jobs"]) - dst["on"]["workflow_dispatch"]["inputs"]["machine"]["options"] = runner_choices + dst["on"]["workflow_dispatch"]["inputs"]["machine"]["options"] = ( + runner_choices + ) add_flag_variables(dst["on"]["workflow_dispatch"]["inputs"]) add_flag_variables(dst["on"]["workflow_call"]["inputs"]) @@ -207,9 +214,15 @@ def generate_benchmark(dst: Any) -> Any: """ cfg = config.get_config() available_runners = [r for r in cfg.runners.values() if r.available] - runner_choices = [*[x.name for x in available_runners], "all", "__really_all"] - - dst["on"]["workflow_dispatch"]["inputs"]["machine"]["options"] = runner_choices + runner_choices = [ + *[x.name for x in available_runners], + "all", + "__really_all", + ] + + dst["on"]["workflow_dispatch"]["inputs"]["machine"]["options"] = ( + runner_choices + ) add_flag_variables(dst["on"]["workflow_dispatch"]["inputs"]) add_flag_env(dst["jobs"]) @@ -220,9 +233,9 @@ def generate_benchmark(dst: Any) -> Any: continue if "with" in job: for flag in flags.FLAGS: - job["with"][ - flag.gha_variable - ] = f"${{{{ inputs.{flag.gha_variable} }}}}" + job["with"][flag.gha_variable] = ( + f"${{{{ inputs.{flag.gha_variable} }}}}" + ) # Include all of the flags in the human-readable workflow "run name" dst["run-name"] += " " + " ".join( diff --git a/bench_runner/scripts/notify.py b/bench_runner/scripts/notify.py index 764e0eb8..c6cbc1d1 100644 --- a/bench_runner/scripts/notify.py +++ b/bench_runner/scripts/notify.py @@ -18,7 +18,13 @@ def generate_dirname( else: flag_string = [] return "-".join( - ["bm", date[:10].replace("-", ""), version, cpython_hash[:7], *flag_string] + [ + "bm", + date[:10].replace("-", ""), + version, + cpython_hash[:7], + *flag_string, + ] ) @@ -29,12 +35,17 @@ def _main( actor = os.environ.get("GITHUB_ACTOR", "UNKNOWN") github_repo = os.environ.get("GITHUB_REPOSITORY", "UNKNOWN") - lines = ["🤖 This is the friendly benchmarking bot with some new results!", ""] + lines = [ + "🤖 This is the friendly benchmarking bot with some new results!", + "", + ] line = f"@{actor}: [{fork}/{ref}]" skip_publish = config.get_config().publish_mirror.skip if skip_publish: - line += f"(https://github.com/{github_repo}/tree/main/results/{dirname})" + line += ( + f"(https://github.com/{github_repo}/tree/main/results/{dirname})" + ) else: line += f"(https://github.com/{github_repo}-public/tree/main/results/{dirname})" print(f"::notice ::{line}") diff --git a/bench_runner/scripts/profiling_plot.py b/bench_runner/scripts/profiling_plot.py index c095dbb9..277e0dc6 100644 --- a/bench_runner/scripts/profiling_plot.py +++ b/bench_runner/scripts/profiling_plot.py @@ -352,7 +352,9 @@ def handle_benchmark( scaled_time = self_time * scale if scaled_time >= 0.0025: - md.write(f"| {scaled_time:.2%} | `{obj}` | `{sym}` | {category} |\n") + md.write( + f"| {scaled_time:.2%} | `{obj}` | `{sym}` | {category} |\n" + ) 
return total @@ -362,7 +364,9 @@ def plot_bargraph( categories: list[tuple[float, str]], output_filename: PathLike, ): - fig, ax = plt.subplots(figsize=(8, len(results) * 0.3), layout="constrained") + fig, ax = plt.subplots( + figsize=(8, len(results) * 0.3), layout="constrained" + ) bottom = np.zeros(len(results)) names = list(results.keys())[::-1] @@ -389,7 +393,14 @@ def plot_bargraph( bottom += values values = 1.0 - bottom - ax.barh(names, values, 0.5, label="(other functions)", left=bottom, color="#ddd") + ax.barh( + names, + values, + 0.5, + label="(other functions)", + left=bottom, + color="#ddd", + ) ax.set_xlabel("percentage time") ax.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left") @@ -403,7 +414,8 @@ def plot_pie(categories: list[tuple[float, str]], output_filename: PathLike): values = [x[0] for x in categories] den = sum(values) labels = [ - i < 10 and f"{x[1]} {x[0] / den:.2%}" or "" for i, x in enumerate(categories) + i < 10 and f"{x[1]} {x[0] / den:.2%}" or "" + for i, x in enumerate(categories) ] colors = [get_color_and_hatch(cat[1])[0] for cat in categories] hatches = [get_color_and_hatch(cat[1])[1] for cat in categories] @@ -418,7 +430,11 @@ def plot_pie(categories: list[tuple[float, str]], output_filename: PathLike): hatches.append("") ax.pie( - values, labels=labels, colors=colors, hatch=hatches, textprops={"fontsize": 6} + values, + labels=labels, + colors=colors, + hatch=hatches, + textprops={"fontsize": 6}, ) fig.savefig(output_filename, dpi=200) @@ -518,7 +534,9 @@ def _main(input_dir: PathLike, output_prefix: PathLike): break md.write(f"| {self_fraction:.2%} | {obj} | {sym} |\n") - plot_bargraph(results, sorted_categories, output_prefix.with_suffix(".svg")) + plot_bargraph( + results, sorted_categories, output_prefix.with_suffix(".svg") + ) plot_pie(sorted_categories, output_prefix.with_suffix(".pie.svg")) handle_tail_call_stats(input_dir, categories, output_prefix) diff --git a/bench_runner/scripts/purge.py b/bench_runner/scripts/purge.py index d623b30a..3af5ff02 100644 --- a/bench_runner/scripts/purge.py +++ b/bench_runner/scripts/purge.py @@ -16,7 +16,10 @@ import rich_argparse -from bench_runner.bases import get_bases, get_minimum_version_for_all_comparisons +from bench_runner.bases import ( + get_bases, + get_minimum_version_for_all_comparisons, +) from bench_runner.result import load_all_results from bench_runner.scripts.generate_results import _main as generate_results from bench_runner.util import PathLike @@ -32,7 +35,10 @@ def dir_size(path: PathLike) -> int: def _main( - repo_dir: PathLike, days: int, dry_run: bool, bases: Sequence[str] | None = None + repo_dir: PathLike, + days: int, + dry_run: bool, + bases: Sequence[str] | None = None, ): results_dir = Path(repo_dir) / "results" if bases is None: @@ -45,7 +51,9 @@ def _main( keep_dirs = set() remove_generated_files = set() - earliest = (datetime.date.today() - datetime.timedelta(days=days)).isoformat() + earliest = ( + datetime.date.today() - datetime.timedelta(days=days) + ).isoformat() for result in rich.progress.track( results, description="Selecting results to remove" @@ -63,7 +71,9 @@ def _main( and result.parsed_version.release[0:2] < get_minimum_version_for_all_comparisons() ): - remove_generated_files.update(result.filename.parent.glob("*-vs-*")) + remove_generated_files.update( + result.filename.parent.glob("*-vs-*") + ) for filename in results_dir.glob("**/*"): if m := re.match(".*-vs-(.+?)(-.+)?$", filename.stem): @@ -85,7 +95,9 @@ def _main( if not dry_run: shutil.rmtree(d) - for f in 
rich.progress.track(remove_generated_files, "Removing comparison files"): + for f in rich.progress.track( + remove_generated_files, "Removing comparison files" + ): if f.is_file(): rich.print(f"Removing {f}") total += f.stat().st_size diff --git a/bench_runner/scripts/remove_benchmark.py b/bench_runner/scripts/remove_benchmark.py index f607e02e..bd3e5ea0 100644 --- a/bench_runner/scripts/remove_benchmark.py +++ b/bench_runner/scripts/remove_benchmark.py @@ -42,7 +42,9 @@ def remove_benchmark( json.dump(data, fd, indent=2) -def _main(benchmarks: Sequence[str], keep_hash: Sequence[str], dry_run: bool = False): +def _main( + benchmarks: Sequence[str], keep_hash: Sequence[str], dry_run: bool = False +): rich.print(f"Removing benchmarks {', '.join(benchmarks)} from all results") keep_hash_set = set(keep_hash) @@ -52,15 +54,21 @@ def _main(benchmarks: Sequence[str], keep_hash: Sequence[str], dry_run: bool = F if Path("longitudinal.json").is_file(): Path("longitudinal.json").unlink() - for filename in util.track(list(Path("results").glob("**/*")), "Deleting results"): + for filename in util.track( + list(Path("results").glob("**/*")), "Deleting results" + ): if filename.is_dir(): continue if filename.name != "README.md": res = result.Result.from_filename(filename) if res.result_info[0] == "raw results": - remove_benchmark(filename, benchmarks_set, keep_hash_set, dry_run) + remove_benchmark( + filename, benchmarks_set, keep_hash_set, dry_run + ) - rich.print("Regenerating all derived results. This will take quite some time...") + rich.print( + "Regenerating all derived results. This will take quite some time..." + ) if not dry_run: generate_results._main(Path(), force=True) @@ -78,7 +86,9 @@ def main(): help="Benchmark to remove", ) parser.add_argument( - "--keep-hash", action="append", help="The benchmark hash(es) to leave alone" + "--keep-hash", + action="append", + help="The benchmark hash(es) to leave alone", ) parser.add_argument( "--dry-run", diff --git a/bench_runner/scripts/run_benchmarks.py b/bench_runner/scripts/run_benchmarks.py index 36ab6025..542005a8 100644 --- a/bench_runner/scripts/run_benchmarks.py +++ b/bench_runner/scripts/run_benchmarks.py @@ -61,7 +61,11 @@ def get_benchmark_names(benchmarks: str) -> list[str]: encoding="utf-8", ) - return [line[2:].strip() for line in output.splitlines() if line.startswith("- ")] + return [ + line[2:].strip() + for line in output.splitlines() + if line.startswith("- ") + ] def run_benchmarks( @@ -168,7 +172,9 @@ def collect_pystats( pass else: if individual and fork is not None and ref is not None: - run_summarize_stats(python, fork, ref, benchmark, flags=flags) + run_summarize_stats( + python, fork, ref, benchmark, flags=flags + ) for filename in pystats_dir.iterdir(): os.rename(filename, Path(tempdir) / filename.name) @@ -182,7 +188,9 @@ def collect_pystats( benchmark_links = [] if fork is not None and ref is not None: - run_summarize_stats(python, fork, ref, "all", benchmark_links, flags=flags) + run_summarize_stats( + python, fork, ref, "all", benchmark_links, flags=flags + ) def get_perf_lines(files: Iterable[PathLike]) -> Iterable[str]: @@ -256,7 +264,10 @@ def collect_perf(python: PathLike, benchmarks: str): PROFILING_RESULTS / f"{benchmark}.perf.csv", ) else: - print(f"No perf.data files generated for {benchmark}", file=sys.stderr) + print( + f"No perf.data files generated for {benchmark}", + file=sys.stderr, + ) for filename in Path(".").glob(perf_data_glob): filename.unlink() @@ -293,7 +304,11 @@ def update_metadata( def 
copy_to_directory( - filename: PathLike, python: PathLike, fork: str, ref: str, flags: Iterable[str] + filename: PathLike, + python: PathLike, + fork: str, + ref: str, + flags: Iterable[str], ) -> None: result = Result.from_scratch(python, fork, ref, flags=flags) result.filename.parent.mkdir(parents=True, exist_ok=True) @@ -337,8 +352,8 @@ def run_summarize_stats( - benchmark: {benchmark} - fork: {fork} - ref: {ref} - - commit hash: {git.get_git_hash(Path('cpython'))[:7]} - - commit date: {git.get_git_commit_date(Path('cpython'))} + - commit hash: {git.get_git_hash(Path("cpython"))[:7]} + - commit date: {git.get_git_commit_date(Path("cpython"))} """ ) @@ -351,7 +366,8 @@ def run_summarize_stats( fd.write( md_link( name, - str(Path(result.filename.name).with_suffix("")) + f"-{name}.md", + str(Path(result.filename.name).with_suffix("")) + + f"-{name}.md", ) ) fd.write(", ") @@ -371,7 +387,10 @@ def select_benchmarks(benchmarks: str): cfg = config.get_config() if benchmarks == "all": return ",".join( - ["all", *[f"-{x}" for x in cfg.benchmarks.excluded_benchmarks if x]] + [ + "all", + *[f"-{x}" for x in cfg.benchmarks.excluded_benchmarks if x], + ] ) elif benchmarks == "all_and_excluded": return "all" @@ -415,7 +434,9 @@ def main(): formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, ) parser.add_argument( - "mode", choices=["benchmark", "perf", "pystats"], help="The mode of execution" + "mode", + choices=["benchmark", "perf", "pystats"], + help="The mode of execution", ) parser.add_argument("python", help="The path to the Python executable") parser.add_argument("fork", help="The fork of CPython") @@ -427,7 +448,9 @@ def main(): action="store_true", help="Run in a special mode for unit testing", ) - parser.add_argument("--run_id", default=None, type=str, help="The github run id") + parser.add_argument( + "--run_id", default=None, type=str, help="The github run id" + ) parser.add_argument( "--individual", action="store_true", @@ -469,6 +492,8 @@ def dummy(*args, **kwargs): init() except Exception: - sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.") + sys.stderr.write( + "pytest-cov: Failed to setup subprocess coverage." + ) main() diff --git a/bench_runner/scripts/synthesize_loops_file.py b/bench_runner/scripts/synthesize_loops_file.py index c52c641e..2abadb64 100644 --- a/bench_runner/scripts/synthesize_loops_file.py +++ b/bench_runner/scripts/synthesize_loops_file.py @@ -242,7 +242,10 @@ def main(): ) group = parser.add_mutually_exclusive_group(required=False) group.add_argument( - "-u", "--update", action="store_true", help="add to existing loops file" + "-u", + "--update", + action="store_true", + help="add to existing loops file", ) group.add_argument( "-f", "--overwrite", action="store_true", help="replace loops file" @@ -254,7 +257,9 @@ def main(): default="max", help="how to merge multiple runs", ) - parser.add_argument("results", nargs="+", help="benchmark results to parse") + parser.add_argument( + "results", nargs="+", help="benchmark results to parse" + ) args = parser.parse_args() _main( diff --git a/bench_runner/scripts/workflow.py b/bench_runner/scripts/workflow.py index 92728c04..66a12173 100644 --- a/bench_runner/scripts/workflow.py +++ b/bench_runner/scripts/workflow.py @@ -90,7 +90,10 @@ def should_run( # totally inscrutable. 
print("The checkout of cpython failed.", file=sys.stderr) print(f"You specified fork {fork!r} and ref {ref!r}.", file=sys.stderr) - print("Are you sure you entered the fork and ref correctly?", file=sys.stderr) + print( + "Are you sure you entered the fork and ref correctly?", + file=sys.stderr, + ) # Fail the rest of the workflow sys.exit(1) @@ -111,13 +114,17 @@ def should_run( git.remove(results_dir.parent, filepath) should_run = True else: - should_run = (machine in ("__really_all", "all")) or found_result is None + should_run = ( + machine in ("__really_all", "all") + ) or found_result is None return should_run def checkout_cpython(fork: str, ref: str, cpython: PathLike = Path("cpython")): - git.clone(cpython, f"https://github.com/{fork}/cpython.git", branch=ref, depth=50) + git.clone( + cpython, f"https://github.com/{fork}/cpython.git", branch=ref, depth=50 + ) def checkout_benchmarks(): @@ -208,7 +215,9 @@ def compile_windows( *args, ], ) - shutil.copytree(get_windows_build_dir(force_32bit), "libs", dirs_exist_ok=True) + shutil.copytree( + get_windows_build_dir(force_32bit), "libs", dirs_exist_ok=True + ) def clear_pip_cache(venv: PathLike) -> None: @@ -301,7 +310,9 @@ def _main( # Print out the version of Python we built just so we can confirm it's the # right thing in the logs - subprocess.check_call([get_exe_path(cpython, flags, force_32bit), "-VV"]) + subprocess.check_call( + [get_exe_path(cpython, flags, force_32bit), "-VV"] + ) clear_pip_cache(venv) @@ -382,7 +393,9 @@ def main(): dest="force_32bit", help="Do a 32-bit build (Windows only)", ) - parser.add_argument("--run_id", default=None, type=str, help="The github run id") + parser.add_argument( + "--run_id", default=None, type=str, help="The github run id" + ) parser.add_argument( "--_fast", action="store_true", help="Use fast mode, for testing" ) diff --git a/bench_runner/table.py b/bench_runner/table.py index cc04487f..6c9e7a58 100644 --- a/bench_runner/table.py +++ b/bench_runner/table.py @@ -18,7 +18,7 @@ def output_table( """ def output_row(row): - fd.write(f'| {" | ".join(row)} |\n') + fd.write(f"| {' | '.join(row)} |\n") output_row(head) output_row(col.endswith(":") and "---:" or "---" for col in head) @@ -61,7 +61,9 @@ def md_link(text: str, link: str, root: PathLike | None = None) -> str: Formats a Markdown link. The link is resolved relative to the given root. """ if root is not None: - link = str(Path(link).resolve().relative_to(Path(root).parent.resolve())) + link = str( + Path(link).resolve().relative_to(Path(root).parent.resolve()) + ) if not str(link).startswith("http"): link = "/".join(quote(x) for x in Path(link).parts) return f"[{text}]({link})" diff --git a/bench_runner/util.py b/bench_runner/util.py index 40e885d4..0001cb93 100644 --- a/bench_runner/util.py +++ b/bench_runner/util.py @@ -33,7 +33,9 @@ def has_any_element(iterable): Checks if an iterable (like a generator) has at least one element without consuming the original iterable more than necessary. 
""" - first, iterable = itertools.tee(iterable, 2) # Create two independent iterators + first, iterable = itertools.tee( + iterable, 2 + ) # Create two independent iterators try: next(first) # Try to get the first element return True # If successful, the generator is not empty @@ -58,7 +60,9 @@ def get_brew_prefix(command: str) -> str: try: prefix = subprocess.check_output(["brew", "--prefix", command]) except subprocess.CalledProcessError: - raise RuntimeError(f"Unable to find brew installation prefix for {command}") + raise RuntimeError( + f"Unable to find brew installation prefix for {command}" + ) return prefix.decode("utf-8").strip() diff --git a/lint.sh b/lint.sh deleted file mode 100755 index 01d914fa..00000000 --- a/lint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -set -e -python -m black --check bench_runner tests -python -m flake8 bench_runner tests -python -m pyright bench_runner diff --git a/pyproject.toml b/pyproject.toml index 90b92bf6..79a0b4fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,13 +27,12 @@ dynamic = ["version"] [project.optional-dependencies] test = [ - "black==25.1.0", "filelock==3.18.0", - "flake8==7.3.0", "pyright==1.1.402", "pytest==8.4.1", "pytest-cov==6.2.1", "pytest-xdist==3.7.0", + "ruff==0.15.7", ] [tool.setuptools_scm] @@ -44,6 +43,14 @@ include-package-data = true [tool.setuptools.package-data] bench_runner = ["templates/*"] +[tool.ruff] +target-version = "py311" +line-length = 79 +fix = true + +[tool.ruff.lint] +extend-ignore = ["E203", "E402"] + [tool.pytest.ini_options] markers = [ "long_running: marks tests as long_running (deselect with '-m \"not running\"')", diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b63059eb..00000000 --- a/setup.cfg +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 88 -extend-ignore = E203, E402 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e60bf1d4..5337eeb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,9 @@ os.environ["PYPERFORMANCE_HASH"] = "f7f36509e2e81e9a20cfeadddd6608f2378ff26c" -os.environ["PYSTON_BENCHMARKS_HASH"] = "d4868ff7825f3996e0005197643ed56eba4fb567" +os.environ["PYSTON_BENCHMARKS_HASH"] = ( + "d4868ff7825f3996e0005197643ed56eba4fb567" +) DATA_PATH = Path(__file__).parent / "data" @@ -66,7 +68,9 @@ def _setup_repositories(root): venv_python = venv_dir / "bin" / "python" venv_dir.parent.mkdir(parents=True, exist_ok=True) subprocess.check_call([sys.executable, "-m", "venv", venv_dir], cwd=root) - subprocess.check_call([venv_python, "-m", "pip", "install", "setuptools"], cwd=root) + subprocess.check_call( + [venv_python, "-m", "pip", "install", "setuptools"], cwd=root + ) subprocess.check_call( [venv_python, "-m", "pip", "install", root / "pyperformance"], cwd=root ) diff --git a/tests/test_config.py b/tests/test_config.py index 10cbfb9e..0e91a053 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,7 +11,9 @@ def test_get_runner_for_hostname(monkeypatch): monkeypatch.setattr(socket, "gethostname", lambda: "pyperf") - runner = runners.get_runner_for_hostname(cfgpath=DATA_PATH / "bench_runner.toml") + runner = runners.get_runner_for_hostname( + cfgpath=DATA_PATH / "bench_runner.toml" + ) assert runner.os == "linux" assert runner.arch == "x86_64" diff --git a/tests/test_generate_results.py b/tests/test_generate_results.py index 074796b8..7a69303a 100644 --- a/tests/test_generate_results.py +++ b/tests/test_generate_results.py @@ -19,7 +19,9 @@ def _copy_repo(tmp_path): return repo_path -def 
_run_for_bases(bases, repo_path, force=False, has_base=[], check_readmes=True): +def _run_for_bases( + bases, repo_path, force=False, has_base=[], check_readmes=True +): results_path = repo_path / "results" generate_results._main(repo_path, force=force, bases=bases) @@ -76,9 +78,9 @@ def test_main(tmp_path, monkeypatch): ) with open(result_with_base) as fd: contents = json.load(fd) - contents["metadata"][ - "commit_merge_base" - ] = "9d38120e335357a3b294277fd5eff0a10e46e043" + contents["metadata"]["commit_merge_base"] = ( + "9d38120e335357a3b294277fd5eff0a10e46e043" + ) with open(result_with_base, "w") as fd: json.dump(contents, fd) # End hack @@ -122,7 +124,9 @@ def test_fork_with_hyphen(tmp_path): / "bm-20221119-3.12.0a3+-b0e1f9c" / "bm-20221119-linux-x86_64-python-main-3.12.0a3+-b0e1f9c.json" ) - result_new = result.with_name(result.name.replace("python", "with%2dhyphen")) + result_new = result.with_name( + result.name.replace("python", "with%2dhyphen") + ) result.rename(result_new) @@ -139,5 +143,8 @@ def test_fork_with_hyphen(tmp_path): assert contents.count("with-hyphen") == 1 assert contents.count(" with%2dhyphen ") == 0 assert ( - contents.count("with%252dhyphen-main-3.12.0a3%2B-b0e1f9c-vs-3.11.0b3.md") == 1 + contents.count( + "with%252dhyphen-main-3.12.0a3%2B-b0e1f9c-vs-3.11.0b3.md" + ) + == 1 ) diff --git a/tests/test_get_merge_base.py b/tests/test_get_merge_base.py index 78369e77..a92b6e78 100644 --- a/tests/test_get_merge_base.py +++ b/tests/test_get_merge_base.py @@ -35,7 +35,9 @@ def checkout(tmp_path): def test_get_merge_base(capsys, checkout, monkeypatch): monkeypatch.chdir(DATA_PATH) - get_merge_base._main(True, "linux-x86_64-linux", False, [], checkout / "cpython") + get_merge_base._main( + True, "linux-x86_64-linux", False, [], checkout / "cpython" + ) captured = capsys.readouterr() diff --git a/tests/test_result.py b/tests/test_result.py index d878b79e..dd8aef2e 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -14,7 +14,9 @@ def _copy_results(tmp_path): results_path = tmp_path / "results" - shutil.copyfile(DATA_PATH / "bench_runner.toml", tmp_path / "bench_runner.toml") + shutil.copyfile( + DATA_PATH / "bench_runner.toml", tmp_path / "bench_runner.toml" + ) shutil.copytree(DATA_PATH / "results", tmp_path / "results") return results_path @@ -57,9 +59,9 @@ def test_merge_base(tmp_path, monkeypatch): ) with open(result_with_base) as fd: contents = json.load(fd) - contents["metadata"][ - "commit_merge_base" - ] = "9d38120e335357a3b294277fd5eff0a10e46e043" + contents["metadata"]["commit_merge_base"] = ( + "9d38120e335357a3b294277fd5eff0a10e46e043" + ) with open(result_with_base, "w") as fd: json.dump(contents, fd) # End hack @@ -90,7 +92,9 @@ def get_git_hash(*args): def get_git_commit_date(*args): return "2022-11-19T20:47:09+00:00" - monkeypatch.setattr(mod_result.git, "get_git_commit_date", get_git_commit_date) + monkeypatch.setattr( + mod_result.git, "get_git_commit_date", get_git_commit_date + ) def gethostname(*args): return "pyperf" diff --git a/tests/test_run_benchmarks.py b/tests/test_run_benchmarks.py index 74e7c1e2..65bc7059 100644 --- a/tests/test_run_benchmarks.py +++ b/tests/test_run_benchmarks.py @@ -68,7 +68,8 @@ def test_run_benchmarks(benchmarks_checkout, monkeypatch): hardcode_benchmark_hash(monkeypatch) shutil.copyfile( - DATA_PATH / "bench_runner.toml", benchmarks_checkout / "bench_runner.toml" + DATA_PATH / "bench_runner.toml", + benchmarks_checkout / "bench_runner.toml", ) venv_dir = benchmarks_checkout / "venv" @@ -153,7 +154,8 @@ def 
test_run_benchmarks(benchmarks_checkout, monkeypatch): def test_run_benchmarks_flags(benchmarks_checkout): shutil.copyfile( - DATA_PATH / "bench_runner.toml", benchmarks_checkout / "bench_runner.toml" + DATA_PATH / "bench_runner.toml", + benchmarks_checkout / "bench_runner.toml", ) venv_dir = benchmarks_checkout / "venv" diff --git a/tests/test_table.py b/tests/test_table.py index 323eb75a..484260ae 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -55,10 +55,12 @@ def test_replace_section(tmp_path): def test_md_link(): assert table.md_link("text", "link") == "[text](link)" assert ( - table.md_link("text", "relative/link", Path("relative/other")) == "[text](link)" + table.md_link("text", "relative/link", Path("relative/other")) + == "[text](link)" ) assert ( - table.md_link("text", "relative/link", Path("other")) == "[text](relative/link)" + table.md_link("text", "relative/link", Path("other")) + == "[text](relative/link)" ) diff --git a/tests/test_workflow.py b/tests/test_workflow.py index 338b8c28..81427065 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -75,7 +75,10 @@ def test_should_run_diff_machine_noforce(benchmarks_checkout, monkeypatch): ) assert result is True - assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 + assert ( + len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) + == 1 + ) def test_should_run_all_noforce(benchmarks_checkout, monkeypatch): @@ -94,7 +97,10 @@ def test_should_run_all_noforce(benchmarks_checkout, monkeypatch): ) assert result is True - assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 + assert ( + len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) + == 1 + ) def test_should_run_noexists_noforce(benchmarks_checkout, monkeypatch): @@ -226,7 +232,13 @@ def test_whole_workflow(tmpdir): ] ) subprocess.check_call( - [str(binary), "-m", "pip", "install", f"{bench_runner_checkout}[test]"] + [ + str(binary), + "-m", + "pip", + "install", + f"{bench_runner_checkout}[test]", + ] ) subprocess.check_call([str(binary), "-m", "bench_runner", "install"]) # install --check should never fail immediately after install @@ -304,7 +316,13 @@ def test_pystats(tmpdir): ] ) subprocess.check_call( - [str(binary), "-m", "pip", "install", f"{bench_runner_checkout}[test]"] + [ + str(binary), + "-m", + "pip", + "install", + f"{bench_runner_checkout}[test]", + ] ) subprocess.check_call([str(binary), "-m", "bench_runner", "install"]) with open("requirements.txt", "w") as fd: @@ -323,7 +341,9 @@ def test_pystats(tmpdir): ] ) - deltablue_output = list((repo / "results").glob("**/*-pystats-deltablue.*")) + deltablue_output = list( + (repo / "results").glob("**/*-pystats-deltablue.*") + ) assert len(deltablue_output) == 2 all_output = list((repo / "results").glob("**/*-pystats.*")) assert len(all_output) == 2