Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,22 @@ after each iteration with the default harness.

## Measuring memory usage

`--rss` option of `run_benchmarks.rb` allows you to measure RSS after benchmark iterations.
The `--rss` option of `run_benchmarks.rb` allows you to measure RSS (resident set size).

```
./run_benchmarks.rb --rss
```

The harness samples RSS once per iteration across the benchmarking window (after
warmup), so the `RSS (MiB)` column reports the mean working set during measurement
along with its run-to-run variability (`mean ± stddev%`), and the `RSS` ratio is
computed from those means. The raw per-iteration samples are stored in the JSON
output under `rss_samples` (bytes).

For reference, the JSON output also keeps `rss`, a single snapshot taken after a
full GC at the end of the run (the retained set, a lower bound), and `maxrss`, the
process's lifetime peak from `getrusage`.

## Rendering a graph

The `--graph` option of `run_benchmarks.rb` allows you to render benchmark results as a graph.
Expand Down
4 changes: 4 additions & 0 deletions harness-gc/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def gc_stat_heap_delta(before, after)

def run_benchmark(_num_itrs_hint, **, &block)
times = []
rss_samples = []
marking_times = []
sweeping_times = []
gc_counts = []
Expand Down Expand Up @@ -82,6 +83,7 @@ def run_benchmark(_num_itrs_hint, **, &block)
puts itr_str

times << time
rss_samples << get_rss
marking_times << mark_delta
sweeping_times << sweep_delta
gc_counts << count_delta
Expand All @@ -95,6 +97,8 @@ def run_benchmark(_num_itrs_hint, **, &block)
bench_range = WARMUP_ITRS..-1

extra = {}
rss_bench = rss_samples[bench_range] || []
extra["rss_samples"] = rss_bench unless rss_bench.empty?
extra["gc_marking_time_warmup"] = marking_times[warmup_range]
extra["gc_marking_time_bench"] = marking_times[bench_range]
extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]
Expand Down
6 changes: 5 additions & 1 deletion harness-warmup/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ def print_stats(bench, elapsed)
def run_benchmark(num_itrs_hint, **)
start = monotonic_time
times = []
rss_samples = []

begin
time = Benchmark.realtime { yield }
times << time
rss_samples << get_rss

stats = Stats.new(times)
median = stats.median
Expand All @@ -63,7 +65,9 @@ def run_benchmark(num_itrs_hint, **)
end until times.size >= MIN_ITERS and elapsed >= MIN_TIME and mad <= threshold

warmup, bench = times[0...times.size/2], times[times.size/2..-1]
return_results(warmup, bench)
rss_bench = rss_samples[times.size/2..-1] || []
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
return_results(warmup, bench, **extra)

print_stats(bench, elapsed)
end
12 changes: 12 additions & 0 deletions harness/harness-common.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'rbconfig'
require_relative '../misc/stats'

# Ensure the ruby in PATH is the ruby running this, so we can safely shell out to other commands
ruby_in_path = `ruby -e 'print RbConfig.ruby'`
Expand Down Expand Up @@ -214,6 +215,17 @@ def return_results(warmup_iterations, bench_iterations, **extra)
puts "MAXRSS: %.1fMiB" % (maxrss / 1024.0 / 1024.0)
end

rss_samples = ruby_bench_results["rss_samples"]
if rss_samples.is_a?(Array) && !rss_samples.empty?
mib = rss_samples.map { |bytes| bytes / 1024.0 / 1024.0 }
stats = Stats.new(mib)
median = stats.median
mad = stats.median_absolute_deviation(median)
puts "RSS sampled (n=%d): median %.1fMiB \u00b1 %.1fMiB (MAD), range [%.1f, %.1f]MiB" % [
mib.size, median, mad, stats.min, stats.max
]
end

write_json_file(ruby_bench_results)
end

Expand Down
8 changes: 7 additions & 1 deletion harness/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def realtime
# Takes a block as input
def run_benchmark(_num_itrs_hint, **, &block)
times = []
rss_samples = []
total_time = 0
num_itrs = 0
header = "itr: time"
Expand Down Expand Up @@ -75,10 +76,15 @@ def run_benchmark(_num_itrs_hint, **, &block)
# We internally save the time in seconds to avoid loss of precision
times << time
total_time += time
# Sample current RSS between iterations (outside the timed block) so we can
# report the working set across the window with variance.
rss_samples << get_rss
end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME

warmup, bench = times[0...WARMUP_ITRS], times[WARMUP_ITRS..-1]
return_results(warmup, bench)
rss_bench = rss_samples[WARMUP_ITRS..-1] || []
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
return_results(warmup, bench, **extra)

non_warmups = times[WARMUP_ITRS..-1]
if non_warmups.size > 1
Expand Down
52 changes: 46 additions & 6 deletions lib/results_table_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu
@include_pvalue = include_pvalue
@zjit_stats = zjit_stats || []
@include_gc = detect_gc_data(bench_data)
@rss_has_samples = @include_rss && detect_rss_samples(bench_data)
@base_name = executable_names.first
@other_names = executable_names[1..]
@bench_names = compute_bench_names
Expand Down Expand Up @@ -86,7 +87,7 @@ def build_format

@executable_names.each do |_name|
format << "%s"
format << "%.1f" if @include_rss
format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
@zjit_stats.each { format << "%s" }
if @include_gc
format << "%s"
Expand Down Expand Up @@ -125,11 +126,15 @@ def build_row(bench_name)
t0s = extract_first_iteration_times(bench_name)
times_no_warmup = extract_benchmark_times(bench_name)
rsss = extract_rss_values(bench_name)
rss_series = @rss_has_samples ? extract_rss_series(bench_name) : nil

base_t0, *other_t0s = t0s
base_t, *other_ts = times_no_warmup
base_rss, *other_rsss = rsss

base_rss_cell = rss_cell(base_rss, rss_series && rss_series[0])
other_rss_cells = other_rsss.each_index.map { |i| rss_cell(other_rsss[i], rss_series && rss_series[i + 1]) }

# Extract zjit stats: { stat_name => [base_val, other1_val, ...] }
zjit_stat_values = @zjit_stats.map do |stat|
[stat, extract_zjit_stat(bench_name, stat)]
Expand All @@ -143,8 +148,8 @@ def build_row(bench_name)
end

row = [bench_name]
build_base_columns(row, base_t, base_rss, zjit_stat_values, 0, base_mark, base_sweep)
build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
build_rss_ratio_columns(row, base_rss, other_rsss)
build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
Expand All @@ -162,10 +167,10 @@ def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_
end
end

def build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
other_ts.each_with_index do |other_t, i|
row << format_time_with_stddev(other_t)
row << other_rsss[i] if @include_rss
row << other_rss_cells[i] if @include_rss
zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
if @include_gc
row << format_time_with_stddev(other_marks[i])
Expand Down Expand Up @@ -283,9 +288,38 @@ def extract_benchmark_times(bench_name)
end
end

# Numeric RSS (MiB) per executable, used for the RSS ratio.
#
# When a run recorded per-iteration samples under 'rss_samples', their mean is
# used so the ratio matches the displayed value; otherwise the single 'rss'
# snapshot is used. Both are divided by BYTES_TO_MIB.
#
# bench_name - name of the benchmark to look up for every executable.
#
# Returns an Array with one value per entry of @executable_names, in order.
def extract_rss_values(bench_name)
  @executable_names.map do |name|
    data = bench_data_for(name, bench_name)
    samples = data['rss_samples']
    # Only touch 'rss' when there are no usable samples, so a run that carries
    # samples but no snapshot does not blow up on a nil division.
    if samples.is_a?(Array) && !samples.empty?
      mean(samples) / BYTES_TO_MIB
    else
      data['rss'] / BYTES_TO_MIB
    end
  end
end

# Collects the per-iteration RSS samples of one benchmark for every executable.
#
# bench_name - name of the benchmark to look up.
#
# Returns an Array parallel to @executable_names. Each slot holds the run's
# 'rss_samples' divided by BYTES_TO_MIB, or nil when that run has no non-empty
# sample array.
def extract_rss_series(bench_name)
  @executable_names.map do |exe|
    raw = bench_data_for(exe, bench_name)['rss_samples']
    usable = raw.is_a?(Array) && !raw.empty?
    usable ? raw.map { |bytes| bytes / BYTES_TO_MIB } : nil
  end
end

# Builds the value shown in an RSS column.
#
# mean_value - numeric RSS for the run (already in MiB).
# series     - per-iteration RSS samples for the run, or nil/empty when absent.
#
# When no run in the suite has samples (@rss_has_samples is falsy) the numeric
# value is returned untouched so the legacy "%.1f" column format still applies.
# Otherwise the column is string-formatted: runs with samples go through
# format_time_with_stddev (the same formatter the timing columns use) and runs
# without samples render as a bare one-decimal value.
def rss_cell(mean_value, series)
  return mean_value unless @rss_has_samples
  return "%.1f" % mean_value if !series || series.empty?

  format_time_with_stddev(series)
end

Expand All @@ -305,6 +339,12 @@ def detect_gc_data(bench_data)
bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } }
end

# Reports whether any run in the suite carries per-iteration RSS samples.
#
# bench_data - Hash of executable name => { benchmark name => result }.
#
# Returns true as soon as one result is a Hash whose 'rss_samples' entry is a
# non-empty Array, false otherwise.
def detect_rss_samples(bench_data)
  bench_data.each_value do |benchmarks|
    benchmarks.each_value do |entry|
      next unless entry.is_a?(Hash)

      samples = entry['rss_samples']
      return true if samples.is_a?(Array) && !samples.empty?
    end
  end
  false
end

# Looks up the recorded result for one benchmark of one executable in @bench_data.
#
# name       - executable name (first-level key).
# bench_name - benchmark name (second-level key).
def bench_data_for(name, bench_name)
  per_executable = @bench_data[name]
  per_executable[bench_name]
end
Expand Down
121 changes: 121 additions & 0 deletions test/results_table_builder_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -549,4 +549,125 @@
assert_equal 'fib', bench_names[4]
end
end

describe 'RSS sampling (rss_samples)' do
  # Bytes per MiB. Kept as a block-local (captured by the `it` closures below)
  # rather than a constant: a constant assigned inside a `describe` block is
  # defined on the enclosing lexical scope and would leak out of this spec.
  mib = 1024 * 1024

  # A run with samples renders its RSS column as a "mean ± stddev%" string.
  it 'shows mean ± stddev% and uses %s format when samples are present' do
    bench_data = {
      'ruby' => {
        'fib' => {
          'warmup' => [0.1],
          'bench' => [0.1, 0.1, 0.1],
          'rss' => 10 * mib,
          'rss_samples' => [9 * mib, 10 * mib, 11 * mib]
        }
      }
    }

    builder = ResultsTableBuilder.new(
      executable_names: ['ruby'],
      bench_data: bench_data,
      include_rss: true
    )

    table, format = builder.build

    assert_equal ['bench', 'ruby (ms)', 'RSS (MiB)'], table[0]
    assert_equal ['%s', '%s', '%s'], format

    m = table[1][2].match(/\A(\d+\.\d) ± (\d+\.\d)%\z/)
    assert m, "expected mean ± stddev%, got #{table[1][2].inspect}"
    assert_in_delta 10.0, m[1].to_f, 0.1
    assert_operator m[2].to_f, :>, 0.0
  end

  # The 'rss' snapshots are deliberately misleading here so the assertion only
  # holds if the ratio is derived from the sample means.
  it 'computes the RSS ratio from the mean of samples' do
    bench_data = {
      'ruby' => {
        'fib' => {
          'warmup' => [0.1],
          'bench' => [0.1, 0.1, 0.1],
          'rss' => 99 * mib, # should be ignored in favour of samples
          'rss_samples' => [10 * mib, 10 * mib, 10 * mib]
        }
      },
      'ruby-yjit' => {
        'fib' => {
          'warmup' => [0.05],
          'bench' => [0.05, 0.05, 0.05],
          'rss' => 1 * mib,
          'rss_samples' => [18 * mib, 20 * mib, 22 * mib]
        }
      }
    }

    builder = ResultsTableBuilder.new(
      executable_names: ['ruby', 'ruby-yjit'],
      bench_data: bench_data,
      include_rss: true
    )

    table, _format = builder.build

    # ratio = mean(ruby samples) / mean(yjit samples) = 10 / 20 = 0.5
    assert_in_delta 0.5, table[1].last, 0.001
  end

  it 'falls back to a plain MiB value for runs without samples in a mixed suite' do
    bench_data = {
      'ruby' => {
        'fib' => {
          'warmup' => [0.1],
          'bench' => [0.1, 0.1],
          'rss' => 10 * mib,
          'rss_samples' => [10 * mib, 10 * mib]
        },
        'loop' => {
          'warmup' => [0.2],
          'bench' => [0.2, 0.2],
          'rss' => 15 * mib
          # no rss_samples for this benchmark
        }
      }
    }

    builder = ResultsTableBuilder.new(
      executable_names: ['ruby'],
      bench_data: bench_data,
      include_rss: true
    )

    table, format = builder.build

    # Suite has samples somewhere, so the RSS column is string-formatted.
    assert_equal ['%s', '%s', '%s'], format

    # Index rows by benchmark name so the assertions do not depend on row order.
    rows = table[1..].each_with_object({}) { |row, h| h[row[0]] = row }
    assert_match(/\A\d+\.\d ± \d+\.\d%\z/, rows['fib'][2])
    # The sample-less benchmark still renders as a bare MiB value.
    assert_equal '15.0', rows['loop'][2]
  end

  it 'keeps %.1f formatting when no run in the suite has samples' do
    bench_data = {
      'ruby' => {
        'fib' => {
          'warmup' => [0.1],
          'bench' => [0.1],
          'rss' => 10 * mib
        }
      }
    }

    builder = ResultsTableBuilder.new(
      executable_names: ['ruby'],
      bench_data: bench_data,
      include_rss: true
    )

    _table, format = builder.build
    assert_equal ['%s', '%s', '%.1f'], format
  end
end
end
Loading