Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions lib/benchmark_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
end

# Build output text string with metadata, table, and legend
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false)
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false)
base_name, *other_names = ruby_descriptions.keys

output_str = +""
Expand All @@ -73,7 +73,9 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
end
end
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
if include_pvalue
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
end
end

output_str
Expand Down
2 changes: 1 addition & 1 deletion lib/benchmark_runner/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def run
BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)

# Save the output in a text file that we can easily refer to
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?)
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue)
out_txt_path = output_path + ".txt"
File.open(out_txt_path, "w") { |f| f.write output_str }

Expand Down
4 changes: 2 additions & 2 deletions lib/results_table_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
row.concat(ratio_1sts)

other_ts.each do |other_t|
pval = Stats.welch_p_value(base_t, other_t)
pval = @include_pvalue ? Stats.welch_p_value(base_t, other_t) : nil
row << format_ratio(mean(base_t) / mean(other_t), pval)
if @include_pvalue
row << format_p_value(pval)
Expand Down Expand Up @@ -207,7 +207,7 @@ def gc_ratio(base, other)
mean(other) == 0.0
return "N/A"
end
pval = Stats.welch_p_value(base, other)
pval = @include_pvalue ? Stats.welch_p_value(base, other) : nil
format_ratio(mean(base) / mean(other), pval)
end

Expand Down
19 changes: 19 additions & 0 deletions test/benchmark_runner_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,25 @@
assert_includes result, 'Legend:'
assert_includes result, '- ruby-yjit 1st itr: ratio of ruby-base/ruby-yjit time for the first benchmarking iteration.'
assert_includes result, '- ruby-base/ruby-yjit: ratio of ruby-base/ruby-yjit time. Higher is better for ruby-yjit. Above 1 represents a speedup.'
refute_includes result, "p < 0.001"
end

# When include_pvalue is passed as true, the generated text must contain the
# legend line that explains the significance markers.
it 'includes p-value legend when include_pvalue is true' do
  descriptions = { 'ruby-base' => 'ruby 3.3.0', 'ruby-yjit' => 'ruby 3.3.0 +YJIT' }
  header_row = ['bench', 'ruby-base (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)']
  fib_row = ['fib', '100.0', '5.0', '50.0', '3.0']
  column_formats = ['%s', '%.1f', '%.1f', '%.1f', '%.1f']
  no_failures = {}

  output = BenchmarkRunner.build_output_text(
    descriptions, [header_row, fib_row], column_formats, no_failures, include_pvalue: true
  )

  assert_includes output, "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)"
end

Expand Down
38 changes: 36 additions & 2 deletions test/results_table_builder_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@
assert_equal '', table[1].last
end

it 'always shows significance symbol but omits verbose columns without --pvalue' do
it 'omits significance symbols and p-value columns without --pvalue' do
executable_names = ['ruby', 'ruby-yjit']
bench_data = {
'ruby' => {
Expand All @@ -452,7 +452,41 @@
table, _format = builder.build
refute_includes table[0], 'p-value'
refute_includes table[0], 'sig'
assert_match(/\(\*{1,3}\)$/, table[1].last)
ratio_cell = table[1].last
refute_match(/\*/, ratio_cell)
assert_match(/\A\d+\.\d+\s*\z/, ratio_cell)
end

# With include_pvalue: true the first table row must list the 'p-value' and
# 'sig' columns, and the ratio cell of the second row must carry a
# significance marker of one to three asterisks in parentheses.
it 'shows significance symbols and p-value columns with --pvalue' do
  executable_names = ['ruby', 'ruby-yjit']
  # Samples are tightly clustered around 0.100 and 0.050 respectively,
  # presumably so the comparison is reported as significant.
  bench_data = {
    'ruby' => {
      'fib' => {
        'warmup' => [0.1],
        'bench' => [0.100, 0.101, 0.099],
        'rss' => 1024 * 1024 * 10
      }
    },
    'ruby-yjit' => {
      'fib' => {
        'warmup' => [0.05],
        'bench' => [0.050, 0.051, 0.049],
        'rss' => 1024 * 1024 * 12
      }
    }
  }

  builder = ResultsTableBuilder.new(
    executable_names: executable_names,
    bench_data: bench_data,
    include_pvalue: true
  )

  table, _format = builder.build
  assert_includes table[0], 'p-value'
  assert_includes table[0], 'sig'
  ratio_col_idx = table[0].index('ruby/ruby-yjit')
  # Array#index returns nil when the header is absent; fail with a readable
  # message instead of a TypeError raised by indexing the row with nil.
  refute_nil ratio_col_idx, "expected a 'ruby/ruby-yjit' column in #{table[0].inspect}"
  assert_match(/\(\*{1,3}\)/, table[1][ratio_col_idx])
end

it 'handles only headline benchmarks' do
Expand Down
Loading