Skip to content

Commit 37c3497

Browse files
authored
Calculate 5 summaries while running benchmarks (#42)
* Add option to specify which summary stat you want the benchmark to return; defaults to `:minimum`. * Store all summaries from a benchmark, allow user to specify which summary to use when calling `plot` or `benchmark_result_df` * Remove invalid argument * display the plots * Fix colors, add option to not display plots.
1 parent dd3fbf2 commit 37c3497

File tree

4 files changed

+125
-38
lines changed

4 files changed

+125
-38
lines changed

src/plotting.jl

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Defines the mapping between libraries and colors
1010
# make sure colors are distinguishable against white background by adding white to the seed list,
1111
# then deleting it from the resultant palette
1212
palette = distinguishable_colors(length(LIBRARIES) + 2, [colorant"white", colorant"black", colorant"#66023C", colorant"#0071c5"])
13-
deleteat!(palette, 1); deleteat!(palette, 2)
13+
deleteat!(palette, 1); deleteat!(palette, 1)
1414
const COLOR_MAP = Dict(zip(LIBRARIES, palette))
1515
getcolor(l::Symbol) = COLOR_MAP[l]
1616
for (alias,ref) [(:BLIS,:blis),(:generic,:Generic),(:GENERIC,:Generic)]
@@ -76,9 +76,17 @@ end
7676
logscale = true,
7777
width = 1200,
7878
height = 600,
79+
measure = :minimum,
7980
plot_directory = default_plot_directory(),
8081
plot_filename = default_plot_filename(br; desc = desc, logscale = logscale),
81-
file_extensions = ["svg", "png"])
82+
file_extensions = ["svg", "png"],
83+
displayplot = true)
84+
85+
`measure` refers to the BenchmarkTools summary on times. Valid options are:
86+
`:minimum`, `:median`, `:mean`, `:maximum`, and `:hmean`.
87+
88+
- `:minimum` would yield the maximum `GFLOPS`, and would be the usual estimate used in Julia.
89+
- `:hmean`, the harmonic mean of the times, is useful if you want an average GFLOPS, instead of a GFLOPS computed with the average times.
8290
"""
8391
function Gadfly.plot(br::BenchmarkResult{T}; kwargs...) where {T}
8492
_plot(br; kwargs...)
@@ -92,10 +100,13 @@ function _plot(
92100
logscale::Bool = true,
93101
width = 12inch,
94102
height = 8inch,
103+
measure = :minimum,
95104
plot_directory::AbstractString = default_plot_directory(),
96105
plot_filename::AbstractString = default_plot_filename(br; desc = desc, logscale = logscale),
97106
file_extensions = ["svg", "png"],
107+
displayplot = true
98108
) where {T}
109+
j = get_measure_index(measure) # throw early if `measure` invalid
99110
colors = getcolor.(br.libraries);
100111
libraries = string.(br.libraries)
101112
xscale = logscale ? Scale.x_log10(labels=string roundint exp10) : Scale.x_continuous
@@ -107,11 +118,12 @@ function _plot(
107118
for i eachindex(libraries)
108119
linestyle = isjulialib(libraries[i]) ? :solid : :dash
109120
l = layer(
110-
x = br.sizes, y = br.gflops[:,i],
121+
x = br.sizes, y = br.gflops[:,i,j],
111122
Geom.line, Theme(default_color = colors[i], line_style = [linestyle])
112123
)
113124
push!(plt, l)
114125
end
126+
displayplot && display(plt)
115127
mkpath(plot_directory)
116128
_filenames = String[]
117129
extension_dict = Dict("svg" => SVG, "png" => PNG, "pdf" => PDF, "ps" => PS)

src/runbenchmark.jl

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
struct BenchmarkResult{T,I<:Union{Int,NTuple{3,Int}}}
22
libraries::Vector{Symbol}
33
sizes::Vector{I}
4-
gflops::Matrix{Float64}
5-
times::Matrix{Float64}
4+
gflops::Array{Float64,3}
5+
times::Array{Float64,3}
66
threaded::Bool
7+
function BenchmarkResult{T}(libraries, sizes, gflops, times, threaded) where {T}
8+
gflopsperm = permutedims(gflops, (2,3,1))
9+
timesperm = permutedims(times, (2,3,1))
10+
I = eltype(sizes)
11+
new{T,I}(libraries, convert(Vector{I},sizes), gflopsperm, timesperm, threaded)
12+
end
713
end
814

915
"""
@@ -13,23 +19,37 @@ function benchmark_result_type(::BenchmarkResult{T}) where {T}
1319
return T
1420
end
1521

16-
function _benchmark_result_df(sizes, libraries, mat)
22+
function get_measure_index(measure::Symbol)::Int
23+
j = findfirst(==(measure), (:minimum,:median,:mean,:maximum,:hmean))
24+
if j === nothing
25+
throw(ArgumentError("`measure` argument must be one of (:minimum,:median,:mean,:maximum,:hmean), but was $(repr(measure))."))
26+
end
27+
return j
28+
end
29+
function _benchmark_result_df(sizes, libraries, mat, measure)
30+
j = get_measure_index(measure)
1731
df = DataFrame(Size = sizes)
1832
for i eachindex(libraries)
19-
setproperty!(df, libraries[i], mat[:,i])
33+
setproperty!(df, libraries[i], mat[:,i,j])
2034
end
2135
return df
2236
end
23-
function _benchmark_result_df(br::BenchmarkResult, s::Symbol = :gflops)
24-
_benchmark_result_df(br.sizes, br.libraries, getproperty(br, s))
37+
function _benchmark_result_df(br::BenchmarkResult, s::Symbol = :gflops, measure = :minimum)
38+
_benchmark_result_df(br.sizes, br.libraries, getproperty(br, s), measure)
2539
end
2640

2741

2842
"""
29-
benchmark_result_df(benchmark_result::BenchmarkResult)
43+
benchmark_result_df(benchmark_result::BenchmarkResult, measure = :minimum)
44+
45+
`measure` refers to the BenchmarkTools summary on times. Valid options are:
46+
`:minimum`, `:median`, `:mean`, `:maximum`, and `:hmean`.
47+
48+
- `:minimum` would yield the maximum `GFLOPS`, and would be the usual estimate used in Julia.
49+
- `:hmean`, the harmonic mean of the times, is useful if you want an average GFLOPS, instead of a GFLOPS computed with the average times.
3050
"""
31-
function benchmark_result_df(benchmark_result::BenchmarkResult)
32-
df = _benchmark_result_df(benchmark_result, :times)
51+
function benchmark_result_df(benchmark_result::BenchmarkResult, measure = :minimum)
52+
df = _benchmark_result_df(benchmark_result, :times, measure)
3353
df = stack(df, Not(:Size), variable_name = :Library, value_name = :Seconds)
3454
df.GFLOPS = @. 2e-9 * matmul_length(df.Size) ./ df.Seconds
3555
return df
@@ -61,10 +81,11 @@ function benchmark_fun!(
6181
if force_belapsed || 2t0 < BenchmarkTools.DEFAULT_PARAMETERS.seconds
6282
maybe_sleep(sleep_time)
6383
br = @benchmark $f!($C, $A, $B)
64-
tret = summarystat(br).time
65-
if summarystat === minimum # don't want to do this for `median` or `mean`, for example
66-
tret = min(tret, t0)
67-
end
84+
tmin = min(1e-9minimum(br).time, t0)
85+
tmedian = 1e-9median(br).time
86+
tmean = 1e-9mean(br).time
87+
tmax = 1e-9maximum(br).time # We'll exclude the first for this...
88+
thmean⁻¹ = 1e9mean(inv, br.times)
6889
else
6990
maybe_sleep(sleep_time)
7091
t1 = @elapsed f!(C, A, B)
@@ -73,12 +94,20 @@ function benchmark_fun!(
7394
if (t0+t1) < 4BenchmarkTools.DEFAULT_PARAMETERS.seconds
7495
maybe_sleep(sleep_time)
7596
t3 = @elapsed f!(C, A, B)
76-
tret = summarystat((t0, t1, t2, t3))
97+
tmin = minimum((t0, t1, t2, t3))
98+
tmedian = median((t0, t1, t2, t3))
99+
tmean = mean((t0, t1, t2, t3))
100+
tmax = maximum((t0, t1, t2, t3))
101+
thmean⁻¹ = mean(inv, (t0, t1, t2, t3))
77102
else
78-
tret = summarystat((t0, t1, t2))
103+
tmin = minimum((t0, t1, t2))
104+
tmedian = median((t0, t1, t2))
105+
tmean = mean((t0, t1, t2))
106+
tmax = maximum((t0, t1, t2))
107+
thmean⁻¹ = mean(inv, (t0, t1, t2))
79108
end
80109
end
81-
return tret
110+
return tmin, tmedian, tmean, tmax, thmean⁻¹
82111
end
83112
_mat_size(M, N, ::typeof(adjoint)) = (N, M)
84113
_mat_size(M, N, ::typeof(transpose)) = (N, M)
@@ -191,8 +220,7 @@ end
191220
threaded::Bool = Threads.nthreads() > 1,
192221
A_transform = identity,
193222
B_transform = identity,
194-
sleep_time = 0.0,
195-
summarystat = median)
223+
sleep_time = 0.0)
196224
197225
- T: The element type of the matrices.
198226
- libs: Libraries to benchmark.
@@ -207,9 +235,8 @@ end
207235
- sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
208236
in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
209237
means of trying to get more reliable benchmarks. Instead, it's recommended you disable
210-
your systems turbo. Disabling it -- and reenabling when you're done benchmarking --
238+
your systems turbo. Disabling it -- and reenabling when you're done benchmarking --
211239
should be possible without requiring a reboot.
212-
- summarystat: Which summary statistic should be reported? Defaults to `minimum`
213240
214241
"""
215242
function runbench(
@@ -241,14 +268,15 @@ function runbench(
241268
end
242269
memory = Vector{T}(undef, max_matrix_sizes)
243270
library = reduce(vcat, (libs for _ eachindex(sizevec)))
244-
times = Matrix{Float64}(undef, length(sizes), length(libs))
271+
times = Array{Float64}(undef, 5, length(sizes), length(libs))
245272
gflop = similar(times);
246273
k = 0
247274

248275
force_belapsed = true # force when compiling
249276

250277
p = Progress(length(sizes))
251-
last_perfs = Vector{Tuple{Symbol,Union{Float64,NTuple{3,Int}}}}(undef, length(libs)+1)
278+
gflop_report_type = NamedTuple{(:MedianGFLOPS, :MaxGFLOPS), Tuple{Float64, Float64}}
279+
last_perfs = Vector{Tuple{Symbol,Union{gflop_report_type,NTuple{3,Int}}}}(undef, length(libs)+1)
252280
for (j,s) enumerate(sizevec)
253281
M, K, N = matmul_sizes(s)
254282
A, off = alloc_mat(M, K, memory, 0, A_transform)
@@ -262,13 +290,22 @@ function runbench(
262290
t = benchmark_fun!(
263291
funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
264292
)
265-
gflops = 2e-9M*K*N / t
266-
times[j,i] = t
267-
gflop[j,i] = gflops
268-
last_perfs[i+1] = (libs[i], round(gflops,sigdigits=4))
293+
gffactor = 2e-9M*K*N
294+
@inbounds for k 1:4
295+
times[k,j,i] = t[k]
296+
gflop[k,j,i] = gffactor / t[k]
297+
end
298+
times[5,j,i] = inv(t[5])
299+
gflop[5,j,i] = gffactor * t[5]
300+
gflops = round.((gflop[1,j,i], gflop[2,j,i]), sigdigits = 4)
301+
gflops = (
302+
MedianGFLOPS = round(gflop[2,j,i], sigdigits = 4),
303+
MaxGFLOPS = round(gflop[1,j,i], sigdigits = 4)
304+
)
305+
last_perfs[i+1] = (libs[i], gflops)
269306
end
270307
ProgressMeter.next!(p; showvalues = last_perfs)
271308
force_belapsed = false
272309
end
273-
BenchmarkResult{T,eltype(sizes)}(libs, sizes, gflop, times, threaded)
310+
BenchmarkResult{T}(libs, sizes, gflop, times, threaded)
274311
end

test/interface.jl

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,31 @@
22
import BLASBenchmarksCPU
33
import StatsPlots
44
@testset "Interface" begin
5-
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false, summarystat = BLASBenchmarksCPU.median) #test that threads=false at least doesn't throw somewhere.
6-
df = BLASBenchmarksCPU.benchmark_result_df(benchmark_result)
7-
@test df isa BLASBenchmarksCPU.DataFrame
8-
df[!, :Size] = Float64.(df[!, :Size]);
9-
df[!, :GFLOPS] = Float64.(df[!, :GFLOPS]);
10-
df[!, :Seconds] = Float64.(df[!, :Seconds]);
11-
p = StatsPlots.@df df StatsPlots.plot(:Size, :GFLOPS; group = :Library, legend = :bottomright)
12-
@test p isa StatsPlots.Plots.Plot
5+
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false) #test that threads=false at least doesn't throw somewhere.
6+
dfmin = BLASBenchmarksCPU.benchmark_result_df(benchmark_result) # minimum
7+
dfmedian = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :median)
8+
dfmean = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :mean)
9+
dfmax = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :maximum)
10+
@test_throws ArgumentError BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :foobar)
11+
@test dfmin isa BLASBenchmarksCPU.DataFrame
12+
@test dfmedian isa BLASBenchmarksCPU.DataFrame
13+
@test dfmean isa BLASBenchmarksCPU.DataFrame
14+
@test dfmax isa BLASBenchmarksCPU.DataFrame
15+
for df (dfmin,dfmedian,dfmean,dfmax)
16+
df[!, :Size] = Float64.(df[!, :Size]);
17+
df[!, :GFLOPS] = Float64.(df[!, :GFLOPS]);
18+
df[!, :Seconds] = Float64.(df[!, :Seconds]);
19+
p = StatsPlots.@df df StatsPlots.plot(:Size, :GFLOPS; group = :Library, legend = :bottomright)
20+
@test p isa StatsPlots.Plots.Plot
21+
end
22+
@test all(dfmin[!, :GFLOPS] .≥ dfmedian[!, :GFLOPS])
23+
@test all(dfmin[!, :GFLOPS] .≥ dfmean[!, :GFLOPS])
24+
@test all(dfmin[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
25+
@test any(dfmin[!, :GFLOPS] .≠ dfmedian[!, :GFLOPS])
26+
@test any(dfmin[!, :GFLOPS] .≠ dfmean[!, :GFLOPS])
27+
@test any(dfmin[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
28+
@test any(dfmedian[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
29+
@test any(dfmean[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
30+
@test any(dfmedian[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
31+
@test any(dfmean[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
1332
end

test/main.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,24 @@ for T in [Float64, Float32]
1919
BLASBenchmarksCPU.plot(
2020
benchmark_result;
2121
plot_directory = plot_directory,
22+
displayplot = false
23+
)
24+
BLASBenchmarksCPU.plot(
25+
benchmark_result;
26+
plot_directory = plot_directory,
27+
measure = :median,
28+
displayplot = false
29+
)
30+
BLASBenchmarksCPU.plot(
31+
benchmark_result;
32+
plot_directory = plot_directory,
33+
measure = :mean,
34+
displayplot = false
35+
)
36+
BLASBenchmarksCPU.plot(
37+
benchmark_result;
38+
plot_directory = plot_directory,
39+
measure = :maximum,
40+
displayplot = false
2241
)
2342
end

0 commit comments

Comments
 (0)