Calculate 5 summaries while running benchmarks #42

Merged · 6 commits · Jan 28, 2021
18 changes: 15 additions & 3 deletions src/plotting.jl
@@ -10,7 +10,7 @@ Defines the mapping between libraries and colors
# make sure colors are distinguishable against white background by adding white to the seed list,
# then deleting it from the resultant palette
palette = distinguishable_colors(length(LIBRARIES) + 2, [colorant"white", colorant"black", colorant"#66023C", colorant"#0071c5"])
deleteat!(palette, 1); deleteat!(palette, 2)
deleteat!(palette, 1); deleteat!(palette, 1)
const COLOR_MAP = Dict(zip(LIBRARIES, palette))
getcolor(l::Symbol) = COLOR_MAP[l]
for (alias,ref) ∈ [(:BLIS,:blis),(:generic,:Generic),(:GENERIC,:Generic)]
@@ -76,9 +76,17 @@ end
logscale = true,
width = 1200,
height = 600,
measure = :minimum,
plot_directory = default_plot_directory(),
plot_filename = default_plot_filename(br; desc = desc, logscale = logscale),
file_extensions = ["svg", "png"])
file_extensions = ["svg", "png"],
displayplot = true)

`measure` refers to the BenchmarkTools summary statistic applied to the times. Valid options are:
`:minimum`, `:median`, `:mean`, `:maximum`, and `:hmean`.

- `:minimum` yields the maximum `GFLOPS`, and is the usual estimate used in Julia.
- `:hmean`, the harmonic mean of the times, is useful if you want an average `GFLOPS` instead of a `GFLOPS` computed from the average time.
"""
function Gadfly.plot(br::BenchmarkResult{T}; kwargs...) where {T}
_plot(br; kwargs...)
@@ -92,10 +100,13 @@ function _plot(
logscale::Bool = true,
width = 12inch,
height = 8inch,
measure = :minimum,
plot_directory::AbstractString = default_plot_directory(),
plot_filename::AbstractString = default_plot_filename(br; desc = desc, logscale = logscale),
file_extensions = ["svg", "png"],
displayplot = true
) where {T}
j = get_measure_index(measure) # throw early if `measure` invalid
colors = getcolor.(br.libraries);
libraries = string.(br.libraries)
xscale = logscale ? Scale.x_log10(labels=string ∘ roundint ∘ exp10) : Scale.x_continuous
@@ -107,11 +118,12 @@
for i ∈ eachindex(libraries)
linestyle = isjulialib(libraries[i]) ? :solid : :dash
l = layer(
x = br.sizes, y = br.gflops[:,i],
x = br.sizes, y = br.gflops[:,i,j],
Geom.line, Theme(default_color = colors[i], line_style = [linestyle])
)
push!(plt, l)
end
displayplot && display(plt)
mkpath(plot_directory)
_filenames = String[]
extension_dict = Dict("svg" => SVG, "png" => PNG, "pdf" => PDF, "ps" => PS)
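For reference, a minimal usage sketch of the keywords this diff adds (`br` stands for a `BenchmarkResult` returned by `runbench`, mirroring the calls added to `test/main.jl` later in this PR):

using BLASBenchmarksCPU
br = BLASBenchmarksCPU.runbench(Float64; sizes = [10, 100, 200], threaded = false)
# Plot median-time GFLOPS instead of the default minimum-time (peak) GFLOPS,
# and skip displaying the plot (as the tests do on headless CI).
BLASBenchmarksCPU.plot(br; measure = :median, displayplot = false)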
91 changes: 64 additions & 27 deletions src/runbenchmark.jl
@@ -1,9 +1,15 @@
struct BenchmarkResult{T,I<:Union{Int,NTuple{3,Int}}}
libraries::Vector{Symbol}
sizes::Vector{I}
gflops::Matrix{Float64}
times::Matrix{Float64}
gflops::Array{Float64,3}
times::Array{Float64,3}
threaded::Bool
function BenchmarkResult{T}(libraries, sizes, gflops, times, threaded) where {T}
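# `gflops` and `times` arrive indexed (measure, size, library); permute to (size, library, measure)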
gflopsperm = permutedims(gflops, (2,3,1))
timesperm = permutedims(times, (2,3,1))
I = eltype(sizes)
new{T,I}(libraries, convert(Vector{I},sizes), gflopsperm, timesperm, threaded)
end
end

"""
@@ -13,23 +19,37 @@ function benchmark_result_type(::BenchmarkResult{T}) where {T}
return T
end

function _benchmark_result_df(sizes, libraries, mat)
function get_measure_index(measure::Symbol)::Int
j = findfirst(==(measure), (:minimum,:median,:mean,:maximum,:hmean))
if j === nothing
throw(ArgumentError("`measure` argument must be one of (:minimum,:median,:mean,:maximum,:hmean), but was $(repr(measure))."))
end
return j
end
function _benchmark_result_df(sizes, libraries, mat, measure)
j = get_measure_index(measure)
df = DataFrame(Size = sizes)
for i ∈ eachindex(libraries)
setproperty!(df, libraries[i], mat[:,i])
setproperty!(df, libraries[i], mat[:,i,j])
end
return df
end
function _benchmark_result_df(br::BenchmarkResult, s::Symbol = :gflops)
_benchmark_result_df(br.sizes, br.libraries, getproperty(br, s))
function _benchmark_result_df(br::BenchmarkResult, s::Symbol = :gflops, measure = :minimum)
_benchmark_result_df(br.sizes, br.libraries, getproperty(br, s), measure)
end


"""
benchmark_result_df(benchmark_result::BenchmarkResult)
benchmark_result_df(benchmark_result::BenchmarkResult, measure = :minimum)

`measure` refers to the BenchmarkTools summary statistic applied to the times. Valid options are:
`:minimum`, `:median`, `:mean`, `:maximum`, and `:hmean`.

- `:minimum` yields the maximum `GFLOPS`, and is the usual estimate used in Julia.
- `:hmean`, the harmonic mean of the times, is useful if you want an average `GFLOPS` instead of a `GFLOPS` computed from the average time.
"""
function benchmark_result_df(benchmark_result::BenchmarkResult)
df = _benchmark_result_df(benchmark_result, :times)
function benchmark_result_df(benchmark_result::BenchmarkResult, measure = :minimum)
df = _benchmark_result_df(benchmark_result, :times, measure)
df = stack(df, Not(:Size), variable_name = :Library, value_name = :Seconds)
df.GFLOPS = @. 2e-9 * matmul_length(df.Size) ./ df.Seconds
return df
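As a quick sanity check on the `GFLOPS` column computed above (assuming `matmul_length(s)` is the product `M*K*N`, i.e. `s^3` for a square size `s`):

# A square s×s matmul costs 2*s^3 floating-point ops; 2e-9 converts ops/second to GFLOPS.
s = 100          # matrix size
seconds = 1e-3   # hypothetical measured time
gflops = 2e-9 * s^3 / seconds   # == 2.0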
@@ -61,10 +81,11 @@ function benchmark_fun!(
if force_belapsed || 2t0 < BenchmarkTools.DEFAULT_PARAMETERS.seconds
maybe_sleep(sleep_time)
br = @benchmark $f!($C, $A, $B)
tret = summarystat(br).time
if summarystat === minimum # don't want to do this for `median` or `mean`, for example
tret = min(tret, t0)
end
tmin = min(1e-9minimum(br).time, t0)
tmedian = 1e-9median(br).time
tmean = 1e-9mean(br).time
tmax = 1e-9maximum(br).time # We'll exclude the first for this...
thmean⁻¹ = 1e9mean(inv, br.times)
else
maybe_sleep(sleep_time)
t1 = @elapsed f!(C, A, B)
@@ -73,12 +94,20 @@
if (t0+t1) < 4BenchmarkTools.DEFAULT_PARAMETERS.seconds
maybe_sleep(sleep_time)
t3 = @elapsed f!(C, A, B)
tret = summarystat((t0, t1, t2, t3))
tmin = minimum((t0, t1, t2, t3))
tmedian = median((t0, t1, t2, t3))
tmean = mean((t0, t1, t2, t3))
tmax = maximum((t0, t1, t2, t3))
thmean⁻¹ = mean(inv, (t0, t1, t2, t3))
else
tret = summarystat((t0, t1, t2))
tmin = minimum((t0, t1, t2))
tmedian = median((t0, t1, t2))
tmean = mean((t0, t1, t2))
tmax = maximum((t0, t1, t2))
thmean⁻¹ = mean(inv, (t0, t1, t2))
end
end
return tret
return tmin, tmedian, tmean, tmax, thmean⁻¹
end
_mat_size(M, N, ::typeof(adjoint)) = (N, M)
_mat_size(M, N, ::typeof(transpose)) = (N, M)
@@ -191,8 +220,7 @@ end
threaded::Bool = Threads.nthreads() > 1,
A_transform = identity,
B_transform = identity,
sleep_time = 0.0,
summarystat = median)
sleep_time = 0.0)

- T: The element type of the matrices.
- libs: Libraries to benchmark.
@@ -207,9 +235,8 @@ end
- sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
means of trying to get more reliable benchmarks. Instead, it's recommended you disable
your system's turbo. Disabling it -- and reenabling when you're done benchmarking --
should be possible without requiring a reboot.
- summarystat: Which summary statistic should be reported? Defaults to `minimum`

"""
function runbench(
@@ -241,14 +268,15 @@ function runbench(
end
memory = Vector{T}(undef, max_matrix_sizes)
library = reduce(vcat, (libs for _ ∈ eachindex(sizevec)))
times = Matrix{Float64}(undef, length(sizes), length(libs))
times = Array{Float64}(undef, 5, length(sizes), length(libs))
gflop = similar(times);
k = 0

force_belapsed = true # force when compiling

p = Progress(length(sizes))
last_perfs = Vector{Tuple{Symbol,Union{Float64,NTuple{3,Int}}}}(undef, length(libs)+1)
gflop_report_type = NamedTuple{(:MedianGFLOPS, :MaxGFLOPS), Tuple{Float64, Float64}}
last_perfs = Vector{Tuple{Symbol,Union{gflop_report_type,NTuple{3,Int}}}}(undef, length(libs)+1)
for (j,s) ∈ enumerate(sizevec)
M, K, N = matmul_sizes(s)
A, off = alloc_mat(M, K, memory, 0, A_transform)
@@ -262,13 +290,22 @@
t = benchmark_fun!(
funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
)
gflops = 2e-9M*K*N / t
times[j,i] = t
gflop[j,i] = gflops
last_perfs[i+1] = (libs[i], round(gflops,sigdigits=4))
gffactor = 2e-9M*K*N
@inbounds for k ∈ 1:4
times[k,j,i] = t[k]
gflop[k,j,i] = gffactor / t[k]
end
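# t[5] is mean(inv, times): invert it to store the harmonic-mean time itself,
# and multiply by the flop factor to get the mean of the per-run GFLOPS.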
times[5,j,i] = inv(t[5])
gflop[5,j,i] = gffactor * t[5]
gflops = round.((gflop[1,j,i], gflop[2,j,i]), sigdigits = 4)
gflops = (
MedianGFLOPS = round(gflop[2,j,i], sigdigits = 4),
MaxGFLOPS = round(gflop[1,j,i], sigdigits = 4)
)
last_perfs[i+1] = (libs[i], gflops)
end
ProgressMeter.next!(p; showvalues = last_perfs)
force_belapsed = false
end
BenchmarkResult{T,eltype(sizes)}(libs, sizes, gflop, times, threaded)
BenchmarkResult{T}(libs, sizes, gflop, times, threaded)
end
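Note the asymmetry in how the fifth slot is filled: `benchmark_fun!` returns `mean(inv, times)` (the reciprocal of the harmonic mean), so `runbench` multiplies by it rather than dividing. A small illustrative check of the identity this relies on:

using Statistics
times = [1e-3, 2e-3, 4e-3]   # seconds per run
gffactor = 2e-9 * 100^3      # flop factor for a 100×100×100 matmul
hmean⁻¹ = mean(inv, times)   # what benchmark_fun! returns in slot 5
# GFLOPS from the harmonic mean of times equals the mean of the per-run GFLOPS:
gffactor * hmean⁻¹ ≈ mean(gffactor ./ times)   # true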
35 changes: 27 additions & 8 deletions test/interface.jl
@@ -2,12 +2,31 @@
import BLASBenchmarksCPU
import StatsPlots
@testset "Interface" begin
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false, summarystat = BLASBenchmarksCPU.median) #test that threads=false at least doesn't throw somewhere.
df = BLASBenchmarksCPU.benchmark_result_df(benchmark_result)
@test df isa BLASBenchmarksCPU.DataFrame
df[!, :Size] = Float64.(df[!, :Size]);
df[!, :GFLOPS] = Float64.(df[!, :GFLOPS]);
df[!, :Seconds] = Float64.(df[!, :Seconds]);
p = StatsPlots.@df df StatsPlots.plot(:Size, :GFLOPS; group = :Library, legend = :bottomright)
@test p isa StatsPlots.Plots.Plot
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false) #test that threads=false at least doesn't throw somewhere.
dfmin = BLASBenchmarksCPU.benchmark_result_df(benchmark_result) # minimum
dfmedian = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :median)
dfmean = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :mean)
dfmax = BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :maximum)
@test_throws ArgumentError BLASBenchmarksCPU.benchmark_result_df(benchmark_result, :foobar)
@test dfmin isa BLASBenchmarksCPU.DataFrame
@test dfmedian isa BLASBenchmarksCPU.DataFrame
@test dfmean isa BLASBenchmarksCPU.DataFrame
@test dfmax isa BLASBenchmarksCPU.DataFrame
for df ∈ (dfmin,dfmedian,dfmean,dfmax)
df[!, :Size] = Float64.(df[!, :Size]);
df[!, :GFLOPS] = Float64.(df[!, :GFLOPS]);
df[!, :Seconds] = Float64.(df[!, :Seconds]);
p = StatsPlots.@df df StatsPlots.plot(:Size, :GFLOPS; group = :Library, legend = :bottomright)
@test p isa StatsPlots.Plots.Plot
end
@test all(dfmin[!, :GFLOPS] .≥ dfmedian[!, :GFLOPS])
@test all(dfmin[!, :GFLOPS] .≥ dfmean[!, :GFLOPS])
@test all(dfmin[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
@test any(dfmin[!, :GFLOPS] .≠ dfmedian[!, :GFLOPS])
@test any(dfmin[!, :GFLOPS] .≠ dfmean[!, :GFLOPS])
@test any(dfmin[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
@test any(dfmedian[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
@test any(dfmean[!, :GFLOPS] .≥ dfmax[!, :GFLOPS])
@test any(dfmedian[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
@test any(dfmean[!, :GFLOPS] .≠ dfmax[!, :GFLOPS])
end
19 changes: 19 additions & 0 deletions test/main.jl
@@ -19,5 +19,24 @@ for T in [Float64, Float32]
BLASBenchmarksCPU.plot(
benchmark_result;
plot_directory = plot_directory,
displayplot = false
)
BLASBenchmarksCPU.plot(
benchmark_result;
plot_directory = plot_directory,
measure = :median,
displayplot = false
)
BLASBenchmarksCPU.plot(
benchmark_result;
plot_directory = plot_directory,
measure = :mean,
displayplot = false
)
BLASBenchmarksCPU.plot(
benchmark_result;
plot_directory = plot_directory,
measure = :maximum,
displayplot = false
)
end