Skip to content

Commit dd3fbf2

Browse files
authored
Add option to specify which summary stat you want the benchmark to return; defaults to `minimum`. (#41)
1 parent 99715c8 commit dd3fbf2

File tree

3 files changed

+45
-15
lines changed

3 files changed

+45
-15
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "BLASBenchmarksCPU"
22
uuid = "5fdc822c-4560-4d20-af7e-e5ee461714d5"
33
authors = ["Chris Elrod <[email protected]> and contributors"]
4-
version = "0.2.0"
4+
version = "0.2.1"
55

66
[deps]
77
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"

src/runbenchmark.jl

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,32 @@ function maybe_sleep(x)
5353
end
5454

5555
function benchmark_fun!(
56-
f!::F, C, A, B, sleep_time, force_belapsed = false, reference = nothing
56+
f!::F, summarystat, C, A, B, sleep_time, force_belapsed = false, reference = nothing
5757
) where {F}
5858
maybe_sleep(sleep_time)
59-
tmin = @elapsed f!(C, A, B)
59+
t0 = @elapsed f!(C, A, B)
6060
isnothing(reference) || @assert C ≈ reference
61-
if force_belapsed || 2tmin < BenchmarkTools.DEFAULT_PARAMETERS.seconds
61+
if force_belapsed || 2t0 < BenchmarkTools.DEFAULT_PARAMETERS.seconds
6262
maybe_sleep(sleep_time)
63-
tmin = min(tmin, @belapsed $f!($C, $A, $B))
64-
else#if tmin < BenchmarkTools.DEFAULT_PARAMETERS.seconds
63+
br = @benchmark $f!($C, $A, $B)
64+
tret = summarystat(br).time
65+
if summarystat === minimum # don't want to do this for `median` or `mean`, for example
66+
tret = min(tret, t0)
67+
end
68+
else
6569
maybe_sleep(sleep_time)
66-
tmin = min(tmin, @elapsed f!(C, A, B))
67-
if tmin < 2BenchmarkTools.DEFAULT_PARAMETERS.seconds
70+
t1 = @elapsed f!(C, A, B)
71+
maybe_sleep(sleep_time)
72+
t2 = @elapsed f!(C, A, B)
73+
if (t0+t1) < 4BenchmarkTools.DEFAULT_PARAMETERS.seconds
6874
maybe_sleep(sleep_time)
69-
tmin = min(tmin, @elapsed f!(C, A, B))
75+
t3 = @elapsed f!(C, A, B)
76+
tret = summarystat((t0, t1, t2, t3))
77+
else
78+
tret = summarystat((t0, t1, t2))
7079
end
7180
end
72-
tmin
81+
return tret
7382
end
7483
_mat_size(M, N, ::typeof(adjoint)) = (N, M)
7584
_mat_size(M, N, ::typeof(transpose)) = (N, M)
@@ -79,7 +88,6 @@ function alloc_mat(_M, _N, memory::Vector{T}, off, f = identity) where {T}
7988
A = f(reshape(view(memory, (off+1):(off+M*N)), (M, N)))
8089
A, off + align(M*N, T)
8190
end
82-
8391
matmul_sizes(s::Integer) = (s,s,s)
8492
matmul_sizes(mkn::Tuple{Vararg{Integer,3}}) = mkn
8593
matmul_length(s) = prod(matmul_sizes(s))
@@ -174,14 +182,35 @@ function default_libs(::Type{T}) where {T}
174182
end
175183
end
176184

185+
186+
177187
"""
178188
runbench(T = Float64;
179189
libs = default_libs(T),
180190
sizes = logspace(2, 4000, 200),
181191
threaded::Bool = Threads.nthreads() > 1,
182192
A_transform = identity,
183193
B_transform = identity,
184-
sleep_time = 0.0)
194+
sleep_time = 0.0,
195+
summarystat = minimum)
196+
197+
- T: The element type of the matrices.
198+
- libs: Libraries to benchmark.
199+
- sizes: Sizes of matrices to benchmark. Must be an iterable with either
200+
`eltype(sizes) === Int` or `eltype(sizes) === NTuple{3,Int}`.
201+
If the former, the matrices are square, with each dimension equal to the value.
202+
If `i::NTuple{3,Int}`, it benchmarks `C = A * B` where `A` is `i[1]` by `i[2]`,
203+
`B` is `i[2]` by `i[3]` and `C` is `i[1]` by `i[3]`.
204+
- threaded: Should it benchmark multithreaded implementations?
205+
- A_transform: a function to apply to `A`. Defaults to `identity`, but can be `adjoint`.
206+
- B_transform: a function to apply to `B`. Defaults to `identity`, but can be `adjoint`.
207+
- sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
208+
in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
209+
means of trying to get more reliable benchmarks. Instead, it's recommended you disable
210+
your system's turbo. Disabling it -- and reenabling when you're done benchmarking --
211+
should be possible without requiring a reboot.
212+
- summarystat: Which summary statistic should be reported? Defaults to `minimum`
213+
185214
"""
186215
function runbench(
187216
::Type{T} = Float64;
@@ -190,7 +219,8 @@ function runbench(
190219
threaded::Bool = Threads.nthreads() > 1,
191220
A_transform = identity,
192221
B_transform = identity,
193-
sleep_time = 0.0
222+
sleep_time = 0.0,
223+
summarystat = minimum
194224
) where {T}
195225
if threaded
196226
mkl_set_num_threads(num_cores())
@@ -230,7 +260,7 @@ function runbench(
230260
for i ∈ eachindex(funcs)
231261
C, ref = i == 1 ? (C0, nothing) : (fill!(C1,junk(T)), C0)
232262
t = benchmark_fun!(
233-
funcs[i], C, A, B, sleep_time, force_belapsed, ref
263+
funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
234264
)
235265
gflops = 2e-9M*K*N / t
236266
times[j,i] = t

test/interface.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import BLASBenchmarksCPU
33
import StatsPlots
44
@testset "Interface" begin
5-
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false) #test that threads=false at least doesn't throw somewhere.
5+
benchmark_result = BLASBenchmarksCPU.runbench(Float64; sizes = [1, 2, 5, 10, 20, 50, 100, 200], threaded=false, summarystat = BLASBenchmarksCPU.median) #test that threads=false at least doesn't throw somewhere.
66
df = BLASBenchmarksCPU.benchmark_result_df(benchmark_result)
77
@test df isa BLASBenchmarksCPU.DataFrame
88
df[!, :Size] = Float64.(df[!, :Size]);

0 commit comments

Comments
 (0)