@@ -53,23 +53,32 @@ function maybe_sleep(x)
53
53
end
54
54
55
55
"""
    benchmark_fun!(f!, summarystat, C, A, B, sleep_time,
                   force_belapsed = false, reference = nothing)

Time `f!(C, A, B)` and return one summary time, in seconds.

- `f!`: the function to benchmark; it is called as `f!(C, A, B)`.
- `summarystat`: the statistic used to summarize the timings (e.g. `minimum`,
  `median`). It is applied either to the `@benchmark` result or to a tuple of
  `@elapsed` timings, depending on the branch taken.
- `sleep_time`: forwarded to `maybe_sleep` before each timed run.
- `force_belapsed`: if `true`, always use the `@benchmark` branch.
- `reference`: if not `nothing`, `C` is asserted to be `≈ reference` after the
  first run.
"""
function benchmark_fun!(
    f!::F, summarystat, C, A, B, sleep_time, force_belapsed = false, reference = nothing
) where {F}
    maybe_sleep(sleep_time)
    t0 = @elapsed f!(C, A, B)
    isnothing(reference) || @assert C ≈ reference
    if force_belapsed || 2 * t0 < BenchmarkTools.DEFAULT_PARAMETERS.seconds
        # Fast enough that a full `@benchmark` run fits in the time budget.
        maybe_sleep(sleep_time)
        br = @benchmark $f!($C, $A, $B)
        # BenchmarkTools estimates report `time` in nanoseconds, whereas `@elapsed`
        # reports seconds; convert so that every branch returns seconds.
        tret = 1e-9 * summarystat(br).time
        if summarystat === minimum # don't want to do this for `median` or `mean`, for example
            tret = min(tret, t0)
        end
    else
        # Too slow for `@benchmark`: take a handful of single-shot timings instead.
        maybe_sleep(sleep_time)
        t1 = @elapsed f!(C, A, B)
        maybe_sleep(sleep_time)
        t2 = @elapsed f!(C, A, B)
        if (t0 + t1) < 4 * BenchmarkTools.DEFAULT_PARAMETERS.seconds
            maybe_sleep(sleep_time)
            t3 = @elapsed f!(C, A, B)
            tret = summarystat((t0, t1, t2, t3))
        else
            tret = summarystat((t0, t1, t2))
        end
    end
    return tret
end
74
83
# For the `adjoint` transform, return the two given sizes in swapped order.
_mat_size(M, N, ::typeof(adjoint)) = (N, M)
75
84
# For the `transpose` transform, return the two given sizes in swapped order.
_mat_size(M, N, ::typeof(transpose)) = (N, M)
@@ -79,7 +88,6 @@ function alloc_mat(_M, _N, memory::Vector{T}, off, f = identity) where {T}
79
88
A = f (reshape (view (memory, (off+ 1 ): (off+ M* N)), (M, N)))
80
89
A, off + align (M* N, T)
81
90
end
82
-
83
91
# A single integer size is expanded to three equal sizes.
matmul_sizes(s::Integer) = (s, s, s)
84
92
# A tuple of three integer sizes is returned unchanged.
matmul_sizes(mkn::Tuple{Vararg{Integer,3}}) = mkn
85
93
# Product of the three sizes that `matmul_sizes` returns for `s`.
matmul_length(s) = prod(matmul_sizes(s))
@@ -174,14 +182,35 @@ function default_libs(::Type{T}) where {T}
174
182
end
175
183
end
176
184
185
+
186
+
177
187
"""
178
188
runbench(T = Float64;
179
189
libs = default_libs(T),
180
190
sizes = logspace(2, 4000, 200),
181
191
threaded::Bool = Threads.nthreads() > 1,
182
192
A_transform = identity,
183
193
B_transform = identity,
184
- sleep_time = 0.0)
194
+ sleep_time = 0.0,
195
+ summarystat = minimum)
196
+
197
+ - T: The element type of the matrices.
198
+ - libs: Libraries to benchmark.
199
+ - sizes: Sizes of matrices to benchmark. Must be an iterable with either
200
+ `eltype(sizes) === Int` or `eltype(sizes) === NTuple{3,Int}`.
201
+ If the former, the matrices are square, with each dimension equal to the value.
202
+ If `i::NTuple{3,Int}`, it benchmarks `C = A * B` where `A` is `i[1]` by `i[2]`,
203
+ `B` is `i[2]` by `i[3]` and `C` is `i[1]` by `i[3]`.
204
+ - threaded: Should it benchmark multithreaded implementations?
205
+ - A_transform: a function to apply to `A`. Defaults to `identity`, but can be `adjoint`.
206
+ - B_transform: a function to apply to `B`. Defaults to `identity`, but can be `adjoint`.
207
+ - sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
208
+ in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
209
+ means of trying to get more reliable benchmarks. Instead, it's recommended you disable
210
+ your system's turbo. Disabling it -- and reenabling when you're done benchmarking --
211
+ should be possible without requiring a reboot.
212
+ - summarystat: Which summary statistic (e.g. `minimum`, `median`, `mean`) should be reported? Defaults to `minimum`.
213
+
185
214
"""
186
215
function runbench (
187
216
:: Type{T} = Float64;
@@ -190,7 +219,8 @@ function runbench(
190
219
threaded:: Bool = Threads. nthreads () > 1 ,
191
220
A_transform = identity,
192
221
B_transform = identity,
193
- sleep_time = 0.0
222
+ sleep_time = 0.0 ,
223
+ summarystat = minimum
194
224
) where {T}
195
225
if threaded
196
226
mkl_set_num_threads (num_cores ())
@@ -230,7 +260,7 @@ function runbench(
230
260
for i ∈ eachindex (funcs)
231
261
C, ref = i == 1 ? (C0, nothing ) : (fill! (C1,junk (T)), C0)
232
262
t = benchmark_fun! (
233
- funcs[i], C, A, B, sleep_time, force_belapsed, ref
263
+ funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
234
264
)
235
265
gflops = 2e-9 M* K* N / t
236
266
times[j,i] = t
0 commit comments