1
1
"""
    BenchmarkResult{T,I}

Results of a benchmark run over element type `T`.

# Fields
- `libraries`: the benchmarked libraries.
- `sizes`: the benchmarked problem sizes; each entry is an `Int` or an `NTuple{3,Int}`.
- `gflops`, `times`: 3-d arrays indexed as `[size, library, measure]`.
- `threaded`: whether the benchmark was run threaded.

# Constructor

    BenchmarkResult{T}(libraries, sizes, gflops, times, threaded)

`gflops` and `times` are passed with the measure as the *first* dimension
(`[measure, size, library]`); the constructor permutes them so that the measure
becomes the last dimension. `I` is taken from `eltype(sizes)`.
"""
struct BenchmarkResult{T,I<:Union{Int,NTuple{3,Int}}}
    libraries::Vector{Symbol}
    sizes::Vector{I}
    gflops::Array{Float64,3}
    times::Array{Float64,3}
    threaded::Bool
    function BenchmarkResult{T}(libraries, sizes, gflops, times, threaded) where {T}
        # Move the measure axis from first to last: out[s, l, m] == in[m, s, l].
        gflopsperm = permutedims(gflops, (2, 3, 1))
        timesperm = permutedims(times, (2, 3, 1))
        I = eltype(sizes)
        return new{T,I}(
            libraries, convert(Vector{I}, sizes), gflopsperm, timesperm, threaded
        )
    end
end
8
14
9
15
"""
@@ -13,23 +19,37 @@ function benchmark_result_type(::BenchmarkResult{T}) where {T}
13
19
return T
14
20
end
15
21
16
- function _benchmark_result_df (sizes, libraries, mat)
22
"""
    get_measure_index(measure::Symbol)::Int

Return the position of `measure` within
`(:minimum, :median, :mean, :maximum, :hmean)`.

Throws an `ArgumentError` if `measure` is not one of those symbols.
"""
function get_measure_index(measure::Symbol)::Int
    j = findfirst(==(measure), (:minimum, :median, :mean, :maximum, :hmean))
    if j === nothing
        throw(ArgumentError("`measure` argument must be one of (:minimum,:median,:mean,:maximum,:hmean), but was $(repr(measure))."))
    end
    return j
end
29
# Build a `DataFrame` with a `Size` column holding `sizes` and one column per
# entry of `libraries`. Column `libraries[i]` is filled with `mat[:, i, j]`, where
# `j` is the index returned by `get_measure_index(measure)`.
function _benchmark_result_df(sizes, libraries, mat, measure)
    j = get_measure_index(measure)
    df = DataFrame(Size = sizes)
    for i ∈ eachindex(libraries)
        setproperty!(df, libraries[i], mat[:, i, j])
    end
    return df
end
23
# Convenience method: read the array stored in field `s` of `br` (default
# `:gflops`) and tabulate it for the requested `measure` (default `:minimum`).
function _benchmark_result_df(br::BenchmarkResult, s::Symbol = :gflops, measure = :minimum)
    return _benchmark_result_df(br.sizes, br.libraries, getproperty(br, s), measure)
end
26
40
27
41
28
42
"""
29
- benchmark_result_df(benchmark_result::BenchmarkResult)
43
+ benchmark_result_df(benchmark_result::BenchmarkResult, `measure` = :minimum)
44
+
45
+ `measure` refers to the BenchmarkTools summary on times. Valid options are:
46
+ `:minimum`, `:median`, `:mean`, `:maximum`, and `:hmean`.
47
+
48
+ - `:minimum` would yield the maximum `GFLOPS`, and would be the usual estimate used in Julia.
49
+ - `:hmean`, the harmonic mean of the times, is useful if you want an average GFLOPS, instead of a GFLOPS computed with the average times.
30
50
"""
31
- function benchmark_result_df (benchmark_result:: BenchmarkResult )
32
- df = _benchmark_result_df (benchmark_result, :times )
51
+ function benchmark_result_df (benchmark_result:: BenchmarkResult , measure = :minimum )
52
+ df = _benchmark_result_df (benchmark_result, :times , measure )
33
53
df = stack (df, Not (:Size ), variable_name = :Library , value_name = :Seconds )
34
54
df. GFLOPS = @. 2e-9 * matmul_length (df. Size) ./ df. Seconds
35
55
return df
@@ -61,10 +81,11 @@ function benchmark_fun!(
61
81
if force_belapsed || 2 t0 < BenchmarkTools. DEFAULT_PARAMETERS. seconds
62
82
maybe_sleep (sleep_time)
63
83
br = @benchmark $ f! ($ C, $ A, $ B)
64
- tret = summarystat (br). time
65
- if summarystat === minimum # don't want to do this for `median` or `mean`, for example
66
- tret = min (tret, t0)
67
- end
84
+ tmin = min (1e-9 minimum (br). time, t0)
85
+ tmedian = 1e-9 median (br). time
86
+ tmean = 1e-9 mean (br). time
87
+ tmax = 1e-9 maximum (br). time # We'll exclude the first for this...
88
+ thmean⁻¹ = 1e9 mean (inv, br. times)
68
89
else
69
90
maybe_sleep (sleep_time)
70
91
t1 = @elapsed f! (C, A, B)
@@ -73,12 +94,20 @@ function benchmark_fun!(
73
94
if (t0+ t1) < 4 BenchmarkTools. DEFAULT_PARAMETERS. seconds
74
95
maybe_sleep (sleep_time)
75
96
t3 = @elapsed f! (C, A, B)
76
- tret = summarystat ((t0, t1, t2, t3))
97
+ tmin = minimum ((t0, t1, t2, t3))
98
+ tmedian = median ((t0, t1, t2, t3))
99
+ tmean = mean ((t0, t1, t2, t3))
100
+ tmax = maximum ((t0, t1, t2, t3))
101
+ thmean⁻¹ = mean (inv, (t0, t1, t2, t3))
77
102
else
78
- tret = summarystat ((t0, t1, t2))
103
+ tmin = minimum ((t0, t1, t2))
104
+ tmedian = median ((t0, t1, t2))
105
+ tmean = mean ((t0, t1, t2))
106
+ tmax = maximum ((t0, t1, t2))
107
+ thmean⁻¹ = mean (inv, (t0, t1, t2))
79
108
end
80
109
end
81
- return tret
110
+ return tmin, tmedian, tmean, tmax, thmean⁻¹
82
111
end
83
112
_mat_size (M, N, :: typeof (adjoint)) = (N, M)
84
113
_mat_size (M, N, :: typeof (transpose)) = (N, M)
191
220
threaded::Bool = Threads.nthreads() > 1,
192
221
A_transform = identity,
193
222
B_transform = identity,
194
- sleep_time = 0.0,
195
- summarystat = median)
223
+ sleep_time = 0.0)
196
224
197
225
- T: The element type of the matrices.
198
226
- libs: Libraries to benchmark.
207
235
- sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
208
236
in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
209
237
means of trying to get more reliable benchmarks. Instead, it's recommended you disable
210
- your systems turbo. Disabling it -- and reenabling when you're done benchmarking --
238
+ your system's turbo. Disabling it -- and reenabling when you're done benchmarking --
211
239
should be possible without requiring a reboot.
212
- - summarystat: Which summary statistic should be reported? Defaults to `minimum`
213
240
214
241
"""
215
242
function runbench (
@@ -241,14 +268,15 @@ function runbench(
241
268
end
242
269
memory = Vector {T} (undef, max_matrix_sizes)
243
270
library = reduce (vcat, (libs for _ ∈ eachindex (sizevec)))
244
- times = Matrix {Float64} (undef, length (sizes), length (libs))
271
+ times = Array {Float64} (undef, 5 , length (sizes), length (libs))
245
272
gflop = similar (times);
246
273
k = 0
247
274
248
275
force_belapsed = true # force when compiling
249
276
250
277
p = Progress (length (sizes))
251
- last_perfs = Vector{Tuple{Symbol,Union{Float64,NTuple{3 ,Int}}}}(undef, length (libs)+ 1 )
278
+ gflop_report_type = NamedTuple{(:MedianGFLOPS , :MaxGFLOPS ), Tuple{Float64, Float64}}
279
+ last_perfs = Vector{Tuple{Symbol,Union{gflop_report_type,NTuple{3 ,Int}}}}(undef, length (libs)+ 1 )
252
280
for (j,s) ∈ enumerate (sizevec)
253
281
M, K, N = matmul_sizes (s)
254
282
A, off = alloc_mat (M, K, memory, 0 , A_transform)
@@ -262,13 +290,22 @@ function runbench(
262
290
t = benchmark_fun! (
263
291
funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
264
292
)
265
- gflops = 2e-9 M* K* N / t
266
- times[j,i] = t
267
- gflop[j,i] = gflops
268
- last_perfs[i+ 1 ] = (libs[i], round (gflops,sigdigits= 4 ))
293
+ gffactor = 2e-9 M* K* N
294
+ @inbounds for k ∈ 1 : 4
295
+ times[k,j,i] = t[k]
296
+ gflop[k,j,i] = gffactor / t[k]
297
+ end
298
+ times[5 ,j,i] = inv (t[5 ])
299
+ gflop[5 ,j,i] = gffactor * t[5 ]
300
+ gflops = round .((gflop[1 ,j,i], gflop[2 ,j,i]), sigdigits = 4 )
301
+ gflops = (
302
+ MedianGFLOPS = round (gflop[2 ,j,i], sigdigits = 4 ),
303
+ MaxGFLOPS = round (gflop[1 ,j,i], sigdigits = 4 )
304
+ )
305
+ last_perfs[i+ 1 ] = (libs[i], gflops)
269
306
end
270
307
ProgressMeter. next! (p; showvalues = last_perfs)
271
308
force_belapsed = false
272
309
end
273
- BenchmarkResult {T,eltype(sizes) } (libs, sizes, gflop, times, threaded)
310
+ BenchmarkResult {T} (libs, sizes, gflop, times, threaded)
274
311
end
0 commit comments