Skip to content

Commit 134ee6f

Browse files
committed
Updates to benchmarks.
1 parent f09c753 commit 134ee6f

28 files changed

+469
-184
lines changed

benchmark/benchmarkflops.jl

Lines changed: 85 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,10 @@ function matmul_bench!(br, C, A, B, i)
5252
@assert C ≈ Cblas "eigen gemm wrong?"; fill!(C, NaN)
5353
br[10,i] = n_gflop / @belapsed iegemm!($C, $A, $B)
5454
@assert C ≈ Cblas "i-eigen gemm wrong?"; fill!(C, NaN)
55-
br[11,i] = n_gflop / @belapsed dgemmjit!($C, $A, $B)
56-
@assert C ≈ Cblas "MKL JIT gemm wrong?"; fill!(C, NaN)
57-
br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
55+
# br[11,i] = n_gflop / @belapsed dgemmjit!($C, $A, $B)
56+
# @assert C ≈ Cblas "MKL JIT gemm wrong?"; fill!(C, NaN)
57+
# br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
58+
br[end,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
5859
@assert C ≈ Cblas "LoopVec gemm wrong?"
5960
end
6061
function A_mul_B_bench!(br, s, i)
@@ -93,35 +94,36 @@ function At_mul_Bt_bench!(br, s, i)
9394
matmul_bench!(br, C, A, B, i)
9495
end
9596

96-
const BLASTESTS = [
97+
blastests() = [
9798
BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS",
9899
"Julia", "Clang-Polly",
99100
"GFortran", "GFort-intrinsic",
100101
"icc", "ifort", "ifort-intrinsic",
101-
"Clang++ & Eigen-3", "icpc & Eigen-3",
102-
"MKL JIT", "LoopVectorization"
102+
"g++ & Eigen-3", "icpc & Eigen-3",
103+
"LoopVectorization"
104+
# "MKL JIT", "LoopVectorization"
103105
]
104106

105107
function benchmark_AmulB(sizes)
106-
br = BenchmarkResult(BLASTESTS, sizes)
108+
br = BenchmarkResult(blastests(), sizes)
107109
sm = br.sizedresults.results
108110
pmap(is -> A_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
109111
br
110112
end
111113
function benchmark_AmulBt(sizes)
112-
br = BenchmarkResult(BLASTESTS, sizes)
114+
br = BenchmarkResult(blastests(), sizes)
113115
sm = br.sizedresults.results
114116
pmap(is -> A_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
115117
br
116118
end
117119
function benchmark_AtmulB(sizes)
118-
br = BenchmarkResult(BLASTESTS, sizes)
120+
br = BenchmarkResult(blastests(), sizes)
119121
sm = br.sizedresults.results
120122
pmap(is -> At_mul_B_bench!(sm, is[2], is[1]), enumerate(sizes))
121123
br
122124
end
123125
function benchmark_AtmulBt(sizes)
124-
br = BenchmarkResult(BLASTESTS, sizes)
126+
br = BenchmarkResult(blastests(), sizes)
125127
sm = br.sizedresults.results
126128
pmap(is -> At_mul_Bt_bench!(sm, is[2], is[1]), enumerate(sizes))
127129
br
@@ -150,7 +152,7 @@ function dot_bench!(br, s, i)
150152
@assert jdotavx(a,b) ≈ dotblas "LoopVec dot wrong?"
151153
end
152154
function benchmark_dot(sizes)
153-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
155+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
154156
br = BenchmarkResult(tests, sizes)
155157
sm = br.sizedresults.results
156158
pmap(is -> dot_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -179,7 +181,7 @@ function selfdot_bench!(br, s, i)
179181
@assert jselfdotavx(a) ≈ dotblas "LoopVec dot wrong?"
180182
end
181183
function benchmark_selfdot(sizes)
182-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
184+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
183185
br = BenchmarkResult(tests, sizes)
184186
sm = br.sizedresults.results
185187
pmap(is -> selfdot_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -211,9 +213,9 @@ function gemv_bench!(br, x, A, y, i)
211213
@assert x ≈ xblas "eigen wrong?"; fill!(x, NaN);
212214
br[10,i] = n_gflop / @belapsed iegemv!($x, $A, $y)
213215
@assert x ≈ xblas "i-eigen wrong?"; fill!(x, NaN);
214-
br[11,i] = n_gflop / @belapsed dgemmjit!($x, $A, $y)
215-
@assert x ≈ xblas "gemmjit wrong?"; fill!(x, NaN);
216-
br[12,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
216+
# br[11,i] = n_gflop / @belapsed dgemmjit!($x, $A, $y)
217+
# @assert x ≈ xblas "gemmjit wrong?"; fill!(x, NaN);
218+
br[end,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
217219
@assert x ≈ xblas "LoopVec wrong?"
218220
end
219221
function A_mul_vb_bench!(br, s, i)
@@ -231,13 +233,13 @@ function At_mul_vb_bench!(br, s, i)
231233
gemv_bench!(br, x, A, y, i)
232234
end
233235
function benchmark_Amulvb(sizes)
234-
br = BenchmarkResult(BLASTESTS, sizes)
236+
br = BenchmarkResult(blastests(), sizes)
235237
sm = br.sizedresults.results
236238
pmap(is -> A_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
237239
br
238240
end
239241
function benchmark_Atmulvb(sizes)
240-
br = BenchmarkResult(BLASTESTS, sizes)
242+
br = BenchmarkResult(blastests(), sizes)
241243
sm = br.sizedresults.results
242244
pmap(is -> At_mul_vb_bench!(sm, is[2], is[1]), enumerate(sizes))
243245
br
@@ -267,7 +269,7 @@ function dot3_bench!(br, s, i)
267269
@assert jdot3avx(x, A, y) ≈ dotblas "LoopVec dot wrong?"
268270
end
269271
function benchmark_dot3(sizes)
270-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
272+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
271273
br = BenchmarkResult(tests, sizes)
272274
sm = br.sizedresults.results
273275
pmap(is -> dot3_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -306,7 +308,7 @@ function sse_bench!(br, s, i)
306308
@assert jOLSlp_avx(y, X, β) ≈ lpblas "LoopVec wrong?"
307309
end
308310
function benchmark_sse(sizes)
309-
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
311+
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
310312
br = BenchmarkResult(tests, sizes)
311313
sm = br.sizedresults.results
312314
pmap(is -> sse_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -360,7 +362,7 @@ function aplusBc_bench!(br, s, i)
360362
@assert D ≈ Dcopy "LoopVec wrong?"
361363
end
362364
function benchmark_aplusBc(sizes)
363-
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
365+
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
364366
br = BenchmarkResult(tests, sizes)
365367
sm = br.sizedresults.results
366368
pmap(is -> aplusBc_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -392,7 +394,7 @@ function AplusAt_bench!(br, s, i)
392394
@assert B ≈ baseB "LoopVec wrong?"
393395
end
394396
function benchmark_AplusAt(sizes)
395-
tests = ["Julia", "Clang-Polly", "GFortran", "GFortran-builtin", "icc", "ifort", "ifort-builtin", "Clang++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
397+
tests = ["Julia", "Clang-Polly", "GFortran", "GFortran-builtin", "icc", "ifort", "ifort-builtin", "g++ & Eigen-3", "icpc & Eigen-3", "LoopVectorization"]
396398
br = BenchmarkResult(tests, sizes)
397399
sm = br.sizedresults.results
398400
pmap(is -> AplusAt_bench!(sm, is[2], is[1]), enumerate(sizes))
@@ -453,3 +455,65 @@ function benchmark_logdettriangle(sizes)
453455
br
454456
end
455457

458+
459+
"""
    filter2d_bench_run!(br, s, i, K)

Benchmark six implementations of the 2-d filter kernel for problem size `s`
and store one throughput figure per implementation in column `i` of `br`.

Rows 1 through 6 of `br` are filled in the order the kernels are timed below:
`filter2d!`, `cfilter2d!`, `ffilter2d!`, `icfilter2d!`, `iffilter2d!`,
`filter2davx!`. Each entry is `n_gflop` divided by the `@belapsed` timing.

The output of `filter2d!` is kept as the reference. Before each of the other
kernels is timed, the output buffer is overwritten with `NaN`, so the
following `≈` check can only pass if that kernel really wrote the expected
values, rather than passing on a stale result left by an earlier kernel.
"""
function filter2d_bench_run!(br, s, i, K)
    # Input is two larger than the output in each dimension; the output array
    # has its indices shifted by one in each dimension.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    # 2*Mk*Nk - 1 floating-point operations per output element, s^2 outputs.
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2

    br[1, i] = n_gflop / @belapsed filter2d!($B, $A, $K)
    Bcopy = copy(B); fill!(B, NaN)  # reference result; poison the buffer

    br[2, i] = n_gflop / @belapsed cfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "Clang wrong?"; fill!(B, NaN)

    br[3, i] = n_gflop / @belapsed ffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "Fort wrong?"; fill!(B, NaN)

    br[4, i] = n_gflop / @belapsed icfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "icc wrong?"; fill!(B, NaN)

    br[5, i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy "ifort wrong?"; fill!(B, NaN)

    br[6, i] = n_gflop / @belapsed filter2davx!($B, $A, $K)
    @assert B ≈ Bcopy "LoopVec wrong?"
end
477+
"""
    benchmark_filter2d(sizes, K)

Run `filter2d_bench_run!` with kernel `K` for every entry of `sizes`,
distributing the work with `pmap`, and return the populated
`BenchmarkResult`. Column `j` of the result corresponds to the `j`-th
element of `sizes`.
"""
function benchmark_filter2d(sizes, K)
    labels = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
    result = BenchmarkResult(labels, sizes)
    table = result.sizedresults.results
    # `enumerate` yields (column index, size) pairs.
    pmap(enumerate(sizes)) do (col, sz)
        filter2d_bench_run!(table, sz, col, K)
    end
    return result
end
484+
485+
"""
    benchmark_filter2ddynamic(sizes)

Run `benchmark_filter2d` over `sizes` with a random 3×3 `Float64` kernel
wrapped in an `OffsetArray` whose axes are `-1:1` in both dimensions.
"""
function benchmark_filter2ddynamic(sizes)
    kernel = OffsetArray(rand(Float64, 3, 3), -1:1, -1:1)
    return benchmark_filter2d(sizes, kernel)
end
489+
"""
    benchmark_filter2d3x3(sizes)

Run `benchmark_filter2d` over `sizes` with a random 3×3 kernel stored as a
`SizedOffsetMatrix{Float64,-1,1,-1,1}`.
"""
function benchmark_filter2d3x3(sizes)
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    return benchmark_filter2d(sizes, kernel)
end
493+
494+
"""
    filter2dunrolled_bench_run!(br, s, i, K)

Benchmark six implementations of the unrolled 2-d filter kernel for problem
size `s` and store one throughput figure per implementation in column `i`
of `br`.

Rows 1 through 6 of `br` are filled in the order the kernels are timed below:
`filter2dunrolled!`, `cfilter2dunrolled!`, `ffilter2dunrolled!`,
`icfilter2dunrolled!`, `iffilter2dunrolled!`, `filter2dunrolledavx!`. Each
entry is `n_gflop` divided by the `@belapsed` timing.

The output of `filter2dunrolled!` is kept as the reference. Before each of
the other kernels is timed, the output buffer is overwritten with `NaN`, so
the following `≈` check can only pass if that kernel really wrote the
expected values, rather than passing on a stale result left by an earlier
kernel.
"""
function filter2dunrolled_bench_run!(br, s, i, K)
    # Input is two larger than the output in each dimension; the output array
    # has its indices shifted by one in each dimension.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    # 2*Mk*Nk - 1 floating-point operations per output element, s^2 outputs.
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2

    br[1, i] = n_gflop / @belapsed filter2dunrolled!($B, $A, $K)
    Bcopy = copy(B); fill!(B, NaN)  # reference result; poison the buffer

    br[2, i] = n_gflop / @belapsed cfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "Clang wrong?"; fill!(B, NaN)

    br[3, i] = n_gflop / @belapsed ffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "Fort wrong?"; fill!(B, NaN)

    br[4, i] = n_gflop / @belapsed icfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "icc wrong?"; fill!(B, NaN)

    br[5, i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy "ifort wrong?"; fill!(B, NaN)

    br[6, i] = n_gflop / @belapsed filter2dunrolledavx!($B, $A, $K)
    @assert B ≈ Bcopy "LoopVec wrong?"
end
512+
"""
    benchmark_filter2dunrolled(sizes)

Run `filter2dunrolled_bench_run!` for every entry of `sizes`, distributing
the work with `pmap`, using a random 3×3 kernel stored as a
`SizedOffsetMatrix{Float64,-1,1,-1,1}`. Return the populated
`BenchmarkResult`; column `j` corresponds to the `j`-th element of `sizes`.
"""
function benchmark_filter2dunrolled(sizes)
    labels = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
    result = BenchmarkResult(labels, sizes)
    table = result.sizedresults.results
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    # `enumerate` yields (column index, size) pairs.
    pmap(enumerate(sizes)) do (col, sz)
        filter2dunrolled_bench_run!(table, sz, col, kernel)
    end
    return result
end

benchmark/driver.jl

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
# const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
33
# includet(joinpath(LOOPVECBENCHDIR, "driver.jl"))
44

5+
using Distributed
6+
57
pkgdir(pkg::String) = abspath(joinpath(dirname(Base.find_package(pkg)), ".."))
68
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmark")
79
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
810
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))
911

1012

11-
using Distributed
12-
1313
addprocs((Sys.CPU_THREADS >> 1)-1); nprocs()
1414

1515
@everywhere begin
@@ -19,25 +19,36 @@ addprocs((Sys.CPU_THREADS >> 1)-1); nprocs()
1919
# BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1
2020
end
2121

22-
AmulB_bench = benchmark_AmulB(2:256)
23-
AmulBt_bench = benchmark_AmulBt(2:256)
24-
AtmulB_bench = benchmark_AtmulB(2:256)
25-
AtmulBt_bench = benchmark_AtmulBt(2:256)
26-
dot_bench = benchmark_dot(2:256)
27-
selfdot_bench = benchmark_selfdot(2:256)
28-
Amulvb_bench = benchmark_Amulvb(2:256)
29-
Atmulvb_bench = benchmark_Atmulvb(2:256)
30-
dot3_bench = benchmark_dot3(2:256)
31-
sse_bench = benchmark_sse(2:256)
32-
aplusBc_bench = benchmark_aplusBc(2:256)
33-
AplusAt_bench = benchmark_AplusAt(2:256)
34-
exp_bench = benchmark_exp(2:256)
35-
randomaccess_bench = benchmark_random_access(2:256)
36-
logdettriangle_bench = benchmark_logdettriangle(2:256)
22+
23+
# sizes = 23:23
24+
sizes = 256:-1:2
25+
26+
filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes)
27+
filter2d_3x3_bench = benchmark_filter2d3x3(sizes)
28+
filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes)
29+
30+
AmulB_bench = benchmark_AmulB(sizes)
31+
AmulBt_bench = benchmark_AmulBt(sizes)
32+
AtmulB_bench = benchmark_AtmulB(sizes)
33+
AtmulBt_bench = benchmark_AtmulBt(sizes)
34+
dot_bench = benchmark_dot(sizes)
35+
selfdot_bench = benchmark_selfdot(sizes)
36+
Amulvb_bench = benchmark_Amulvb(sizes)
37+
Atmulvb_bench = benchmark_Atmulvb(sizes)
38+
dot3_bench = benchmark_dot3(sizes)
39+
sse_bench = benchmark_sse(sizes)
40+
aplusBc_bench = benchmark_aplusBc(sizes)
41+
AplusAt_bench = benchmark_AplusAt(sizes)
42+
exp_bench = benchmark_exp(sizes)
43+
randomaccess_bench = benchmark_random_access(sizes)
44+
logdettriangle_bench = benchmark_logdettriangle(sizes)
3745

3846
v = 1
3947
filetype = "svg"
4048
const PICTURES = joinpath(pkgdir("LoopVectorization"), "docs", "src", "assets")
49+
save(joinpath(PICTURES, "bench_filter2d_dynamic_v$v.$filetype"), plot(filter2d_dynamic_bench));
50+
save(joinpath(PICTURES, "bench_filter2d_3x3_v$v.$filetype"), plot(filter2d_3x3_bench));
51+
save(joinpath(PICTURES, "bench_filter2d_unrolled_v$v.$filetype"), plot(filter2d_unrolled_bench));
4152
save(joinpath(PICTURES, "bench_AmulB_v$v.$filetype"), plot(AmulB_bench));
4253
save(joinpath(PICTURES, "bench_AmulBt_v$v.$filetype"), plot(AmulBt_bench));
4354
save(joinpath(PICTURES, "bench_AtmulB_v$v.$filetype"), plot(AtmulB_bench));

0 commit comments

Comments
 (0)