@@ -52,9 +52,10 @@ function matmul_bench!(br, C, A, B, i)
52
52
@assert C ≈ Cblas " eigen gemm wrong?" ; fill! (C, NaN )
53
53
br[10 ,i] = n_gflop / @belapsed iegemm! ($ C, $ A, $ B)
54
54
@assert C ≈ Cblas " i-eigen gemm wrong?" ; fill! (C, NaN )
55
- br[11 ,i] = n_gflop / @belapsed dgemmjit! ($ C, $ A, $ B)
56
- @assert C ≈ Cblas " MKL JIT gemm wrong?" ; fill! (C, NaN )
57
- br[12 ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
55
+ # br[11,i] = n_gflop / @belapsed dgemmjit!($C, $A, $B)
56
+ # @assert C ≈ Cblas "MKL JIT gemm wrong?"; fill!(C, NaN)
57
+ # br[12,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
58
+ br[end ,i] = n_gflop / @belapsed gemmavx! ($ C, $ A, $ B)
58
59
@assert C ≈ Cblas " LoopVec gemm wrong?"
59
60
end
60
61
function A_mul_B_bench! (br, s, i)
@@ -93,35 +94,36 @@ function At_mul_Bt_bench!(br, s, i)
93
94
matmul_bench! (br, C, A, B, i)
94
95
end
95
96
96
"""
    blastests()

Return the vector of implementation labels used by the BLAS-style benchmarks
(`benchmark_AmulB`, `benchmark_Amulvb`, ...).

The first label depends on `BLAS.vendor()`; the remaining labels are fixed.
The vector is rebuilt on every call, so the vendor is queried at call time.
"""
function blastests()
    vendor_label = BLAS.vendor() === :mkl ? " IntelMKL" : " OpenBLAS"
    return [
        vendor_label,
        " Julia",
        " Clang-Polly",
        " GFortran",
        " GFort-intrinsic",
        " icc",
        " ifort",
        " ifort-intrinsic",
        " g++ & Eigen-3",
        " icpc & Eigen-3",
        " LoopVectorization",
        # "MKL JIT" entry is currently disabled.
    ]
end
104
106
105
107
"""
    benchmark_AmulB(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_B_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AmulB(sizes)
    result = BenchmarkResult(blastests(), sizes)
    gflops = result.sizedresults.results
    # NOTE(review): the values returned by `pmap` are discarded; results are
    # expected to arrive through writes into `gflops` — confirm that its
    # storage is visible to worker processes.
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_B_bench!(gflops, sz, idx)
    end
    return result
end
111
113
"""
    benchmark_AmulBt(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_Bt_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AmulBt(sizes)
    result = BenchmarkResult(blastests(), sizes)
    gflops = result.sizedresults.results
    # The `pmap` return values are not used; each call writes into `gflops`.
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_Bt_bench!(gflops, sz, idx)
    end
    return result
end
117
119
"""
    benchmark_AtmulB(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `At_mul_B_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_AtmulB(sizes)
    result = BenchmarkResult(blastests(), sizes)
    gflops = result.sizedresults.results
    # The `pmap` return values are not used; each call writes into `gflops`.
    pmap(enumerate(sizes)) do (idx, sz)
        At_mul_B_bench!(gflops, sz, idx)
    end
    return result
end
123
125
function benchmark_AtmulBt (sizes)
124
- br = BenchmarkResult (BLASTESTS , sizes)
126
+ br = BenchmarkResult (blastests () , sizes)
125
127
sm = br. sizedresults. results
126
128
pmap (is -> At_mul_Bt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
127
129
br
@@ -150,7 +152,7 @@ function dot_bench!(br, s, i)
150
152
@assert jdotavx (a,b) ≈ dotblas " LoopVec dot wrong?"
151
153
end
152
154
function benchmark_dot (sizes)
153
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
155
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
154
156
br = BenchmarkResult (tests, sizes)
155
157
sm = br. sizedresults. results
156
158
pmap (is -> dot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -179,7 +181,7 @@ function selfdot_bench!(br, s, i)
179
181
@assert jselfdotavx (a) ≈ dotblas " LoopVec dot wrong?"
180
182
end
181
183
function benchmark_selfdot (sizes)
182
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
184
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
183
185
br = BenchmarkResult (tests, sizes)
184
186
sm = br. sizedresults. results
185
187
pmap (is -> selfdot_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -211,9 +213,9 @@ function gemv_bench!(br, x, A, y, i)
211
213
@assert x ≈ xblas " eigen wrong?" ; fill! (x, NaN );
212
214
br[10 ,i] = n_gflop / @belapsed iegemv! ($ x, $ A, $ y)
213
215
@assert x ≈ xblas " i-eigen wrong?" ; fill! (x, NaN );
214
- br[11 ,i] = n_gflop / @belapsed dgemmjit! ($ x, $ A, $ y)
215
- @assert x ≈ xblas " gemmjit wrong?" ; fill! (x, NaN );
216
- br[12 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
216
+ # br[11,i] = n_gflop / @belapsed dgemmjit!($x, $A, $y)
217
+ # @assert x ≈ xblas "gemmjit wrong?"; fill!(x, NaN);
218
+ br[end ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
217
219
@assert x ≈ xblas " LoopVec wrong?"
218
220
end
219
221
function A_mul_vb_bench! (br, s, i)
@@ -231,13 +233,13 @@ function At_mul_vb_bench!(br, s, i)
231
233
gemv_bench! (br, x, A, y, i)
232
234
end
233
235
"""
    benchmark_Amulvb(sizes)

Create a `BenchmarkResult` labelled with `blastests()` for the given `sizes`,
run `A_mul_vb_bench!` once per `(index, size)` pair through `pmap`, and return
the `BenchmarkResult`.
"""
function benchmark_Amulvb(sizes)
    result = BenchmarkResult(blastests(), sizes)
    gflops = result.sizedresults.results
    # The `pmap` return values are not used; each call writes into `gflops`.
    pmap(enumerate(sizes)) do (idx, sz)
        A_mul_vb_bench!(gflops, sz, idx)
    end
    return result
end
239
241
function benchmark_Atmulvb (sizes)
240
- br = BenchmarkResult (BLASTESTS , sizes)
242
+ br = BenchmarkResult (blastests () , sizes)
241
243
sm = br. sizedresults. results
242
244
pmap (is -> At_mul_vb_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
243
245
br
@@ -267,7 +269,7 @@ function dot3_bench!(br, s, i)
267
269
@assert jdot3avx (x, A, y) ≈ dotblas " LoopVec dot wrong?"
268
270
end
269
271
function benchmark_dot3 (sizes)
270
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
272
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
271
273
br = BenchmarkResult (tests, sizes)
272
274
sm = br. sizedresults. results
273
275
pmap (is -> dot3_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -306,7 +308,7 @@ function sse_bench!(br, s, i)
306
308
@assert jOLSlp_avx (y, X, β) ≈ lpblas " LoopVec wrong?"
307
309
end
308
310
function benchmark_sse (sizes)
309
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
311
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
310
312
br = BenchmarkResult (tests, sizes)
311
313
sm = br. sizedresults. results
312
314
pmap (is -> sse_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -360,7 +362,7 @@ function aplusBc_bench!(br, s, i)
360
362
@assert D ≈ Dcopy " LoopVec wrong?"
361
363
end
362
364
function benchmark_aplusBc (sizes)
363
- tests = [" Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
365
+ tests = [" Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
364
366
br = BenchmarkResult (tests, sizes)
365
367
sm = br. sizedresults. results
366
368
pmap (is -> aplusBc_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -392,7 +394,7 @@ function AplusAt_bench!(br, s, i)
392
394
@assert B ≈ baseB " LoopVec wrong?"
393
395
end
394
396
function benchmark_AplusAt (sizes)
395
- tests = [" Julia" , " Clang-Polly" , " GFortran" , " GFortran-builtin" , " icc" , " ifort" , " ifort-builtin" , " Clang ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
397
+ tests = [" Julia" , " Clang-Polly" , " GFortran" , " GFortran-builtin" , " icc" , " ifort" , " ifort-builtin" , " g ++ & Eigen-3" , " icpc & Eigen-3" , " LoopVectorization" ]
396
398
br = BenchmarkResult (tests, sizes)
397
399
sm = br. sizedresults. results
398
400
pmap (is -> AplusAt_bench! (sm, is[2 ], is[1 ]), enumerate (sizes))
@@ -453,3 +455,65 @@ function benchmark_logdettriangle(sizes)
453
455
br
454
456
end
455
457
458
+
459
"""
    filter2d_bench_run!(br, s, i, K)

Time six 2-D filter implementations on a random `(s + 2) × (s + 2)` input with
kernel `K`, writing one throughput figure per implementation into `br[1:6, i]`.

# Arguments
- `br`: indexable results store; rows 1–6 at second index `i` are overwritten.
- `s`: side length of the square output.
- `i`: second index into `br` for this problem size.
- `K`: filter kernel; only `size(K)` is inspected here, the rest is left to
  the kernels being timed.

The Julia implementation (`filter2d!`) runs first and its output is kept as the
reference. Every other implementation must reproduce it (`≈`), otherwise an
`AssertionError` is raised. The output buffer is refilled with `NaN` before
each subsequent implementation runs, so a kernel that fails to write its
output cannot pass the check on the previous kernel's result.
"""
function filter2d_bench_run!(br, s, i, K)
    # Input carries a one-element border on every side; the output is an
    # s × s array whose indices are shifted by one to line up with A's interior.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    # Operation count per output element: Mk*Nk multiplies plus Mk*Nk - 1 adds.
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2

    br[1, i] = n_gflop / @belapsed filter2d!($B, $A, $K)
    Bcopy = copy(B)
    fill!(B, NaN)

    br[2, i] = n_gflop / @belapsed cfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy " Clang wrong?"
    fill!(B, NaN)

    br[3, i] = n_gflop / @belapsed ffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy " Fort wrong?"
    fill!(B, NaN)

    br[4, i] = n_gflop / @belapsed icfilter2d!($B, $A, $K)
    @assert B ≈ Bcopy " icc wrong?"
    fill!(B, NaN)

    br[5, i] = n_gflop / @belapsed iffilter2d!($B, $A, $K)
    @assert B ≈ Bcopy " ifort wrong?"
    fill!(B, NaN)

    br[6, i] = n_gflop / @belapsed filter2davx!($B, $A, $K)
    @assert B ≈ Bcopy " LoopVec wrong?"
end
477
"""
    benchmark_filter2d(sizes, K)

Create a `BenchmarkResult` for the six 2-D filter implementations and the given
`sizes`, run `filter2d_bench_run!` with kernel `K` once per `(index, size)`
pair through `pmap`, and return the `BenchmarkResult`.
"""
function benchmark_filter2d(sizes, K)
    labels = [
        " Julia", " Clang-Polly", " GFortran", " icc", " ifort", " LoopVectorization"
    ]
    result = BenchmarkResult(labels, sizes)
    gflops = result.sizedresults.results
    # The `pmap` return values are not used; each call writes into `gflops`.
    pmap(enumerate(sizes)) do (idx, sz)
        filter2d_bench_run!(gflops, sz, idx, K)
    end
    return result
end
484
+
485
"""
    benchmark_filter2ddynamic(sizes)

Run `benchmark_filter2d` over `sizes` with a random 3×3 `Float64` kernel stored
in an `OffsetArray` whose axes are `-1:1` in both dimensions.
"""
function benchmark_filter2ddynamic(sizes)
    kernel = OffsetArray(rand(Float64, 3, 3), -1:1, -1:1)
    return benchmark_filter2d(sizes, kernel)
end
489
"""
    benchmark_filter2d3x3(sizes)

Run `benchmark_filter2d` over `sizes` with a random 3×3 kernel wrapped in
`SizedOffsetMatrix{Float64,-1,1,-1,1}`.
"""
function benchmark_filter2d3x3(sizes)
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    return benchmark_filter2d(sizes, kernel)
end
493
+
494
"""
    filter2dunrolled_bench_run!(br, s, i, K)

Time six "unrolled" 2-D filter implementations on a random
`(s + 2) × (s + 2)` input with kernel `K`, writing one throughput figure per
implementation into `br[1:6, i]`.

# Arguments
- `br`: indexable results store; rows 1–6 at second index `i` are overwritten.
- `s`: side length of the square output.
- `i`: second index into `br` for this problem size.
- `K`: filter kernel; only `size(K)` is inspected here, the rest is left to
  the kernels being timed.

The Julia implementation (`filter2dunrolled!`) runs first and its output is
kept as the reference. Every other implementation must reproduce it (`≈`),
otherwise an `AssertionError` is raised. The output buffer is refilled with
`NaN` before each subsequent implementation runs, so a kernel that fails to
write its output cannot pass the check on the previous kernel's result.
"""
function filter2dunrolled_bench_run!(br, s, i, K)
    # Input carries a one-element border on every side; the output is an
    # s × s array whose indices are shifted by one to line up with A's interior.
    A = rand(s + 2, s + 2)
    B = OffsetArray(similar(A, (s, s)), 1, 1)
    Mk, Nk = size(K)
    # Operation count per output element: Mk*Nk multiplies plus Mk*Nk - 1 adds.
    n_gflop = 1e-9 * (2Mk * Nk - 1) * s^2

    br[1, i] = n_gflop / @belapsed filter2dunrolled!($B, $A, $K)
    Bcopy = copy(B)
    fill!(B, NaN)

    br[2, i] = n_gflop / @belapsed cfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy " Clang wrong?"
    fill!(B, NaN)

    br[3, i] = n_gflop / @belapsed ffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy " Fort wrong?"
    fill!(B, NaN)

    br[4, i] = n_gflop / @belapsed icfilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy " icc wrong?"
    fill!(B, NaN)

    br[5, i] = n_gflop / @belapsed iffilter2dunrolled!($B, $A, $K)
    @assert B ≈ Bcopy " ifort wrong?"
    fill!(B, NaN)

    br[6, i] = n_gflop / @belapsed filter2dunrolledavx!($B, $A, $K)
    @assert B ≈ Bcopy " LoopVec wrong?"
end
512
"""
    benchmark_filter2dunrolled(sizes)

Create a `BenchmarkResult` for the six unrolled 2-D filter implementations and
the given `sizes`, build a random 3×3 `SizedOffsetMatrix{Float64,-1,1,-1,1}`
kernel, run `filter2dunrolled_bench_run!` once per `(index, size)` pair through
`pmap`, and return the `BenchmarkResult`.
"""
function benchmark_filter2dunrolled(sizes)
    labels = [
        " Julia", " Clang-Polly", " GFortran", " icc", " ifort", " LoopVectorization"
    ]
    result = BenchmarkResult(labels, sizes)
    gflops = result.sizedresults.results
    kernel = SizedOffsetMatrix{Float64,-1,1,-1,1}(rand(3, 3))
    # The `pmap` return values are not used; each call writes into `gflops`.
    pmap(enumerate(sizes)) do (idx, sz)
        filter2dunrolled_bench_run!(gflops, sz, idx, kernel)
    end
    return result
end
0 commit comments