1
1
include (joinpath (LOOPVECBENCHDIR, " looptests.jl" ))
2
2
include (joinpath (LOOPVECBENCHDIR, " loadsharedlibs.jl" ))
3
3
4
- using PrettyTables, BenchmarkTools
4
+ using BenchmarkTools
5
5
struct SizedResults{V <: AbstractVector } <: AbstractMatrix{String}
6
6
results:: Matrix{Float64}
7
7
sizes:: V
@@ -26,15 +26,6 @@ function Base.getindex(br::SizedResults, row, col)
26
26
end
27
27
# Forward indexed assignment on a `BenchmarkResult` to the raw results matrix
# stored in `br.sizedresults.results`; evaluates to the assigned value `v`.
function Base.setindex!(br::BenchmarkResult, v, i...)
    br.sizedresults.results[i...] = v
end
28
28
29
- const HIGHLIGHT_BEST = Highlighter (
30
- (br,i,j) -> (j > 1 && maximum (@view (br. results[:, i])) == br. results[j- 1 ,i]),
31
- foreground = :green
32
- );
33
- function Base. show (io:: IO , br:: BenchmarkResult )
34
- pretty_table (
35
- io, br. sizedresults, br. tests, crop = :none , highlighters = (HIGHLIGHT_BEST,)
36
- )
37
- end
38
29
39
30
# Normalize a problem-size specification to a 3-tuple.
# A single integer `i` is expanded to `(i, i, i)`; a 3-tuple of integers is
# returned unchanged. Accepts any `Integer` subtype, not only `Int`.
tothreetuple(i::Integer) = (i, i, i)
tothreetuple(i::NTuple{3,Integer}) = i
@@ -106,6 +97,40 @@ function benchmark_AtmulB(sizes)
106
97
end
107
98
br
108
99
end
100
"""
    benchmark_AmulBt(sizes)

Benchmark the matrix product `C = A * Bt'` for every entry of `sizes` and return
the populated `BenchmarkResult`.

Each entry of `sizes` is converted with `tothreetuple` into `(M, K, N)`; `A` is
`M × K`, `Bt` is `N × K`, and `C` is `M × N`. Row `k` of the result corresponds to
`tests[k]` and column `i` to the `i`-th size. Every stored value is
`n_gflop / elapsed`, with `n_gflop = M*K*N*2e-9` and `elapsed` the time reported
by `@belapsed`.

The output of `mul!` (row 1) is copied and used as the reference; each other
implementation's output is checked against it with `≈`.
"""
function benchmark_AmulBt(sizes)
    tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "GFort-intrinsic", "icc", "ifort", "ifort-intrinsic", "LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (i,s) ∈ enumerate(sizes)
        M, K, N = tothreetuple(s)
        C = Matrix{Float64}(undef, M, N)
        A = rand(M, K)
        Bt = rand(N, K)
        n_gflop = M*K*N*2e-9
        # Reference implementation: whatever `mul!` dispatches to for `A * Bt'`.
        br[1,i] = n_gflop / @belapsed mul!($C, $A, $Bt')
        Cblas = copy(C)
        # `C` is overwritten by each implementation in turn and compared to the reference.
        br[2,i] = n_gflop / @belapsed jgemm!($C, $A, $Bt')
        @assert C ≈ Cblas "Julia gemm wrong?"
        br[3,i] = n_gflop / @belapsed cgemm!($C, $A, $Bt')
        @assert C ≈ Cblas "Polly gemm wrong?"
        br[4,i] = n_gflop / @belapsed fgemm!($C, $A, $Bt')
        @assert C ≈ Cblas "Fort gemm wrong?"
        br[5,i] = n_gflop / @belapsed fgemm_builtin!($C, $A, $Bt')
        @assert C ≈ Cblas "Fort intrinsic gemm wrong?"
        br[6,i] = n_gflop / @belapsed icgemm!($C, $A, $Bt')
        @assert C ≈ Cblas "icc gemm wrong?"
        br[7,i] = n_gflop / @belapsed ifgemm!($C, $A, $Bt')
        @assert C ≈ Cblas "ifort gemm wrong?"
        br[8,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $Bt')
        @assert C ≈ Cblas "ifort intrinsic gemm wrong?"
        br[9,i] = n_gflop / @belapsed gemmavx!($C, $A, $Bt')
        @assert C ≈ Cblas "LoopVec gemm wrong?"
        # if i % 10 == 0
        #     percent_complete = round(100i/ length(sizes), sigdigits = 4)
        #     @show percent_complete
        # end
    end
    br
end
109
134
110
135
function benchmark_dot (sizes)
111
136
tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " LoopVectorization" ]
164
189
# Normalize a problem-size specification to a 2-tuple.
# A single integer `i` is expanded to `(i, i)`; a pair of integers is returned
# unchanged. Accepts any `Integer` subtype, not only `Int`.
totwotuple(i::Integer) = (i, i)
totwotuple(i::Tuple{Integer,Integer}) = i
166
191
function benchmark_gemv (sizes)
167
- tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " icc" , " ifort" , " LoopVectorization" ]
192
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " GFort-intrinsic " , " icc" , " ifort" , " ifort-intrinsic " , " LoopVectorization" ]
168
193
br = BenchmarkResult (tests, sizes)
169
194
for (i,s) ∈ enumerate (sizes)
170
195
M, N = totwotuple (s)
@@ -178,11 +203,47 @@ function benchmark_gemv(sizes)
178
203
@assert x ≈ xblas " Polly wrong?"
179
204
br[4 ,i] = n_gflop / @belapsed fgemv! ($ x, $ A, $ y)
180
205
@assert x ≈ xblas " Fort wrong?"
181
- br[5 ,i] = n_gflop / @belapsed icgemv! ($ x, $ A, $ y)
206
+ br[5 ,i] = n_gflop / @belapsed fgemv_builtin! ($ x, $ A, $ y)
207
+ @assert x ≈ xblas " Fort wrong?"
208
+ br[6 ,i] = n_gflop / @belapsed icgemv! ($ x, $ A, $ y)
182
209
@assert x ≈ xblas " icc wrong?"
183
- br[6 ,i] = n_gflop / @belapsed ifgemv! ($ x, $ A, $ y)
210
+ br[7 ,i] = n_gflop / @belapsed ifgemv! ($ x, $ A, $ y)
211
+ @assert x ≈ xblas " ifort wrong?"
212
+ br[8 ,i] = n_gflop / @belapsed ifgemv_builtin! ($ x, $ A, $ y)
213
+ @assert x ≈ xblas " ifort wrong?"
214
+ br[9 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
215
+ @assert x ≈ xblas " LoopVec wrong?"
216
+ # if i % 10 == 0
217
+ # percent_complete = round(100i/ length(sizes), sigdigits = 4)
218
+ # @show percent_complete
219
+ # end
220
+ end
221
+ br
222
+ end
223
+ function benchmark_Atmulvb (sizes)
224
+ tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFortran" , " GFort-intrinsic" , " icc" , " ifort" , " ifort-intrinsic" , " LoopVectorization" ]
225
+ br = BenchmarkResult (tests, sizes)
226
+ for (i,s) ∈ enumerate (sizes)
227
+ M, N = totwotuple (s)
228
+ x = Vector {Float64} (undef, M); A = rand (N, M); y = rand (N);
229
+ n_gflop = M* N * 2e-9
230
+ br[1 ,i] = n_gflop / @belapsed mul! ($ x, $ A' , $ y)
231
+ xblas = copy (x)
232
+ br[2 ,i] = n_gflop / @belapsed jgemv! ($ x, $ A' , $ y)
233
+ @assert x ≈ xblas " Julia wrong?"
234
+ br[3 ,i] = n_gflop / @belapsed cgemv! ($ x, $ A' , $ y)
235
+ @assert x ≈ xblas " Polly wrong?"
236
+ br[4 ,i] = n_gflop / @belapsed fgemv! ($ x, $ A' , $ y)
237
+ @assert x ≈ xblas " Fort wrong?"
238
+ br[5 ,i] = n_gflop / @belapsed fgemv_builtin! ($ x, $ A' , $ y)
239
+ @assert x ≈ xblas " Fort wrong?"
240
+ br[6 ,i] = n_gflop / @belapsed icgemv! ($ x, $ A' , $ y)
241
+ @assert x ≈ xblas " icc wrong?"
242
+ br[7 ,i] = n_gflop / @belapsed ifgemv! ($ x, $ A' , $ y)
243
+ @assert x ≈ xblas " ifort wrong?"
244
+ br[8 ,i] = n_gflop / @belapsed ifgemv_builtin! ($ x, $ A' , $ y)
184
245
@assert x ≈ xblas " ifort wrong?"
185
- br[7 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A, $ y)
246
+ br[9 ,i] = n_gflop / @belapsed jgemvavx! ($ x, $ A' , $ y)
186
247
@assert x ≈ xblas " LoopVec wrong?"
187
248
# if i % 10 == 0
188
249
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
@@ -323,13 +384,13 @@ function benchmark_AplusAt(sizes)
323
384
@assert B ≈ baseB " Clang wrong?"
324
385
br[3 ,i] = n_gflop / @belapsed fAplusAt! ($ B, $ A)
325
386
@assert B ≈ baseB " Fort wrong?"
326
- br[4 ,i] = n_gflop / @belapsed fAplusAtbuiltin ! ($ B, $ A)
387
+ br[4 ,i] = n_gflop / @belapsed fAplusAt_builtin ! ($ B, $ A)
327
388
@assert B ≈ baseB " Fort-builtin wrong?"
328
389
br[5 ,i] = n_gflop / @belapsed icAplusAt! ($ B, $ A)
329
390
@assert B ≈ baseB " icc wrong?"
330
391
br[6 ,i] = n_gflop / @belapsed ifAplusAt! ($ B, $ A)
331
392
@assert B ≈ baseB " ifort wrong?"
332
- br[7 ,i] = n_gflop / @belapsed ifAplusAtbuiltin ! ($ B, $ A)
393
+ br[7 ,i] = n_gflop / @belapsed ifAplusAt_builtin ! ($ B, $ A)
333
394
@assert B ≈ baseB " ifort-builtin wrong?"
334
395
br[8 ,i] = n_gflop / @belapsed @avx @. $ B = $ A + $ A'
335
396
@assert B ≈ baseB " LoopVec wrong?"
0 commit comments