1
-
1
+ using LinearAlgebra
2
2
using LoopVectorization. VectorizationBase: REGISTER_SIZE
3
3
4
4
pkgdir (pkg:: String ) = abspath (joinpath (dirname (Base. find_package (pkg)), " .." ))
@@ -11,6 +11,7 @@ const LIBICTEST = joinpath(LOOPVECBENCHDIR, "libictests.so")
11
11
const LIBIFTEST = joinpath (LOOPVECBENCHDIR, " libiftests.so" )
12
12
const LIBEIGENTEST = joinpath (LOOPVECBENCHDIR, " libetest.so" )
13
13
const LIBIEIGENTEST = joinpath (LOOPVECBENCHDIR, " libietest.so" )
14
+ const LIBDIRECTCALLJIT = joinpath (LOOPVECBENCHDIR, " libdcjtest.so" )
14
15
15
16
# requires Clang with polly to build
16
17
cfile = joinpath (LOOPVECBENCHDIR, " looptests.c" )
23
24
ffile = joinpath (LOOPVECBENCHDIR, " looptests.f90" )
24
25
if ! isfile (LIBFTEST) || mtime (ffile) > mtime (LIBFTEST)
25
26
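# --param max-unroll-times defaults to ≥8, which is generally excessive. NOTE(review): the updated gfortran command no longer passes `--param max-unroll-times=4`, so that default now applies — confirm this is intended.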
26
- run (` gfortran -Ofast -march=native -funroll-loops --param max-unroll-times=4 - floop-nest-optimize -mprefer-vector-width=$(8 REGISTER_SIZE) -shared -fPIC $ffile -o $LIBFTEST ` )
27
+ run (` gfortran -Ofast -march=native -funroll-loops -floop-nest-optimize -mprefer-vector-width=$(8 REGISTER_SIZE) -shared -fPIC $ffile -o $LIBFTEST ` )
27
28
end
28
29
if ! isfile (LIBIFTEST) || mtime (ffile) > mtime (LIBIFTEST)
29
30
run (` ifort -fast -qopt-zmm-usage=high -qoverride-limits -shared -fPIC $ffile -o $LIBIFTEST ` )
@@ -39,6 +40,26 @@ if !isfile(LIBIEIGENTEST) || mtime(eigenfile) > mtime(LIBIEIGENTEST)
39
40
run (` icpc -fast -qopt-zmm-usage=high -fargument-noalias-global -qoverride-limits -I/usr/include/eigen3 -shared -fPIC $eigenfile -o $LIBIEIGENTEST ` )
40
41
end
41
42
43
# Rebuild the shared library at `LIBDIRECTCALLJIT` from `directcalljit.f90` whenever the
# library is missing or older than its source file.
directcalljitfile = joinpath(LOOPVECBENCHDIR, "directcalljit.f90")
if !isfile(LIBDIRECTCALLJIT) || mtime(directcalljitfile) > mtime(LIBDIRECTCALLJIT)
    # Alternative ifort build, kept for reference:
    # run(`ifort -fast -DMKL_DIRECT_CALL_SEQ_JIT -fpp -qopt-zmm-usage=high -shared -fPIC $directcalljitfile -o $LIBDIRECTCALLJIT`)
    # NOTE(review): `8 * REGISTER_SIZE` presumably converts a byte count to bits for
    # `-mprefer-vector-width` — confirm against VectorizationBase.
    run(`gfortran -Ofast -march=native -DMKL_DIRECT_CALL_SEQ_JIT -cpp -mprefer-vector-width=$(8 * REGISTER_SIZE) -shared -fPIC $directcalljitfile -o $LIBDIRECTCALLJIT`)
end
48
+
49
"""
    istransposed(x) -> Bool

Return `true` when `x` is an `Adjoint` or `Transpose` wrapper and `false` for any other
value.
"""
istransposed(x) = false
istransposed(x::Adjoint) = true
istransposed(x::Transpose) = true
52
"""
    dgemmjit!(C, A, B)

Call the `dgemmjit` routine from the shared library at `LIBDIRECTCALLJIT`.

The storage behind each argument (`parent(C)`, `parent(A)`, `parent(B)`) is passed as a
`Float64` pointer, followed by the dimensions `M`, `K`, `N` and two flags reporting
whether `A` and `B` are `Adjoint`/`Transpose` wrappers (see `istransposed`).

`M` and `N` are the first and second dimensions of `C`; `K` is the first dimension of
`B`. No dimension checks are performed here.

NOTE(review): presumably the routine stores the product of `A` and `B` in `C` — confirm
against `directcalljit.f90`.
"""
function dgemmjit!(
    C::AbstractVecOrMat{Float64},
    A::AbstractVecOrMat{Float64},
    B::AbstractVecOrMat{Float64}
)
    # Query each dimension separately instead of destructuring `size(C)`: the signature
    # admits vectors, whose size tuple has one entry; `size(C, 2)` is then 1.
    M = size(C, 1)
    N = size(C, 2)
    K = size(B, 1)
    return ccall(
        (:dgemmjit, LIBDIRECTCALLJIT), Cvoid,
        (Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
         Ref{Int}, Ref{Int}, Ref{Int}, Ref{Bool}, Ref{Bool}),
        parent(C), parent(A), parent(B),
        Ref(M), Ref(K), Ref(N),
        Ref(istransposed(A)), Ref(istransposed(B))
    )
end
62
+
42
63
for (prefix,Cshared,Fshared,Eshared) ∈ ((Symbol (" " ),LIBCTEST,LIBFTEST,LIBEIGENTEST), (:i ,LIBICTEST,LIBIFTEST,LIBIEIGENTEST))
43
64
for order ∈ (:kmn , :knm , :mkn , :mnk , :nkm , :nmk )
44
65
gemm = Symbol (:gemm_ , order)
@@ -59,9 +80,9 @@ for (prefix,Cshared,Fshared,Eshared) ∈ ((Symbol(""),LIBCTEST,LIBFTEST,LIBEIGEN
59
80
)
60
81
end
61
82
end
62
- @eval @inline $ (Symbol (prefix,:cgemm! ))(C, A, B) = $ (Symbol (prefix, :cgemm_nkm! ))(C, A, B)
63
- @eval @inline $ (Symbol (prefix,:fgemm! ))(C, A, B) = $ (Symbol (prefix, :fgemm_nkm! ))(C, A, B)
64
- @eval @inline function $ (Symbol (prefix,:egemm! ))(C, A, B)
83
# Default `cgemm!` / `fgemm!` entry points for this `prefix`: each forwards to the
# corresponding `_nkm!` loop-order variant.
@eval $(Symbol(prefix, :cgemm!))(C, A, B) = $(Symbol(prefix, :cgemm_nkm!))(C, A, B)
@eval $(Symbol(prefix, :fgemm!))(C, A, B) = $(Symbol(prefix, :fgemm_nkm!))(C, A, B)
85
+ @eval function $ (Symbol (prefix,:egemm! ))(C, A, B)
65
86
M, N = size (C); K = size (B, 1 )
66
87
ccall (
67
88
(:AmulB , $ Eshared), Cvoid,
@@ -78,7 +99,7 @@ for (prefix,Cshared,Fshared,Eshared) ∈ ((Symbol(""),LIBCTEST,LIBFTEST,LIBEIGEN
78
99
)
79
100
end
80
101
for (p,s) ∈ [(:c ,Cshared) (:e ,Eshared)]
81
- @eval @inline function $ (Symbol (prefix,p,:gemm! ))(C, A:: Adjoint , B)
102
+ @eval function $ (Symbol (prefix,p,:gemm! ))(C, A:: Adjoint , B)
82
103
M, N = size (C); K = size (B, 1 )
83
104
ccall (
84
105
(:AtmulB , $ s), Cvoid,
@@ -87,15 +108,15 @@ for (p,s) ∈ [(:c,Cshared) (:e,Eshared)]
87
108
)
88
109
end
89
110
end
90
- @eval @inline function $ (Symbol (prefix,:fgemm! ))(C, A:: Adjoint , B)
111
# `fgemm!` method for an `Adjoint` left operand: calls `AtmulB` from the `Fshared`
# library with `C`, the parent of `A`, and `B`, followed by the dimensions M, K, N.
@eval function $(Symbol(prefix, :fgemm!))(C, A::Adjoint, B)
    M, N = size(C)
    K = size(B, 1)
    # Plain integers are passed for the `Ref{Clong}` slots so that `ccall` performs the
    # Int -> Clong conversion itself; an explicit `Ref(::Int)` cannot be converted
    # where `Clong` is narrower than `Int`.
    ccall(
        (:AtmulB, $Fshared), Cvoid,
        (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Clong}, Ref{Clong}, Ref{Clong}),
        C, parent(A), B, M, K, N
    )
end
98
- @eval @inline function $ (Symbol (prefix,:fgemm_builtin! ))(C, A:: Adjoint , B)
119
+ @eval function $ (Symbol (prefix,:fgemm_builtin! ))(C, A:: Adjoint , B)
99
120
M, N = size (C); K = size (B, 1 )
100
121
ccall (
101
122
(:AtmulBbuiltin , $ Fshared), Cvoid,
104
125
)
105
126
end
106
127
for (p,s) ∈ [(:c ,Cshared) (:e ,Eshared)]
107
- @eval @inline function $ (Symbol (prefix,p,:gemm! ))(C, A, B:: Adjoint )
128
+ @eval function $ (Symbol (prefix,p,:gemm! ))(C, A, B:: Adjoint )
108
129
M, N = size (C); K = size (B, 1 )
109
130
ccall (
110
131
(:AmulBt , $ s), Cvoid,
@@ -113,15 +134,15 @@ for (p,s) ∈ [(:c,Cshared) (:e,Eshared)]
113
134
)
114
135
end
115
136
end
116
- @eval @inline function $ (Symbol (prefix,:fgemm! ))(C, A, B:: Adjoint )
137
# `fgemm!` method for an `Adjoint` right operand: calls `AmulBt` from the `Fshared`
# library with `C`, `A`, and the parent of `B`, followed by the dimensions M, K, N.
@eval function $(Symbol(prefix, :fgemm!))(C, A, B::Adjoint)
    M, N = size(C)
    K = size(B, 1)
    # Plain integers are passed for the `Ref{Clong}` slots so that `ccall` performs the
    # Int -> Clong conversion itself; an explicit `Ref(::Int)` cannot be converted
    # where `Clong` is narrower than `Int`.
    ccall(
        (:AmulBt, $Fshared), Cvoid,
        (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Clong}, Ref{Clong}, Ref{Clong}),
        C, A, parent(B), M, K, N
    )
end
124
- @eval @inline function $ (Symbol (prefix,:fgemm_builtin! ))(C, A, B:: Adjoint )
145
+ @eval function $ (Symbol (prefix,:fgemm_builtin! ))(C, A, B:: Adjoint )
125
146
M, N = size (C); K = size (B, 1 )
126
147
ccall (
127
148
(:AmulBtbuiltin , $ Fshared), Cvoid,
130
151
)
131
152
end
132
153
for (p,s) ∈ [(:c ,Cshared) (:e ,Eshared)]
133
- @eval @inline function $ (Symbol (prefix,p,:gemm! ))(C, A:: Adjoint , B:: Adjoint )
154
+ @eval function $ (Symbol (prefix,p,:gemm! ))(C, A:: Adjoint , B:: Adjoint )
134
155
M, N = size (C); K = size (B, 1 )
135
156
ccall (
136
157
(:AtmulBt , $ s), Cvoid,
@@ -139,15 +160,15 @@ for (p,s) ∈ [(:c,Cshared) (:e,Eshared)]
139
160
)
140
161
end
141
162
end
142
- @eval @inline function $ (Symbol (prefix,:fgemm! ))(C, A:: Adjoint , B:: Adjoint )
163
# `fgemm!` method for two `Adjoint` operands: calls `AtmulBt` from the `Fshared`
# library with `C` and the parents of `A` and `B`, followed by the dimensions M, K, N.
@eval function $(Symbol(prefix, :fgemm!))(C, A::Adjoint, B::Adjoint)
    M, N = size(C)
    K = size(B, 1)
    # Plain integers are passed for the `Ref{Clong}` slots so that `ccall` performs the
    # Int -> Clong conversion itself; an explicit `Ref(::Int)` cannot be converted
    # where `Clong` is narrower than `Int`.
    ccall(
        (:AtmulBt, $Fshared), Cvoid,
        (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Clong}, Ref{Clong}, Ref{Clong}),
        C, parent(A), parent(B), M, K, N
    )
end
150
- @eval @inline function $ (Symbol (prefix,:fgemm_builtin! ))(C, A:: Adjoint , B:: Adjoint )
171
+ @eval function $ (Symbol (prefix,:fgemm_builtin! ))(C, A:: Adjoint , B:: Adjoint )
151
172
M, N = size (C); K = size (B, 1 )
152
173
ccall (
153
174
(:AtmulBtbuiltin , $ Fshared), Cvoid,
242
263
)
243
264
end
244
265
for (p,s) ∈ [(:c ,Cshared) (:e ,Eshared)]
245
- @eval @inline function $ (Symbol (prefix,p,:gemv! ))(y, A:: Adjoint , x)
266
+ @eval function $ (Symbol (prefix,p,:gemv! ))(y, A:: Adjoint , x)
246
267
M, K = size (A)
247
268
ccall (
248
269
(:Atmulvb , $ s), Cvoid,
@@ -251,7 +272,7 @@ for (p,s) ∈ [(:c,Cshared) (:e,Eshared)]
251
272
)
252
273
end
253
274
end
254
- @eval @inline function $ (Symbol (prefix,:fgemv! ))(y, A:: Adjoint , x)
275
+ @eval function $ (Symbol (prefix,:fgemv! ))(y, A:: Adjoint , x)
255
276
M, K = size (A)
256
277
ccall (
257
278
(:Atmulvb , $ Fshared), Cvoid,
0 commit comments