Skip to content

Commit 0842b02

Browse files
committed
Limit architectures with fewer than 32 registers to unroll/tile factors of at most 4.
1 parent 2098d86 commit 0842b02

File tree

4 files changed

+4
-4
lines changed

4 files changed

+4
-4
lines changed

src/determinestrategy.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ function solve_tilesize(
352352
reg_pressure::AbstractVector{Float64},
353353
W::Int, vectorized::Symbol
354354
)
355-
maxTbase = maxUbase = 6#8
355+
maxTbase = maxUbase = VectorizationBase.REGISTER_COUNT == 32 ? 6 : 4#8
356356
maxT = maxTbase#8
357357
maxU = maxUbase#8
358358
tiledloop = getloop(ls, tiled)

test/gemm.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@
294294
if LoopVectorization.VectorizationBase.REGISTER_COUNT == 32
295295
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :k, :n, :m, 3, 6)
296296
else
297-
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :k, :n, :m, 1, 6)
297+
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :k, :n, :m, 2, 4)
298298
end
299299
function rank2AmulBavx!(C, Aₘ, Aₖ, B)
300300
@avx for m ∈ 1:size(C,1), n ∈ 1:size(C,2)

test/gemv.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ using LoopVectorization
22
using Test
33

44
@testset "GEMV" begin
5-
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (2, 6) : (4, 6)
5+
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3, 4) : (4, 6)
66
gemvq = :(for i ∈ eachindex(y)
77
yᵢ = 0.0
88
for j ∈ eachindex(x)

test/miscellaneous.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
@testset "Miscellaneous" begin
3-
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (2, 6) : (4, 6)
3+
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3, 4) : (4, 6)
44
dot3q = :(for m ∈ 1:M, n ∈ 1:N
55
s += x[m] * A[m,n] * y[n]
66
end);

0 commit comments

Comments
 (0)