Skip to content

Commit f09c753

Browse files
committed
Make register spills double the cost rather than return an infinite cost, so that tile sizes can be specified even when the optimizer considers them inadvisable.
1 parent d47fb09 commit f09c753

File tree

4 files changed

+10
-6
lines changed

4 files changed

+10
-6
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.21"
4+
version = "0.6.22"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

src/determinestrategy.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,11 +499,11 @@ function evaluate_cost_tile(
499499
reg_pressure[4] += rp
500500
end
501501
end
502-
sum(reg_pressure) > VectorizationBase.REGISTER_COUNT && return 0, 0, Inf
502+
costpenalty = (sum(reg_pressure) > VectorizationBase.REGISTER_COUNT) ? 2 : 1
503503
# @show order, vectorized cost_vec reg_pressure
504504
# @show solve_tilesize(ls, unrolled, tiled, cost_vec, reg_pressure)
505505
U, T, tcost = solve_tilesize(ls, unrolled, tiled, cost_vec, reg_pressure, W, vectorized)
506-
U, T, tcost + stride_penalty(ls, order)
506+
U, T, costpenalty * tcost + stride_penalty(ls, order)
507507
end
508508

509509

src/lowering.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,12 @@ function lower(ls::LoopSet)#, prependinlineORorUnroll = 0)
353353
end
354354
function lower(ls::LoopSet, U, T)#, prependinlineORorUnroll = 0)
355355
num_loops(ls) == 1 && @assert T == -1
356-
order, unrolled, tiled, vectorized, _U, _T = choose_order(ls)
356+
if T == -1
357+
order, vectorized, c = choose_unroll_order(ls, Inf)
358+
unrolled = first(order); tiled = Symbol("##undefined##")
359+
else
360+
order, unrolled, tiled, vectorized, _U, _T, c = choose_tile(ls)
361+
end
357362
lower(ls, order, unrolled, tiled, vectorized, U, T)
358363
end
359364

test/offsetarrays.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ using Test
6060
function avx2d!(out::AbstractMatrix, A::AbstractMatrix, kern)
6161
rng1k, rng2k = axes(kern)
6262
rng1, rng2 = axes(out)
63-
# Manually unpack the OffsetArray
6463
for j in rng2, i in rng1
6564
tmp = zero(eltype(out))
6665
@avx for jk in rng2k, ik in rng1k
@@ -73,7 +72,6 @@ using Test
7372
function avx2douter!(out::AbstractMatrix, A::AbstractMatrix, kern)
7473
rng1k, rng2k = axes(kern)
7574
rng1, rng2 = axes(out)
76-
# Manually unpack the OffsetArray
7775
@avx for j in rng2, i in rng1
7876
tmp = zero(eltype(out))
7977
for jk in rng2k, ik in rng1k
@@ -84,6 +82,7 @@ using Test
8482
out
8583
end
8684

85+
8786

8887
struct SizedOffsetMatrix{T,LR,UR,LC,RC} <: AbstractMatrix{T}
8988
data::Matrix{T}

0 commit comments

Comments
 (0)