Skip to content

Commit 515169c

Browse files
committed
Updates to improve performance for loops where the LLVM compiler needs aliasing information to eliminate loads.
1 parent 5ce49bf commit 515169c

25 files changed

+33
-30
lines changed

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
7676

7777
[[VectorizationBase]]
7878
deps = ["CpuId", "LinearAlgebra"]
79-
git-tree-sha1 = "1e8a90888ec61405ea345c1ac2bdc7d86b99bd69"
79+
git-tree-sha1 = "b68b3234127d7839280f39bd668fd0025633aa01"
8080
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
81-
version = "0.8.2"
81+
version = "0.8.5"

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ OffsetArrays = "1"
1616
Parameters = "0"
1717
SIMDPirates = "0.7"
1818
SLEEFPirates = "0.4"
19-
VectorizationBase = "0.8"
19+
VectorizationBase = "0.8.5"
2020
julia = "1.1"
2121

2222
[extras]

benchmark/driver.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ end
2323
# sizes = 23:23
2424
sizes = 256:-1:2
2525

26-
filter2d_dynamic_bench = benchmark_filter2ddynamic(512:-1:2)
27-
filter2d_3x3_bench = benchmark_filter2d3x3(512:-1:2)
28-
filter2d_unrolled_bench = benchmark_filter2dunrolled(512:-1:2)
26+
filter2d_dynamic_bench = benchmark_filter2ddynamic(sizes)#512:-1:2)
27+
filter2d_3x3_bench = benchmark_filter2d3x3(sizes)#512:-1:2)
28+
filter2d_unrolled_bench = benchmark_filter2dunrolled(sizes)#512:-1:2)
2929

3030
AmulB_bench = benchmark_AmulB(sizes)
3131
AmulBt_bench = benchmark_AmulBt(sizes)

docs/src/assets/bench_AmulB_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_AmulBt_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_Amulvb_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_AplusAt_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_AtmulB_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_AtmulBt_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_Atmulvb_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_aplusBc_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_dot3_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_dot_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_exp_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_filter2d_3x3_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_filter2d_dynamic_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_filter2d_unrolled_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_logdettriangle_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_random_access_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_selfdot_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/src/assets/bench_sse_v1.svg

Lines changed: 1 addition & 1 deletion
Loading

src/condense_loopset.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ end
210210
LHS = ind === nothing ? gensym() : vptrs[ind]
211211
assigned_names[i] = LHS
212212
d = (D[i])::Union{Nothing,Int}
213-
if d === nothing # stridedpointer
213+
if d === nothing # stridedpointer instead of noaliasstridedpointer, because alias info will be lost across function boundary...
214214
num_arrays += 1
215215
RHS = Expr(:call, lv(:stridedpointer), Expr(:ref, :vargs, ari), Expr(:ref, :arraydescript, ari))
216216
else #subsetview
@@ -284,7 +284,7 @@ function setup_call_noinline(ls::LoopSet, U = zero(Int8), T = zero(Int8))
284284
if ex isa Expr && ex.head === :(=) && length(ex.args) == 2
285285
if ex.args[2] isa Expr && ex.args[2].head === :call
286286
gr = first(ex.args[2].args)
287-
if gr == lv(:stridedpointer)
287+
if gr == lv(:noaliasstridedpointer)
288288
array = ex.args[2].args[2]
289289
arrayid = findfirst(a -> a === array, ls.includedactualarrays)
290290
if arrayid isa Int

src/memory_ops_common.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,16 @@ end
1515

1616
add_vptr!(ls::LoopSet, op::Operation) = add_vptr!(ls, op.ref)
1717
add_vptr!(ls::LoopSet, mref::ArrayReferenceMeta) = add_vptr!(ls, mref.ref.array, vptr(mref))
18+
using VectorizationBase: noaliasstridedpointer
1819
function add_vptr!(ls::LoopSet, array::Symbol, vptrarray::Symbol = vptr(array), actualarray::Bool = true, broadcast::Bool = false)
1920
if !includesarray(ls, array)
2021
push!(ls.includedarrays, array)
2122
actualarray && push!(ls.includedactualarrays, array)
2223
if broadcast
2324
pushpreamble!(ls, Expr(:(=), vptrarray, Expr(:call, lv(:stridedpointer_for_broadcast), array)))
2425
else
25-
pushpreamble!(ls, Expr(:(=), vptrarray, Expr(:call, lv(:stridedpointer), array)))
26+
# pushpreamble!(ls, Expr(:(=), vptrarray, Expr(:call, lv(:stridedpointer), array)))
27+
pushpreamble!(ls, Expr(:(=), vptrarray, Expr(:call, lv(:noaliasstridedpointer), array)))
2628
end
2729
end
2830
nothing

src/vectorizationbase_extensions.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ end
2323
# Tuple of length > 1, subtract offsets.
2424
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = ntuple(n -> ind[n] - ptr.offsets[n], Val{N}())
2525
@inline Base.similar(p::OffsetStridedPointer, ptr::Ptr) = OffsetStridedPointer(similar(p.ptr, ptr), p.offsets)
26+
@inline Base.pointer(p::OffsetStridedPointer) = pointer(p.ptr)
2627

2728
# If an OffsetArray is getting indexed by a (loop-)constant value, then this particular vptr object cannot also be eachindexed, so we can safely return a stridedpointer
2829
@inline function VectorizationBase.subsetview(ptr::OffsetStridedPointer{<:Any,N}, ::Val{I}, i) where {I,N}

test/miscellaneous.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
B[j,i] = A[j,i] - x[j]
3434
end)
3535
lssubcol = LoopVectorization.LoopSet(subcolq);
36-
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], :i, :j, :j, Unum, Tnum)
36+
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], :i, :j, :j, 4, 4)
3737
## @avx is SLOWER!!!!
3838
## need to fix!
3939
function mysubcol!(B, A, x)
@@ -58,7 +58,7 @@
5858
x[j] += A[j,i] - 0.25
5959
end)
6060
lscolsum = LoopVectorization.LoopSet(colsumq);
61-
@test LoopVectorization.choose_order(lscolsum) == (Symbol[:j,:i], :i, :j, :j, Unum, Tnum)
61+
@test LoopVectorization.choose_order(lscolsum) == (Symbol[:j,:i], :i, :j, :j, 4, 4)
6262

6363
# my colsum is wrong (by 0.25), but slightly more interesting
6464
function mycolsum!(x, A)
@@ -95,7 +95,7 @@
9595
lsvar = LoopVectorization.LoopSet(varq);
9696
# LoopVectorization.choose_order(lsvar)
9797
# @test LoopVectorization.choose_order(lscolsum) == (Symbol[:j,:i], :j, Symbol("##undefined##"), :j, 4, -1)
98-
@test LoopVectorization.choose_order(lsvar) == (Symbol[:j,:i], :i, :j, :j, Unum, Tnum)
98+
@test LoopVectorization.choose_order(lsvar) == (Symbol[:j,:i], :i, :j, :j, 4, 4)
9999

100100
function myvar!(s², A, x̄)
101101
@. s² = 0

0 commit comments

Comments
 (0)