Skip to content

Commit d6ed285

Browse files
committed
Revert changes back to vload/vstore! instead of load/store!.
1 parent ef20072 commit d6ed285

File tree

9 files changed

+31
-27
lines changed

9 files changed

+31
-27
lines changed

Manifest.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4949

5050
[[SIMDPirates]]
5151
deps = ["VectorizationBase"]
52-
git-tree-sha1 = "ecacd3f808e559d9e363f2620041c6286c8efaca"
52+
git-tree-sha1 = "4b1e0b1442fb4af5e6b93b9c7fdeacf287d2653b"
5353
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
54-
version = "0.4.0"
54+
version = "0.5.0"
5555

5656
[[SLEEFPirates]]
5757
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]
58-
git-tree-sha1 = "bb99e28c0284de9c3233258a93882429752faa55"
58+
git-tree-sha1 = "769fd039d0835e8e628d61e2f0c80822ba668497"
5959
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
60-
version = "0.3.8"
60+
version = "0.3.9"
6161

6262
[[Serialization]]
6363
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
@@ -71,6 +71,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
7171

7272
[[VectorizationBase]]
7373
deps = ["CpuId", "LinearAlgebra"]
74-
git-tree-sha1 = "b9b5c8fa55e9b859989e759f405624d16b0b0ca2"
74+
git-tree-sha1 = "9f8caaa5d033f88e188f62a3dba0dab5f429447a"
7575
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
76-
version = "0.4.2"
76+
version = "0.5.0"

Project.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.14"
4+
version = "0.6.15"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -12,9 +12,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1212

1313
[compat]
1414
Parameters = "0"
15-
SIMDPirates = "~0.4"
16-
SLEEFPirates = "~0.3.8"
17-
VectorizationBase = "~0.4.2"
15+
SIMDPirates = "~0.5"
16+
SLEEFPirates = "~0.3.9"
17+
VectorizationBase = "~0.5"
1818
julia = "1.1"
1919

2020
[extras]

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ It then tries to vectorize the loop to improve runtime performance.
2121

2222
The macro assumes that loop iterations can be reordered. It also currently supports simple nested loops, where loop bounds of inner loops are constant across iterations of the outer loop, and only a single loop at each level of the loop nest. These limitations should be removed in a future version.
2323

24+
## Benchmarks
25+
26+
Please see the documentation for benchmarks versus base Julia, Clang-Polly, icc, ifort, gfortran, and Eigen. If you believe any code or compiler flags can be improved, would like to submit your own benchmarks, or have Julia code using LoopVectorization that you would like to be tested for performance regressions on a semi-regular basis, please feel free to file an issue or PR with the code sample.
27+
2428
## Examples
2529
### Dot Product
2630
<details>

src/add_constants.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ function add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPositi
3939
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref)
4040
add_vptr!(ls, op)
4141
temp = gensym(:intermediateconstref)
42-
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
42+
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:vload), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
4343
pushpreamble!(ls, op, temp)
4444
pushop!(ls, op, temp)
4545
end

src/add_loads.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ end
7373

7474
struct LoopValue end
7575
@inline VectorizationBase.stridedpointer(::LoopValue) = LoopValue()
76-
@inline VectorizationBase.load(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
77-
@inline VectorizationBase.load(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
78-
@inline VectorizationBase.load(::LoopValue, i::Integer) = i + one(i)
79-
@inline VectorizationBase.load(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
76+
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
77+
@inline VectorizationBase.vload(::LoopValue, i::Tuple{_MM{W}}, ::Unsigned) where {W} = _MM{W}(@inbounds(i[1].i) + 1)
78+
@inline VectorizationBase.vload(::LoopValue, i::Integer) = i + one(i)
79+
@inline VectorizationBase.vload(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
8080
@inline Base.eltype(::LoopValue) = Int8
8181

src/filter.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ function vfilter!(f::F, x::Vector{T}, y::AbstractArray{T}) where {F,T <: SUPPORT
1010
ptr_x = pointer(x)
1111
ptr_y = pointer(y)
1212
for _ ∈ 1:Nrep
13-
vy = load(Vec{W,T}, ptr_y, i)
13+
vy = vload(Vec{W,T}, ptr_y, i)
1414
mask = f(SVec(vy))
1515
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
1616
i += W
1717
j += count_ones(mask)
1818
end
1919
rem_mask = VectorizationBase.mask(T, Nrem)
20-
vy = load(Vec{W,T}, gep(ptr_y, i), rem_mask)
20+
vy = vload(Vec{W,T}, gep(ptr_y, i), rem_mask)
2121
mask = rem_mask & f(SVec(vy))
2222
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
2323
j += count_ones(mask)

src/lower_load.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ function pushvectorload!(q::Expr, op::Operation, var::Symbol, td::UnrollArgs, U:
22
@unpack u, unrolled = td
33
ptr = refname(op)
44
name, mo = name_memoffset(var, op, td, W, vecnotunrolled)
5-
instrcall = Expr(:call, lv(:load), ptr, mo)
5+
instrcall = Expr(:call, lv(:vload), ptr, mo)
66
if mask !== nothing && (vecnotunrolled || u == U - 1)
77
push!(instrcall.args, mask)
88
end
@@ -21,7 +21,7 @@ function lower_load_scalar!(
2121
for u ∈ 0:U-1
2222
varname = varassignname(var, u, isunrolled)
2323
td = UnrollArgs(u, unrolled, tiled, suffix)
24-
push!(q.args, Expr(:(=), varname, Expr(:call, lv(:load), ptr, mem_offset_u(op, td))))
24+
push!(q.args, Expr(:(=), varname, Expr(:call, lv(:vload), ptr, mem_offset_u(op, td))))
2525
end
2626
nothing
2727
end

src/lower_store.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function lower_conditionalstore_scalar!(
6666
varname = varassignname(var, u, parentisunrolled)
6767
condvarname = varassignname(condvar, u, condunrolled)
6868
td = UnrollArgs(u, unrolled, tiled, suffix)
69-
push!(q.args, Expr(:&&, condvarname, Expr(:call, lv(:store!), ptr, varname, mem_offset_u(op, td))))
69+
push!(q.args, Expr(:&&, condvarname, Expr(:call, lv(:vstore!), ptr, varname, mem_offset_u(op, td))))
7070
end
7171
nothing
7272
end
@@ -103,7 +103,7 @@ function lower_conditionalstore_vectorized!(
103103
td = UnrollArgs(u, unrolled, tiled, suffix)
104104
name, mo = name_memoffset(var, op, td, W, vecnotunrolled, parentisunrolled)
105105
condvarname = varassignname(condvar, u, condunrolled)
106-
instrcall = Expr(:call, lv(:store!), ptr, name, mo)
106+
instrcall = Expr(:call, lv(:vstore!), ptr, name, mo)
107107
if mask !== nothing && (vecnotunrolled || u == U - 1)
108108
push!(instrcall.args, Expr(:call, lv(:combinemasks), condvarname, mask))
109109
else
@@ -123,7 +123,7 @@ function lower_store_scalar!(
123123
for u ∈ 0:U-1
124124
varname = varassignname(var, u, parentisunrolled)
125125
td = UnrollArgs(u, unrolled, tiled, suffix)
126-
push!(q.args, Expr(:call, lv(:store!), ptr, varname, mem_offset_u(op, td)))
126+
push!(q.args, Expr(:call, lv(:vstore!), ptr, varname, mem_offset_u(op, td)))
127127
end
128128
nothing
129129
end
@@ -147,7 +147,7 @@ function lower_store_vectorized!(
147147
for u ∈ 0:U-1
148148
td = UnrollArgs(u, unrolled, tiled, suffix)
149149
name, mo = name_memoffset(var, op, td, W, vecnotunrolled, parentisunrolled)
150-
instrcall = Expr(:call, lv(:store!), ptr, name, mo)
150+
instrcall = Expr(:call, lv(:vstore!), ptr, name, mo)
151151
if mask !== nothing && (vecnotunrolled || u == U - 1)
152152
push!(instrcall.args, mask)
153153
end

src/map.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ function vmap_quote(N, ::Type{T}) where {T}
55
val = Expr(:call, Expr(:curly, :Val, W))
66
q = Expr(:block, Expr(:(=), :M, Expr(:call, :length, :dest)), Expr(:(=), :vdest, Expr(:call, :pointer, :dest)), Expr(:(=), :m, 0))
77
fcall = Expr(:call, :f)
8-
loopbody = Expr(:block, Expr(:call, :store!, :vdest, fcall, :m), Expr(:(+=), :m, W))
8+
loopbody = Expr(:block, Expr(:call, :vstore!, :vdest, fcall, :m), Expr(:(+=), :m, W))
99
fcallmask = Expr(:call, :f)
10-
bodymask = Expr(:block, Expr(:(=), :__mask__, Expr(:call, :mask, val, Expr(:call, :&, :M, W-1))), Expr(:call, :store!, :vdest, fcallmask, :m, :__mask__))
10+
bodymask = Expr(:block, Expr(:(=), :__mask__, Expr(:call, :mask, val, Expr(:call, :&, :M, W-1))), Expr(:call, :vstore!, :vdest, fcallmask, :m, :__mask__))
1111
for n ∈ 1:N
1212
arg_n = Symbol(:varg_,n)
1313
push!(q.args, Expr(:(=), arg_n, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:call, :pointer, Expr(:ref, :args, n)))))
14-
push!(fcall.args, Expr(:call, :load, val, arg_n, :m))
15-
push!(fcallmask.args, Expr(:call, :load, val, arg_n, :m, :__mask__))
14+
push!(fcall.args, Expr(:call, :vload, val, arg_n, :m))
15+
push!(fcallmask.args, Expr(:call, :vload, val, arg_n, :m, :__mask__))
1616
end
1717
loop = Expr(:for, Expr(:(=), :_, Expr(:call, :(:), 0, Expr(:call, :-, Expr(:call, :(>>>), :M, Wshift), 1))), loopbody)
1818
push!(q.args, loop)

0 commit comments

Comments
 (0)