Skip to content

Commit 1a86804

Browse files
committed
Update compat and Manifest for SIMDPirates version adding ::Mask + ::Vec{W,<:Integer} addition; fixes #61. Also now using given type of reduction vars for temporary accumulators.
1 parent 51a75c9 commit 1a86804

File tree

6 files changed

+64
-12
lines changed

6 files changed

+64
-12
lines changed

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4949

5050
[[SIMDPirates]]
5151
deps = ["VectorizationBase"]
52-
git-tree-sha1 = "839625f8699855a7d5ca96be25bc24d71c5c00ff"
52+
git-tree-sha1 = "0be25063d6e4306eb656778bb613d32c2ed9268b"
5353
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
54-
version = "0.6.0"
54+
version = "0.6.1"
5555

5656
[[SLEEFPirates]]
5757
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.16"
4+
version = "0.6.17"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -12,7 +12,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1212

1313
[compat]
1414
Parameters = "0"
15-
SIMDPirates = "~0.6"
15+
SIMDPirates = "~0.6.1"
1616
SLEEFPirates = "~0.4"
1717
VectorizationBase = "~0.6.1"
1818
julia = "1.1"

src/condense_loopset.jl

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ function setup_call_noinline(ls::LoopSet, U = zero(Int8), T = zero(Int8))
335335
mvar = mangledvar(op)
336336
out = Symbol(mvar, 0)
337337
push!(outer_reducts.args, out)
338+
# push!(call.args, Symbol("##TYPEOF##", var))
338339
end
339340
push!(q.args, outer_reducts)
340341
retv = loopset_return_value(ls, Val(false))
@@ -356,15 +357,18 @@ end
356357
function setup_call_inline(ls::LoopSet, U = zero(Int8), T = zero(Int8))
357358
call = generate_call(ls, (true,U,T))
358359
hasouterreductions = length(ls.outer_reductions) > 0
359-
if hasouterreductions
360-
retv = loopset_return_value(ls, Val(false))
361-
call = Expr(:(=), retv, call)
360+
if !hasouterreductions
361+
q = Expr(:block,gc_preserve(ls, call))
362+
append!(ls.preamble.args, q.args)
363+
return ls.preamble
362364
end
363-
q = Expr(:block,gc_preserve(ls, call))
365+
retv = loopset_return_value(ls, Val(false))
364366
outer_reducts = Expr(:local)
367+
q = Expr(:block,gc_preserve(ls, Expr(:(=), retv, call)))
365368
for or ∈ ls.outer_reductions
366369
op = ls.operations[or]
367370
var = name(op)
371+
# push!(call.args, Symbol("##TYPEOF##", var))
368372
mvar = mangledvar(op)
369373
instr = instruction(op)
370374
out = Symbol(mvar, 0)

src/lowering.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,14 +154,16 @@ function initialize_outer_reductions!(
154154
)
155155
reduct_zero = reduction_zero(op.instruction)
156156
isvectorized = vectorized ∈ reduceddependencies(op)
157+
# typeTr = Symbol("##TYPEOF##", name(op))
158+
typeTr = Expr(:call, :typeof, mangledvar(op))
157159
z = if isvectorized
158160
if reduct_zero === :zero
159-
Expr(:call, lv(:vzero), W, typeT)
161+
Expr(:call, lv(:vzero), W, typeTr)
160162
else
161-
Expr(:call, lv(:vbroadcast), W, Expr(:call, reduct_zero, typeT))
163+
Expr(:call, lv(:vbroadcast), W, Expr(:call, reduct_zero, typeTr))
162164
end
163165
else
164-
Expr(:call, reduct_zero, typeT)
166+
Expr(:call, reduct_zero, typeTr)
165167
end
166168
mvar = variable_name(op, suffix)
167169
for u ∈ Umin:Umax-1

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ function sizeofeltypes(v, num_arrays)::Int
216216
sizeof(T)
217217
end
218218

219-
220219
function avx_loopset(instr, ops, arf, AM, LB, vargs)
221220
ls = LoopSet(:LoopVectorization)
222221
num_arrays = length(arf)
@@ -241,6 +240,7 @@ function avx_body(ls, UT)
241240
end
242241

243242
function _avx_loopset_debug(::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LB}, vargs...) where {UT, OPS, ARF, AM, LB}
243+
@show OPS ARF AM LB vargs
244244
_avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LB.parameters, typeof.(vargs))
245245
end
246246
function _avx_loopset(OPSsv, ARFsv, AMsv, LBsv, vargs) where {UT, OPS, ARF, AM, LB}

test/dot.jl

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,44 @@
130130
c_im[i] = b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1]
131131
end
132132
end
133+
134+
135+
function pi(x, y)
136+
acc = 0
137+
@inbounds @simd for i ∈ eachindex(x)
138+
acc += (x[i]*x[i] + y[i]*y[i]) < 1.0
139+
end
140+
4acc/length(x)
141+
end
142+
function piavx(x, y)
143+
acc = 0
144+
@avx for i ∈ eachindex(x)
145+
acc += (x[i]*x[i] + y[i]*y[i]) < 1.0
146+
end
147+
4acc/length(x)
148+
end
149+
function piavx_u4(x, y)
150+
acc = 0
151+
@avx unroll=4 for i ∈ eachindex(x)
152+
acc += (x[i]*x[i] + y[i]*y[i]) < 1.0
153+
end
154+
4acc/length(x)
155+
end
156+
function pi_avx(x, y)
157+
acc = 0
158+
@_avx for i ∈ eachindex(x)
159+
acc += (x[i]*x[i] + y[i]*y[i]) < 1.0
160+
end
161+
4acc/length(x)
162+
end
163+
function pi_avx_u4(x, y)
164+
acc = 0
165+
@_avx unroll=4 for i ∈ eachindex(x)
166+
acc += (x[i]*x[i] + y[i]*y[i]) < 1.0
167+
end
168+
4acc/length(x)
169+
end
170+
133171
# @macroexpand @_avx for i = 1:length(a_re) - 1
134172
# c_re[i] = b_re[i] * a_re[i + 1] - b_im[i] * a_im[i + 1]
135173
# c_im[i] = b_re[i] * a_im[i + 1] + b_im[i] * a_re[i + 1]
@@ -155,6 +193,14 @@
155193
@test myselfdot_avx(a) ≈ s
156194
@test myselfdotavx(a) ≈ s
157195

196+
if T <: Union{Float32,Float64}
197+
πest = pi(a, b)
198+
@test πest == piavx(a, b)
199+
@test πest == piavx_u4(a, b)
200+
@test πest == pi_avx(a, b)
201+
@test πest == pi_avx_u4(a, b)
202+
end
203+
158204
a_re = rand(R, N); a_im = rand(R, N);
159205
b_re = rand(R, N); b_im = rand(R, N);
160206
ac = Complex.(a_re, a_im);

0 commit comments

Comments
 (0)