Skip to content

Commit 7d75430

Browse files
authored
vcount (#471)
* Add vcount
* Test vcount
* Test range of sizes
* Test updates
* Returns not defined in Julia 1.6
1 parent 282a4e1 commit 7d75430

File tree

6 files changed

+69
-12
lines changed

6 files changed

+69
-12
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.151"
4+
version = "0.12.152"
55

66
[weakdeps]
77
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -57,5 +57,5 @@ Static = "0.8.4"
5757
StaticArrayInterface = "1"
5858
ThreadingUtilities = "0.5"
5959
UnPack = "1"
60-
VectorizationBase = "0.21.53"
60+
VectorizationBase = "0.21.60"
6161
julia = "1.6"

src/LoopVectorization.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ export LowDimArray,
196196
vfilter,
197197
vfilter!,
198198
vmapreduce,
199-
vreduce
199+
vreduce,
200+
vcount
200201

201202
const VECTORWIDTHSYMBOL, ELTYPESYMBOL, MASKSYMBOL =
202203
Symbol("##Wvecwidth##"), Symbol("##Tloopeltype##"), Symbol("##mask##")
@@ -234,6 +235,7 @@ include("reconstruct_loopset.jl")
234235
include("constructors.jl")
235236
include("user_api_conveniences.jl")
236237
include("simdfunctionals/mapreduce.jl")
238+
include("simdfunctionals/count.jl")
237239
include("broadcast.jl")
238240

239241
"""

src/simdfunctionals/count.jl

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
_vcount(f) = 0
2+
function _vcount(f::F, args::Vararg{DenseArray,M}) where {F,M}
3+
x = first(args)
4+
y = Base.tail(args)
5+
foreach(a -> @assert(size(a) == size(x)), y)
6+
N = length(x)
7+
ptrargs = map(VectorizationBase.zstridedpointer, args)
8+
i = 0
9+
V = VectorizationBase.pick_vector_width(
10+
reduce(promote_type, map(eltype, ptrargs))
11+
)
12+
W = unwrap(V)
13+
UNROLL = 4
14+
LOG2UNROLL = 2
15+
counts = if VERSION >= v"1.7"
16+
VecUnroll(ntuple(Returns(0), Val(UNROLL)))
17+
else
18+
VecUnroll(ntuple(_ -> (0), Val(UNROLL)))
19+
end
20+
while i < vsub_nsw(N, ((W << LOG2UNROLL) - 1))
21+
index = VectorizationBase.Unroll{1,W,UNROLL,1,W,zero(UInt)}((i,))
22+
counts += count_ones(f(VectorizationBase.fmap(vload, ptrargs, index)...))
23+
i = vadd_nw(i, StaticInt{UNROLL}() * W)
24+
end
25+
count = reduce_tup(+, data(counts))
26+
while i < vsub_nsw(N, (W - 1)) # stops at 16 when
27+
count += count_ones(f(map1(vload, ptrargs, (MM{W}(i),))...))
28+
i = vadd_nw(i, W)
29+
end
30+
if i < N
31+
m = mask(StaticInt(W), N & (W - 1))
32+
vfinal = f(map1(vload, ptrargs, (MM{W}(i),), m)...)
33+
count += count_ones(vfinal & m)
34+
end
35+
count
36+
end
37+
38+
@generated function vcount(f::F, args::Vararg{DenseArray,M}) where {F,M}
39+
call = Expr(:call, :_vcount, :f)
40+
gc_preserve_call_quote(call, M::Int)
41+
end

src/simdfunctionals/map.jl

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,11 @@ function vmap_singlethread!(
7474
::Val{NonTemporal},
7575
args::Vararg{AbstractArray,A}
7676
) where {F,T<:NativeTypes,A,NonTemporal}
77-
ptry, ptrargs, N = setup_vmap!(f, y, Val{NonTemporal}(), args...)
78-
_vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
77+
presy = preserve_buffer(y)
78+
GC.@preserve presy begin
79+
ptry, ptrargs, N = setup_vmap!(f, y, Val{NonTemporal}(), args...)
80+
_vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
81+
end
7982
nothing
8083
end
8184
function _vmap_singlethread!(
@@ -263,20 +266,25 @@ function vmap_multithread!(
263266
end
264267
nothing
265268
end
266-
function gc_preserve_vmap_quote(NonTemporal::Bool, Threaded::Bool, A::Int)
267-
m = Threaded ? :vmap_multithread! : :vmap_singlethread!
268-
call = Expr(:call, m, :f, :y, Expr(:call, Expr(:curly, :Val, NonTemporal)))
269+
function gc_preserve_call_quote(call, A::Int)
269270
q = Expr(:block, Expr(:meta, :inline))
270271
gcpres = Expr(:gc_preserve, call)
271-
for a ∈ 1:Int(A)::Int
272+
for a ∈ 1:A
272273
arg = Symbol(:arg_, a)
273274
parg = Symbol(:parg_, a)
274-
push!(q.args, Expr(:(=), arg, :(@inbounds args[$a])))#Expr(:ref, :args, a)))
275+
push!(q.args, Expr(:(=), arg, :($getfield(args, $a))))
275276
push!(q.args, Expr(:(=), parg, Expr(:call, :preserve_buffer, arg)))
276277
push!(call.args, arg)
277278
push!(gcpres.args, parg)
278279
end
279-
push!(q.args, gcpres, :y)
280+
push!(q.args, gcpres)
281+
q
282+
end
283+
function gc_preserve_vmap_quote(NonTemporal::Bool, Threaded::Bool, A::Int)
284+
m = Threaded ? :vmap_multithread! : :vmap_singlethread!
285+
call = Expr(:call, m, :f, :y, Expr(:call, Expr(:curly, :Val, NonTemporal)))
286+
q = gc_preserve_call_quote(call, A)
287+
push!(q.args, :y)
280288
q
281289
end
282290
@generated function gc_preserve_vmap!(

test/grouptests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const START_TIME = time()
1515
@time if LOOPVECTORIZATION_TEST == "all" || LOOPVECTORIZATION_TEST == "part2"
1616
if VERSION <= v"1.8" || isempty(VERSION.prerelease)
1717
using Aqua
18-
@time Aqua.test_all(LoopVectorization, ambiguities = false)
18+
@time Aqua.test_all(LoopVectorization, ambiguities = false, piracy = false)
1919
end
2020
@test isempty(detect_unbound_args(LoopVectorization))
2121

test/map.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,10 @@
3737
@test vmap(abs2, 1:100) == map(abs2, 1:100)
3838
@test vmapt(abs2, 1:3:10000) == map(abs2, 1:3:10000)
3939
@test vmapt(abs2, 1.0:3.0:10000.0) ≈ map(abs2, 1.0:3.0:10000.0)
40+
41+
for n = -64:64
42+
let x = rand(UInt8, (1 << 14) + n)
43+
@test count(==(UInt8('\n')), x) == vcount(==(UInt8('\n')), x)
44+
end
45+
end
4046
end

0 commit comments

Comments
 (0)