Skip to content

Commit 48b056b

Browse files
committed
Add vcount(::AbstractArray{Bool}), fixes #479
Also add `vcount(::typeof(identity), ::AbstractArray{Bool})`. Performance was good on an x64 AVX512 system, but bad on ARM NEON.
1 parent c7950f9 commit 48b056b

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,5 @@ Static = "0.8.4"
5757
StaticArrayInterface = "1"
5858
ThreadingUtilities = "0.5"
5959
UnPack = "1"
60-
VectorizationBase = "0.21.60"
60+
VectorizationBase = "0.21.62"
6161
julia = "1.6"

src/simdfunctionals/count.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ function _vcount(f::F, args::Vararg{DenseArray,M}) where {F,M}
1212
W = unwrap(V)
1313
UNROLL = 4
1414
LOG2UNROLL = 2
15-
counts = if VERSION >= v"1.7"
15+
_counts = if VERSION >= v"1.7"
1616
VecUnroll(ntuple(Returns(0), Val(UNROLL)))
1717
else
1818
VecUnroll(ntuple(_ -> (0), Val(UNROLL)))
1919
end
20+
counts::typeof(_counts) = _counts
2021
while i < vsub_nsw(N, ((W << LOG2UNROLL) - 1))
2122
index = VectorizationBase.Unroll{1,W,UNROLL,1,W,zero(UInt)}((i,))
2223
counts += count_ones(f(VectorizationBase.fmap(vload, ptrargs, index)...))
@@ -39,3 +40,6 @@ end
3940
call = Expr(:call, :_vcount, :f)
4041
gc_preserve_call_quote(call, M::Int)
4142
end
43+
vcount(::typeof(identity), x::AbstractArray{Bool}) =
44+
vcount(VectorizationBase.tomask, x)
45+
vcount(x::AbstractArray{Bool}) = vcount(VectorizationBase.tomask, x)

0 commit comments

Comments
 (0)