Skip to content

Commit 7a63110

Browse files
committed
Added vfilter to resolve #42.
1 parent b74f4f5 commit 7a63110

File tree

6 files changed

+1214
-1162
lines changed

6 files changed

+1214
-1162
lines changed

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4949

5050
[[SIMDPirates]]
5151
deps = ["VectorizationBase"]
52-
git-tree-sha1 = "d652a1a47fd0806ca3f9109a0d729c3f7701e897"
52+
git-tree-sha1 = "a27b812034efdb062ec3d9e787bc299510057b3d"
5353
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
54-
version = "0.3.4"
54+
version = "0.3.5"
5555

5656
[[SLEEFPirates]]
5757
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.6"
4+
version = "0.6.7"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -12,7 +12,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1212

1313
[compat]
1414
Parameters = "0"
15-
SIMDPirates = "0.3.4, 0.4, 0.5"
15+
SIMDPirates = "0.3.5, 0.4, 0.5"
1616
SLEEFPirates = "0.3.2, 0.4, 0.5"
1717
VectorizationBase = "0.2.5, 0.3, 0.4"
1818
julia = "1.1"

src/LoopVectorization.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_pr
1111
using Base.Broadcast: Broadcasted, DefaultArrayStyle
1212
using LinearAlgebra: Adjoint, Transpose
1313

14+
const SUPPORTED_TYPES = Union{Float16,Float32,Float64,Integer}
1415

1516
export LowDimArray, stridedpointer, vectorizable,
1617
@avx, @_avx, *ˡ, _avx_!,
17-
vmap, vmap!
18+
vmap, vmap!,
19+
vfilter, vfilter!
1820

1921

2022
include("map.jl")
23+
include("filter.jl")
2124
include("costs.jl")
2225
include("operations.jl")
2326
include("graphs.jl")

src/broadcast.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ end
188188
# function vmaterialize!(
189189
@generated function vmaterialize!(
190190
dest::AbstractArray{T,N}, bc::BC, ::Val{Mod}
191-
) where {T <: Union{Float32,Float64}, N, BC <: Broadcasted, Mod}
191+
) where {T <: SUPPORTED_TYPES, N, BC <: Broadcasted, Mod}
192192
# we have an N dimensional loop.
193193
# need to construct the LoopSet
194194
loopsyms = [gensym(:n) for n ∈ 1:N]
@@ -212,7 +212,7 @@ end
212212
end
213213
@generated function vmaterialize!(
214214
dest′::Union{Adjoint{T,A},Transpose{T,A}}, bc::BC, ::Val{Mod}
215-
) where {T <: Union{Float32,Float64}, N, A <: AbstractArray{T,N}, BC <: Broadcasted, Mod}
215+
) where {T <: SUPPORTED_TYPES, N, A <: AbstractArray{T,N}, BC <: Broadcasted, Mod}
216216
# we have an N dimensional loop.
217217
# need to construct the LoopSet
218218
loopsyms = [gensym(:n) for n ∈ 1:N]

src/filter.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
2+
"""
    vfilter!(f, x::Vector{T}, y::AbstractArray{T}) where {T <: SUPPORTED_TYPES}

Write into `x` the elements of `y` selected by the predicate `f`, resize `x` to the
number of elements written, and return `x`.

`y` is processed `W` elements at a time, where `W` comes from
`VectorizationBase.pick_vector_width_shift(T)`. `f` is called on an `SVec` wrapping each
loaded vector, and its result is used directly as the lane mask (it is passed to
`count_ones` and combined with the remainder mask via `&`).

# Arguments
- `f`: predicate applied to a whole `SVec` at once; must return a mask.
- `x`: destination buffer. It is grown to `length(y)` if shorter, since in the worst
  case every element of `y` is kept, and is shrunk to the kept count before returning.
- `y`: source array, read through `pointer(y)`.
  NOTE(review): this presumably requires `y` to be contiguous in memory — confirm.

# Returns
`x`, with `length(x)` equal to the number of selected elements.
"""
function vfilter!(f::F, x::Vector{T}, y::AbstractArray{T}) where {F,T <: SUPPORTED_TYPES}
    W, Wshift = VectorizationBase.pick_vector_width_shift(T)
    N = length(y)
    Nrep = N >>> Wshift   # number of full-width vectors
    Nrem = N & (W - 1)    # leftover elements handled with a masked load
    # The compress-stores below write through a raw pointer with no bounds check, and
    # may emit up to N elements in total. Grow the destination *before* taking its
    # pointer so the pointer stays valid for the whole preserved region.
    length(x) < N && resize!(x, N)
    i = 0   # read offset into y
    j = 0   # write offset into x (count of elements kept so far)
    GC.@preserve x y begin
        ptr_x = pointer(x)
        ptr_y = pointer(y)
        for _ ∈ 1:Nrep
            vy = vload(Vec{W,T}, gep(ptr_y, i))
            mask = f(SVec(vy))
            SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
            i += W
            j += count_ones(mask)
        end
        # Remainder: only the first Nrem lanes are loaded and eligible for storing.
        rem_mask = VectorizationBase.mask(T, Nrem)
        vy = vload(Vec{W,T}, gep(ptr_y, i), rem_mask)
        mask = rem_mask & f(SVec(vy))
        SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
        j += count_ones(mask)
        # Trim the destination to the number of elements actually written.
        resize!(x, j)
    end
    return x
end
28+
"""
    vfilter(f, y::AbstractArray{T}) where {T <: SUPPORTED_TYPES}

Non-mutating counterpart of `vfilter!`: allocate an uninitialized `Vector{T}` with
`length(y)` elements, pass it to `vfilter!` together with `f` and `y`, and return the
result of that call.
"""
function vfilter(f, y::AbstractArray{T}) where {T<:SUPPORTED_TYPES}
    dest = Vector{T}(undef, length(y))
    return vfilter!(f, dest, y)
end
29+

0 commit comments

Comments
 (0)