Skip to content

Commit 0b7aaa5

Browse files
committed
Added vmapnt! and vmapntt!
1 parent d38ea34 commit 0b7aaa5

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const SUPPORTED_TYPES = Union{Float16,Float32,Float64,Integer}
1717

1818
export LowDimArray, stridedpointer, vectorizable,
1919
@avx, @_avx, *ˡ, _avx_!,
20-
vmap, vmap!,
20+
vmap, vmap!, vmapnt!, vmapntt!,
2121
vfilter, vfilter!
2222

2323

src/map.jl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,56 @@ function vmap(f::F, args...) where {F}
3030
vmap!(f, dest, args...)
3131
end
3232

33+
"""
    vmapnt!(f, y::AbstractVector, args...)

Apply `f` across `args` one SIMD vector at a time, write the results into `y`, and
return `y`.

Full vectors are written with `vstorent!` (presumably non-temporal stores; confirm
against its definition). A trailing partial vector is written with a masked `vstore!`.

The data pointer of `y` must be aligned to `VectorizationBase.REGISTER_SIZE` bytes;
otherwise an `AssertionError` is thrown. Every element of `args` must support `pointer`.
The inputs are read through those raw pointers at element offsets `0:length(y)-1`, so
each is assumed to hold at least `length(y)` elements. This is not checked here.
"""
function vmapnt!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,A}
    ptry = pointer(y)
    # Explicit check instead of `@assert`: asserts may be disabled at some optimization
    # levels, and this precondition guards raw-pointer stores. The exception type is
    # kept as `AssertionError` so existing callers and tests keep working.
    if (reinterpret(UInt, ptry) & (VectorizationBase.REGISTER_SIZE - 1)) != 0
        throw(AssertionError(
            "destination pointer must be aligned to VectorizationBase.REGISTER_SIZE bytes"
        ))
    end
    W, _ = VectorizationBase.pick_vector_width_shift(T)
    ptrargs = pointer.(args)
    V = VectorizationBase.pick_vector_width_val(T)
    N = length(y)
    i = 0
    # Main loop, unrolled four times: runs while at least 4W elements remain.
    while i < N - ((W << 2) - 1)
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
    end
    # Remaining whole vectors: runs while at least W elements remain.
    while i < N - (W - 1)
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
    end
    # Tail of fewer than W elements: masked load and masked (regular) store.
    if i < N
        m = mask(T, N & (W - 1))
        vstore!(ptry, extract_data(f(vload.(V, ptrargs, i, m)...)), i, m)
    end
    return y
end
56+
"""
    vmapntt!(f, y::AbstractVector, args...)

Threaded counterpart of `vmapnt!`: apply `f` across `args` one SIMD vector at a time,
write the results into `y`, and return `y`.

The leading `4W`-element groups (`W` being the vector width chosen for `eltype(y)`) are
distributed over threads with `Base.Threads.@threads`, each iteration issuing four
`vstorent!` stores. Whatever is left after the last full group is processed serially:
whole vectors with `vstorent!`, then a masked `vstore!` for a trailing partial vector.

The data pointer of `y` must be aligned to `VectorizationBase.REGISTER_SIZE` bytes;
otherwise an `AssertionError` is thrown. Every element of `args` must support `pointer`.
The inputs are read through those raw pointers at element offsets `0:length(y)-1`, so
each is assumed to hold at least `length(y)` elements. This is not checked here.
"""
function vmapntt!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,A}
    ptry = pointer(y)
    # Explicit check instead of `@assert`: asserts may be disabled at some optimization
    # levels, and this precondition guards raw-pointer stores. The exception type is
    # kept as `AssertionError` so existing callers and tests keep working.
    if (reinterpret(UInt, ptry) & (VectorizationBase.REGISTER_SIZE - 1)) != 0
        throw(AssertionError(
            "destination pointer must be aligned to VectorizationBase.REGISTER_SIZE bytes"
        ))
    end
    W, Wshift = VectorizationBase.pick_vector_width_shift(T)
    ptrargs = pointer.(args)
    V = VectorizationBase.pick_vector_width_val(T)
    N = length(y)
    Wsh = Wshift + 2      # shift for a group of four vectors (4W elements)
    Niter = N >>> Wsh     # number of complete 4W-element groups
    Base.Threads.@threads for j in 0:Niter-1
        # Each iteration owns the disjoint range of 4W elements starting at `j << Wsh`.
        i = j << Wsh
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i); i += W
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, i)...)), i)
    end
    # Serial clean-up starts right after the last complete group.
    ii = Niter << Wsh
    # Remaining whole vectors: runs while at least W elements remain.
    while ii < N - (W - 1)
        vstorent!(ptry, extract_data(f(vload.(V, ptrargs, ii)...)), ii); ii += W
    end
    # Tail of fewer than W elements: masked load and masked (regular) store.
    if ii < N
        m = mask(T, N & (W - 1))
        vstore!(ptry, extract_data(f(vload.(V, ptrargs, ii, m)...)), ii, m)
    end
    return y
end
82+
3383
# @inline vmap!(f, y, x...) = @avx y .= f.(x...)
3484
# @inline vmap(f, x...) = @avx f.(x...)
3585

test/map.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
11
# Checks that vmap, vmapnt! and vmapntt! agree with Base.map, and that vmap! agrees
# with Base.map! for a closure over `clenshaw` (defined elsewhere in the test suite).
@testset "map" begin
    @inline foo(x, y) = exp(x) - sin(y)
    N = 3781
    for T ∈ (Float32, Float64)
        @show T, @__LINE__
        a = rand(T, N); b = rand(T, N);
        c1 = map(foo, a, b);
        c2 = vmap(foo, a, b);
        @test c1 ≈ c2
        # Fill the destination with NaN first so that stale values from the previous
        # call cannot make the comparison pass.
        fill!(c2, NaN); vmapnt!(foo, c2, a, b);
        @test c1 ≈ c2
        fill!(c2, NaN); vmapntt!(foo, c2, a, b);
        @test c1 ≈ c2
        # Views starting at the second element are offset by one element from the
        # parent array, so the destination pointer is expected to fail the alignment
        # check. `foo` is passed explicitly so the destination view is what gets checked.
        @test_throws AssertionError @views vmapnt!(foo, c2[2:end], a[2:end], b[2:end])
        @test_throws AssertionError @views vmapntt!(foo, c2[2:end], a[2:end], b[2:end])

        c = rand(T,100); x = rand(T,10^4); y1 = similar(x); y2 = similar(x);
        map!(xᵢ -> clenshaw(xᵢ, c), y1, x)
        vmap!(xᵢ -> clenshaw(xᵢ, c), y2, x)
        @test y1 ≈ y2

    end
end

0 commit comments

Comments
 (0)