25
25
vmap_quote (N, T)
26
26
end
27
27
28
- function vmapnt! (f:: F , y:: AbstractVector{T} , args:: Vararg{<:Any,A} ) where {F,T,A}
28
"""
    alignstores!(f, y, args...)

Perform one masked `f`-map store at the start of `y` when `pointer(y)` is not a
multiple of `VectorizationBase.REGISTER_SIZE`, then return the state the main
loops should continue from.

# Arguments
- `f`: function applied to the vectors loaded from `args`.
- `y`: destination vector; its pointer must be a multiple of `sizeof(T)`.
- `args`: inputs; `pointer` is taken of each and they are advanced in lockstep
  with `y`.

# Returns
A tuple `(ptry, ptrargs, N)`: the destination pointer, the tuple of argument
pointers, and the number of elements still to be processed. When a peel store
was done, the pointers are advanced by the peeled element count `i` and the
count is reduced by `i`, clamped so it is never negative.
"""
function alignstores!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,A}
    N = length(y)
    ptry = pointer(y)
    ptrargs = pointer.(args)
    W = VectorizationBase.pick_vector_width(T)
    V = VectorizationBase.pick_vector_width_val(T)
    @assert iszero(reinterpret(UInt, ptry) & (sizeof(T) - 1)) " The destination vector (`dest`) must be aligned at least to `sizeof(eltype(dest))`."
    # Byte offset of `ptry` past the previous register-size boundary.
    alignment = reinterpret(UInt, ptry) & (VectorizationBase.REGISTER_SIZE - 1)
    # Nothing to peel for an empty destination; skipping it also avoids relying
    # on what `mask(T, 0)` evaluates to.
    if alignment > 0 && N > 0
        # Elements needed to reach the next boundary: `W` minus the offset
        # converted from bytes to elements.
        # NOTE(review): assumes W * sizeof(T) == REGISTER_SIZE -- confirm.
        i = reinterpret(Int, W - (alignment >>> VectorizationBase.intlog2(sizeof(T))))
        m = mask(T, i)
        if N < i
            # Fewer than `i` elements exist, so restrict the mask to those.
            m &= mask(T, N & (W - 1))
        end
        vstore!(ptry, extract_data(f(vload.(V, ptrargs, m)...)), m)
        # Clamp the remaining count: `vmapntt!` computes `N >>> Wsh`, and a
        # logical shift of a negative `Int` yields a huge iteration count.
        return gep(ptry, i), gep.(ptrargs, i), max(N - i, 0)
    else
        return ptry, ptrargs, N
    end
end
48
+
49
+ function vmapnt! (f:: F , y:: AbstractVector{T} , args:: Vararg{<:Any,A} ) where {F,T,A}
50
+ ptry, ptrargs, N = alignstores! (f, y, args... )
35
51
i = 0
52
+ W = VectorizationBase. pick_vector_width (T)
53
+ V = VectorizationBase. pick_vector_width_val (T)
36
54
while i < N - ((W << 2 ) - 1 )
37
55
vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i); i += W
38
56
vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i); i += W
@@ -49,12 +67,9 @@ function vmapnt!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,A
49
67
y
50
68
end
51
69
function vmapntt! (f:: F , y:: AbstractVector{T} , args:: Vararg{<:Any,A} ) where {F,T,A}
52
- ptry = pointer (y)
53
- @assert reinterpret (UInt, ptry) & (VectorizationBase. REGISTER_SIZE - 1 ) == 0
70
+ ptry, ptrargs, N = alignstores! (f, y, args... )
54
71
W, Wshift = VectorizationBase. pick_vector_width_shift (T)
55
- ptrargs = pointer .(args)
56
72
V = VectorizationBase. pick_vector_width_val (T)
57
- N = length (y)
58
73
Wsh = Wshift + 2
59
74
Niter = N >>> Wsh
60
75
Base. Threads. @threads for j ∈ 0 : Niter- 1
0 commit comments