Skip to content

Commit ed466fb

Browse files
committed
ArrayInterface 5
1 parent 4fecc57 commit ed466fb

File tree

3 files changed

+47
-57
lines changed

3 files changed

+47
-57
lines changed

Project.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.102"
4+
version = "0.12.103"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -25,8 +25,8 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
2525
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
2626

2727
[compat]
28-
ArrayInterface = "3.1.32, 3.2.1"
29-
CPUSummary = "0.1.3"
28+
ArrayInterface = "3.1.32, 3.2.1, 5.0.1"
29+
CPUSummary = "0.1.3 - 0.1.8, 0.1.11"
3030
ChainRulesCore = "1"
3131
CloseOpenIntervals = "0.1.2"
3232
DocStringExtensions = "0.8"
@@ -39,7 +39,7 @@ PolyesterWeave = "0.1"
3939
SIMDDualNumbers = "0.1"
4040
SLEEFPirates = "0.6.23"
4141
SpecialFunctions = "1, 2"
42-
Static = "0.3.3, 0.4"
42+
Static = "0.3.3, 0.4, 0.6"
4343
ThreadingUtilities = "0.5"
4444
UnPack = "1"
4545
VectorizationBase = "0.21.21"

src/broadcast.jl

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Base.@propagate_inbounds Base.getindex(A::LowDimArray, i::Vararg{Union{Integer,C
1010

1111
@inline ArrayInterface.parent_type(::Type{LowDimArray{D,T,N,A}}) where {T,D,N,A} = A
1212
@inline Base.strides(A::LowDimArray) = map(Int, strides(A))
13-
@inline ArrayInterface.device(A::LowDimArray) = ArrayInterface.CPUPointer()
13+
@inline ArrayInterface.device(::LowDimArray) = ArrayInterface.CPUPointer()
1414
@generated function ArrayInterface.size(A::LowDimArray{D,T,N}) where {D,T,N}
1515
t = Expr(:tuple)
1616
gf = GlobalRef(Core,:getfield)
@@ -335,9 +335,8 @@ end
335335

336336
function add_broadcast!(
337337
ls::LoopSet, destname::Symbol, bcname::Symbol, loopsyms::Vector{Symbol},
338-
@nospecialize(LDA::Type{LowDimArray{D,T,N,A}}), elementbytes::Int
338+
@nospecialize(_::Type{LowDimArray{D,T,N,A}}), elementbytes::Int
339339
) where {D,T,N,A}
340-
# D,T,N::Int,_ = LDA.parameters
341340
Dlen = length(D)
342341
if Dlen == N && !any(D) # array is a scalar, as it is broadcasted on all dimensions
343342
return extract_all_1_array!(ls, bcname, N, elementbytes)
@@ -422,7 +421,6 @@ function add_broadcast!(
422421
Instruction(bcname, f)
423422
end
424423
args = A.parameters
425-
Nargs = length(args)
426424
bcargs = gensym!(ls, "bcargs")
427425
pushprepreamble!(ls, Expr(:(=), bcargs, Expr(:(.), bcname, QuoteNode(:args))))
428426
# this is the var name in the loop
@@ -447,7 +445,7 @@ end
447445
function add_broadcast_loops!(ls::LoopSet, loopsyms::Vector{Symbol}, destsym::Symbol)
448446
axes_tuple = Expr(:tuple)
449447
pushpreamble!(ls, Expr(:(=), axes_tuple, Expr(:call, :axes, destsym)))
450-
for (n,itersym) ∈ enumerate(loopsyms)
448+
for itersym ∈ loopsyms
451449
Nrange = gensym!(ls, "N")
452450
Nlower = gensym!(ls, "N")
453451
Nupper = gensym!(ls, "N")
@@ -464,15 +462,15 @@ end
464462
@generated function vmaterialize!(
465463
dest::AbstractArray{T,N}, bc::BC, ::Val{Mod}, ::Val{UNROLL}
466464
) where {T <: NativeTypes, N, BC <: Union{Broadcasted,Product}, Mod, UNROLL}
467-
# 2+1
465+
2+1
468466
# we have an N dimensional loop.
469467
# need to construct the LoopSet
470468
# @show typeof(dest)
471469
ls = LoopSet(Mod)
472-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
470+
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
473471
set_hw!(ls, rs, rc, cls, l1, l2, l3)
474472
ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro
475-
loopsyms = [gensym!(ls, "n") for n ∈ 1:N]
473+
loopsyms = [gensym!(ls, "n") for _ ∈ 1:N]
476474
add_broadcast_loops!(ls, loopsyms, :dest)
477475
elementbytes = sizeof(T)
478476
add_broadcast!(ls, :destination, :bc, loopsyms, BC, elementbytes)
@@ -481,6 +479,7 @@ end
481479
resize!(ls.loop_order, num_loops(ls)) # num_loops may be greater than N, eg Product
482480
# return ls
483481
sc = setup_call(ls, :(Base.Broadcast.materialize!(dest, bc)), LineNumberNode(0), inline, false, u₁, u₂, v, threads%Int, warncheckarg)
482+
# for n in loopsyms; push!(sc.args, :(@show $n)); end
484483
Expr(:block, Expr(:meta,:inline), sc, :dest)
485484
end
486485
@generated function vmaterialize!(
@@ -489,10 +488,10 @@ end
489488
# we have an N dimensional loop.
490489
# need to construct the LoopSet
491490
ls = LoopSet(Mod)
492-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
491+
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
493492
set_hw!(ls, rs, rc, cls, l1, l2, l3)
494493
ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro
495-
loopsyms = [gensym!(ls, "n") for n ∈ 1:N]
494+
loopsyms = [gensym!(ls, "n") for _ ∈ 1:N]
496495
pushprepreamble!(ls, Expr(:(=), :dest, Expr(:call, :parent, :dest′)))
497496
add_broadcast_loops!(ls, loopsyms, :dest′)
498497
elementbytes = sizeof(T)

test/broadcast.jl

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
using LoopVectorization, Test
2-
# T = Float32
3-
4-
macro outline(ex)
5-
quote
6-
(() -> begin
7-
Base.@_noinline_meta
8-
$(esc(ex))
9-
end)()
10-
end
11-
end
2+
T = Float32
123

134
function test_broadcast(::Type{T}) where {T}
145
M, N = 37, 47
@@ -22,35 +13,35 @@ function test_broadcast(::Type{T}) where {T}
2213

2314
br = reshape(b, (99,99));
2415
c1 = a .+ b;
25-
c2 = @outline @turbo a .+ bl;
16+
c2 = @turbo inline=false a .+ bl;
2617
@test c1 ≈ c2
27-
fill!(c2, 99999); @outline @turbo c2 .= a .+ br;
18+
fill!(c2, 99999); @turbo inline=false c2 .= a .+ br;
2819
@test c1 ≈ c2
29-
fill!(c2, 99999); @outline @turbo c2 .= a .+ b;
20+
fill!(c2, 99999); @turbo inline=false c2 .= a .+ b;
3021
@test c1 ≈ c2
3122
br = reshape(b, (99,1,99));
3223
bl = LowDimArray{(true,false,true)}(br);
3324
@test size(bl) == size(br)
3425
@test LoopVectorization.ArrayInterface.size(bl) === (size(br,1),LoopVectorization.StaticInt(1),size(br,3))
3526
@. c1 = a + br;
36-
fill!(c2, 99999); @outline @turbo @. c2 = a + bl;
27+
fill!(c2, 99999); @turbo inline=false @. c2 = a + bl;
3728
@test c1 ≈ c2
38-
fill!(c2, 99999); @outline @turbo @. c2 = a + br;
29+
fill!(c2, 99999); @turbo inline=false @. c2 = a + br;
3930
@test c1 ≈ c2
4031
br = reshape(b, (1,99,99));
4132
bl = LowDimArray{(false,)}(br);
4233
@test size(bl) == size(br)
4334
@test LoopVectorization.ArrayInterface.size(bl) === (LoopVectorization.StaticInt(1),size(br,2),size(br,3))
4435
@. c1 = a + br;
4536
fill!(c2, 99999);
46-
@test c1 ≈ @outline @turbo @. c2 = a + bl
37+
@test c1 ≈ @turbo inline=false @. c2 = a + bl
4738
# @test c1 ≈ c2
4839
br = reshape(rand(R,99), (1,99,1));
4940
bl = LowDimArray{(false,)}(br);
5041
@test size(bl) == size(br)
5142
@. c1 = a + br;
5243
fill!(c2, 99999);
53-
@outline @turbo @. c2 = a + bl;
44+
@turbo inline=false @. c2 = a + bl;
5445
@test c1 ≈ c2
5546

5647
if T <: Integer
@@ -59,76 +50,76 @@ function test_broadcast(::Type{T}) where {T}
5950
xs = rand(T, M);
6051
end
6152
max_ = maximum(xs, dims=1);
62-
@test (@outline @turbo exp.(xs .- LowDimArray{(false,)}(max_))) ≈ exp.(xs .- LowDimArray{(false,)}(max_))
53+
@test (@turbo inline=false exp.(xs .- LowDimArray{(false,)}(max_))) ≈ exp.(xs .- LowDimArray{(false,)}(max_))
6354
@test size(LowDimArray{(false,)}(max_)) == size(max_)
6455

6556
if T === Int32
6657
a = rand(T(1):T(100), 73, 1);
67-
@test sqrt.(Float32.(a)) ≈ @outline @turbo sqrt.(a)
58+
@test sqrt.(Float32.(a)) ≈ @turbo inline=false sqrt.(a)
6859
elseif T === Int64
6960
a = rand(T(1):T(100), 73, 1);
70-
@test sqrt.(a) ≈ @outline @tturbo sqrt.(a)
61+
@test sqrt.(a) ≈ @tturbo inline=false sqrt.(a)
7162
else
7263
a = rand(T, 73, 1);
73-
@test sqrt.(a) ≈ @outline @turbo sqrt.(a)
64+
@test sqrt.(a) ≈ @turbo inline=false sqrt.(a)
7465
end
7566

7667
a = rand(R, M); B = rand(R, M, N); c = rand(R, N); c′ = c';
7768
d1 = @. a + B * c′;
78-
d2 = @outline @tturbo @. a + B * c′;
69+
d2 = @tturbo inline=false @. a + B * c′;
7970
@test d1 ≈ d2
8071

8172
@. d1 = a + B * c′;
82-
@outline @turbo @. d2 = a + B * c′;
73+
@turbo inline=false @. d2 = a + B * c′;
8374
@test d1 ≈ d2
8475

8576
d3 = a .+ B * c;
86-
d4 = @outline @turbo a .+ B *ˡ c;
77+
d4 = @turbo inline=false a .+ B *ˡ c;
8778
@test d3 ≈ d4
8879

8980
fill!(d3, -1000.0);
9081
fill!(d4, 91000.0);
9182

9283
d3 .= a .+ B * c;
93-
@outline @turbo d4 .= a .+ B *ˡ c;
84+
@turbo inline=false d4 .= a .+ B *ˡ c;
9485
@test d3 ≈ d4
9586

9687
fill!(d4, 91000.0);
97-
@outline @turbo @. d4 = a + B *ˡ c;
88+
@turbo inline=false @. d4 = a + B *ˡ c;
9889
@test d3 ≈ d4
9990

10091
M, K, N = 77, 83, 57;
10192
A = rand(R,M,K); B = rand(R,K,N); C = rand(R,M,N);
10293
At = copy(A');
10394
D1 = C .+ A * B;
104-
D2 = @outline @tturbo C .+ A .*ˡ B;
95+
D2 = @tturbo inline=false C .+ A .*ˡ B;
10596
@test D1 ≈ D2
10697
if RUN_SLOW_TESTS
107-
fill!(D2, -999999); D2 = @outline @turbo C .+ At' *ˡ B;
98+
fill!(D2, -999999); D2 = @turbo inline=false C .+ At' *ˡ B;
10899
@test D1 ≈ D2
109-
fill!(D2, -999999); @test A * B ≈ (@outline @turbo @. D2 = A *ˡ B)
100+
fill!(D2, -999999); @test A * B ≈ (@turbo inline=false @. D2 = A *ˡ B)
110101
D1 .= view(C, 1, :)' .+ A * B;
111102
fill!(D2, -999999);
112-
@outline @turbo D2 .= view(C, 1, :)' .+ A .*ˡ B;
103+
@turbo inline=false D2 .= view(C, 1, :)' .+ A .*ˡ B;
113104
@test D1 ≈ D2
114105
C3d = rand(R,3,M,N);
115106
D1 .= view(C3d, 1, :, :) .+ A * B;
116107
fill!(D2, -999999);
117-
@outline @tturbo D2 .= view(C3d, 1, :, :) .+ A .*ˡ B;
108+
@tturbo inline=false D2 .= view(C3d, 1, :, :) .+ A .*ˡ B;
118109
@test D1 ≈ D2
119110
end
120111
D1 .= 9999;
121-
@outline @turbo D2 .= 9999;
112+
@turbo inline=false D2 .= 9999;
122113
@test D1 == D2
123114
D1 .= -99999;
124-
@outline @tturbo D2' .= -99999;
115+
@tturbo inline=false D2' .= -99999;
125116
@test D1 == D2
126117

127118
b = rand(T,K); x = rand(R,N);
128119
D1 .= C .+ A * (b .+ x');
129-
@outline @tturbo @. D2 = C + A *ˡ (b + x');
120+
@tturbo inline=false @. D2 = C + A *ˡ (b + x');
130121
@test D1 ≈ D2
131-
D2 = @outline @turbo @. C + A *ˡ (b + x');
122+
D2 = @turbo inline=false @. C + A *ˡ (b + x');
132123
@test D1 ≈ D2
133124
if T === Int64
134125
xd = rand(-1_000_000_000_000:1_000_000_000_000,89);
@@ -143,43 +134,43 @@ function test_broadcast(::Type{T}) where {T}
143134

144135
if T <: Union{Float32,Float64}
145136
D3 = cos.(B');
146-
D4 = @outline @turbo cos.(B');
137+
D4 = @turbo inline=false cos.(B');
147138
@test D3 ≈ D4
148139

149140
fill!(D3, -1e3); fill!(D4, 9e9);
150141
Bt = transpose(B);
151142
@. D3 = exp(Bt);
152-
@outline @tturbo @. D4 = exp(Bt);
143+
@tturbo inline=false @. D4 = exp(Bt);
153144
@test D3 ≈ D4
154145

155146
D1 = similar(B); D2 = similar(B);
156147
D1t = transpose(D1);
157148
D2t = transpose(D2);
158149
@. D1t = exp(Bt);
159-
@outline @turbo @. D2t = exp(Bt);
150+
@turbo inline=false @. D2t = exp(Bt);
160151
@test D1t ≈ D2t
161152

162153
fill!(D1, -1e3);
163154
fill!(D2, 9e9);
164155
@. D1' = exp(Bt);
165-
lset = @outline @tturbo @. D2' = exp(Bt);
156+
lset = @tturbo inline=false @. D2' = exp(Bt);
166157

167158
@test D1 ≈ D2
168159

169160
a = rand(137);
170-
b1 = @outline @turbo @. 3*a + sin(a) + sqrt(a);
161+
b1 = @turbo inline=false @. 3*a + sin(a) + sqrt(a);
171162
b2 = @. 3*a + sin(a) + sqrt(a);
172163
@test b1 ≈ b2
173164
three = 3; fill!(b1, -9999);
174-
@outline @tturbo @. b1 = three*a + sin(a) + sqrt(a);
165+
@tturbo inline=false @. b1 = three*a + sin(a) + sqrt(a);
175166
@test b1 ≈ b2
176167

177168
C = rand(100,10,10);
178169
D1 = C .^ 0.3;
179-
D2 = @outline @tturbo C .^ 0.3;
170+
D2 = @tturbo inline=false C .^ 0.3;
180171
@test D1 ≈ D2
181172
@. D1 = C ^ 2;
182-
@outline @turbo @. D2 = C ^ 2;
173+
@turbo inline=false @. D2 = C ^ 2;
183174
@test D1 ≈ D2
184175
@turbo view(C,1:100,1:10,1:10) .= 0;
185176
@test all(==(0), C)

0 commit comments

Comments
 (0)