Skip to content

Commit 09f8364

Browse files
authored
tests pass, incorporates new storage (#18)
* BandedBlockBandedMatrix can parameterize storage But no tests for anything but PseudoBlockMatrix yet. * Generalizes some constructor to parameterized over container type * Add tests for BlockArray container type * Added still one more generalized constructor * Cannot use const in local scope * simplify constructors * add SharedArray example * Speed up + * make shared array example allocation free * Start CuArrays example * CuArrays experimental version * push from discussion * Fix shared example * tests pass again (with BlockArrays master) * Faster fill! * Example for alternate underlying array types * split blockarray/gpu example from shared arrays * tests pass with new LazyArrays * BlockBanded * Matrix, Matrix * BlockBanded * remove blas macros * pass with new lazy arrays * bump bandedmatrices require * bump LazyArrays * import materialize!
1 parent a158e88 commit 09f8364

18 files changed

+857
-115
lines changed

REQUIRE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
julia 0.7
2-
BandedMatrices 0.6
2+
BandedMatrices 0.7
33
BlockArrays 0.5
4-
FillArrays 0.2.1
5-
LazyArrays 0.2.1
4+
FillArrays 0.3
5+
LazyArrays 0.3.1

examples/blockarray_backend.jl

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
# using CUDAnative
2+
# device!(0)
3+
# using CuArrays
4+
using GPUArrays
5+
using BlockArrays: _BlockArray, PseudoBlockArray, BlockArray, BlockMatrix, BlockVector,
6+
nblocks, Block, cumulsizes, AbstractBlockVector
7+
using BlockBandedMatrices: BandedBlockBandedMatrix, _BandedBlockBandedMatrix,
8+
blockbandwidths, subblockbandwidths, blockbandwidth,
9+
BandedBlockBandedSizes
10+
using LinearAlgebra: BLAS
11+
using BandedMatrices: _BandedMatrix
12+
using SharedArrays
13+
using LazyArrays
14+
import Distributed
15+
16+
import Adapt: adapt
17+
import LinearAlgebra
18+
19+
############### Loot and plunder
20+
# BlockArrays
21+
adapt(T::Type{<:AbstractArray}, b::BlockArray) =
22+
_BlockArray(T.(b.blocks), b.block_sizes)
23+
adapt(T::Type{<:AbstractArray}, b::PseudoBlockArray) =
24+
PseudoBlockArray(T(b.blocks), b.block_sizes)
25+
adapt(T::Type{<:PseudoBlockArray}, b::BlockArray) = T(b.blocks, b.block_sizes)
26+
adapt(T::Type{<:BlockArray}, b::PseudoBlockArray) = T(b.blocks, b.block_sizes)
27+
# CuArrays and BlockArrays
28+
if @isdefined CuArray
29+
adapt(T::Type{<:CuArray}, b::PseudoBlockArray) = adapt(T, BlockArray(b))
30+
end
31+
###############
32+
33+
adapt(T::Type, b::BandedBlockBandedMatrix) =
34+
_BandedBlockBandedMatrix(adapt(T, b.data), b.block_sizes)
35+
36+
37+
function LinearAlgebra.mul!(c::BlockVector{T},
38+
A::BandedBlockBandedMatrix{T, <: BlockMatrix},
39+
x::BlockVector{T}) where T
40+
@assert nblocks(A, 1) == nblocks(c, 1)
41+
@assert cumulsizes(A, 1) == cumulsizes(c, 1)
42+
@assert nblocks(A, 2) == nblocks(x, 1)
43+
@assert cumulsizes(A, 2) == cumulsizes(x, 1)
44+
45+
for block in c.blocks
46+
fill!(block, zero(eltype(block)))
47+
end
48+
l, u = blockbandwidths(A)
49+
λ, μ = subblockbandwidths(A)
50+
N,M = nblocks(A)
51+
52+
@inbounds for i = 1:N, j = max(1,i-l):min(M,i+u)
53+
BLAS.gbmv!('N', size(view(A, Block(i, j)), 1), λ, μ, one(T),
54+
A.data.blocks[i - j + u + 1, j],
55+
x.blocks[j], one(T), c.blocks[i])
56+
end
57+
58+
c
59+
end
60+
61+
function banded_mul!(c::BlockVector{T},
62+
A::BandedBlockBandedMatrix{T, <: BlockMatrix},
63+
x::AbstractBlockVector{T}) where T
64+
@assert nblocks(A, 1) == nblocks(c, 1)
65+
@assert cumulsizes(A, 1) == cumulsizes(c, 1)
66+
@assert nblocks(A, 2) == nblocks(x, 1)
67+
@assert cumulsizes(A, 2) == cumulsizes(x, 1)
68+
69+
for block in c.blocks
70+
fill!(block, zero(eltype(block)))
71+
end
72+
l, u = blockbandwidths(A)
73+
λ, μ = subblockbandwidths(A)
74+
N, M = nblocks(A)
75+
76+
@inbounds for i = 1:N, j = max(1,i-l):min(M,i+u)
77+
B = _BandedMatrix(A.data.blocks[i - j + u + 1, j],
78+
size(view(A, Block(i, j)), 1),
79+
λ, μ)
80+
c[Block(i)] .+= Mul(B, x.blocks[j])
81+
end
82+
83+
c
84+
end
85+
86+
function nofill_mul!(Cblock::BandedBlockBandedMatrix{T, <: BlockMatrix},
87+
Ablock::BandedBlockBandedMatrix{T, <: BlockMatrix},
88+
Xblock::BandedBlockBandedMatrix{T, <: BlockMatrix}) where T
89+
@assert nblocks(Ablock, 1) == nblocks(Cblock, 1)
90+
@assert cumulsizes(Ablock, 1) == cumulsizes(Cblock, 1)
91+
@assert nblocks(Ablock, 2) == nblocks(Xblock, 1)
92+
@assert cumulsizes(Ablock, 2) == cumulsizes(Xblock, 1)
93+
@assert nblocks(Xblock, 2) == nblocks(Cblock, 2)
94+
@assert cumulsizes(Xblock, 2) == cumulsizes(Xblock, 2)
95+
96+
lₐ, uₐ = blockbandwidths(Ablock)
97+
lₓ, uₓ = blockbandwidths(xblock)
98+
λ, μ = subblockbandwidths(Ablock)
99+
N,M = nblocks(Ablock)
100+
M, K = nblocks(Xblock)
101+
102+
@inbounds for i = 1:N, j = max(1,i-lₐ):min(M,i+uₐ), k = max(1, j - lₓ):min(j + uₓ, K)
103+
BLAS.gbmv!('N', size(view(Ablock, Block(i, j)), 1), λ, μ, one(T),
104+
Ablock.data.blocks[i - j + u + 1, j],
105+
Xblock.blocks[j], one(T), Cblock.blocks[i])
106+
end
107+
108+
Cblock
109+
end
110+
111+
using Test
112+
113+
function testme()
114+
@testset "block-banded on NVIDIA gpus" begin
115+
116+
@testset "BlockArray Adapters" begin
117+
bmat = BlockArray{Float64}(undef, [1, 1], [2, 2])
118+
@test adapt(JLArray, bmat) isa BlockArray{T, 2, JLArray{T, 2}} where T
119+
@test eltype(adapt(JLArray, bmat)) === Float64
120+
if @isdefined CuArray
121+
@test cu(bmat) isa BlockArray{T, 2, JLArray{T, 2}} where T
122+
@test eltype(cu(bmat)) === Float32
123+
end
124+
end
125+
126+
@testset "PseudoBlockArray Adapters" begin
127+
bmat = PseudoBlockArray{Float64}(undef, [1, 1], [2, 2])
128+
@test eltype(adapt(JLArray, bmat)) === Float64
129+
@test adapt(JLArray, bmat) isa PseudoBlockArray
130+
if @isdefined CuArray
131+
@test !(adapt(CuArray, bmat) isa PseudoBlockArray)
132+
@test adapt(CuArray, bmat) isa BlockArray{T, 2, JLArray{T, 2}} where T
133+
@test cu(bmat) isa BlockArray{T, 2, JLArray{T, 2}} where T
134+
@test eltype(cu(bmat)) === Float32
135+
end
136+
end
137+
138+
@testset "PseudoBlockArray Adapters" begin
139+
bmat = BandedBlockBandedMatrix{Float64}(undef, ([1, 1], [2, 2]), (1, 2), (1, 1))
140+
@test adapt(JLArray, bmat) isa BandedBlockBandedMatrix
141+
@test adapt(JLArray, bmat).data isa PseudoBlockArray{T, 2, JLArray{T, 2}} where T
142+
@test eltype(adapt(JLArray, bmat)) === Float64
143+
if @isdefined CuArray
144+
@test adapt(CuArray, bmat).data isa BlockArray{T, 2, CuArray{T, 2}} where T
145+
@test cu(bmat) isa BandedBlockBandedMatrix
146+
@test cu(bmat).data isa BlockArray{T, 2, JLArray{T, 2}} where T
147+
@test eltype(cu(bmat)) === Float32
148+
end
149+
end
150+
151+
@testset "Multiplication" begin
152+
N, M = rand(1:20, 2)
153+
l, u, λ, μ = rand(0:2, 4)
154+
n, m = rand(max(l, u, λ, μ):20, N), rand(max(l, u, λ, μ):20, M)
155+
A = BandedBlockBandedMatrix{Float64}(undef, (n, m), (l, u), (λ, μ))
156+
A.data .= rand.()
157+
Ablock = adapt(BlockArray, A)
158+
cblock = BlockArray(Array{Float64, 1}(undef, size(A, 1)), n)
159+
cblock .= rand.()
160+
x = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 2)), m)
161+
x .= rand.()
162+
xblock = adapt(BlockArray, x)
163+
164+
@test LinearAlgebra.mul!(cblock, Ablock, xblock) A * x
165+
cblock .= 0
166+
@test banded_mul!(cblock, Ablock, xblock) A * x
167+
end
168+
end
169+
end
170+
171+
using BenchmarkTools
172+
using Statistics
173+
174+
function benchmarks()
175+
suite = BenchmarkGroup()
176+
suite["viabm"] = BenchmarkGroup()
177+
suite["pseudo"] = BenchmarkGroup()
178+
suite["block"] = BenchmarkGroup()
179+
possibles = [5, 10, 100, 500, 1000]
180+
for N in possibles #, n in possibles
181+
n = N
182+
suite["pseudo"]["N=$N n=$n"] = BenchmarkGroup()
183+
suite["block"]["N=$N n=$n"] = BenchmarkGroup()
184+
suite["viabm"]["N=$N n=$n"] = BenchmarkGroup()
185+
186+
l, u, λ, μ = rand(0:2, 4)
187+
M, m = N, n
188+
189+
A = BandedBlockBandedMatrix{Float64}(
190+
undef, (repeat([n], N), repeat([m], M)), (l, u), (λ, μ))
191+
A.data .= rand.()
192+
c = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 1)), repeat([n], N))
193+
c .= rand.()
194+
x = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 2)), repeat([m], M))
195+
x .= rand.()
196+
197+
suite["pseudo"]["N=$N n=$n"] = @benchmarkable begin
198+
$c .= Mul($A, $x)
199+
end
200+
suite["block"]["N=$N n=$n"] = @benchmarkable begin
201+
LinearAlgebra.mul!($(adapt(BlockArray, c)), $(adapt(BlockArray, A)),
202+
$(adapt(BlockArray, x)))
203+
end
204+
suite["viabm"]["N=$N n=$n"] = @benchmarkable begin
205+
banded_mul!($(adapt(BlockArray, c)), $(adapt(BlockArray, A)),
206+
$(adapt(BlockArray, x)))
207+
end
208+
end
209+
suite
210+
end
211+
212+
block_ratio(result, name; method=median) =
213+
ratio(method(result["block"][name]), method(result["pseudo"][name]))
214+
viabm_ratio(result, name; method=median) =
215+
ratio(method(result["viabm"][name]), method(result["block"][name]))

0 commit comments

Comments
 (0)