Skip to content

tests pass, incorporates new storage #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Oct 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4319dcb
BandedBlockBandedMatrix can parameterize storage
mdavezac Sep 17, 2018
29fa9ac
Generalizes some constructor to parameterized over container type
mdavezac Sep 21, 2018
922c625
Add tests for BlockArray container type
mdavezac Sep 21, 2018
aabf7df
Added still one more generalized constructor
mdavezac Sep 21, 2018
ce88ffc
Cannot use const in local scope
mdavezac Sep 21, 2018
b872c8f
simplify constructors
dlfivefifty Sep 27, 2018
a2750c0
merge
dlfivefifty Sep 27, 2018
6d578c2
add SharedArray example
dlfivefifty Sep 27, 2018
e5474fa
Speed up +
dlfivefifty Sep 28, 2018
8aec29a
make shared array example allocation free
dlfivefifty Sep 29, 2018
136859b
Start CuArrays example
dlfivefifty Oct 2, 2018
ed3b524
CuArrays experimental version
dlfivefifty Oct 2, 2018
b07a4b4
push from discussion
dlfivefifty Oct 3, 2018
af8122e
Fix shared example
dlfivefifty Oct 3, 2018
5c9a659
merge faster +
dlfivefifty Oct 3, 2018
6e424e1
tests pass again (with BlockArrays master)
dlfivefifty Oct 3, 2018
221570b
Faster fill!
mdavezac Oct 15, 2018
97bc0c4
Example for alternate underlying array types
mdavezac Oct 15, 2018
6a9926e
split blockarray/gpu example from shared arrays
mdavezac Oct 17, 2018
f7126e4
tests pass with new LazyArrays
dlfivefifty Oct 27, 2018
36f1f31
BlockBanded * Matrix, Matrix * BlockBanded
dlfivefifty Oct 27, 2018
8c4cae0
remove blas macros
dlfivefifty Oct 28, 2018
335a5b3
pass with new lazy arrays
dlfivefifty Oct 29, 2018
6c27df2
bump bandedmatrices require
dlfivefifty Oct 29, 2018
055c263
bump LazyArrays
dlfivefifty Oct 29, 2018
4aae550
import materialize!
dlfivefifty Oct 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
julia 0.7
BandedMatrices 0.6
BandedMatrices 0.7
BlockArrays 0.5
FillArrays 0.2.1
LazyArrays 0.2.1
FillArrays 0.3
LazyArrays 0.3.1
215 changes: 215 additions & 0 deletions examples/blockarray_backend.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# using CUDAnative
# device!(0)
# using CuArrays
using GPUArrays
using BlockArrays: _BlockArray, PseudoBlockArray, BlockArray, BlockMatrix, BlockVector,
nblocks, Block, cumulsizes, AbstractBlockVector
using BlockBandedMatrices: BandedBlockBandedMatrix, _BandedBlockBandedMatrix,
blockbandwidths, subblockbandwidths, blockbandwidth,
BandedBlockBandedSizes
using LinearAlgebra: BLAS
using BandedMatrices: _BandedMatrix
using SharedArrays
using LazyArrays
import Distributed

import Adapt: adapt
import LinearAlgebra

############### Loot and plunder
# BlockArrays
# NOTE(review): these methods extend `Adapt.adapt` for types owned by other
# packages (type piracy) — acceptable in an example script, not in library code.
# Convert each block's storage to `T` (e.g. a GPU array type), keeping block structure.
adapt(T::Type{<:AbstractArray}, b::BlockArray) =
_BlockArray(T.(b.blocks), b.block_sizes)
# A PseudoBlockArray holds one contiguous array, so convert it wholesale.
adapt(T::Type{<:AbstractArray}, b::PseudoBlockArray) =
PseudoBlockArray(T(b.blocks), b.block_sizes)
# Cross-conversions between the two block-array flavours reuse the data + sizes.
adapt(T::Type{<:PseudoBlockArray}, b::BlockArray) = T(b.blocks, b.block_sizes)
adapt(T::Type{<:BlockArray}, b::PseudoBlockArray) = T(b.blocks, b.block_sizes)
# CuArrays and BlockArrays
if @isdefined CuArray
# CuArray cannot wrap the pseudo layout directly: split into true blocks first.
adapt(T::Type{<:CuArray}, b::PseudoBlockArray) = adapt(T, BlockArray(b))
end
###############

# Adapt a banded-block-banded matrix by adapting its underlying band storage;
# the block-size metadata is storage-independent and is reused as-is.
adapt(T::Type, b::BandedBlockBandedMatrix) =
_BandedBlockBandedMatrix(adapt(T, b.data), b.block_sizes)


# Block-banded matrix-vector product `c = A*x` for block-wise storage, computed
# one block at a time via banded BLAS gbmv! calls.
# `c` is zero-filled first, then each in-band block (i,j) accumulates
# `A[Block(i,j)] * x[Block(j)]` into `c[Block(i)]`.
function LinearAlgebra.mul!(c::BlockVector{T},
A::BandedBlockBandedMatrix{T, <: BlockMatrix},
x::BlockVector{T}) where T
# Conformability checks (NOTE(review): @assert may be compiled out; fine for an example).
@assert nblocks(A, 1) == nblocks(c, 1)
@assert cumulsizes(A, 1) == cumulsizes(c, 1)
@assert nblocks(A, 2) == nblocks(x, 1)
@assert cumulsizes(A, 2) == cumulsizes(x, 1)

# Zero the output so gbmv! (beta = one(T)) accumulates from a clean slate.
for block in c.blocks
fill!(block, zero(eltype(block)))
end
l, u = blockbandwidths(A)       # block-level lower/upper bandwidths
λ, μ = subblockbandwidths(A)    # within-block (scalar) bandwidths
N,M = nblocks(A)

# Only blocks inside the block band j ∈ [i-l, i+u] are stored/nonzero.
@inbounds for i = 1:N, j = max(1,i-l):min(M,i+u)
# gbmv!(trans, m, kl, ku, alpha, band_storage, x, beta, y):
# A.data.blocks[i - j + u + 1, j] is block (i,j) in banded (BLAS-style) storage.
BLAS.gbmv!('N', size(view(A, Block(i, j)), 1), λ, μ, one(T),
A.data.blocks[i - j + u + 1, j],
x.blocks[j], one(T), c.blocks[i])
end

c
end

# Same product as the `mul!` method above, but instead of calling BLAS directly
# each stored block is wrapped as a `BandedMatrix` view and accumulated through
# LazyArrays' `Mul` broadcast — this path works for non-BLAS (e.g. GPU) storage.
function banded_mul!(c::BlockVector{T},
A::BandedBlockBandedMatrix{T, <: BlockMatrix},
x::AbstractBlockVector{T}) where T
# Conformability checks.
@assert nblocks(A, 1) == nblocks(c, 1)
@assert cumulsizes(A, 1) == cumulsizes(c, 1)
@assert nblocks(A, 2) == nblocks(x, 1)
@assert cumulsizes(A, 2) == cumulsizes(x, 1)

# Zero the output before accumulating.
for block in c.blocks
fill!(block, zero(eltype(block)))
end
l, u = blockbandwidths(A)       # block-level bandwidths
λ, μ = subblockbandwidths(A)    # within-block bandwidths
N, M = nblocks(A)

@inbounds for i = 1:N, j = max(1,i-l):min(M,i+u)
# Reinterpret the raw band storage of block (i,j) as a BandedMatrix (no copy).
B = _BandedMatrix(A.data.blocks[i - j + u + 1, j],
size(view(A, Block(i, j)), 1),
λ, μ)
# Lazy accumulation: c[Block(i)] += B * x[Block(j)] without a temporary.
c[Block(i)] .+= Mul(B, x.blocks[j])
end

c
end

"""
    nofill_mul!(Cblock, Ablock, Xblock)

Accumulate the block-banded product `Ablock * Xblock` into `Cblock` via banded
BLAS `gbmv!` calls, WITHOUT zero-filling `Cblock` first — callers must
pre-initialize it.  All three arguments are `BandedBlockBandedMatrix`es backed
by `BlockMatrix` storage.  Returns `Cblock`.
"""
function nofill_mul!(Cblock::BandedBlockBandedMatrix{T, <: BlockMatrix},
                     Ablock::BandedBlockBandedMatrix{T, <: BlockMatrix},
                     Xblock::BandedBlockBandedMatrix{T, <: BlockMatrix}) where T
    # Conformability checks.
    @assert nblocks(Ablock, 1) == nblocks(Cblock, 1)
    @assert cumulsizes(Ablock, 1) == cumulsizes(Cblock, 1)
    @assert nblocks(Ablock, 2) == nblocks(Xblock, 1)
    @assert cumulsizes(Ablock, 2) == cumulsizes(Xblock, 1)
    @assert nblocks(Xblock, 2) == nblocks(Cblock, 2)
    # Bug fix: the original compared `cumulsizes(Xblock, 2)` with itself
    # (always true); the intent is to check X's columns against C's columns.
    @assert cumulsizes(Xblock, 2) == cumulsizes(Cblock, 2)

    lₐ, uₐ = blockbandwidths(Ablock)
    # Bug fix: the original wrote `blockbandwidths(xblock)` — an undefined
    # lowercase name that would raise UndefVarError on first call.
    lₓ, uₓ = blockbandwidths(Xblock)
    λ, μ = subblockbandwidths(Ablock)
    N, M = nblocks(Ablock)
    M, K = nblocks(Xblock)

    @inbounds for i = 1:N, j = max(1, i - lₐ):min(M, i + uₐ), k = max(1, j - lₓ):min(j + uₓ, K)
        # gbmv!(trans, m, kl, ku, alpha, band_storage, x, beta, y).
        # Bug fix: band-storage row index used `u`, which is not defined in
        # this scope — A's upper block bandwidth here is `uₐ`.
        BLAS.gbmv!('N', size(view(Ablock, Block(i, j)), 1), λ, μ, one(T),
                   Ablock.data.blocks[i - j + uₐ + 1, j],
                   Xblock.blocks[j], one(T), Cblock.blocks[i])
    end

    Cblock
end

using Test

"""
    testme()

Run the adapter and multiplication test suites.  The CPU tests use `JLArray`
(GPUArrays' reference backend); the `CuArray` tests only run when CuArrays has
been loaded before this file.
"""
function testme()
    @testset "block-banded on NVIDIA gpus" begin

        @testset "BlockArray Adapters" begin
            bmat = BlockArray{Float64}(undef, [1, 1], [2, 2])
            @test adapt(JLArray, bmat) isa BlockArray{T, 2, JLArray{T, 2}} where T
            @test eltype(adapt(JLArray, bmat)) === Float64
            if @isdefined CuArray
                # Bug fix: the CuArray branch checked for JLArray storage
                # (copy-paste from the test above); `cu` produces CuArray blocks.
                @test cu(bmat) isa BlockArray{T, 2, CuArray{T, 2}} where T
                @test eltype(cu(bmat)) === Float32
            end
        end

        @testset "PseudoBlockArray Adapters" begin
            bmat = PseudoBlockArray{Float64}(undef, [1, 1], [2, 2])
            @test eltype(adapt(JLArray, bmat)) === Float64
            @test adapt(JLArray, bmat) isa PseudoBlockArray
            if @isdefined CuArray
                @test !(adapt(CuArray, bmat) isa PseudoBlockArray)
                # Bug fix: adapting to CuArray must yield CuArray blocks, not JLArray.
                @test adapt(CuArray, bmat) isa BlockArray{T, 2, CuArray{T, 2}} where T
                @test cu(bmat) isa BlockArray{T, 2, CuArray{T, 2}} where T
                @test eltype(cu(bmat)) === Float32
            end
        end

        # Bug fix: this testset duplicated the name "PseudoBlockArray Adapters";
        # it actually exercises the BandedBlockBandedMatrix adapters.
        @testset "BandedBlockBandedMatrix Adapters" begin
            bmat = BandedBlockBandedMatrix{Float64}(undef, ([1, 1], [2, 2]), (1, 2), (1, 1))
            @test adapt(JLArray, bmat) isa BandedBlockBandedMatrix
            @test adapt(JLArray, bmat).data isa PseudoBlockArray{T, 2, JLArray{T, 2}} where T
            @test eltype(adapt(JLArray, bmat)) === Float64
            if @isdefined CuArray
                @test adapt(CuArray, bmat).data isa BlockArray{T, 2, CuArray{T, 2}} where T
                @test cu(bmat) isa BandedBlockBandedMatrix
                # Bug fix: `cu` converts storage to CuArray, not JLArray.
                @test cu(bmat).data isa BlockArray{T, 2, CuArray{T, 2}} where T
                @test eltype(cu(bmat)) === Float32
            end
        end

        @testset "Multiplication" begin
            # Random block structure: N×M blocks, blocks large enough for the bandwidths.
            N, M = rand(1:20, 2)
            l, u, λ, μ = rand(0:2, 4)
            n, m = rand(max(l, u, λ, μ):20, N), rand(max(l, u, λ, μ):20, M)
            A = BandedBlockBandedMatrix{Float64}(undef, (n, m), (l, u), (λ, μ))
            A.data .= rand.()
            Ablock = adapt(BlockArray, A)
            cblock = BlockArray(Array{Float64, 1}(undef, size(A, 1)), n)
            cblock .= rand.()
            x = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 2)), m)
            x .= rand.()
            xblock = adapt(BlockArray, x)

            # Both block-wise products must agree with the reference dense product.
            @test LinearAlgebra.mul!(cblock, Ablock, xblock) ≈ A * x
            cblock .= 0
            @test banded_mul!(cblock, Ablock, xblock) ≈ A * x
        end
    end
end

using BenchmarkTools
using Statistics

"""
    benchmarks()

Build a `BenchmarkGroup` comparing three matrix-vector product strategies for
square block structures (N blocks of size n = N each): "pseudo" (lazy `Mul`
into a PseudoBlockArray), "block" (block-wise BLAS `mul!`), and "viabm"
(block-wise `banded_mul!`).  Returns the suite; run it with `BenchmarkTools.run`.
"""
function benchmarks()
    bench = BenchmarkGroup()
    for label in ("viabm", "pseudo", "block")
        bench[label] = BenchmarkGroup()
    end
    sizes = [5, 10, 100, 500, 1000]
    for N in sizes #, n in sizes
        n = N
        trial = "N=$N n=$n"
        # Placeholder groups; each is replaced by a benchmarkable below.
        for label in ("pseudo", "block", "viabm")
            bench[label][trial] = BenchmarkGroup()
        end

        # Random bandwidths, square block structure.
        l, u, λ, μ = rand(0:2, 4)
        M, m = N, n

        A = BandedBlockBandedMatrix{Float64}(
            undef, (repeat([n], N), repeat([m], M)), (l, u), (λ, μ))
        A.data .= rand.()
        c = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 1)), repeat([n], N))
        c .= rand.()
        x = PseudoBlockArray(Array{Float64, 1}(undef, size(A, 2)), repeat([m], M))
        x .= rand.()

        # Fixtures are $-interpolated so setup cost is excluded from timings.
        bench["pseudo"][trial] = @benchmarkable $c .= Mul($A, $x)
        bench["block"][trial] = @benchmarkable LinearAlgebra.mul!(
            $(adapt(BlockArray, c)), $(adapt(BlockArray, A)), $(adapt(BlockArray, x)))
        bench["viabm"][trial] = @benchmarkable banded_mul!(
            $(adapt(BlockArray, c)), $(adapt(BlockArray, A)), $(adapt(BlockArray, x)))
    end
    return bench
end

"""
    block_ratio(result, name; method=median)

Ratio of the "block" estimate to the "pseudo" estimate for trial `name`,
where `method` reduces each trial (default `median`).
"""
function block_ratio(result, name; method=median)
    return ratio(method(result["block"][name]), method(result["pseudo"][name]))
end

"""
    viabm_ratio(result, name; method=median)

Ratio of the "viabm" estimate to the "block" estimate for trial `name`,
where `method` reduces each trial (default `median`).
"""
function viabm_ratio(result, name; method=median)
    return ratio(method(result["viabm"][name]), method(result["block"][name]))
end
Loading