Skip to content

Speed up Cholesky Jacobi matrices #169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ClassicalOrthogonalPolynomials"
uuid = "b30e2e7b-c4ee-47da-9d5f-2c5c27239acd"
authors = ["Sheehan Olver <[email protected]>"]
version = "0.12.3"
version = "0.12.4"

[deps]
ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
Expand Down
4 changes: 2 additions & 2 deletions src/ClassicalOrthogonalPolynomials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import LazyArrays: MemoryLayout, Applied, ApplyStyle, flatten, _flatten, adjoint
sub_materialize, arguments, sub_paddeddata, paddeddata, PaddedLayout, resizedata!, LazyVector, ApplyLayout, call,
_mul_arguments, CachedVector, CachedMatrix, LazyVector, LazyMatrix, axpy!, AbstractLazyLayout, BroadcastLayout,
AbstractCachedVector, AbstractCachedMatrix, paddeddata, cache_filldata!,
simplifiable, PaddedArray, converteltype
simplifiable, PaddedArray, converteltype, simplify
import ArrayLayouts: MatMulVecAdd, materialize!, _fill_lmul!, sublayout, sub_materialize, lmul!, ldiv!, ldiv, transposelayout, triangulardata,
subdiagonaldata, diagonaldata, supdiagonaldata, mul, rowsupport, colsupport
import LazyBandedMatrices: SymTridiagonal, Bidiagonal, Tridiagonal, unitblocks, BlockRange1, AbstractLazyBandedLayout
Expand All @@ -33,7 +33,7 @@ import QuasiArrays: cardinality, checkindex, QuasiAdjoint, QuasiTranspose, Inclu
AbstractQuasiFill, equals_layout, QuasiArrayLayout, PolynomialLayout, diff_layout

import InfiniteArrays: OneToInf, InfAxes, Infinity, AbstractInfUnitRange, InfiniteCardinal, InfRanges
import InfiniteLinearAlgebra: chop!, chop, pad, choplength, compatible_resize!
import InfiniteLinearAlgebra: chop!, chop, pad, choplength, compatible_resize!, partialcholesky!
import ContinuumArrays: Basis, Weight, basis_axes, @simplify, Identity, AbstractAffineQuasiVector, ProjectionFactorization,
grid, plotgrid, plotgrid_layout, plotvalues_layout, grid_layout, transform_ldiv, TransformFactorization, QInfAxes, broadcastbasis, ExpansionLayout, basismap,
AffineQuasiVector, AffineMap, AbstractWeightLayout, AbstractWeightedBasisLayout, WeightedBasisLayout, WeightedBasisLayouts, demap, AbstractBasisLayout, BasisLayout,
Expand Down
78 changes: 49 additions & 29 deletions src/choleskyQR.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ orthogonalpolynomial(w::Function, P::AbstractQuasiMatrix) = orthogonalpolynomial
cholesky_jacobimatrix(w, P)

returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a Cholesky decomposition of the weight modification.
orthogonal with respect to `w(x)` by computing a Cholesky decomposition of the weight modification.

The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs are `w` as a function or `W` as an infinite matrix representing multiplication with the function `w` on the basis `P`.
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs are `w` as a function or `W` as an infinite matrix representing the weight modifier multiplication by the function `w / w_P` on `P` where `w_P` is the orthogonality weight of `P`.
"""
cholesky_jacobimatrix(w::Function, P) = cholesky_jacobimatrix(w.(axes(P,1)), P)

Expand All @@ -52,9 +52,7 @@ end
function cholesky_jacobimatrix(W::AbstractMatrix, Q)
isnormalized(Q) || error("Polynomials must be orthonormal")
U = cholesky(W).U
X = jacobimatrix(Q)
UX = ApplyArray(*,U,X)
CJD = CholeskyJacobiData(U,UX)
CJD = CholeskyJacobiData(U,Q)
return SymTridiagonal(view(CJD,:,1),view(CJD,:,2))
end

Expand All @@ -63,24 +61,35 @@ mutable struct CholeskyJacobiData{T} <: LazyMatrix{T}
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
U::UpperTriangular{T} # store upper triangular conversion matrix (needed to extend available entries)
UX::ApplyArray{T} # store U*X, where X is the Jacobi matrix of the original P (needed to extend available entries)
X::AbstractMatrix{T} # store Jacobi matrix of original polynomials
P # store original polynomials
datasize::Int # size of so-far computed block
end

# Computes the initial data for the Jacobi operator bands
function CholeskyJacobiData(U::AbstractMatrix{T}, UX) where T
function CholeskyJacobiData(U::AbstractMatrix{T}, P) where T
# compute a length 2 vector on first go and circumvent BigFloat issue
dv = zeros(T,2)
dv = zeros(T,2)
ev = zeros(T,2)
X = jacobimatrix(P)
partialcholesky!(U.data, 3)
UX = view(U,1:3,1:3)*X[1:3,1:3]
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])[1:3,1:3] \ [zeros(T,2); one(T)])
return CholeskyJacobiData{T}(dv, ev, U, UX, 2)
return CholeskyJacobiData{T}(dv, ev, U, X, P, 2)
end

size(::CholeskyJacobiData) = (ℵ₀,2) # Stored as two infinite cached bands

function getindex(C::SymTridiagonal{<:Any, <:SubArray{<:Any, 1, <:CholeskyJacobiData, <:Tuple, false}, <:SubArray{<:Any, 1, <:CholeskyJacobiData, <:Tuple, false}}, kr::UnitRange, jr::UnitRange)
m = 1+max(kr.stop,jr.stop)
resizedata!(C.dv.parent,m,1)
resizedata!(C.ev.parent,m,2)
return copy(view(C,kr,jr))
end

function getindex(K::CholeskyJacobiData, n::Integer, m::Integer)
@assert (m==1) || (m==2)
resizedata!(K,n,m)
Expand All @@ -90,7 +99,7 @@ end

# Resize and filling functions for cached implementation
function resizedata!(K::CholeskyJacobiData, n::Integer, m::Integer)
nm = max(n,m)
nm = 1+max(n,m)
νμ = K.datasize
if nm > νμ
resize!(K.dv,nm)
Expand All @@ -102,12 +111,14 @@ function resizedata!(K::CholeskyJacobiData, n::Integer, m::Integer)
end

function _fillcholeskybanddata!(J::CholeskyJacobiData{T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
resizedata!(J.U,inds[end]+1,inds[end]+1)
resizedata!(J.UX,inds[end]+1,inds[end]+1)
# pre-fill U to prevent expensive step-by-step filling in
m = inds[end]+1
partialcholesky!(J.U.data, m)
dv, ev, U, X = J.dv, J.ev, J.U, J.X

UX = view(U,1:m,1:m)*X[1:m,1:m]

dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
@inbounds for k in inds
@inbounds Threads.@threads for k = inds
# this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]/U[k,k]
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1]
Expand All @@ -116,20 +127,21 @@ end


"""
qr_jacobimatrix(sqrtw, P)
qr_jacobimatrix(w, P)

returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a QR decomposition of the square root weight modification.
orthogonal with respect to `w(x)` by computing a QR decomposition of the square root weight modification.

The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs for `sqrtw` are the square root of the weight modification as a function or `sqrtW` as an infinite matrix representing multiplication with the function `sqrt(w)` on the basis `P`.
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs for `w` are the target weight as a function or `sqrtW`, representing the multiplication operator of square root weight modification on the basis `P`.

The underlying QR approach allows two methods, one which uses the Q matrix and one which uses the R matrix. To change between methods, an optional argument :Q or :R may be supplied. The default is to use the Q method.
"""
function qr_jacobimatrix(sqrtw::Function, P, method = :Q)
qr_jacobimatrix(w::Function, P, method = :Q) = qr_jacobimatrix(w.(axes(P,1)), P, Symbol(method))
function qr_jacobimatrix(w::AbstractQuasiVector, P, method = :Q)
Q = normalized(P)
x = axes(P,1)
sqrtW = (Q \ (sqrtw.(x) .* Q)) # Compute weight multiplication via Clenshaw
return qr_jacobimatrix(sqrtW, Q, method)
w_P = orthogonalityweight(P)
sqrtW = Symmetric(Q \ (sqrt.(w ./ w_P) .* Q)) # Compute weight multiplication via Clenshaw
return qr_jacobimatrix(sqrtW, Q, Symbol(method))
end
function qr_jacobimatrix(sqrtW::AbstractMatrix{T}, Q, method = :Q) where T
isnormalized(Q) || error("Polynomials must be orthonormal")
Expand All @@ -143,7 +155,7 @@ mutable struct QRJacobiData{method,T} <: LazyMatrix{T}
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
U # store conversion, Q method: stores QR object. R method: only stores R.
UX # Auxilliary matrix. Q method: stores in-progress incomplete modification. R method: stores U*X for efficiency.
UX # Auxilliary matrix. Q method: stores in-progress incomplete modification. R method: stores Jacobi matrix of original polynomials
P # Remember original polynomials
datasize::Int # size of so-far computed block
end
Expand Down Expand Up @@ -179,15 +191,15 @@ function QRJacobiData{:R,T}(F, P) where T
U = F.R
U = ApplyArray(*,Diagonal(sign.(view(U,band(0)))),U) # QR decomposition does not force positive diagonals on R by default
X = jacobimatrix(P)
UX = ApplyArray(*,U,X)
UX = view(U,1:3,1:3)*X[1:3,1:3]
# compute a length 2 vector on first go and circumvent BigFloat issue
dv = zeros(T,2)
ev = zeros(T,2)
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])
return QRJacobiData{:R,T}(dv, ev, U, UX, P, 2)
return QRJacobiData{:R,T}(dv, ev, U, X, P, 2)
end


Expand All @@ -200,9 +212,16 @@ function getindex(K::QRJacobiData, n::Integer, m::Integer)
m == 2 && return K.ev[n]
end

function getindex(C::SymTridiagonal{<:Any, <:SubArray{<:Any, 1, <:QRJacobiData, <:Tuple, false}, <:SubArray{<:Any, 1, <:QRJacobiData, <:Tuple, false}}, kr::UnitRange, jr::UnitRange)
m = maximum(max(kr,jr))+1
resizedata!(C.dv.parent,m,2)
resizedata!(C.ev.parent,m,2)
return copy(view(C,kr,jr))
end

# Resize and filling functions for cached implementation
function resizedata!(K::QRJacobiData, n::Integer, m::Integer)
nm = max(n,m)
nm = 1+max(n,m)
νμ = K.datasize
if nm > νμ
resize!(K.dv,nm)
Expand Down Expand Up @@ -246,10 +265,11 @@ function _fillqrbanddata!(J::QRJacobiData{:R,T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
m = inds[end]+1
resizedata!(J.U,m,m)
resizedata!(J.UX,m,m)
dv, ev, X, U = J.dv, J.ev, J.UX, J.U

UX = view(U,1:m,1:m)*X[1:m,1:m]

dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
@inbounds for k in inds
@inbounds Threads.@threads for k in inds
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]./U[k,k] # this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1] # this is dot(view(UX,k,k-1:k+1), U[k-1:k+1,k-1:k+1] \ ek)
end
Expand Down
17 changes: 15 additions & 2 deletions src/clenshaw.jl
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,15 @@ function _BandedMatrix(::ClenshawLayout, V::SubArray{<:Any,2})
M = parent(V)
kr,jr = parentindices(V)
b = bandwidth(M,1)
jkr=max(1,min(first(jr),first(kr))-b÷2):max(last(jr),last(kr))+b÷2
jkr = max(1,min(first(jr),first(kr))-b÷2):max(last(jr),last(kr))+b÷2
# relationship between jkr and kr, jr
kr2,jr2 = kr.-first(jkr).+1,jr.-first(jkr).+1
lmul!(M.p0, clenshaw(M.c, M.A, M.B, M.C, M.X[jkr, jkr])[kr2,jr2])
end

function getindex(M::Clenshaw{T}, kr::AbstractUnitRange, j::Integer) where T
b = bandwidth(M,1)
jkr=max(1,min(j,first(kr))-b÷2):max(j,last(kr))+b÷2
jkr = max(1,min(j,first(kr))-b÷2):max(j,last(kr))+b÷2
# relationship between jkr and kr, jr
kr2,j2 = kr.-first(jkr).+1,j-first(jkr)+1
f = [Zeros{T}(j2-1); one(T); Zeros{T}(length(jkr)-j2)]
Expand All @@ -306,6 +306,19 @@ end

getindex(M::Clenshaw, k::Int, j::Int) = M[k:k,j][1]

function getindex(S::Symmetric{T,<:Clenshaw}, k::Integer, jr::AbstractUnitRange) where T
m = max(jr.start,jr.stop,k)
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[k,jr]
end
function getindex(S::Symmetric{T,<:Clenshaw}, kr::AbstractUnitRange, j::Integer) where T
m = max(kr.start,kr.stop,j)
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[kr,j]
end
function getindex(S::Symmetric{T,<:Clenshaw}, kr::AbstractUnitRange, jr::AbstractUnitRange) where T
m = max(kr.start,jr.start,kr.stop,jr.stop)
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[kr,jr]
end

transposelayout(M::ClenshawLayout) = LazyBandedMatrices.LazyBandedLayout()
# TODO: generalise for layout, use Base.PermutedDimsArray
Base.permutedims(M::Clenshaw{<:Number}) = transpose(M)
Expand Down
Loading