Skip to content

Implement Q-based qrjacobimatrix() in O(n) #138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
75ff7ab
implement Q based qrjacobimatrix in O(n)
TSGut May 22, 2023
ea62ec6
fix test name
TSGut May 22, 2023
3dd9a33
factor 2 spee-up for Q method
TSGut May 22, 2023
c7ca9a3
more optimisations (final for now)
TSGut May 22, 2023
2f7b7e5
same QR in both bands. affects both R and Q method
TSGut May 23, 2023
041d844
reduce cost slightly
TSGut May 23, 2023
059ca70
Update src/choleskyQR.jl
TSGut May 24, 2023
8ec099b
Update src/choleskyQR.jl
TSGut May 24, 2023
f3ab56d
Update src/choleskyQR.jl
TSGut May 24, 2023
3d32f53
Update src/choleskyQR.jl
TSGut May 24, 2023
3b12145
implement dlfivefifty's notes
TSGut May 24, 2023
ae1a177
a number of small fixes
TSGut May 24, 2023
183d482
add a test that checks resizing is consistent
TSGut May 25, 2023
ad70ed1
bugfix in test
TSGut May 25, 2023
0ac80c6
add missing import in tests
TSGut May 25, 2023
703e867
write out the dot products in R method
TSGut May 26, 2023
f995f20
view instead of getindex to allocations
TSGut May 26, 2023
a0ca0be
remove more allocations
TSGut May 26, 2023
2817577
switch to in-place householder
TSGut May 26, 2023
8c874ae
bugfix for changes in R method
TSGut May 26, 2023
ad8c5b5
generate bands in tandem but also keep them as vectors
TSGut May 27, 2023
654b6df
Update src/choleskyQR.jl
TSGut Jun 5, 2023
8847388
remove redundant dv = J.dv
TSGut Jun 5, 2023
81055e1
steps towards lowering allocations
TSGut Jun 5, 2023
014b470
reduce allocations again
TSGut Jun 6, 2023
17741bc
clarify minor comment
TSGut Jun 6, 2023
651536a
use reflectorApply!
TSGut Jun 7, 2023
b14ffef
removal of redundant computation
TSGut Jun 7, 2023
d1c6435
replace reflectorapply! with inplaceHouseholder!
TSGut Jun 7, 2023
ee5f0aa
prep for 1.9 fix
TSGut Jun 8, 2023
b0c67b4
Update Project.toml
TSGut Jun 8, 2023
72c8125
Update ci.yml
TSGut Jun 8, 2023
765d0e2
Update Project.toml
TSGut Jun 8, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ jobs:
fail-fast: false
matrix:
version:
- '1.7'
- '1'
- '^1.9.0-0'
os:
- ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ClassicalOrthogonalPolynomials"
uuid = "b30e2e7b-c4ee-47da-9d5f-2c5c27239acd"
authors = ["Sheehan Olver <[email protected]>"]
version = "0.8.1"
version = "0.9.0"

[deps]
ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
Expand Down Expand Up @@ -43,7 +43,7 @@ LazyArrays = "1.0.1"
LazyBandedMatrices = "0.8.5"
QuasiArrays = "0.9.6"
SpecialFunctions = "1.0, 2"
julia = "1.7"
julia = "1.9"

[extras]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Expand Down
2 changes: 1 addition & 1 deletion src/ClassicalOrthogonalPolynomials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import LazyArrays: MemoryLayout, Applied, ApplyStyle, flatten, _flatten, adjoint
import ArrayLayouts: MatMulVecAdd, materialize!, _fill_lmul!, sublayout, sub_materialize, lmul!, ldiv!, ldiv, transposelayout, triangulardata,
subdiagonaldata, diagonaldata, supdiagonaldata, mul, rowsupport, colsupport
import LazyBandedMatrices: SymTridiagonal, Bidiagonal, Tridiagonal, unitblocks, BlockRange1, AbstractLazyBandedLayout
import LinearAlgebra: pinv, factorize, qr, adjoint, transpose, dot, mul!
import LinearAlgebra: pinv, factorize, qr, adjoint, transpose, dot, mul!, reflectorApply!
import BandedMatrices: AbstractBandedLayout, AbstractBandedMatrix, _BandedMatrix, bandeddata
import FillArrays: AbstractFill, getindex_value, SquareEye
import DomainSets: components
Expand Down
215 changes: 134 additions & 81 deletions src/choleskyQR.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ OrthogonalPolynomial(w::AbstractQuasiVector) = OrthogonalPolynomial(w, orthogona
function OrthogonalPolynomial(w::AbstractQuasiVector, P::AbstractQuasiMatrix)
Q = normalized(P)
X = cholesky_jacobimatrix(w, Q)
ConvertedOrthogonalPolynomial(w, X, X.dv.U, Q)
ConvertedOrthogonalPolynomial(w, X, parent(X.dv).U, Q)
end

orthogonalpolynomial(w::AbstractQuasiVector) = OrthogonalPolynomial(w)
Expand All @@ -35,7 +35,7 @@ orthogonalpolynomial(w::SubQuasiArray) = orthogonalpolynomial(parent(w))[parenti
cholesky_jacobimatrix(w, P)

returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P`.
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a Cholesky decomposition of the weight modification.

The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs are `w` as a function or `W` as an infinite matrix representing multiplication with the function `w` on the basis `P`.
"""
Expand All @@ -51,61 +51,62 @@ function cholesky_jacobimatrix(W::AbstractMatrix, Q)
U = cholesky(W).U
X = jacobimatrix(Q)
UX = ApplyArray(*,U,X)
return SymTridiagonal(CholeskyJacobiBand{:dv}(U, UX),CholeskyJacobiBand{:ev}(U, UX))
CJD = CholeskyJacobiData(U,UX)
return SymTridiagonal(view(CJD,:,1),view(CJD,:,2))
end

# The generated Jacobi operators are symmetric tridiagonal, so we store their data in cached bands
mutable struct CholeskyJacobiBand{dv,T} <: AbstractCachedVector{T}
data::Vector{T} # store band entries, :dv for diagonal, :ev for off-diagonal
# The generated Jacobi operators are symmetric tridiagonal, so we store their data in two cached bands which are generated in tandem but can be accessed separately.
mutable struct CholeskyJacobiData{T} <: AbstractMatrix{T}
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
U::UpperTriangular{T} # store upper triangular conversion matrix (needed to extend available entries)
UX::ApplyArray{T} # store U*X, where X is the Jacobi matrix of the original P (needed to extend available entries)
datasize::Int # size of so-far computed block
end

# Computes the initial data for the Jacobi operator bands
function CholeskyJacobiBand{:dv}(U::AbstractMatrix{T}, UX) where T
dv = zeros(T,2) # compute a length 2 vector on first go
dv[1] = dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
return CholeskyJacobiBand{:dv,T}(dv, U, UX, 2)
end
function CholeskyJacobiBand{:ev}(U::AbstractMatrix{T}, UX) where T
ev = zeros(T,2) # compute a length 2 vector on first go
ev[1] = dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])
return CholeskyJacobiBand{:ev,T}(ev, U, UX, 2)
function CholeskyJacobiData(U::AbstractMatrix{T}, UX) where T
dv = Vector{T}(undef,2) # compute a length 2 vector on first go
ev = Vector{T}(undef,2)
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])[1:3,1:3] \ [zeros(T,2); one(T)])
return CholeskyJacobiData{T}(dv, ev, U, UX, 2)
end

size(::CholeskyJacobiBand) = (ℵ₀,) # Stored as an infinite cached vector
size(::CholeskyJacobiData) = (ℵ₀,2) # Stored as two infinite cached bands

function getindex(K::CholeskyJacobiData, n::Integer, m::Integer)
@assert (m==1) || (m==2)
resizedata!(K,n,m)
m == 1 && return K.dv[n]
m == 2 && return K.ev[n]
end

# Resize and filling functions for cached implementation
function resizedata!(K::CholeskyJacobiBand, nm::Integer)
function resizedata!(K::CholeskyJacobiData, n::Integer, m::Integer)
nm = max(n,m)
νμ = K.datasize
if nm > νμ
resize!(K.data,nm)
cache_filldata!(K, νμ:nm)
resize!(K.dv,nm)
resize!(K.ev,nm)
_fillcholeskybanddata!(K, νμ:nm)
K.datasize = nm
end
K
end
function cache_filldata!(J::CholeskyJacobiBand{:dv,T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
getindex(J.U,inds[end]+1,inds[end]+1)
getindex(J.UX,inds[end]+1,inds[end]+1)

ek = [zero(T); one(T)]
@inbounds for k in inds
J.data[k] = dot(view(J.UX,k,k-1:k), J.U[k-1:k,k-1:k] \ ek)
end
end
function cache_filldata!(J::CholeskyJacobiBand{:ev, T}, inds::UnitRange{Int}) where T
function _fillcholeskybanddata!(J::CholeskyJacobiData{T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
getindex(J.U,inds[end]+1,inds[end]+1)
getindex(J.UX,inds[end]+1,inds[end]+1)
resizedata!(J.U,inds[end]+1,inds[end]+1)
resizedata!(J.UX,inds[end]+1,inds[end]+1)

ek = [zeros(T,2); one(T)]
dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
@inbounds for k in inds
J.data[k] = dot(view(J.UX,k,k-1:k+1), J.U[k-1:k+1,k-1:k+1] \ ek)
# this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]/U[k,k]
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1]
end
end

Expand All @@ -114,80 +115,132 @@ end
qr_jacobimatrix(sqrtw, P)

returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P`.
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a QR decomposition of the square root weight modification.

The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs for `sqrtw` are the square root of the weight modification as a function or `sqrtW` as an infinite matrix representing multiplication with the function `sqrt(w)` on the basis `P`.

The underlying QR approach allows two methods, one which uses the Q matrix and one which uses the R matrix. To change between methods, an optional argument :Q or :R may be supplied. The default is to use the Q method.
"""
function qr_jacobimatrix(sqrtw::Function, P)
function qr_jacobimatrix(sqrtw::Function, P, method = :Q)
Q = normalized(P)
x = axes(P,1)
sqrtW = (Q \ (sqrtw.(x) .* Q)) # Compute weight multiplication via Clenshaw
return qr_jacobimatrix(sqrtW, Q)
return qr_jacobimatrix(sqrtW, Q, method)
end
function qr_jacobimatrix(sqrtW::AbstractMatrix, Q)
function qr_jacobimatrix(sqrtW::AbstractMatrix{T}, Q, method = :Q) where T
isnormalized(Q) || error("Polynomials must be orthonormal")
SymTridiagonal(QRJacobiBand{:dv}(sqrtW,Q),QRJacobiBand{:ev}(sqrtW,Q))
end

# The generated Jacobi operators are symmetric tridiagonal, so we store their data in cached bands
mutable struct QRJacobiBand{dv,T} <: AbstractCachedVector{T}
data::Vector{T} # store band entries, :dv for diagonal, :ev for off-diagonal
U::ApplyArray{T} # store upper triangular conversion matrix (needed to extend available entries)
UX::ApplyArray{T} # store U*X, where X is the Jacobi matrix of the original P (needed to extend available entries)
F = qr(sqrtW)
QRJD = QRJacobiData{method,T}(F,Q)
SymTridiagonal(view(QRJD,:,1),view(QRJD,:,2))
end

# The generated Jacobi operators are symmetric tridiagonal, so we store their data in two cached bands which are generated in tandem but can be accessed separately.
mutable struct QRJacobiData{method,T} <: AbstractMatrix{T}
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
U # store conversion, Q method: stores QR object. R method: only stores R.
UX # Auxilliary matrix. Q method: stores in-progress incomplete modification. R method: stores U*X for efficiency.
P # Remember original polynomials
datasize::Int # size of so-far computed block
end

# Computes the initial data for the Jacobi operator bands
function QRJacobiBand{:dv}(sqrtW, P::OrthogonalPolynomial{T}) where T
U = qr(sqrtW).R
U = ApplyArray(*,Diagonal(sign.(view(U,band(0)))),U)
function QRJacobiData{:Q,T}(F, P) where T
b = 3+bandwidths(F.R)[2]÷2
X = jacobimatrix(P)
UX = ApplyArray(*,U,X)
dv = zeros(T,2) # compute a length 2 vector on first go
dv[1] = dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
return QRJacobiBand{:dv,T}(dv, U, UX, 2)
end
function QRJacobiBand{:ev}(sqrtW, P::OrthogonalPolynomial{T}) where T
U = qr(sqrtW).R
U = ApplyArray(*,Diagonal(sign.(view(U,band(0)))),U)
# we fill 1 entry on the first run
dv = zeros(T,2)
ev = zeros(T,1)
# fill first entry (special case)
M = Matrix(X[1:b,1:b])
resizedata!(F.factors,b,b)
# special case for first entry double Householder product
v = view(F.factors,1:b,1)
reflectorApply!(v, F.τ[1], M)
reflectorApply!(v, F.τ[1], M')
dv[1] = M[1,1]
# fill second entry
# computes H*M*H in-place, overwriting M
v = view(F.factors,2:b,2)
reflectorApply!(v, F.τ[2], view(M,1,2:b))
M[1,2:b] .= view(M,1,2:b) # symmetric matrix, avoid recomputation
reflectorApply!(v, F.τ[2], view(M,2:b,2:b))
reflectorApply!(v, F.τ[2], view(M,2:b,2:b)')
ev[1] = M[1,2]*sign(F.R[1,1]*F.R[2,2]) # includes possible correction for sign (only needed in off-diagonal case), since the QR decomposition does not guarantee positive diagonal on R
K = Matrix(X[2:b+1,2:b+1])
K[1:end-1,1:end-1] .= view(M,2:b,2:b)
return QRJacobiData{:Q,T}(dv, ev, F, K, P, 1)
end
function QRJacobiData{:R,T}(F, P) where T
U = F.R
U = ApplyArray(*,Diagonal(sign.(view(U,band(0)))),U) # QR decomposition does not force positive diagonals on R by default
X = jacobimatrix(P)
UX = ApplyArray(*,U,X)
ev = zeros(T,2) # compute a length 2 vector on first go
ev[1] = dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])
return QRJacobiBand{:ev,T}(ev, U, UX, 2)
dv = Vector{T}(undef,2) # compute a length 2 vector on first go
ev = Vector{T}(undef,2)
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])
return QRJacobiData{:R,T}(dv, ev, U, UX, P, 2)
end

size(::QRJacobiBand) = (ℵ₀,) # Stored as an infinite cached vector

size(::QRJacobiData) = (ℵ₀,2) # Stored as two infinite cached bands

function getindex(K::QRJacobiData, n::Integer, m::Integer)
@assert (m==1) || (m==2)
resizedata!(K,n,m)
m == 1 && return K.dv[n]
m == 2 && return K.ev[n]
end

# Resize and filling functions for cached implementation
function resizedata!(K::QRJacobiBand, nm::Integer)
function resizedata!(K::QRJacobiData, n::Integer, m::Integer)
nm = max(n,m)
νμ = K.datasize
if nm > νμ
resize!(K.data,nm)
cache_filldata!(K, νμ:nm)
resize!(K.dv,nm)
resize!(K.ev,nm)
_fillqrbanddata!(K, νμ:nm)
K.datasize = nm
end
K
end
function cache_filldata!(J::QRJacobiBand{:dv,T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
getindex(J.U,inds[end]+1,inds[end]+1)
getindex(J.UX,inds[end]+1,inds[end]+1)

ek = [zero(T); one(T)]
@inbounds for k in inds
J.data[k] = dot(view(J.UX,k,k-1:k), J.U[k-1:k,k-1:k] \ ek)
function _fillqrbanddata!(J::QRJacobiData{:Q,T}, inds::UnitRange{Int}) where T
b = bandwidths(J.U.factors)[2]÷2
# pre-fill cached arrays to avoid excessive cost from expansion in loop
m, jj = 1+inds[end], inds[2:end]
X = jacobimatrix(J.P)[1:m+b+2,1:m+b+2]
resizedata!(J.U.factors,m+b,m+b)
resizedata!(J.U.τ,m)
K, τ, F, dv, ev = J.UX, J.U.τ, J.U.factors, J.dv, J.ev
D = sign.(view(J.U.R,band(0)).*view(J.U.R,band(0))[2:end])
M = Matrix{T}(undef,b+3,b+3)
@inbounds for n in jj
dv[n] = K[1,1] # no sign correction needed on diagonal entry due to cancellation
# doublehouseholderapply!(K,τ[n+1],view(F,n+2:n+b+2,n+1),w)
v = view(F,n+1:n+b+2,n+1)
reflectorApply!(v, τ[n+1], view(K,1,2:b+3))
M[1,2:b+3] .= view(M,1,2:b+3) # symmetric matrix, avoid recomputation
reflectorApply!(v, τ[n+1], view(K,2:b+3,2:b+3))
reflectorApply!(v, τ[n+1], view(K,2:b+3,2:b+3)')
ev[n] = K[1,2]*D[n] # contains sign correction from QR not forcing positive diagonals
M .= view(X,n+1:n+b+3,n+1:n+b+3)
M[1:end-1,1:end-1] .= view(K,2:b+3,2:b+3)
K .= M
end
end
function cache_filldata!(J::QRJacobiBand{:ev, T}, inds::UnitRange{Int}) where T

function _fillqrbanddata!(J::QRJacobiData{:R,T}, inds::UnitRange{Int}) where T
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
getindex(J.U,inds[end]+1,inds[end]+1)
getindex(J.UX,inds[end]+1,inds[end]+1)
m = inds[end]+1
resizedata!(J.U,m,m)
resizedata!(J.UX,m,m)

ek = [zeros(T,2); one(T)]
dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
@inbounds for k in inds
J.data[k] = dot(view(J.UX,k,k-1:k+1), J.U[k-1:k+1,k-1:k+1] \ ek)
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]./U[k,k] # this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1] # this is dot(view(UX,k,k-1:k+1), U[k-1:k+1,k-1:k+1] \ ek)
end
end
end
Loading