Skip to content

Commit d3d03ef

Browse files
TSGutdlfivefifty
andauthored
Speed up Cholesky Jacobi matrices (#169)
* symmetric clenshaw getindex + better resizing * adjustments to caching + doc improvements * simplify * minor change * fix a bunch of tests * bugfix * another bugfix * increase coverage * Update Project.toml * Update src/clenshaw.jl * Update src/clenshaw.jl * Update src/clenshaw.jl --------- Co-authored-by: Sheehan Olver <[email protected]>
1 parent 5256e8e commit d3d03ef

File tree

6 files changed

+97
-57
lines changed

6 files changed

+97
-57
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ClassicalOrthogonalPolynomials"
22
uuid = "b30e2e7b-c4ee-47da-9d5f-2c5c27239acd"
33
authors = ["Sheehan Olver <[email protected]>"]
4-
version = "0.12.3"
4+
version = "0.12.4"
55

66
[deps]
77
ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"

src/ClassicalOrthogonalPolynomials.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import LazyArrays: MemoryLayout, Applied, ApplyStyle, flatten, _flatten, adjoint
1717
sub_materialize, arguments, sub_paddeddata, paddeddata, PaddedLayout, resizedata!, LazyVector, ApplyLayout, call,
1818
_mul_arguments, CachedVector, CachedMatrix, LazyVector, LazyMatrix, axpy!, AbstractLazyLayout, BroadcastLayout,
1919
AbstractCachedVector, AbstractCachedMatrix, paddeddata, cache_filldata!,
20-
simplifiable, PaddedArray, converteltype
20+
simplifiable, PaddedArray, converteltype, simplify
2121
import ArrayLayouts: MatMulVecAdd, materialize!, _fill_lmul!, sublayout, sub_materialize, lmul!, ldiv!, ldiv, transposelayout, triangulardata,
2222
subdiagonaldata, diagonaldata, supdiagonaldata, mul, rowsupport, colsupport
2323
import LazyBandedMatrices: SymTridiagonal, Bidiagonal, Tridiagonal, unitblocks, BlockRange1, AbstractLazyBandedLayout
@@ -33,7 +33,7 @@ import QuasiArrays: cardinality, checkindex, QuasiAdjoint, QuasiTranspose, Inclu
3333
AbstractQuasiFill, equals_layout, QuasiArrayLayout, PolynomialLayout, diff_layout
3434

3535
import InfiniteArrays: OneToInf, InfAxes, Infinity, AbstractInfUnitRange, InfiniteCardinal, InfRanges
36-
import InfiniteLinearAlgebra: chop!, chop, pad, choplength, compatible_resize!
36+
import InfiniteLinearAlgebra: chop!, chop, pad, choplength, compatible_resize!, partialcholesky!
3737
import ContinuumArrays: Basis, Weight, basis_axes, @simplify, Identity, AbstractAffineQuasiVector, ProjectionFactorization,
3838
grid, plotgrid, plotgrid_layout, plotvalues_layout, grid_layout, transform_ldiv, TransformFactorization, QInfAxes, broadcastbasis, ExpansionLayout, basismap,
3939
AffineQuasiVector, AffineMap, AbstractWeightLayout, AbstractWeightedBasisLayout, WeightedBasisLayout, WeightedBasisLayouts, demap, AbstractBasisLayout, BasisLayout,

src/choleskyQR.jl

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ orthogonalpolynomial(w::Function, P::AbstractQuasiMatrix) = orthogonalpolynomial
3737
cholesky_jacobimatrix(w, P)
3838
3939
returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
40-
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a Cholesky decomposition of the weight modification.
40+
orthogonal with respect to `w(x)` by computing a Cholesky decomposition of the weight modification.
4141
42-
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs are `w` as a function or `W` as an infinite matrix representing multiplication with the function `w` on the basis `P`.
42+
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs are `w` as a function or `W` as an infinite matrix representing the weight modifier multiplication by the function `w / w_P` on `P` where `w_P` is the orthogonality weight of `P`.
4343
"""
4444
cholesky_jacobimatrix(w::Function, P) = cholesky_jacobimatrix(w.(axes(P,1)), P)
4545

@@ -52,9 +52,7 @@ end
5252
function cholesky_jacobimatrix(W::AbstractMatrix, Q)
5353
isnormalized(Q) || error("Polynomials must be orthonormal")
5454
U = cholesky(W).U
55-
X = jacobimatrix(Q)
56-
UX = ApplyArray(*,U,X)
57-
CJD = CholeskyJacobiData(U,UX)
55+
CJD = CholeskyJacobiData(U,Q)
5856
return SymTridiagonal(view(CJD,:,1),view(CJD,:,2))
5957
end
6058

@@ -63,24 +61,35 @@ mutable struct CholeskyJacobiData{T} <: LazyMatrix{T}
6361
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
6462
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
6563
U::UpperTriangular{T} # store upper triangular conversion matrix (needed to extend available entries)
66-
UX::ApplyArray{T} # store U*X, where X is the Jacobi matrix of the original P (needed to extend available entries)
64+
X::AbstractMatrix{T} # store Jacobi matrix of original polynomials
65+
P # store original polynomials
6766
datasize::Int # size of so-far computed block
6867
end
6968

7069
# Computes the initial data for the Jacobi operator bands
71-
function CholeskyJacobiData(U::AbstractMatrix{T}, UX) where T
70+
function CholeskyJacobiData(U::AbstractMatrix{T}, P) where T
7271
# compute a length 2 vector on first go and circumvent BigFloat issue
73-
dv = zeros(T,2)
72+
dv = zeros(T,2)
7473
ev = zeros(T,2)
74+
X = jacobimatrix(P)
75+
partialcholesky!(U.data, 3)
76+
UX = view(U,1:3,1:3)*X[1:3,1:3]
7577
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
7678
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
7779
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
7880
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])[1:3,1:3] \ [zeros(T,2); one(T)])
79-
return CholeskyJacobiData{T}(dv, ev, U, UX, 2)
81+
return CholeskyJacobiData{T}(dv, ev, U, X, P, 2)
8082
end
8183

8284
size(::CholeskyJacobiData) = (ℵ₀,2) # Stored as two infinite cached bands
8385

86+
function getindex(C::SymTridiagonal{<:Any, <:SubArray{<:Any, 1, <:CholeskyJacobiData, <:Tuple, false}, <:SubArray{<:Any, 1, <:CholeskyJacobiData, <:Tuple, false}}, kr::UnitRange, jr::UnitRange)
87+
m = 1+max(kr.stop,jr.stop)
88+
resizedata!(C.dv.parent,m,1)
89+
resizedata!(C.ev.parent,m,2)
90+
return copy(view(C,kr,jr))
91+
end
92+
8493
function getindex(K::CholeskyJacobiData, n::Integer, m::Integer)
8594
@assert (m==1) || (m==2)
8695
resizedata!(K,n,m)
@@ -90,7 +99,7 @@ end
9099

91100
# Resize and filling functions for cached implementation
92101
function resizedata!(K::CholeskyJacobiData, n::Integer, m::Integer)
93-
nm = max(n,m)
102+
nm = 1+max(n,m)
94103
νμ = K.datasize
95104
if nm > νμ
96105
resize!(K.dv,nm)
@@ -102,12 +111,14 @@ function resizedata!(K::CholeskyJacobiData, n::Integer, m::Integer)
102111
end
103112

104113
function _fillcholeskybanddata!(J::CholeskyJacobiData{T}, inds::UnitRange{Int}) where T
105-
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
106-
resizedata!(J.U,inds[end]+1,inds[end]+1)
107-
resizedata!(J.UX,inds[end]+1,inds[end]+1)
114+
# pre-fill U to prevent expensive step-by-step filling in
115+
m = inds[end]+1
116+
partialcholesky!(J.U.data, m)
117+
dv, ev, U, X = J.dv, J.ev, J.U, J.X
118+
119+
UX = view(U,1:m,1:m)*X[1:m,1:m]
108120

109-
dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
110-
@inbounds for k in inds
121+
@inbounds Threads.@threads for k = inds
111122
# this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
112123
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]/U[k,k]
113124
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1]
@@ -116,20 +127,21 @@ end
116127

117128

118129
"""
119-
qr_jacobimatrix(sqrtw, P)
130+
qr_jacobimatrix(w, P)
120131
121132
returns the Jacobi matrix `X` associated to a quasi-matrix of polynomials
122-
orthogonal with respect to `w(x) w_p(x)` where `w_p(x)` is the weight of the polynomials in `P` by computing a QR decomposition of the square root weight modification.
133+
orthogonal with respect to `w(x)` by computing a QR decomposition of the square root weight modification.
123134
124-
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs for `sqrtw` are the square root of the weight modification as a function or `sqrtW` as an infinite matrix representing multiplication with the function `sqrt(w)` on the basis `P`.
135+
The resulting polynomials are orthonormal on the same domain as `P`. The supplied `P` must be normalized. Accepted inputs for `w` are the target weight as a function or `sqrtW`, representing the multiplication operator of square root weight modification on the basis `P`.
125136
126137
The underlying QR approach allows two methods, one which uses the Q matrix and one which uses the R matrix. To change between methods, an optional argument :Q or :R may be supplied. The default is to use the Q method.
127138
"""
128-
function qr_jacobimatrix(sqrtw::Function, P, method = :Q)
139+
qr_jacobimatrix(w::Function, P, method = :Q) = qr_jacobimatrix(w.(axes(P,1)), P, Symbol(method))
140+
function qr_jacobimatrix(w::AbstractQuasiVector, P, method = :Q)
129141
Q = normalized(P)
130-
x = axes(P,1)
131-
sqrtW = (Q \ (sqrtw.(x) .* Q)) # Compute weight multiplication via Clenshaw
132-
return qr_jacobimatrix(sqrtW, Q, method)
142+
w_P = orthogonalityweight(P)
143+
sqrtW = Symmetric(Q \ (sqrt.(w ./ w_P) .* Q)) # Compute weight multiplication via Clenshaw
144+
return qr_jacobimatrix(sqrtW, Q, Symbol(method))
133145
end
134146
function qr_jacobimatrix(sqrtW::AbstractMatrix{T}, Q, method = :Q) where T
135147
isnormalized(Q) || error("Polynomials must be orthonormal")
@@ -143,7 +155,7 @@ mutable struct QRJacobiData{method,T} <: LazyMatrix{T}
143155
dv::AbstractVector{T} # store diagonal band entries in adaptively sized vector
144156
ev::AbstractVector{T} # store off-diagonal band entries in adaptively sized vector
145157
U # store conversion, Q method: stores QR object. R method: only stores R.
146-
UX # Auxilliary matrix. Q method: stores in-progress incomplete modification. R method: stores U*X for efficiency.
158+
UX # Auxilliary matrix. Q method: stores in-progress incomplete modification. R method: stores Jacobi matrix of original polynomials
147159
P # Remember original polynomials
148160
datasize::Int # size of so-far computed block
149161
end
@@ -179,15 +191,15 @@ function QRJacobiData{:R,T}(F, P) where T
179191
U = F.R
180192
U = ApplyArray(*,Diagonal(sign.(view(U,band(0)))),U) # QR decomposition does not force positive diagonals on R by default
181193
X = jacobimatrix(P)
182-
UX = ApplyArray(*,U,X)
194+
UX = view(U,1:3,1:3)*X[1:3,1:3]
183195
# compute a length 2 vector on first go and circumvent BigFloat issue
184196
dv = zeros(T,2)
185197
ev = zeros(T,2)
186198
dv[1] = UX[1,1]/U[1,1] # this is dot(view(UX,1,1), U[1,1] \ [one(T)])
187199
dv[2] = -U[1,2]*UX[2,1]/(U[1,1]*U[2,2])+UX[2,2]/U[2,2] # this is dot(view(UX,2,1:2), U[1:2,1:2] \ [zero(T); one(T)])
188200
ev[1] = -UX[1,1]*U[1,2]/(U[1,1]*U[2,2])+UX[1,2]/U[2,2] # this is dot(view(UX,1,1:2), U[1:2,1:2] \ [zero(T); one(T)])
189201
ev[2] = UX[2,1]/U[3,3]*(-U[1,3]/U[1,1]+U[1,2]*U[2,3]/(U[1,1]*U[2,2]))+UX[2,2]/U[3,3]*(-U[2,3]/U[2,2])+UX[2,3]/U[3,3] # this is dot(view(UX,2,1:3), U[1:3,1:3] \ [zeros(T,2); one(T)])
190-
return QRJacobiData{:R,T}(dv, ev, U, UX, P, 2)
202+
return QRJacobiData{:R,T}(dv, ev, U, X, P, 2)
191203
end
192204

193205

@@ -200,9 +212,16 @@ function getindex(K::QRJacobiData, n::Integer, m::Integer)
200212
m == 2 && return K.ev[n]
201213
end
202214

215+
function getindex(C::SymTridiagonal{<:Any, <:SubArray{<:Any, 1, <:QRJacobiData, <:Tuple, false}, <:SubArray{<:Any, 1, <:QRJacobiData, <:Tuple, false}}, kr::UnitRange, jr::UnitRange)
216+
m = maximum(max(kr,jr))+1
217+
resizedata!(C.dv.parent,m,2)
218+
resizedata!(C.ev.parent,m,2)
219+
return copy(view(C,kr,jr))
220+
end
221+
203222
# Resize and filling functions for cached implementation
204223
function resizedata!(K::QRJacobiData, n::Integer, m::Integer)
205-
nm = max(n,m)
224+
nm = 1+max(n,m)
206225
νμ = K.datasize
207226
if nm > νμ
208227
resize!(K.dv,nm)
@@ -246,10 +265,11 @@ function _fillqrbanddata!(J::QRJacobiData{:R,T}, inds::UnitRange{Int}) where T
246265
# pre-fill U and UX to prevent expensive step-by-step filling in of cached U and UX in the loop
247266
m = inds[end]+1
248267
resizedata!(J.U,m,m)
249-
resizedata!(J.UX,m,m)
268+
dv, ev, X, U = J.dv, J.ev, J.UX, J.U
269+
270+
UX = view(U,1:m,1:m)*X[1:m,1:m]
250271

251-
dv, ev, UX, U = J.dv, J.ev, J.UX, J.U
252-
@inbounds for k in inds
272+
@inbounds Threads.@threads for k in inds
253273
dv[k] = -U[k-1,k]*UX[k,k-1]/(U[k-1,k-1]*U[k,k])+UX[k,k]./U[k,k] # this is dot(view(UX,k,k-1:k), U[k-1:k,k-1:k] \ ek)
254274
ev[k] = UX[k,k-1]/U[k+1,k+1]*(-U[k-1,k+1]/U[k-1,k-1]+U[k-1,k]*U[k,k+1]/(U[k-1,k-1]*U[k,k]))+UX[k,k]/U[k+1,k+1]*(-U[k,k+1]/U[k,k])+UX[k,k+1]/U[k+1,k+1] # this is dot(view(UX,k,k-1:k+1), U[k-1:k+1,k-1:k+1] \ ek)
255275
end

src/clenshaw.jl

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,15 +289,15 @@ function _BandedMatrix(::ClenshawLayout, V::SubArray{<:Any,2})
289289
M = parent(V)
290290
kr,jr = parentindices(V)
291291
b = bandwidth(M,1)
292-
jkr=max(1,min(first(jr),first(kr))-b÷2):max(last(jr),last(kr))+b÷2
292+
jkr = max(1,min(first(jr),first(kr))-b÷2):max(last(jr),last(kr))+b÷2
293293
# relationship between jkr and kr, jr
294294
kr2,jr2 = kr.-first(jkr).+1,jr.-first(jkr).+1
295295
lmul!(M.p0, clenshaw(M.c, M.A, M.B, M.C, M.X[jkr, jkr])[kr2,jr2])
296296
end
297297

298298
function getindex(M::Clenshaw{T}, kr::AbstractUnitRange, j::Integer) where T
299299
b = bandwidth(M,1)
300-
jkr=max(1,min(j,first(kr))-b÷2):max(j,last(kr))+b÷2
300+
jkr = max(1,min(j,first(kr))-b÷2):max(j,last(kr))+b÷2
301301
# relationship between jkr and kr, jr
302302
kr2,j2 = kr.-first(jkr).+1,j-first(jkr)+1
303303
f = [Zeros{T}(j2-1); one(T); Zeros{T}(length(jkr)-j2)]
@@ -306,6 +306,19 @@ end
306306

307307
getindex(M::Clenshaw, k::Int, j::Int) = M[k:k,j][1]
308308

309+
function getindex(S::Symmetric{T,<:Clenshaw}, k::Integer, jr::AbstractUnitRange) where T
310+
m = max(jr.start,jr.stop,k)
311+
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[k,jr]
312+
end
313+
function getindex(S::Symmetric{T,<:Clenshaw}, kr::AbstractUnitRange, j::Integer) where T
314+
m = max(kr.start,kr.stop,j)
315+
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[kr,j]
316+
end
317+
function getindex(S::Symmetric{T,<:Clenshaw}, kr::AbstractUnitRange, jr::AbstractUnitRange) where T
318+
m = max(kr.start,jr.start,kr.stop,jr.stop)
319+
return Symmetric(getindex(S.data,1:m,1:m),Symbol(S.uplo))[kr,jr]
320+
end
321+
309322
transposelayout(M::ClenshawLayout) = LazyBandedMatrices.LazyBandedLayout()
310323
# TODO: generalise for layout, use Base.PermutedDimsArray
311324
Base.permutedims(M::Clenshaw{<:Number}) = transpose(M)

0 commit comments

Comments
 (0)