Skip to content

Commit 66bc345

Browse files
implement #20
1 parent c86bc7f commit 66bc345

File tree

1 file changed

+42
-3
lines changed

1 file changed

+42
-3
lines changed

src/SphericalHarmonics/Butterfly.jl

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ immutable Butterfly{T} <: Factorization{T}
66
temp1::Vector{T}
77
temp2::Vector{T}
88
temp3::Vector{T}
9+
temp4::Vector{T}
910
end
1011

1112
function size(B::Butterfly, dim::Integer)
@@ -105,7 +106,7 @@ function Butterfly{T}(A::AbstractMatrix{T}, L::Int; isorthogonal::Bool = false,
105106

106107
kk = sumkmax(indices)
107108

108-
Butterfly(columns, factors, permutations, indices, zeros(T, kk), zeros(T, kk), zeros(T, kk))
109+
Butterfly(columns, factors, permutations, indices, zeros(T, kk), zeros(T, kk), zeros(T, kk), zeros(T, kk))
109110
end
110111

111112
function sumkmax(indices::Vector{Vector{Int}})
@@ -150,11 +151,30 @@ function rowperm!(fwd::Bool, x::StridedVecOrMat, p::Vector{Int}, jstart::Int)
150151
x
151152
end
152153

154+
function rowperm!(fwd::Bool, y::StridedVector, x::StridedVector, p::Vector{Int}, jstart::Int)
155+
n = length(p)
156+
jshift = jstart-1
157+
@inbounds if (fwd)
158+
@simd for i = 1:n
159+
y[jshift+i] = x[jshift+p[i]]
160+
end
161+
else
162+
@simd for i = 1:n
163+
y[jshift+p[i]] = x[jshift+i]
164+
end
165+
end
166+
y
167+
end
168+
153169
## ColumnPermutation
154170
A_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int) = rowperm!(false, B, A.p, jstart)
155171
At_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int) = rowperm!(true, B, A.p, jstart)
156172
Ac_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int) = At_mul_B!(A, B, jstart)
157173

174+
A_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = rowperm!(false, y, x, A.p, jstart)
175+
At_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = rowperm!(true, y, x, A.p, jstart)
176+
Ac_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = At_mul_B!(y, x, A, jstart)
177+
158178
# Fast A_mul_B!, At_mul_B!, and Ac_mul_B! for an ID. These overwrite the output.
159179

160180
function A_mul_B!{T}(y::AbstractVecOrMat{T}, A::IDPackedV{T}, P::ColumnPermutation, x::AbstractVecOrMat{T}, istart::Int, jstart::Int)
@@ -166,6 +186,14 @@ function A_mul_B!{T}(y::AbstractVecOrMat{T}, A::IDPackedV{T}, P::ColumnPermutati
166186
y
167187
end
168188

189+
function A_mul_B!{T}(y::AbstractVector{T}, A::IDPackedV{T}, P::ColumnPermutation, x::AbstractVector{T}, temp::AbstractVector{T}, istart::Int, jstart::Int)
190+
k, n = size(A)
191+
At_mul_B!(temp, P, x, jstart)
192+
copy!(y, istart, temp, jstart, k)
193+
A_mul_B!(y, A.T, temp, istart, jstart+k)
194+
y
195+
end
196+
169197
for f! in (:At_mul_B!, :Ac_mul_B!)
170198
@eval begin
171199
function $f!{T}(y::AbstractVecOrMat{T}, A::IDPackedV{T}, P::ColumnPermutation, x::AbstractVecOrMat{T}, istart::Int, jstart::Int)
@@ -175,6 +203,14 @@ for f! in (:At_mul_B!, :Ac_mul_B!)
175203
A_mul_B!(P, y, istart)
176204
y
177205
end
206+
207+
function $f!{T}(y::AbstractVector{T}, A::IDPackedV{T}, P::ColumnPermutation, x::AbstractVector{T}, temp::AbstractVector{T}, istart::Int, jstart::Int)
208+
k, n = size(A)
209+
copy!(temp, istart, x, jstart, k)
210+
$f!(temp, A.T, x, istart+k, jstart)
211+
A_mul_B!(y, P, temp, istart)
212+
y
213+
end
178214
end
179215
end
180216

@@ -194,6 +230,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
194230

195231
temp1 = B.temp1
196232
temp2 = B.temp2
233+
temp3 = B.temp3
197234
fill!(temp1, zero(T))
198235
fill!(temp2, zero(T))
199236

@@ -217,7 +254,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
217254
for i = 1:ii
218255
shft = 2jj*div(ctr,2jj)
219256
for j = 1:jj
220-
A_mul_B!(temp2, factors[j+ctr], permutations[j+ctr], temp1, indsout[j+ctr], indsin[2j+shft-1])
257+
A_mul_B!(temp2, factors[j+ctr], permutations[j+ctr], temp1, temp3, indsout[j+ctr], indsin[2j+shft-1])
221258
end
222259
ctr += jj
223260
end
@@ -252,6 +289,7 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
252289
temp1 = B.temp1
253290
temp2 = B.temp2
254291
temp3 = B.temp3
292+
temp4 = B.temp4
255293
fill!(temp1, zero(T))
256294
fill!(temp2, zero(T))
257295
fill!(temp3, zero(T))
@@ -274,8 +312,9 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
274312
ctr = 0
275313
for i = 1:ii
276314
shft = 2jj*div(ctr,2jj)
315+
fill!(temp4, zero(T))
277316
for j = 1:jj
278-
$f!(temp3, factors[j+ctr], permutations[j+ctr], temp1, indsout[2j+shft-1], indsin[j+ctr])
317+
$f!(temp3, factors[j+ctr], permutations[j+ctr], temp1, temp4, indsout[2j+shft-1], indsin[j+ctr])
279318
addtemp3totemp2!(temp2, temp3, indsout[2j+shft-1], indsout[2j+shft+1]-1)
280319
end
281320
ctr += jj

0 commit comments

Comments
 (0)