@@ -6,6 +6,7 @@ immutable Butterfly{T} <: Factorization{T}
6
6
temp1:: Vector{T}
7
7
temp2:: Vector{T}
8
8
temp3:: Vector{T}
9
+ temp4:: Vector{T}
9
10
end
10
11
11
12
function size (B:: Butterfly , dim:: Integer )
@@ -105,7 +106,7 @@ function Butterfly{T}(A::AbstractMatrix{T}, L::Int; isorthogonal::Bool = false,
105
106
106
107
kk = sumkmax (indices)
107
108
108
- Butterfly (columns, factors, permutations, indices, zeros (T, kk), zeros (T, kk), zeros (T, kk))
109
+ Butterfly (columns, factors, permutations, indices, zeros (T, kk), zeros (T, kk), zeros (T, kk), zeros (T, kk) )
109
110
end
110
111
111
112
function sumkmax (indices:: Vector{Vector{Int}} )
@@ -150,11 +151,30 @@ function rowperm!(fwd::Bool, x::StridedVecOrMat, p::Vector{Int}, jstart::Int)
150
151
x
151
152
end
152
153
154
# Out-of-place permutation of a window of a vector.
#
# With n = length(p) and jshift = jstart - 1, the window is jstart:jshift+n and
#   fwd == true  : y[jshift + i]    = x[jshift + p[i]]   for i = 1:n
#   fwd == false : y[jshift + p[i]] = x[jshift + i]      for i = 1:n
# Entries of `y` outside the window are not written. Returns `y`.
#
# The window is bounds-checked once against both `y` and `x` before the loops,
# because the loops themselves run under `@inbounds`; a buffer that is too short
# now raises a BoundsError instead of reading/writing out of bounds.
# The entries of `p` are NOT validated here -- they are assumed to lie in 1:n
# (TODO confirm every caller passes a permutation of 1:n).
# NOTE: the loops read `x` while writing `y`, so passing the same array for both
# does not yield a permutation of the original contents.
function rowperm!(fwd::Bool, y::StridedVector, x::StridedVector, p::Vector{Int},
                  jstart::Int)
    n = length(p)
    jshift = jstart - 1
    window = jstart:(jshift + n)
    checkbounds(y, window)
    checkbounds(x, window)
    @inbounds if fwd
        @simd for i = 1:n
            y[jshift + i] = x[jshift + p[i]]
        end
    else
        @simd for i = 1:n
            y[jshift + p[i]] = x[jshift + i]
        end
    end
    y
end
168
+
153
169
## ColumnPermutation
154
170
# In-place products with a ColPerm. Each method hands `B`, the permutation vector
# `A.p`, and `jstart` to the in-place `rowperm!` and returns whatever it returns.

# A * B: calls `rowperm!` with `fwd = false`.
function A_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int)
    rowperm!(false, B, A.p, jstart)
end

# A.' * B: calls `rowperm!` with `fwd = true`.
function At_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int)
    rowperm!(true, B, A.p, jstart)
end

# A' * B: forwards to the transposed product above.
function Ac_mul_B!(A::ColPerm, B::StridedVecOrMat, jstart::Int)
    At_mul_B!(A, B, jstart)
end
157
173
174
# Out-of-place products with a ColPerm: read from `x`, write into `y` via the
# five-argument `rowperm!`, using the permutation vector `A.p` and offset `jstart`.
#   A_mul_B!  -> rowperm! with fwd = false
#   At_mul_B! -> rowperm! with fwd = true
#   Ac_mul_B! -> forwards to At_mul_B!
# Ac_mul_B! must pass its arguments in the (y, A, x, jstart) order that At_mul_B!
# declares; passing (y, x, A, jstart) matches no method defined here.
A_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = rowperm!(false, y, x, A.p, jstart)
At_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = rowperm!(true, y, x, A.p, jstart)
Ac_mul_B!(y::StridedVector, A::ColPerm, x::StridedVector, jstart::Int) = At_mul_B!(y, A, x, jstart)
177
+
158
178
# Fast A_mul_B!, At_mul_B!, and Ac_mul_B! for an ID. These overwrite the output.
159
179
160
180
function A_mul_B! {T} (y:: AbstractVecOrMat{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVecOrMat{T} , istart:: Int , jstart:: Int )
@@ -166,6 +186,14 @@ function A_mul_B!{T}(y::AbstractVecOrMat{T}, A::IDPackedV{T}, P::ColumnPermutati
166
186
y
167
187
end
168
188
189
# A_mul_B! for an IDPackedV with an explicit scratch vector `temp`.
#
# Steps, exactly as performed below:
#   1. temp <- At_mul_B!(temp, P, x, jstart)   (out-of-place; `temp` receives the result)
#   2. copy the first size(A)[1] entries of that window of `temp` into `y` at `istart`
#   3. A_mul_B!(y, A.T, temp, istart, jstart + size(A)[1])
# Returns `y`.
# NOTE(review): presumably `temp` exists so that `x` is left unmodified, unlike a
# variant that permutes `x` in place -- confirm against the six-argument method.
function A_mul_B!{T}(y::AbstractVector{T}, A::IDPackedV{T}, P::ColumnPermutation,
                     x::AbstractVector{T}, temp::AbstractVector{T},
                     istart::Int, jstart::Int)
    ncopy = size(A)[1]
    At_mul_B!(temp, P, x, jstart)
    copy!(y, istart, temp, jstart, ncopy)
    A_mul_B!(y, A.T, temp, istart, jstart + ncopy)
    return y
end
196
+
169
197
for f! in (:At_mul_B! , :Ac_mul_B! )
170
198
@eval begin
171
199
function $f! {T} (y:: AbstractVecOrMat{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVecOrMat{T} , istart:: Int , jstart:: Int )
@@ -175,6 +203,14 @@ for f! in (:At_mul_B!, :Ac_mul_B!)
175
203
A_mul_B! (P, y, istart)
176
204
y
177
205
end
206
+
207
# `$f!` (At_mul_B! / Ac_mul_B!) for an IDPackedV with an explicit scratch vector `temp`.
#
# Steps, exactly as performed below:
#   1. copy size(A)[1] entries of `x` (from `jstart`) into `temp` (at `istart`)
#   2. $f!(temp, A.T, x, istart + size(A)[1], jstart)
#   3. A_mul_B!(y, P, temp, istart)   (out-of-place; `y` receives the result)
# Returns `y`.
# NOTE(review): presumably `temp` holds the unpermuted result so the permutation
# can be applied out of place into `y` -- confirm against the six-argument method.
function $f!{T}(y::AbstractVector{T}, A::IDPackedV{T}, P::ColumnPermutation,
                x::AbstractVector{T}, temp::AbstractVector{T},
                istart::Int, jstart::Int)
    ncopy = size(A)[1]
    copy!(temp, istart, x, jstart, ncopy)
    $f!(temp, A.T, x, istart + ncopy, jstart)
    A_mul_B!(y, P, temp, istart)
    return y
end
178
214
end
179
215
end
180
216
@@ -194,6 +230,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
194
230
195
231
temp1 = B. temp1
196
232
temp2 = B. temp2
233
+ temp3 = B. temp3
197
234
fill! (temp1, zero (T))
198
235
fill! (temp2, zero (T))
199
236
@@ -217,7 +254,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
217
254
for i = 1 : ii
218
255
shft = 2 jj* div (ctr,2 jj)
219
256
for j = 1 : jj
220
- A_mul_B! (temp2, factors[j+ ctr], permutations[j+ ctr], temp1, indsout[j+ ctr], indsin[2 j+ shft- 1 ])
257
+ A_mul_B! (temp2, factors[j+ ctr], permutations[j+ ctr], temp1, temp3, indsout[j+ ctr], indsin[2 j+ shft- 1 ])
221
258
end
222
259
ctr += jj
223
260
end
@@ -252,6 +289,7 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
252
289
temp1 = B. temp1
253
290
temp2 = B. temp2
254
291
temp3 = B. temp3
292
+ temp4 = B. temp4
255
293
fill! (temp1, zero (T))
256
294
fill! (temp2, zero (T))
257
295
fill! (temp3, zero (T))
@@ -274,8 +312,9 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
274
312
ctr = 0
275
313
for i = 1 : ii
276
314
shft = 2 jj* div (ctr,2 jj)
315
+ fill! (temp4, zero (T))
277
316
for j = 1 : jj
278
- $ f! (temp3, factors[j+ ctr], permutations[j+ ctr], temp1, indsout[2 j+ shft- 1 ], indsin[j+ ctr])
317
+ $ f! (temp3, factors[j+ ctr], permutations[j+ ctr], temp1, temp4, indsout[2 j+ shft- 1 ], indsin[j+ ctr])
279
318
addtemp3totemp2! (temp2, temp3, indsout[2 j+ shft- 1 ], indsout[2 j+ shft+ 1 ]- 1 )
280
319
end
281
320
ctr += jj
0 commit comments