Skip to content

Commit c0e4a54

Browse files
Fix even column SHT segfault
Issue #16, thanks @meggart.
1 parent e857158 commit c0e4a54

File tree

6 files changed

+161
-71
lines changed

6 files changed

+161
-71
lines changed

src/SphericalHarmonics/fastplan.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,17 @@ function Base.A_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix)
4343
copy!(B, 1, X, 1, 3M)
4444
for J = 2:N÷2
4545
A_mul_B_col_J!(B, BF[J-1], X, 2J)
46-
A_mul_B_col_J!(B, BF[J-1], X, 2J+1)
46+
2J < N && A_mul_B_col_J!(B, BF[J-1], X, 2J+1)
4747
end
4848

4949
A_mul_B_col_J!!(Y, p1, B, 1)
5050
for J = 2:4:N
5151
A_mul_B_col_J!!(Y, p2, B, J)
52-
A_mul_B_col_J!!(Y, p2, B, J+1)
52+
J < N && A_mul_B_col_J!!(Y, p2, B, J+1)
5353
end
5454
for J = 4:4:N
5555
A_mul_B_col_J!!(Y, p1, B, J)
56-
A_mul_B_col_J!!(Y, p1, B, J+1)
56+
J < N && A_mul_B_col_J!!(Y, p1, B, J+1)
5757
end
5858
Y
5959
end
@@ -65,17 +65,17 @@ function Base.At_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix)
6565
A_mul_B_col_J!!(Y, p1inv, B, 1)
6666
for J = 2:4:N
6767
A_mul_B_col_J!!(Y, p2inv, B, J)
68-
A_mul_B_col_J!!(Y, p2inv, B, J+1)
68+
J < N && A_mul_B_col_J!!(Y, p2inv, B, J+1)
6969
end
7070
for J = 4:4:N
7171
A_mul_B_col_J!!(Y, p1inv, B, J)
72-
A_mul_B_col_J!!(Y, p1inv, B, J+1)
72+
J < N && A_mul_B_col_J!!(Y, p1inv, B, J+1)
7373
end
7474

7575
copy!(B, Y)
7676
for J = 2:N÷2
7777
At_mul_B_col_J!(Y, BF[J-1], B, 2J)
78-
At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
78+
2J < N && At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
7979
end
8080
sph_zero_spurious_modes!(Y)
8181
end

src/SphericalHarmonics/slowplan.jl

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,36 @@ function Base.A_mul_B!(P::RotationPlan, A::AbstractMatrix)
103103
N, M = size(A)
104104
snm = P.snm
105105
cnm = P.cnm
106-
@stepthreads for m = M÷2:-1:2
106+
if isodd(M)
107+
m = M÷2
108+
@inbounds for j = m:-2:2
109+
for l = N-j:-1:1
110+
s = snm[l+(j-2)*(2*N+3-j)÷2]
111+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
112+
a1 = A[l+N*(2*m-1)]
113+
a2 = A[l+2+N*(2*m-1)]
114+
a3 = A[l+N*(2*m)]
115+
a4 = A[l+2+N*(2*m)]
116+
A[l+N*(2*m-1)] = c*a1 + s*a2
117+
A[l+2+N*(2*m-1)] = c*a2 - s*a1
118+
A[l+N*(2*m)] = c*a3 + s*a4
119+
A[l+2+N*(2*m)] = c*a4 - s*a3
120+
end
121+
end
122+
else
123+
m = M÷2
124+
@inbounds for j = m:-2:2
125+
for l = N-j:-1:1
126+
s = snm[l+(j-2)*(2*N+3-j)÷2]
127+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
128+
a1 = A[l+N*(2*m-1)]
129+
a2 = A[l+2+N*(2*m-1)]
130+
A[l+N*(2*m-1)] = c*a1 + s*a2
131+
A[l+2+N*(2*m-1)] = c*a2 - s*a1
132+
end
133+
end
134+
end
135+
@stepthreads for m = M÷2-1:-1:2
107136
@inbounds for j = m:-2:2
108137
for l = N-j:-1:1
109138
s = snm[l+(j-2)*(2*N+3-j)÷2]
@@ -126,7 +155,36 @@ function Base.At_mul_B!(P::RotationPlan, A::AbstractMatrix)
126155
N, M = size(A)
127156
snm = P.snm
128157
cnm = P.cnm
129-
@stepthreads for m = M÷2:-1:2
158+
if isodd(M)
159+
m = M÷2
160+
@inbounds for j = reverse(m:-2:2)
161+
for l = 1:N-j
162+
s = snm[l+(j-2)*(2*N+3-j)÷2]
163+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
164+
a1 = A[l+N*(2*m-1)]
165+
a2 = A[l+2+N*(2*m-1)]
166+
a3 = A[l+N*(2*m)]
167+
a4 = A[l+2+N*(2*m)]
168+
A[l+N*(2*m-1)] = c*a1 - s*a2
169+
A[l+2+N*(2*m-1)] = c*a2 + s*a1
170+
A[l+N*(2*m)] = c*a3 - s*a4
171+
A[l+2+N*(2*m)] = c*a4 + s*a3
172+
end
173+
end
174+
else
175+
m = M÷2
176+
@inbounds for j = reverse(m:-2:2)
177+
for l = 1:N-j
178+
s = snm[l+(j-2)*(2*N+3-j)÷2]
179+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
180+
a1 = A[l+N*(2*m-1)]
181+
a2 = A[l+2+N*(2*m-1)]
182+
A[l+N*(2*m-1)] = c*a1 - s*a2
183+
A[l+2+N*(2*m-1)] = c*a2 + s*a1
184+
end
185+
end
186+
end
187+
@stepthreads for m = M÷2-1:-1:2
130188
@inbounds for j = reverse(m:-2:2)
131189
for l = 1:N-j
132190
s = snm[l+(j-2)*(2*N+3-j)÷2]
@@ -216,11 +274,11 @@ function Base.A_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix)
216274
A_mul_B_col_J!!(Y, p1, B, 1)
217275
for J = 2:4:N
218276
A_mul_B_col_J!!(Y, p2, B, J)
219-
A_mul_B_col_J!!(Y, p2, B, J+1)
277+
J < N && A_mul_B_col_J!!(Y, p2, B, J+1)
220278
end
221279
for J = 4:4:N
222280
A_mul_B_col_J!!(Y, p1, B, J)
223-
A_mul_B_col_J!!(Y, p1, B, J+1)
281+
J < N && A_mul_B_col_J!!(Y, p1, B, J+1)
224282
end
225283
Y
226284
end
@@ -232,11 +290,11 @@ function Base.At_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix)
232290
A_mul_B_col_J!!(Y, p1inv, B, 1)
233291
for J = 2:4:N
234292
A_mul_B_col_J!!(Y, p2inv, B, J)
235-
A_mul_B_col_J!!(Y, p2inv, B, J+1)
293+
J < N && A_mul_B_col_J!!(Y, p2inv, B, J+1)
236294
end
237295
for J = 4:4:N
238296
A_mul_B_col_J!!(Y, p1inv, B, J)
239-
A_mul_B_col_J!!(Y, p1inv, B, J+1)
297+
J < N && A_mul_B_col_J!!(Y, p1inv, B, J+1)
240298
end
241299
sph_zero_spurious_modes!(At_mul_B!(RP, Y))
242300
end

src/SphericalHarmonics/sphfunctions.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
function sph_zero_spurious_modes!(A::AbstractMatrix)
22
M, N = size(A)
33
n = N÷2
4-
@inbounds for j = 1:n
4+
@inbounds for j = 1:n-1
55
@simd for i = M-j+1:M
66
A[i,2j] = 0
77
A[i,2j+1] = 0
88
end
99
end
10+
@inbounds @simd for i = M-n+1:M
11+
A[i,2n] = 0
12+
2n < N && (A[i,2n+1] = 0)
13+
end
1014
A
1115
end
1216

src/SphericalHarmonics/synthesisanalysis.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,15 @@ function Base.A_mul_B!(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T}) where T
4545

4646
for J = 2:4:N
4747
A_mul_B_col_J!(Y, PCo, X, J)
48-
A_mul_B_col_J!(Y, PCo, X, J+1)
48+
J < N && A_mul_B_col_J!(Y, PCo, X, J+1)
4949
end
5050
for J = 4:4:N
5151
X[1,J] *= two(T)
52-
X[1,J+1] *= two(T)
52+
J < N && (X[1,J+1] *= two(T))
5353
A_mul_B_col_J!(Y, PCe, X, J)
54-
A_mul_B_col_J!(Y, PCe, X, J+1)
54+
J < N && A_mul_B_col_J!(Y, PCe, X, J+1)
5555
X[1,J] *= half(T)
56-
X[1,J+1] *= half(T)
56+
J < N && (X[1,J+1] *= half(T))
5757
end
5858
scale!(half(T), Y)
5959

@@ -94,13 +94,13 @@ function Base.A_mul_B!(Y::Matrix{T}, P::AnalysisPlan{T}, X::Matrix{T}) where T
9494
Y[1] *= half(T)
9595
for J = 2:4:N
9696
A_mul_B_col_J!(Y, PCo, Y, J)
97-
A_mul_B_col_J!(Y, PCo, Y, J+1)
97+
J < N && A_mul_B_col_J!(Y, PCo, Y, J+1)
9898
end
9999
for J = 4:4:N
100100
A_mul_B_col_J!(Y, PCe, Y, J)
101-
A_mul_B_col_J!(Y, PCe, Y, J+1)
101+
J < N && A_mul_B_col_J!(Y, PCe, Y, J+1)
102102
Y[1,J] *= half(T)
103-
Y[1,J+1] *= half(T)
103+
J < N && (Y[1,J+1] *= half(T))
104104
end
105105
scale!(sqrt(π)*inv(T(M)), Y)
106106
sqrttwo = sqrt(2)

src/SphericalHarmonics/thinplan.jl

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
const LAYERSKELETON = 64
22

3-
checklayer(j::Int) = j÷LAYERSKELETON == j/LAYERSKELETON
3+
checklayer(J::Int) = J÷LAYERSKELETON == J/LAYERSKELETON
44

55
struct ThinSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
66
RP::RotationPlan{T}
@@ -25,14 +25,14 @@ function ThinSphericalHarmonicPlan(A::Matrix{T}, L::Int; opts...) where T
2525
Co = eye(T, M)
2626
BF = Vector{Butterfly{T}}(n-2)
2727
P = Progress(n-2, 0.1, "Pre-computing...", 43)
28-
for j = 1:2:n-2
29-
A_mul_B!(Ce, RP.layers[j])
30-
checklayer(j+1) && (BF[j] = Butterfly(Ce, L; isorthogonal = true, opts...))
28+
for J = 1:2:n-2
29+
A_mul_B!(Ce, RP.layers[J])
30+
checklayer(J+1) && (BF[J] = Butterfly(Ce, L; isorthogonal = true, opts...))
3131
next!(P)
3232
end
33-
for j = 2:2:n-2
34-
A_mul_B!(Co, RP.layers[j])
35-
checklayer(j) && (BF[j] = Butterfly(Co, L; isorthogonal = true, opts...))
33+
for J = 2:2:n-2
34+
A_mul_B!(Co, RP.layers[J])
35+
checklayer(J) && (BF[J] = Butterfly(Co, L; isorthogonal = true, opts...))
3636
next!(P)
3737
end
3838
ThinSphericalHarmonicPlan(RP, BF, p1, p2, p1inv, p2inv, B)
@@ -45,36 +45,36 @@ function Base.A_mul_B!(Y::Matrix, TP::ThinSphericalHarmonicPlan, X::Matrix)
4545
copy!(B, X)
4646
M, N = size(X)
4747

48-
for j = 3:2:N÷2
49-
if checklayer(j-1)
50-
A_mul_B_col_J!(Y, BF[j-1], B, 2j)
51-
A_mul_B_col_J!(Y, BF[j-1], B, 2j+1)
48+
for J = 3:2:N÷2
49+
if checklayer(J-1)
50+
A_mul_B_col_J!(Y, BF[J-1], B, 2J)
51+
2J < N && A_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
5252
else
53-
= round(Int, (j-1)÷LAYERSKELETON)*LAYERSKELETON
54-
A_mul_B_col_J!(RP, B, 2j, ℓ+1, j-1)
55-
A_mul_B_col_J!(RP, B, 2j+1, ℓ+1, j-1)
53+
= round(Int, (J-1)÷LAYERSKELETON)*LAYERSKELETON
54+
A_mul_B_col_J!(RP, B, 2J, ℓ+1, J-1)
55+
2J < N && A_mul_B_col_J!(RP, B, 2J+1, ℓ+1, J-1)
5656
if> LAYERSKELETON-2
57-
A_mul_B_col_J!(Y, BF[ℓ], B, 2j)
58-
A_mul_B_col_J!(Y, BF[ℓ], B, 2j+1)
57+
A_mul_B_col_J!(Y, BF[ℓ], B, 2J)
58+
2J < N && A_mul_B_col_J!(Y, BF[ℓ], B, 2J+1)
5959
else
60-
copy!(Y, 1+M*(2j-1), B, 1+M*(2j-1), 2M)
60+
copy!(Y, 1+M*(2J-1), B, 1+M*(2J-1), 2M)
6161
end
6262
end
6363
end
6464

65-
for j = 2:2:N÷2
66-
if checklayer(j)
67-
A_mul_B_col_J!(Y, BF[j-1], B, 2j)
68-
A_mul_B_col_J!(Y, BF[j-1], B, 2j+1)
65+
for J = 2:2:N÷2
66+
if checklayer(J)
67+
A_mul_B_col_J!(Y, BF[J-1], B, 2J)
68+
2J < N && A_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
6969
else
70-
= round(Int, j÷LAYERSKELETON)*LAYERSKELETON
71-
A_mul_B_col_J!(RP, B, 2j, ℓ, j-1)
72-
A_mul_B_col_J!(RP, B, 2j+1, ℓ, j-1)
70+
= round(Int, J÷LAYERSKELETON)*LAYERSKELETON
71+
A_mul_B_col_J!(RP, B, 2J, ℓ, J-1)
72+
2J < N && A_mul_B_col_J!(RP, B, 2J+1, ℓ, J-1)
7373
if> LAYERSKELETON-2
74-
A_mul_B_col_J!(Y, BF[ℓ-1], B, 2j)
75-
A_mul_B_col_J!(Y, BF[ℓ-1], B, 2j+1)
74+
A_mul_B_col_J!(Y, BF[ℓ-1], B, 2J)
75+
2J < N && A_mul_B_col_J!(Y, BF[ℓ-1], B, 2J+1)
7676
else
77-
copy!(Y, 1+M*(2j-1), B, 1+M*(2j-1), 2M)
77+
copy!(Y, 1+M*(2J-1), B, 1+M*(2J-1), 2M)
7878
end
7979
end
8080
end
@@ -86,11 +86,11 @@ function Base.A_mul_B!(Y::Matrix, TP::ThinSphericalHarmonicPlan, X::Matrix)
8686
A_mul_B_col_J!!(Y, p1, B, 1)
8787
for J = 2:4:N
8888
A_mul_B_col_J!!(Y, p2, B, J)
89-
A_mul_B_col_J!!(Y, p2, B, J+1)
89+
J < N && A_mul_B_col_J!!(Y, p2, B, J+1)
9090
end
9191
for J = 4:4:N
9292
A_mul_B_col_J!!(Y, p1, B, J)
93-
A_mul_B_col_J!!(Y, p1, B, J+1)
93+
J < N && A_mul_B_col_J!!(Y, p1, B, J+1)
9494
end
9595
Y
9696
end
@@ -103,48 +103,48 @@ function Base.At_mul_B!(Y::Matrix, TP::ThinSphericalHarmonicPlan, X::Matrix)
103103
A_mul_B_col_J!!(Y, p1inv, B, 1)
104104
for J = 2:4:N
105105
A_mul_B_col_J!!(Y, p2inv, B, J)
106-
A_mul_B_col_J!!(Y, p2inv, B, J+1)
106+
J < N && A_mul_B_col_J!!(Y, p2inv, B, J+1)
107107
end
108108
for J = 4:4:N
109109
A_mul_B_col_J!!(Y, p1inv, B, J)
110-
A_mul_B_col_J!!(Y, p1inv, B, J+1)
110+
J < N && A_mul_B_col_J!!(Y, p1inv, B, J+1)
111111
end
112112

113113
copy!(B, Y)
114114
fill!(Y, zero(eltype(Y)))
115115
copy!(Y, 1, B, 1, 3M)
116116

117-
for j = 3:2:N÷2
118-
if checklayer(j-1)
119-
At_mul_B_col_J!(Y, BF[j-1], B, 2j)
120-
At_mul_B_col_J!(Y, BF[j-1], B, 2j+1)
117+
for J = 3:2:N÷2
118+
if checklayer(J-1)
119+
At_mul_B_col_J!(Y, BF[J-1], B, 2J)
120+
2J < N && At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
121121
else
122-
= round(Int, (j-1)÷LAYERSKELETON)*LAYERSKELETON
122+
= round(Int, (J-1)÷LAYERSKELETON)*LAYERSKELETON
123123
if> LAYERSKELETON-2
124-
At_mul_B_col_J!(Y, BF[ℓ], B, 2j)
125-
At_mul_B_col_J!(Y, BF[ℓ], B, 2j+1)
124+
At_mul_B_col_J!(Y, BF[ℓ], B, 2J)
125+
2J < N && At_mul_B_col_J!(Y, BF[ℓ], B, 2J+1)
126126
else
127-
copy!(Y, 1+M*(2j-1), B, 1+M*(2j-1), 2M)
127+
copy!(Y, 1+M*(2J-1), B, 1+M*(2J-1), 2M)
128128
end
129-
At_mul_B_col_J!(RP, Y, 2j, ℓ+1, j-1)
130-
At_mul_B_col_J!(RP, Y, 2j+1, ℓ+1, j-1)
129+
At_mul_B_col_J!(RP, Y, 2J, ℓ+1, J-1)
130+
2J < N && At_mul_B_col_J!(RP, Y, 2J+1, ℓ+1, J-1)
131131
end
132132
end
133133

134-
for j = 2:2:N÷2
135-
if checklayer(j)
136-
At_mul_B_col_J!(Y, BF[j-1], B, 2j)
137-
At_mul_B_col_J!(Y, BF[j-1], B, 2j+1)
134+
for J = 2:2:N÷2
135+
if checklayer(J)
136+
At_mul_B_col_J!(Y, BF[J-1], B, 2J)
137+
2J < N && At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
138138
else
139-
= round(Int, j÷LAYERSKELETON)*LAYERSKELETON
139+
= round(Int, J÷LAYERSKELETON)*LAYERSKELETON
140140
if> LAYERSKELETON-2
141-
At_mul_B_col_J!(Y, BF[ℓ-1], B, 2j)
142-
At_mul_B_col_J!(Y, BF[ℓ-1], B, 2j+1)
141+
At_mul_B_col_J!(Y, BF[ℓ-1], B, 2J)
142+
2J < N && At_mul_B_col_J!(Y, BF[ℓ-1], B, 2J+1)
143143
else
144-
copy!(Y, 1+M*(2j-1), B, 1+M*(2j-1), 2M)
144+
copy!(Y, 1+M*(2J-1), B, 1+M*(2J-1), 2M)
145145
end
146-
At_mul_B_col_J!(RP, Y, 2j, ℓ, j-1)
147-
At_mul_B_col_J!(RP, Y, 2j+1, ℓ, j-1)
146+
At_mul_B_col_J!(RP, Y, 2J, ℓ, J-1)
147+
2J < N && At_mul_B_col_J!(RP, Y, 2J+1, ℓ, J-1)
148148
end
149149
end
150150

0 commit comments

Comments
 (0)