@@ -151,43 +151,56 @@ See https://github.com/JuliaLinearAlgebra/BandedMatrices.jl/blob/master/LICENSE
151
151
end
152
152
153
153
# Fallback method: for any flag value, return the ordinary `view(A, b)`.
_view (:: Any , A, b) = view (A, b)
154
# Previous Val{true} method: returned `dataview(view(A, b))` without checking the band.
- _view (:: Val{true} , A:: BandedMatrix , b) = dataview (view (A, b))
154
# Val{true} method for a BandedMatrix `A` and a Band `b`: returns `dataview(view(A, b))`,
# but first validates the band against the bandwidths of `A`.
+ function _view (:: Val{true} , A:: BandedMatrix , b:: Band )
155
# (l, u) as reported by `bandwidths(A)`
+ l, u = bandwidths (A)
156
# throw an ArgumentError unless b.i lies in the closed range -l:u
+ - l <= b. i <= u || throw (ArgumentError (" invalid band $b for bandwidths $((- l,u)) " ))
157
+ dataview (view (A, b))
158
+ end
155
159
156
# Collect the four band views used by the unrolled loops of the Jacobi multiplication:
#   Cbmk : band `bmk*f` of C
#   Bm   : band `flipsign(bmk-1, f)` of B
#   B0   : band `flipsign(bmk, f)` of B
#   Bp   : band `flipsign(bmk+1, f)` of B
# Returns the tuple `(Cbmk, Bm, B0, Bp)`.
# The last argument (`ValBC` before this change, `valB` after it) is forwarded to `_view`
# for Bp only; the other three views are always requested with `Val(true)`.
- function _get_bands (B, C, bmk, f, ValBC )
160
+ function _get_bands (B, C, bmk, f, valB )
157
161
Cbmk = _view (Val (true ), C, band (bmk* f))
158
162
Bm = _view (Val (true ), B, band (flipsign (bmk- 1 , f)))
159
163
B0 = _view (Val (true ), B, band (flipsign (bmk, f)))
160
# Bp is the only view whose `_view` method is chosen by the caller-supplied flag;
# a flag other than Val{true} dispatches to the generic `_view(::Any, A, b)` method.
- Bp = _view (ValBC , B, band (flipsign (bmk+ 1 , f)))
164
+ Bp = _view (valB , B, band (flipsign (bmk+ 1 , f)))
161
165
Cbmk, Bm, B0, Bp
162
166
end
163
167
164
- function _jac_gbmm! (α, J, B, β, C, b, (Cn, Cm), n, ValJ, ValBC)
165
- Jp = _view (ValJ, J, band (1 ))
166
- J0 = _view (ValJ, J, band (0 ))
167
- Jm = _view (ValJ, J, band (- 1 ))
168
+ # Fast implementation of C[:,:] = α*J*B+β*C where the bandwidth of B is
169
+ # specified by b, not by the parameters in B
170
+ function jac_gbmm! (α, J, B, β, C, b, valB)
171
+ if β ≠ 1
172
+ lmul! (β,C)
173
+ end
174
+
175
+ n = size (J,1 )
176
+ Cn, Cm = size (C)
177
+
178
+ Jp = _view (Val (true ), J, band (1 ))
179
+ J0 = _view (Val (true ), J, band (0 ))
180
+ Jm = _view (Val (true ), J, band (- 1 ))
168
181
169
182
kr = intersect (- 1 : b- 1 , b- Cm+ 1 : b- 1 + Cn)
170
183
171
184
# unwrap the loops to forward indexing to the data wherever applicable
172
185
# this might also help with cache localization
173
186
k = - 1
174
187
if k in kr
175
- Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , ValBC )
188
+ Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , valB )
176
189
for i in 1 : n- b+ k
177
190
Cbmk[i] += α * Bm[i+ 1 ] * Jp[i]
178
191
end
179
192
end
180
193
181
194
k = 0
182
195
if k in kr
183
- Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , Val ( true ) )
196
+ Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , valB )
184
197
for i in 1 : n- b+ k
185
198
Cbmk[i] += α * (Bm[i+ 1 ] * Jp[i] + B0[i] * J0[i])
186
199
end
187
200
end
188
201
189
202
for k in max (1 , first (kr)): last (kr)
190
- Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , Val ( true ) )
203
+ Cbmk, Bm, B0, Bp = _get_bands (B, C, b- k, 1 , valB )
191
204
Cbmk[1 ] += α * (Bm[2 ] * Jp[1 ] + B0[1 ] * J0[1 ])
192
205
for i in 2 : n- b+ k
193
206
Cbmk[i] += α * (Bm[i+ 1 ] * Jp[i] + B0[i] * J0[i] + Bp[i- 1 ] * Jm[i- 1 ])
@@ -198,15 +211,15 @@ function _jac_gbmm!(α, J, B, β, C, b, (Cn, Cm), n, ValJ, ValBC)
198
211
199
212
k = - 1
200
213
if k in kr
201
- Ckmb, Bp, B0, Bm = _get_bands (B, C, b- k, - 1 , ValBC )
214
+ Ckmb, Bp, B0, Bm = _get_bands (B, C, b- k, - 1 , valB )
202
215
for (i, Ji) in enumerate (b- k: n- 1 )
203
216
Ckmb[i] += α * Bp[i] * Jm[Ji]
204
217
end
205
218
end
206
219
207
220
k = 0
208
221
if k in kr
209
- Ckmb, Bp, B0, Bm = _get_bands (B, C, b- k, - 1 , Val ( true ) )
222
+ Ckmb, Bp, B0, Bm = _get_bands (B, C, b- k, - 1 , valB )
210
223
Ckmb[1 ] += α * Bp[1 ] * Jm[b- k]
211
224
for (i, Ji) in enumerate (b- k+ 1 : n- 1 )
212
225
Ckmb[i] += α * B0[i] * J0[Ji]
@@ -238,21 +251,6 @@ function _jac_gbmm!(α, J, B, β, C, b, (Cn, Cm), n, ValJ, ValBC)
238
251
return C
239
252
end
240
253
241
- # Fast implementation of C[:,:] = α*J*B+β*C where the bandwidth of B is
242
- # specified by b, not by the parameters in B
243
- function jac_gbmm! (α, J, B, β, C, b, valJ, valBC)
244
- if β ≠ 1
245
- lmul! (β,C)
246
- end
247
-
248
- n = size (J,1 )
249
- Cn, Cm = size (C)
250
-
251
- _jac_gbmm! (α, J, B, β, C, b, (Cn, Cm), n, valJ, valBC)
252
-
253
- C
254
- end
255
-
256
254
function BandedMatrix (S:: SubOperator {T,ConcreteMultiplication{C,PS,T},
257
255
NTuple{2 ,UnitRange{Int}}}) where {PS<: PolynomialSpace ,T,C<: PolynomialSpace }
258
256
M= parent (S)
@@ -285,31 +283,31 @@ function BandedMatrix(S::SubOperator{T,ConcreteMultiplication{C,PS,T},
285
283
286
284
# Multiplication is transpose
287
285
J= Operator {T} (Recurrence (M. space))[jkr,jkr]
288
- valJ = all (>= (1 ), bandwidths (J)) ? Val (true ) : Val (false )
289
286
290
287
B= n- 1 # final bandwidth
291
288
292
289
# Clenshaw for operators
293
290
Bk2 = BandedMatrix (Zeros {T} (size (J)), (B,B))
294
291
dataview (view (Bk2, band (0 ))) .= a[n]/ recβ (T,sp,n- 1 )
295
292
α,β = recα (T,sp,n- 1 ),recβ (T,sp,n- 2 )
296
- Bk1 = (- α/ β) * Bk2
293
+ Bk1 = lmul! (- α/ β, copy ( Bk2))
297
294
dataview (view (Bk1, band (0 ))) .+ = a[n- 1 ]/ β
298
- jac_gbmm! (one (T)/ β,J,Bk2,one (T),Bk1,0 ,valJ, Val (true ))
295
+ jac_gbmm! (one (T)/ β,J,Bk2,one (T),Bk1,0 , Val (true ))
299
296
b= 1 # we keep track of bandwidths manually to reuse memory
300
297
for k= n- 2 : - 1 : 2
298
+ # b goes from 1 to n-3 over this loop (b == n-1-k when jac_gbmm! is called)
301
299
α,β,γ= recα (T,sp,k),recβ (T,sp,k- 1 ),recγ (T,sp,k+ 1 )
302
300
lmul! (- γ/ β,Bk2)
303
301
dataview (view (Bk2, band (0 ))) .+ = a[k]/ β
304
- jac_gbmm! (1 / β,J,Bk1,one (T),Bk2,b,valJ, Val (true ))
302
+ jac_gbmm! (1 / β,J,Bk1,one (T),Bk2,b,Val (true ))
305
303
LinearAlgebra. axpy! (- α/ β,Bk1,Bk2)
306
304
Bk2,Bk1= Bk1,Bk2
307
305
b+= 1
308
306
end
309
307
α,γ= recα (T,sp,1 ),recγ (T,sp,2 )
310
308
lmul! (- γ,Bk2)
311
309
dataview (view (Bk2, band (0 ))) .+ = a[1 ]
312
- jac_gbmm! (one (T),J,Bk1,one (T),Bk2,b,valJ, Val (false ))
310
+ jac_gbmm! (one (T),J,Bk1,one (T),Bk2,b,Val (false ))
313
311
LinearAlgebra. axpy! (- α,Bk1,Bk2)
314
312
315
313
# relationship between jkr and kr, jr
0 commit comments