Skip to content

Commit 84d1c24

Browse files
committed
Block Jacobi (#69)
1 parent 325bbd3 commit 84d1c24

File tree

6 files changed

+143
-9
lines changed

6 files changed

+143
-9
lines changed

examples/degeus_mechanics/mech.i

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
ymax = ${fparse 2*pi}
88
zmax = ${fparse 2*pi}
99
mesh_mode = DUMMY
10-
device_names = cuda
10+
device_names = cpu
1111
[]
1212

1313
[TensorComputes]
@@ -72,7 +72,11 @@
7272
constitutive_model = hyper_elasticity
7373
stress = stress
7474
applied_macroscopic_strain = applied_strain
75-
hutchinson_steps = 6
75+
# hutchinson_steps = 64
76+
# jacobi_min_rel = 1e-2
77+
# jacobi_inv_cap = 1e4
78+
block_jacobi = true
79+
block_jacobi_damp=1e-1
7680
verbose = true
7781
[]
7882
[]

include/tensor_buffers/NEML2TensorBuffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class NEML2TensorBuffer : public TensorBuffer<T>
2121

2222
NEML2TensorBuffer(const InputParameters & parameters);
2323

24-
virtual void init();
24+
virtual void init() override;
2525
virtual void makeCPUCopy() override;
2626

2727
using TensorBuffer<T>::_u;

include/tensor_computes/FFTMechanics.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,16 @@ class FFTMechanics : public TensorOperator<>
6767
/// steps for diagonal estimation
6868
const unsigned int _hutchinson_steps;
6969

70+
/// use block-Jacobi (local compliance) preconditioner
71+
const bool _block_jacobi;
72+
/// relative damping for block-Jacobi inversion
73+
const Real _block_jacobi_damp;
74+
75+
/// minimum relative floor for Jacobi diagonal (relative to median)
76+
const Real _jacobi_min_rel;
77+
/// cap on inverse diagonal scaling (0 disables)
78+
const Real _jacobi_inv_cap;
79+
7080
/// add diagnostic output for iterations
7181
const bool _verbose;
7282

include/utils/SwiftUtils.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ torch::Tensor dot24(const torch::Tensor & A2, const torch::Tensor & B4);
4848
torch::Tensor dot42(const torch::Tensor & A4, const torch::Tensor & B2);
4949
torch::Tensor dyad22(const torch::Tensor & A2, const torch::Tensor & B2);
5050

51+
// Invert local 4th-order blocks (...., i, j, k, l) -> (...., i, j, k, l)
52+
// Treats each grid point's (i,j)-(k,l) matrix as a (d*d x d*d) block and inverts it in batch.
53+
// Returns a tensor with the same shape as the input, containing per-point block inverses.
54+
torch::Tensor invertLocalBlocks(const torch::Tensor & K4);
55+
56+
// Damped inversion of local 4th-order blocks with optional pinv fallback.
57+
// damp_rel scales an identity added to each (d*d x d*d) block by damp_rel * mean(|diag|).
58+
// If inversion fails, falls back to pseudo-inverse.
59+
torch::Tensor invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel = 1e-8);
60+
5161
torch::Tensor
5262
estimateJacobiPreconditioner(const std::function<torch::Tensor(const torch::Tensor &)> & A,
5363
const torch::Tensor & template_vec,

src/tensor_computes/FFTMechanics.C

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <ATen/core/TensorBody.h>
1515
#include <ATen/ops/unsqueeze_ops.h>
1616
#include <util/Optional.h>
17+
#include <numeric>
1718

1819
registerMooseObject("SwiftApp", FFTMechanics);
1920

@@ -45,6 +46,18 @@ FFTMechanics::validParams()
4546
0,
4647
"Steps for diagonal estimation with Hutchinson's method used in "
4748
"Jacobi preconditioning. 0 skips preconditioning.");
49+
params.addParam<bool>("block_jacobi",
50+
false,
51+
"Use block-Jacobi (local compliance) preconditioner instead of diagonal.");
52+
params.addParam<Real>("block_jacobi_damp",
53+
1e-8,
54+
"Relative damping added to local tangent blocks before inversion.");
55+
params.addParam<Real>("jacobi_min_rel",
56+
1e-3,
57+
"Minimum relative floor for stochastic Jacobi diagonal (relative to median).");
58+
params.addParam<Real>("jacobi_inv_cap",
59+
0.0,
60+
"Cap on inverse diagonal scaling; 0 disables clamping.");
4861
params.addParam<bool>("verbose", false, "Print non-linear residuals.");
4962
return params;
5063
}
@@ -74,6 +87,10 @@ FFTMechanics::FFTMechanics(const InputParameters & parameters)
7487
? &getInputBuffer("applied_macroscopic_strain")
7588
: nullptr),
7689
_hutchinson_steps(getParam<unsigned int>("hutchinson_steps")),
90+
_block_jacobi(getParam<bool>("block_jacobi")),
91+
_block_jacobi_damp(getParam<Real>("block_jacobi_damp")),
92+
_jacobi_min_rel(getParam<Real>("jacobi_min_rel")),
93+
_jacobi_inv_cap(getParam<Real>("jacobi_inv_cap")),
7794
_verbose(getParam<bool>("verbose"))
7895
{
7996
// Build projection tensor once
@@ -133,14 +150,58 @@ FFTMechanics::computeBuffer()
133150
// iterate as long as the iterative update does not vanish
134151
for (const auto iiter : make_range(_nl_max_its))
135152
{
136-
const auto diag_precond =
137-
_hutchinson_steps ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps))
138-
: torch::ones_like(b);
139-
const auto M_inv = [&diag_precond](const torch::Tensor & x) { return x / diag_precond; };
153+
c10::optional<torch::Tensor> invK4;
154+
const auto diag_estimate =
155+
(!_block_jacobi && _hutchinson_steps)
156+
? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps))
157+
: torch::ones_like(b);
158+
auto inv_diag = torch::ones_like(b);
159+
if (!_block_jacobi && _hutchinson_steps)
160+
{
161+
// Robust floor relative to a nonzero scale to avoid huge inverse scaling
162+
auto mask = diag_estimate > 1e-9; // ignore near-zero estimates
163+
auto selected = at::masked_select(diag_estimate, mask);
164+
auto scale_t = selected.numel() > 0 ? selected.mean() : diag_estimate.mean();
165+
auto floor_t = scale_t * _jacobi_min_rel;
166+
auto diag_precond = torch::clamp(diag_estimate, floor_t, c10::nullopt);
167+
inv_diag = 1.0 / diag_precond;
168+
if (_jacobi_inv_cap > 0.0)
169+
{
170+
inv_diag = torch::clamp(inv_diag, 0.0, _jacobi_inv_cap);
171+
}
172+
}
173+
const auto M_inv = [&](const torch::Tensor & x)
174+
{
175+
if (_block_jacobi)
176+
{
177+
if (!invK4.has_value())
178+
invK4 = MooseTensor::invertLocalBlocksDamped(_tK4, _block_jacobi_damp);
179+
auto x2 = x.reshape(_r2_shape);
180+
auto z2raw = MooseTensor::trans2(MooseTensor::ddot42(*invK4, MooseTensor::trans2(x2)));
181+
// Enforce zero-mean (remove k=0 mode) without FFT cost
182+
std::vector<int64_t> reduce_dims(z2raw.dim() - 2);
183+
std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
184+
auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true);
185+
auto z2 = z2raw - mean2;
186+
return z2.reshape(-1);
187+
}
188+
else
189+
{
190+
auto x2 = x.reshape(_r2_shape);
191+
auto z2raw = x2 * inv_diag.reshape(_r2_shape);
192+
// Enforce zero-mean (remove k=0 mode) without FFT cost
193+
std::vector<int64_t> reduce_dims(z2raw.dim() - 2);
194+
std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
195+
auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true);
196+
auto z2 = z2raw - mean2;
197+
return z2.reshape(-1);
198+
}
199+
};
140200

141201
const auto [dFm_new, iterations, lnorm] =
142-
_hutchinson_steps ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv)
143-
: conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its);
202+
(_block_jacobi || _hutchinson_steps)
203+
? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv)
204+
: conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its);
144205
dFm = dFm_new;
145206

146207
// update DOFs (array -> tens.grid)

src/utils/SwiftUtils.C

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
#include "SwiftApp.h"
1111
#include "MooseUtils.h"
1212
#include "Moose.h"
13+
// for batched linear algebra
14+
#include <ATen/ops/linalg_inv.h>
15+
#include <ATen/ops/linalg_pinv.h>
1316

1417
namespace MooseTensor
1518
{
@@ -182,6 +185,52 @@ dyad22(const torch::Tensor & A2, const torch::Tensor & B2)
182185
return torch::einsum("...ij ,...kl ->...ijkl", {A2, B2});
183186
}
184187

188+
// Invert local 4th-order blocks (batch of d*d x d*d matrices)
189+
torch::Tensor
190+
invertLocalBlocks(const torch::Tensor & K4)
191+
{
192+
// Expect shape: [..., d, d, d, d]
193+
const auto d = K4.size(-1);
194+
// Flatten last 4 dims to (d*d, d*d) with batch = prod(leading dims)
195+
auto K2 = K4.reshape({-1, d * d, d * d});
196+
// Batched inverse
197+
auto K2_inv = at::linalg_inv(K2);
198+
// Restore original shape
199+
return K2_inv.reshape(K4.sizes());
200+
}
201+
202+
torch::Tensor
203+
invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel)
204+
{
205+
// Flatten to (batch, n, n)
206+
const auto d = K4.size(-1);
207+
const auto n = d * d;
208+
auto K2 = K4.reshape({-1, n, n});
209+
210+
// Build batched identity
211+
auto I = torch::eye(n, K4.options()).unsqueeze(0).expand({K2.size(0), n, n});
212+
213+
// Scale damping by mean absolute diagonal across batch
214+
auto diag = K2.diagonal(0, -2, -1);
215+
double scale = diag.abs().mean().template item<double>();
216+
if (!(scale > 0.0))
217+
scale = 1.0;
218+
const double eps = damp_rel > 0.0 ? damp_rel * scale : 0.0;
219+
220+
auto K2_reg = eps > 0.0 ? (K2 + eps * I) : K2;
221+
222+
try
223+
{
224+
auto K2_inv = at::linalg_inv(K2_reg);
225+
return K2_inv.reshape(K4.sizes());
226+
}
227+
catch (const c10::Error &)
228+
{
229+
auto K2_pinv = at::linalg_pinv(K2_reg);
230+
return K2_pinv.reshape(K4.sizes());
231+
}
232+
}
233+
185234
// Diagonal estimation with Hutchinson's method
186235
torch::Tensor
187236
estimateJacobiPreconditioner(const std::function<torch::Tensor(const torch::Tensor &)> & A,

0 commit comments

Comments
 (0)