Skip to content

Commit 0a833c9

Browse files
committed
Optimize L-BFGS-B with efficient LU factorization
Replace explicit matrix inversion (M_) with cached LU factorization (MM_lu_) for more efficient triangular solves. This reduces numerical instability and improves computational performance when solving linear systems during optimization steps. Add point projection to handle infeasible initial points by clipping to bounds before gradient computation. Introduce conditional line search that is skipped when the Cauchy point is already optimal (no free variables). Improve numerical stability by using a fixed epsilon (1e-12) instead of machine epsilon, and refactor the curvature check with clearer naming (sTy). Update SubspaceMinimization to return both the solution and a flag indicating whether a line search is needed, enabling early termination when the Cauchy point is optimal. Replace all M_ * v operations with SolveM(v) calls using the cached LU factorization.
1 parent 4443c93 commit 0a833c9

File tree

1 file changed

+67
-35
lines changed

include/cppoptlib/solver/lbfgsb.h

Lines changed: 67 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ class Lbfgsb
9191
theta_ = 1.0;
9292

9393
W_ = dyn_MatrixType::Zero(dim_, 0);
94-
M_ = dyn_MatrixType::Zero(0, 0);
94+
// MM_lu_ will be initialized when first L-BFGS update occurs
9595

9696
y_history_ = dyn_MatrixType::Zero(dim_, 0);
9797
s_history_ = dyn_MatrixType::Zero(dim_, 0);
@@ -100,27 +100,34 @@ class Lbfgsb
100100
StateType OptimizationStep(const FunctionType &function,
101101
const StateType &current,
102102
const ProgressType & /*progress*/) override {
103+
// Project current point to bounds (handles infeasible initial points)
104+
const VectorType x =
105+
current.x.cwiseMin(upper_bound_).cwiseMax(lower_bound_);
106+
103107
// STEP 2: compute the cauchy point
104-
VectorType cauchy_point = VectorType::Zero(current.x.rows());
108+
VectorType cauchy_point = VectorType::Zero(x.rows());
105109

106110
VectorType current_gradient;
107-
function(current.x, &current_gradient);
111+
function(x, &current_gradient);
108112

109113
dyn_VectorType c = dyn_VectorType::Zero(W_.cols());
110-
GetGeneralizedCauchyPoint(current.x, current_gradient, &cauchy_point, &c);
114+
GetGeneralizedCauchyPoint(x, current_gradient, &cauchy_point, &c);
111115

112116
// STEP 3: compute a search direction d_k by the primal method for the
113117
// sub-problem
114-
const VectorType subspace_min =
115-
SubspaceMinimization(current.x, current_gradient, cauchy_point, c);
118+
const auto [subspace_min, do_line_search] =
119+
SubspaceMinimization(x, current_gradient, cauchy_point, c);
116120

117121
// STEP 4: perform linesearch and STEP 5: compute gradient
118-
ScalarType alpha_init = 1.0;
119-
const ScalarType rate = linesearch::MoreThuente<FunctionType, 1>::Search(
120-
current.x, subspace_min - current.x, function, alpha_init);
122+
ScalarType rate = 1.0;
123+
if (do_line_search) {
124+
ScalarType alpha_init = 1.0;
125+
rate = linesearch::MoreThuente<FunctionType, 1>::Search(
126+
x, subspace_min - x, function, alpha_init);
127+
}
121128

122129
// update current guess and function information
123-
const VectorType x_next = current.x - rate * (current.x - subspace_min);
130+
const VectorType x_next = x - rate * (x - subspace_min);
124131
// if current solution is out of bound, we clip it
125132
const VectorType clipped_x_next =
126133
x_next.cwiseMin(upper_bound_).cwiseMax(lower_bound_);
@@ -131,11 +138,11 @@ class Lbfgsb
131138

132139
// prepare for next iteration
133140
const VectorType new_y = next_gradient - current_gradient;
134-
const VectorType new_s = next.x - current.x;
141+
const VectorType new_s = next.x - x;
135142

136-
// STEP 6:
137-
const ScalarType test = fabs(new_s.dot(new_y));
138-
if (test > 1e-7 * new_y.squaredNorm()) {
143+
// STEP 6: Only update if positive curvature (s'*y > 0)
144+
const ScalarType sTy = new_s.dot(new_y);
145+
if (sTy > 1e-7 * new_y.squaredNorm()) {
139146
if (y_history_.cols() < m) {
140147
y_history_.conservativeResize(dim_, y_history_.cols() + 1);
141148
s_history_.conservativeResize(dim_, s_history_.cols() + 1);
@@ -157,7 +164,8 @@ class Lbfgsb
157164
dyn_MatrixType D = -1 * A.diagonal().asDiagonal();
158165
MM << D, L.transpose(), L,
159166
((s_history_.transpose() * s_history_) * theta_);
160-
M_ = MM.inverse();
167+
// Store LU factorization for efficient triangular solves
168+
MM_lu_ = MM.lu();
161169
}
162170

163171
return next;
@@ -176,12 +184,24 @@ class Lbfgsb
176184
return idx;
177185
}
178186

187+
/**
188+
* @brief Solve MM * x = b using stored LU factorization (triangular solves)
189+
* More efficient than computing M_ * b when M_ = MM^{-1}
190+
*/
191+
dyn_VectorType SolveM(const dyn_VectorType &b) const {
192+
if (b.size() == 0 || MM_lu_.matrixLU().size() == 0) {
193+
return b;
194+
}
195+
return MM_lu_.solve(b);
196+
}
197+
179198
void GetGeneralizedCauchyPoint(const VectorType &x,
180199
const VectorType &gradient,
181200
VectorType *x_cauchy,
182201
dyn_VectorType *c) const {
183202
constexpr ScalarType max_value = std::numeric_limits<ScalarType>::max();
184-
constexpr ScalarType epsilon = std::numeric_limits<ScalarType>::epsilon();
203+
// Use larger epsilon for numerical stability
204+
constexpr ScalarType epsilon = 1e-12;
185205

186206
// Given x,l,u,g, and B = \theta_ I-WMW
187207
// {all t_i} = { (idx,value), ... }
@@ -216,11 +236,10 @@ class Lbfgsb
216236
// f' := g^ScalarType*d = -d^Td
217237
ScalarType f_prime = -d.dot(d); // (n operations)
218238
// f'' := \theta_*d^ScalarType*d-d^ScalarType*W*M*W^ScalarType*d =
219-
// -\theta_*f'
220-
// -
221-
// p^ScalarType*M*p
239+
// -\theta_*f' - p^ScalarType*M*p
240+
// Use triangular solve: M*p means solve(MM, p)
222241
ScalarType f_doubleprime = (ScalarType)(-1.0 * theta_) * f_prime -
223-
p.dot(M_ * p); // (O(m^2) operations)
242+
p.dot(SolveM(p)); // (O(m^2) operations)
224243
f_doubleprime = std::max<ScalarType>(epsilon, f_doubleprime);
225244
ScalarType f_dp_orig = f_doubleprime;
226245
// \delta t_min := -f'/f''
@@ -251,12 +270,15 @@ class Lbfgsb
251270
*c += dt * p;
252271
// cache
253272
dyn_VectorType wbt = W_.row(b);
273+
dyn_VectorType Mc = SolveM(*c);
274+
dyn_VectorType Mp = SolveM(p);
275+
dyn_VectorType Mwbt = SolveM(wbt);
254276
f_prime += dt * f_doubleprime + gradient(b) * gradient(b) +
255277
theta_ * gradient(b) * zb -
256-
gradient(b) * wbt.transpose() * (M_ * *c);
278+
gradient(b) * wbt.transpose() * Mc;
257279
f_doubleprime += ScalarType(-1.0) * theta_ * gradient(b) * gradient(b) -
258-
ScalarType(2.0) * (gradient(b) * (wbt.dot(M_ * p))) -
259-
gradient(b) * gradient(b) * wbt.transpose() * (M_ * wbt);
280+
ScalarType(2.0) * (gradient(b) * (wbt.dot(Mp))) -
281+
gradient(b) * gradient(b) * wbt.transpose() * Mwbt;
260282
f_doubleprime = std::max<ScalarType>(epsilon * f_dp_orig, f_doubleprime);
261283
p += gradient(b) * wbt.transpose();
262284
d(b) = 0;
@@ -304,12 +326,9 @@ class Lbfgsb
304326
return alphastar;
305327
}
306328

307-
VectorType SubspaceMinimization(const VectorType &x,
308-
const VectorType &gradient,
309-
const VectorType &x_cauchy,
310-
const dyn_VectorType &c) const {
311-
const ScalarType theta_inverse = 1 / theta_;
312-
329+
std::pair<VectorType, bool> SubspaceMinimization(
330+
const VectorType &x, const VectorType &gradient,
331+
const VectorType &x_cauchy, const dyn_VectorType &c) const {
313332
std::vector<int> free_variables_index;
314333
for (int i = 0; i < x_cauchy.rows(); i++) {
315334
if ((x_cauchy(i) != upper_bound_(i)) &&
@@ -318,19 +337,32 @@ class Lbfgsb
318337
}
319338
}
320339
const int free_var_count = free_variables_index.size();
340+
341+
// Early return if no free variables - Cauchy point is optimal
342+
if (free_var_count == 0) {
343+
return {x_cauchy, false};
344+
}
345+
346+
const ScalarType theta_inverse = 1 / theta_;
321347
const dyn_MatrixType WZ =
322348
W_(free_variables_index, Eigen::indexing::all).transpose();
323349

324-
const VectorType rr = (gradient + theta_ * (x_cauchy - x) - W_ * (M_ * c));
350+
const VectorType rr = (gradient + theta_ * (x_cauchy - x) - W_ * SolveM(c));
325351
// r=r(free_variables);
326352
const dyn_VectorType r = rr(free_variables_index);
327353

328354
// STEP 2: "v = w^T*Z*r" and STEP 3: "v = M*v"
329-
dyn_VectorType v = M_ * (WZ * r);
355+
dyn_VectorType v = SolveM(WZ * r);
330356
// STEP 4: N = 1/theta*W^T*Z*(W^T*Z)^T
331357
dyn_MatrixType N = theta_inverse * WZ * WZ.transpose();
332-
// N = I - MN
333-
N = dyn_MatrixType::Identity(N.rows(), N.rows()) - M_ * N;
358+
// N = I - MN (solve M*X = N for each column, then compute I - X)
359+
if (N.cols() > 0) {
360+
dyn_MatrixType MN(N.rows(), N.cols());
361+
for (int col = 0; col < N.cols(); ++col) {
362+
MN.col(col) = SolveM(N.col(col));
363+
}
364+
N = dyn_MatrixType::Identity(N.rows(), N.rows()) - MN;
365+
}
334366
// STEP: 5
335367
// v = N^{-1}*v
336368
if (v.size() > 0) {
@@ -349,7 +381,7 @@ class Lbfgsb
349381
subspace_min(free_variables_index[i]) =
350382
subspace_min(free_variables_index[i]) + dStar(i);
351383
}
352-
return subspace_min;
384+
return {subspace_min, true};
353385
}
354386

355387
private:
@@ -358,7 +390,7 @@ class Lbfgsb
358390
VectorType upper_bound_;
359391
bool bounds_initialized_ = false;
360392

361-
dyn_MatrixType M_;
393+
Eigen::PartialPivLU<dyn_MatrixType> MM_lu_;
362394
dyn_MatrixType W_;
363395
ScalarType theta_;
364396

0 commit comments

Comments (0)