@@ -91,7 +91,7 @@ class Lbfgsb
9191 theta_ = 1.0 ;
9292
9393 W_ = dyn_MatrixType::Zero (dim_, 0 );
94- M_ = dyn_MatrixType::Zero ( 0 , 0 );
94+ // MM_lu_ will be initialized when first L-BFGS update occurs
9595
9696 y_history_ = dyn_MatrixType::Zero (dim_, 0 );
9797 s_history_ = dyn_MatrixType::Zero (dim_, 0 );
@@ -100,27 +100,34 @@ class Lbfgsb
100100 StateType OptimizationStep (const FunctionType &function,
101101 const StateType ¤t,
102102 const ProgressType & /* progress*/ ) override {
103+ // Project current point to bounds (handles infeasible initial points)
104+ const VectorType x =
105+ current.x .cwiseMin (upper_bound_).cwiseMax (lower_bound_);
106+
103107 // STEP 2: compute the cauchy point
104- VectorType cauchy_point = VectorType::Zero (current. x .rows ());
108+ VectorType cauchy_point = VectorType::Zero (x.rows ());
105109
106110 VectorType current_gradient;
107- function (current. x , ¤t_gradient);
111+ function (x, ¤t_gradient);
108112
109113 dyn_VectorType c = dyn_VectorType::Zero (W_.cols ());
110- GetGeneralizedCauchyPoint (current. x , current_gradient, &cauchy_point, &c);
114+ GetGeneralizedCauchyPoint (x, current_gradient, &cauchy_point, &c);
111115
112116 // STEP 3: compute a search direction d_k by the primal method for the
113117 // sub-problem
114- const VectorType subspace_min =
115- SubspaceMinimization (current. x , current_gradient, cauchy_point, c);
118+ const auto [ subspace_min, do_line_search] =
119+ SubspaceMinimization (x, current_gradient, cauchy_point, c);
116120
117121 // STEP 4: perform linesearch and STEP 5: compute gradient
118- ScalarType alpha_init = 1.0 ;
119- const ScalarType rate = linesearch::MoreThuente<FunctionType, 1 >::Search (
120- current.x , subspace_min - current.x , function, alpha_init);
122+ ScalarType rate = 1.0 ;
123+ if (do_line_search) {
124+ ScalarType alpha_init = 1.0 ;
125+ rate = linesearch::MoreThuente<FunctionType, 1 >::Search (
126+ x, subspace_min - x, function, alpha_init);
127+ }
121128
122129 // update current guess and function information
123- const VectorType x_next = current. x - rate * (current. x - subspace_min);
130+ const VectorType x_next = x - rate * (x - subspace_min);
124131 // if current solution is out of bound, we clip it
125132 const VectorType clipped_x_next =
126133 x_next.cwiseMin (upper_bound_).cwiseMax (lower_bound_);
@@ -131,11 +138,11 @@ class Lbfgsb
131138
132139 // prepare for next iteration
133140 const VectorType new_y = next_gradient - current_gradient;
134- const VectorType new_s = next.x - current. x ;
141+ const VectorType new_s = next.x - x;
135142
136- // STEP 6:
137- const ScalarType test = fabs ( new_s.dot (new_y) );
138- if (test > 1e-7 * new_y.squaredNorm ()) {
143+ // STEP 6: Only update if positive curvature (s'*y > 0)
144+ const ScalarType sTy = new_s.dot (new_y);
145+ if (sTy > 1e-7 * new_y.squaredNorm ()) {
139146 if (y_history_.cols () < m) {
140147 y_history_.conservativeResize (dim_, y_history_.cols () + 1 );
141148 s_history_.conservativeResize (dim_, s_history_.cols () + 1 );
@@ -157,7 +164,8 @@ class Lbfgsb
157164 dyn_MatrixType D = -1 * A.diagonal ().asDiagonal ();
158165 MM << D, L.transpose (), L,
159166 ((s_history_.transpose () * s_history_) * theta_);
160- M_ = MM.inverse ();
167+ // Store LU factorization for efficient triangular solves
168+ MM_lu_ = MM.lu ();
161169 }
162170
163171 return next;
@@ -176,12 +184,24 @@ class Lbfgsb
176184 return idx;
177185 }
178186
187+ /* *
188+ * @brief Solve MM * x = b using stored LU factorization (triangular solves)
189+ * More efficient than computing M_ * b when M_ = MM^{-1}
190+ */
191+ dyn_VectorType SolveM (const dyn_VectorType &b) const {
192+ if (b.size () == 0 || MM_lu_.matrixLU ().size () == 0 ) {
193+ return b;
194+ }
195+ return MM_lu_.solve (b);
196+ }
197+
179198 void GetGeneralizedCauchyPoint (const VectorType &x,
180199 const VectorType &gradient,
181200 VectorType *x_cauchy,
182201 dyn_VectorType *c) const {
183202 constexpr ScalarType max_value = std::numeric_limits<ScalarType>::max ();
184- constexpr ScalarType epsilon = std::numeric_limits<ScalarType>::epsilon ();
203+ // Use larger epsilon for numerical stability
204+ constexpr ScalarType epsilon = 1e-12 ;
185205
186206 // Given x,l,u,g, and B = \theta_ I-WMW
187207 // {all t_i} = { (idx,value), ... }
@@ -216,11 +236,10 @@ class Lbfgsb
216236 // f' := g^ScalarType*d = -d^Td
217237 ScalarType f_prime = -d.dot (d); // (n operations)
218238 // f'' := \theta_*d^ScalarType*d-d^ScalarType*W*M*W^ScalarType*d =
219- // -\theta_*f'
220- // -
221- // p^ScalarType*M*p
239+ // -\theta_*f' - p^ScalarType*M*p
240+ // Use triangular solve: M*p means solve(MM, p)
222241 ScalarType f_doubleprime = (ScalarType)(-1.0 * theta_) * f_prime -
223- p.dot (M_ * p ); // (O(m^2) operations)
242+ p.dot (SolveM (p) ); // (O(m^2) operations)
224243 f_doubleprime = std::max<ScalarType>(epsilon, f_doubleprime);
225244 ScalarType f_dp_orig = f_doubleprime;
226245 // \delta t_min := -f'/f''
@@ -251,12 +270,15 @@ class Lbfgsb
251270 *c += dt * p;
252271 // cache
253272 dyn_VectorType wbt = W_.row (b);
273+ dyn_VectorType Mc = SolveM (*c);
274+ dyn_VectorType Mp = SolveM (p);
275+ dyn_VectorType Mwbt = SolveM (wbt);
254276 f_prime += dt * f_doubleprime + gradient (b) * gradient (b) +
255277 theta_ * gradient (b) * zb -
256- gradient (b) * wbt.transpose () * (M_ * *c) ;
278+ gradient (b) * wbt.transpose () * Mc ;
257279 f_doubleprime += ScalarType (-1.0 ) * theta_ * gradient (b) * gradient (b) -
258- ScalarType (2.0 ) * (gradient (b) * (wbt.dot (M_ * p ))) -
259- gradient (b) * gradient (b) * wbt.transpose () * (M_ * wbt) ;
280+ ScalarType (2.0 ) * (gradient (b) * (wbt.dot (Mp ))) -
281+ gradient (b) * gradient (b) * wbt.transpose () * Mwbt ;
260282 f_doubleprime = std::max<ScalarType>(epsilon * f_dp_orig, f_doubleprime);
261283 p += gradient (b) * wbt.transpose ();
262284 d (b) = 0 ;
@@ -304,12 +326,9 @@ class Lbfgsb
304326 return alphastar;
305327 }
306328
307- VectorType SubspaceMinimization (const VectorType &x,
308- const VectorType &gradient,
309- const VectorType &x_cauchy,
310- const dyn_VectorType &c) const {
311- const ScalarType theta_inverse = 1 / theta_;
312-
329+ std::pair<VectorType, bool > SubspaceMinimization (
330+ const VectorType &x, const VectorType &gradient,
331+ const VectorType &x_cauchy, const dyn_VectorType &c) const {
313332 std::vector<int > free_variables_index;
314333 for (int i = 0 ; i < x_cauchy.rows (); i++) {
315334 if ((x_cauchy (i) != upper_bound_ (i)) &&
@@ -318,19 +337,32 @@ class Lbfgsb
318337 }
319338 }
320339 const int free_var_count = free_variables_index.size ();
340+
341+ // Early return if no free variables - Cauchy point is optimal
342+ if (free_var_count == 0 ) {
343+ return {x_cauchy, false };
344+ }
345+
346+ const ScalarType theta_inverse = 1 / theta_;
321347 const dyn_MatrixType WZ =
322348 W_ (free_variables_index, Eigen::indexing::all).transpose ();
323349
324- const VectorType rr = (gradient + theta_ * (x_cauchy - x) - W_ * (M_ * c));
350+ const VectorType rr = (gradient + theta_ * (x_cauchy - x) - W_ * SolveM ( c));
325351 // r=r(free_variables);
326352 const dyn_VectorType r = rr (free_variables_index);
327353
328354 // STEP 2: "v = w^T*Z*r" and STEP 3: "v = M*v"
329- dyn_VectorType v = M_ * (WZ * r);
355+ dyn_VectorType v = SolveM (WZ * r);
330356 // STEP 4: N = 1/theta*W^T*Z*(W^T*Z)^T
331357 dyn_MatrixType N = theta_inverse * WZ * WZ.transpose ();
332- // N = I - MN
333- N = dyn_MatrixType::Identity (N.rows (), N.rows ()) - M_ * N;
358+ // N = I - MN (solve M*X = N for each column, then compute I - X)
359+ if (N.cols () > 0 ) {
360+ dyn_MatrixType MN (N.rows (), N.cols ());
361+ for (int col = 0 ; col < N.cols (); ++col) {
362+ MN.col (col) = SolveM (N.col (col));
363+ }
364+ N = dyn_MatrixType::Identity (N.rows (), N.rows ()) - MN;
365+ }
334366 // STEP: 5
335367 // v = N^{-1}*v
336368 if (v.size () > 0 ) {
@@ -349,7 +381,7 @@ class Lbfgsb
349381 subspace_min (free_variables_index[i]) =
350382 subspace_min (free_variables_index[i]) + dStar (i);
351383 }
352- return subspace_min;
384+ return { subspace_min, true } ;
353385 }
354386
355387 private:
@@ -358,7 +390,7 @@ class Lbfgsb
358390 VectorType upper_bound_;
359391 bool bounds_initialized_ = false ;
360392
361- dyn_MatrixType M_ ;
393+ Eigen::PartialPivLU< dyn_MatrixType> MM_lu_ ;
362394 dyn_MatrixType W_;
363395 ScalarType theta_;
364396
0 commit comments