diff --git a/CHANGELOG.md b/CHANGELOG.md index 81d0f32b..95463bec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `prox-solver.hpp`: Add more alternatives to the `HessianApprox` enum ([#92](https://github.com/Simple-Robotics/proxsuite-nlp/pull/92)) +- Add `BfgsStrategy` to estimate (inverse) Hessian and use it in Euclidean example ([#92](https://github.com/Simple-Robotics/proxsuite-nlp/pull/92)) + ## [0.10.1] - 2025-01-24 ### Changed diff --git a/examples/ur5-ik.cpp b/examples/ur5-ik.cpp index 89b50e85..1acb23a9 100644 --- a/examples/ur5-ik.cpp +++ b/examples/ur5-ik.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -70,6 +71,11 @@ struct FramePosition : C2FunctionTpl { int main() { + // solve with different hessian approximations + std::vector<HessianApprox> hess_approximations = { + HessianApprox::EXACT, HessianApprox::GAUSS_NEWTON, HessianApprox::BFGS, + HessianApprox::IDENTITY}; + const std::string ee_link_name = "tool0"; Model model = loadModel(); Space space{model}; @@ -106,19 +112,23 @@ int main() { problem.addConstraint(cstrobj); } - ProxNLPSolverTpl solver(problem, 1e-4, 0.01, 0.0, - proxsuite::nlp::VERBOSE); - solver.setup(); - solver.solve(q0); - - ResultsTpl const &results = *solver.results_; - WorkspaceTpl const &ws = *solver.workspace_; - std::cout << results << std::endl; - - fmt::print("Optimal cost: {}\n", ws.objective_value); - fmt::print("Optimal cost grad: {}\n", ws.objective_gradient.transpose()); - auto opt_frame_pos = (*fn)(results.x_opt); - fmt::print("Optimal frame pos: {}\n", opt_frame_pos.transpose()); - + for (auto hess_approx : hess_approximations) { + fmt::print("Solving with hessian approximation: {}\n", hess_approx); + ProxNLPSolverTpl solver(problem, 1e-4, 0.01, 0.0, + proxsuite::nlp::QUIET); + solver.hess_approx = hess_approx; + solver.max_iters = 500; + solver.setup(); + solver.solve(q0); + + 
ResultsTpl const &results = *solver.results_; + WorkspaceTpl const &ws = *solver.workspace_; + std::cout << results << std::endl; + + fmt::print("Optimal cost: {}\n", ws.objective_value); + fmt::print("Optimal cost grad: {}\n", ws.objective_gradient.transpose()); + auto opt_frame_pos = (*fn)(results.x_opt); + fmt::print("Optimal frame pos: {}\n\n", opt_frame_pos.transpose()); + } return 0; } diff --git a/include/proxsuite-nlp/bfgs-strategy.hpp b/include/proxsuite-nlp/bfgs-strategy.hpp new file mode 100644 index 00000000..8123fc6a --- /dev/null +++ b/include/proxsuite-nlp/bfgs-strategy.hpp @@ -0,0 +1,103 @@ +/// @file +/// @copyright Copyright (C) 2024-2025 INRIA +#pragma once +#include "proxsuite-nlp/fwd.hpp" + +namespace proxsuite { +namespace nlp { + +enum BFGSType { Hessian, InverseHessian }; + +template <typename Scalar, BFGSType BFGS_TYPE = BFGSType::InverseHessian> +class BFGSStrategy { + PROXSUITE_NLP_DYNAMIC_TYPEDEFS(Scalar); + +public: + BFGSStrategy() + : is_init(false), is_psd(false), x_prev(), g_prev(), s(), y(), + xx_transpose(), xy_transpose(), V(), VMinv(), VMinvVt(), + is_valid(false) {} + + explicit BFGSStrategy(const int num_vars) + : is_init(false), is_psd(true), x_prev(VectorXs::Zero(num_vars)), + g_prev(VectorXs::Zero(num_vars)), s(VectorXs::Zero(num_vars)), + y(VectorXs::Zero(num_vars)), + xx_transpose(MatrixXs::Zero(num_vars, num_vars)), + xy_transpose(MatrixXs::Zero(num_vars, num_vars)), + V(MatrixXs::Zero(num_vars, num_vars)), + VMinv(MatrixXs::Zero(num_vars, num_vars)), + VMinvVt(MatrixXs::Zero(num_vars, num_vars)), is_valid(true) { + x_prev = VectorXs::Zero(num_vars); + g_prev = VectorXs::Zero(num_vars); + } + + void init(const ConstVectorRef &x0, const ConstVectorRef &g0) { + if (!is_valid) { + throw std::runtime_error("Cannot initialize an invalid BFGSStrategy. 
Use " + "the constructor with num_vars first."); + } + + x_prev = x0; + g_prev = g0; + is_init = true; + } + + void update(const ConstVectorRef &x, const ConstVectorRef &g, + MatrixXs &hessian) { + if (!is_init) { + init(x, g); + return; + } + PROXSUITE_NLP_NOMALLOC_BEGIN; + s = x - x_prev; + y = g - g_prev; + const Scalar sy = s.dot(y); + + if (sy > 0) { + if constexpr (BFGS_TYPE == BFGSType::InverseHessian) { + // Nocedal and Wright, Numerical Optimization, 2nd ed., p. 140, eqn 6.17 + // (BFGS update) + xx_transpose.noalias() = s * s.transpose(); + xy_transpose.noalias() = s * y.transpose(); + } else if constexpr (BFGS_TYPE == BFGSType::Hessian) { + // Nocedal and Wright, Numerical Optimization, 2nd ed., p. 139, eqn 6.13 + // (DFP update) + xx_transpose.noalias() = y * y.transpose(); + xy_transpose.noalias() = y * s.transpose(); + } + + V.noalias() = MatrixXs::Identity(s.size(), s.size()) - xy_transpose / sy; + VMinv.noalias() = V * hessian; + VMinvVt.noalias() = VMinv * V.transpose(); + hessian.noalias() = VMinvVt + xx_transpose / sy; + is_psd = true; + } else { + is_psd = false; + PROXSUITE_NLP_WARN("Skipping BFGS update as s^Ty <= 0"); + } + x_prev = x; + g_prev = g; + PROXSUITE_NLP_NOMALLOC_END; + } + bool isValid() const { return is_valid; } + +public: + bool is_init; + bool is_psd; + +private: + VectorXs x_prev; // previous iterate + VectorXs g_prev; // previous gradient + VectorXs s; // delta iterate + VectorXs y; // delta gradient + // temporary variables to avoid dynamic memory allocation + MatrixXs xx_transpose; + MatrixXs xy_transpose; + MatrixXs V; + MatrixXs VMinv; + MatrixXs VMinvVt; + bool is_valid; +}; + +} // namespace nlp +} // namespace proxsuite diff --git a/include/proxsuite-nlp/fmt-hessian-approx.hpp b/include/proxsuite-nlp/fmt-hessian-approx.hpp new file mode 100644 index 00000000..0d2aef62 --- /dev/null +++ b/include/proxsuite-nlp/fmt-hessian-approx.hpp @@ -0,0 +1,35 @@ +/// @file +/// @copyright Copyright (C) 2024-2025 INRIA +#pragma once 
+ +#include +#include + +namespace fmt { +template <> struct formatter { + template constexpr auto parse(ParseContext &ctx) { + return ctx.begin(); + } + + template + auto format(const proxsuite::nlp::HessianApprox &hessian_approx, + FormatContext &ctx) const { + std::string name; + switch (hessian_approx) { + case proxsuite::nlp::HessianApprox::EXACT: + name = "EXACT"; + break; + case proxsuite::nlp::HessianApprox::GAUSS_NEWTON: + name = "GAUSS_NEWTON"; + break; + case proxsuite::nlp::HessianApprox::BFGS: + name = "BFGS"; + break; + case proxsuite::nlp::HessianApprox::IDENTITY: + name = "IDENTITY"; + break; + } + return format_to(ctx.out(), "{}", name); + } +}; +} // namespace fmt diff --git a/include/proxsuite-nlp/prox-solver.hpp b/include/proxsuite-nlp/prox-solver.hpp index ef245a30..4dbb1ed1 100644 --- a/include/proxsuite-nlp/prox-solver.hpp +++ b/include/proxsuite-nlp/prox-solver.hpp @@ -1,5 +1,6 @@ /// @file /// @copyright Copyright (C) 2022 LAAS-CNRS, INRIA +/// @copyright Copyright (C) 2024-2025 INRIA #pragma once #include "proxsuite-nlp/fwd.hpp" @@ -10,6 +11,7 @@ #include "proxsuite-nlp/helpers-base.hpp" #include "proxsuite-nlp/logger.hpp" #include "proxsuite-nlp/bcl-params.hpp" +#include "proxsuite-nlp/bfgs-strategy.hpp" #include @@ -27,6 +29,10 @@ enum class HessianApprox { EXACT, /// Gauss-Newton (or rather SCQP) approximation GAUSS_NEWTON, + /// BFGS approximation + BFGS, + /// Identity + IDENTITY }; enum InertiaFlag { INERTIA_OK = 0, INERTIA_BAD = 1, INERTIA_HAS_ZEROS = 2 }; @@ -48,6 +54,7 @@ template class ProxNLPSolverTpl { using CallbackPtr = shared_ptr>; using ConstraintSet = ConstraintSetTpl; using ConstraintObject = ConstraintObjectTpl; + using BFGS = BFGSStrategy; protected: /// General nonlinear program to solve. 
@@ -67,6 +74,8 @@ template class ProxNLPSolverTpl { LinesearchStrategy ls_strat = LinesearchStrategy::ARMIJO; MultiplierUpdateMode mul_update_mode = MultiplierUpdateMode::NEWTON; + mutable BFGS bfgs_strategy_; + /// linear algebra opts std::size_t max_refinement_steps_ = 5; Scalar kkt_tolerance_ = 1e-13; @@ -138,6 +147,19 @@ template class ProxNLPSolverTpl { void setup() { workspace_ = std::make_unique(*problem_, ldlt_choice_); results_ = std::make_unique(*problem_); + + if (hess_approx == HessianApprox::BFGS) { + // TODO: work on implementation of BFGS on manifolds + if (problem_->ndx() == problem_->nx()) { + BFGS valid_bfgs_strategy_ = BFGS(problem_->ndx()); + bfgs_strategy_ = std::move(valid_bfgs_strategy_); + // set workspace hessian to identity matrix (init to zero in workspace) + workspace_->objective_hessian.setIdentity(); + } else { + throw std::runtime_error("BFGS for hessian approximation currently " + "only works for Euclidean manifolds."); + } + } } /** diff --git a/include/proxsuite-nlp/prox-solver.hxx b/include/proxsuite-nlp/prox-solver.hxx index c7ddb88b..b76a08e3 100644 --- a/include/proxsuite-nlp/prox-solver.hxx +++ b/include/proxsuite-nlp/prox-solver.hxx @@ -1,5 +1,6 @@ /// @file /// @copyright Copyright (C) 2022 LAAS-CNRS, INRIA +/// @copyright Copyright (C) 2024-2025 INRIA /// @brief Implementations for the prox solver. 
#pragma once @@ -228,7 +229,22 @@ template void ProxNLPSolverTpl::computeProblemDerivatives( const ConstVectorRef &x, Workspace &workspace, boost::mpl::true_) const { this->computeProblemDerivatives(x, workspace, boost::mpl::false_()); - problem_->computeHessians(x, workspace, hess_approx == HessianApprox::EXACT); + + switch (hess_approx) { + case HessianApprox::BFGS: + bfgs_strategy_.update(x, workspace.objective_gradient, + workspace.objective_hessian); + break; + + case HessianApprox::IDENTITY: + workspace.objective_hessian.setIdentity(); + break; + + default: // handle both EXACT and GAUSS_NEWTON + problem_->computeHessians(x, workspace, + hess_approx == HessianApprox::EXACT); + break; + } } template diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 887519b2..643ecbbb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -153,6 +153,7 @@ add_proxsuite_nlp_test(finite-diff) add_proxsuite_nlp_test(math) add_proxsuite_nlp_test(functions) add_proxsuite_nlp_test(linesearch) +add_proxsuite_nlp_test(bfgs-strategy) add_proxsuite_nlp_test(manifolds) add_proxsuite_nlp_test(solver) diff --git a/tests/bfgs-strategy.cpp b/tests/bfgs-strategy.cpp new file mode 100644 index 00000000..d4aac9c2 --- /dev/null +++ b/tests/bfgs-strategy.cpp @@ -0,0 +1,143 @@ +#include +#include "util.hpp" + +#include "proxsuite-nlp/bfgs-strategy.hpp" + +using namespace proxsuite::nlp; + +// Tests the BFGS inverse Hessian update by verifying it follows the standard +// formula. 
+BOOST_AUTO_TEST_CASE(test_inverse_hessian_update) { + const long nx = 4; + using BFGSStrategy_t = BFGSStrategy; // default to InverseHessian + BFGSStrategy_t bfgs(nx); + VectorXs x0 = VectorXs::Random(nx); + VectorXs g0 = VectorXs::Random(nx); + bfgs.init(x0, g0); + + VectorXs x; + VectorXs g; + VectorXs s; + VectorXs y; + + bool is_psd = false; + MatrixXs H = MatrixXs::Identity(nx, nx); + while (!is_psd) { + x = VectorXs::Random(nx); + g = VectorXs::Random(nx); + s = x - x0; + y = g - g0; + if (s.dot(y) > 0) { + bfgs.update(x, g, H); + is_psd = bfgs.is_psd; + } + } + Scalar rho = 1. / s.dot(y); + MatrixXs H_inv_prev = MatrixXs::Identity(nx, nx); + MatrixXs H_inv_expected = + (MatrixXs::Identity(nx, nx) - rho * s * y.transpose()) * H_inv_prev * + (MatrixXs::Identity(nx, nx) - rho * y * s.transpose()) + + rho * s * s.transpose(); + BOOST_TEST_CHECK(H.isApprox(H_inv_expected, 1e-6)); +} + +// Tests the BFGS Hessian update by verifying it follows the standard DFP +// formula. +BOOST_AUTO_TEST_CASE(test_hessian_update) { + const long nx = 4; + using BFGSStrategy_t = BFGSStrategy; + BFGSStrategy_t bfgs(nx); + VectorXs x0 = VectorXs::Random(nx); + VectorXs g0 = VectorXs::Random(nx); + bfgs.init(x0, g0); + + VectorXs x; + VectorXs g; + VectorXs s; + VectorXs y; + + bool is_psd = false; + MatrixXs H = MatrixXs::Identity(nx, nx); + while (!is_psd) { + x = VectorXs::Random(nx); + g = VectorXs::Random(nx); + s = x - x0; + y = g - g0; + if (s.dot(y) > 0) { + bfgs.update(x, g, H); + is_psd = bfgs.is_psd; + } + } + + Scalar rho = 1. / s.dot(y); + MatrixXs H_prev = MatrixXs::Identity(nx, nx); + MatrixXs H_expected = + (MatrixXs::Identity(nx, nx) - rho * y * s.transpose()) * H_prev * + (MatrixXs::Identity(nx, nx) - rho * s * y.transpose()) + + rho * y * y.transpose(); + BOOST_TEST_CHECK(H.isApprox(H_expected, 1e-6)); +} + +// Tests whether the BFGS inverse Hessian approximation converges to the true +// inverse Hessian. 
+BOOST_AUTO_TEST_CASE(test_bfgs_inverse_hessian) { + const long nx = 4; + using BFGSStrategy_t = BFGSStrategy; + BFGSStrategy_t bfgs(nx); + + // random quadratic function with positive definite Hessian + Eigen::MatrixXd H = Eigen::MatrixXd::Random(nx, nx); + H = H.transpose() * H; // make it symmetric and positive definite + Eigen::MatrixXd H_inv = H.inverse(); + Eigen::VectorXd b = Eigen::VectorXd::Random(nx); + + Eigen::VectorXd x0 = Eigen::VectorXd::Random(nx); + Eigen::VectorXd g0 = H * x0 - b; + bfgs.init(x0, g0); + + MatrixXs H_inv_approx = MatrixXs::Identity(nx, nx); + for (int i = 0; i < 1000; ++i) { + Eigen::VectorXd x = Eigen::VectorXd::Random(nx); + Eigen::VectorXd g = H * x - b; // gradient + bfgs.update(x, g, H_inv_approx); + + Eigen::MatrixXd H_inv_err = H_inv - H_inv_approx; + double err = H_inv_err.norm(); + if (err < 1e-5) { + break; + } + } + + BOOST_TEST_CHECK(H_inv_approx.isApprox(H_inv, 1e-5)); +} + +// Tests whether the BFGS Hessian approximation converges to the true Hessian +BOOST_AUTO_TEST_CASE(test_bfgs_hessian) { + const long nx = 4; + using BFGSStrategy_t = BFGSStrategy; + BFGSStrategy_t bfgs(nx); + + // random quadratic function with positive definite Hessian + Eigen::MatrixXd H = Eigen::MatrixXd::Random(nx, nx); + H = H.transpose() * H; // make it symmetric and positive definite + Eigen::VectorXd b = Eigen::VectorXd::Random(nx); + + Eigen::VectorXd x0 = Eigen::VectorXd::Random(nx); + Eigen::VectorXd g0 = H * x0 - b; + bfgs.init(x0, g0); + + MatrixXs H_approx = MatrixXs::Identity(nx, nx); + for (int i = 0; i < 1000; ++i) { + Eigen::VectorXd x = Eigen::VectorXd::Random(nx); + Eigen::VectorXd g = H * x - b; // gradient + bfgs.update(x, g, H_approx); + + Eigen::MatrixXd H_err = H - H_approx; + double err = H_err.norm(); + if (err < 1e-5) { + break; + } + } + + BOOST_TEST_CHECK(H_approx.isApprox(H, 1e-5)); +}