JuliaNLSolvers
diff --git a/‎Project.toml‎
Lines changed: 1 addition & 1 deletion b/‎Project.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/src/user/config.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/src/user/config.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/multivariate/optimize/optimize.jl‎
Lines changed: 17 additions & 0 deletions b/‎src/multivariate/optimize/optimize.jl‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/multivariate/solvers/first_order/accelerated_gradient_descent.jl‎
Lines changed: 52 additions & 10 deletions b/‎src/multivariate/solvers/first_order/accelerated_gradient_descent.jl‎
Lines changed: 52 additions & 10 deletions
diff --git a/‎src/multivariate/solvers/first_order/bfgs.jl‎
Lines changed: 31 additions & 9 deletions b/‎src/multivariate/solvers/first_order/bfgs.jl‎
Lines changed: 31 additions & 9 deletions
diff --git a/‎src/multivariate/solvers/first_order/cg.jl‎
Lines changed: 39 additions & 21 deletions b/‎src/multivariate/solvers/first_order/cg.jl‎
Lines changed: 39 additions & 21 deletions
diff --git a/‎src/multivariate/solvers/first_order/gradient_descent.jl‎
Lines changed: 33 additions & 3 deletions b/‎src/multivariate/solvers/first_order/gradient_descent.jl‎
Lines changed: 33 additions & 3 deletions
@@ -30,7 +30,7 @@ ExplicitImports = "1.13.2"
 FillArrays = "0.6.2, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
 ForwardDiff = "0.10, 1"
 JET = "0.9, 0.10, 0.11"
-LineSearches = "7.6.2"
+LineSearches = "7.7"
 LinearAlgebra = "<0.0.1, 1.6"
 MathOptInterface = "1.17"
 Measurements = "2.14.1"
 
@@ -47,7 +47,7 @@ In addition to the solver, you can alter the behavior of the Optim package by us
 * `f_calls_limit`: A soft upper limit on the number of objective calls. Defaults to `0` (unlimited).
 * `g_calls_limit`: A soft upper limit on the number of gradient calls. Defaults to `0` (unlimited).
 * `h_calls_limit`: A soft upper limit on the number of Hessian calls. Defaults to `0` (unlimited).
-* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `true`. Note that, when this setting is `true`, the last iterate will be returned as the minimizer even if the objective increased.
+* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `true`. Note that, when this setting is `true`, the last iterate will be returned as the minimizer even if the objective increased. This is useful for some methods, such as line-search-based methods using `HagerZhang()`, which allows a slight increase in the objective function as long as the directional derivative decreases.
 * `successive_f_tol`: Determines the number of times the objective is allowed to increase across iterations. Defaults to 1.
 * `iterations`: How many iterations will run before the algorithm gives up? Defaults to `1_000`.
 * `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).
 
@@ -27,6 +27,15 @@ end
 
 after_while!(d, state, method, options) = nothing
 
+# Validation/commit hook run by the main loop right after update_fgh!. This fallback
+# accepts unconditionally: solvers that mutate state.x / state.f_x / state.g_x directly
+# have nothing to commit. Migrated solvers add methods that check state.f_candidate /
+# g_candidate / x_candidate for finiteness and either commit them and return true, or
+# keep the last accepted iterate and return false (the main loop then breaks).
+function accept_step!(d, state, method, options)
+    return true
+end
+
 function initial_convergence(state::AbstractOptimizerState, options::Options)
     stopped = !isfinite(state.f_x) || any(!isfinite, state.g_x)
     return g_residual(state) <= options.g_abstol, stopped
@@ -85,6 +94,14 @@ function optimize(
         # TODO: Already perform in `update_state!`?
         update_fgh!(d, state, method)
 
+        # Validate the trial iterate and commit candidate -> state for migrated
+        # solvers. No-op for solvers that mutate state.x/f_x/g_x directly.
+        if !accept_step!(d, state, method, options)
+            options.show_warnings && @warn "Terminated early: trial iterate had non-finite values."
+            ls_success = false
+            break
+        end
+
         # Check convergence
         x_converged, f_converged, g_converged, f_increased =
             assess_convergence(state, d, options)
 
@@ -32,6 +32,12 @@ mutable struct AcceleratedGradientDescentState{T,Tx,Tg} <: AbstractOptimizerStat
     y::Tx
     y_previous::Tx
     s::Tx
+    # Trial iterates produced by update_state! / update_fgh!. Committed to
+    # state.x / state.g_x / state.f_x / state.y by accept_step! once validated.
+    x_candidate::Tx
+    y_candidate::Tx
+    g_candidate::Tg
+    f_candidate::T
     @add_linesearch_fields()
 end
 
@@ -57,6 +63,10 @@ function initial_state(
         copy(x0), # Maintain intermediary current state in state.y
         fill!(similar(x0), NaN), # Maintain intermediary state in state.y_previous
         fill!(similar(x0), NaN), # Maintain current search direction in state.s
+        fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
+        fill!(similar(x0), NaN), # Trial y iterate in state.y_candidate
+        fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
+        oftype(f_x, NaN), # Trial f value in state.f_candidate
         @initial_linesearch()...,
     )
 end
@@ -66,27 +76,59 @@ function update_state!(
     state::AcceleratedGradientDescentState,
     method::AcceleratedGradientDescent,
 )
-    state.iteration += 1
-
     # Search direction is always the negative gradient
     state.s .= .-state.g_x
 
     # Determine the distance of movement along the search line
     lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
 
-    # Make one move in the direction of the gradient
-    copyto!(state.y_previous, state.y)
-    state.y .= state.x .+ state.alpha .* state.s
-    retract!(method.manifold, state.y)
+    # Propose trial intermediary y (do NOT mutate state.y; accept_step! commits)
+    state.y_candidate .= state.x .+ state.alpha .* state.s
+    retract!(method.manifold, state.y_candidate)
 
-    # Update current position with Nesterov correction
-    scaling = (state.iteration - 1) / (state.iteration + 2)
-    state.x .= state.y .+ scaling .* (state.y .- state.y_previous)
-    retract!(method.manifold, state.x)
+    # Propose trial position with Nesterov correction. iteration is incremented
+    # on accept so the scaling here uses the would-be next iteration index.
+    next_iteration = state.iteration + 1
+    scaling = (next_iteration - 1) / (next_iteration + 2)
+    state.x_candidate .= state.y_candidate .+ scaling .* (state.y_candidate .- state.y)
+    retract!(method.manifold, state.x_candidate)
 
     return !lssuccess # break on linesearch error
 end
 
+# Evaluate objective and gradient at the trial point; results go to the candidate fields.
+function update_fgh!(
+    d, state::AcceleratedGradientDescentState, method::AcceleratedGradientDescent
+)
+    trial_x = state.x_candidate
+    f_trial, g_trial = NLSolversBase.value_gradient!(d, trial_x)
+    copyto!(state.g_candidate, g_trial)
+    project_tangent!(method.manifold, state.g_candidate, trial_x)
+    state.f_candidate = f_trial
+    return nothing
+end
+
+# Commit the trial iterate if every candidate quantity is finite. On rejection nothing
+# is written (x, y, g_x, f_x and iteration keep their values) and `false` is returned.
+function accept_step!(
+    d, state::AcceleratedGradientDescentState, method::AcceleratedGradientDescent, options
+)
+    candidate_ok =
+        isfinite(state.f_candidate) &&
+        all(isfinite, state.g_candidate) &&
+        all(isfinite, state.x_candidate) &&
+        all(isfinite, state.y_candidate)
+    candidate_ok || return false
+
+    copyto!(state.y_previous, state.y)  # save the outgoing y before it is overwritten
+    copyto!(state.y, state.y_candidate)
+    copyto!(state.x, state.x_candidate)
+    copyto!(state.g_x, state.g_candidate)
+    state.f_x = state.f_candidate
+    state.iteration += 1
+    return true
+end
+
 function trace!(
     tr,
     d,
 
@@ -59,6 +59,11 @@ mutable struct BFGSState{Tx,Tm,T,G} <: AbstractOptimizerState
     u::Tx
     invH::Tm
     s::Tx
+    # Trial iterate produced by update_state! / update_fgh!. Committed to
+    # state.x / state.g_x / state.f_x by accept_step! once validated.
+    x_candidate::Tx
+    g_candidate::G
+    f_candidate::T
     @add_linesearch_fields()
 end
 
@@ -137,6 +142,9 @@ function initial_state(method::BFGS, ::Options, d, x0::AbstractArray)
         fill!(similar(x0), NaN), # Buffer stored in state.u
         invH0, # Store current invH in state.invH
         fill!(similar(x0), NaN), # Store current search direction in state.s
+        fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
+        fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
+        oftype(f_x, NaN), # Trial f value in state.f_candidate
         @initial_linesearch()...,
     )
 end
@@ -154,22 +162,35 @@ function update_state!(d, state::BFGSState, method::BFGS)
     # semi-definite
     lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
 
-    # Update current position
+    # Propose trial iterate (do NOT mutate state.x; accept_step! commits)
     state.dx .= state.alpha .* state.s
-    state.x .= state.x .+ state.dx
-    retract!(method.manifold, state.x)
+    state.x_candidate .= state.x .+ state.dx
+    retract!(method.manifold, state.x_candidate)
 
     return !lssuccess # break on linesearch error
 end
 
 function update_fgh!(d, state::BFGSState, method::BFGS)
+    f_trial, g_trial = NLSolversBase.value_gradient!(d, state.x_candidate)
+    copyto!(state.g_candidate, g_trial)  # copy the returned gradient into the trial buffer
+    project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
+    state.f_candidate = f_trial  # accept_step! copies this into state.f_x
+    return nothing
+end
+
+function accept_step!(d, state::BFGSState, method::BFGS, options)
+    if !isfinite(state.f_candidate) ||
+       !all(isfinite, state.g_candidate) ||
+       !all(isfinite, state.x_candidate)
+        return false
+    end
+
+    # Commit candidates.
+    copyto!(state.x, state.x_candidate)
+    copyto!(state.g_x, state.g_candidate)
+    state.f_x = state.f_candidate
+
     (; invH, dx, dg, u) = state
-    
-    # Update function value and gradient
-    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
-    copyto!(state.g_x, g_x)
-    project_tangent!(method.manifold, state.g_x, state.x)
-    state.f_x = f_x
 
     # Measure the change in the gradient
     dg .= state.g_x .- state.g_x_previous
@@ -203,6 +224,7 @@ function update_fgh!(d, state::BFGSState, method::BFGS)
             mul!(invH, vec(dx), vec(u)', -c2, 1)
         end
     end
+    return true
 end
 
 function trace!(tr, d, state::BFGSState, iteration::Integer, method::BFGS, options::Options, curr_time = time())
 
@@ -103,6 +103,11 @@ mutable struct ConjugateGradientState{Tx,T,G} <: AbstractOptimizerState
     pg::Tx
     s::Tx
     beta::T
+    # Trial iterate produced by update_state! / update_fgh!. Committed to
+    # state.x / state.g_x / state.f_x by accept_step! once validated.
+    x_candidate::Tx
+    g_candidate::G
+    f_candidate::T
     @add_linesearch_fields()
 end
 
@@ -164,34 +169,50 @@ function initial_state(method::ConjugateGradient, ::Options, d, x0)
         pg, # Maintain the preconditioned gradient in pg
         -pg, # Maintain current search direction in state.s
         oftype(f_x, 0), # Store beta in state.beta
+        fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
+        fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
+        oftype(f_x, NaN), # Trial f value in state.f_candidate
         @initial_linesearch()...,
     )
 end
 
 function update_state!(d, state::ConjugateGradientState, method::ConjugateGradient)
-    # Search direction is predetermined
-
-    # Maintain a record of the previous gradient
-    copyto!(state.g_x_previous, state.g_x)
+    # Search direction state.s is predetermined (set during initial_state or in
+    # accept_step! at the end of the previous iteration).
 
     # Determine the distance of movement along the search line
     lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
 
-    # Update current position # x = x + alpha * s
-    state.x .= muladd.(state.alpha, state.s, state.x)
-    retract!(method.manifold, state.x)
+    # Propose trial iterate (do NOT mutate state.x; accept_step! commits)
+    state.x_candidate .= muladd.(state.alpha, state.s, state.x)
+    retract!(method.manifold, state.x_candidate)
+
+    return !lssuccess # break on linesearch error
+end
+
+function update_fgh!(d, state::ConjugateGradientState, method::ConjugateGradient)
+    trial_f, trial_g = NLSolversBase.value_gradient!(d, state.x_candidate)
+    copyto!(state.g_candidate, trial_g)  # copy the returned gradient into the trial buffer
+    project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
+    state.f_candidate = trial_f  # accept_step! copies this into state.f_x
+    return nothing
+end
 
-    # Update the function value and gradient
-    f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
-    copyto!(state.g_x, g_x)
-    project_tangent!(method.manifold, state.g_x, state.x)
-    state.f_x = f_x
+function accept_step!(d, state::ConjugateGradientState, method::ConjugateGradient, options)
+    if !isfinite(state.f_candidate) ||
+       !all(isfinite, state.g_candidate) ||
+       !all(isfinite, state.x_candidate)
+        return false
+    end
 
-    # Check sanity of function and gradient
-    isfinite(f_x) || error(LazyString("Non-finite f(x) while optimizing (", f_x, ")"))
+    # Commit candidates. state.g_x_previous is expected to have been captured by
+    # perform_linesearch! (update_state! no longer copies it) -- NOTE(review): confirm.
+    copyto!(state.x, state.x_candidate)
+    copyto!(state.g_x, state.g_candidate)
+    state.f_x = state.f_candidate
 
-    # Determine the next search direction using HZ's CG rule
-    #  Calculate the beta factor (HZ2013)
+    # Determine the next search direction using HZ's CG rule.
+    # Calculate the beta factor (HZ2013).
     # -----------------
     # Comment on py: one could replace the computation of py with
     #    ydotpgprev = dot(y, pg)
@@ -207,7 +228,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
     ydots = real(dot(state.y, state.s))
     copyto!(state.py, state.pg)        # below, store pg - pg_previous in py
     # P already updated in _apply_precondprep above
-    __precondition!(state.pg, method.P, g_x)
+    __precondition!(state.pg, method.P, state.g_x)
 
     state.py .= state.pg .- state.py
     # ydots may be zero if f is not strongly convex or the line search does not satisfy Wolfe
@@ -221,12 +242,9 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
     state.beta = beta
     state.s .= beta .* state.s .- state.pg
     project_tangent!(method.manifold, state.s, state.x)
-    return !lssuccess # break on linesearch error
+    return true
 end
 
-# Function value, gradient and Hessian are already updated in `update_state!`
-update_fgh!(d, state, ::ConjugateGradient) = nothing
-
 function trace!(
     tr,
     d,
 
@@ -46,6 +46,11 @@ mutable struct GradientDescentState{Tx,Tg,T} <: AbstractOptimizerState
     x_previous::Tx
     f_x_previous::T
     s::Tx
+    # Trial iterate produced by update_state! / update_fgh!. Committed to
+    # state.x / state.g_x / state.f_x by accept_step! once validated.
+    x_candidate::Tx
+    g_candidate::Tg
+    f_candidate::T
     @add_linesearch_fields()
 end
 
@@ -85,6 +90,9 @@ function initial_state(
         fill!(similar(x0), NaN), # Maintain previous state in state.x_previous
         oftype(f_x, NaN), # Store previous f in state.f_x_previous
         fill!(similar(x0), NaN), # Maintain current search direction in state.s
+        fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
+        fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
+        oftype(f_x, NaN), # Trial f value in state.f_candidate
         @initial_linesearch()...,
     )
 end
@@ -100,13 +108,35 @@ function update_state!(d, state::GradientDescentState{T}, method::GradientDescen
     # Determine the distance of movement along the search line
     lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
 
-    # Update current position # x = x + alpha * s
-    @. state.x = state.x + state.alpha * state.s
-    retract!(method.manifold, state.x)
+    # Propose trial iterate (do NOT mutate state.x; accept_step! commits)
+    @. state.x_candidate = state.x + state.alpha * state.s
+    retract!(method.manifold, state.x_candidate)
 
     return !lssuccess # break on linesearch error
 end
 
+function update_fgh!(d, state::GradientDescentState, method::GradientDescent)
+    trial_f, trial_g = NLSolversBase.value_gradient!(d, state.x_candidate)
+    copyto!(state.g_candidate, trial_g)  # copy the returned gradient into the trial buffer
+    project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
+    state.f_candidate = trial_f  # accept_step! copies this into state.f_x
+    return nothing
+end
+
+# Commit the trial iterate to state.x / state.g_x / state.f_x when every candidate
+# value is finite; otherwise write nothing and return false.
+function accept_step!(d, state::GradientDescentState, method::GradientDescent, options)
+    candidate_ok = isfinite(state.f_candidate) &&
+        all(isfinite, state.g_candidate) && all(isfinite, state.x_candidate)
+    candidate_ok || return false
+    # state.x_previous / state.f_x_previous need no update here: perform_linesearch! is
+    # expected to have captured them before the step was proposed (verify there).
+    copyto!(state.x, state.x_candidate)
+    copyto!(state.g_x, state.g_candidate)
+    state.f_x = state.f_candidate
+    return true
+end
+
 function trace!(
     tr,
     d,