Skip to content

Commit d401b99

Browse files
authored
Try to restart the optimization if the line search fails. (#1242)
* Update perform_linesearch.jl * Update Project.toml * Update runtests.jl * Update lsthrow.jl * Update perform_linesearch.jl * Update perform_linesearch.jl * try to re-instate some skips * Update perform_linesearch.jl * reinstate two failures * Update bfgs.jl * Update l_bfgs.jl * defer acceptance * Update ngmres.jl * Update manifolds.jl * Update manifolds.jl * Update ngmres.jl * Update ngmres.jl * Update ngmres.jl * ngmres * try to run all bfgs * update some tests * Update l_bfgs.jl * Update l_bfgs.jl * Update l_bfgs.jl * default to defaults... * try more cg tests * Update cg.jl * adjust tests * Update perform_linesearch.jl * Update Project.toml
1 parent 420985c commit d401b99

21 files changed

Lines changed: 439 additions & 137 deletions

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ ExplicitImports = "1.13.2"
3030
FillArrays = "0.6.2, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
3131
ForwardDiff = "0.10, 1"
3232
JET = "0.9, 0.10, 0.11"
33-
LineSearches = "7.6.2"
33+
LineSearches = "7.7"
3434
LinearAlgebra = "<0.0.1, 1.6"
3535
MathOptInterface = "1.17"
3636
Measurements = "2.14.1"

docs/src/user/config.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ In addition to the solver, you can alter the behavior of the Optim package by us
4747
* `f_calls_limit`: A soft upper limit on the number of objective calls. Defaults to `0` (unlimited).
4848
* `g_calls_limit`: A soft upper limit on the number of gradient calls. Defaults to `0` (unlimited).
4949
* `h_calls_limit`: A soft upper limit on the number of Hessian calls. Defaults to `0` (unlimited).
50-
* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `true`. Note that, when this setting is `true`, the last iterate will be returned as the minimizer even if the objective increased.
50+
* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `true`. Note that, when this setting is `true`, the last iterate will be returned as the minimizer even if the objective increased. This is useful for some methods, such as line-search-based methods using `HagerZhang()`, which allows a slight increase in the objective function as long as the directional derivative decreases.
5151
* `successive_f_tol`: Determines the number of times the objective is allowed to increase across iterations. Defaults to 1.
5252
* `iterations`: How many iterations will run before the algorithm gives up? Defaults to `1_000`.
5353
* `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).

src/multivariate/optimize/optimize.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ end
2727

2828
after_while!(d, state, method, options) = nothing
2929

30+
# Validate and commit the trial iterate produced by update_state! / update_fgh!.
31+
# Default: no-op (un-migrated solvers mutate state.x/f_x/g_x directly and have
32+
# nothing to accept). Migrated solvers override this to:
33+
# - check finiteness of state.f_candidate / g_candidate / x_candidate
34+
# - on success, commit candidate -> state.x / state.f_x / state.g_x and return true
35+
# - on failure, leave state.x / state.f_x / state.g_x at the last accepted iterate
36+
# and return false (the main loop then breaks).
37+
accept_step!(d, state, method, options) = true
38+
3039
function initial_convergence(state::AbstractOptimizerState, options::Options)
3140
stopped = !isfinite(state.f_x) || any(!isfinite, state.g_x)
3241
return g_residual(state) <= options.g_abstol, stopped
@@ -85,6 +94,14 @@ function optimize(
8594
# TODO: Already performed in `update_state!`?
8695
update_fgh!(d, state, method)
8796

97+
# Validate the trial iterate and commit candidate -> state for migrated
98+
# solvers. No-op for solvers that mutate state.x/f_x/g_x directly.
99+
if !accept_step!(d, state, method, options)
100+
options.show_warnings && @warn "Terminated early: trial iterate had non-finite values."
101+
ls_success = false
102+
break
103+
end
104+
88105
# Check convergence
89106
x_converged, f_converged, g_converged, f_increased =
90107
assess_convergence(state, d, options)

src/multivariate/solvers/first_order/accelerated_gradient_descent.jl

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ mutable struct AcceleratedGradientDescentState{T,Tx,Tg} <: AbstractOptimizerStat
3232
y::Tx
3333
y_previous::Tx
3434
s::Tx
35+
# Trial iterates produced by update_state! / update_fgh!. Committed to
36+
# state.x / state.g_x / state.f_x / state.y by accept_step! once validated.
37+
x_candidate::Tx
38+
y_candidate::Tx
39+
g_candidate::Tg
40+
f_candidate::T
3541
@add_linesearch_fields()
3642
end
3743

@@ -57,6 +63,10 @@ function initial_state(
5763
copy(x0), # Maintain intermediary current state in state.y
5864
fill!(similar(x0), NaN), # Maintain intermediary state in state.y_previous
5965
fill!(similar(x0), NaN), # Maintain current search direction in state.s
66+
fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
67+
fill!(similar(x0), NaN), # Trial y iterate in state.y_candidate
68+
fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
69+
oftype(f_x, NaN), # Trial f value in state.f_candidate
6070
@initial_linesearch()...,
6171
)
6272
end
@@ -66,27 +76,59 @@ function update_state!(
6676
state::AcceleratedGradientDescentState,
6777
method::AcceleratedGradientDescent,
6878
)
69-
state.iteration += 1
70-
7179
# Search direction is always the negative gradient
7280
state.s .= .-state.g_x
7381

7482
# Determine the distance of movement along the search line
7583
lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
7684

77-
# Make one move in the direction of the gradient
78-
copyto!(state.y_previous, state.y)
79-
state.y .= state.x .+ state.alpha .* state.s
80-
retract!(method.manifold, state.y)
85+
# Propose trial intermediary y (do NOT mutate state.y; accept_step! commits)
86+
state.y_candidate .= state.x .+ state.alpha .* state.s
87+
retract!(method.manifold, state.y_candidate)
8188

82-
# Update current position with Nesterov correction
83-
scaling = (state.iteration - 1) / (state.iteration + 2)
84-
state.x .= state.y .+ scaling .* (state.y .- state.y_previous)
85-
retract!(method.manifold, state.x)
89+
# Propose trial position with Nesterov correction. iteration is incremented
90+
# on accept so the scaling here uses the would-be next iteration index.
91+
next_iteration = state.iteration + 1
92+
scaling = (next_iteration - 1) / (next_iteration + 2)
93+
state.x_candidate .= state.y_candidate .+ scaling .* (state.y_candidate .- state.y)
94+
retract!(method.manifold, state.x_candidate)
8695

8796
return !lssuccess # break on linesearch error
8897
end
8998

99+
function update_fgh!(
100+
d,
101+
state::AcceleratedGradientDescentState,
102+
method::AcceleratedGradientDescent,
103+
)
104+
f_c, g_c = NLSolversBase.value_gradient!(d, state.x_candidate)
105+
copyto!(state.g_candidate, g_c)
106+
project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
107+
state.f_candidate = f_c
108+
return nothing
109+
end
110+
111+
function accept_step!(
112+
d,
113+
state::AcceleratedGradientDescentState,
114+
method::AcceleratedGradientDescent,
115+
options,
116+
)
117+
if !isfinite(state.f_candidate) ||
118+
!all(isfinite, state.g_candidate) ||
119+
!all(isfinite, state.x_candidate) ||
120+
!all(isfinite, state.y_candidate)
121+
return false
122+
end
123+
copyto!(state.y_previous, state.y)
124+
copyto!(state.y, state.y_candidate)
125+
copyto!(state.x, state.x_candidate)
126+
copyto!(state.g_x, state.g_candidate)
127+
state.f_x = state.f_candidate
128+
state.iteration += 1
129+
return true
130+
end
131+
90132
function trace!(
91133
tr,
92134
d,

src/multivariate/solvers/first_order/bfgs.jl

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ mutable struct BFGSState{Tx,Tm,T,G} <: AbstractOptimizerState
5959
u::Tx
6060
invH::Tm
6161
s::Tx
62+
# Trial iterate produced by update_state! / update_fgh!. Committed to
63+
# state.x / state.g_x / state.f_x by accept_step! once validated.
64+
x_candidate::Tx
65+
g_candidate::G
66+
f_candidate::T
6267
@add_linesearch_fields()
6368
end
6469

@@ -137,6 +142,9 @@ function initial_state(method::BFGS, ::Options, d, x0::AbstractArray)
137142
fill!(similar(x0), NaN), # Buffer stored in state.u
138143
invH0, # Store current invH in state.invH
139144
fill!(similar(x0), NaN), # Store current search direction in state.s
145+
fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
146+
fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
147+
oftype(f_x, NaN), # Trial f value in state.f_candidate
140148
@initial_linesearch()...,
141149
)
142150
end
@@ -154,22 +162,35 @@ function update_state!(d, state::BFGSState, method::BFGS)
154162
# semi-definite
155163
lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
156164

157-
# Update current position
165+
# Propose trial iterate (do NOT mutate state.x; accept_step! commits)
158166
state.dx .= state.alpha .* state.s
159-
state.x .= state.x .+ state.dx
160-
retract!(method.manifold, state.x)
167+
state.x_candidate .= state.x .+ state.dx
168+
retract!(method.manifold, state.x_candidate)
161169

162170
return !lssuccess # break on linesearch error
163171
end
164172

165173
function update_fgh!(d, state::BFGSState, method::BFGS)
174+
f_c, g_c = NLSolversBase.value_gradient!(d, state.x_candidate)
175+
copyto!(state.g_candidate, g_c)
176+
project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
177+
state.f_candidate = f_c
178+
return nothing
179+
end
180+
181+
function accept_step!(d, state::BFGSState, method::BFGS, options)
182+
if !isfinite(state.f_candidate) ||
183+
!all(isfinite, state.g_candidate) ||
184+
!all(isfinite, state.x_candidate)
185+
return false
186+
end
187+
188+
# Commit candidates.
189+
copyto!(state.x, state.x_candidate)
190+
copyto!(state.g_x, state.g_candidate)
191+
state.f_x = state.f_candidate
192+
166193
(; invH, dx, dg, u) = state
167-
168-
# Update function value and gradient
169-
f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
170-
copyto!(state.g_x, g_x)
171-
project_tangent!(method.manifold, state.g_x, state.x)
172-
state.f_x = f_x
173194

174195
# Measure the change in the gradient
175196
dg .= state.g_x .- state.g_x_previous
@@ -203,6 +224,7 @@ function update_fgh!(d, state::BFGSState, method::BFGS)
203224
mul!(invH, vec(dx), vec(u)', -c2, 1)
204225
end
205226
end
227+
return true
206228
end
207229

208230
function trace!(tr, d, state::BFGSState, iteration::Integer, method::BFGS, options::Options, curr_time = time())

src/multivariate/solvers/first_order/cg.jl

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ mutable struct ConjugateGradientState{Tx,T,G} <: AbstractOptimizerState
103103
pg::Tx
104104
s::Tx
105105
beta::T
106+
# Trial iterate produced by update_state! / update_fgh!. Committed to
107+
# state.x / state.g_x / state.f_x by accept_step! once validated.
108+
x_candidate::Tx
109+
g_candidate::G
110+
f_candidate::T
106111
@add_linesearch_fields()
107112
end
108113

@@ -164,34 +169,50 @@ function initial_state(method::ConjugateGradient, ::Options, d, x0)
164169
pg, # Maintain the preconditioned gradient in pg
165170
-pg, # Maintain current search direction in state.s
166171
oftype(f_x, 0), # Store beta in state.beta
172+
fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
173+
fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
174+
oftype(f_x, NaN), # Trial f value in state.f_candidate
167175
@initial_linesearch()...,
168176
)
169177
end
170178

171179
function update_state!(d, state::ConjugateGradientState, method::ConjugateGradient)
172-
# Search direction is predetermined
173-
174-
# Maintain a record of the previous gradient
175-
copyto!(state.g_x_previous, state.g_x)
180+
# Search direction state.s is predetermined (set during initial_state or in
181+
# accept_step! at the end of the previous iteration).
176182

177183
# Determine the distance of movement along the search line
178184
lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
179185

180-
# Update current position # x = x + alpha * s
181-
state.x .= muladd.(state.alpha, state.s, state.x)
182-
retract!(method.manifold, state.x)
186+
# Propose trial iterate (do NOT mutate state.x; accept_step! commits)
187+
state.x_candidate .= muladd.(state.alpha, state.s, state.x)
188+
retract!(method.manifold, state.x_candidate)
189+
190+
return !lssuccess # break on linesearch error
191+
end
192+
193+
function update_fgh!(d, state::ConjugateGradientState, method::ConjugateGradient)
194+
f_c, g_c = NLSolversBase.value_gradient!(d, state.x_candidate)
195+
copyto!(state.g_candidate, g_c)
196+
project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
197+
state.f_candidate = f_c
198+
return nothing
199+
end
183200

184-
# Update the function value and gradient
185-
f_x, g_x = NLSolversBase.value_gradient!(d, state.x)
186-
copyto!(state.g_x, g_x)
187-
project_tangent!(method.manifold, state.g_x, state.x)
188-
state.f_x = f_x
201+
function accept_step!(d, state::ConjugateGradientState, method::ConjugateGradient, options)
202+
if !isfinite(state.f_candidate) ||
203+
!all(isfinite, state.g_candidate) ||
204+
!all(isfinite, state.x_candidate)
205+
return false
206+
end
189207

190-
# Check sanity of function and gradient
191-
isfinite(f_x) || error(LazyString("Non-finite f(x) while optimizing (", f_x, ")"))
208+
# Commit candidates. state.g_x_previous was already captured by perform_linesearch!
209+
# before the step was proposed.
210+
copyto!(state.x, state.x_candidate)
211+
copyto!(state.g_x, state.g_candidate)
212+
state.f_x = state.f_candidate
192213

193-
# Determine the next search direction using HZ's CG rule
194-
# Calculate the beta factor (HZ2013)
214+
# Determine the next search direction using HZ's CG rule.
215+
# Calculate the beta factor (HZ2013).
195216
# -----------------
196217
# Comment on py: one could replace the computation of py with
197218
# ydotpgprev = dot(y, pg)
@@ -207,7 +228,7 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
207228
ydots = real(dot(state.y, state.s))
208229
copyto!(state.py, state.pg) # below, store pg - pg_previous in py
209230
# P already updated in _apply_precondprep above
210-
__precondition!(state.pg, method.P, g_x)
231+
__precondition!(state.pg, method.P, state.g_x)
211232

212233
state.py .= state.pg .- state.py
213234
# ydots may be zero if f is not strongly convex or the line search does not satisfy Wolfe
@@ -221,12 +242,9 @@ function update_state!(d, state::ConjugateGradientState, method::ConjugateGradie
221242
state.beta = beta
222243
state.s .= beta .* state.s .- state.pg
223244
project_tangent!(method.manifold, state.s, state.x)
224-
return !lssuccess # break on linesearch error
245+
return true
225246
end
226247

227-
# Function value, gradient and Hessian are already updated in `update_state!`
228-
update_fgh!(d, state, ::ConjugateGradient) = nothing
229-
230248
function trace!(
231249
tr,
232250
d,

src/multivariate/solvers/first_order/gradient_descent.jl

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ mutable struct GradientDescentState{Tx,Tg,T} <: AbstractOptimizerState
4646
x_previous::Tx
4747
f_x_previous::T
4848
s::Tx
49+
# Trial iterate produced by update_state! / update_fgh!. Committed to
50+
# state.x / state.g_x / state.f_x by accept_step! once validated.
51+
x_candidate::Tx
52+
g_candidate::Tg
53+
f_candidate::T
4954
@add_linesearch_fields()
5055
end
5156

@@ -85,6 +90,9 @@ function initial_state(
8590
fill!(similar(x0), NaN), # Maintain previous state in state.x_previous
8691
oftype(f_x, NaN), # Store previous f in state.f_x_previous
8792
fill!(similar(x0), NaN), # Maintain current search direction in state.s
93+
fill!(similar(x0), NaN), # Trial iterate in state.x_candidate
94+
fill!(similar(g_x), NaN), # Trial gradient in state.g_candidate
95+
oftype(f_x, NaN), # Trial f value in state.f_candidate
8896
@initial_linesearch()...,
8997
)
9098
end
@@ -100,13 +108,35 @@ function update_state!(d, state::GradientDescentState{T}, method::GradientDescen
100108
# Determine the distance of movement along the search line
101109
lssuccess = perform_linesearch!(state, method, ManifoldObjective(method.manifold, d))
102110

103-
# Update current position # x = x + alpha * s
104-
@. state.x = state.x + state.alpha * state.s
105-
retract!(method.manifold, state.x)
111+
# Propose trial iterate (do NOT mutate state.x; accept_step! commits)
112+
@. state.x_candidate = state.x + state.alpha * state.s
113+
retract!(method.manifold, state.x_candidate)
106114

107115
return !lssuccess # break on linesearch error
108116
end
109117

118+
function update_fgh!(d, state::GradientDescentState, method::GradientDescent)
119+
f_c, g_c = NLSolversBase.value_gradient!(d, state.x_candidate)
120+
copyto!(state.g_candidate, g_c)
121+
project_tangent!(method.manifold, state.g_candidate, state.x_candidate)
122+
state.f_candidate = f_c
123+
return nothing
124+
end
125+
126+
function accept_step!(d, state::GradientDescentState, method::GradientDescent, options)
127+
if !isfinite(state.f_candidate) ||
128+
!all(isfinite, state.g_candidate) ||
129+
!all(isfinite, state.x_candidate)
130+
return false
131+
end
132+
# state.x_previous / state.f_x_previous were captured by perform_linesearch!
133+
# before the step was proposed, so they already hold the prior accepted values.
134+
copyto!(state.x, state.x_candidate)
135+
copyto!(state.g_x, state.g_candidate)
136+
state.f_x = state.f_candidate
137+
return true
138+
end
139+
110140
function trace!(
111141
tr,
112142
d,

0 commit comments

Comments
 (0)