RFC: more generic convergence assessment (#530)
* more generic convergence assessment

* fix initial convergence

* added tests for gradient_convergence_assessment and initial_convergence
jonathanBieler authored and pkofod committed Feb 15, 2018
1 parent ede177e commit 0d304a1
Showing 4 changed files with 58 additions and 18 deletions.
17 changes: 7 additions & 10 deletions src/multivariate/optimize/optimize.jl
@@ -13,6 +13,12 @@ update_h!(d, state, method::SecondOrderOptimizer) = hessian!(d, state.x)

after_while!(d, state, method, options) = nothing

function initial_convergence(d, state, method::AbstractOptimizer, initial_x, options)
gradient!(d, initial_x)
vecnorm(gradient(d), Inf) < options.g_tol
end
initial_convergence(d, state, method::ZerothOrderOptimizer, initial_x, options) = false

function optimize(d::D, initial_x::AbstractArray{Tx, N}, method::M,
options::Options = Options(;default_options(method)...),
state = initial_state(method, options, d, complex_to_real(d, initial_x))) where {D<:AbstractObjective, M<:AbstractOptimizer, Tx, N}
@@ -31,16 +37,7 @@ function optimize(d::D, initial_x::AbstractArray{Tx, N}, method::M,
f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
x_converged, f_converged, f_increased = false, false, false

g_converged = if typeof(method) <: NelderMead
nmobjective(state.f_simplex, state.m, n) < options.g_tol
elseif typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing
# TODO: remove KrylovTrustRegion when TwiceDifferentiableHV is in NLSolversBase
false
else
gradient!(d, initial_x)
vecnorm(gradient(d), Inf) < options.g_tol
end

g_converged = initial_convergence(d, state, method, initial_x, options)
converged = g_converged

# prepare iteration counter (used to make "initial state" trace entry)
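The hunk above replaces the per-solver `typeof(method) <: ...` branching with multiple dispatch: a generic `initial_convergence` checks the infinity norm of the gradient at `initial_x` against `g_tol`, and a `ZerothOrderOptimizer` method always returns `false`. A minimal, self-contained sketch of the same pattern (the concrete solver types below are stand-ins, not Optim's):

```julia
abstract type AbstractOptimizer end
abstract type ZerothOrderOptimizer <: AbstractOptimizer end

struct FirstOrderSolver <: AbstractOptimizer end       # stand-in for e.g. GradientDescent
struct GradientFreeSolver <: ZerothOrderOptimizer end  # stand-in for e.g. SimulatedAnnealing

# Generic rule: already converged if the gradient at the starting point is tiny.
initial_convergence(g, method::AbstractOptimizer, g_tol) = maximum(abs, g) < g_tol
# Gradient-free solvers never trigger the initial gradient check.
initial_convergence(g, method::ZerothOrderOptimizer, g_tol) = false

g0 = [1e-10, -2e-10]
initial_convergence(g0, FirstOrderSolver(), 1e-8)    # true
initial_convergence(g0, GradientFreeSolver(), 1e-8)  # false
```

Because the check is now an ordinary generic function, a solver can opt out or supply its own criterion by adding a method instead of editing the `optimize` loop.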
4 changes: 4 additions & 0 deletions src/multivariate/solvers/zeroth_order/nelder_mead.jl
@@ -281,6 +281,10 @@ function assess_convergence(state::NelderMeadState, d, options)
return false, false, g_converged, g_converged, false
end

function initial_convergence(d, state::NelderMeadState, method::NelderMead, initial_x, options)
nmobjective(state.f_simplex, state.m, length(initial_x)) < options.g_tol
end

function trace!(tr, d, state, iteration, method::NelderMead, options)
dt = Dict()
if options.extended_trace
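Nelder-Mead has no gradient, so its override measures how flat the current simplex is (via `nmobjective`) rather than a gradient norm. A toy illustration of that idea; `simplex_spread` below is an invented stand-in, not Optim's `nmobjective`:

```julia
# Standard deviation of the objective values over the simplex vertices:
# a gradient-free measure of how flat the simplex currently is.
function simplex_spread(f_simplex)
    m = sum(f_simplex) / length(f_simplex)
    return sqrt(sum(abs2, f_simplex .- m) / length(f_simplex))
end

f_simplex = [1.0000001, 1.0000002, 0.9999999]  # objective values at a nearly flat simplex
simplex_spread(f_simplex) < 1e-6               # true: would count as converged at the start
```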
10 changes: 6 additions & 4 deletions src/utilities/assess_convergence.jl
@@ -63,11 +63,13 @@ function default_convergence_assessment(state::AbstractOptimizerState, d, option
f_increased = true
end

if g_residual(gradient(d)) ≤ options.g_tol
g_converged = true
end

g_converged = gradient_convergence_assessment(state,d,options)

converged = x_converged || f_converged || g_converged

return x_converged, f_converged, g_converged, converged, f_increased
end

gradient_convergence_assessment(state::AbstractOptimizerState, d, options) = g_residual(gradient(d)) ≤ options.g_tol
gradient_convergence_assessment(state::ZerothOrderState, d, options) = false
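The per-iteration check gets the same treatment: `default_convergence_assessment` now delegates to `gradient_convergence_assessment`, which dispatches on the state type, so any state subtyping `ZerothOrderState` skips the gradient test automatically. A self-contained sketch of that dispatch (the concrete state types are made up, and `g_residual` is assumed here to be the infinity-norm residual of the gradient):

```julia
abstract type AbstractOptimizerState end
abstract type ZerothOrderState <: AbstractOptimizerState end

struct FirstOrderState <: AbstractOptimizerState end  # stand-in for e.g. a BFGS state
struct SwarmState <: ZerothOrderState end             # stand-in for e.g. a ParticleSwarm state

g_residual(g) = maximum(abs, g)  # assumption: infinity-norm residual of the gradient

gradient_convergence_assessment(state::AbstractOptimizerState, g, g_tol) = g_residual(g) <= g_tol
gradient_convergence_assessment(state::ZerothOrderState, g, g_tol) = false

g = [0.0, 5e-9]
gradient_convergence_assessment(FirstOrderState(), g, 1e-8)  # true
gradient_convergence_assessment(SwarmState(), g, 1e-8)       # false
```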

45 changes: 41 additions & 4 deletions test/general/convergence.jl
@@ -1,4 +1,12 @@
mutable struct DummyState
mutable struct DummyState <: Optim.AbstractOptimizerState
x
x_previous
f_x
f_x_previous
g
end

mutable struct DummyStateZeroth <: Optim.ZerothOrderState
x
x_previous
f_x
@@ -12,7 +20,13 @@ mutable struct DummyOptions
g_tol
end

@testset "assess_convergence" begin
mutable struct DummyMethod <: Optim.AbstractOptimizer end
mutable struct DummyMethodZeroth <: Optim.ZerothOrderOptimizer end

@testset "Convergence assessment" begin

## assess_convergence

# should converge
x0, x1 = [1.], [1.0 - 1e-7]
f0, f1 = 1.0, 1.0 - 1e-7
@@ -28,8 +42,6 @@ end
f_tol = 1e-12
@test Optim.assess_convergence(x1, x0, f1, f0, g, x_tol, f_tol, g_tol) == (true, false, true, true, true)

ds = DummyState(x1, x0, f1, f0, g)
dOpt = DummyOptions(x_tol, f_tol, g_tol)
@test Optim.assess_convergence(x1, x0, f1, f0, g, x_tol, f_tol, g_tol) == (true, false, true, true, true)

f_tol = 1e-6 # rel tol
@@ -40,5 +52,30 @@ end
dOpt = DummyOptions(x_tol, f_tol, g_tol)
@test Optim.assess_convergence(x1, x0, f1, f0, g, x_tol, f_tol, g_tol) == (true, true, true, true, false)

## initial_convergence and gradient_convergence_assessment

ds = DummyState(x1, x0, f1, f0, g)
dOpt = DummyOptions(x_tol, f_tol, g_tol)
dm = DummyMethod()

# >= First Order
d = Optim.OnceDifferentiable(x->sum(abs2.(x)),zeros(2))

Optim.gradient!(d,ones(2))
@test Optim.gradient_convergence_assessment(ds,d,dOpt) == false
Optim.gradient!(d,zeros(2))
@test Optim.gradient_convergence_assessment(ds,d,dOpt) == true

@test Optim.initial_convergence(d, ds, dm, ones(2), dOpt) == false
@test Optim.initial_convergence(d, ds, dm, zeros(2), dOpt) == true

# Zeroth order methods have no gradient -> returns false by default
ds = DummyStateZeroth(x1, x0, f1, f0, g)
dm = DummyMethodZeroth()

@test Optim.gradient_convergence_assessment(ds,d,dOpt) == false
@test Optim.initial_convergence(d, ds, dm, ones(2), dOpt) == false

# should check all other methods as well

end
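For end users the change is behaviour-preserving: gradient-based solvers can still stop at iteration 0 when the starting point is already (numerically) stationary, while gradient-free solvers never evaluate a gradient up front. A quick, informal way to see both paths (solver and starting-point choices here are just examples):

```julia
using Optim

rosenbrock(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
x_min = [1.0, 1.0]  # the known minimizer

# First-order path: the initial gradient check may already satisfy g_tol here.
res_grad = optimize(rosenbrock, x_min, GradientDescent())
# Zeroth-order path: no gradient is used; Nelder-Mead runs its simplex loop.
res_nm = optimize(rosenbrock, x_min, NelderMead())

Optim.iterations(res_grad), Optim.iterations(res_nm)
```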
