Skip to content

Commit

Permalink
[WIP] Fix HagerZhang bugs (#136)
Browse files Browse the repository at this point in the history
* fixes bugs and paper discrepancies in HagerZhang linesearch

* Initial HagerZhang fixes

* fix tests
  • Loading branch information
mohamed82008 authored and pkofod committed Nov 6, 2018
1 parent 1f40bf3 commit fae0768
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 83 deletions.
53 changes: 27 additions & 26 deletions src/hagerzhang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
if !(isfinite(phi_0) && isfinite(dphi_0))
throw(ArgumentError("Value and slope at step length = 0 must be finite."))
end
if dphi_0 >= zeroT
if dphi_0 >= eps(T) * abs(phi_0)
throw(ArgumentError("Search direction is not a direction of descent."))
elseif dphi_0 >= 0
return zeroT, phi_0
end

# Prevent values of x_new = x+αs that are likely to make
Expand All @@ -132,7 +134,8 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,


phi_lim = phi_0 + epsilon * abs(phi_0)
@assert c > zeroT
@assert c >= 0
c <= eps(T) && return zeroT, phi_0
@assert isfinite(c) && c <= alphamax
phi_c, dphi_c = ϕdϕ(c)
iterfinite = 1
Expand All @@ -145,7 +148,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
if !(isfinite(phi_c) && isfinite(dphi_c))
@warn("Failed to achieve finite new evaluation point, using alpha=0")
mayterminate[] = false # reset in case another initial guess is used next
return zeroT, ϕ(zeroT) # phi_0
return zeroT, phi_0
end
push!(alphas, c)
push!(values, phi_c)
Expand Down Expand Up @@ -191,32 +194,40 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
# The value is higher, but the slope is downward, so we must
# have crested over the peak. Use bisection.
ib = length(alphas)
ia = ib - 1
ia = 1
if c alphas[ib] || slopes[ib] >= zeroT
error("c = ", c)
end
# ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
ia, ib = bisect!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, display)
isbracketed = true
else
# We'll still going downhill, expand the interval and try again
# We'll still going downhill, expand the interval and try again.
# Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k
# So cold = c has a lower objective than phi_0 up to epsilon.
# This makes it a viable step to return if bracketing fails.

# Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
# Going back to the loop with c = cold will only result in infinite cycling.
# So returning (cold, phi_cold) and exiting the line search is the best move.
cold = c
phi_cold = phi_c
if nextfloat(cold) >= alphamax
mayterminate[] = false # reset in case another initial guess is used next
return cold, phi_cold
end
c *= rho
if c > alphamax
c = (alphamax + cold)/2
c = alphamax
if display & BRACKET > 0
println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax,
", cold = ", cold, ", new c = ", c)
end
if c == cold || nextfloat(c) >= alphamax
mayterminate[] = false # reset in case another initial guess is used next
return cold, phi_c
println("bracket: exceeding alphamax, using c = alphamax = ", alphamax,
", cold = ", cold)
end
end
phi_c, dphi_c = ϕdϕ(c)
iterfinite = 1
while !(isfinite(phi_c) && isfinite(dphi_c)) && c > nextfloat(cold) && iterfinite < iterfinitemax
alphamax = c
alphamax = c # shrinks alphamax, assumes that steps >= c can never have finite phi_c and dphi_c
iterfinite += 1
if display & BRACKET > 0
println("bracket: non-finite value, bisection")
Expand All @@ -225,24 +236,14 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
phi_c, dphi_c = ϕdϕ(c)
end
if !(isfinite(phi_c) && isfinite(dphi_c))
mayterminate[] = false # reset in case another initial guess is used next
return cold, ϕ(cold)
elseif dphi_c < zeroT && c == alphamax
# We're on the edge of the allowed region, and the
# value is still decreasing. This can be due to
# roundoff error in barrier penalties, a barrier
# coefficient being so small that being eps() away
# from it still doesn't turn the slope upward, or
# mistakes in the user's function.
if iterfinite >= iterfinitemax
if display & BRACKET > 0
println("Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.")
println("c = ", c,
", alphamax = ", alphamax,
", phi_c = ", phi_c,
", dphi_c = ", dphi_c)
end
mayterminate[] = false # reset in case another initial guess is used next
return c, phi_c
return cold, phi_cold
end
push!(alphas, c)
push!(values, phi_c)
Expand Down Expand Up @@ -394,7 +395,7 @@ function secant2!(ϕdϕ,
# we updated a, do it for b too
c = secant(alphas, values, slopes, ia, iA)
end
if a <= c <= b
if (iA == ic || iB == ic) && a <= c <= b
if display & SECANT2 > 0
println("secant2: second c = ", c)
end
Expand Down
120 changes: 64 additions & 56 deletions src/initialguess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,14 @@ If α0 is NaN, then procedure I0 is called at the first iteration,
otherwise, we select according to procedure I1-2, with starting value α0.
"""
@with_kw struct InitialHagerZhang{T}
ψ0::T = 0.01
ψ1::T = 0.2
ψ2::T = 2.0
ψ3::T = 0.1
αmax::T = Inf
α0::T = 1.0 # Initial alpha guess. NaN => algorithm calculates
verbose::Bool = false
ψ0::T = 0.01
ψ1::T = 0.2
ψ2::T = 2.0
ψ3::T = 0.1
αmax::T = Inf
α0::T = 1.0 # Initial alpha guess. NaN => algorithm calculates
quadstep::Bool = true
verbose::Bool = false
end

function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
Expand All @@ -181,6 +182,7 @@ function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
# pick the initial step size according to HZ #I0
state.alpha = _hzI0(state.x, NLSolversBase.gradient(df),
NLSolversBase.value(df),
is.αmax,
convert(eltype(state.x), is.ψ0)) # Hack to deal with type instability between is{T} and state.x
if Tls <: HagerZhang
ls.mayterminate[] = false
Expand All @@ -194,7 +196,7 @@ function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
end
T = eltype(state.alpha)
state.alpha = _hzI12(state.alpha, df, state.x, state.s, state.x_ls, phi_0, dphi_0,
is.ψ1, is.ψ2, is.ψ3, convert(T, is.αmax), is.verbose, mayterminate)
is.ψ1, is.ψ2, is.ψ3, T(is.αmax), is.verbose, is.quadstep, mayterminate)
end
return state.alpha
end
Expand All @@ -212,6 +214,7 @@ function _hzI12(alpha::T,
psi3::Real,
alphamax::T,
verbose::Bool,
quadstep::Bool,
mayterminate) where {Tx,T}
ϕ = make_ϕ(df, x_new, x, s)

Expand All @@ -221,65 +224,55 @@ function _hzI12(alpha::T,

alphatest = psi1 * alpha
alphatest = min(alphatest, alphamax)

alphatest == 0 && return alphatest
phitest = ϕ(alphatest)
alphatest, phitest = get_finite(alphatest, phitest, ϕ, psi3, iterfinitemax, phi_0, mayterminate)
alphatest == 0 && return alphatest

iterfinite = 1
while !isfinite(phitest)
if iterfinite >= iterfinitemax
mayterminate[] = true
return convert(T, 0)
# TODO: Throw error / LineSearchException instead?
# error("Failed to achieve finite test value; alphatest = ", alphatest)
end

alphatest = psi3 * alphatest
phitest = ϕ(alphatest)
iterfinite += 1
end
a = ((phitest-phi_0)/alphatest - dphi_0)/alphatest # quadratic fit

if verbose == true
println("quadfit: alphatest = ", alphatest,
", phi_0 = ", phi_0,
", dphi_0 = ", dphi_0,
", phitest = ", phitest,
", quadcoef = ", a)
end
mayterminate[] = false
if isfinite(a) && a > 0 && phitest <= phi_0
alpha = -dphi_0 / 2 / a # if convex, choose minimum of quadratic
if alpha == 0
error("alpha is zero. dphi_0 = ", dphi_0, ", phi_0 = ", phi_0, ", phitest = ", phitest, ", alphatest = ", alphatest, ", a = ", a)
end
if alpha <= alphamax
mayterminate[] = true
else
alpha = alphamax
mayterminate[] = false
end
mayterminate[] = quadstep_success = false
if quadstep
a = ((phitest - phi_0) / alphatest - dphi_0) / alphatest # quadratic fit
if verbose == true
println("alpha guess (quadratic): ", alpha,
",(mayterminate = ", mayterminate, ")")
println("quadfit: alphatest = ", alphatest,
", phi_0 = ", phi_0,
", dphi_0 = ", dphi_0,
", phitest = ", phitest,
", quadcoef = ", a)
end
else
if phitest > phi_0
alpha = alphatest
else
alpha *= psi2 # if not convex, expand the interval
if isfinite(a) && a > 0 && phitest <= phi_0
alphatest2 = -dphi_0 / 2 / a # if convex, choose minimum of quadratic
alphatest2 = min(alphatest2, alphamax)
phitest2 = ϕ(alphatest2)
if isfinite(phitest2)
mayterminate[] = quadstep_success = true
alphatest = alphatest2
phitest = phitest2
if verbose == true
println("alpha guess (quadratic): ", alphatest,
",(mayterminate = ", mayterminate[], ")")
end
end
end
end
alpha = min(alphamax, alpha)
if verbose == true
println("alpha guess (expand): ", alpha)
if (!quadstep || !quadstep_success) && phitest <= phi_0
# If no quadstep or it fails, expand the interval.
# While the phitest <= phi_0 condition was not in the paper, it gives a significant boost to the speed. The rationale behind it is that since the slope at alpha = 0 is negative, if phitest > phi_0 then a local minimum must be between alpha = 0 and alpha = alphatest, so alpha_test is good enough to return.
alphatest = psi2 * alpha
alphatest = min(alphatest, alphamax)
phitest = ϕ(alphatest)
alphatest, phitest = get_finite(alphatest, phitest, ϕ, psi3, iterfinitemax, phi_0, mayterminate)
if verbose == true
println("alpha guess (expand): ", alphatest)
end
end
return alpha
return alphatest
end

# Generate initial guess for step size (HZ, stage I0)
function _hzI0(x::AbstractArray{Tx},
gr::AbstractArray{Tx},
f_x::T,
alphamax::T,
psi0::T = convert(T, 1)/100) where {Tx,T}
zeroT = convert(T, 0)
alpha = convert(T, 1)
Expand All @@ -289,8 +282,23 @@ function _hzI0(x::AbstractArray{Tx},
if x_max != zeroT
alpha = psi0 * x_max / gr_max
elseif f_x != zeroT
alpha = psi0 * abs(f_x) / norm(gr)
alpha = psi0 * abs(f_x) / norm(gr)^2
end
end
return alpha
return min(alpha, alphamax)
end

function get_finite(alpha::T, phi, ϕ, factor, itermax, phi_0, mayterminate) where {T}
iter = 1
while !isfinite(phi)
if iter >= itermax
mayterminate[] = true
return T(0), phi_0
end

alpha = factor * alpha
phi = ϕ(alpha)
iter += 1
end
return alpha, phi
end
2 changes: 1 addition & 1 deletion test/initial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
state.f_x_previous = 2*phi_0
is = InitialQuadratic(snap2one=(0.9,Inf))
is(ls, state, phi_0, dphi_0, df)
@test state.alpha == 0.8200000000000001
@test state.alpha == 1.0
@test ls.mayterminate[] == false

# Test Quadratic snap2one
Expand Down

0 comments on commit fae0768

Please sign in to comment.