From 680edc760617a8e5ad93c9260fe791c9908a9e9d Mon Sep 17 00:00:00 2001 From: timholy Date: Sat, 8 Mar 2014 13:48:39 -0600 Subject: [PATCH 01/28] Add support for constraints and a interior-point methods --- src/Optim.jl | 11 +- src/cg.jl | 21 ++- src/constraints.jl | 121 ++++++++++++++ src/interior.jl | 275 ++++++++++++++++++++++++++++++++ src/linesearch/hz_linesearch.jl | 133 +++++++-------- 5 files changed, 488 insertions(+), 73 deletions(-) create mode 100644 src/constraints.jl create mode 100644 src/interior.jl diff --git a/src/Optim.jl b/src/Optim.jl index 64d10de41..37fa65c25 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -9,12 +9,18 @@ module Optim Base.setindex! export optimize, + interior, + linlsq, DifferentiableFunction, - TwiceDifferentiableFunction + TwiceDifferentiableFunction, + ConstraintsBox # Types include("types.jl") + # Types for constrained optimization + include("constraints.jl") + # Automatic differentiation utilities include("autodiff.jl") @@ -65,6 +71,9 @@ module Optim include("golden_section.jl") include("brent.jl") + # Constrained optimization algorithms + include("interior.jl") + # End-User Facing Wrapper Functions include("optimize.jl") diff --git a/src/cg.jl b/src/cg.jl index fb2834344..b175bc5f4 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -104,6 +104,8 @@ end function cg{T}(df::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -117,7 +119,9 @@ function cg{T}(df::Union(DifferentiableFunction, precondprep::Function = (P, x) -> nothing) # Maintain current state in x and previous state in x_previous - x, x_previous = copy(initial_x), copy(initial_x) + x = copy(initial_x) + project!(x, constraints) + x_previous = copy(x) # Count the total number of iterations iteration = 0 @@ -208,23 +212,28 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : inf(T) + # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = - alphatry(alpha, df, x, s, x_ls, gr_ls, lsr) + alphatry(alpha, df, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update + if alpha == zero(T) + x_converged = true + break + end + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position copy!(x_previous, x) # Update current position - for i in 1:n - @inbounds x[i] = x[i] + alpha * s[i] - end + step!(x, x, s, alpha, constraints) # Maintain a record of the previous gradient copy!(gr_previous, gr) diff --git a/src/constraints.jl b/src/constraints.jl new file mode 100644 index 000000000..9a4e30504 --- /dev/null +++ b/src/constraints.jl @@ -0,0 +1,121 @@ +abstract AbstractConstraints + +immutable ConstraintsNone <: AbstractConstraints end + +immutable ConstraintsBox{T,N} <: AbstractConstraints + lower::Array{T,N} + upper::Array{T,N} + + function ConstraintsBox(l::AbstractArray{T}, u::AbstractArray{T}) + size(l) == size(u) || error("The sizes of the bounds must match") + for i = 1:length(l) + l[i] <= u[i] || error("The lower bound must be smaller than the upper bound") + end + new(l, u) + end +end +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, infs(T, size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u::AbstractArray{T,N}) + +# FIXME: need a way to indicate equality/inequality +immutable ConstraintsL{T,M<:AbstractMatrix,V<:AbstractVector} <: AbstractConstraints + A::M + b::V +end + +# Generic constraints +immutable ConstraintsNL{F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints + funcs::Vector{F} +end + +type Constraints + bounds::AbstractConstraints + leq::AbstractConstraints + lineq::AbstractConstraints + nleq::AbstractConstraints + nlineq::AbstractConstraints + mu::Real +end +Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone()) + + +## feasible +feasible(x, constraints::ConstraintsNone, ineq::Bool) = true + +function feasible(x, constraints::ConstraintsBox) + l = constraints.lower + u = constraints.upper + for i = 1:length(x) + l[i] <= x[i] <= u[i] || return false + end + true +end + + +## project! +project!(x, constraints::AbstractConstraints) = x + +function project!(x::Array, bounds::ConstraintsBox) + for i = 1:length(x) + x[i] = max(bounds.lower[i], min(bounds.upper[i], x[i])) + end + x +end + +## step! 
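The step! helper defined just below folds projection into a trial step: it forms xtmp = x + alpha*s and then clamps the result back into the feasible set. A usage sketch with hypothetical values, assuming the definitions in this file:

    bounds = ConstraintsBox([-2.0, -2.0], [2.0, 2.0])
    x = [0.0, 0.0]; s = [5.0, 1.0]
    xtmp = similar(x)
    step!(xtmp, x, s, 1.0, bounds)   # xtmp == [2.0, 1.0]; the first coordinate was clamped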
+function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone{T}()) + (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree") + for i = 1:length(xtmp) + @inbounds xtmp[i] = x[i] + alpha*s[i] + end + project!(xtmp, constraints) +end + + +## toedge and tocorner +# For bounds constraints, +# - toedge returns the "distance along s" (the value of the coefficient alpha) to reach the boundary +# - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active) +# The first is the farthest you might consider searching in a barrier method. The second is the farthest that it +# makes sense to search in a projection method. +toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) + +function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the first coordinate to leave the box + alphamax = inf(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = min(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = min(alphamax, (ui-x[i])/si) + end + end + convert(T, alphamax) +end + +toedge(x, s, constraints::Constraints) = toedge(x, s, constraints.bounds) + +function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the last coordinate to leave the box + alphamax = zero(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = max(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = max(alphamax, (ui-x[i])/si) + else + return inf(T) + end + end + convert(T, alphamax) +end + +tocorner(x, s, constraints::Constraints) = tocorner(x, s, constraints.bounds) diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..be1835830 --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,275 @@ +############ +## +## Barriers +## +############ +# Note: barrier_g! and barrier_h! 
simply add to an existing g or H +# (they do not initialize with zeros) + +## Box constraints +function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return inf(T) + phi += log(xi-li) + end + if isfinite(ui) + xi <= ui || return inf(T) + phi += log(ui-xi) + end + end + -phi +end + +function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + g[i] -= 1/(xi-li) + g[i] += 1/(ui-xi) + end +end + +function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return inf(T) # no need to compute additional gradient terms + phi += log(xi-li) + g[i] -= 1/(xi-li) + end + if isfinite(ui) + xi <= ui || return inf(T) + phi += log(ui-xi) + g[i] += 1/(ui-xi) + end + end + -phi +end + +function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + dx = xi-li + H[i,i] += 1/(dx*dx) + dx = ui-xi + H[i,i] += 1/(dx*dx) + end +end + +## Linear inequalities + +## Nonlinear inequalities + + +######################### +## +## Combined cost function +## +######################### +combined_f(x, objective_f::Function, constraints::AbstractConstraints, t) = + t*objective_f(x) + barrier_f(x, constraints) + +function combined_g!(x, g, objective_g!::Function, constraints::AbstractConstraints, t) + objective_g!(x, g) + scale!(g, t) + barrier_fg!(x, g, constraints) +end + +function combined_fg!(x, g, objective_fg!::Function, constraints::AbstractConstraints, t) + f_x = t*objective_fg!(x, g) + scale!(g, t) + f_x += barrier_fg!(x, g, constraints) + f_x +end + +function combined_h!(x, H, objective_h!::Function, constraints::AbstractConstraints, t) + objective_h!(x, H) + scale!(H, t) + barrier_h!(x, H, constraints) +end + +# combined_f(x, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_f(x, objective.f, constraints, t) +# +# combined_g!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_g!(x, g, objective.g, constraints, t) +# +# combined_fg!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_fg!(x, g, objective.fg!, constraints, t) +# +# combined_h!(x, H, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_h!(x, H, objective.h!, constraints, t) + + +######################################### +## +## Cost function for linear least squares +## +######################################### + +linlsq(A::Matrix, b::Vector) = + TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,H) -> linlsq_h!(x, H, A, b)) + +dummy(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) + +function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) + A_mul_B!(scratch, A, x) + f_x = 
zero(typeof(one(T)*one(T)+one(T)*one(T))) + for i = 1:length(g) + tmp = scratch[i] + tmp -= b[i] + f_x += tmp*tmp + scratch[i] = tmp + end + if !isempty(g) + At_mul_B!(g, A, scratch) + end + f_x/2 +end + +linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) + + +######################### +## +## Interior point methods +## +######################### +function interior_newton{T}(objective::TwiceDifferentiableFunction, + initial_x::Vector{T}, + constraints::AbstractConstraints; + t = one(T), mu = 10, eps_gap = 1e-12, + xtol::Real = convert(T,1e-32), + ftol::Real = convert(T,1e-8), + grtol::Real = convert(T,1e-8), + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + xtmp = similar(x) + x_previous = similar(x) + m = length(x) + gr = similar(x) + H = Array(T, m, m) + lsr = LineSearchResults(T) + + tr = OptimizationTrace() + tracing = store_trace || show_trace || extended_trace + + df = DifferentiableFunction(dummy, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + iteration, f_calls, g_calls = 0, 0, 0 + f_x = f_x_previous = inf(T) + while m/t > eps_gap && iteration < iterations + f_x_previous = f_x + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + combined_h!(x, H, objective.h!, constraints, t) + @newtontrace + cH = cholfact!(H) + s = -(cH\gr) + clear!(lsr) + push!(lsr, zero(T), f_x, dot(s,gr)) + alphamax = toedge(x, s, constraints) + alpha = alphamax < 1 ? 0.9*alphamax : 1.0 + alpha, _f_calls, _g_calls = + hz_linesearch!(df, x, s, xtmp, gr, lsr, alpha, true, constraints, alphamax) + copy!(x_previous, x) + step!(x, x, s, alpha, constraints) + iteration += 1 + f_calls += _f_calls + g_calls += _g_calls + f_x /= t + if alphamax >= 1 + t *= mu + df = DifferentiableFunction(dummy, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + end + end + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + grtol) + MultivariateOptimizationResults("Interior/Newton", + initial_x, + x, + float64(f_x), + iteration, + iteration == iterations, + x_converged, + xtol, + f_converged, + ftol, + gr_converged, + grtol, + tr, + f_calls, + g_calls) +end + +function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), + initial_x::Vector{T}, + constraints::AbstractConstraints; + method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) + if method == :newton + return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) + end + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + m = length(x) + local results + iteration, f_calls, g_calls = 0, 0, 0 + while m/t > eps_gap + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), + (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) + results = optimize(df, x, method=method) + copy!(x, results.minimum) + iteration += results.iterations + f_calls += results.f_calls + g_calls += results.g_calls + results.f_minimum /= t + t *= mu + end + results.iterations, results.f_calls, results.g_calls = iteration, f_calls, g_calls + copy!(results.initial_x, initial_x) + results +end diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 815a1112a..27d24f433 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -35,23 +35,17 @@ display_nextbit = 14 # paper (these may or may not be extensions of the cg_descent code # that can be downloaded from Hager's site; his code has undergone # numerous revisions since publication of the paper): -# cgdescent: the termination condition employs a "unit-correct" -# expression rather than a condition on gradient -# components---whether this is a good or bad idea will require -# additional experience, but preliminary evidence seems to suggest -# that it makes "reasonable" choices over a wider range of problem -# types. -# linesearch: the Wolfe conditions are checked only after alpha is +# - The Wolfe conditions are checked only after alpha is # generated either by quadratic interpolation or secant # interpolation, not when alpha is generated by bisection or # expansion. This increases the likelihood that alpha will be a # good approximation of the minimum. -# linesearch: In step I2, we multiply by psi2 only if the convexity +# - In step I2, we multiply by psi2 only if the convexity # test failed, not if the function-value test failed. This # prevents one from going uphill further when you already know # you're already higher than the point at alpha=0. -# both: checks for Inf/NaN function values -# both: support maximum value of alpha (equivalently, c). This +# - Checks for Inf/NaN function values +# - Support maximum value of alpha (equivalently, c). This # facilitates using these routines for constrained minimization # when you can calculate the distance along the path to the # disallowed region. (When you can't easily calculate that @@ -62,6 +56,7 @@ display_nextbit = 14 # which a finite value will be returned. See, e.g., limits_box # below. The default value for alphamax is Inf. See alphamaxfunc # for cgdescent and alphamax for linesearch_hz. 
+# - Support for projected-gradient methods const DEFAULTDELTA = 0.1 const DEFAULTSIGMA = 0.9 @@ -97,12 +92,13 @@ function alphatry{T}(alpha::T, xtmp::Array, gtmp::Array, lsr::LineSearchResults, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = inf(T), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), iterfinitemax::Integer = iceil(-log2(eps(T))), - alphamax::Real = inf(T), - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -110,17 +106,16 @@ function alphatry{T}(alpha::T, dphi0 = lsr.slope[1] alphatest = psi1 * alpha + alphamax = min(alphamax, tocorner(x, s, constraints)) alphatest = min(alphatest, alphamax) - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 iterfinite = 1 while !isfinite(phitest) alphatest = psi3 * alphatest - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 lsr.nfailures += 1 iterfinite += 1 @@ -130,7 +125,7 @@ function alphatry{T}(alpha::T, end end a = ((phitest-phi0)/alphatest - dphi0)/alphatest # quadratic fit - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("quadfit: alphatest = ", alphatest, ", phi0 = ", phi0, ", phitest = ", phitest, @@ -148,7 +143,7 @@ function alphatry{T}(alpha::T, alpha = alphamax mayterminate = false end - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (quadratic): ", alpha, ",(mayterminate = ", mayterminate, ")") end @@ -160,7 +155,7 @@ function alphatry{T}(alpha::T, end end alpha = min(alphamax, alpha) - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (expand): ", alpha) end return alpha, mayterminate, f_calls, g_calls @@ -175,29 +170,32 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr::LineSearchResults{T}, c::Real, mayterminate::Bool, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = inf(T), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - alphamax::Real = inf(T), rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), linesearchmax::Integer = 50, psi3::Real = convert(T,0.1), iterfinitemax::Integer = iceil(-log2(eps(T))), - display::Integer = 0) - if display & LINESEARCH > 0 + detailed_trace::Integer = 0) + if detailed_trace & LINESEARCH > 0 println("New linesearch") end f_calls = 0 g_calls = 0 + alphamax = min(alphamax, tocorner(x, s, constraints)) + phi0 = lsr.value[1] dphi0 = lsr.slope[1] (isfinite(phi0) && isfinite(dphi0)) || error("Initial value and slope must be finite") philim = phi0 + epsilon * abs(phi0) @assert c > 0 @assert isfinite(c) && c <= alphamax - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -206,7 +204,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr.nfailures += 1 iterfinite += 1 c *= psi3 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -219,7 +217,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, # satisfies the Wolfe conditions if mayterminate && satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 
println("Wolfe condition satisfied on point alpha = ", c) end return c, f_calls, g_calls # phic @@ -232,7 +230,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, iter = 1 cold = -one(T) while !isbracketed && iter < linesearchmax - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracketing: ia = ", ia, ", ib = ", ib, ", c = ", c, @@ -259,7 +257,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, error("c = ", c, ", lsr = ", lsr) end # ia, ib = bisect(phi, lsr, ia, ib, philim) # TODO: Pass options - ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, display) + ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up isbracketed = true @@ -269,7 +267,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c *= rho if c > alphamax c = (alphamax + cold)/2 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax, ", cold = ", cold, ", new c = ", c) end @@ -277,7 +275,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, return cold, f_calls, g_calls end end - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -285,11 +283,11 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, alphamax = c lsr.nfailures += 1 iterfinite += 1 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: non-finite value, bisection") end c = (cold + c) / 2 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -319,18 +317,19 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, a = lsr.alpha[ia] b = lsr.alpha[ib] @assert b > a - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, ", ib = ", ib, ", a = ", a, ", b = ", b, ", phi(a) = ", lsr.value[ia], ", phi(b) = ", lsr.value[ib]) + println("phi(a) == phi(b) ? 
", lsr.value[ia] == lsr.value[ib]) end if b - a <= eps(b) return a, f_calls, g_calls # lsr.value[ia] end - iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display) + iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, delta, sigma, detailed_trace) f_calls += f_up g_calls += g_up if iswolfe @@ -340,12 +339,12 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, B = lsr.alpha[iB] @assert B > A if B - A < gamma * (b - a) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant succeeded") end if nextfloat(lsr.value[ia]) >= lsr.value[ib] && nextfloat(lsr.value[iA]) >= lsr.value[iB] # It's so flat, secant didn't do anything useful, time to quit - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant suggests it's flat") end return A, f_calls, g_calls @@ -354,18 +353,18 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, ib = iB else # Secant is converging too slowly, use bisection - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant failed, using bisection") end c = (A + B) / convert(T, 2) # phic = phi(gphi, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) # ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options - ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display) + ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end @@ -408,9 +407,10 @@ function secant2!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, + constraints::AbstractConstraints = ConstraintsNone(), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -423,28 +423,28 @@ function secant2!{T}(df::Union(DifferentiableFunction, @assert dphia < 0 @assert dphib >= 0 c = secant(a, b, dphia, dphib) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) end @assert isfinite(c) # phic = phi(tmpc, c) # Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: first c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end # iA, iB = update(phi, lsr, ia, ib, ic, philim) - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic) end a = lsr.alpha[iA] @@ -458,11 +458,11 @@ function secant2!{T}(df::Union(DifferentiableFunction, c = secant(lsr, ia, iA) end if a <= c <= b - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 
println("secant2: second c = ", c) end # phic = phi(tmpc, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) @@ -470,16 +470,16 @@ function secant2!{T}(df::Union(DifferentiableFunction, ic = length(lsr) # Check arguments here if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB]) end return false, iA, iB, f_calls, g_calls @@ -500,7 +500,8 @@ function update!(df::Union(DifferentiableFunction, ib::Integer, ic::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, eq. 4.4): @@ -511,7 +512,7 @@ function update!(df::Union(DifferentiableFunction, c = lsr.alpha[ic] phic = lsr.value[ic] dphic = lsr.slope[ic] - if display & UPDATE > 0 + if detailed_trace & UPDATE > 0 println("update: ia = ", ia, ", a = ", a, ", ib = ", ib, @@ -536,7 +537,7 @@ function update!(df::Union(DifferentiableFunction, end # phic is bigger than phi0, which implies that the minimum # lies between a and c. Find it via bisection. - return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display) + return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, constraints, detailed_trace) end # HZ, stage U3 (with theta=0.5) @@ -550,7 +551,8 @@ function bisect!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 gphi = nan(T) @@ -563,11 +565,11 @@ function bisect!{T}(df::Union(DifferentiableFunction, @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) - if display & BISECT > 0 + if detailed_trace & BISECT > 0 println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a) end d = (a + b) / convert(T, 2) - phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true) + phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phid) && isfinite(gphi) @@ -588,19 +590,18 @@ function bisect!{T}(df::Union(DifferentiableFunction, end # Define one-parameter function for line searches -function linefunc!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), - x::Array, - s::Array, - alpha::Real, - xtmp::Array, - g::Array, - calc_grad::Bool) +function linefunc!{T}(df::Union(DifferentiableFunction, + TwiceDifferentiableFunction), + x::Array{T}, + s::Array, + alpha::Real, + xtmp::Array, + g::Array, + calc_grad::Bool, + constraints::AbstractConstraints = ConstraintsNone()) f_calls = 0 g_calls = 0 - for i = 1:length(x) - xtmp[i] = x[i] + alpha * s[i] - end + step!(xtmp, x, s, alpha, constraints) gphi = nan(eltype(g)) if calc_grad val = df.fg!(xtmp, g) From 809be9637c825eb4f3545e75c9af48fdcb827a13 Mon Sep 17 00:00:00 2001 From: timholy Date: Mon, 10 Mar 2014 22:00:53 
-0500 Subject: [PATCH 02/28] Add constrained tests, and don't load problems unless running tests --- src/Optim.jl | 9 ++-- src/problems/constrained.jl | 77 +++++++++++++++++++++++++++++++ test/cg.jl | 34 +++++++++----- test/momentum_gradient_descent.jl | 2 +- test/runtests.jl | 4 ++ 5 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 src/problems/constrained.jl diff --git a/src/Optim.jl b/src/Optim.jl index 37fa65c25..4fc17d2af 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -77,8 +77,11 @@ module Optim # End-User Facing Wrapper Functions include("optimize.jl") - # Examples for testing - include(joinpath("problems", "unconstrained.jl")) - cgdescent(args...) = error("API has changed. Please use cg.") + + # Tests + const basedir = dirname(Base.source_path()) + const testpaths = [joinpath(basedir, "problems", "unconstrained.jl"), + joinpath(basedir, "problems", "constrained.jl")] + end diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..37f8710a6 --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,77 @@ +module ConstrainedProblems + +using Optim + +immutable ConstrainedProblem{C<:Optim.AbstractConstraints} + name::ASCIIString + f::Function + g!::Function + h!::Function + constraints::C + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +pexamples = Dict{ASCIIString, ConstrainedProblem}() + +##################################### +### +### Minimum distance, box constraints +### +##################################### + +function sqrdist(x) + dx1 = x[1] - 5 + dx2 = x[2] - 3 + (dx1*dx1 + dx2*dx2)/2 +end + +function sqrdist_gradient!(x, g) + g[1] = x[1] - 5 + g[2] = x[2] - 3 +end + +function sqrdist_hessian!(x, H) + H[1,1] = H[2,2] = 1 + H[1,2] = H[2,1] = 0 +end + +constraints = Optim.ConstraintsBox([-2.0,-2.0],[2.0,2.0]) + +pexamples["SqrdistBox"] = ConstrainedProblem("SqrdistBox", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[2.0, 2.0]}, + true, + true) + +constraints = Optim.ConstraintsBox(nothing,[2.0,2.0]) + +pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[2.0, 2.0]}, + true, + true) + +constraints = Optim.ConstraintsBox([-2.0,-2.0],nothing) + +pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[5.0, 3.0]}, + true, + true) + +end # module diff --git a/test/cg.jl b/test/cg.jl index fa99fd27a..22742be8a 100644 --- a/test/cg.jl +++ b/test/cg.jl @@ -1,18 +1,30 @@ using Optim -for (name, prob) in Optim.UnconstrainedProblems.examples - if prob.isdifferentiable - if name == "Himmelbrau" - continue - end - df = DifferentiableFunction(prob.f, prob.g!) - res = Optim.cg(df, prob.initial_x) - if length(prob.solutions) == 1 - @assert norm(res.minimum - prob.solutions[1]) < 1e-2 - end - end +for (name, prob) in UnconstrainedProblems.examples + if prob.isdifferentiable + if name == "Himmelbrau" + continue + end + df = DifferentiableFunction(prob.f, prob.g!) + res = Optim.cg(df, prob.initial_x) + if length(prob.solutions) == 1 + @assert norm(res.minimum - prob.solutions[1]) < 1e-2 + end + end end +for (name, prob) in ConstrainedProblems.pexamples + if prob.isdifferentiable + df = DifferentiableFunction(prob.f, prob.g!) 
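For reference, the expected solutions asserted by this loop are simply the unconstrained optimum of sqrdist, [5.0, 3.0], projected onto each feasible box; the loop resumes just below this sketch (which assumes the problem definitions in src/problems/constrained.jl):

    xstar = [5.0, 3.0]    # unconstrained optimum of sqrdist
    Optim.project!(xstar, Optim.ConstraintsBox([-2.0, -2.0], [2.0, 2.0]))
    # xstar == [2.0, 2.0], matching the expected solution of "SqrdistBox"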
+ res = Optim.cg(df, prob.initial_x, constraints=prob.constraints) + if length(prob.solutions) == 1 + @assert norm(res.minimum - prob.solutions[1]) < 1e-2 + end + end +end + + +# Functions of arrays let objective(X, B) = sum((X.-B).^2)/2 diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl index abe0fe7a3..e79c3bfb3 100644 --- a/test/momentum_gradient_descent.jl +++ b/test/momentum_gradient_descent.jl @@ -1,4 +1,4 @@ -p = Optim.UnconstrainedProblems.examples["Rosenbrock"] +p = UnconstrainedProblems.examples["Rosenbrock"] df = DifferentiableFunction(p.f, p.g!) diff --git a/test/runtests.jl b/test/runtests.jl index 85ba62b63..d20712504 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,10 @@ using Optim +for path in Optim.testpaths + require(path) +end + my_tests = ["bfgs.jl", "gradient_descent.jl", "momentum_gradient_descent.jl", From 83a0bc33077545109fa66d67843d9c69877a0aba Mon Sep 17 00:00:00 2001 From: timholy Date: Fri, 14 Mar 2014 07:27:32 -0500 Subject: [PATCH 03/28] Implement linear constraints --- src/constraints.jl | 55 ++++++++++++++++++++++++++++++++++--------- src/interior.jl | 58 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 9a4e30504..434a439e7 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -14,27 +14,50 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints new(l, u) end end -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, infs(T, size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u::AbstractArray{T,N}) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, infs(T, size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u) -# FIXME: need a way to indicate equality/inequality -immutable ConstraintsL{T,M<:AbstractMatrix,V<:AbstractVector} <: AbstractConstraints +immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M - b::V + lower::Array{T,N} + upper::Array{T,N} + scratch1::Array{T,N} + scratch2::Array{T,N} + scratch3::Array{T,N} + + function ConstraintsL(A, l::Array{T,N}, u::Array{T,N}, + scratch1 = similar(l), scratch2 = similar(l), scratch3 = Array(T, ndims(l) == 1 ? 
size(A, 2) : (size(A,2),size(l,2))))
+        size(A, 1) == size(l,1) == size(u,1) || error("The sizes of the bounds must match the size of A")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(A, l, u, scratch1, scratch2, scratch3)
+    end
+end
+ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Union(Vector{T},Matrix{T})) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u)
+ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Nothing) = ConstraintsL(A, l, infs(T, size(l)))
+ConstraintsL{T}(A::AbstractMatrix, l::Nothing, u::Union(Vector{T},Matrix{T})) = ConstraintsL(A, -infs(T,size(u)), u)
 
 # Generic constraints
-immutable ConstraintsNL{F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints
+immutable ConstraintsNL{T,F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints
     funcs::Vector{F}
+    lower::Vector{T}
+    upper::Vector{T}
+
+    function ConstraintsNL(funcs::Vector, l::AbstractVector{T}, u::AbstractVector{T})
+        length(funcs) == length(l) == length(u) || error("The sizes of the bounds must match the number of constraint functions")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(funcs, l, u)
+    end
+end
 
 type Constraints
     bounds::AbstractConstraints
-    leq::AbstractConstraints
-    lineq::AbstractConstraints
-    nleq::AbstractConstraints
-    nlineq::AbstractConstraints
+    linear::AbstractConstraints
+    nonlinear::AbstractConstraints
     mu::Real
 end
-Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone())
+Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), 1.0)
@@ -52,6 +75,16 @@ function feasible(x, constraints::ConstraintsBox)
     true
 end
 
+function feasible(x, constraints::ConstraintsL)
+    l = constraints.lower
+    u = constraints.upper
+    y = constraints.scratch1
+    A_mul_B!(y, constraints.A, x)
+    for i = 1:length(y)
+        l[i] <= y[i] <= u[i] || return false
+    end
+    true
+end
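Feasibility for linear constraints thus reduces to checking l <= A*x <= u componentwise. A small sketch with hypothetical data, assuming the ConstraintsL definitions above:

    A  = [1.0 1.0; 1.0 -1.0]
    lc = ConstraintsL(A, [-1.0, -1.0], [1.0, 1.0])
    feasible([0.2, 0.3], lc)    # true:  A*x = [0.5, -0.1] lies within the bounds
    feasible([2.0, 0.0], lc)    # false: A*x = [2.0,  2.0] exceeds both upper bounds

 ## project!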
project!(x, constraints::AbstractConstraints) = x diff --git a/src/interior.jl b/src/interior.jl index be1835830..87e0f0ca4 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -75,7 +75,52 @@ function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) end end -## Linear inequalities +## Linear constraints +function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) + m, n = size(constraints.A) + y1 = constraints.scratch1 + A_mul_B!(y1, constraints.A, x) + phi = zero(T) + l = constraints.lower + u = constraints.upper + lenx = length(x) + lenc = length(y1) + for i = 1:lenc + li, ui = l[i], u[i] + yi = y1[i] + if isfinite(li) + yi >= li || return inf(T) # no need to compute additional gradient terms + phi += log(yi-li) + end + if isfinite(ui) + yi <= ui || return inf(T) + phi += log(ui-yi) + end + end + if isempty(g) + return -phi + end + y2 = constraints.scratch2 + y3 = constraints.scratch3 + for i = 1:lenc + y2[i] = 1/(y1[i] - l[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] -= y3[i] + end + for i = 1:lenc + y2[i] = 1/(u[i] - y1[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] += y3[i] + end + -phi +end + +barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) + ## Nonlinear inequalities @@ -132,7 +177,7 @@ linlsq(A::Matrix, b::Vector) = (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), (x,H) -> linlsq_h!(x, H, A, b)) -dummy(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_f (x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) @@ -184,7 +229,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace - df = DifferentiableFunction(dummy, + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 @@ -210,7 +255,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x /= t if alphamax >= 1 t *= mu - df = DifferentiableFunction(dummy, + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) end @@ -243,8 +288,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, g_calls) end + function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), - initial_x::Vector{T}, + initial_x::Array{T}, constraints::AbstractConstraints; method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) if method == :newton @@ -258,7 +304,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl local results iteration, f_calls, g_calls = 0, 0, 0 while m/t > eps_gap - df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) results = optimize(df, x, method=method) From f83256245e72a715cef388787ca392107ed70f90 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 2 Apr 2014 16:23:00 -0500 Subject: [PATCH 04/28] Improve initial value of barrier coefficient Also support keyword arguments --- src/interior.jl | 54 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 87e0f0ca4..7313e57f1 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -292,9 +292,41 @@ end function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) + method = :cg, t = nan(T), mu = 10, eps_gap = 1e-12, + xtol::Real = 1e-32, + ftol::Real = 1e-8, + grtol::Real = 1e-8, + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + if isnan(t) + vo, vc, go, gc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && go < grtol + return MultivariateOptimizationResults("Interior", + initial_x, + initial_x, + float64(vo), + 0, + false, + false, + xtol, + false, + ftol, + true, + grtol, + OptimizationTrace(), + 0, + 0) + end + if gc == 0 + t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. + else + t = gc/(0.1*go) + end + end if method == :newton - return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) + return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME end if !feasible(initial_x, constraints) error("Initial guess must be feasible") @@ -303,11 +335,11 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl m = length(x) local results iteration, f_calls, g_calls = 0, 0, 0 - while m/t > eps_gap + while m/t > eps_gap || iteration == 0 df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) - results = optimize(df, x, method=method) + results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) iteration += results.iterations f_calls += results.f_calls @@ -319,3 +351,17 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl copy!(results.initial_x, initial_x) results end + +function gnorm(objective, constraints, initial_x) + go = similar(initial_x) + gc = similar(initial_x) + fill!(gc, 0) + vo = objective.fg!(initial_x, go) + vc = barrier_fg!(initial_x, gc, constraints) + gom = gcm = zero(eltype(go)) + for i = 1:length(go) + gom += abs(go[i]) + gcm += abs(gc[i]) + end + vo, vc, gom, gcm +end From 5fabfe819acbad2fcd61ddd8939d01d81c8393d9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 4 Apr 2014 15:41:09 -0500 Subject: [PATCH 05/28] Update for deprecation of infs --- src/constraints.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 434a439e7..954c0392f 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, infs(T, size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(inf(T), size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-inf(T),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M From 8f56008a4d00bfc5aeb291279efdfdccbe2b3a6b Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 4 Apr 2014 15:45:17 -0500 Subject: [PATCH 06/28] Check type of function value, and fix type of initial t in interior-point When the parameters are Float32, the return value of the function had better be, too, or we could be fooled about the precision. --- src/interior.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 7313e57f1..8081b3ec5 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -24,7 +24,7 @@ function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) phi += log(ui-xi) end end - -phi + -phi::T end function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) @@ -58,7 +58,7 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) g[i] += 1/(ui-xi) end end - -phi + -phi::T end function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) @@ -116,7 +116,7 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) for i = 1:lenx g[i] += y3[i] end - -phi + -phi::T end barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) @@ -322,7 +322,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl if gc == 0 t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. 
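The conditional resumes just below: in its else branch, the initial barrier coefficient is chosen so that the scaled objective gradient dominates the barrier gradient by roughly a factor of ten at the starting point, since t = gc/(0.1*go) gives t*go = 10*gc. A hypothetical numeric check:

    go, gc = 14.0, 0.7      # gradient norms of objective and barrier, as from gnorm()
    t = gc/(0.1*go)         # = 0.5, so t*go = 7.0 == 10*gc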
else - t = gc/(0.1*go) + t = gc/(convert(T,0.1)*go) end end if method == :newton From de95ece90360934e3df08c214e81c376c3c4d4fc Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 5 Apr 2014 07:54:21 -0500 Subject: [PATCH 07/28] Modify some asserts that were causing trouble for non-convex domains --- src/linesearch/hz_linesearch.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 27d24f433..e5171c302 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -420,8 +420,8 @@ function secant2!{T}(df::Union(DifferentiableFunction, b = lsr.alpha[ib] dphia = lsr.slope[ia] dphib = lsr.slope[ib] - @assert dphia < 0 - @assert dphib >= 0 + @assert dphia < 0 && isfinite(dphia) + @assert dphib >= 0 && isfinite(dphib) c = secant(a, b, dphia, dphib) if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) @@ -431,7 +431,6 @@ function secant2!{T}(df::Union(DifferentiableFunction, phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) @@ -561,7 +560,12 @@ function bisect!{T}(df::Union(DifferentiableFunction, # Debugging (HZ, conditions shown following U3) @assert lsr.slope[ia] < 0 @assert lsr.value[ia] <= philim - @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here + # if !(lsr.slope[ib] < 0) + # @show a, b + # @show lsr.value[ia], lsr.value[ib] + # @show lsr.slope[ia], lsr.slope[ib] + # end + # @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) @@ -572,7 +576,6 @@ function bisect!{T}(df::Union(DifferentiableFunction, phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phid) && isfinite(gphi) push!(lsr, d, phid, gphi) id = length(lsr) if gphi >= 0 From 72f18eabae0fcdc9b2f152c129fa9f5386947351 Mon Sep 17 00:00:00 2001 From: timholy Date: Fri, 23 May 2014 13:28:55 -0500 Subject: [PATCH 08/28] nelder-mead: check that at least one starting point has finite value --- src/nelder_mead.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index f29afa619..4952d6295 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -69,6 +69,7 @@ function nelder_mead{T}(f::Function, for i in 1:n @inbounds y[i] = f(p[:, i]) end + any(isfinite(y)) || error("At least one of the starting points must have finite penalty") f_calls += n f_x_previous, f_x = NaN, nmobjective(y, m, n) From 048394a7be294b6f6f17208b39fe2175e54b10e4 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 10 Sep 2014 21:15:48 -0500 Subject: [PATCH 09/28] Fix call to norm on high-dimensional objects --- src/cg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cg.jl b/src/cg.jl index b175bc5f4..94b471d3e 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -89,7 +89,7 @@ macro cgtrace() dt["g(x)"] = copy(gr) dt["Current step size"] = alpha end - grnorm = norm(gr, Inf) + grnorm = norm(gr[:], Inf) update!(tr, iteration, f_x, From 4d536b7e77b6e2d6c5025cf5041b4a2ccf4a8545 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 10 Sep 2014 21:16:22 -0500 Subject: [PATCH 10/28] fminbox: remove unused tol argument and add mu0 --- src/fminbox.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/src/fminbox.jl b/src/fminbox.jl index 19da30a36..e675f7eb2 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -105,20 +105,20 @@ end const PARAMETERS_MU = one64< precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -145,7 +145,7 @@ function fminbox{T}(df::DifferentiableFunction, gbarrier[i] = (isfinite(thisl) ? one(T)/(thisx-thisl) : zero(T)) + (isfinite(thisu) ? one(T)/(thisu-thisx) : zero(T)) end valfunc = df.fg!(x, gfunc) # is this used?? - mu = initialize_mu(gfunc, gbarrier; mu0factor=mufactor) + mu = isnan(mu0) ? initialize_mu(gfunc, gbarrier; mu0factor=mufactor) : mu0 if show_trace > 0 println("######## fminbox ########") println("Initial mu = ", mu) From 7f48aeb1749ff79dd033c3e5475c303f6e01a0db Mon Sep 17 00:00:00 2001 From: Cody Date: Wed, 4 Feb 2015 09:00:56 -0600 Subject: [PATCH 11/28] fixed some deprecation warnings thrown by julia 0.4. Used Compat package to ensure compatibility with earlier Julia versions. --- REQUIRE | 1 + src/Optim.jl | 3 ++- src/cg.jl | 4 ++-- src/levenberg_marquardt.jl | 4 ++-- src/linesearch/hz_linesearch.jl | 10 +++++----- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/REQUIRE b/REQUIRE index 3ef4db562..0da2d52b7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,4 @@ julia 0.2- Calculus DualNumbers +Compat diff --git a/src/Optim.jl b/src/Optim.jl index 4fc17d2af..bc3afb7c7 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,6 +1,7 @@ module Optim using Calculus - + using Compat + import Base.dot, Base.length, Base.push!, diff --git a/src/cg.jl b/src/cg.jl index 94b471d3e..8d42a5569 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -150,7 +150,7 @@ function cg{T}(df::Union(DifferentiableFunction, # Store f(x) in f_x f_x = df.fg!(x, gr) @assert typeof(f_x) == T - f_x_previous = nan(T) + f_x_previous = convert(T,NaN) f_calls, g_calls = f_calls + 1, g_calls + 1 copy!(gr_previous, gr) @@ -212,7 +212,7 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) - alphamax = interior ? toedge(x, s, constraints) : inf(T) + alphamax = interior ? toedge(x, s, constraints) : convert(T,Inf) # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 178b585ee..69e001811 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -38,7 +38,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # Maintain a trace of the system. 
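The hunks below replace Julia 0.3's curly-brace Dict literals, which are deprecated on 0.4, with the Compat macro form. Both spellings build the same Dict{Any,Any}; a minimal sketch with a hypothetical value:

    using Compat
    lambda = 10.0
    d_old = {"lambda" => lambda}                        # 0.3 literal, deprecated on 0.4
    d_new = @compat Dict{Any,Any}("lambda" => lambda)   # accepted on both 0.3 and 0.4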
tr = OptimizationTrace() if show_trace - d = {"lambda" => lambda} + d = @compat Dict{Any,Any}("lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), NaN, d) push!(tr, os) println(os) @@ -87,7 +87,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # show state if show_trace gradnorm = norm(J'*fcur, Inf) - d = {"g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda} + d = @compat Dict{Any,Any}("g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), gradnorm, d) push!(tr, os) println(os) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index e5171c302..938b9ec01 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -93,7 +93,7 @@ function alphatry{T}(alpha::T, gtmp::Array, lsr::LineSearchResults, constraints::AbstractConstraints = ConstraintsNone(), - alphamax::Real = inf(T), + alphamax::Real = convert(T,Inf), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), @@ -171,7 +171,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c::Real, mayterminate::Bool, constraints::AbstractConstraints = ConstraintsNone(), - alphamax::Real = inf(T), + alphamax::Real = convert(T,Inf), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, rho::Real = convert(T,5), @@ -179,7 +179,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, gamma::Real = convert(T,0.66), linesearchmax::Integer = 50, psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), + iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), detailed_trace::Integer = 0) if detailed_trace & LINESEARCH > 0 println("New linesearch") @@ -554,7 +554,7 @@ function bisect!{T}(df::Union(DifferentiableFunction, detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 - gphi = nan(T) + gphi = convert(T,NaN) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, conditions shown following U3) @@ -605,7 +605,7 @@ function linefunc!{T}(df::Union(DifferentiableFunction, f_calls = 0 g_calls = 0 step!(xtmp, x, s, alpha, constraints) - gphi = nan(eltype(g)) + gphi = convert(eltype(g),NaN) if calc_grad val = df.fg!(xtmp, g) f_calls += 1 From 0f352923d58637e5c671011fb8af2abdbe28a7d7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 9 Apr 2015 03:47:58 -0500 Subject: [PATCH 12/28] Fix ConstraintsNone parameter problem in step! --- src/constraints.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constraints.jl b/src/constraints.jl index 954c0392f..836bd5b3b 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -97,7 +97,7 @@ function project!(x::Array, bounds::ConstraintsBox) end ## step! 
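For context on the one-line fix below: ConstraintsNone is declared without type parameters, so the old default argument ConstraintsNone{T}() fails as soon as step! is called without an explicit constraints argument. Roughly:

    abstract AbstractConstraints
    immutable ConstraintsNone <: AbstractConstraints end
    ConstraintsNone()            # fine
    ConstraintsNone{Float64}()   # throws: ConstraintsNone accepts no type parameters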
-function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone{T}()) +function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone()) (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree") for i = 1:length(xtmp) @inbounds xtmp[i] = x[i] + alpha*s[i] From 03c5e04e0e53b69f6957a645224cca9e86c35c3a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 9 Apr 2015 04:32:35 -0500 Subject: [PATCH 13/28] Fixes for 0.4 deprecations --- src/accelerated_gradient_descent.jl | 2 +- src/bfgs.jl | 4 ++-- src/brent.jl | 12 ++++++------ src/cg.jl | 2 +- src/constraints.jl | 12 ++++++------ src/fminbox.jl | 12 ++++++------ src/golden_section.jl | 10 +++++----- src/gradient_descent.jl | 2 +- src/interior.jl | 30 ++++++++++++++--------------- src/l_bfgs.jl | 2 +- src/linesearch/hz_linesearch.jl | 6 +++--- src/momentum_gradient_descent.jl | 2 +- src/nelder_mead.jl | 2 +- src/newton.jl | 2 +- src/nnls.jl | 4 ++-- src/optimize.jl | 14 +++++++------- src/problems/unconstrained.jl | 4 ++-- src/simulated_annealing.jl | 2 +- src/types.jl | 4 ++-- test/grid_search.jl | 2 +- test/type_stability.jl | 6 +++--- 21 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl index e9de5b121..1f0f41bca 100644 --- a/src/accelerated_gradient_descent.jl +++ b/src/accelerated_gradient_descent.jl @@ -139,7 +139,7 @@ function accelerated_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Accelerated Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/bfgs.jl b/src/bfgs.jl index 5a6073b3e..c2216bc70 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -97,7 +97,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, # Increment the number of steps we've had to perform iteration += 1 - # Set the search direction + # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian A_mul_B!(s, invH, gr) for i in 1:n @@ -178,7 +178,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/brent.jl b/src/brent.jl index b3d4fa0b6..71d0a7356 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -33,13 +33,13 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) f_minimum = f(x_minimum) f_calls = 1 # Number of calls to f - + step = zero(T) step_old = zero(T) @@ -48,7 +48,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; f_minimum_old = f_minimum f_minimum_old_old = f_minimum - + it = 0 converged = false @@ -72,12 +72,12 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if abs(step_old) > tolx # Compute parabola interpolation # x_minimum + p/q is the optimum of the parabola # Also, q is guaranteed to be positive - + r = (x_minimum - x_minimum_old) * (f_minimum - f_minimum_old_old) q = (x_minimum - x_minimum_old_old) * (f_minimum - f_minimum_old) p = (x_minimum - x_minimum_old_old) * q - (x_minimum - x_minimum_old) * r @@ -150,7 +150,7 @@ 
function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/cg.jl b/src/cg.jl index 8d42a5569..f3a599292 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -282,7 +282,7 @@ function cg{T}(df::Union(DifferentiableFunction, return MultivariateOptimizationResults("Conjugate Gradient", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/constraints.jl b/src/constraints.jl index 836bd5b3b..5a382ae67 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(inf(T), size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-inf(T),size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M @@ -112,12 +112,12 @@ end # - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active) # The first is the farthest you might consider searching in a barrier method. The second is the farthest that it # makes sense to search in a projection method. -toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) -tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) +toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) # Stop at the first coordinate to leave the box - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) si = s[i] li = bounds.lower[i] @@ -145,7 +145,7 @@ function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) elseif si > 0 && isfinite(ui) alphamax = max(alphamax, (ui-x[i])/si) else - return inf(T) + return convert(T,Inf) end end convert(T, alphamax) diff --git a/src/fminbox.jl b/src/fminbox.jl index e675f7eb2..861316433 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -1,7 +1,7 @@ # Attempt to compute a reasonable default mu: at the starting # position, the gradient of the input function should dominate the # gradient of the barrier. 
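The heuristic in the comment above can be stated compactly: scale the barrier so that its gradient is a small fraction, mu0factor, of the objective gradient at the starting point. A hedged standalone sketch (default_mu is a hypothetical name, and the fallback when no barrier term is active is an assumption); for the box barrier used here, -sum(log(x-l) + log(u-x)), gbarrier holds that barrier's gradient at the starting point:

function default_mu(gfunc::Vector{Float64}, gbarrier::Vector{Float64},
                    mu0factor::Float64 = 0.001)
    gbarriernorm = sum(abs(gbarrier))
    gbarriernorm > 0 || return mu0factor          # assumed fallback: no finite bounds
    mu0factor * sum(abs(gfunc)) / gbarriernorm    # barrier gradient stays ~0.1% of objective's
end

default_mu([10.0, -4.0], [1.0, 1.0])  # 0.001 * (14/2) = 0.007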
-function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = nan(T), mu0factor::T = 0.001) +function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = convert(T,NaN), mu0factor::T = 0.001) if isnan(mu0) gbarriernorm = sum(abs(gbarrier)) if gbarriernorm > 0 @@ -26,7 +26,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisl) dx = x[i] - thisl if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -41,7 +41,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisu) dx = thisu - x[i] if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -76,7 +76,7 @@ function barrier_combined{T}(x::Array{T}, g, gfunc, gbarrier, val_each::Vector{T end function limits_box{T}(x::Array{T}, d::Array{T}, l::Array{T}, u::Array{T}) - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) if d[i] < 0 alphamax = min(alphamax, ((l[i]-x[i])+eps(l[i]))/d[i]) @@ -118,7 +118,7 @@ function fminbox{T<:FloatingPoint}(df::DifferentiableFunction, extended_trace::Bool = false, linesearch!::Function = hz_linesearch!, eta::Real = convert(T,0.4), - mu0::T = nan(T), + mu0::T = convert(T,NaN), mufactor::T = convert(T, 0.001), precondprep = (P, x, l, u, mu) -> precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -152,7 +152,7 @@ function fminbox{T<:FloatingPoint}(df::DifferentiableFunction, end g = similar(x) - valboth = Array(T, 2) + valboth = Array(T, 2) fval_all = Array(Vector{T}, 0) fcount_all = 0 xold = similar(x) diff --git a/src/golden_section.jl b/src/golden_section.jl index dce3da090..e4ed75029 100644 --- a/src/golden_section.jl +++ b/src/golden_section.jl @@ -28,11 +28,11 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; if !(x_lower < x_upper) error("x_lower must be less than x_upper") end - + # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) @@ -46,7 +46,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace @goldensectiontrace - + while it < iterations tolx = rel_tol * abs(x_minimum) + abs_tol @@ -59,7 +59,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if x_upper - x_minimum > x_minimum - x_lower x_new = x_minimum + golden_ratio*(x_upper - x_minimum) f_new = f(x_new) @@ -91,7 +91,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index fc996f4d4..dfba19fa3 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -124,7 +124,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/interior.jl b/src/interior.jl index 8081b3ec5..295176a7b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -16,11 +16,11 @@ function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) li, ui = l[i], u[i] xi = x[i] if isfinite(li) - xi >= li || return inf(T) + xi >= li || return convert(T,Inf) phi += log(xi-li) end if isfinite(ui) - xi <= ui || return inf(T) + xi <= 
ui || return convert(T,Inf) phi += log(ui-xi) end end @@ -48,12 +48,12 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) li, ui = l[i], u[i] xi = x[i] if isfinite(li) - xi >= li || return inf(T) # no need to compute additional gradient terms + xi >= li || return convert(T,Inf) # no need to compute additional gradient terms phi += log(xi-li) g[i] -= 1/(xi-li) end if isfinite(ui) - xi <= ui || return inf(T) + xi <= ui || return convert(T,Inf) phi += log(ui-xi) g[i] += 1/(ui-xi) end @@ -89,11 +89,11 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) li, ui = l[i], u[i] yi = y1[i] if isfinite(li) - yi >= li || return inf(T) # no need to compute additional gradient terms + yi >= li || return convert(T,Inf) # no need to compute additional gradient terms phi += log(yi-li) end if isfinite(ui) - yi <= ui || return inf(T) + yi <= ui || return convert(T,Inf) phi += log(ui-yi) end end @@ -154,13 +154,13 @@ end # combined_f(x, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_f(x, objective.f, constraints, t) -# +# # combined_g!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_g!(x, g, objective.g, constraints, t) -# +# # combined_fg!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_fg!(x, g, objective.fg!, constraints, t) -# +# # combined_h!(x, H, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_h!(x, H, objective.h!, constraints, t) @@ -170,7 +170,7 @@ end ## Cost function for linear least squares ## ######################################### - + linlsq(A::Matrix, b::Vector) = TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), @@ -228,12 +228,12 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace - + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 - f_x = f_x_previous = inf(T) + f_x = f_x_previous = convert(T,Inf) while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) @@ -274,7 +274,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, MultivariateOptimizationResults("Interior/Newton", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, @@ -292,7 +292,7 @@ end function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = nan(T), mu = 10, eps_gap = 1e-12, + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = 1e-12, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -306,7 +306,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl return MultivariateOptimizationResults("Interior", initial_x, initial_x, - float64(vo), + @compat(Float64(vo)), 0, false, false, diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 941d52b58..e439d23a8 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -215,7 +215,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("L-BFGS", initial_x, x, 
- float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 938b9ec01..a4a1a4a87 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -59,7 +59,7 @@ display_nextbit = 14 # - Support for projected-gradient methods const DEFAULTDELTA = 0.1 -const DEFAULTSIGMA = 0.9 +const DEFAULTSIGMA = 0.9 # Generate initial guess for step size (HZ, stage I0) function alphainit{T}(alpha::Real, @@ -97,7 +97,7 @@ function alphatry{T}(alpha::T, psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), + iterfinitemax::Integer = ceil(Int,-log2(eps(T))), detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -315,7 +315,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end while iter < linesearchmax a = lsr.alpha[ia] - b = lsr.alpha[ib] + b = lsr.alpha[ib] @assert b > a if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index 927e451ac..eca0f9944 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -125,7 +125,7 @@ function momentum_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Momentum Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index 4952d6295..9dcf8280f 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -187,7 +187,7 @@ function nelder_mead{T}(f::Function, return MultivariateOptimizationResults("Nelder-Mead", initial_x, minimum, - float64(f(minimum)), + @compat(Float64(f(minimum))), iteration, iteration == iterations, false, diff --git a/src/newton.jl b/src/newton.jl index 64cc33370..8f8245b50 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -130,7 +130,7 @@ function newton{T}(d::TwiceDifferentiableFunction, return MultivariateOptimizationResults("Newton's Method", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nnls.jl b/src/nnls.jl index f34f0ffd0..239e40389 100644 --- a/src/nnls.jl +++ b/src/nnls.jl @@ -6,8 +6,8 @@ function nnls(A::AbstractMatrix, b::AbstractVector) x = fill(one(T), size(A, 2)) # Set up constraints l = zeros(eltype(x), length(x)) - u = fill(inf(eltype(x)), length(x)) - # Perform the optimization + u = fill(convert(eltype(x),Inf), length(x)) + # Perform the optimization func = (x, g) -> nnlsobjective(x, g, A, b) df = DifferentiableFunction(x->func(x,nothing), func, func) fminbox(df, x, l, u, precondprep=(P, x, l, u, mu)->precondprepnnls(P, x, mu, a)) diff --git a/src/optimize.jl b/src/optimize.jl index 4ea952398..a96b6c37c 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -52,7 +52,7 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -145,7 +145,7 @@ function optimize(d::DifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -259,7 +259,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) 
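A usage sketch of the two-argument constructor being invoked here, assuming (as these call sites suggest) that it synthesizes fg! from f and g!:

rosenbrock(x) = (1.0 - x[1])^2 + 100.0*(x[2] - x[1]^2)^2
function rosenbrock_g!(x, g)
    g[1] = -2.0*(1.0 - x[1]) - 400.0*x[1]*(x[2] - x[1]^2)
    g[2] = 200.0*(x[2] - x[1]^2)
    g
end
d = DifferentiableFunction(rosenbrock, rosenbrock_g!)
res = optimize(d, [0.0, 0.0], method = :bfgs)   # minimizer should approach [1.0, 1.0]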
bfgs(d, initial_x, @@ -362,7 +362,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -469,7 +469,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -514,7 +514,7 @@ function optimize{T <: Real}(f::Function, @printf "Iter Function value Gradient norm \n" end if method == :brent - brent(f, float64(lower), float64(upper); + brent(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, @@ -522,7 +522,7 @@ function optimize{T <: Real}(f::Function, show_trace = show_trace, extended_trace = extended_trace) elseif method == :golden_section - golden_section(f, float64(lower), float64(upper); + golden_section(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl index 7fbcf3c50..e2f1054ab 100644 --- a/src/problems/unconstrained.jl +++ b/src/problems/unconstrained.jl @@ -180,7 +180,7 @@ examples["Large Polynomial"] = OptimizationProblem("Large Polynomial", large_polynomial_gradient!, large_polynomial_hessian!, zeros(250), - {float([1:250])}, + {float([1:250;])}, true, true) @@ -268,7 +268,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial", ########################################################################## function powell(x::Vector) - return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + + return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + (x[2] - 2.0 * x[3])^4 + 10.0 * (x[1] - x[4])^4 end diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl index 5fbb27764..879cae802 100644 --- a/src/simulated_annealing.jl +++ b/src/simulated_annealing.jl @@ -103,7 +103,7 @@ function simulated_annealing{T}(cost::Function, return MultivariateOptimizationResults("Simulated Annealing", initial_x, best_x, - float64(best_f_x), + @compat(Float64(best_f_x)), iterations, iteration == iterations, false, diff --git a/src/types.jl b/src/types.jl index 2b2dedb2e..3295c07bc 100644 --- a/src/types.jl +++ b/src/types.jl @@ -6,11 +6,11 @@ immutable OptimizationState end function OptimizationState(i::Integer, f::Real) - OptimizationState(int(i), float64(f), NaN, Dict()) + OptimizationState(int(i), @compat(Float64(f)), NaN, Dict()) end function OptimizationState(i::Integer, f::Real, g::Real) - OptimizationState(int(i), float64(f), float64(g), Dict()) + OptimizationState(int(i), @compat(Float64(f)), @compat(Float64(g)), Dict()) end immutable OptimizationTrace diff --git a/test/grid_search.jl b/test/grid_search.jl index 5f34c4da0..ff0b4f7ad 100644 --- a/test/grid_search.jl +++ b/test/grid_search.jl @@ -1,4 +1,4 @@ -Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0]) +Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0;]) # Cartesian product over 2 dimensions. 
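A hedged sketch of what the commented-out two-dimensional call below intends: evaluate the objective on the Cartesian product of two ranges and keep the best point (grid_search2 is an illustrative name only):

function grid_search2(f, xs, ys)
    bestval, bestpt = Inf, (first(xs), first(ys))
    for x in xs, y in ys
        v = f([x, y])
        if v < bestval
            bestval, bestpt = v, (x, y)
        end
    end
    bestval, bestpt
end

grid_search2(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, -1:0.1:1.0, 0:0.1:3.0)  # ≈ (0.0, (1.0, 2.0))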
#grid_search(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, product[-1:0.1:1.0, -1:0.1:1.0]) diff --git a/test/type_stability.jl b/test/type_stability.jl index da72b8956..56482cb8c 100644 --- a/test/type_stability.jl +++ b/test/type_stability.jl @@ -30,7 +30,7 @@ d3 = TwiceDifferentiableFunction(rosenbrock, for method in (:nelder_mead, :simulated_annealing) optimize(rosenbrock, [0.0,0,.0], method = method) - optimize(rosenbrock, float32([0.0,0.0]), method = method) + optimize(rosenbrock, [0.0f0,0.0f0], method = method) end for method in (:bfgs, @@ -40,10 +40,10 @@ for method in (:bfgs, # :accelerated_gradient_descent, :l_bfgs) optimize(d2, [0.0,0,.0], method = method) - optimize(d2, float32([0.0,0.0]), method = method) + optimize(d2, [0.0f0,0.0f0], method = method) end for method in (:newton,) optimize(d3, [0.0,0.0], method = method) - optimize(d3, float32([0.0,0.0]), method = method) + optimize(d3, [0.0f0,0.0f0], method = method) end From 4d1cdf435b3cceab2a267408a61974d2b353ab2d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 14 Apr 2015 06:37:16 -0500 Subject: [PATCH 14/28] Bump the default linesearchmax Rarely, I see perfectly valid line searches take more than 50 iterations. --- src/linesearch/hz_linesearch.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index a4a1a4a87..28254e53f 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -177,7 +177,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), - linesearchmax::Integer = 50, + linesearchmax::Integer = 100, psi3::Real = convert(T,0.1), iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), detailed_trace::Integer = 0) @@ -370,6 +370,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end iter += 1 end + @show lsr error("Linesearch failed to converge") end From c0a3a3fef1cac89df917c6c34aa8942852fdb723 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Sep 2015 16:57:02 -0500 Subject: [PATCH 15/28] Julia 0.4 update Binding deprecations, mostly. 
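The renames in question, in brief (0.3-era form on the left, 0.4 form on the right, as applied throughout the diffs below):

    Union(A, B)     ->  Union{A, B}
    Nothing         ->  Void            (the type of nothing)
    FloatingPoint   ->  AbstractFloat

So a schematic 0.3 signature such as

    f{T<:FloatingPoint}(x::Union(Vector{T}, Matrix{T}), u::Nothing)

becomes

    f{T<:AbstractFloat}(x::Union{Vector{T}, Matrix{T}}, u::Void)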
--- REQUIRE | 2 +- src/bfgs.jl | 4 ++-- src/brent.jl | 2 +- src/cg.jl | 10 ++++----- src/constraints.jl | 12 +++++----- src/fminbox.jl | 2 +- src/golden_section.jl | 2 +- src/gradient_descent.jl | 4 ++-- src/interior.jl | 10 ++++----- src/l_bfgs.jl | 4 ++-- src/linesearch/backtracking_linesearch.jl | 4 ++-- src/linesearch/hz_linesearch.jl | 26 +++++++++++----------- src/linesearch/interpolating_linesearch.jl | 4 ++-- src/linesearch/mt_linesearch.jl | 4 ++-- src/problems/constrained.jl | 6 ++--- src/problems/unconstrained.jl | 18 +++++++-------- 16 files changed, 57 insertions(+), 57 deletions(-) diff --git a/REQUIRE b/REQUIRE index 0da2d52b7..da0d390b7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.2- +julia 0.4 Calculus DualNumbers Compat diff --git a/src/bfgs.jl b/src/bfgs.jl index c2216bc70..cf2331c1e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -23,8 +23,8 @@ macro bfgstrace() end end -function bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; initial_invH::Matrix = eye(length(initial_x)), xtol::Real = 1e-32, diff --git a/src/brent.jl b/src/brent.jl index 71d0a7356..32ea74d8b 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -18,7 +18,7 @@ macro brenttrace() end end -function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function brent{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, diff --git a/src/cg.jl b/src/cg.jl index f3a599292..700cf0805 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner -cg_precondfwd(out::Array, P::Nothing, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Nothing, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Nothing, B::Array) = dot(A, B) +cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) +cg_precondfwddot(A::Array, P::Void, B::Array) = dot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = dot(A, B) # Diagonal preconditioner function cg_precondfwd(out::Array, p::Vector, A::Array) @@ -101,8 +101,8 @@ macro cgtrace() end end -function cg{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function cg{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; constraints::AbstractConstraints = ConstraintsNone(), interior::Bool = false, diff --git a/src/constraints.jl b/src/constraints.jl index 5a382ae67..365b7a5e8 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Void) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Void, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M @@ -35,12 +35,12 @@ immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints new(A, l, u, scratch1, scratch2, scratch3) end end -ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Union(Vector{T},Matrix{T})) = 
ConstraintsL{T,typeof(A),ndims(l)}(A, l, u) -ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Nothing) = ConstraintsL(A, l, infs(T, size(l))) -ConstraintsL{T}(A::AbstractMatrix, l::Nothing, u::Union(Vector{T},Matrix{T})) = ConstraintsL(A, -infs(T,size(u)), u) +ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Union{Vector{T},Matrix{T}}) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u) +ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Void) = ConstraintsL(A, l, infs(T, size(l))) +ConstraintsL{T}(A::AbstractMatrix, l::Void, u::Union{Vector{T},Matrix{T}}) = ConstraintsL(A, -infs(T,size(u)), u) # Generic constraints -immutable ConstraintsNL{T,F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints +immutable ConstraintsNL{T,F<:Union{Function,DifferentiableFunction,TwiceDifferentiableFunction}} <: AbstractConstraints funcs::Vector{F} lower::Vector{T} upper::Vector{T} diff --git a/src/fminbox.jl b/src/fminbox.jl index 861316433..1362f4c02 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -105,7 +105,7 @@ end const PARAMETERS_MU = one64< Date: Wed, 23 Sep 2015 18:39:33 -0500 Subject: [PATCH 16/28] More 0.4 fixes --- REQUIRE | 2 +- src/Optim.jl | 4 +++- src/interior.jl | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/REQUIRE b/REQUIRE index da0d390b7..b55322abe 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.4 +julia 0.4- Calculus DualNumbers Compat diff --git a/src/Optim.jl b/src/Optim.jl index bc3afb7c7..29ea243fb 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,7 +1,9 @@ +isdefined(Base, :__precompile__) && __precompile__() + module Optim using Calculus using Compat - + import Base.dot, Base.length, Base.push!, diff --git a/src/interior.jl b/src/interior.jl index 71e939b1b..29592502a 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -177,7 +177,7 @@ linlsq(A::Matrix, b::Vector) = (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), (x,H) -> linlsq_h!(x, H, A, b)) -dummy_f (x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_f(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) @@ -336,8 +336,8 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl local results iteration, f_calls, g_calls = 0, 0, 0 while m/t > eps_gap || iteration == 0 - df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), - (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g!(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) From c2b7cda18ab4134201d1ec9b8a1ca4beff8d38a8 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 21 Oct 2015 12:55:10 -0500 Subject: [PATCH 17/28] Guarantee that the initial direction is a descent direction for objective w/o constraints This prevents the barrier penalty from kicking the solution out of the starting basin --- src/interior.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 29592502a..f4be826ce 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -203,6 +203,11 @@ linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) ## Interior point methods ## ######################### +# Follows the notation of +# H. Hindi (2006), "A Tutorial on Convex Optimization II: Duality and +# Interior Point Methods," Proc. 2006 American Control Conference. +# particularly his "Algorithm: Barrier method." + function interior_newton{T}(objective::TwiceDifferentiableFunction, initial_x::Vector{T}, constraints::AbstractConstraints; @@ -301,8 +306,8 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl show_trace::Bool = false, extended_trace::Bool = false) if isnan(t) - vo, vc, go, gc = gnorm(objective, constraints, initial_x) - if isfinite(vo) && isfinite(vc) && go < grtol + vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && sqrt(gogo) < grtol return MultivariateOptimizationResults("Interior", initial_x, initial_x, @@ -319,10 +324,10 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl 0, 0) end - if gc == 0 + if gogc == 0 t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. else - t = gc/(convert(T,0.1)*go) + t = convert(T, 10*abs(gogc)/gogo) end end if method == :newton @@ -352,6 +357,18 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl results end +""" +`vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x)` +computes the values `vo`, `vc` of the `objective` and `constraints` at +position `initial_x`, as well as their gradients. `gogo` is the dot +product of the objective gradient with itself; `gogc` is the dot +product of the objective gradient and the constraint gradient. + +The latter two can be used to initialize the tradeoff between +objective and constraints so that the total gradient `t*go + gc` is +still a descent direction for the objective. This ensures that the +barrier penalty will not push the solution out of the starting basin. 
+""" function gnorm(objective, constraints, initial_x) go = similar(initial_x) gc = similar(initial_x) From 92a1c08a8be435d79d6f51862872167c874453d9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 21 Oct 2015 12:57:08 -0500 Subject: [PATCH 18/28] Pass constraints through to minimizers For box constraints, this allows the algorithm to pick alphamax so the constraints will not be violated --- src/interior.jl | 2 +- src/l_bfgs.jl | 18 +++++++++++++++++- src/optimize.jl | 12 ++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index f4be826ce..262ab4f0e 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -344,7 +344,7 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), (x,g) -> combined_g!(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) - results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + results = optimize(df, x, method=method, constraints=constraints, interior=true, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) iteration += results.iterations f_calls += results.f_calls diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 0a2aacb22..a6321c20c 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -83,6 +83,8 @@ end function l_bfgs{T}(d::Union{DifferentiableFunction, TwiceDifferentiableFunction}, initial_x::Vector{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, m::Integer = 10, xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -158,12 +160,26 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Refresh the line search cache dphi0 = dot(gr, s) + if dphi0 > 0 + iteration = 1 + for i = 1:n + @inbounds s[i] = -gr[i] + end + dphi0 = dot(gr, s) + end clear!(lsr) push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) + + # Pick the initial step size (HZ #I1-I2) + alpha, mayterminate, f_update, g_update = + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + f_calls, g_calls = f_calls + f_update, g_calls + g_update + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position diff --git a/src/optimize.jl b/src/optimize.jl index a96b6c37c..d2a3589d3 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,6 +1,8 @@ function optimize(d::TwiceDifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -41,6 +43,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -67,6 +71,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -94,6 +100,8 @@ end function optimize(d::DifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -134,6 +142,8 @@ function optimize(d::DifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -160,6 +170,8 @@ function optimize(d::DifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, From ce8df0ade5793824cf72611caef20786e685871e Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 22 Oct 2015 13:05:53 -0500 Subject: [PATCH 19/28] Automatically try a restart if we think we've converged This should cover cases where dphi < 0 "by a hair," i.e., where the search direction is almost orthogonal to the gradient. It's an extra layer of security beyond commit 50b9d60a9788798e68f010cd5a84d2b89626e582 to increase the likelihood that when we terminate, we really have reached a minimum. --- src/l_bfgs.jl | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index a6321c20c..cd11f8052 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -150,6 +150,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Iterate until convergence converged = false + converged_iteration = typemin(Int) while !converged && iteration < iterations # Increment the number of steps we've had to perform iteration += 1 @@ -160,21 +161,28 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Refresh the line search cache dphi0 = dot(gr, s) - if dphi0 > 0 + if dphi0 >= 0 + # The search direction is not a descent direction. Something + # is wrong, so restart the search-direction algorithm. + # See also the "restart" below. iteration = 1 for i = 1:n @inbounds s[i] = -gr[i] end dphi0 = dot(gr, s) end + dphi0 == 0 && break # we're at a stationary point clear!(lsr) push!(lsr, zero(T), f_x, dphi0) alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) - # Pick the initial step size (HZ #I1-I2) + # Pick the initial step size (HZ #I1-I2). Even though we might + # guess alpha=1 for l_bfgs most of the time, testing suggests + # this is still usually a good idea (fewer total function and + # gradient evaluations). alpha, mayterminate, f_update, g_update = - alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Determine the distance of movement along the search line @@ -226,8 +234,28 @@ grtol) @lbfgstrace + + # If we think we've converged, restart and make sure we don't + # have another long descent. This should catch the case where + # dphi < 0 "by a hair," meaning that the chosen search direction + # happened to be nearly orthogonal to the gradient. + if converged + if iteration <= 3 || converged_iteration >= 0 + if iteration <= max(m, 3) + break # we converged quickly after previous restart + end + converged_iteration += iteration + else + converged_iteration = iteration + end + converged = false + iteration = 0 # the next search direction will be -gr + end end + if converged_iteration >= 0 + iteration += converged_iteration + end return MultivariateOptimizationResults("L-BFGS", initial_x, x, From 28ce67613e8be3a750d2811c6991a8d4e795bf63 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 30 Nov 2015 20:05:15 -0600 Subject: [PATCH 20/28] Ensure that the search direction is a descent direction This no longer assumes that H is positive-definite, instead introducing a check and fix for cases where it does not already hold. --- src/interior.jl | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index 262ab4f0e..1c9d26764 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -244,7 +244,30 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - cH = cholfact!(H) + # To ensure descent, the Hessian must be positive-definite. + # As needed, add positive elements to the diagonal.
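Why this guarantees descent: for positive-definite A, the step s = -(A \ gr) satisfies dot(s, gr) = -dot(gr, A \ gr) < 0. A standalone sketch of the diagonal-boosting idea, using isposdef in place of the direct LAPACK potrf! call in the implementation below (make_descent_step is an illustrative name only):

function make_descent_step(H::Matrix{Float64}, gr::Vector{Float64})
    A = copy(H)
    lambda = sqrt(eps(Float64))
    while !isposdef(A)                          # boost the diagonal, retrying with
        for i = 1:size(H, 1)                    # a 10x larger boost each pass
            h = abs(H[i, i])
            A[i, i] = h > 0 ? (1 + lambda)*h : lambda
        end
        lambda *= 10
    end
    -(A \ gr)
end

H = [1.0 2.0; 2.0 1.0]            # indefinite: eigenvalues 3 and -1
s = make_descent_step(H, [1.0, 0.0])
dot(s, [1.0, 0.0]) < 0            # true: a descent direction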
+ # cH = cholfact!(H) + A = copy(H) + retry = true + λ = sqrt(eps(eltype(H))) + while retry + retry = false + C, info = Base.LinAlg.LAPACK.potrf!('U', A) + if info != 0 + copy!(A, H) + for i = 1:size(H,1) + h = abs(H[i,i]) + if h > 0 + A[i,i] = (1+λ)*h + else + A[i,i] = λ + end + end + λ *= 10 + retry = true + end + end + cH = Base.LinAlg.Cholesky(A, :U) s = -(cH\gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) From dc4a4c9f88336e1319e725c50babe792b170045a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:42:25 -0600 Subject: [PATCH 21/28] interior_newton: increment t if the step converged --- src/interior.jl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 1c9d26764..e40b51f73 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -239,6 +239,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 f_x = f_x_previous = convert(T,Inf) + x_converged = f_converged = gr_converged = converged = true while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) @@ -281,24 +282,24 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_calls += _f_calls g_calls += _g_calls f_x /= t - if alphamax >= 1 + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + grtol) + if alphamax >= 1 || converged t *= mu df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) end end - x_converged, - f_converged, - gr_converged, - converged = assess_convergence(x, - x_previous, - f_x, - f_x_previous, - gr, - xtol, - ftol, - grtol) MultivariateOptimizationResults("Interior/Newton", initial_x, x, From 67c9996d54410528a1800c2f72e4eaf7dc43e9a2 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:44:15 -0600 Subject: [PATCH 22/28] Fix returned function value when there are 0 parameters --- src/interior.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/interior.jl b/src/interior.jl index e40b51f73..bda1de043 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -240,6 +240,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, iteration, f_calls, g_calls = 0, 0, 0 f_x = f_x_previous = convert(T,Inf) x_converged = f_converged = gr_converged = converged = true + if m/t < eps_gap + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + end while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) From be3324acf0345a8abea250b2b888e23c815831b6 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:44:48 -0600 Subject: [PATCH 23/28] Improvements to positive-definite newton step --- src/interior.jl | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index bda1de043..b3d6ed810 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,31 +248,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - # To ensure descent, the Hessian must be positive-definite. - # As needed, add positive elements to the diagonal. 
# cH = cholfact!(H) - A = copy(H) - retry = true - λ = sqrt(eps(eltype(H))) - while retry - retry = false - C, info = Base.LinAlg.LAPACK.potrf!('U', A) - if info != 0 - copy!(A, H) - for i = 1:size(H,1) - h = abs(H[i,i]) - if h > 0 - A[i,i] = (1+λ)*h - else - A[i,i] = λ - end - end - λ *= 10 - retry = true - end - end - cH = Base.LinAlg.Cholesky(A, :U) - s = -(cH\gr) + # To ensure descent, the Hessian must be positive-definite. + s = newtonstep(H, gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) alphamax = toedge(x, s, constraints) @@ -409,3 +387,26 @@ function gnorm(objective, constraints, initial_x) end vo, vc, gom, gcm end + +function newtonstep(H, gr) + A = copy(H) + # Direct use of potrf means we don't have to compute the Cholesky + # decomposition twice (once in `isposdef` and again by `cholfact`) + _, info = Base.LinAlg.LAPACK.potrf!('L', A) + if info == 0 + cH = Base.LinAlg.Cholesky(A, :L) + return -(cH\gr) + end + D, V = eig(H) # very expensive, but see Optim #153 + Dabs = abs(D) + Dmax = maximum(Dabs) + if Dmax == 0 + return -gr + end + δ = sqrt(eps(eltype(D))) + reliable = Dabs .> δ*Dmax + Dinv = similar(D) + Dinv[reliable] = 1./Dabs[reliable] + Dinv[!reliable] = 1/Dmax + return -(V*(Dinv .* (V'*gr))) +end From afccefce94d686aecb64bdb3ed012a1a530d07ba Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 26 Jan 2016 08:24:20 -0600 Subject: [PATCH 24/28] Use PositiveFactorizations for the newton step --- REQUIRE | 3 ++- src/Optim.jl | 2 +- src/interior.jl | 25 +------------------------ 3 files changed, 4 insertions(+), 26 deletions(-) diff --git a/REQUIRE b/REQUIRE index b55322abe..01c9f80aa 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,5 @@ -julia 0.4- +julia 0.4 Calculus DualNumbers Compat +PositiveFactorizations diff --git a/src/Optim.jl b/src/Optim.jl index 29ea243fb..d100b3871 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,7 +1,7 @@ isdefined(Base, :__precompile__) && __precompile__() module Optim - using Calculus + using Calculus, PositiveFactorizations using Compat import Base.dot, diff --git a/src/interior.jl b/src/interior.jl index b3d6ed810..ec9fde3ea 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,8 +248,6 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - # cH = cholfact!(H) - # To ensure descent, the Hessian must be positive-definite. 
s = newtonstep(H, gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) @@ -388,25 +386,4 @@ function gnorm(objective, constraints, initial_x) vo, vc, gom, gcm end -function newtonstep(H, gr) - A = copy(H) - # Direct use of potrf means we don't have to compute the Cholesky - # decomposition twice (once in `isposdef` and again by `cholfact`) - _, info = Base.LinAlg.LAPACK.potrf!('L', A) - if info == 0 - cH = Base.LinAlg.Cholesky(A, :L) - return -(cH\gr) - end - D, V = eig(H) # very expensive, but see Optim #153 - Dabs = abs(D) - Dmax = maximum(Dabs) - if Dmax == 0 - return -gr - end - δ = sqrt(eps(eltype(D))) - reliable = Dabs .> δ*Dmax - Dinv = similar(D) - Dinv[reliable] = 1./Dabs[reliable] - Dinv[!reliable] = 1/Dmax - return -(V*(Dinv .* (V'*gr))) -end +newtonstep(H, gr) = cholfact(Positive, H)\(-gr) From dad89d22bc10f4167530d0bd628c003e1e7ed28c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 07:51:59 -0600 Subject: [PATCH 25/28] Change convergence criteria in interior_newton to monitor solution, not eps_gap eps_gap introduced an absolute scale for convergence, which seemed problematic. This should be more robust, as it monitors the characteristics of the solution. It should also avoid making changes to the barrier that no longer have a consequence for the solution; previous approaches introduced numerical stability problems when the barrier penalty became extreme. --- src/interior.jl | 36 ++++++++++++++++++++++++++---------- test/constrained.jl | 16 +++++++++++++++- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index ec9fde3ea..c5d93942b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -211,7 +211,7 @@ linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) function interior_newton{T}(objective::TwiceDifferentiableFunction, initial_x::Vector{T}, constraints::AbstractConstraints; - t = one(T), mu = 10, eps_gap = 1e-12, + t = one(T), mu = 10, eps_gap = convert(T,NaN), xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -237,18 +237,19 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) - iteration, f_calls, g_calls = 0, 0, 0 + iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 f_x = f_x_previous = convert(T,Inf) x_converged = f_converged = gr_converged = converged = true if m/t < eps_gap f_x = combined_fg!(x, gr, objective.fg!, constraints, t) end - while m/t > eps_gap && iteration < iterations +# while m/t > eps_gap && iteration < iterations + while iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) - @newtontrace - s = newtonstep(H, gr) + F, Hd = ldltfact!(Positive, H) + s = F\(-gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) alphamax = toedge(x, s, constraints) @@ -258,9 +259,11 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, copy!(x_previous, x) step!(x, x, s, alpha, constraints) iteration += 1 + iter_t += 1 f_calls += _f_calls g_calls += _g_calls f_x /= t + @newtontrace x_converged, f_converged, gr_converged, @@ -272,8 +275,20 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, xtol, ftol, grtol) - if alphamax >= 1 || converged + # It's time to change the barrier penalty if we're + # sufficiently "in the basin." 
That means (1) the Hessian is + # positive (semi)definite (all(Hd .> 0)) and that the Hessian + # step is accepted (alpha is approximately 1). The latter + # suggests that the Hessian is a reliable predictor of the + # function shape, and that within a few iterations we'd be at + # the minimum. + if (alpha >= 0.9 && all(Hd .>= 0)) || converged + if iter_t == 1 && converged + # if changing t didn't change the solution, quit now + break + end t *= mu + iter_t = 0 df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) @@ -300,7 +315,7 @@ end function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiableFunction}, initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = convert(T,NaN), mu = 10, eps_gap = 1e-12, + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = convert(T,NaN), xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -333,8 +348,11 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl t = convert(T, 10*abs(gogc)/gogo) end end + if isnan(eps_gap) + eps_gap = (cbrt(eps(T)))^2/t + end if method == :newton - return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME end if !feasible(initial_x, constraints) error("Initial guess must be feasible") @@ -385,5 +403,3 @@ function gnorm(objective, constraints, initial_x) end vo, vc, gom, gcm end - -newtonstep(H, gr) = cholfact(Positive, H)\(-gr) diff --git a/test/constrained.jl b/test/constrained.jl index 8814afa2d..6a3b2d081 100644 --- a/test/constrained.jl +++ b/test/constrained.jl @@ -1,4 +1,4 @@ -import Optim +using Optim using Base.Test # Quadratic objective function @@ -55,3 +55,17 @@ A = [1.0 2.0; 3.0 4.0] b = [1.0,1.0] results = Optim.nnls(A, b) @test norm(results.minimum - [0,0.3]) < 1e-6 + +# 1d newton +let f(x) = (z = x[1]; -2z^2 + z^4), + g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x), + h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H), + objective = TwiceDifferentiableFunction(f, g!, (x,gr) -> (g!(x, gr); f(x)), h!), + constr = ConstraintsBox([-0.9],[0.9]) + + for x0 in (0.1,0.2,0.3,0.5,0.7) + x = [x0] + soln = interior(objective, x, constr, method=:newton) + @test_approx_eq_eps soln.minimum[1] 0.9 1e-6 + end +end From b66e6b6b23b4f886e298cf6160420d78a139c163 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 09:06:26 -0600 Subject: [PATCH 26/28] dot->vecdot --- src/Optim.jl | 3 +-- src/cg.jl | 16 ++++++++-------- src/gradient_descent.jl | 2 +- src/interior.jl | 2 +- src/l_bfgs.jl | 6 +++--- src/linesearch/hz_linesearch.jl | 6 +++--- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/Optim.jl b/src/Optim.jl index d100b3871..0fcb088e9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -4,8 +4,7 @@ module Optim using Calculus, PositiveFactorizations using Compat - import Base.dot, - Base.length, + import Base.length, Base.push!, Base.show, Base.getindex, diff --git a/src/cg.jl b/src/cg.jl index 700cf0805..f827528ea 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Void, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Void, B::Array) = dot(A, B) +cg_precondfwddot(A::Array, P::Void, B::Array) = vecdot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = vecdot(A, B) # Diagonal preconditioner function 
cg_precondfwd(out::Array, p::Vector, A::Array) @@ -195,12 +195,12 @@ function cg{T}(df::Union{DifferentiableFunction, iteration += 1 # Reset the search direction if it becomes corrupted - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 for i in 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 < 0 break end @@ -263,14 +263,14 @@ function cg{T}(df::Union{DifferentiableFunction, # Calculate the beta factor (HZ2012) precondprep(P, x) dPd = cg_precondinvdot(s, P, s) - etak::T = eta * dot(s, gr_previous) / dPd + etak::T = eta * vecdot(s, gr_previous) / dPd for i in 1:n @inbounds y[i] = gr[i] - gr_previous[i] end - ydots = dot(y, s) + ydots = vecdot(y, s) cg_precondfwd(pgr, P, gr) - betak = (dot(y, pgr) - cg_precondfwddot(y, P, y) * - dot(gr, s) / ydots) / ydots + betak = (vecdot(y, pgr) - cg_precondfwddot(y, P, y) * + vecdot(gr, s) / ydots) / ydots beta = max(betak, etak) for i in 1:n @inbounds s[i] = beta * s[i] - pgr[i] diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index d884354f4..acdd32fa8 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -85,7 +85,7 @@ function gradient_descent{T}(d::Union{DifferentiableFunction, end # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) clear!(lsr) push!(lsr, zero(T), f_x, dphi0) diff --git a/src/interior.jl b/src/interior.jl index c5d93942b..d6b0e73a3 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -251,7 +251,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, F, Hd = ldltfact!(Positive, H) s = F\(-gr) clear!(lsr) - push!(lsr, zero(T), f_x, dot(s,gr)) + push!(lsr, zero(T), f_x, vecdot(s,gr)) alphamax = toedge(x, s, constraints) alpha = alphamax < 1 ? 0.9*alphamax : 1.0 alpha, _f_calls, _g_calls = diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index cd11f8052..f3c5dade6 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -160,7 +160,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, twoloop_alpha, twoloop_q) # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 # The search direction is not a descent direction. Something # is wrong, so restart the search-direction algorithm. @@ -169,7 +169,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, for i = 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) end dphi0 == 0 && break # we're at a stationary point clear!(lsr) @@ -212,7 +212,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, end # Update the L-BFGS history of positions and gradients - rho_iteration = 1 / dot(dx, dgr) + rho_iteration = 1 / vecdot(dx, dgr) if isinf(rho_iteration) # TODO: Introduce a formal error? 
There was a warning here previously break diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index a250b3cb4..7282780e8 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -1,7 +1,7 @@ # phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g) # Dot product of two "vectors", even if they don't have a vector shape -function dot(x::Array, y::Array) +function vecdot(x::Array, y::Array) d = x[1] * y[1] for i = 2:length(x) d += x[i] * y[i] @@ -10,7 +10,7 @@ function dot(x::Array, y::Array) end # Vector-norm-squared, even if it doesn't have a vector shape -norm2(x::Array) = dot(x, x) +vecnorm2(x::Array) = vecdot(x, x) # Display flags are represented as a bitfield # (not exported, but can use via OptimizeMod.ITER, for example) @@ -612,7 +612,7 @@ function linefunc!{T}(df::Union{DifferentiableFunction, f_calls += 1 g_calls += 1 if isfinite(val) - gphi = dot(g, s) + gphi = vecdot(g, s) end else val = df.f(xtmp) From db76ca0922f458dbf2da14169d4daf696b9a1282 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 13:16:34 -0600 Subject: [PATCH 27/28] interior_newton: pass options, bail if encounter numeric instability Also cleans up a bit of the tracing & convergence-testing --- src/interior.jl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index d6b0e73a3..4e2ac3e27 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -239,10 +239,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 f_x = f_x_previous = convert(T,Inf) - x_converged = f_converged = gr_converged = converged = true - if m/t < eps_gap - f_x = combined_fg!(x, gr, objective.fg!, constraints, t) - end + x_converged = f_converged = gr_converged = converged = false # while m/t > eps_gap && iteration < iterations while iteration < iterations f_x_previous = f_x @@ -250,8 +247,16 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, combined_h!(x, H, objective.h!, constraints, t) F, Hd = ldltfact!(Positive, H) s = F\(-gr) + sgr = vecdot(s, gr) + if !(sgr <= 0) + # Presumably due to numeric instability + converged = x_converged = f_converged = gr_converged = false + break + end clear!(lsr) - push!(lsr, zero(T), f_x, vecdot(s,gr)) + push!(lsr, zero(T), f_x, sgr) + f_x /= t + @newtontrace alphamax = toedge(x, s, constraints) alpha = alphamax < 1 ? 0.9*alphamax : 1.0 alpha, _f_calls, _g_calls = @@ -262,8 +267,6 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, iter_t += 1 f_calls += _f_calls g_calls += _g_calls - f_x /= t - @newtontrace x_converged, f_converged, gr_converged, @@ -274,7 +277,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, gr, xtol, ftol, - grtol) + t*grtol) # It's time to change the barrier penalty if we're # sufficiently "in the basin." 
That means (1) the Hessian is # positive (semi)definite (all(Hd .> 0)) and that the Hessian @@ -352,7 +355,7 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl eps_gap = (cbrt(eps(T)))^2/t end if method == :newton - return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) end if !feasible(initial_x, constraints) error("Initial guess must be feasible") From 2943c3f9b6886e527bb5b3e982f010661d7b9b1c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 15:44:48 -0600 Subject: [PATCH 28/28] interior_newton: also check for finiteness --- src/interior.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index 4e2ac3e27..6f25434c9 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,7 +248,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, F, Hd = ldltfact!(Positive, H) s = F\(-gr) sgr = vecdot(s, gr) - if !(sgr <= 0) + if !(sgr <= 0) || isinf(sgr) # Presumably due to numeric instability converged = x_converged = f_converged = gr_converged = false break
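Finally, an end-to-end usage sketch drawn from the test/constrained.jl addition above: minimize f(z) = -2z^2 + z^4, whose unconstrained minima sit at z = ±1, inside the box [-0.9, 0.9]; the interior-point Newton path should terminate on the active bound at 0.9, to within the convergence tolerances:

f(x) = (z = x[1]; -2z^2 + z^4)
g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x)
h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H)
objective = TwiceDifferentiableFunction(f, g!, (x, gr) -> (g!(x, gr); f(x)), h!)
constr = ConstraintsBox([-0.9], [0.9])
soln = interior(objective, [0.2], constr, method = :newton)
soln.minimum[1]   # ≈ 0.9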