diff --git a/src/Optim.jl b/src/Optim.jl index 4a4272e0c..c3efdf6c9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -17,8 +17,13 @@ module Optim Base.setindex! export optimize, + isfeasible, + isinterior, + nconstraints, DifferentiableFunction, TwiceDifferentiableFunction, + DifferentiableConstraintsFunction, + TwiceDifferentiableConstraintsFunction, OptimizationOptions, OptimizationState, OptimizationTrace, @@ -30,6 +35,7 @@ module Optim Fminbox, GoldenSection, GradientDescent, + IPNewton, LBFGS, MomentumGradientDescent, NelderMead, @@ -76,6 +82,9 @@ module Optim # Constrained optimization include("fminbox.jl") + include("iplinesearch.jl") + include("interior.jl") + include("ipnewton.jl") # trust region methods include("levenberg_marquardt.jl") @@ -102,8 +111,9 @@ module Optim include("utilities/trace.jl") # Examples for testing - include(joinpath("problems", "unconstrained.jl")) + include(joinpath("problems", "multivariate.jl")) include(joinpath("problems", "univariate.jl")) + using .MultivariateProblems cgdescent(args...) = error("API has changed. Please use cg.") end diff --git a/src/deprecate.jl b/src/deprecate.jl index 17ee6f2be..b76fb4a94 100644 --- a/src/deprecate.jl +++ b/src/deprecate.jl @@ -23,3 +23,9 @@ end @deprecate interpolating_linesearch! LineSearches.strongwolfe! @deprecate backtracking_linesearch! LineSearches.backtracking! @deprecate interpbacktracking_linesearch! LineSearches.interpbacktracking! + +if VERSION >= v"0.5.0" + view5(A, i, j) = view(A, i, j) +else + view5(A, i, j) = A[i,j] +end diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..4a2e5ee2c --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,870 @@ +abstract AbstractBarrierState + +# These are used not only for the current state, but also for the step and the gradient +immutable BarrierStateVars{T} + slack_x::Vector{T} # values of slack variables for x + slack_c::Vector{T} # values of slack variables for c + λx::Vector{T} # λ for equality constraints on slack_x + λc::Vector{T} # λ for equality constraints on slack_c + λxE::Vector{T} # λ for equality constraints on x + λcE::Vector{T} # λ for linear/nonlinear equality constraints +end +# Note on λxE: +# We could just set equality-constrained variables to their +# constraint values at the beginning of optimization, but this +# might make the initial guess infeasible in terms of its +# inequality constraints. This would be a much bigger problem than +# not matching the equality constraints. So we allow them to +# differ, and require that the algorithm can cope with it. + +@compat function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds) + slack_x = Array{T}(length(bounds.ineqx)) + slack_c = Array{T}(length(bounds.ineqc)) + λx = similar(slack_x) + λc = similar(slack_c) + λxE = Array{T}(length(bounds.eqx)) + λcE = Array{T}(length(bounds.eqc)) + sv = BarrierStateVars{T}(slack_x, slack_c, λx, λc, λxE, λcE) +end +BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds) + +function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x) + sv = BarrierStateVars(bounds) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + sv +end +function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c) + sv = BarrierStateVars(bounds) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc) + sv +end +function setslack!(slack, v, ineq, σ, b) + for i = 1:length(ineq) + dv = v[ineq[i]]-b[i] + slack[i] = abs(σ[i]*dv) + end + slack +end + +slack(bstate::BarrierStateVars) = [bstate.slack_x; bstate.slack_c] +lambdaI(bstate::BarrierStateVars) = [bstate.λx; bstate.λc] +lambdaE(bstate::BarrierStateVars) = [bstate.λxE; bstate.λcE] +lambdaI(state::AbstractBarrierState) = lambdaI(state.bstate) +lambdaE(state::AbstractBarrierState) = lambdaE(state.bstate) + +Base.similar(bstate::BarrierStateVars) = + BarrierStateVars(similar(bstate.slack_x), + similar(bstate.slack_c), + similar(bstate.λx), + similar(bstate.λc), + similar(bstate.λxE), + similar(bstate.λcE)) + +Base.copy(bstate::BarrierStateVars) = + BarrierStateVars(copy(bstate.slack_x), + copy(bstate.slack_c), + copy(bstate.λx), + copy(bstate.λc), + copy(bstate.λxE), + copy(bstate.λcE)) + +function Base.fill!(b::BarrierStateVars, val) + fill!(b.slack_x, val) + fill!(b.slack_c, val) + fill!(b.λx, val) + fill!(b.λc, val) + fill!(b.λxE, val) + fill!(b.λcE, val) + b +end + +Base.convert{T}(::Type{BarrierStateVars{T}}, bstate::BarrierStateVars) = + BarrierStateVars(convert(Array{T}, bstate.slack_x), + convert(Array{T}, bstate.slack_c), + convert(Array{T}, bstate.λx), + convert(Array{T}, bstate.λc), + convert(Array{T}, bstate.λxE), + convert(Array{T}, bstate.λcE)) + +Base.isempty(bstate::BarrierStateVars) = isempty(bstate.slack_x) & + isempty(bstate.slack_c) & isempty(bstate.λxE) & isempty(bstate.λcE) + +Base.eltype{T}(::Type{BarrierStateVars{T}}) = T +Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv)) + +function Base.show(io::IO, b::BarrierStateVars) + print(io, "BarrierStateVars{$(eltype(b))}:") + for fn in (:slack_x, :slack_c, :λx, :λc, :λxE, :λcE) + print(io, "\n $fn: ") + show(io, getfield(b, fn)) + end +end + +@compat Base.:(==)(v::BarrierStateVars, w::BarrierStateVars) = + v.slack_x == w.slack_x && + v.slack_c == w.slack_c && + v.λx == w.λx && + v.λc == w.λc && + v.λxE == w.λxE && + v.λcE == w.λcE + +const bsv_seed = sizeof(UInt) == 64 ? 0x145b788192d1cde3 : 0x766a2810 +Base.hash(b::BarrierStateVars, u::UInt) = + hash(b.λcE, has(b.λxE, hash(b.λc, hash(b.λx, hash(b.slack_c, hash(b.slack_x, u+bsv_seed)))))) + +function Base.dot(v::BarrierStateVars, w::BarrierStateVars) + dot(v.slack_x,w.slack_x) + + dot(v.slack_c, w.slack_c) + + dot(v.λx, w.λx) + + dot(v.λc, w.λc) + + dot(v.λxE, w.λxE) + + dot(v.λcE, w.λcE) +end + +function Base.vecnorm(b::BarrierStateVars, p::Real) + vecnorm(b.slack_x, p) + vecnorm(b.slack_c, p) + + vecnorm(b.λx, p) + vecnorm(b.λc, p) + + vecnorm(b.λxE, p) + vecnorm(b.λcE, p) +end + +""" + BarrierLineSearch{T} + +Parameters for interior-point line search methods that use only the value +""" +immutable BarrierLineSearch{T} + c::Vector{T} # value of constraints-functions at trial point + bstate::BarrierStateVars{T} # trial point for slack and λ variables +end +Base.convert{T}(::Type{BarrierLineSearch{T}}, bsl::BarrierLineSearch) = + BarrierLineSearch(convert(Vector{T}, bsl.c), + convert(BarrierStateVars{T}, bsl.bstate)) + +""" + BarrierLineSearchGrad{T} + +Parameters for interior-point line search methods that exploit the slope. +""" +immutable BarrierLineSearchGrad{T} + c::Vector{T} # value of constraints-functions at trial point + J::Matrix{T} # constraints-Jacobian at trial point + bstate::BarrierStateVars{T} # trial point for slack and λ variables + bgrad::BarrierStateVars{T} # trial point's gradient +end +Base.convert{T}(::Type{BarrierLineSearchGrad{T}}, bsl::BarrierLineSearchGrad) = + BarrierLineSearchGrad(convert(Vector{T}, bsl.c), + convert(Matrix{T}, bsl.J), + convert(BarrierStateVars{T}, bsl.bstate), + convert(BarrierStateVars{T}, bsl.bgrad)) + +function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::NTuple{4,Number}) + ls_update!(out.slack_x, base.slack_x, step.slack_x, αs[2]) + ls_update!(out.slack_c, base.slack_c, step.slack_c, αs[2]) + ls_update!(out.λx, base.λx, step.λx, αs[3]) + ls_update!(out.λc, base.λc, step.λc, αs[3]) + ls_update!(out.λxE, base.λxE, step.λxE, αs[4]) + ls_update!(out.λcE, base.λcE, step.λcE, αs[4]) + out +end +ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::Tuple{Number,Number}) = + ls_update!(out, base, step, (αs[1],αs[1],αs[2],αs[1])) +ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α::Number) = + ls_update!(out, base, step, (α,α,α,α)) +ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::AbstractVector) = + ls_update!(out, base, step, αs[1]) # (αs...,)) + +function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions) + t0 = time() # Initial time stamp used to control early stopping by options.time_limit + + state = initial_state(method, options, d, constraints, initial_x) + + tr = OptimizationTrace{typeof(method)}() + tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing + stopped, stopped_by_callback, stopped_by_time_limit = false, false, false + + x_converged, f_converged, counter_f_tol = false, false, 0 + g_converged = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) < options.g_tol + + converged = g_converged + iteration = 0 + + options.show_trace && print_header(method) + + while !converged && !stopped && iteration < options.iterations + # If tracing, update trace with trace!. If a callback is provided, it + # should have boolean return value that controls the variable stopped_by_callback. + # This allows for early stopping controlled by the callback. + if tracing + stopped_by_callback = trace!(tr, state, iteration, method, options) + end + iteration += 1 + + update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS) + + update_fg!(d, constraints, state, method) + + x_converged, f_converged, + g_converged, converged = assess_convergence(state, options) + # With equality constraints, optimization is not necessarily + # monotonic in the value of the function. If the function + # change is approximately canceled by a change in the equality + # violation, it's possible to spuriously satisfy the f_tol + # criterion. Consequently, we require that the f_tol condition + # be satisfied a certain number of times in a row before + # declaring convergence. + counter_f_tol = f_converged ? counter_f_tol+1 : 0 + converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) + + # We don't use the Hessian for anything if we have declared convergence, + # so we might as well not make the (expensive) update if converged == true + !converged && update_h!(d, constraints, state, method) + + # Check time_limit; if none is provided it is NaN and the comparison + # will always return false. + stopped_by_time_limit = time()-t0 > options.time_limit ? true : false + + # Combine the two, so see if the stopped flag should be changed to true + # and stop the while loop + stopped = stopped_by_callback || stopped_by_time_limit ? true : false + end # while + + if tracing + trace!(tr, state, iteration, method, options) + end + + after_while!(d, constraints, state, method, options) + + return MultivariateOptimizationResults(state.method_string, + initial_x, + state.x, + Float64(state.f_x), + iteration, + iteration == options.iterations, + x_converged, + options.x_tol, + f_converged, + options.f_tol, + g_converged, + options.g_tol, + tr, + state.f_calls, + state.g_calls, + state.h_calls) +end + +# Fallbacks (for methods that don't need these) +after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing +update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing +update_asneeded_fg!(d, constraints, state, method) = update_fg!(d, constraints, state, method) +update_asneeded_fg!(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) = update_g!(d, constraints, state, method) + +""" + initialize_μ_λ!(state, bounds, μ0=:auto, β=0.01) + initialize_μ_λ!(state, bounds, (Hobj,HcI), μ0=:auto, β=0.01) + +Pick μ and λ to ensure that the equality constraints are satisfied +locally (at the current `state.x`), and that the initial gradient +including the barrier would be a descent direction for the problem +without the barrier (μ = 0). This ensures that the search isn't pushed +out of the basin of the user-supplied initial guess. + +Upon entry, the objective function gradient, constraint values, and +constraint jacobian must be set in `state.g`, `state.c`, and `state.J` +respectively. If you also wish to ensure that the projection of +Hessian is minimally-perturbed along the initial gradient, supply the +hessian of the objective (`Hobj`) and + + HcI = ∑_i (σ_i/s_i)∇∇ c_{Ii} + +for the constraints. This can be obtained as + + HcI = hessianI(state.x, constraints, 1./state.slack_c) + +You can manually specify `μ` by supplying a numerical value for +`μ0`. Whether calculated algorithmically or specified manually, the +values of `λ` are set using the chosen `μ`. +""" +function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β::Number=1//100) + if nconstraints(bounds) == 0 && nconstraints_x(bounds) == 0 + state.μ = 0 + fill!(state.bstate, 0) + return state + end + gf = state.g # must be pre-set to ∇f + # Calculate projection of ∇f into the subspace spanned by the + # equality constraint Jacobian + JE = jacobianE(state, bounds) + # QRF = qrfact(JE) + # Q = QRF[:Q] + # PEg = Q'*(Q*gf) # in the subspace of JE + C = JE*JE' + Cc = cholfact(Positive, C) + Pperpg = gf-JE'*(Cc \ (JE*gf)) # in the nullspace of JE + # Set μ + JI = jacobianI(state, bounds) + if μ0 == :auto + # Calculate projections of the Lagrangian's gradient, and + # possibly hessian, along (∇f)_⟂ + Dperp = dot(Pperpg, Pperpg) + σ, s = sigma(bounds), slack(state) + σdivs = σ./s + Δg = JI'*σdivs + PperpΔg = Δg - JE'*(Cc \ (JE*Δg)) + DI = dot(PperpΔg, PperpΔg) + κperp, κI = hessian_projections(Hinfo, Pperpg, (JI*Pperpg)./s) + # Calculate μ and λI + μ = β * (κperp == 0 ? sqrt(Dperp/DI) : min(sqrt(Dperp/DI), abs(κperp/κI))) + if !isfinite(μ) + Δgtilde = JI'*(1./s) + PperpΔgtilde = Δgtilde - JE'*(Cc \ (JE*Δgtilde)) + DItilde = dot(PperpΔgtilde, PperpΔgtilde) + μ = β*sqrt(Dperp/DItilde) + end + if !isfinite(μ) || μ == 0 + μ = one(μ) + end + else + μ = convert(eltype(state.x), μ0) + end + state.μ = μ + # Set λI + state.bstate.λx[:] = μ./state.bstate.slack_x + state.bstate.λc[:] = μ./state.bstate.slack_c + # Calculate λE + λI = lambdaI(state) + ∇bI = gf - JI'*λI +# qrregularize!(QRF) # in case of any 0 eigenvalues + λE = Cc \ (JE*∇bI) + (cbar(bounds) - cE(state, bounds))/μ + k = unpack_vec!(state.bstate.λxE, λE, 0) + k = unpack_vec!(state.bstate.λcE, λE, k) + k == length(λE) || error("something is wrong") + state +end +function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β::Number=1//100) + initialize_μ_λ!(state, bounds, nothing, μ0, β) +end + +function hessian_projections(Hinfo::Tuple{AbstractMatrix,AbstractMatrix}, Pperpg, y) + κperp = dot(Hinfo[1]*Pperpg, Pperpg) + κI = dot(Hinfo[2]*Pperpg, Pperpg) + dot(y,y) + κperp, κI +end +hessian_projections{T}(Hinfo::Void, Pperpg::AbstractVector{T}) = convert(T, Inf), zero(T) + +function jacobianE(state, bounds::ConstraintBounds) + J, x = state.constr_J, state.x + JEx = jacobianx(J, bounds.eqx) + JEc = view5(J, bounds.eqc, :) + JE = vcat(JEx, JEc) +end +jacobianE(state, constraints) = jacobianE(state, constraints.bounds) + +function jacobianI(state, bounds::ConstraintBounds) + J, x = state.constr_J, state.x + JIx = jacobianx(J, bounds.ineqx) + JIc = view5(J, bounds.ineqc, :) + JI = vcat(JIx, JIc) +end +jacobianI(state, constraints) = jacobianI(state, constraints.bounds) + +# TODO: when Optim supports sparse arrays, make a SparseMatrixCSC version +function jacobianx(J::AbstractArray, indx) + Jx = zeros(eltype(J), length(indx), size(J, 2)) + for (i,j) in enumerate(indx) + Jx[i,j] = 1 + end + Jx +end + +function sigma(bounds::ConstraintBounds) + [bounds.σx; bounds.σc] # don't include σz +end +sigma(constraints) = sigma(constraints.bounds) + +slack(state) = slack(state.bstate) + +cbar(bounds::ConstraintBounds) = [bounds.valx; bounds.valc] +cbar(constraints) = cbar(constraints.bounds) +cE(state, bounds::ConstraintBounds) = [state.x[bounds.eqx]; state.constr_c[bounds.eqc]] + +function hessianI!(h, x, constraints, λcI, μ) + λ = userλ(λcI, constraints) + constraints.h!(x, λ, h) + h +end + +""" + hessianI(x, constraints, λcI, μ) -> h + +Compute the hessian at `x` of the `λcI`-weighted sum of user-supplied +constraint functions for just the inequalities. This also includes +contributions from any variables with bounds at 0, since those do not +cause introduction of a slack variable. Other (nonzero) box +constraints do not contribute to `h`, because the hessian of `x_i` is +zero. (They contribute indirectly via their slack variables.) +""" +hessianI(x, constraints, λcI, μ) = + hessianI!(zeros(eltype(x), length(x), length(x)), x, constraints, λcI, μ) + +""" + userλ(λcI, bounds) -> λ + +Accumulates `λcI` into a vector `λ` ordered as the user-supplied +constraint functions `c`. Upper and lower bounds are summed, weighted +by `σ`. The resulting λ includes an overall negative sign so that this +becomes the coefficient for the user-supplied hessian. + +This is relevant only for the inequalities. If you want the λ for just +the equalities, you can use `λ[bounds.ceq] = λcE` for a zero-filled `λ`. +""" +function userλ(λcI, bounds::ConstraintBounds) + ineqc, σc = bounds.ineqc, bounds.σc + λ = zeros(eltype(bounds), nconstraints(bounds)) + for i = 1:length(ineqc) + λ[ineqc[i]] -= λcI[i]*σc[i] + end + λ +end +userλ(λcI, constraints) = userλ(λcI, constraints.bounds) + +## Computation of the Lagrangian and its gradient +# This is in a parametrization that is also useful during linesearch + +function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ) + f_x = d.f(x) + ev = equality_violation(bounds, x, c, bstate) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev + f_x, L_xsλ, ev +end + +function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) + fill!(bgrad, 0) + d.g!(x, gx) + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + equality_grad!(gx, bgrad, bounds, x, c, J, bstate) + nothing +end + +function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) + fill!(bgrad, 0) + f_x = d.fg!(x, gx) + ev = equality_violation(bounds, x, c, bstate) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + equality_grad!(gx, bgrad, bounds, x, c, J, bstate) + f_x, L_xsλ, ev +end + +## Computation of Lagrangian and derivatives when passing all parameters as a single vector +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ) + unpack_vec!(x, bstate, p) + f_x, L_xsλ, ev = lagrangian(d, bounds, x, c, bstate, μ) + L_xsλ +end +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ) + # Use this version when using automatic differentiation + unpack_vec!(x, bstate, p) + f_x, L_xsλ, ev = lagrangian(d, bounds, x, c(x), bstate, μ) + L_xsλ +end +function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) + unpack_vec!(x, bstate, p) + f_x, L_xsλ, ev = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) + pack_vec!(storage, gx, bgrad) + L_xsλ +end + +## for line searches that don't use the gradient along the line +function lagrangian_linefunc(αs, d, constraints, state) + _lagrangian_linefunc(αs, d, constraints, state)[2] +end + +function _lagrangian_linefunc(αs, d, constraints, state) + b_ls, bounds = state.b_ls, constraints.bounds + ls_update!(state.x_ls, state.x, state.s, alphax(αs)) + ls_update!(b_ls.bstate, state.bstate, state.bstep, αs) + constraints.c!(state.x_ls, b_ls.c) + lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) +end +alphax(α::Number) = α +alphax(αs::Union{Tuple,AbstractVector}) = αs[1] + +function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) + # For backtrack_constrained, the last evaluation is the one we + # keep, so it's safe to store the results in state + state.f_x, state.L, state.ev = _lagrangian_linefunc(α, d, constraints, state) + state.L +end +lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state) + + +## for line searches that do use the gradient along the line +function lagrangian_lineslope(αs, d, constraints, state) + f_x, L, ev, slope = _lagrangian_lineslope(αs, d, constraints, state) + L, slope +end + +function _lagrangian_lineslope(αs, d, constraints, state) + b_ls, bounds = state.b_ls, constraints.bounds + bstep, bgrad = state.bstep, b_ls.bgrad + ls_update!(state.x_ls, state.x, state.s, alphax(αs)) + ls_update!(b_ls.bstate, state.bstate, bstep, αs) + constraints.c!(state.x_ls, b_ls.c) + constraints.jacobian!(state.x_ls, b_ls.J) + f_x, L, ev = lagrangian_fg!(state.g_ls, bgrad, d, bounds, state.x_ls, b_ls.c, b_ls.J, b_ls.bstate, state.μ) + slopeα = slopealpha(state.s, state.g_ls, bstep, bgrad) + f_x, L, ev, slopeα +end + +function lagrangian_lineslope!(αs, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) + # For backtrack_constrained, the last evaluation is the one we + # keep, so it's safe to store the results in state + state.f_x, state.L, state.ev, slope = _lagrangian_lineslope(αs, d, constraints, state) + state.L, slope +end +lagrangian_lineslope!(αs, d, constraints, state, method) = lagrangian_lineslope(αs, d, constraints, state) + +slopealpha(sx, gx, bstep, bgrad) = dot(sx, gx) + + dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c) + + dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc) + + dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE) + +if VERSION >= v"0.5.0" + function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) + αs->lagrangian_lineslope!(αs, d, constraints, state, method) + end + function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) + αs->lagrangian_linefunc!(αs, d, constraints, state, method) + end +else + # 0.4 can't dispatch on a particular function + function linesearch_anon(d, constraints, state, method::IPOptimizer) + ls = method.linesearch! + if ls == backtrack_constrained_grad + return αs->lagrangian_lineslope!(αs, d, constraints, state, method) + end + αs->lagrangian_linefunc!(αs, d, constraints, state, method) + end +end + +## Computation of Lagrangian terms: barrier penalty +""" + barrier_value(constraints, state) -> val + barrier_value(bounds, x, sx, sc, μ) -> val + +Compute the value of the barrier penalty at the current `state`, or at +a position (`x`,`sx`,`sc`), where `x` is the current position, `sx` +are the coordinate slack variables, and `sc` are the linear/nonlinear +slack variables. `bounds` holds the parsed bounds. +""" +function barrier_value(bounds::ConstraintBounds, x, sx, sc, μ) + # bμ is the coefficient of μ in the barrier penalty + bμ = _bv(sx) + # coords with other bounds + _bv(sc) # linear/nonlinear constr. + μ*bμ +end +barrier_value(bounds::ConstraintBounds, x, bstate::BarrierStateVars, μ) = + barrier_value(bounds, x, bstate.slack_x, bstate.slack_c, μ) +barrier_value(bounds::ConstraintBounds, state) = + barrier_value(bounds, state.x, state.bstate.slack_x, state.bstate.slack_c, state.μ) +barrier_value(constraints::AbstractConstraintsFunction, state) = + barrier_value(constraints.bounds, state) + +# don't call this barrier_value because it lacks μ +function _bv(v, idx, σ) + ret = loginf(one(eltype(σ))*one(eltype(v))) + for (i,iv) in enumerate(idx) + ret += loginf(σ[i]*v[iv]) + end + -ret +end + +_bv(v) = isempty(v) ? loginf(one(eltype(v))) : -sum(loginf, v) + +loginf(δ) = δ > 0 ? log(δ) : -oftype(δ, Inf) + +""" + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + barrier_grad!(gx, gsx, gsc, bounds, x, sx, sc, μ) + +Compute the gradient of the barrier penalty at (`x`,`sx`,`sc`), where +`x` is the current position, `sx` are the coordinate slack variables, +and `sc` are the linear/nonlinear slack +variables. `bounds::ConstraintBounds` holds the parsed bounds. + +The result is *added* to `gx`, `gsx`, and `gsc`, so these vectors +need to be initialized appropriately. +""" +function barrier_grad!(gx, gsx, gsc, bounds::ConstraintBounds, x, sx, sc, μ) + barrier_grad!(gsx, sx, μ) + barrier_grad!(gsc, sc, μ) + nothing +end +barrier_grad!(gx, bgrad, bounds::ConstraintBounds, x, bstate, μ) = + barrier_grad!(gx, bgrad.slack_x, bgrad.slack_c, bounds, x, bstate.slack_x, bstate.slack_c, μ) + +function barrier_grad!(out, v, μ) + for i = 1:length(out) + out[i] -= μ/v[i] + end + nothing +end + + +## Computation of Lagrangian terms: equality constraints penalty + +""" + equality_violation([f=identity], bounds, x, c, bstate) -> val + equality_violation([f=identity], bounds, x, c, sx, sc, λx, λc, λxE, λcE) -> val + +Compute the sum of `f(v_i)`, where `v_i = λ_i*(target - observed)` +measures the difference between the current state and the +equality-constrained state. `bounds::ConstraintBounds` holds the +parsed bounds. `x` is the current position, `sx` are the coordinate +slack variables, and `sc` are the linear/nonlinear slack +variables. `c` holds the values of the linear-nonlinear constraints, +and the λ arguments hold the Lagrange multipliers for `x`, `sx`, `sc`, and +`c` respectively. +""" +function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE) + ev = equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) + + equality_violation(f, sc, c, bounds.ineqc, bounds.σc, bounds.bc, λc) + + equality_violation(f, x, bounds.valx, bounds.eqx, λxE) + + equality_violation(f, c, bounds.valc, bounds.eqc, λcE) +end +equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE) = + equality_violation(identity, bounds, x, c, sx, sc, λx, λc, λxE, λcE) +function equality_violation(f, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) + equality_violation(f, bounds, x, c, bstate.slack_x, bstate.slack_c, + bstate.λx, bstate.λc, bstate.λxE, bstate.λcE) +end +equality_violation(bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) = + equality_violation(identity, bounds, x, c, bstate) +equality_violation(f, bounds::ConstraintBounds, state::AbstractBarrierState) = + equality_violation(f, bounds, state.x, state.constr_c, state.bstate) +equality_violation(bounds::ConstraintBounds, state::AbstractBarrierState) = + equality_violation(identity, bounds, state) +equality_violation(f, constraints::AbstractConstraintsFunction, state::AbstractBarrierState) = + equality_violation(f, constraints.bounds, state) +equality_violation(constraints::AbstractConstraintsFunction, state::AbstractBarrierState) = + equality_violation(constraints.bounds, state) + +# violations of s = σ*(v-b) +function equality_violation(f, s, v, ineq, σ, b, λ) + ret = f(zero(eltype(λ))*(zero(eltype(s))-zero(eltype(σ))*(zero(eltype(v))-zero(eltype(b))))) + for (i,iv) in enumerate(ineq) + ret += f(λ[i]*(s[i] - σ[i]*(v[iv]-b[i]))) + end + ret +end + +# violations of v = target +function equality_violation(f, v, target, idx, λ) + ret = f(zero(eltype(λ))*(zero(eltype(v))-zero(eltype(target)))) + for (i,iv) in enumerate(idx) + ret += f(λ[i]*(target[i] - v[iv])) + end + ret +end + +""" + equality_grad!(gx, gbstate, bounds, x, c, J, bstate) + +Compute the gradient of `equality_violation`, storing the result in `gx` (an array) and `gbstate::BarrierStateVars`. +""" +function equality_grad!(gx, gsx, gsc, gλx, gλc, gλxE, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λx, λc, λxE, λcE) + equality_grad_var!(gsx, gx, bounds.ineqx, bounds.σx, λx) + equality_grad_var!(gsc, gx, bounds.ineqc, bounds.σc, λc, J) + gx[bounds.eqx] = gx[bounds.eqx] - λxE + equality_grad_var!(gx, bounds.eqc, λcE, J) + equality_grad_λ!(gλx, sx, x, bounds.ineqx, bounds.σx, bounds.bx) + equality_grad_λ!(gλc, sc, c, bounds.ineqc, bounds.σc, bounds.bc) + equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx) + equality_grad_λ!(gλcE, c, bounds.valc, bounds.eqc) +end +equality_grad!(gx, gb::BarrierStateVars, bounds::ConstraintBounds, x, c, J, b::BarrierStateVars) = + equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λx, gb.λc, gb.λxE, gb.λcE, + bounds, x, c, J, + b.slack_x, b.slack_c, b.λx, b.λc, b.λxE, b.λcE) + +# violations of s = σ*(x-b) +function equality_grad_var!(gs, gx, ineq, σ, λ) + for (i,ix) in enumerate(ineq) + λi = λ[i] + gs[i] += λi + gx[ix] -= λi*σ[i] + end + nothing +end + +function equality_grad_var!(gs, gx, ineq, σ, λ, J) + gs[:] = gs + λ + if !isempty(ineq) + gx[:] = gx - view5(J, ineq, :)'*(λ.*σ) + end + nothing +end + +function equality_grad_λ!(gλ, s, v, ineq, σ, b) + for (i,iv) in enumerate(ineq) + gλ[i] += s[i] - σ[i]*(v[iv]-b[i]) + end + nothing +end + +# violations of v = target +function equality_grad_var!(gx, idx, λ, J) + if !isempty(idx) + gx[:] = gx - view5(J, idx, :)'*λ + end + nothing +end + +function equality_grad_λ!(gλ, v, target, idx) + for (i,iv) in enumerate(idx) + gλ[i] += target[i] - v[iv] + end + nothing +end + +""" + isfeasible(constraints, state) -> Bool + isfeasible(constraints, x, c) -> Bool + isfeasible(constraints, x) -> Bool + isfeasible(bounds, x, c) -> Bool + +Return `true` if point `x` is feasible, given the `constraints` which +specify bounds `lx`, `ux`, `lc`, and `uc`. `x` is feasible if + + lx[i] <= x[i] <= ux[i] + lc[i] <= c[i] <= uc[i] + +for all possible `i`. +""" +function isfeasible(bounds::ConstraintBounds, x, c) + isf = true + for (i,j) in enumerate(bounds.eqx) + isf &= x[j] == bounds.valx[i] + end + for (i,j) in enumerate(bounds.ineqx) + isf &= bounds.σx[i]*(x[j] - bounds.bx[i]) >= 0 + end + for (i,j) in enumerate(bounds.eqc) + isf &= c[j] == bounds.valc[i] + end + for (i,j) in enumerate(bounds.ineqc) + isf &= bounds.σc[i]*(c[j] - bounds.bc[i]) >= 0 + end + isf +end +isfeasible(constraints, state::AbstractBarrierState) = isfeasible(constraints, state.x, state.constraints_c) +function isfeasible(constraints, x) + # don't assume c! returns c (which means this is a little more awkward) + c = Array{eltype(x)}(constraints.bounds.nc) + constraints.c!(x, c) + isfeasible(constraints, x, c) +end +isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c) +isfeasible(constraints::Void, state::AbstractBarrierState) = true +isfeasible(constraints::Void, x) = true + +""" + isinterior(constraints, state) -> Bool + isinterior(constraints, x, c) -> Bool + isinterior(constraints, x) -> Bool + isinterior(bounds, x, c) -> Bool + +Return `true` if point `x` is on the interior of the allowed region, +given the `constraints` which specify bounds `lx`, `ux`, `lc`, and +`uc`. `x` is in the interior if + + lx[i] < x[i] < ux[i] + lc[i] < c[i] < uc[i] + +for all possible `i`. +""" +function isinterior(bounds::ConstraintBounds, x, c) + isi = true + for (i,j) in enumerate(bounds.ineqx) + isi &= bounds.σx[i]*(x[j] - bounds.bx[i]) > 0 + end + for (i,j) in enumerate(bounds.ineqc) + isi &= bounds.σc[i]*(c[j] - bounds.bc[i]) > 0 + end + isi +end +isinterior(constraints, state::AbstractBarrierState) = isinterior(constraints, state.x, state.constraints_c) +function isinterior(constraints, x) + c = Array{eltype(x)}(constraints.bounds.nc) + constraints.c!(x, c) + isinterior(constraints, x, c) +end +isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c) +isinterior(constraints::Void, state::AbstractBarrierState) = true +isinterior(constraints::Void, x) = true + +## Utilities for representing total state as single vector +function pack_vec(x, b::BarrierStateVars) + n = length(x) + for fn in fieldnames(b) + n += length(getfield(b, fn)) + end + vec = Array{eltype(x)}(n) + pack_vec!(vec, x, b) +end + +function pack_vec!(vec, x, b::BarrierStateVars) + k = pack_vec!(vec, x, 0) + for fn in fieldnames(b) + k = pack_vec!(vec, getfield(b, fn), k) + end + k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))")) + vec +end +function pack_vec!(vec, x, k::Int) + for i = 1:length(x) + vec[k+=1] = x[i] + end + k +end +function unpack_vec!(x, b::BarrierStateVars, vec::Vector) + k = unpack_vec!(x, vec, 0) + for fn in fieldnames(b) + k = unpack_vec!(getfield(b, fn), vec, k) + end + k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))")) + x, b +end +function unpack_vec!(x, vec::Vector, k::Int) + for i = 1:length(x) + x[i] = vec[k+=1] + end + k +end + +## More utilities +function estimate_maxstep(αmax, x, s) + for i = 1:length(s) + si = s[i] + if si < 0 + αmax = min(αmax, -x[i]/si) + end + end + αmax +end + +function shrink_μ!(d, constraints, state, method, options) + state.μ *= options.μfactor + update_fg!(d, constraints, state, method) +end + +function qrregularize!(QRF) + R = QRF[:R] + for i = 1:size(R, 1) + if R[i,i] == 0 + R[i,i] = 1 + end + end + QRF +end diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl new file mode 100644 index 000000000..647f6a7a9 --- /dev/null +++ b/src/iplinesearch.jl @@ -0,0 +1,62 @@ +function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, + c1 = 0.5, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α)))) + α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) + αmin = αminfrac * α + L0, L1, L2 = Lcoefsα + f_calls = 0 + while α >= αmin + f_calls += 1 + val = ϕ((α, αI)) + δ = evalgrad(L1, α, αI) + if isfinite(val) && val - (L0 + δ) <= c1*abs(val-L0) + return α, αI, f_calls, 0 + end + α *= ρ + αI *= ρ + end + ϕ((zero(α), zero(αI))) # to ensure that state gets set appropriately + return zero(α), zero(αI), f_calls, 0 +end + +function backtrack_constrained_grad(ϕ, α, αmax, Lcoefsα, + c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), + αminfrac = sqrt(eps(one(α))); show_linesearch::Bool=false) + α = min(α, 0.999*αmax) + αmin = αminfrac * α + L0, L1, L2 = Lcoefsα + if show_linesearch + println("L0 = $L0, L1 = $L1, L2 = $L2") + end + f_calls = 0 + while α >= αmin + f_calls += 1 + val, slopeα = ϕ(α) + δval = L1*α + δslope = L2*α + if show_linesearch + println("α = $α, value: ($L0, $val, $(L0+δval)), slope: ($L1, $slopeα, $(L1+δslope))") + end + if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) && + (slopeα < c2*abs(L1) || + slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1)) + return α, f_calls, f_calls + end + α *= ρ + end + ϕ(zero(α)) # to ensure that state gets set appropriately + return zero(α), f_calls, f_calls +end + +# Evaluate for a step parametrized as [α, α, αI, α] +function evalgrad(slopeα, α, αI) + α*(slopeα[1] + slopeα[2] + slopeα[4]) + αI*slopeα[3] +end + +function mulhess(Hα, α, αI) + αv = [α, α, αI, α] + Hα*αv +end +function evalhess(Hα, α, αI) + αv = [α, α, αI, α] + dot(αv, Hα*αv) +end diff --git a/src/ipnewton.jl b/src/ipnewton.jl new file mode 100644 index 000000000..d71e2bd2c --- /dev/null +++ b/src/ipnewton.jl @@ -0,0 +1,391 @@ +immutable IPNewton{F} <: IPOptimizer{F} + linesearch!::F +end + +IPNewton(; linesearch!::Function = backtrack_constrained_grad) = + IPNewton(linesearch!) + +type IPNewtonState{T,N} <: AbstractBarrierState + @add_generic_fields() + x_previous::Array{T,N} + g::Array{T,N} + f_x_previous::T + H::Matrix{T} + HP + Hd::Vector{Int8} + s::Array{T,N} # step for x + # Barrier penalty fields + μ::T # coefficient of the barrier penalty + μnext::T # μ for the next iteration + L::T # value of the Lagrangian (objective + barrier + equality) + L_previous::T + bstate::BarrierStateVars{T} # value of slack and λ variables (current "position") + bgrad::BarrierStateVars{T} # gradient of slack and λ variables at current "position" + bstep::BarrierStateVars{T} # search direction for slack and λ + constr_c::Vector{T} # value of the user-supplied constraints at x + constr_J::Matrix{T} # value of the user-supplied Jacobian at x + ev::T # equality violation, ∑_i λ_Ei (c*_i - c_i) + @add_linesearch_fields() + b_ls::BarrierLineSearchGrad{T} + gtilde::Vector{T} + Htilde +end + +function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,N}) + IPNewtonState(state.method_string, + state.n, + convert(Array{T}, state.x), + T(state.f_x), + state.f_calls, + state.g_calls, + state.h_calls, + convert(Array{T}, state.x_previous), + convert(Array{T}, state.g), + T(state.f_x_previous), + convert(Array{T}, state.H), + state.HP, + state.Hd, + convert(Array{T}, state.s), + T(state.μ), + T(state.μnext), + T(state.L), + T(state.L_previous), + convert(BarrierStateVars{T}, state.bstate), + convert(BarrierStateVars{T}, state.bgrad), + convert(BarrierStateVars{T}, state.bstep), + convert(Array{T}, state.constr_c), + convert(Array{T}, state.constr_J), + T(state.ev), + convert(Array{T}, state.x_ls), + convert(Array{T}, state.g_ls), + T(state.alpha), + state.mayterminate, + state.lsr, + convert(BarrierLineSearchGrad{T}, state.b_ls), + convert(Array{T}, state.gtilde), + state.Htilde, + ) +end + +function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T}) + # Check feasibility of the initial state + mc = nconstraints(constraints) + constr_c = Array{T}(mc) + constraints.c!(initial_x, constr_c) + if !isinterior(constraints, initial_x, constr_c) + warn("initial guess is not an interior point") + Base.show_backtrace(STDERR, backtrace()) + println(STDERR) + end + # Allocate fields for the objective function + n = length(initial_x) + g = Array(T, n) + s = Array(T, n) + x_ls, g_ls = Array(T, n), Array(T, n) + f_x_previous, f_x = NaN, d.fg!(initial_x, g) + f_calls, g_calls = 1, 1 + H = Array(T, n, n) + Hd = Array{Int8}(n) + d.h!(initial_x, H) + h_calls = 1 + + # More constraints + constr_J = Array{T}(mc, n) + constr_gtemp = Array{T}(n) + gtilde = similar(g) + constraints.jacobian!(initial_x, constr_J) + μ = T(1) + bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c) + bgrad = similar(bstate) + bstep = similar(bstate) + # b_ls = BarrierLineSearch(similar(constr_c), similar(bstate)) + b_ls = BarrierLineSearchGrad(similar(constr_c), similar(constr_J), similar(bstate), similar(bstate)) + + state = IPNewtonState("Interior-point Newton's Method", + length(initial_x), + copy(initial_x), # Maintain current state in state.x + f_x, # Store current f in state.f_x + f_calls, # Track f calls in state.f_calls + g_calls, # Track g calls in state.g_calls + h_calls, + copy(initial_x), # Maintain current state in state.x_previous + g, # Store current gradient in state.g + T(NaN), # Store previous f in state.f_x_previous + H, + 0, # will be replaced + Hd, + similar(initial_x), # Maintain current x-search direction in state.s + μ, + μ, + T(NaN), + T(NaN), + bstate, + bgrad, + bstep, + constr_c, + constr_J, + T(NaN), + @initial_linesearch()..., # Maintain a cache for line search results in state.lsr + b_ls, + gtilde, + 0) + + d.h!(initial_x, state.H) + Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1)) + initialize_μ_λ!(state, constraints.bounds, Hinfo, options.μ0) + update_fg!(d, constraints, state, method) + update_h!(d, constraints, state, method) +end + +function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + state.f_x, state.L, state.ev = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) + state.f_calls += 1 + state.g_calls += 1 + update_gtilde!(d, constraints, state, method) +end + +function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) + state.g_calls += 1 + update_gtilde!(d, constraints, state, method) +end + +function update_gtilde!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + # Calculate the modified x-gradient for the block-eliminated problem + # gtilde is the gradient for the affine-scaling problem, i.e., + # with μ=0, used in the adaptive setting of μ. Once we calculate μ we'll correct it + gtilde, bstate, bgrad = state.gtilde, state.bstate, state.bgrad + bounds = constraints.bounds + copy!(gtilde, state.g) + JIc = view5(state.constr_J, bounds.ineqc, :) + if !isempty(JIc) + Hssc = Diagonal(bstate.λc./bstate.slack_c) + gc = JIc'*(Diagonal(bounds.σc) * (bstate.λc - Hssc*bgrad.λc)) # NOT bgrad.slack_c + for i = 1:length(gtilde) + gtilde[i] += gc[i] + end + end + for (i,j) in enumerate(bounds.ineqx) + gxi = bounds.σx[i]*(bstate.λx[i] - bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i]) + gtilde[j] += gxi + end + state +end + +function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + x, μ, Hxx, J = state.x, state.μ, state.H, state.constr_J + bstate, bgrad, bounds = state.bstate, state.bgrad, constraints.bounds + m, n = size(J, 1), size(J, 2) + + d.h!(state.x, Hxx) # objective's Hessian + # accumulate the constraint second derivatives + λ = userλ(bstate.λc, constraints) + λ[bounds.eqc] = -bstate.λcE # the negative sign is from the Hessian + constraints.h!(x, λ, Hxx) + # Add the Jacobian terms (JI'*Hss*JI) + JIc = view5(J, bounds.ineqc, :) + Hssc = Diagonal(bstate.λc./bstate.slack_c) + HJ = JIc'*Hssc*JIc + for j = 1:n, i = 1:n + Hxx[i,j] += HJ[i,j] + end + # Add the variable inequalities portions of J'*Hssx*J + for (i,j) in enumerate(bounds.ineqx) + Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i] + end + state.Htilde = cholfact(Positive, state.H, Val{true}) + + state +end + +function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton, options) + state.f_x_previous, state.L_previous = state.f_x, state.L + bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds + qp = solve_step!(state, constraints, options) + # If a step α=1 will not change any of the parameters, we can quit now. + # This prevents a futile linesearch. + if is_smaller_eps(state.x, state.s) && + is_smaller_eps(bstate.slack_x, bstep.slack_x) && + is_smaller_eps(bstate.slack_c, bstep.slack_c) && + is_smaller_eps(bstate.λx, bstep.λx) && + is_smaller_eps(bstate.λc, bstep.λc) + return false + end + # qp = quadratic_parameters(bounds, state) + + # Estimate αmax, the upper bound on distance of movement along the search line + αmax = convert(eltype(bstate), Inf) + αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x) + αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c) + αmax = estimate_maxstep(αmax, bstate.λx, bstep.λx) + αmax = estimate_maxstep(αmax, bstate.λc, bstep.λc) + + # Determine the actual distance of movement along the search line + ϕ = linesearch_anon(d, constraints, state, method) + state.alpha, f_update, g_update = + method.linesearch!(ϕ, T(1), αmax, qp; show_linesearch=options.show_linesearch) + state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update + + # Maintain a record of previous position + copy!(state.x_previous, state.x) + + # Update current position # x = x + alpha * s + ls_update!(state.x, state.x, state.s, state.alpha) + ls_update!(bstate, bstate, bstep, state.alpha) + + # Ensure that the primal-dual approach does not deviate too much from primal + # (See Waechter & Biegler 2006, eq. 16) + μ = state.μ + for i = 1:length(bstate.slack_x) + p = μ/bstate.slack_x[i] + bstate.λx[i] = max(min(bstate.λx[i], 10^10*p), p/10^10) + end + for i = 1:length(bstate.slack_c) + p = μ/bstate.slack_c[i] + bstate.λc[i] = max(min(bstate.λc[i], 10^10*p), p/10^10) + end + state.μ = state.μnext + + # Evaluate the constraints at the new position + constraints.c!(state.x, state.constr_c) + constraints.jacobian!(state.x, state.constr_J) + state.ev == equality_violation(constraints, state) + + false +end + +function solve_step!(state::IPNewtonState, constraints, options) + x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds + bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad + J, Htilde = state.constr_J, state.Htilde + # Solve the Newton step + JE = jacobianE(state, bounds) + gE = [bgrad.λxE; + bgrad.λcE] + M = JE*(Htilde \ JE') + MF = cholfact(Positive, M, Val{true}) + # These are a solution to the affine-scaling problem (with μ=0) + ΔλE0 = MF \ (gE + JE * (Htilde \ state.gtilde)) + Δx0 = Htilde \ (JE'*ΔλE0 - state.gtilde) + # Check that the solution to the linear equations represents an improvement + Hpstepx, HstepλE = full(Htilde)*Δx0 - JE'*ΔλE0, -JE*Δx0 # TODO: don't use full here + if options.show_linesearch + println("|gx| = $(norm(state.gtilde)), |Hstepx + gx| = $(norm(Hpstepx+state.gtilde))") + println("|gE| = $(norm(gE)), |HstepλE + gE| = $(norm(HstepλE+gE))") + end + if norm(gE) + norm(state.gtilde) < max(norm(HstepλE + gE), + norm(Hpstepx + state.gtilde)) + # Precision problems gave us a worse solution than the one we started with, abort + fill!(s, 0) + fill!(bstep, 0) + return state + end + # Set μ (see the predictor strategy in Nodecal & Wright, 2nd ed., section 19.3) + solve_slack!(bstep, Δx0, bounds, bstate, bgrad, J, zero(state.μ)) # store temporarily in bstep + αs = convert(eltype(bstate), 1.0) + αs = estimate_maxstep(αs, bstate.slack_x, bstep.slack_x) + αs = estimate_maxstep(αs, bstate.slack_c, bstep.slack_c) + αλ = convert(eltype(bstate), 1.0) + αλ = estimate_maxstep(αλ, bstate.λx, bstep.λx) + αλ = estimate_maxstep(αλ, bstate.λc, bstep.λc) + m = max(1, length(bstate.slack_x) + length(bstate.slack_c)) + μaff = (dot(bstate.slack_x + αs*bstep.slack_x, bstate.λx + αλ*bstep.λx) + + dot(bstate.slack_c + αs*bstep.slack_c, bstate.λc + αλ*bstep.λc))/m + μmean = (dot(bstate.slack_x, bstate.λx) + dot(bstate.slack_c, bstate.λc))/m + # When there's only one constraint, μaff can be exactly zero. So limit the decrease. + state.μnext = max((μaff/μmean)^3 * μmean, μmean/10) + μ = state.μ + # Solve for the *real* step (including μ) + μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c] + gtildeμ = state.gtilde - jacobianI(state, bounds)' * μsinv + ΔλE = MF \ (gE + JE * (Htilde \ gtildeμ)) + Δx = Htilde \ (JE'*ΔλE - gtildeμ) + copy!(s, Δx) + k = unpack_vec!(bstep.λxE, ΔλE, 0) + k = unpack_vec!(bstep.λcE, ΔλE, k) + k == length(ΔλE) || error("exhausted targets before ΔλE") + solve_slack!(bstep, Δx, bounds, bstate, bgrad, J, μ) + # Solve for the quadratic parameters (use the real H, not the posdef H) + Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx + qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE) + qp +end + +function solve_slack!(bstep, s, bounds, bstate, bgrad, J, μ) + # Solve for the slack variable and λI updates + for (i, j) in enumerate(bounds.ineqx) + bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j] + # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 + # bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] + bstep.λx[i] = -(-μ/bstate.slack_x[i] + bstate.λx[i]) - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] + end + JIc = view5(J, bounds.ineqc, :) + SigmaJIΔx = Diagonal(bounds.σc)*(JIc*s) + for i = 1:length(bstep.λc) + bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i] + # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 + # bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] + bstep.λc[i] = -(-μ/bstate.slack_c[i] + bstate.λc[i]) - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] + end + bstep +end + +function is_smaller_eps(ref, step) + ise = true + for (r, s) in zip(ref, step) + ise &= (s == 0) | (abs(s) < eps(r)) + end + ise +end + +""" + quadratic_parameters(bounds, state) -> val, slopeα, Hα + +OUTDATED! Return the parameters for the quadratic fit of the behavior of the +lagrangian for positions parametrized as a function of the 4-vector +`α = (αx, αs, αI, αE)`, where the step is + + (αx * Δx, αs * Δs, αI * ΔλI, αE * ΔλE) + +and `Δx`, `Δs`, `ΔλI`, and `ΔλE` are the current search directions in +the parameters. As a function of `α`, the local model is expressed as + + val + dot(α, slopeα) + (α'*Hα*α)/2 +""" +function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) + bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad + slopeα = slopealpha(state.s, state.g, bstep, bgrad) + + jic = view5(state.constr_J, bounds.ineqc, :)*state.s + HsscP = Diagonal(state.μ./bstate.slack_c.^2) # for linesearch we need primal + jix = view(state.s, bounds.ineqx) + HssxP = Diagonal(state.μ./bstate.slack_x.^2) + ji = dot(bstep.λc, Diagonal(bounds.σc)*jic) + dot(bstep.λx, Diagonal(bounds.σx)*jix) + je = dot(bstep.λcE, view5(state.constr_J, bounds.eqc, :)*state.s) + + dot(bstep.λxE, view(state.s, bounds.eqx)) + hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x) + si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx) + hxx = dot(state.s, full(state.HP)*state.s) # TODO: don't require full here + Hα = [hxx 0 -ji -je; + 0 hss si 0; + -ji si 0 0; + -je 0 0 0] + state.L, slopeα, Hα +end + +# Utility functions that assist in testing: they return the "full +# Hessian" and "full gradient" for the equation with the slack and λI +# eliminated. +function Hf(bounds::ConstraintBounds, state) + JE = jacobianE(state, bounds) + Hf = [full(state.Htilde) -JE'; + -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))] +end +Hf(constraints, state) = Hf(constraints.bounds, state) +function gf(bounds::ConstraintBounds, state) + bstate, μ = state.bstate, state.μ + μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c] + gtildeμ = state.gtilde - jacobianI(state, bounds)' * μsinv + [gtildeμ; state.bgrad.λxE; state.bgrad.λcE] +end +gf(constraints, state) = gf(constraints.bounds, state) diff --git a/src/optimize.jl b/src/optimize.jl index 0220166ad..7283b331b 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -9,6 +9,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -17,7 +18,7 @@ function optimize(f::Function, autodiff::Bool = false, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every, @@ -32,6 +33,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -39,7 +41,7 @@ function optimize(f::Function, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -54,6 +56,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -61,7 +64,7 @@ function optimize(f::Function, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -74,6 +77,7 @@ function optimize(d::DifferentiableFunction, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -81,7 +85,7 @@ function optimize(d::DifferentiableFunction, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -94,6 +98,7 @@ function optimize(d::TwiceDifferentiableFunction, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -101,7 +106,7 @@ function optimize(d::TwiceDifferentiableFunction, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -220,7 +225,7 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing stopped, stopped_by_callback, stopped_by_time_limit = false, false, false - x_converged, f_converged = false, false + x_converged, f_converged, counter_f_tol = false, false, 0 g_converged = if typeof(method) <: NelderMead nmobjective(state.f_simplex, state.m, state.n) < options.g_tol elseif typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing @@ -242,6 +247,11 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O update_g!(d, state, method) x_converged, f_converged, g_converged, converged = assess_convergence(state, options) + # See optimize in interior.jl for an explanation of the next + # two lines (given the existence of the option, we'd better + # use it here too) + counter_f_tol = f_converged ? counter_f_tol+1 : 0 + converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) # We don't use the Hessian for anything if we have declared convergence, # so we might as well not make the (expensive) update if converged == true !converged && update_h!(d, state, method) diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..9fc1ca70b --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,42 @@ +module ConstrainedProblems + +using ..OptimizationProblem, ...TwiceDifferentiableConstraintsFunction + +examples = Dict{AbstractString, OptimizationProblem}() + +hs9_obj(x::AbstractVector) = sin(π*x[1]/12) * cos(π*x[2]/16) +hs9_c!(x::AbstractVector, c::AbstractVector) = (c[1] = 4*x[1]-3*x[2]; c) + +function hs9_obj_g!(x::AbstractVector, g::AbstractVector) + g[1] = π/12 * cos(π*x[1]/12) * cos(π*x[2]/16) + g[2] = -π/16 * sin(π*x[1]/12) * sin(π*x[2]/16) + g +end +function hs9_obj_h!(x::AbstractVector, h::AbstractMatrix) + v = hs9_obj(x) + h[1,1] = -π^2*v/144 + h[2,2] = -π^2*v/256 + h[1,2] = h[2,1] = -π^2 * cos(π*x[1]/12) * sin(π*x[2]/16) / 192 + h +end + +function hs9_jacobian!(x, J) + J[1,1] = 4 + J[1,2] = -3 + J +end +hs9_h!(x, λ, h) = h + +examples["HS9"] = OptimizationProblem("HS9", + hs9_obj, + hs9_obj_g!, + hs9_obj_h!, + TwiceDifferentiableConstraintsFunction( + hs9_c!, hs9_jacobian!, hs9_h!, + [], [], [0.0], [0.0]), + [0.0, 0.0], + [[12k-3, 16k-4] for k in (0, 1, -1)], # any integer k will do... + true, + true) + +end # module diff --git a/src/problems/multivariate.jl b/src/problems/multivariate.jl new file mode 100644 index 000000000..31d8fb729 --- /dev/null +++ b/src/problems/multivariate.jl @@ -0,0 +1,27 @@ +module MultivariateProblems + +export UnconstrainedProblems, ConstrainedProblems + +immutable OptimizationProblem + name::AbstractString + f::Function + g!::Function + h!::Function + constraints + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +function OptimizationProblem(name, f, g!, h!, + initial_x::AbstractVector, solutions, + isdifferentiable::Bool, istwicedifferentiable::Bool) + OptimizationProblem(name, f, g!, h!, nothing, + initial_x, solutions, isdifferentiable, istwicedifferentiable) +end + +include("unconstrained.jl") +include("constrained.jl") + +end diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl index d5d1ff62d..fae520350 100644 --- a/src/problems/unconstrained.jl +++ b/src/problems/unconstrained.jl @@ -1,22 +1,15 @@ module UnconstrainedProblems +using ..OptimizationProblem + ### Sources ### ### [1] Ali, Khompatraporn, & Zabinsky: A Numerical Evaluation of Several Stochastic Algorithms on Selected Continuous Global Optimization Test ### Link: www.researchgate.net/profile/Montaz_Ali/publication/226654862_A_Numerical_Evaluation_of_Several_Stochastic_Algorithms_on_Selected_Continuous_Global_Optimization_Test_Problems/links/00b4952bef133a1a6b000000.pdf ### ### [2] Fletcher & Powell: A rapidly convergent descent method for minimization, - -immutable OptimizationProblem - name::AbstractString - f::Function - g!::Function - h!::Function - initial_x::Vector{Float64} - solutions::Vector - isdifferentiable::Bool - istwicedifferentiable::Bool -end +### +### [3] More, Garbow, Hillstrom (1981): Testing Unconstrained Optimization Software, ACM Trans. Math. Soft. 7: 17-41. examples = Dict{AbstractString, OptimizationProblem}() @@ -358,4 +351,65 @@ examples["Rosenbrock"] = OptimizationProblem("Rosenbrock", true, true) +########################################################################## +### +### Beale (2D) +### +### Problem 5 in [3] +### +### Sum-of-squares objective, non-convex with g'*inv(H)*g == 0 at the +### initial position. +### +########################################################################## + +const beale_y = [1.5, 2.25, 2.625] + +beale_f(x) = [beale_y[i] - x[1]*(1-x[2]^i) for i = 1:3] +beale_J(x) = hcat([-(1-x[2]^i) for i = 1:3], + [i*x[1]*x[2]^(i-1) for i = 1:3]) +function beale_H(x, i) + od = i*x[2]^(i-1) + d2 = i > 1 ? i*(i-1)*x[1]*x[2]^(i-2) : zero(x[2]) + [0 od; od d2] +end + +beale(x::AbstractVector) = sumsq_obj(beale_f, x) + +function beale_gradient!(x::AbstractVector, g::AbstractVector) + sumsq_gradient!(beale_f, beale_J, x, g) +end + +function beale_hessian!(x::AbstractVector, h::AbstractMatrix) + sumsq_hessian!(beale_f, beale_J, beale_H, x, h) +end + +examples["Beale"] = OptimizationProblem("Beale", + beale, + beale_gradient!, + beale_hessian!, + [1.0, 1.0], + [3.0, 0.5], + true, + true) + +### General utilities for sum-of-squares functions +# Requires f(x) and J(x) computes the values and jacobian at x of a set of functions, and +# that H(x, i) computes the hessian of the ith function + +sumsq_obj(f, x) = sum(f(x).^2) + +function sumsq_gradient!(f, J, x::AbstractVector, g::AbstractVector) + copy!(g, sum((2*f(x)).*J(x), 1)) +end + +function sumsq_hessian!(f, J, H, x::AbstractVector, h::AbstractMatrix) + fx = f(x) + Jx = J(x) + htmp = 2*(Jx'*Jx) + for i = 1:length(fx) + htmp += (2*fx[i])*H(x, i) + end + copy!(h, htmp) +end + end # module diff --git a/src/types.jl b/src/types.jl index ec072fa94..aab457e8d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,38 +1,51 @@ abstract Optimizer +abstract ConstrainedOptimizer{T} <: Optimizer +abstract IPOptimizer{T} <: ConstrainedOptimizer # interior point methods + +abstract AbstractOptimFunction + immutable OptimizationOptions{TCallback <: Union{Void, Function}} x_tol::Float64 f_tol::Float64 g_tol::Float64 + successive_f_tol::Int iterations::Int store_trace::Bool show_trace::Bool extended_trace::Bool + show_linesearch::Bool autodiff::Bool show_every::Int callback::TCallback time_limit::Float64 + μfactor::Float64 + μ0 end function OptimizationOptions(; x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, extended_trace::Bool = false, + show_linesearch::Bool = false, autodiff::Bool = false, show_every::Integer = 1, callback = nothing, - time_limit = NaN) + time_limit = NaN, + μfactor = 0.1, + μ0 = :auto) show_every = show_every > 0 ? show_every: 1 if extended_trace && callback == nothing show_trace = true end OptimizationOptions{typeof(callback)}( - Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(iterations), - store_trace, show_trace, extended_trace, autodiff, Int(show_every), - callback, time_limit) + Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol), + Int(iterations), store_trace, show_trace, extended_trace, show_linesearch, + autodiff, Int(show_every), callback, time_limit, μfactor, μ0) end function print_header(options::OptimizationOptions) @@ -45,6 +58,10 @@ function print_header(method::Optimizer) @printf "Iter Function value Gradient norm \n" end +function print_header(method::IPOptimizer) + @printf "Iter Lagrangian value Function value Gradient norm |==constr.| μ\n" +end + immutable OptimizationState{T <: Optimizer} iteration::Int value::Float64 @@ -90,17 +107,17 @@ type UnivariateOptimizationResults{T,M} <: OptimizationResults f_calls::Int end -immutable NonDifferentiableFunction +immutable NonDifferentiableFunction <: AbstractOptimFunction f::Function end -immutable DifferentiableFunction +immutable DifferentiableFunction <: AbstractOptimFunction f::Function g!::Function fg!::Function end -immutable TwiceDifferentiableFunction +immutable TwiceDifferentiableFunction <: AbstractOptimFunction f::Function g!::Function fg!::Function @@ -117,6 +134,18 @@ function Base.show(io::IO, t::OptimizationState) return end +function Base.show{M<:IPOptimizer}(io::IO, t::OptimizationState{M}) + md = t.metadata + @printf io "%6d %-14e %-14e %-14e %-14e %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["ev"] md["μ"] + if !isempty(t.metadata) + for (key, value) in md + key ∈ ("Lagrangian", "μ", "ev") && continue + @printf io " * %s: %s\n" key value + end + end + return +end + function Base.show(io::IO, tr::OptimizationTrace) @printf io "Iter Function value Gradient norm \n" @printf io "------ -------------- --------------\n" @@ -126,6 +155,15 @@ function Base.show(io::IO, tr::OptimizationTrace) return end +function Base.show{M<:IPOptimizer}(io::IO, tr::OptimizationTrace{M}) + @printf io "Iter Lagrangian value Function value Gradient norm |==constr.| μ\n" + @printf io "------ ---------------- -------------- -------------- -------------- --------\n" + for state in tr + show(io, state) + end + return +end + function Base.show(io::IO, r::MultivariateOptimizationResults) @printf io "Results of Optimization Algorithm\n" @printf io " * Algorithm: %s\n" method(r) @@ -228,3 +266,265 @@ function TwiceDifferentiableFunction(f::Function, end return TwiceDifferentiableFunction(f, g!, fg!, h!) end + +### Constraints +# +# Constraints are specified by the user as +# lx_i ≤ x[i] ≤ ux_i # variable (box) constraints +# lc_i ≤ c(x)[i] ≤ uc_i # linear/nonlinear constraints +# and become equality constraints with l_i = u_i. ±∞ are allowed for l +# and u, in which case the relevant side(s) are unbounded. +# +# The user supplies functions to calculate c(x) and its derivatives. +# +# Of course we could unify the box-constraints into the +# linear/nonlinear constraints, but that would force the user to +# provide the variable-derivatives manually, which would be silly. +# +# This parametrization of the constraints gets "parsed" into a form +# that speeds and simplifies the algorithm, at the cost of many +# additional variables. See `parse_constraints` for details. + +immutable ConstraintBounds{T} + nc::Int # Number of linear/nonlinear constraints supplied by user + # Box-constraints on variables (i.e., directly on x) + eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...) + valx::Vector{T} # value of equality-constrained x + ineqx::Vector{Int} # index-vector of other inequality-constrained variables + σx::Vector{Int8} # ±1, in constraints σ(v-b) ≥ 0 (sign depends on whether v>b or v nc + +The number of linear/nonlinear constraint functions supplied by the +user. This does not include bounds-constraints on variables. + +See also: nconstraints_x. +""" +nconstraints(cb::ConstraintBounds) = cb.nc + +""" + nconstraints_x(bounds) -> nx + +The number of "meaningful" constraints (not `±Inf`) on the x coordinates. + +See also: nconstraints. +""" +function nconstraints_x(cb::ConstraintBounds) + mi = isempty(cb.ineqx) ? 0 : maximum(cb.ineqx) + me = isempty(cb.eqx) ? 0 : maximum(cb.eqx) + nmax = max(mi, me) + hasconstraint = falses(nmax) + hasconstraint[cb.ineqx] = true + hasconstraint[cb.eqx] = true + sum(hasconstraint) +end + +function Base.show(io::IO, cb::ConstraintBounds) + indent = " " + print(io, "ConstraintBounds:") + print(io, "\n Variables:") + showeq(io, indent, cb.eqx, cb.valx, 'x', :bracket) + showineq(io, indent, cb.ineqx, cb.σx, cb.bx, 'x', :bracket) + print(io, "\n Linear/nonlinear constraints:") + showeq(io, indent, cb.eqc, cb.valc, 'c', :subscript) + showineq(io, indent, cb.ineqc, cb.σc, cb.bc, 'c', :subscript) + nothing +end + +abstract AbstractConstraintsFunction + +nconstraints(constraints::AbstractConstraintsFunction) = nconstraints(constraints.bounds) + +immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction + c!::F # c!(x, storage) stores the value of the constraint-functions at x + jacobian!::J # jacobian!(x, storage) stores the Jacobian of the constraint-functions + bounds::ConstraintBounds{T} +end + +function DifferentiableConstraintsFunction(c!, jacobian!, lx, ux, lc, uc) + b = ConstraintBounds(lx, ux, lc, uc) + DifferentiableConstraintsFunction(c!, jacobian!, b) +end +DifferentiableConstraintsFunction(c!, jacobian!, bounds::ConstraintBounds) = + DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(c!, jacobian!, b) + +function DifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray) + bounds = ConstraintBounds(lx, ux, [], []) + DifferentiableConstraintsFunction(bounds) +end + +function DifferentiableConstraintsFunction(bounds::ConstraintBounds) + c! = (x,c)->nothing + J! = (x,J)->nothing + DifferentiableConstraintsFunction(c!, J!, bounds) +end + +immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction + c!::F + jacobian!::J + h!::H # Hessian of the barrier terms + bounds::ConstraintBounds{T} +end +function TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, lx, ux, lc, uc) + b = ConstraintBounds(lx, ux, lc, uc) + TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, b) +end +TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, bounds::ConstraintBounds) = + TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(c!, jacobian!, h!, b) + +function TwiceDifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray) + bounds = ConstraintBounds(lx, ux, [], []) + TwiceDifferentiableConstraintsFunction(bounds) +end + +function TwiceDifferentiableConstraintsFunction(bounds::ConstraintBounds) + c! = (x,c)->nothing + J! = (x,J)->nothing + h! = (x,λ,h)->nothing + TwiceDifferentiableConstraintsFunction(c!, J!, h!, bounds) +end + + +## Utilities + +function symmetrize(l, u) + if isempty(l) && !isempty(u) + l = fill!(similar(u), -Inf) + end + if !isempty(l) && isempty(u) + u = fill!(similar(l), Inf) + end + # TODO: change to indices? + size(l) == size(u) || throw(DimensionMismatch("bounds arrays must be consistent, got sizes $(size(l)) and $(size(u))")) + _symmetrize(l, u) +end +_symmetrize{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = l, u +_symmetrize(l::Vector{Any}, u::Vector{Any}) = _symm(l, u) +_symmetrize(l, u) = _symm(l, u) + +# Designed to ensure that bounds supplied as [] don't cause +# unnecessary broadening of the eltype. Note this isn't type-stable; if +# the user cares, it can be avoided by supplying the same concrete +# type for both l and u. +function _symm(l, u) + if isempty(l) && isempty(u) + if eltype(l) == Any + # prevent promotion from returning eltype Any + l = Array{Union{}}(0) + end + if eltype(u) == Any + u = Array{Union{}}(0) + end + end + promote(l, u) +end + +""" + parse_constraints(T, l, u) -> eq, val, ineq, σ, b + +From user-supplied constraints of the form + + l_i ≤ v_i ≤ u_i + +(which include both inequality and equality constraints, the latter +when `l_i == u_i`), convert into the following representation: + + - `eq`, a vector of the indices for which `l[eq] == u[eq]` + - `val = l[eq] = u[eq]` + - `ineq`, `σ`, and `b` such that the inequality constraints can be written as + σ[k]*(v[ineq[k]] - b[k]) ≥ 0 + where `σ[k] = ±1`. + +Note that since the same `v_i` might have both lower and upper bounds, +`ineq` might have the same index twice (once with `σ`=-1 and once with `σ`=1). + +Supplying `±Inf` for elements of `l` and/or `u` implies that `v_i` is +unbounded in the corresponding direction. In such cases there is no +corresponding entry in `ineq`/`σ`/`b`. + +T is the element-type of the non-Int outputs +""" +function parse_constraints{T}(::Type{T}, l, u) + size(l) == size(u) || throw(DimensionMismatch("l and u must be the same size, got $(size(l)) and $(size(u))")) + eq, ineq = Int[], Int[] + val, b = T[], T[] + σ = Array{Int8}(0) + for i = 1:length(l) + li, ui = l[i], u[i] + li <= ui || throw(ArgumentError("l must be smaller than u, got $li, $ui")) + if li == ui + push!(eq, i) + push!(val, ui) + else + if isfinite(li) + push!(ineq, i) + push!(σ, 1) + push!(b, li) + end + ui = u[i] + if isfinite(ui) + push!(ineq, i) + push!(σ, -1) + push!(b, ui) + end + end + end + eq, val, ineq, σ, b +end + +### Compact printing of constraints + +immutable UnquotedString + str::AbstractString +end +Base.show(io::IO, uqstr::UnquotedString) = print(io, uqstr.str) + +Base.array_eltype_show_how(a::Vector{UnquotedString}) = false, "" + +if !isdefined(Base, :IOContext) + IOContext(io; kwargs...) = io +end + +function showeq(io, indent, eq, val, chr, style) + if !isempty(eq) + print(io, '\n', indent) + if style == :bracket + eqstrs = map((i,v) -> UnquotedString("$chr[$i]=$v"), eq, val) + else + eqstrs = map((i,v) -> UnquotedString("$(chr)_$i=$v"), eq, val) + end + Base.show_vector(IOContext(io, limit=true), eqstrs, "", "") + end +end + +function showineq(io, indent, ineqs, σs, bs, chr, style) + if !isempty(ineqs) + print(io, '\n', indent) + if style == :bracket + ineqstrs = map((i,σ,b) -> UnquotedString(string("$chr[$i]", ineqstr(σ,b))), ineqs, σs, bs) + else + ineqstrs = map((i,σ,b) -> UnquotedString(string("$(chr)_$i", ineqstr(σ,b))), ineqs, σs, bs) + end + Base.show_vector(IOContext(io, limit=true), ineqstrs, "", "") + end +end +ineqstr(σ,b) = σ>0 ? "≥$b" : "≤$b" diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl index e2b284f6f..c2551ce55 100644 --- a/src/utilities/assess_convergence.jl +++ b/src/utilities/assess_convergence.jl @@ -15,7 +15,7 @@ function assess_convergence(x::Array, # Absolute Tolerance # if abs(f_x - f_x_previous) < f_tol # Relative Tolerance - if abs(f_x - f_x_previous) / (abs(f_x) + f_tol) < f_tol || nextfloat(f_x) >= f_x_previous + if abs(f_x - f_x_previous) < min(f_tol * (abs(f_x) + f_tol), eps(abs(f_x)+abs(f_x_previous))) f_converged = true end @@ -39,7 +39,7 @@ function assess_convergence(state, options) # Absolute Tolerance # if abs(f_x - f_x_previous) < f_tol # Relative Tolerance - if abs(state.f_x - state.f_x_previous) / (abs(state.f_x) + options.f_tol) < options.f_tol || nextfloat(state.f_x) >= state.f_x_previous + if abs(state.f_x - state.f_x_previous) < min(options.f_tol * (abs(state.f_x) + options.f_tol), eps(abs(state.f_x)+abs(state.f_x_previous))) || fconverged(state) f_converged = true end @@ -79,6 +79,26 @@ function assess_convergence(state::NewtonTrustRegionState, options) options.x_tol, options.f_tol, options.g_tol) + f_converged = fconverged(state) + converged |= f_converged end x_converged, f_converged, g_converged, converged end + +function assess_convergence(state::IPNewtonState, options) + # We use the whole bstate-gradient `bgrad` + bgrad = state.bgrad + assess_convergence(state.x, + state.x_previous, + state.L, + state.L_previous, + [state.g; bgrad.slack_x; bgrad.slack_c; bgrad.λx; bgrad.λc; bgrad.λxE; bgrad.λcE], + options.x_tol, + options.f_tol, + options.g_tol) +end + +# For monotonic-decreasing problems +fconverged(state) = nextfloat(state.f_x) >= state.f_x_previous +# Constrained problems are not monotonic, so we can't add a one-sided criterion +fconverged(state::IPNewtonState) = false diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index cda25b3b7..e62e7829f 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -114,3 +114,32 @@ function trace!(tr, state, iteration, method::NewtonTrustRegion, options) options.show_every, options.callback) end + +function trace!(tr, state, iteration, method::IPOptimizer, options) + dt = Dict() + dt["Lagrangian"] = state.L + dt["μ"] = state.μ + dt["ev"] = abs(state.ev) + if options.extended_trace + dt["α"] = state.alpha + dt["x"] = copy(state.x) + dt["g(x)"] = copy(state.g) + dt["h(x)"] = copy(state.H) + if !isempty(state.bstate) + dt["gtilde(x)"] = copy(state.gtilde) + dt["bstate"] = copy(state.bstate) + dt["bgrad"] = copy(state.bgrad) + dt["c"] = copy(state.constr_c) + end + end + g_norm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) + update!(tr, + iteration, + state.f_x, + g_norm, + dt, + options.store_trace, + options.show_trace, + options.show_every, + options.callback) +end diff --git a/src/utilities/update.jl b/src/utilities/update.jl index 8b81dbf35..3912dab4a 100644 --- a/src/utilities/update.jl +++ b/src/utilities/update.jl @@ -27,3 +27,10 @@ function update!{T}(tr::OptimizationTrace{T}, end stopped end + +function ls_update!(out::AbstractArray, base::AbstractArray, step::AbstractArray, α) + length(out) == length(base) == length(step) || throw(DimensionMismatch("all arrays must have the same length, got $(length(out)), $(length(base)), $(length(step))")) + for i = 1:length(base) + out[i] = base[i]+α*step[i] + end +end diff --git a/test/REQUIRE b/test/REQUIRE new file mode 100644 index 000000000..94e516f56 --- /dev/null +++ b/test/REQUIRE @@ -0,0 +1 @@ +BaseTestNext diff --git a/test/constraints.jl b/test/constraints.jl new file mode 100644 index 000000000..3521cbc40 --- /dev/null +++ b/test/constraints.jl @@ -0,0 +1,471 @@ +using Optim, PositiveFactorizations +if VERSION >= v"0.5.0-dev+7720" + using Base.Test +else + using BaseTestNext + const Test = BaseTestNext +end + +if VERSION >= v"0.5.0-dev+2396" + macro inferred5(ex) + Expr(:macrocall, Symbol("@inferred"), esc(ex)) + end +else + macro inferred5(ex) + esc(ex) + end +end + +@testset "Constraints" begin + # Utility function for hand-setting the μ parameter + function setstate!(state, μ, d, constraints, method) + state.μ = μ + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + + @testset "Bounds parsing" begin + b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) + @test b.eqx == [3] + @test b.valx == [2.0] + @test b.ineqx == [1,1,2,2] + @test b.σx == [1,-1,1,-1] + @test b.bx == [0.0,1.0,0.5,1.0] + @test b.eqc == [1] + @test b.valc == [5] + @test b.ineqc == [2,2] + @test b.σc == [1,-1] + @test b.bc == [3.8,4.0] + io = IOBuffer() + show(io, b) + @test takebuf_string(io) == """ +ConstraintBounds: + Variables: + x[3]=2.0 + x[1]≥0.0,x[1]≤1.0,x[2]≥0.5,x[2]≤1.0 + Linear/nonlinear constraints: + c_1=5.0 + c_2≥3.8,c_2≤4.0""" + + b = @inferred5(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) + for fn in (:eqx, :valx, :ineqx, :σx, :bx) + @test isempty(getfield(b, fn)) + end + @test b.eqc == [1] + @test b.valc == [5] + @test b.ineqc == [2,2] + @test b.σc == [1,-1] + @test b.bc == [3.8,4.0] + + ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0]) + @test eltype(ba) == Float64 + + @test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0]) + @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) + end + + @testset "IPNewton computations" begin + # Compare hand-computed gradient against that from automatic differentiation + function check_autodiff(d, bounds, x, cfun::Function, bstate, μ) + c = cfun(x) + J = ForwardDiff.jacobian(cfun, x) + p = Optim.pack_vec(x, bstate) + ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ) + pgrad = similar(p) + ftot!(p, pgrad) + chunksize = min(8, length(p)) + TD = ForwardDiff.Dual{chunksize,eltype(p)} + xd = Array{TD}(length(x)) + bstated = Optim.BarrierStateVars{TD}(bounds) + pcmp = similar(p) + ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ) + ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}()) + @test pcmp ≈ pgrad + end + # Basic setup (using two objectives, one equal to zero and the other a Gaussian) + μ = 0.2345678 + d0 = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0)) + A = randn(3,3); H = A'*A + dg = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H)) + x = broadcast(clamp, randn(3), -0.99, 0.99) + gx = similar(x) + cfun = x->Float64[] + c = Float64[] + J = Array{Float64}(0,0) + options = OptimizationOptions(μ0 = μ) + method = Optim.IPNewton() + ## In the code, variable constraints are special-cased (for + ## reasons of user-convenience and efficiency). It's + ## important to check that the special-casing yields the same + ## result as the general case. So in the first three + ## constrained cases below, we compare variable constraints + ## against the same kind of constraint applied generically. + cvar! = (x, c) -> copy!(c, x) + cvarJ! = (x, J) -> copy!(J, eye(size(J)...)) + cvarh! = (x, λ, h) -> h # h! adds to h, it doesn't replace it + ## No constraints + bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[]) + bstate = Optim.BarrierStateVars(bounds, x) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, dg, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) + @test f_x == L == dg.f(x) + @test gx == H*x + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, dg, constraints, x) + @test Optim.gf(bounds, state) ≈ gx + @test Optim.Hf(constraints, state) ≈ H + ## Pure equality constraints on variables + xbar = fill(0.2, length(x)) + bounds = Optim.ConstraintBounds(xbar, xbar, [], []) + bstate = Optim.BarrierStateVars(bounds) + rand!(bstate.λxE) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) + @test f_x == 0 + @test L ≈ dot(bstate.λxE, xbar-x) + @test gx == -bstate.λxE + @test bgrad.λxE == xbar-x + check_autodiff(d0, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.λxE, bstate.λxE) + setstate!(state, μ, d0, constraints, method) + @test Optim.gf(bounds, state) ≈ [gx; xbar-x] + n = length(x) + @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], xbar, xbar) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.λcE, bstate.λxE) + setstate!(state, μ, d0, constraints, method) + @test Optim.gf(bounds, state) ≈ [gx; xbar-x] + n = length(x) + @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] + ## Nonnegativity constraints + bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], []) + y = rand(length(x)) + bstate = Optim.BarrierStateVars(bounds, y) + rand!(bstate.λx) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ) + @test f_x == 0 + @test L ≈ -μ*sum(log, y) + @test bgrad.slack_x == -μ./y + bstate.λx + @test gx == -bstate.λx + check_autodiff(d0, bounds, y, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d0, constraints, y) + setstate!(state, μ, d0, constraints, method) + @test Optim.gf(bounds, state) ≈ -μ./y + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d0, constraints, y) + setstate!(state, μ, d0, constraints, method) + @test Optim.gf(bounds, state) ≈ -μ./y + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) + ## General inequality constraints on variables + lb, ub = rand(length(x))-2, rand(length(x))+1 + bounds = Optim.ConstraintBounds(lb, ub, [], []) + bstate = Optim.BarrierStateVars(bounds, x) + rand!(bstate.slack_x) # intentionally displace from the correct value + rand!(bstate.λx) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) + @test f_x == 0 + s = bounds.σx .* (x[bounds.ineqx] - bounds.bx) + Ltarget = -μ*sum(log, bstate.slack_x) + + dot(bstate.λx, bstate.slack_x - s) + @test L ≈ Ltarget + dx = similar(gx); fill!(dx, 0) + for (i,j) in enumerate(bounds.ineqx) + dx[j] -= bounds.σx[i]*bstate.λx[i] + end + @test gx ≈ dx + @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx + check_autodiff(d0, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.slack_x, bstate.slack_x) + copy!(state.bstate.λx, bstate.λx) + setstate!(state, μ, d0, constraints, method) + gxs, hxs = zeros(length(x)), zeros(length(x)) + s, λ = state.bstate.slack_x, state.bstate.λx + for (i,j) in enumerate(bounds.ineqx) + # # Primal + # gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2 + # hxs[j] += μ/s[i]^2 + # Primal-dual + gstmp, gλtmp = -μ/s[i] + λ[i], s[i] - bounds.σx[i]*(x[j]-bounds.bx[i]) + htmp = λ[i]/s[i] + hxs[j] += htmp + gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp + end + @test Optim.gf(bounds, state) ≈ gxs + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], lb, ub) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.slack_c, bstate.slack_x) + copy!(state.bstate.λc, bstate.λx) + setstate!(state, μ, d0, constraints, method) + @test Optim.gf(bounds, state) ≈ gxs + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) + ## Nonlinear equality constraints + cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] + cfun! = (x, c) -> copy!(c, cfun(x)) + cJ! = (x, J) -> copy!(J, [2*x[1] 2*x[2] 0; + 0 x[3]^2 2*x[2]*x[3]]) + ch! = function(x, λ, h) + h[1,1] += 2*λ[1] + h[2,2] += 2*λ[1] + h[3,3] += 2*λ[2]*x[2] + end + c = cfun(x) + J = ForwardDiff.jacobian(cfun, x) + Jtmp = similar(J); @test cJ!(x, Jtmp) ≈ J # just to check we did it right + cbar = rand(length(c)) + bounds = Optim.ConstraintBounds([], [], cbar, cbar) + bstate = Optim.BarrierStateVars(bounds, x, c) + rand!(bstate.λcE) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) + @test f_x == 0 + @test L ≈ dot(bstate.λcE, cbar-c) + @test gx ≈ -J'*bstate.λcE + @test bgrad.λcE == cbar-c + check_autodiff(d0, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.λcE, bstate.λcE) + setstate!(state, μ, d0, constraints, method) + heq = zeros(length(x), length(x)) + ch!(x, bstate.λcE, heq) + @test Optim.gf(bounds, state) ≈ [gx; cbar-c] + @test Optim.Hf(constraints, state) ≈ [full(cholfact(Positive, heq)) -J'; + -J zeros(size(J,1), size(J,1))] + ## Nonlinear inequality constraints + bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2) + bstate = Optim.BarrierStateVars(bounds, x, c) + rand!(bstate.slack_c) # intentionally displace from the correct value + rand!(bstate.λc) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) + @test f_x == 0 + Ltarget = -μ*sum(log, bstate.slack_c) + + dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc)) + @test L ≈ Ltarget + @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc) + @test bgrad.slack_c == -μ./bstate.slack_c + bstate.λc + @test bgrad.λc == bstate.slack_c - bounds.σc .* (c[bounds.ineqc] - bounds.bc) + check_autodiff(d0, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) + state = Optim.initial_state(method, options, d0, constraints, x) + copy!(state.bstate.slack_c, bstate.slack_c) + copy!(state.bstate.λc, bstate.λc) + setstate!(state, μ, d0, constraints, method) + hineq = zeros(length(x), length(x)) + λ = zeros(size(J, 1)) + for (i,j) in enumerate(bounds.ineqc) + λ[j] += bstate.λc[i]*bounds.σc[i] + end + ch!(x, λ, hineq) + JI = J[bounds.ineqc,:] + # # Primal + # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq + # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2)) + # Primal-dual +# hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI + hxx = -hineq + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI + gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) + @test Optim.gf(bounds, state) ≈ gf + @test Optim.Hf(constraints, state) ≈ full(cholfact(Positive, hxx, Val{true})) + end + + @testset "IPNewton initialization" begin + method = IPNewton() + options = OptimizationOptions() + x = [1.0,0.1,0.3,0.4] + ## A linear objective function (hessian is zero) + f_g = [1.0,2.0,3.0,4.0] + d = TwiceDifferentiableFunction(x->dot(x, f_g), (x,g)->copy!(g, f_g), (x,h)->fill!(h, 0)) + # Variable bounds + constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8]) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test norm(f_g - state.g) ≈ 0.01*norm(f_g) + # Nonlinear inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.05, 0.4], [0.15, 4.4]) + @test isinterior(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test norm(f_g - state.g) ≈ 0.01*norm(f_g) + # Mixed equalities and inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.1, 0.4], [0.1, 4.4]) + @test isfeasible(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + J = zeros(2,4) + constraints.jacobian!(x, J) + eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal) + @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint + Pfg = f_g - dot(f_g, eqnormal)*eqnormal + Pg = state.g - dot(state.g, eqnormal)*eqnormal + @test norm(Pfg - Pg) ≈ 0.01*norm(Pfg) + ## An objective function with a nonzero hessian + hd = [1.0, 100.0, 0.01, 2.0] # diagonal terms of hessian + d = TwiceDifferentiableFunction(x->sum(hd.*x.^2)/2, (x,g)->copy!(g, hd.*x), (x,h)->copy!(h, Diagonal(hd))) + gx = d.g!(x, zeros(4)) + hx = Diagonal(hd) + # Variable bounds + constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8]) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011 + # Nonlinear inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.05, 0.4], [0.15, 4.4]) + @test isinterior(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011 + # Mixed equalities and inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.1, 0.4], [0.1, 4.4]) + @test isfeasible(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + J = zeros(2,4) + constraints.jacobian!(x, J) + eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal) + @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint + Pgx = gx - dot(gx, eqnormal)*eqnormal + @test abs(dot(Pgx, state.g)/dot(Pgx,Pgx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(Pgx, state.H*Pgx)/dot(Pgx, hx*Pgx) - 1) <= 0.011 + end + + @testset "IPNewton step" begin + function autoqp(d, constraints, state) + # Note that state must be fully up-to-date, and you must + # have also called Optim.solve_step! + p = Optim.pack_vec(state.x, state.bstate) + chunksize = 1 #min(8, length(p)) + TD = ForwardDiff.Dual{chunksize,eltype(p)} + TD2 = ForwardDiff.Dual{chunksize,ForwardDiff.Dual{chunksize,eltype(p)}} + stated = convert(Optim.IPNewtonState{TD,1}, state) + stated2 = convert(Optim.IPNewtonState{TD2,1}, state) + ϕd = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated) + ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2) +# ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4)) + ForwardDiff.gradient(ϕd, [0.0]), ForwardDiff.hessian(ϕd2, [0.0]) + end + F = 1000 + d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + method = Optim.IPNewton() + μ = 1e-20 + options = OptimizationOptions(μ0=μ) + x0 = μ/F*10 # minimum is at μ/F + # Nonnegativity (the case that doesn't require slack variables) + constraints = TwiceDifferentiableConstraintsFunction([0.0], []) + state = Optim.initial_state(method, options, d, constraints, [x0]) + qp = Optim.solve_step!(state, constraints, options) + @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0) + g0, H0 = autoqp(d, constraints, state) + @test qp[1] ≈ F*x0-μ*log(x0) + @test [qp[2]] ≈ g0 #-(F-μ/x0)^2*x0^2/μ + @test [qp[3]] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2 + bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds + αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx, + state.s[bounds.ineqx].*bounds.σx) + ϕ = Optim.linesearch_anon(d, constraints, state, method) + val0 = ϕ(0.0) + val0 = isa(val0, Tuple) ? val0[1] : val0 + @test val0 ≈ qp[1] + α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp) + @test α > 1e-3 + end + + @testset "Slack" begin + σswap(σ, a, b) = σ == 1 ? (a, b) : (b, a) + # Test that we achieve a high-precision minimum for fixed + # μ. For anything other than nonnegativity/nonpositivity + # constraints, this tests whether the slack variables are + # solving the problem they were designed to address (the + # possibility that adjacent floating-point numbers are too + # widely spaced to accurately satisfy the KKT equations near a + # boundary). + F0 = 1000 + method = Optim.IPNewton() + μ = 1e-20 # smaller than eps(1.0) + options = OptimizationOptions(μ0=μ) + for σ in (1, -1) + F = σ*F0 + # Nonnegativity/nonpositivity (the case that doesn't require slack variables) + d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...) + state = Optim.initial_state(method, options, d, constraints, [μ/F*10]) + for i = 1:10 + Optim.update_state!(d, constraints, state, method, options) + state.μ = μ + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test isapprox(state.x[1], μ/F, rtol=1e-4) + # |x| ≥ 1, and check that we get slack precision better than eps(1.0) + d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...) + state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) + for i = 1:10 + Optim.update_state!(d, constraints, state, method, options) + state.μ = μ + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test state.x[1] == σ + @test state.bstate.slack_x[1] < eps(float(σ)) + # |x| >= 1 using the linear/nonlinear constraints + d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1] = x[1]), + (x,J)->(J[1,1] = 1.0), + (x,λ,h)->nothing, + [], [], σswap(σ, [Float64(σ)], [])...) + state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) + for i = 1:10 + Optim.update_state!(d, constraints, state, method, options) + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test state.x[1] ≈ σ + @test state.bstate.slack_c[1] < eps(float(σ)) + end + end +end + +nothing diff --git a/test/runtests.jl b/test/runtests.jl index 7f11d94a1..973122069 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ my_tests = [ "brent.jl", "type_stability.jl", "array.jl", + "constraints.jl", "constrained.jl", "callbacks.jl", "precon.jl",