From 034f2cf5d4faaebcb86d0b38cb00b6beb2b69c14 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 4 Nov 2016 05:14:35 -0500 Subject: [PATCH 01/40] Add constraints and parsing tests --- src/Optim.jl | 2 + src/types.jl | 177 ++++++++++++++++++++++++++++++++++++++++++++ test/constraints.jl | 33 +++++++++ 3 files changed, 212 insertions(+) create mode 100644 test/constraints.jl diff --git a/src/Optim.jl b/src/Optim.jl index 4a4272e0c..c66a68e3e 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -19,6 +19,8 @@ module Optim export optimize, DifferentiableFunction, TwiceDifferentiableFunction, + DifferentiableConstraintsFunction, + TwiceDifferentiableConstraintsFunction, OptimizationOptions, OptimizationState, OptimizationTrace, diff --git a/src/types.jl b/src/types.jl index ec072fa94..b940763c5 100644 --- a/src/types.jl +++ b/src/types.jl @@ -228,3 +228,180 @@ function TwiceDifferentiableFunction(f::Function, end return TwiceDifferentiableFunction(f, g!, fg!, h!) end + +### Constraints +# +# Constraints are specified by the user as +# lx_i ≤ x[i] ≤ ux_i # variable (box) constraints +# lc_i ≤ c(x)[i] ≤ uc_i # linear/nonlinear constraints +# and become equality constraints with l_i = u_i. ±∞ are allowed for l +# and u, in which case the relevant side(s) are unbounded. +# +# The user supplies functions to calculate c(x) and its derivatives. +# +# Of course we could unify the box-constraints into the +# linear/nonlinear constraints, but that would force the user to +# provide the variable-derivatives manually, which would be silly. +# +# This parametrization of the constraints gets "parsed" into a form +# that speeds and simplifies the algorithm, at the cost of many +# additional variables. See `parse_constraints` for details. + +immutable ConstraintBounds{T} + # Box-constraints on variables (i.e., directly on x) + eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...) 
+ valx::Vector{T} # value of equality-constrained x + ineqx::Vector{Int} # index-vector of other inequality-constrained variables + σx::Vector{Int8} # ±1, in constraints σ(v-b) ≥ 0 (sign depends on whether v>b or v eq, val, ineq, σ, b, [iz, σz, bz] + +From user-supplied constraints of the form + + l_i ≤ v_i ≤ u_i + +(which include both inequality and equality constraints, the latter +when `l_i == u_i`), convert into the following representation: + + - `eq`, a vector of the indices for which `l[eq] == u[eq]` + - `val = l[eq] = u[eq]` + - `ineq`, `σ`, and `b` such that the inequality constraints can be written as + σ[k]*(v[ineq[k]] - b[k]) ≥ 0 + where `σ[k] = ±1`. + - optionally (with `split_signed=true`), return an index-vector + `iz` of entries where one of `l`, `u` is zero, along with + whether the constraint is `≥ 0` (σz=+1) or `≤ 0` (σz=-1). Such + are removed from `ineq`, `σ`, and `b`. For coordinate variables + this can be used to reduce the number of slack variables needed, + since when one of the bounds is 0, the variable itself *is* a + slack variable. + +Note that since the same `v_i` might have both lower and upper bounds, +`ineq` might have the same index twice (once with `σ`=-1 and once with `σ`=1). + +Supplying `±Inf` for elements of `l` and/or `u` implies that `v_i` is +unbounded in the corresponding direction. In such cases there is no +corresponding entry in `ineq`/`σ`/`b`. 
+ +T is the element-type of the non-Int outputs +""" +function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false) + size(l) == size(u) || throw(DimensionMismatch("l and u must be the same size, got $(size(l)) and $(size(u))")) + eq, ineq, iz = Int[], Int[], Int[] + val, b = T[], T[] + σ, σz = Array{Int8}(0), Array{Int8}(0) + for i = 1:length(l) + li, ui = l[i], u[i] + li <= ui || throw(ArgumentError("l must be smaller than u, got $li, $ui")) + if li == ui + push!(eq, i) + push!(val, ui) + else + if isfinite(li) + if split_signed && li == 0 + push!(iz, i) + push!(σz, 1) + else + push!(ineq, i) + push!(σ, 1) + push!(b, li) + end + end + ui = u[i] + if isfinite(ui) + if split_signed && ui == 0 + push!(iz, i) + push!(σz, -1) + else + push!(ineq, i) + push!(σ, -1) + push!(b, ui) + end + end + end + end + if split_signed + return eq, val, ineq, σ, b, iz, σz, zeros(T, length(iz)) + end + eq, val, ineq, σ, b +end diff --git a/test/constraints.jl b/test/constraints.jl new file mode 100644 index 000000000..3f503364f --- /dev/null +++ b/test/constraints.jl @@ -0,0 +1,33 @@ +using Optim, Base.Test + +b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) +@test b.eqx == [3] +@test b.valx == [2.0] +@test b.ineqx == [1,2,2] +@test b.σx == [-1,1,-1] +@test b.bx == [1.0,0.5,1.0] +@test b.iz == [1] +@test b.σz == [1] +@test b.eqc == [1] +@test b.valc == [5] +@test b.ineqc == [2,2] +@test b.σc == [1,-1] +@test b.bc == [3.8,4.0] + +b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) +for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz) + @test isempty(getfield(b, fn)) +end +@test b.eqc == [1] +@test b.valc == [5] +@test b.ineqc == [2,2] +@test b.σc == [1,-1] +@test b.bc == [3.8,4.0] + +ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0]) +@test eltype(ba) == Float64 + +@test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0]) +@test_throws 
DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) + +nothing From 0c7a70f54b7dfe9c379e8321cdebbd4a8dc8b007 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 5 Nov 2016 04:32:01 -0500 Subject: [PATCH 02/40] Add pretty-printing of ConstraintBounds --- src/types.jl | 47 +++++++++++++++++++++++++++++++ test/constraints.jl | 67 +++++++++++++++++++++++++++------------------ 2 files changed, 88 insertions(+), 26 deletions(-) diff --git a/src/types.jl b/src/types.jl index b940763c5..06b90463d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -275,6 +275,19 @@ end Base.eltype{T}(::Type{ConstraintBounds{T}}) = T Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb)) +function Base.show(io::IO, cb::ConstraintBounds) + indent = " " + print(io, "ConstraintBounds:") + print(io, "\n Variables:") + showeq(io, indent, cb.eqx, cb.valx, 'x', :bracket) + showineq(io, indent, cb.ineqx, cb.σx, cb.bx, 'x', :bracket) + showineq(io, indent, cb.iz, cb.σz, cb.bz, 'x', :bracket) + print(io, "\n Linear/nonlinear constraints:") + showeq(io, indent, cb.eqc, cb.valc, 'c', :subscript) + showineq(io, indent, cb.ineqc, cb.σc, cb.bc, 'c', :subscript) + nothing +end + abstract AbstractConstraintsFunction immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction @@ -405,3 +418,37 @@ function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false) end eq, val, ineq, σ, b end + +### Compact printing of constraints + +immutable UnquotedString + str::AbstractString +end +Base.show(io::IO, uqstr::UnquotedString) = print(io, uqstr.str) + +Base.array_eltype_show_how(a::Vector{UnquotedString}) = false, "" + +function showeq(io, indent, eq, val, chr, style) + if !isempty(eq) + print(io, '\n', indent) + if style == :bracket + eqstrs = map((i,v) -> UnquotedString("$chr[$i]=$v"), eq, val) + else + eqstrs = map((i,v) -> UnquotedString("$(chr)_$i=$v"), eq, val) + end + Base.show_vector(IOContext(io, limit=true), eqstrs, "", "") + end +end + 
+function showineq(io, indent, ineqs, σs, bs, chr, style) + if !isempty(ineqs) + print(io, '\n', indent) + if style == :bracket + ineqstrs = map((i,σ,b) -> UnquotedString(string("$chr[$i]", ineqstr(σ,b))), ineqs, σs, bs) + else + ineqstrs = map((i,σ,b) -> UnquotedString(string("$(chr)_$i", ineqstr(σ,b))), ineqs, σs, bs) + end + Base.show_vector(IOContext(io, limit=true), ineqstrs, "", "") + end +end +ineqstr(σ,b) = σ>0 ? "≥$b" : "≤$b" diff --git a/test/constraints.jl b/test/constraints.jl index 3f503364f..642c5ae46 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -1,33 +1,48 @@ using Optim, Base.Test -b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) -@test b.eqx == [3] -@test b.valx == [2.0] -@test b.ineqx == [1,2,2] -@test b.σx == [-1,1,-1] -@test b.bx == [1.0,0.5,1.0] -@test b.iz == [1] -@test b.σz == [1] -@test b.eqc == [1] -@test b.valc == [5] -@test b.ineqc == [2,2] -@test b.σc == [1,-1] -@test b.bc == [3.8,4.0] +@testset "Constraints" begin + @testset "Bounds parsing" begin + b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) + @test b.eqx == [3] + @test b.valx == [2.0] + @test b.ineqx == [1,2,2] + @test b.σx == [-1,1,-1] + @test b.bx == [1.0,0.5,1.0] + @test b.iz == [1] + @test b.σz == [1] + @test b.eqc == [1] + @test b.valc == [5] + @test b.ineqc == [2,2] + @test b.σc == [1,-1] + @test b.bc == [3.8,4.0] + io = IOBuffer() + show(io, b) + @test takebuf_string(io) == """ +ConstraintBounds: + Variables: + x[3]=2.0 + x[1]≤1.0,x[2]≥0.5,x[2]≤1.0 + x[1]≥0.0 + Linear/nonlinear constraints: + c_1=5.0 + c_2≥3.8,c_2≤4.0""" -b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) -for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz) - @test isempty(getfield(b, fn)) -end -@test b.eqc == [1] -@test b.valc == [5] -@test b.ineqc == [2,2] -@test b.σc == [1,-1] -@test b.bc == [3.8,4.0] + b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], 
[5.0, 3.8], [5.0, 4.0])) + for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz) + @test isempty(getfield(b, fn)) + end + @test b.eqc == [1] + @test b.valc == [5] + @test b.ineqc == [2,2] + @test b.σc == [1,-1] + @test b.bc == [3.8,4.0] -ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0]) -@test eltype(ba) == Float64 + ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0]) + @test eltype(ba) == Float64 -@test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0]) -@test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) + @test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0]) + @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) + end +end nothing From 8b1e03bda0ada1a8d58a377bb82d5d8efd79ef2c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 5 Nov 2016 04:33:31 -0500 Subject: [PATCH 03/40] Implement barrier function and equality-constraints Computes and tests the gradient, too. The Hessian will come later. 
--- src/Optim.jl | 1 + src/interior.jl | 345 ++++++++++++++++++++++++++++++++++++++++++++ test/constraints.jl | 107 ++++++++++++++ 3 files changed, 453 insertions(+) create mode 100644 src/interior.jl diff --git a/src/Optim.jl b/src/Optim.jl index c66a68e3e..e96940c8f 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -78,6 +78,7 @@ module Optim # Constrained optimization include("fminbox.jl") + include("interior.jl") # trust region methods include("levenberg_marquardt.jl") diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..301bb9a98 --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,345 @@ +abstract AbstractBarrierState + +# These are used not only for the current state, but also for the step and the gradient +immutable BarrierStateVars{T} + slack_x::Vector{T} # values of slack variables for x + slack_c::Vector{T} # values of slack variables for c + λxE::Vector{T} # λ for equality constraints on x + λx::Vector{T} # λ for equality constraints on slack_x + λc::Vector{T} # λ for equality constraints on slack_c + λcE::Vector{T} # λ for linear/nonlinear equality constraints +end +# Note on λxE: +# We could just set equality-constrained variables to their +# constraint values at the beginning of optimization, but this +# might make the initial guess infeasible in terms of its +# inequality constraints. This would be a much bigger problem than +# not matching the equality constraints. So we allow them to +# differ, and require that the algorithm can cope with it. 
+ +function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds) + slack_x = Array{T}(length(bounds.ineqx)) + slack_c = Array{T}(length(bounds.ineqc)) + λxE = Array{T}(length(bounds.eqx)) + λx = similar(slack_x) + λc = similar(slack_c) + λcE = Array{T}(length(bounds.eqc)) + sv = BarrierStateVars{T}(slack_x, slack_c, λxE, λx, λc, λcE) +end +BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds) + +function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x) + sv = BarrierStateVars(bounds) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + sv +end +function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c) + sv = BarrierStateVars(bounds) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc) + sv +end +function setslack!(slack, v, ineq, σ, b) + for i = 1:length(ineq) + slack[i] = σ[i]*(v[ineq[i]]-b[i]) + end + slack +end + +Base.similar(bstate::BarrierStateVars) = + BarrierStateVars(similar(bstate.slack_x), + similar(bstate.slack_c), + similar(bstate.λxE), + similar(bstate.λx), + similar(bstate.λc), + similar(bstate.λcE)) + +function Base.fill!(b::BarrierStateVars, val) + fill!(b.slack_x, val) + fill!(b.slack_c, val) + fill!(b.λxE, val) + fill!(b.λx, val) + fill!(b.λc, val) + fill!(b.λcE, val) + b +end + +Base.eltype{T}(::Type{BarrierStateVars{T}}) = T +Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv)) + +function Base.show(io::IO, b::BarrierStateVars) + print(io, "BarrierStateVars{$(eltype(b))}:") + for fn in fieldnames(b) + print(io, "\n $fn: ") + show(io, getfield(b, fn)) + end +end + + +## Computation of the Lagrangian and its gradient +# This is in a parametrization that is also useful during linesearch + +function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ, method) + f_x = d.f(x) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + + equality_violation(bounds, x, c, bstate) + f_x, L_xsλ +end + +function 
lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) + fill!(bgrad, 0) + d.g!(x, gx) + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + equality_grad!(gx, bgrad, bounds, x, c, J, bstate) + nothing +end + +function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) + fill!(bgrad, 0) + f_x = d.fg!(x, gx) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + + equality_violation(bounds, x, c, bstate) + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + equality_grad!(gx, bgrad, bounds, x, c, J, bstate) + f_x, L_xsλ +end + +## Computation of Lagrangian and derivatives when passing all parameters as a single vector +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ, method) + unpack_vec!(x, bstate, p) + f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ, method) + L_xsλ +end +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ, method) + # Use this version when using automatic differentiation + unpack_vec!(x, bstate, p) + f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ, method) + L_xsλ +end +function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) + unpack_vec!(x, bstate, p) + f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, method) + pack_vec!(storage, gx, bgrad) + L_xsλ +end + +## Computation of Lagrangian terms: barrier penalty +""" + barrier_value(constraints, state) -> val + barrier_value(bounds, x, sx, sc, μ) -> val + +Compute the value of the barrier penalty at the current `state`, or at +a position (`x`,`sx`,`sc`), where `x` is the current position, `sx` +are the coordinate slack variables, and `sc` are the linear/nonlinear +slack variables. `bounds` holds the parsed bounds. 
+""" +function barrier_value(bounds::ConstraintBounds, x, sx, sc, μ) + # bμ is the coefficient of μ in the barrier penalty + bμ = _bv(x, bounds.iz, bounds.σz) + # coords constrained by 0 + _bv(sx) + # coords with other bounds + _bv(sc) # linear/nonlinear constr. + μ*bμ +end +barrier_value(bounds::ConstraintBounds, x, bstate::BarrierStateVars, μ) = + barrier_value(bounds, x, bstate.slack_x, bstate.slack_c, μ) +barrier_value(bounds::ConstraintBounds, state) = + barrier_value(bounds, state.x, state.bstate.slack_x, state.bstate.slack_c, state.μ) +barrier_value(constraints::AbstractConstraintsFunction, state) = + barrier_value(constraints.bounds, state) + +# don't call this barrier_value because it lacks μ +function _bv(v, idx, σ) + ret = loginf(one(eltype(σ))*one(eltype(v))) + for (i,iv) in enumerate(idx) + ret += loginf(σ[i]*v[iv]) + end + -ret +end + +_bv(v) = isempty(v) ? loginf(one(eltype(v))) : -sum(loginf, v) + +loginf(δ) = δ > 0 ? log(δ) : -oftype(δ, Inf) + +""" + barrier_grad!(gx, bgrad, bounds, x, bstate, μ) + barrier_grad!(gx, gsx, gsc, bounds, x, sx, sc, μ) + +Compute the gradient of the barrier penalty at (`x`,`sx`,`sc`), where +`x` is the current position, `sx` are the coordinate slack variables, +and `sc` are the linear/nonlinear slack +variables. `bounds::ConstraintBounds` holds the parsed bounds. + +The result is *added* to `gx`, `gsx`, and `gsc`, so these vectors +need to be initialized appropriately. 
+""" +function barrier_grad!(gx, gsx, gsc, bounds::ConstraintBounds, x, sx, sc, μ) + barrier_grad!(view(gx, bounds.iz), view(x, bounds.iz), μ) + barrier_grad!(gsx, sx, μ) + barrier_grad!(gsc, sc, μ) + nothing +end +barrier_grad!(gx, bgrad, bounds::ConstraintBounds, x, bstate, μ) = + barrier_grad!(gx, bgrad.slack_x, bgrad.slack_c, bounds, x, bstate.slack_x, bstate.slack_c, μ) + +function barrier_grad!(out, v, μ) + for i = 1:length(out) + out[i] -= μ/v[i] + end + nothing +end + + +## Computation of Lagrangian terms: equality constraints penalty + +""" + equality_violation([f=identity], bounds, x, c, bstate) -> val + equality_violation([f=identity], bounds, x, c, sx, sc, λxE, λx, λc, λcE) -> val + +Compute the sum of `f(v_i)`, where `v_i = λ_i*(target - observed)` +measures the difference between the current state and the +equality-constrained state. `bounds::ConstraintBounds` holds the +parsed bounds. `x` is the current position, `sx` are the coordinate +slack variables, and `sc` are the linear/nonlinear slack +variables. `c` holds the values of the linear-nonlinear constraints, +and the λ arguments hold the Lagrange multipliers for `x`, `sx`, `sc`, and +`c` respectively. 
+""" +function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) + ev = equality_violation(f, x, bounds.valx, bounds.eqx, λxE) + + equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) + + equality_violation(f, sc, c, bounds.ineqc, bounds.σc, bounds.bc, λc) + + equality_violation(f, c, bounds.valc, bounds.eqc, λcE) +end +equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) = + equality_violation(identity, bounds, x, c, sx, sc, λxE, λx, λc, λcE) +function equality_violation(f, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) + equality_violation(f, bounds, x, c, + bstate.slack_x, bstate.slack_c, bstate.λxE, bstate.λx, bstate.λc, bstate.λcE) +end +equality_violation(bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) = + equality_violation(identity, bounds, x, c, bstate) +equality_violation(f, bounds::ConstraintBounds, state::AbstractBarrierState) = + equality_violation(f, bounds, state.x, state.constr_c, state.bstate) +equality_violation(bounds::ConstraintBounds, state::AbstractBarrierState) = + equality_violation(identity, bounds, state) +equality_violation(f, constraints::AbstractConstraintsFunction, state::AbstractBarrierState) = + equality_violation(f, constraints.bounds, state) +equality_violation(constraints::AbstractConstraintsFunction, state::AbstractBarrierState) = + equality_violation(constraints.bounds, state) + +# violations of s = σ*(v-b) +function equality_violation(f, s, v, ineq, σ, b, λ) + ret = f(zero(eltype(λ))*(zero(eltype(s))-zero(eltype(σ))*(zero(eltype(v))-zero(eltype(b))))) + for (i,iv) in enumerate(ineq) + ret += f(λ[i]*(s[i] - σ[i]*(v[iv]-b[i]))) + end + ret +end + +# violations of v = target +function equality_violation(f, v, target, idx, λ) + ret = f(zero(eltype(λ))*(zero(eltype(v))-zero(eltype(target)))) + for (i,iv) in enumerate(idx) + ret += f(λ[i]*(target[i] - v[iv])) + end + ret +end + +""" + equality_grad!(gx, gbstate, bounds, x, c, J, bstate) + 
+Compute the gradient of `equality_violation`, storing the result in `gx` (an array) and `gbstate::BarrierStateVars`. +""" +function equality_grad!(gx, gsx, gsc, gλxE, gλx, gλc, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λxE, λx, λc, λcE) + gx[bounds.eqx] = gx[bounds.eqx] - λxE + equality_grad_var!(gsx, gx, bounds.ineqx, bounds.σx, λx) + equality_grad_var!(gsc, gx, bounds.ineqc, bounds.σc, λc, J) + equality_grad_var!(gx, bounds.eqc, λcE, J) + equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx) + equality_grad_λ!(gλx, sx, x, bounds.ineqx, bounds.σx, bounds.bx) + equality_grad_λ!(gλc, sc, c, bounds.ineqc, bounds.σc, bounds.bc) + equality_grad_λ!(gλcE, c, bounds.valc, bounds.eqc) +end +equality_grad!(gx, gb::BarrierStateVars, bounds::ConstraintBounds, x, c, J, b::BarrierStateVars) = + equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λxE, gb.λx, gb.λc, gb.λcE, + bounds, x, c, J, + b.slack_x, b.slack_c, b.λxE, b.λx, b.λc, b.λcE) + +# violations of s = σ*(x-b) +function equality_grad_var!(gs, gx, ineq, σ, λ) + for (i,ix) in enumerate(ineq) + λi = λ[i] + gs[i] += λi + gx[ix] -= λi*σ[i] + end + nothing +end + +function equality_grad_var!(gs, gx, ineq, σ, λ, J) + gs[:] = gs + λ + if !isempty(ineq) + gx[:] = gx - view(J, ineq, :)'*(λ.*σ) + end + nothing +end + +function equality_grad_λ!(gλ, s, v, ineq, σ, b) + for (i,iv) in enumerate(ineq) + gλ[i] += s[i] - σ[i]*(v[iv]-b[i]) + end + nothing +end + +# violations of v = target +function equality_grad_var!(gx, idx, λ, J) + if !isempty(idx) + gx[:] = gx - view(J, idx, :)'*λ + end + nothing +end + +function equality_grad_λ!(gλ, v, target, idx) + for (i,iv) in enumerate(idx) + gλ[i] += target[i] - v[iv] + end + nothing +end + +## Utilities for representing total state as single vector +function pack_vec(x, b::BarrierStateVars) + n = length(x) + for fn in fieldnames(b) + n += length(getfield(b, fn)) + end + vec = Array{eltype(x)}(n) + pack_vec!(vec, x, b) +end + +function pack_vec!(vec, x, b::BarrierStateVars) + k = 
pack_vec!(vec, x, 0) + for fn in fieldnames(b) + k = pack_vec!(vec, getfield(b, fn), k) + end + k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))")) + vec +end +function pack_vec!(vec, x, k::Int) + for i = 1:length(x) + vec[k+=1] = x[i] + end + k +end +function unpack_vec!(x, b::BarrierStateVars, vec::Vector) + k = unpack_vec!(x, vec, 0) + for fn in fieldnames(b) + k = unpack_vec!(getfield(b, fn), vec, k) + end + k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))")) + x, b +end +function unpack_vec!(x, vec::Vector, k::Int) + for i = 1:length(x) + x[i] = vec[k+=1] + end + k +end diff --git a/test/constraints.jl b/test/constraints.jl index 642c5ae46..c3eab9de3 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -43,6 +43,113 @@ ConstraintBounds: @test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0]) @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) end + + @testset "Lagrangian val/grad" begin + function check_autodiff(d, bounds, x, cfun::Function, bstate, μ) + c = cfun(x) + J = ForwardDiff.jacobian(cfun, x) + # Using real-valued inputs + p = Optim.pack_vec(x, bstate) + ftot! 
= (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + pgrad = similar(p) + ftot!(p, pgrad) + # Compute with ForwardDiff + chunksize = min(8, length(p)) + TD = ForwardDiff.Dual{chunksize,eltype(p)} + xd = Array{TD}(length(x)) + bstated = Optim.BarrierStateVars{TD}(bounds) + pcmp = similar(p) + ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ, nothing) + ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}()) + @test pcmp ≈ pgrad + end + # Basic setup + μ = 0.2345678 + A = randn(3,3); H = A'*A + d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x)) + x = clamp.(randn(3), -0.99, 0.99) + gx = similar(x) + cfun = x->Float64[] + c = Float64[] + J = Array{Float64}(0,0) + ## No constraints + bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[]) + bstate = Optim.BarrierStateVars(bounds, x) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + @test f_x == L == d.f(x) + @test gx == H*x + ## Pure equality constraints on variables + d = DifferentiableFunction(x->0.0, (x,storage)->fill!(storage, 0)) + xbar = fill(0.2, length(x)) + bounds = Optim.ConstraintBounds(xbar, xbar, [], []) + bstate = Optim.BarrierStateVars(bounds) + rand!(bstate.λxE) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + @test f_x == 0 + @test L ≈ dot(bstate.λxE, xbar-x) + @test gx == -bstate.λxE + @test bgrad.λxE == xbar-x + check_autodiff(d, bounds, x, cfun, bstate, μ) + ## Nonnegativity constraints + bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], []) + y = rand(length(x)) + bstate = Optim.BarrierStateVars(bounds, y) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + @test f_x == 0 + @test L ≈ -μ*sum(log, y) + @test gx == -μ./y 
+ check_autodiff(d, bounds, y, cfun, bstate, μ) + ## General inequality constraints on variables + bounds = Optim.ConstraintBounds(rand(length(x))-2, rand(length(x))+1, [], []) + bstate = Optim.BarrierStateVars(bounds, x) + rand!(bstate.slack_x) # intentionally displace from the correct value + rand!(bstate.λx) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + @test f_x == 0 + Ltarget = -μ*sum(log, bstate.slack_x) + + dot(bstate.λx, bstate.slack_x - bounds.σx.*(x[bounds.ineqx]-bounds.bx)) + @test L ≈ Ltarget + dx = similar(gx); fill!(dx, 0) + for (i,j) in enumerate(bounds.ineqx) + dx[j] -= bounds.σx[i]*bstate.λx[i] + end + @test gx ≈ dx + @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx + check_autodiff(d, bounds, x, cfun, bstate, μ) + ## Nonlinear equality constraints + cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] + c = cfun(x) + J = ForwardDiff.jacobian(cfun, x) + cbar = rand(length(c)) + bounds = Optim.ConstraintBounds([], [], cbar, cbar) + bstate = Optim.BarrierStateVars(bounds, x, c) + rand!(bstate.λcE) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + @test f_x == 0 + @test L ≈ dot(bstate.λcE, cbar-c) + @test gx ≈ -J'*bstate.λcE + @test bgrad.λcE == cbar-c + check_autodiff(d, bounds, x, cfun, bstate, μ) + ## Nonlinear inequality constraints + bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1) + bstate = Optim.BarrierStateVars(bounds, x, c) + rand!(bstate.slack_c) # intentionally displace from the correct value + rand!(bstate.λc) + bgrad = similar(bstate) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + @test f_x == 0 + Ltarget = -μ*sum(log, bstate.slack_c) + + dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc)) + @test L ≈ Ltarget + @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc) + @test bgrad.slack_c == -μ./bstate.slack_c + 
bstate.λc + check_autodiff(d, bounds, x, cfun, bstate, μ) + end end nothing From 1ba0abf80d584d825525aabe2f84e23d20eb4b21 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 5 Nov 2016 05:11:30 -0500 Subject: [PATCH 04/40] Fixes for julia-0.4 --- src/interior.jl | 12 +++++++++--- src/types.jl | 4 ++++ test/constraints.jl | 24 ++++++++++++++++++++---- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 301bb9a98..269eac003 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -17,7 +17,7 @@ end # not matching the equality constraints. So we allow them to # differ, and require that the algorithm can cope with it. -function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds) +@compat function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds) slack_x = Array{T}(length(bounds.ineqx)) slack_c = Array{T}(length(bounds.ineqc)) λxE = Array{T}(length(bounds.eqx)) @@ -278,7 +278,7 @@ end function equality_grad_var!(gs, gx, ineq, σ, λ, J) gs[:] = gs + λ if !isempty(ineq) - gx[:] = gx - view(J, ineq, :)'*(λ.*σ) + gx[:] = gx - view5(J, ineq, :)'*(λ.*σ) end nothing end @@ -293,7 +293,7 @@ end # violations of v = target function equality_grad_var!(gx, idx, λ, J) if !isempty(idx) - gx[:] = gx - view(J, idx, :)'*λ + gx[:] = gx - view5(J, idx, :)'*λ end nothing end @@ -343,3 +343,9 @@ function unpack_vec!(x, vec::Vector, k::Int) end k end + +if VERSION >= v"0.5.0" + view5(A, i, j) = view(A, i, j) +else + view5(A, i, j) = A[i,j] +end diff --git a/src/types.jl b/src/types.jl index 06b90463d..699fc3492 100644 --- a/src/types.jl +++ b/src/types.jl @@ -428,6 +428,10 @@ Base.show(io::IO, uqstr::UnquotedString) = print(io, uqstr.str) Base.array_eltype_show_how(a::Vector{UnquotedString}) = false, "" +if !isdefined(Base, :IOContext) + IOContext(io; kwargs...) 
= io +end + function showeq(io, indent, eq, val, chr, style) if !isempty(eq) print(io, '\n', indent) diff --git a/test/constraints.jl b/test/constraints.jl index c3eab9de3..07fb4a91d 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -1,8 +1,24 @@ -using Optim, Base.Test +using Optim +if VERSION >= v"0.5.0-dev+7720" + using Base.Test +else + using BaseTestNext + const Test = BaseTestNext +end + +if VERSION >= v"0.5.0-dev+2396" + macro inferred5(ex) + Expr(:macrocall, Symbol("@inferred"), esc(ex)) + end +else + macro inferred5(ex) + esc(ex) + end +end @testset "Constraints" begin @testset "Bounds parsing" begin - b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) + b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) @test b.eqx == [3] @test b.valx == [2.0] @test b.ineqx == [1,2,2] @@ -27,7 +43,7 @@ ConstraintBounds: c_1=5.0 c_2≥3.8,c_2≤4.0""" - b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) + b = @inferred5(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz) @test isempty(getfield(b, fn)) end @@ -67,7 +83,7 @@ ConstraintBounds: μ = 0.2345678 A = randn(3,3); H = A'*A d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x)) - x = clamp.(randn(3), -0.99, 0.99) + x = broadcast(clamp, randn(3), -0.99, 0.99) gx = similar(x) cfun = x->Float64[] c = Float64[] From 7d2ac1eccc7a960075dc588b95a21174cfcde644 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 6 Nov 2016 11:43:00 -0600 Subject: [PATCH 05/40] Add interior point Newton method state, setup of Newton update equation --- src/Optim.jl | 1 + src/deprecate.jl | 6 ++ src/interior.jl | 44 +++++++++++-- src/ipnewton.jl | 151 ++++++++++++++++++++++++++++++++++++++++++++ src/types.jl | 23 +++++-- test/constraints.jl | 123 ++++++++++++++++++++++++++++++++++-- 6 files changed, 330 insertions(+), 18 
deletions(-) create mode 100644 src/ipnewton.jl diff --git a/src/Optim.jl b/src/Optim.jl index e96940c8f..c900b5825 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -79,6 +79,7 @@ module Optim # Constrained optimization include("fminbox.jl") include("interior.jl") + include("ipnewton.jl") # trust region methods include("levenberg_marquardt.jl") diff --git a/src/deprecate.jl b/src/deprecate.jl index 17ee6f2be..b76fb4a94 100644 --- a/src/deprecate.jl +++ b/src/deprecate.jl @@ -23,3 +23,9 @@ end @deprecate interpolating_linesearch! LineSearches.strongwolfe! @deprecate backtracking_linesearch! LineSearches.backtracking! @deprecate interpbacktracking_linesearch! LineSearches.interpbacktracking! + +if VERSION >= v"0.5.0" + view5(A, i, j) = view(A, i, j) +else + view5(A, i, j) = A[i,j] +end diff --git a/src/interior.jl b/src/interior.jl index 269eac003..2f339f694 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -75,6 +75,44 @@ function Base.show(io::IO, b::BarrierStateVars) end end +@compat Base.:(==)(v::BarrierStateVars, w::BarrierStateVars) = + v.slack_x == w.slack_x && + v.slack_c == w.slack_c && + v.λxE == w.λxE && + v.λx == w.λx && + v.λc == w.λc && + v.λcE == w.λcE + +const bsv_seed = sizeof(UInt) == 8 ? 0x145b788192d1cde3 : 0x766a2810 # sizeof is in bytes: 8 on 64-bit, 4 on 32-bit +Base.hash(b::BarrierStateVars, u::UInt) = + hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed)))))) + + +""" + BarrierLineSearch{T} + +Parameters for interior-point line search methods that use only the value +""" +immutable BarrierLineSearch{T} + c::Vector{T} # value of constraints-functions at trial point + bstate::BarrierStateVars{T} # trial point for slack and λ variables +end + +""" + BarrierLineSearchGrad{T} + +Parameters for interior-point line search methods that exploit the slope. 
+""" +immutable BarrierLineSearchGrad{T} + c::Vector{T} # value of constraints-functions at trial point + J::Matrix{T} # constraints-Jacobian at trial point + bstate::BarrierStateVars{T} # trial point for slack and λ variables + bgrad::BarrierStateVars{T} # trial point's gradient +end + +# Fallbacks (for methods that don't need these) +after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing +update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing ## Computation of the Lagrangian and its gradient # This is in a parametrization that is also useful during linesearch @@ -343,9 +381,3 @@ function unpack_vec!(x, vec::Vector, k::Int) end k end - -if VERSION >= v"0.5.0" - view5(A, i, j) = view(A, i, j) -else - view5(A, i, j) = A[i,j] -end diff --git a/src/ipnewton.jl b/src/ipnewton.jl new file mode 100644 index 000000000..db501f930 --- /dev/null +++ b/src/ipnewton.jl @@ -0,0 +1,151 @@ +immutable IPNewton <: IPOptimizer + linesearch!::Function +end + +IPNewton(; linesearch!::Function = backtrack_constrained!) = + IPNewton(linesearch!) 
+ +type IPNewtonState{T,N} <: AbstractBarrierState + @add_generic_fields() + x_previous::Array{T,N} + g::Array{T,N} + f_x_previous::T + H::Matrix{T} + Hd::Vector{Int8} + s::Array{T,N} # step for x + # Barrier penalty fields + μ::T # coefficient of the barrier penalty + bstate::BarrierStateVars{T} # value of slack and λ variables (current "position") + bgrad::BarrierStateVars{T} # gradient of slack and λ variables at current "position" + constr_c::Vector{T} # value of the user-supplied constraints at x + constr_J::Matrix{T} # value of the user-supplied Jacobian at x + @add_linesearch_fields() + b_ls::BarrierLineSearch{T} + gf::Vector{T} + Hf::Matrix{T} +end + +function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T}) + # Check feasibility of the initial state + mc = nconstraints(constraints) + constr_c = Array{T}(mc) + constraints.c!(initial_x, constr_c) +# isfeasible(constraints, initial_x, constr_c) || error("initial guess must be feasible") + + # Allocate fields for the objective function + n = length(initial_x) + g = Array(T, n) + s = Array(T, n) + x_ls, g_ls = Array(T, n), Array(T, n) + f_x_previous, f_x = NaN, d.fg!(initial_x, g) + f_calls, g_calls = 1, 1 + H = Array(T, n, n) + Hd = Array{Int8}(n) + d.h!(initial_x, H) + h_calls = 1 + + # More constraints + constr_J = Array{T}(mc, n) + constr_gtemp = Array{T}(n) + gf = Array{T}(0) # will be replaced + Hf = Array{T}(0,0) # " + constraints.jacobian!(initial_x, constr_J) + μ = T(1) + bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c) + bgrad = similar(bstate) + b_ls = BarrierLineSearch(similar(constr_c), similar(bstate)) + + state = IPNewtonState("Interior-point Newton's Method", + length(initial_x), + copy(initial_x), # Maintain current state in state.x + f_x, # Store current f in state.f_x + f_calls, # Track f calls in state.f_calls + g_calls, # Track g calls in state.g_calls + h_calls, + 
copy(initial_x), # Maintain current state in state.x_previous + g, # Store current gradient in state.g + T(NaN), # Store previous f in state.f_x_previous + H, + Hd, + similar(initial_x), # Maintain current x-search direction in state.s + μ, + bstate, + bgrad, + constr_c, + constr_J, + @initial_linesearch()..., # Maintain a cache for line search results in state.lsr + b_ls, + gf, + Hf) + # μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J) + update_g!(d, constraints, state, method) + update_h!(d, constraints, state, method) +end + +function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ, method) +end + +function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + μ, Hxx, J = state.μ, state.H, state.constr_J + d.h!(state.x, Hxx) + # Collect the values of the coefficients of the inequality constraints + bounds = constraints.bounds + ineqc, σc, λc = bounds.ineqc, bounds.σc, state.bstate.λc + m, n = size(J, 1), size(J, 2) + λ = zeros(eltype(bounds), m) + for i = 1:length(ineqc) + λ[ineqc[i]] -= λc[i]*σc[i] + end + # Add the weighted hessian terms from the nonlinear constraints + constraints.h!(state.x, λ, Hxx) + # Add the Jacobian terms + JI = view5(J, ineqc, :) + Sinv2 = Diagonal(1./state.bstate.slack_c.^2) + HJ = JI'*Sinv2*JI + for j = 1:n, i = 1:n + Hxx[i,j] += μ*HJ[i,j] + end + # Add the variable inequalities + iz, x = bounds.iz, state.x + for i in iz + Hxx[i,i] += μ/x[i]^2 + end + ineqx, sx = bounds.ineqx, state.bstate.slack_x + for (i,j) in enumerate(ineqx) + Hxx[j,j] += μ/sx[i]^2 + end + # Perform a positive factorization + Hpc, state.Hd = ldltfact(Positive, Hxx) + Hp = full(Hpc) + # Now add the equality constraint hessian terms + eqc, λcE = bounds.eqc, state.bstate.λcE + fill!(λ, 0) + for i = 1:length(eqc) + λ[eqc[i]] -= λcE[i] + end 
+ constraints.h!(state.x, λ, Hp) + # Also add these to Hxx so we have the true Hessian (the one + # without forcing positive-definiteness) + constraints.h!(state.x, λ, Hxx) + # Form the total Hessian + JEx = zeros(eltype(bounds), length(bounds.eqx), length(state.x)) + for (i,j) in enumerate(bounds.eqx) + JEx[i,j] = 1 + end + JEc = view5(J, eqc, :) + Jod = zeros(eltype(JEx), size(JEc, 1), size(JEx, 1)) + state.Hf = [Hp -JEx' -JEc'; + -JEx zeros(eltype(JEx), size(JEx,1), size(JEx,1)) Jod'; + -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))] + # Also form the total gradient + bgrad = state.bgrad + gI = state.g + JI'*Diagonal(σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc) + for (i,j) in enumerate(ineqx) + gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2) + end + state.gf = [gI; + bgrad.λxE; + bgrad.λcE] + state +end diff --git a/src/types.jl b/src/types.jl index 699fc3492..6e710fd61 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,4 +1,6 @@ abstract Optimizer +abstract ConstrainedOptimizer <: Optimizer +abstract IPOptimizer <: ConstrainedOptimizer immutable OptimizationOptions{TCallback <: Union{Void, Function}} x_tol::Float64 f_tol::Float64 @@ -248,6 +250,7 @@ end # additional variables. See `parse_constraints` for details. immutable ConstraintBounds{T} + nc::Int # Number of linear/nonlinear constraints # Box-constraints on variables (i.e., directly on x) eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...) valx::Vector{T} # value of equality-constrained x @@ -269,12 +272,14 @@ function ConstraintBounds(lx, ux, lc, uc) end function _cb{Tx,Tc}(lx::AbstractArray{Tx}, ux::AbstractArray{Tx}, lc::AbstractVector{Tc}, uc::AbstractVector{Tc}) T = promote_type(Tx,Tc) - ConstraintBounds{T}(parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...) + ConstraintBounds{T}(length(lc), parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...) 
end Base.eltype{T}(::Type{ConstraintBounds{T}}) = T Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb)) +nconstraints(cb::ConstraintBounds) = cb.nc + function Base.show(io::IO, cb::ConstraintBounds) indent = " " print(io, "ConstraintBounds:") @@ -290,27 +295,33 @@ end abstract AbstractConstraintsFunction +nconstraints(constraints::AbstractConstraintsFunction) = nconstraints(constraints.bounds) + immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction - bounds::ConstraintBounds{T} c!::F # c!(x, storage) stores the value of the constraint-functions at x jacobian!::J # jacobian!(x, storage) stores the Jacobian of the constraint-functions + bounds::ConstraintBounds{T} end function DifferentiableConstraintsFunction(c!, jacobian!, lx, ux, lc, uc) b = ConstraintBounds(lx, ux, lc, uc) - DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(b, c!, jacobian!) + DifferentiableConstraintsFunction(c!, jacobian!, b) end +DifferentiableConstraintsFunction(c!, jacobian!, bounds::ConstraintBounds) = + DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(c!, jacobian!, b) -immutable TwiceDifferentiableConstraintsFunction{F,J,H,T,N} <: AbstractConstraintsFunction - bounds::ConstraintBounds{T} +immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction c!::F jacobian!::J h!::H # Hessian of the barrier terms + bounds::ConstraintBounds{T} end function TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, lx, ux, lc, uc) b = ConstraintBounds(lx, ux, lc, uc) - TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(b, c!, jacobian!, h!) 
+ TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, b) end +TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, bounds::ConstraintBounds) = + TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(c!, jacobian!, h!, b) ## Utilities diff --git a/test/constraints.jl b/test/constraints.jl index 07fb4a91d..6397ad824 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -1,4 +1,4 @@ -using Optim +using Optim, PositiveFactorizations if VERSION >= v"0.5.0-dev+7720" using Base.Test else @@ -60,7 +60,7 @@ ConstraintBounds: @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) end - @testset "Lagrangian val/grad" begin + @testset "IPNewton" begin function check_autodiff(d, bounds, x, cfun::Function, bstate, μ) c = cfun(x) J = ForwardDiff.jacobian(cfun, x) @@ -79,15 +79,31 @@ ConstraintBounds: ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}()) @test pcmp ≈ pgrad end + function setstate!(state, μ) + state.μ = μ + Optim.update_g!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end # Basic setup μ = 0.2345678 A = randn(3,3); H = A'*A - d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x)) + d = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H)) x = broadcast(clamp, randn(3), -0.99, 0.99) gx = similar(x) cfun = x->Float64[] c = Float64[] J = Array{Float64}(0,0) + method = Optim.IPNewton(identity) + options = OptimizationOptions() + ## In the code, variable constraints are special-cased (for + ## reasons of user-convenience and efficiency). It's + ## important to check that the special-casing yields the same + ## result as the general case. So in the first three + ## constrained cases below, we compare variable constraints + ## against the same kind of constraint applied generically. + cvar! = (x, c) -> copy!(c, x) + cvarJ! 
= (x, J) -> copy!(J, eye(size(J)...)) + cvarh! = (x, λ, h) -> h # h! adds to h, it doesn't replace it ## No constraints bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[]) bstate = Optim.BarrierStateVars(bounds, x) @@ -95,8 +111,13 @@ ConstraintBounds: f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) @test f_x == L == d.f(x) @test gx == H*x + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + @test state.gf ≈ gx + @test state.Hf ≈ H ## Pure equality constraints on variables - d = DifferentiableFunction(x->0.0, (x,storage)->fill!(storage, 0)) + d = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0)) xbar = fill(0.2, length(x)) bounds = Optim.ConstraintBounds(xbar, xbar, [], []) bstate = Optim.BarrierStateVars(bounds) @@ -108,6 +129,23 @@ ConstraintBounds: @test gx == -bstate.λxE @test bgrad.λxE == xbar-x check_autodiff(d, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.λxE, bstate.λxE) + setstate!(state, μ) + @test state.gf ≈ [gx; xbar-x] + n = length(x) + @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], xbar, xbar) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.λcE, bstate.λxE) + setstate!(state, μ) + @test state.gf ≈ [gx; xbar-x] + n = length(x) + @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] ## Nonnegativity constraints bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], []) y = rand(length(x)) @@ 
-118,16 +156,31 @@ ConstraintBounds: @test L ≈ -μ*sum(log, y) @test gx == -μ./y check_autodiff(d, bounds, y, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d, constraints, y) + setstate!(state, μ) + @test state.gf ≈ -μ./y + @test state.Hf ≈ μ*Diagonal(1./y.^2) + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d, constraints, y) + setstate!(state, μ) + @test state.gf ≈ -μ./y + @test state.Hf ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables - bounds = Optim.ConstraintBounds(rand(length(x))-2, rand(length(x))+1, [], []) + lb, ub = rand(length(x))-2, rand(length(x))+1 + bounds = Optim.ConstraintBounds(lb, ub, [], []) bstate = Optim.BarrierStateVars(bounds, x) rand!(bstate.slack_x) # intentionally displace from the correct value rand!(bstate.λx) bgrad = similar(bstate) f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) @test f_x == 0 + s = bounds.σx .* (x[bounds.ineqx] - bounds.bx) Ltarget = -μ*sum(log, bstate.slack_x) + - dot(bstate.λx, bstate.slack_x - bounds.σx.*(x[bounds.ineqx]-bounds.bx)) + dot(bstate.λx, bstate.slack_x - s) @test L ≈ Ltarget dx = similar(gx); fill!(dx, 0) for (i,j) in enumerate(bounds.ineqx) @@ -136,10 +189,42 @@ ConstraintBounds: @test gx ≈ dx @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx check_autodiff(d, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.slack_x, bstate.slack_x) + copy!(state.bstate.λx, bstate.λx) + setstate!(state, μ) + gxs, hxs = 
zeros(length(x)), zeros(length(x)) + s = state.bstate.slack_x + for (i,j) in enumerate(bounds.ineqx) + gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2 + hxs[j] += μ/s[i]^2 + end + @test state.gf ≈ gxs + @test state.Hf ≈ Diagonal(hxs) + # Now again using the generic machinery + bounds = Optim.ConstraintBounds([], [], lb, ub) + constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.slack_c, bstate.slack_x) + copy!(state.bstate.λc, bstate.λx) + setstate!(state, μ) + @test state.gf ≈ gxs + @test state.Hf ≈ Diagonal(hxs) ## Nonlinear equality constraints cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] + cfun! = (x, c) -> copy!(c, cfun(x)) + cJ! = (x, J) -> copy!(J, [2*x[1] 2*x[2] 0; + 0 x[3]^2 2*x[2]*x[3]]) + ch! = function(x, λ, h) + h[1,1] += 2*λ[1] + h[2,2] += 2*λ[1] + h[3,3] += 2*λ[2]*x[2] + end c = cfun(x) J = ForwardDiff.jacobian(cfun, x) + Jtmp = similar(J); @test cJ!(x, Jtmp) ≈ J # just to check we did it right cbar = rand(length(c)) bounds = Optim.ConstraintBounds([], [], cbar, cbar) bstate = Optim.BarrierStateVars(bounds, x, c) @@ -151,6 +236,15 @@ ConstraintBounds: @test gx ≈ -J'*bstate.λcE @test bgrad.λcE == cbar-c check_autodiff(d, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.λcE, bstate.λcE) + setstate!(state, μ) + heq = zeros(length(x), length(x)) + ch!(x, bstate.λcE, heq) + @test state.gf ≈ [gx; cbar-c] + @test state.Hf ≈ [eye(length(x))-heq -J'; + -J zeros(size(J,1), size(J,1))] ## Nonlinear inequality constraints bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1) bstate = Optim.BarrierStateVars(bounds, x, c) @@ -164,7 +258,24 @@ ConstraintBounds: @test L ≈ Ltarget @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc) @test bgrad.slack_c == -μ./bstate.slack_c 
+ bstate.λc + @test bgrad.λc == bstate.slack_c - bounds.σc .* (c[bounds.ineqc] - bounds.bc) check_autodiff(d, bounds, x, cfun, bstate, μ) + constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) + state = Optim.initial_state(method, options, d, constraints, x) + copy!(state.bstate.slack_c, bstate.slack_c) + copy!(state.bstate.λc, bstate.λc) + setstate!(state, μ) + hineq = zeros(length(x), length(x)) + λ = zeros(size(J, 1)) + for (i,j) in enumerate(bounds.ineqc) + λ[j] += bstate.λc[i]*bounds.σc[i] + end + ch!(x, λ, hineq) + JI = J[bounds.ineqc,:] + hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq + hp = full(cholfact(Positive, hxx)) + @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2)) + @test state.Hf ≈ hp end end From 6441534d85cb7790a2e5a081c84920e5b98f15a5 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 8 Nov 2016 03:54:18 -0600 Subject: [PATCH 06/40] Add interior-point Newton step update and backtracking linesearch --- src/Optim.jl | 1 + src/interior.jl | 134 +++++++++++++++++++++++++++++++++------- src/iplinesearch.jl | 15 +++++ src/ipnewton.jl | 110 +++++++++++++++++++++++++++++++-- src/types.jl | 24 +++++++ src/utilities/update.jl | 7 +++ test/constraints.jl | 134 ++++++++++++++++++++++++++++++++-------- 7 files changed, 375 insertions(+), 50 deletions(-) create mode 100644 src/iplinesearch.jl diff --git a/src/Optim.jl b/src/Optim.jl index c900b5825..1afe15fed 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -80,6 +80,7 @@ module Optim include("fminbox.jl") include("interior.jl") include("ipnewton.jl") + include("iplinesearch.jl") # trust region methods include("levenberg_marquardt.jl") diff --git a/src/interior.jl b/src/interior.jl index 2f339f694..b9b5b62c5 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -2,12 +2,14 @@ abstract AbstractBarrierState # These are used not only for the current state, but also for the step and the gradient immutable 
BarrierStateVars{T} - slack_x::Vector{T} # values of slack variables for x - slack_c::Vector{T} # values of slack variables for c - λxE::Vector{T} # λ for equality constraints on x - λx::Vector{T} # λ for equality constraints on slack_x - λc::Vector{T} # λ for equality constraints on slack_c - λcE::Vector{T} # λ for linear/nonlinear equality constraints + slack_x::Vector{T} # values of slack variables for x + slack_c::Vector{T} # values of slack variables for c + active_x::Vector{Bool} # active constraints for x (see solve_active_inequalities) + active_c::Vector{Bool} # active constraints for c + λxE::Vector{T} # λ for equality constraints on x + λx::Vector{T} # λ for equality constraints on slack_x + λc::Vector{T} # λ for equality constraints on slack_c + λcE::Vector{T} # λ for linear/nonlinear equality constraints end # Note on λxE: # We could just set equality-constrained variables to their @@ -24,24 +26,27 @@ end λx = similar(slack_x) λc = similar(slack_c) λcE = Array{T}(length(bounds.eqc)) - sv = BarrierStateVars{T}(slack_x, slack_c, λxE, λx, λc, λcE) + sv = BarrierStateVars{T}(slack_x, slack_c, fill(false, length(slack_x)), + fill(false, length(slack_c)), λxE, λx, λc, λcE) end BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds) function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x) sv = BarrierStateVars(bounds) - setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx) sv end function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c) sv = BarrierStateVars(bounds) - setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) - setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc) + setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_c, sv.active_c, c, bounds.ineqc, bounds.σc, bounds.bc) sv end -function setslack!(slack, v, ineq, σ, b) +function setslack!(slack, active, v, ineq, σ, b) for i = 
1:length(ineq) - slack[i] = σ[i]*(v[ineq[i]]-b[i]) + dv = v[ineq[i]]-b[i] + slack[i] = σ[i]*dv + active[i] = dv == 0 end slack end @@ -49,6 +54,8 @@ end Base.similar(bstate::BarrierStateVars) = BarrierStateVars(similar(bstate.slack_x), similar(bstate.slack_c), + similar(bstate.active_x), + similar(bstate.active_c), similar(bstate.λxE), similar(bstate.λx), similar(bstate.λc), @@ -57,6 +64,8 @@ Base.similar(bstate::BarrierStateVars) = function Base.fill!(b::BarrierStateVars, val) fill!(b.slack_x, val) fill!(b.slack_c, val) + fill!(b.active_x, false) + fill!(b.active_c, false) fill!(b.λxE, val) fill!(b.λx, val) fill!(b.λc, val) @@ -110,6 +119,69 @@ immutable BarrierLineSearchGrad{T} bgrad::BarrierStateVars{T} # trial point's gradient end +function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α) + ls_update!(out.slack_x, base.slack_x, step.slack_x, α) + ls_update!(out.slack_c, base.slack_c, step.slack_c, α) + ls_update!(out.λxE, base.λxE, step.λxE, α) + ls_update!(out.λx, base.λx, step.λx, α) + ls_update!(out.λc, base.λc, step.λc, α) + ls_update!(out.λcE, base.λcE, step.λcE, α) + out +end + +# Explicit solution for slack, λ when an inequality constraint is +# "active." This is necessary (or at least helpful) when c-b == 0 due +# to roundoff error, in which case the KKT equations don't have an +# exact solution within the precision. We punt on the ∂λ equation +# (which reduces to the slack, which should be small anyway), and +# focus on the ∂x and ∂slack equations (therefore setting slack and +# λ). By setting these to their exact solutions, we blance the forces +# due to the barrier. 
+function solve_active_inequalities!(d, constraints, state) + x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds + nactive, nchanged = tally_active!(bstate.active_x, 0, 0, x, bounds.ineqx, bounds.bx) + nx = nactive + nactive, nchanged = tally_active!(bstate.active_c, nactive, nchanged, c, bounds.ineqc, bounds.bc, ) + if nactive == 0 || nchanged == 0 + return nothing + end + # Calculate the necessary gradients + d.g!(state.x, state.g) + constraints.jacobian!(state.x, state.constr_J) + # Solve for the Lagrange multipliers + ic, ix = bounds.ineqc[bstate.active_c], bounds.ineqx[bstate.active_x] + Jx = view5(state.constr_J, ic, ix) + Jact = view5(state.constr_J, ic, :) + Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact'] + pactive = [view(state.g, ix); Jact*state.g] + λactive = (Cactive\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]] + # Set the state + k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0) + k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k) + k == length(λactive) || error("something is wrong") + nothing +end + +function tally_active!(active, nactive, nchanged, c, ineq, b) + for (i,j) in enumerate(ineq) + isactive = c[j] == b[i] + nactive += isactive + nchanged += isactive != active[i] + active[i] = isactive + end + nactive, nchanged +end + +function set_active_params!(slack, λ, active, λtarget, μ, k) + for i = 1:length(active) + active[i] || continue + λk = λtarget[k+=1] + λ[i] = λk + slack[i] = μ/λk + end + k +end + # Fallbacks (for methods that don't need these) after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing @@ -117,14 +189,14 @@ update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing ## Computation of the Lagrangian and its gradient # This is in a parametrization that is also 
useful during linesearch -function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ, method) +function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ) f_x = d.f(x) L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + equality_violation(bounds, x, c, bstate) f_x, L_xsλ end -function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) +function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) fill!(bgrad, 0) d.g!(x, gx) barrier_grad!(gx, bgrad, bounds, x, bstate, μ) @@ -132,7 +204,7 @@ function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate:: nothing end -function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) +function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) fill!(bgrad, 0) f_x = d.fg!(x, gx) L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + @@ -143,24 +215,33 @@ function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate: end ## Computation of Lagrangian and derivatives when passing all parameters as a single vector -function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ, method) +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ) unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ, method) + f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ) L_xsλ end -function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ, method) +function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ) # Use this version when using automatic differentiation unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ, method) + f_x, L_xsλ = lagrangian(d, bounds, x, 
c(x), bstate, μ) L_xsλ end -function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method) +function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, method) + f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) pack_vec!(storage, gx, bgrad) L_xsλ end +# for line searches that don't use the gradient along the line +function lagrangian_linefunc(α, d, constraints, state) + b_ls = state.b_ls + ls_update!(state.x_ls, state.x, state.s, α) + ls_update!(b_ls.bstate, state.bstate, state.bstep, α) + constraints.c!(state.x, b_ls.c) + lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)[2] +end + ## Computation of Lagrangian terms: barrier penalty """ barrier_value(constraints, state) -> val @@ -381,3 +462,14 @@ function unpack_vec!(x, vec::Vector, k::Int) end k end + +## More utilities +function estimate_maxstep(αmax, x, s) + for i = 1:length(s) + si = s[i] + if si < 0 + αmax = min(αmax, -x[i]/si) + end + end + αmax +end diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl new file mode 100644 index 000000000..c0343b5b1 --- /dev/null +++ b/src/iplinesearch.jl @@ -0,0 +1,15 @@ +function backtrack_constrained(ϕ, α, αmax, Lcoefsα, + c1 = 0.5, ρ=oftype(α, 0.5), itermax = 100) + α = min(α, 0.999*αmax) + L0, L1, L2 = Lcoefsα + f_calls = 0 + while f_calls < itermax + f_calls += 1 + val = ϕ(α) + if abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) + 100*eps(abs(val)+abs(L0)) + return α, f_calls, 0 + end + α *= ρ + end + error("failed to satisfy criterion after $f_calls iterations") +end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index db501f930..1bbbc3d1d 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -2,7 +2,7 @@ immutable IPNewton <: IPOptimizer linesearch!::Function end -IPNewton(; linesearch!::Function = 
backtrack_constrained!) = +IPNewton(; linesearch!::Function = backtrack_constrained) = IPNewton(linesearch!) type IPNewtonState{T,N} <: AbstractBarrierState @@ -15,14 +15,17 @@ type IPNewtonState{T,N} <: AbstractBarrierState s::Array{T,N} # step for x # Barrier penalty fields μ::T # coefficient of the barrier penalty + L::T # value of the Lagrangian (objective + barrier + equality) bstate::BarrierStateVars{T} # value of slack and λ variables (current "position") bgrad::BarrierStateVars{T} # gradient of slack and λ variables at current "position" + bstep::BarrierStateVars{T} # search direction for slack and λ constr_c::Vector{T} # value of the user-supplied constraints at x constr_J::Matrix{T} # value of the user-supplied Jacobian at x @add_linesearch_fields() b_ls::BarrierLineSearch{T} gf::Vector{T} Hf::Matrix{T} + stepf::Vector{T} end function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T}) @@ -49,10 +52,12 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct constr_gtemp = Array{T}(n) gf = Array{T}(0) # will be replaced Hf = Array{T}(0,0) # " + stepf = Array{T}(0) constraints.jacobian!(initial_x, constr_J) μ = T(1) bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c) bgrad = similar(bstate) + bstep = similar(bstate) b_ls = BarrierLineSearch(similar(constr_c), similar(bstate)) state = IPNewtonState("Interior-point Newton's Method", @@ -69,21 +74,32 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hd, similar(initial_x), # Maintain current x-search direction in state.s μ, + T(0), bstate, bgrad, + bstep, constr_c, constr_J, @initial_linesearch()..., # Maintain a cache for line search results in state.lsr b_ls, gf, - Hf) + Hf, + stepf) + # μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J) - update_g!(d, constraints, state, method) + update_fg!(d, constraints, 
state, method) update_h!(d, constraints, state, method) end +function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) + state.f_x, state.L = f_x, L + state +end + function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) - lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ, method) + lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) + state end function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) @@ -149,3 +165,89 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state bgrad.λcE] state end + +function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) + bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds + solve_step!(state, constraints) + # If a step α=1 will not change any of the parameters, we can quit now. + # This prevents a futile linesearch. 
+ if is_smaller_eps(state.x, state.s) && + is_smaller_eps(bstate.slack_x, bstep.slack_x) && + is_smaller_eps(bstate.slack_c, bstep.slack_c) && + is_smaller_eps(bstate.λx, bstep.λx) && + is_smaller_eps(bstate.λc, bstep.λc) + return false + end + qp = quadratic_parameters(bounds, state) + + # Estimate αmax, the upper bound on distance of movement along the search line + αmax = convert(eltype(bstate), Inf) + αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x) + αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c) + αmax = estimate_maxstep(αmax, + view(state.x, bounds.iz).*bounds.σz, + view(state.s, bounds.iz).*bounds.σz) + + # Determine the actual distance of movement along the search line + ϕ = α->lagrangian_linefunc(α, d, constraints, state) + state.alpha, f_update, g_update = + method.linesearch!(ϕ, T(1), αmax, qp) + state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update + + # Maintain a record of previous position + copy!(state.x_previous, state.x) + + # Update current position # x = x + alpha * s + ls_update!(state.x, state.x, state.s, state.alpha) + ls_update!(bstate, bstate, bstep, state.alpha) + + # Evaluate the constraints at the new position + constraints.c!(state.x, state.constr_c) + constraints.jacobian!(state.x, state.constr_J) + + # Test for active inequalities, solve immediately for the corresponding s and λ + solve_active_inequalities!(d, constraints, state) + + false +end + +function solve_step!(state::IPNewtonState, constraints) + # Solve the Newton step + step = -(state.Hf\state.gf) # do *not* force posdef + x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds + bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad + k = unpack_vec!(s, step, 0) + k = unpack_vec!(bstep.λxE, step, k) + k = unpack_vec!(bstep.λcE, step, k) + k == length(step) || error("exhausted targets before step") + # Solve for the slack variable and λI updates + for (i, j) in enumerate(bounds.ineqx) + bstep.slack_x[i] = 
-bgrad.λx[i] + bounds.σx[i]*s[j] + bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 + end + JI = view5(state.constr_J, bounds.ineqc, :) + bstep.slack_c[:] = -bgrad.λc + Diagonal(bounds.σc)*JI*s + for i = 1:length(bstep.λc) + bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 + end + state.stepf = step + state +end + +function is_smaller_eps(ref, step) + ise = true + for (r, s) in zip(ref, step) + ise &= (s == 0) | (abs(s) < eps(r)) + end + ise +end + +function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) + slope = dot(state.stepf, state.gf) + # For the curvature, use the original hessian (before forcing + # positive-definiteness) + q = dot(state.s, state.H*state.s) + JE = view5(state.constr_J, bounds.eqc, :) + q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE) + state.L, slope, q +end diff --git a/src/types.jl b/src/types.jl index 6e710fd61..3ee044f7b 100644 --- a/src/types.jl +++ b/src/types.jl @@ -310,6 +310,17 @@ end DifferentiableConstraintsFunction(c!, jacobian!, bounds::ConstraintBounds) = DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(c!, jacobian!, b) +function DifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray) + bounds = ConstraintBounds(lx, ux, [], []) + DifferentiableConstraintsFunction(bounds) +end + +function DifferentiableConstraintsFunction(bounds::ConstraintBounds) + c! = (x,c)->nothing + J! 
= (x,J)->nothing + DifferentiableConstraintsFunction(c!, J!, bounds) +end + immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction c!::F jacobian!::J @@ -323,6 +334,19 @@ end TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, bounds::ConstraintBounds) = TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(c!, jacobian!, h!, b) +function TwiceDifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray) + bounds = ConstraintBounds(lx, ux, [], []) + TwiceDifferentiableConstraintsFunction(bounds) +end + +function TwiceDifferentiableConstraintsFunction(bounds::ConstraintBounds) + c! = (x,c)->nothing + J! = (x,J)->nothing + h! = (x,λ,h)->nothing + TwiceDifferentiableConstraintsFunction(c!, J!, h!, bounds) +end + + ## Utilities function symmetrize(l, u) diff --git a/src/utilities/update.jl b/src/utilities/update.jl index 8b81dbf35..3912dab4a 100644 --- a/src/utilities/update.jl +++ b/src/utilities/update.jl @@ -27,3 +27,10 @@ function update!{T}(tr::OptimizationTrace{T}, end stopped end + +function ls_update!(out::AbstractArray, base::AbstractArray, step::AbstractArray, α) + length(out) == length(base) == length(step) || throw(DimensionMismatch("all arrays must have the same length, got $(length(out)), $(length(base)), $(length(step))")) + for i = 1:length(base) + out[i] = base[i]+α*step[i] + end +end diff --git a/test/constraints.jl b/test/constraints.jl index 6397ad824..0509b1f9e 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -17,6 +17,13 @@ else end @testset "Constraints" begin + # Utility function for hand-setting the μ parameter + function setstate!(state, μ, d, constraints, method) + state.μ = μ + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @testset "Bounds parsing" begin b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) @test b.eqx == [3] @@ -60,30 
+67,24 @@ ConstraintBounds: @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0]) end - @testset "IPNewton" begin + @testset "IPNewton computations" begin + # Compare hand-computed gradient against that from automatic differentiation function check_autodiff(d, bounds, x, cfun::Function, bstate, μ) c = cfun(x) J = ForwardDiff.jacobian(cfun, x) - # Using real-valued inputs p = Optim.pack_vec(x, bstate) - ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ) pgrad = similar(p) ftot!(p, pgrad) - # Compute with ForwardDiff chunksize = min(8, length(p)) TD = ForwardDiff.Dual{chunksize,eltype(p)} xd = Array{TD}(length(x)) bstated = Optim.BarrierStateVars{TD}(bounds) pcmp = similar(p) - ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ, nothing) + ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ) ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}()) @test pcmp ≈ pgrad end - function setstate!(state, μ) - state.μ = μ - Optim.update_g!(d, constraints, state, method) - Optim.update_h!(d, constraints, state, method) - end # Basic setup μ = 0.2345678 A = randn(3,3); H = A'*A @@ -93,8 +94,8 @@ ConstraintBounds: cfun = x->Float64[] c = Float64[] J = Array{Float64}(0,0) - method = Optim.IPNewton(identity) options = OptimizationOptions() + method = Optim.IPNewton() ## In the code, variable constraints are special-cased (for ## reasons of user-convenience and efficiency). 
It's ## important to check that the special-casing yields the same @@ -108,7 +109,7 @@ ConstraintBounds: bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[]) bstate = Optim.BarrierStateVars(bounds, x) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) @test f_x == L == d.f(x) @test gx == H*x constraints = TwiceDifferentiableConstraintsFunction( @@ -123,7 +124,7 @@ ConstraintBounds: bstate = Optim.BarrierStateVars(bounds) rand!(bstate.λxE) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) @test f_x == 0 @test L ≈ dot(bstate.λxE, xbar-x) @test gx == -bstate.λxE @@ -133,7 +134,7 @@ ConstraintBounds: (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.λxE, bstate.λxE) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) @test state.gf ≈ [gx; xbar-x] n = length(x) @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] @@ -142,7 +143,7 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.λcE, bstate.λxE) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) @test state.gf ≈ [gx; xbar-x] n = length(x) @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] @@ -151,7 +152,7 @@ ConstraintBounds: y = rand(length(x)) bstate = Optim.BarrierStateVars(bounds, y) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ) 
@test f_x == 0 @test L ≈ -μ*sum(log, y) @test gx == -μ./y @@ -159,14 +160,14 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, d, constraints, y) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) @test state.gf ≈ -μ./y @test state.Hf ≈ μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d, constraints, y) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) @test state.gf ≈ -μ./y @test state.Hf ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables @@ -176,7 +177,7 @@ ConstraintBounds: rand!(bstate.slack_x) # intentionally displace from the correct value rand!(bstate.λx) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) @test f_x == 0 s = bounds.σx .* (x[bounds.ineqx] - bounds.bx) Ltarget = -μ*sum(log, bstate.slack_x) + @@ -194,7 +195,7 @@ ConstraintBounds: state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.slack_x, bstate.slack_x) copy!(state.bstate.λx, bstate.λx) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) gxs, hxs = zeros(length(x)), zeros(length(x)) s = state.bstate.slack_x for (i,j) in enumerate(bounds.ineqx) @@ -209,7 +210,7 @@ ConstraintBounds: state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.slack_c, bstate.slack_x) copy!(state.bstate.λc, bstate.λx) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) @test state.gf ≈ gxs @test state.Hf ≈ Diagonal(hxs) ## Nonlinear equality constraints @@ 
-230,7 +231,7 @@ ConstraintBounds: bstate = Optim.BarrierStateVars(bounds, x, c) rand!(bstate.λcE) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) @test f_x == 0 @test L ≈ dot(bstate.λcE, cbar-c) @test gx ≈ -J'*bstate.λcE @@ -239,7 +240,7 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.λcE, bstate.λcE) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) heq = zeros(length(x), length(x)) ch!(x, bstate.λcE, heq) @test state.gf ≈ [gx; cbar-c] @@ -251,7 +252,7 @@ ConstraintBounds: rand!(bstate.slack_c) # intentionally displace from the correct value rand!(bstate.λc) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) @test f_x == 0 Ltarget = -μ*sum(log, bstate.slack_c) + dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc)) @@ -264,7 +265,7 @@ ConstraintBounds: state = Optim.initial_state(method, options, d, constraints, x) copy!(state.bstate.slack_c, bstate.slack_c) copy!(state.bstate.λc, bstate.λc) - setstate!(state, μ) + setstate!(state, μ, d, constraints, method) hineq = zeros(length(x), length(x)) λ = zeros(size(J, 1)) for (i,j) in enumerate(bounds.ineqc) @@ -277,6 +278,89 @@ ConstraintBounds: @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2)) @test state.Hf ≈ hp end + + @testset "IPNewton step" begin + F = 1000 + d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + method = Optim.IPNewton() + options = OptimizationOptions() + μ = 1e-20 + x0 = μ/F*10 # minimum is at μ/F + # Nonnegativity (the case that doesn't require slack variables) 
+ constraints = TwiceDifferentiableConstraintsFunction([0.0], []) + state = Optim.initial_state(method, options, d, constraints, [x0]) + setstate!(state, μ, d, constraints, method) + Optim.solve_step!(state, constraints) + @test state.s[1] ≈ x0 - F*x0^2/μ + qp = Optim.quadratic_parameters(constraints.bounds, state) + @test qp[1] ≈ F*x0-μ*log(x0) + @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ + @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2 + bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds + αmax = Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz, + state.s[bounds.iz].*bounds.σz) + ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state) + @test ϕ(0) ≈ qp[1] + α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp) + @test α > 1e-3 + end + + @testset "Slack" begin + σswap(σ, a, b) = σ == 1 ? (a, b) : (b, a) + # Test that we achieve a high-precision minimum for fixed + # μ. For anything other than nonnegativity/nonpositivity + # constraints, this tests whether the slack variables are + # solving the problem they were designed to address (the + # possibility that adjacent floating-point numbers are too + # widely spaced to accurately satisfy the KKT equations near a + # boundary). + F0 = 1000 + method = Optim.IPNewton() + options = OptimizationOptions() + μ = 1e-20 # smaller than eps(1.0) + for σ in (1, -1) + F = σ*F0 + # Nonnegativity/nonpositivity (the case that doesn't require slack variables) + d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...) 
+ state = Optim.initial_state(method, options, d, constraints, [μ/F*10]) + setstate!(state, μ, d, constraints, method) + for i = 1:10 + Optim.update_state!(d, constraints, state, method) + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test state.x[1] ≈ μ/F + # |x| ≥ 1, and check that we get slack precision better than eps(1.0) + d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...) + state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) + setstate!(state, μ, d, constraints, method) + for i = 1:10 + Optim.update_state!(d, constraints, state, method) + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test state.x[1] == σ + @test state.bstate.slack_x[1] ≈ μ/abs(F) + # x >= 1 using the linear/nonlinear constraints + d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1] = x[1]), + (x,J)->(J[1,1] = 1.0), + (x,λ,h)->nothing, + [], [], σswap(σ, [Float64(σ)], [])...) 
+ state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) + setstate!(state, μ, d, constraints, method) + for i = 1:10 + Optim.update_state!(d, constraints, state, method) + Optim.update_fg!(d, constraints, state, method) + Optim.update_h!(d, constraints, state, method) + end + @test state.x[1] == σ + @test state.bstate.slack_c[1] ≈ μ/abs(F) + end + end end nothing From 50b158293b2983347e343574f8239b420921db52 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 8 Nov 2016 08:31:47 -0600 Subject: [PATCH 07/40] =?UTF-8?q?Add=20a=20principled=20initialization=20f?= =?UTF-8?q?or=20=CE=BC=20and=20=CE=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the notion that we want to (largely) preserve the objective function's initial descent direction. --- src/interior.jl | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ src/ipnewton.jl | 5 +++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index b9b5b62c5..839e12783 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -182,6 +182,67 @@ function set_active_params!(slack, λ, active, λtarget, μ, k) k end +""" + initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, β=0.01) -> μ + +Pick μ and λ to ensure that the equality constraints are satisfied +locally, and that the initial gradient including the barrier would be +a descent direction for the problem without the barrier (μ = 0). This +ensures that the search isn't pushed out of the basin of the +user-supplied initial guess. + +`λv` and `λc` are the Lagrange multipliers for the variables and extra +(non-variable) constraints; these are pre-allocated storage for the +output, and their input values are not used. 
`constraints` is an +`AbstractConstraintsFunction`, `x` is the position (must be a feasible +interior point), `g` is the gradient of the objective at `x`, and +`constr_c` and `constr_J` contain the values and Jacobian of the extra +constraints evaluated at `x`. `β` (optional) specifies the fraction of +the objective's gradient that may be diminished by the barrier. + +In addition to setting `λxE` and `λcE`, this returns `μ`, the value of +the barrier penalty. +""" +function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1//100) + length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x)) + # Calculate the projection matrix + JEx = zeros(eltype(J), length(bounds.eqx), length(x)) + for (i,j) in enumerate(bounds.eqx) + JEx[i,j] = 1 + end + JEc = view5(J, bounds.eqc, :) + JE = vcat(JEx, JEc) + CE = JE*JE' + CEc = cholfact(Positive, CE) + Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.) + # Calculate the barrier deviation and projection onto inequality normals + Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc] + JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x)) + for (i,j) in enumerate([bounds.iz; bounds.ineqx]) + JIx[i,j] = 1 + end + JIc = view5(J, bounds.ineqc, :) + JI = vcat(JIx, JIc) + JIg = JI*Pg + # Solve for μ + λtilde = 1./Δb + μden = dot(λtilde, JIg) + if μden == 0 + μden = maximum(abs(λtilde).*abs(JIg))*length(Δb) + end + μ = β*dot(Pg, Pg)/abs(μden) + μ = μden != 0 ? μ : oftype(μ, 1) + # Solve for λE + gb = g - μ*(JI'*λtilde) + Pgb = gb - JE'*(CEc \ (JE*gb)) + λE = CEc \ (JE*Pgb) + k = unpack_vec!(λx, λE, 0) + k = unpack_vec!(λc, λE, k) + k == length(λE) || error("something is wrong") + μ +end +initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) = + initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...) 
# Fallbacks (for methods that don't need these) after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 1bbbc3d1d..9a080dc25 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -86,7 +86,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hf, stepf) - # μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J) + state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J) update_fg!(d, constraints, state, method) update_h!(d, constraints, state, method) end @@ -94,11 +94,14 @@ end function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) state.f_x, state.L = f_x, L + state.f_calls += 1 + state.g_calls += 1 state end function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) + state.g_calls += 1 state end From 71029578ebaff46b5f8a3e942b5f25d923dca82d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 8 Nov 2016 08:38:20 -0600 Subject: [PATCH 08/40] Add optimize for interior-point methods Also implements tracing --- src/Optim.jl | 2 +- src/interior.jl | 116 ++++++++++++++++++++++++++++++++++++++--- src/ipnewton.jl | 6 +-- src/types.jl | 44 +++++++++++++--- src/utilities/trace.jl | 21 ++++++++ test/runtests.jl | 1 + 6 files changed, 172 insertions(+), 18 deletions(-) diff --git a/src/Optim.jl b/src/Optim.jl index 1afe15fed..1820bf9f1 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -78,9 +78,9 @@ module Optim # Constrained optimization include("fminbox.jl") + 
include("iplinesearch.jl") include("interior.jl") include("ipnewton.jl") - include("iplinesearch.jl") # trust region methods include("levenberg_marquardt.jl") diff --git a/src/interior.jl b/src/interior.jl index 839e12783..a29be2319 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -129,13 +129,97 @@ function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::Barrier out end +function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions) + t0 = time() # Initial time stamp used to control early stopping by options.time_limit + + state = initial_state(method, options, d, constraints, initial_x) + + tr = OptimizationTrace{typeof(method)}() + tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing + stopped, stopped_by_callback, stopped_by_time_limit = false, false, false + + x_converged, f_converged = false, false + g_converged = vecnorm(state.g, Inf) < options.g_tol + + converged = g_converged + iteration, iterationμ = 0, 0 + + options.show_trace && print_header(method) + trace!(tr, state, iteration, method, options) + + while !converged && !stopped && iteration < options.iterations + iteration += 1 + iterationμ += 1 + + update_state!(d, constraints, state, method) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS) + update_asneeded_fg!(d, constraints, state, method) + x_converged, f_converged, + g_converged, converged = assess_convergence(state, options) + + # If tracing, update trace with trace!. If a callback is provided, it + # should have boolean return value that controls the variable stopped_by_callback. + # This allows for early stopping controlled by the callback. 
+ if tracing + stopped_by_callback = trace!(tr, state, iteration, method, options) + end + + # Test whether we need to decrease the barrier penalty + if converged + if iterationμ > 1 + # We did real work, so it's worth decreasing the barrier penalty further + shrink_μ!(d, constraints, state, method, options) + iterationμ = 0 + converged = false + end + end + + # We don't use the Hessian for anything if we have declared convergence, + # so we might as well not make the (expensive) update if converged == true + !converged && update_h!(d, constraints, state, method) + + # Check time_limit; if none is provided it is NaN and the comparison + # will always return false. + stopped_by_time_limit = time()-t0 > options.time_limit ? true : false + + # Combine the two, so see if the stopped flag should be changed to true + # and stop the while loop + stopped = stopped_by_callback || stopped_by_time_limit ? true : false + end # while + + after_while!(d, constraints, state, method, options) + + return MultivariateOptimizationResults(state.method_string, + initial_x, + state.x, + Float64(state.f_x), + iteration, + iteration == options.iterations, + x_converged, + options.x_tol, + f_converged, + options.f_tol, + g_converged, + options.g_tol, + tr, + state.f_calls, + state.g_calls, + state.h_calls) +end + +# Fallbacks (for methods that don't need these) +after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing +update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing +update_asneeded_fg!(d, constraints, state, method) = update_fg!(d, constraints, state, method) +update_asneeded_fg!(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) = update_g!(d, constraints, state, method) + + # Explicit solution for slack, λ when an inequality constraint is # "active." 
This is necessary (or at least helpful) when c-b == 0 due # to roundoff error, in which case the KKT equations don't have an # exact solution within the precision. We punt on the ∂λ equation # (which reduces to the slack, which should be small anyway), and # focus on the ∂x and ∂slack equations (therefore setting slack and -# λ). By setting these to their exact solutions, we blance the forces +# λ). By setting these to their exact solutions, we balance the forces # due to the barrier. function solve_active_inequalities!(d, constraints, state) x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds @@ -154,7 +238,9 @@ function solve_active_inequalities!(d, constraints, state) Jact = view5(state.constr_J, ic, :) Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact'] pactive = [view(state.g, ix); Jact*state.g] - λactive = (Cactive\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]] + Cactivep = cholfact(Positive, Cactive) + λactive = (Cactivep\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]] + any(x->x<=0, λactive) && error("something may be wrong, λ is zero or negative. Perhaps Cactive is singular?") # Set the state k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0) k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k) @@ -227,7 +313,7 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/ # Solve for μ λtilde = 1./Δb μden = dot(λtilde, JIg) - if μden == 0 + if μden == 0 && !isempty(Δb) μden = maximum(abs(λtilde).*abs(JIg))*length(Δb) end μ = β*dot(Pg, Pg)/abs(μden) @@ -243,9 +329,6 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/ end initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) = initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...) 
-# Fallbacks (for methods that don't need these) -after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing -update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing ## Computation of the Lagrangian and its gradient # This is in a parametrization that is also useful during linesearch @@ -296,13 +379,27 @@ end # for line searches that don't use the gradient along the line function lagrangian_linefunc(α, d, constraints, state) + _lagrangian_linefunc(α, d, constraints, state)[2] +end + +function _lagrangian_linefunc(α, d, constraints, state) b_ls = state.b_ls ls_update!(state.x_ls, state.x, state.s, α) ls_update!(b_ls.bstate, state.bstate, state.bstep, α) constraints.c!(state.x, b_ls.c) - lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)[2] + lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) end +function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) + # For backtrack_constrained, the last evaluation is the one we + # keep, so it's safe to store the results in state + f_x, L = _lagrangian_linefunc(α, d, constraints, state) + state.f_x = f_x + state.L = L + L +end +lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state) + ## Computation of Lagrangian terms: barrier penalty """ barrier_value(constraints, state) -> val @@ -534,3 +631,8 @@ function estimate_maxstep(αmax, x, s) end αmax end + +function shrink_μ!(d, constraints, state, method, options) + state.μ *= options.μfactor + update_fg!(d, constraints, state, method) +end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 9a080dc25..2c19de36c 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -1,5 +1,5 @@ -immutable IPNewton <: IPOptimizer - linesearch!::Function +immutable IPNewton{F} <: IPOptimizer{F} + linesearch!::F end IPNewton(; linesearch!::Function = backtrack_constrained) = @@ -192,7 +192,7 @@ 
function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction view(state.s, bounds.iz).*bounds.σz) # Determine the actual distance of movement along the search line - ϕ = α->lagrangian_linefunc(α, d, constraints, state) + ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method) state.alpha, f_update, g_update = method.linesearch!(ϕ, T(1), αmax, qp) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update diff --git a/src/types.jl b/src/types.jl index 3ee044f7b..9c3f67247 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,6 +1,9 @@ abstract Optimizer -abstract ConstrainedOptimizer <: Optimizer -abstract IPOptimizer <: ConstrainedOptimizer +abstract ConstrainedOptimizer{T} <: Optimizer +abstract IPOptimizer{T} <: ConstrainedOptimizer # interior point methods + +abstract AbstractOptimFunction + immutable OptimizationOptions{TCallback <: Union{Void, Function}} x_tol::Float64 f_tol::Float64 @@ -13,6 +16,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}} show_every::Int callback::TCallback time_limit::Float64 + μfactor::Float64 end function OptimizationOptions(; @@ -26,7 +30,8 @@ function OptimizationOptions(; autodiff::Bool = false, show_every::Integer = 1, callback = nothing, - time_limit = NaN) + time_limit = NaN, + μfactor = 0.1) show_every = show_every > 0 ? 
show_every: 1 if extended_trace && callback == nothing show_trace = true @@ -34,7 +39,7 @@ function OptimizationOptions(; OptimizationOptions{typeof(callback)}( Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(iterations), store_trace, show_trace, extended_trace, autodiff, Int(show_every), - callback, time_limit) + callback, time_limit, μfactor) end function print_header(options::OptimizationOptions) @@ -47,6 +52,10 @@ function print_header(method::Optimizer) @printf "Iter Function value Gradient norm \n" end +function print_header(method::IPOptimizer) + @printf "Iter Lagrangian value Function value Gradient norm μ\n" +end + immutable OptimizationState{T <: Optimizer} iteration::Int value::Float64 @@ -92,17 +101,17 @@ type UnivariateOptimizationResults{T,M} <: OptimizationResults f_calls::Int end -immutable NonDifferentiableFunction +immutable NonDifferentiableFunction <: AbstractOptimFunction f::Function end -immutable DifferentiableFunction +immutable DifferentiableFunction <: AbstractOptimFunction f::Function g!::Function fg!::Function end -immutable TwiceDifferentiableFunction +immutable TwiceDifferentiableFunction <: AbstractOptimFunction f::Function g!::Function fg!::Function @@ -119,6 +128,18 @@ function Base.show(io::IO, t::OptimizationState) return end +function Base.show{M<:IPOptimizer}(io::IO, t::OptimizationState{M}) + md = t.metadata + @printf io "%6d %-14e %-14e %-14e %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["μ"] + if !isempty(t.metadata) + for (key, value) in md + key ∈ ("Lagrangian", "μ") && continue + @printf io " * %s: %s\n" key value + end + end + return +end + function Base.show(io::IO, tr::OptimizationTrace) @printf io "Iter Function value Gradient norm \n" @printf io "------ -------------- --------------\n" @@ -128,6 +149,15 @@ function Base.show(io::IO, tr::OptimizationTrace) return end +function Base.show{M<:IPOptimizer}(io::IO, tr::OptimizationTrace{M}) + @printf io "Iter Lagrangian value Function value Gradient norm 
μ\n" + @printf io "------ ---------------- -------------- -------------- --------\n" + for state in tr + show(io, state) + end + return +end + function Base.show(io::IO, r::MultivariateOptimizationResults) @printf io "Results of Optimization Algorithm\n" @printf io " * Algorithm: %s\n" method(r) diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index cda25b3b7..0cb27427f 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -114,3 +114,24 @@ function trace!(tr, state, iteration, method::NewtonTrustRegion, options) options.show_every, options.callback) end + +function trace!(tr, state, iteration, method::IPOptimizer, options) + dt = Dict() + dt["Lagrangian"] = state.L + dt["μ"] = state.μ + if options.extended_trace + dt["x"] = copy(state.x) + dt["g(x)"] = copy(state.g) + dt["h(x)"] = copy(state.H) + end + g_norm = vecnorm(state.g, Inf) + update!(tr, + iteration, + state.f_x, + g_norm, + dt, + options.store_trace, + options.show_trace, + options.show_every, + options.callback) +end diff --git a/test/runtests.jl b/test/runtests.jl index 7f11d94a1..973122069 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ my_tests = [ "brent.jl", "type_stability.jl", "array.jl", + "constraints.jl", "constrained.jl", "callbacks.jl", "precon.jl", From 11b047017c97344daaaa2e7aeb349fe23f0ce2ff Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 8 Nov 2016 09:13:52 -0600 Subject: [PATCH 09/40] Add BaseTestNext to test/REQUIRE --- test/REQUIRE | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/REQUIRE diff --git a/test/REQUIRE b/test/REQUIRE new file mode 100644 index 000000000..94e516f56 --- /dev/null +++ b/test/REQUIRE @@ -0,0 +1 @@ +BaseTestNext From 2cf1421a3792d6479b0916b05d194ee06f08e074 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 10 Nov 2016 03:54:08 -0600 Subject: [PATCH 10/40] Fix state bugs in linesearch and initialization --- src/interior.jl | 2 +- src/ipnewton.jl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) 
diff --git a/src/interior.jl b/src/interior.jl index a29be2319..5d6ec277f 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -386,7 +386,7 @@ function _lagrangian_linefunc(α, d, constraints, state) b_ls = state.b_ls ls_update!(state.x_ls, state.x, state.s, α) ls_update!(b_ls.bstate, state.bstate, state.bstep, α) - constraints.c!(state.x, b_ls.c) + constraints.c!(state.x_ls, b_ls.c) lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 2c19de36c..12f9ce4c8 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -87,6 +87,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct stepf) state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J) + bstate.λx[:] = μ./bstate.slack_x + bstate.λc[:] = μ./bstate.slack_c update_fg!(d, constraints, state, method) update_h!(d, constraints, state, method) end From f4bb08a813aac3f6400e267f66e1b85388be03f0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 11 Nov 2016 13:52:36 -0600 Subject: [PATCH 11/40] Add isfeasible and isinterior --- src/Optim.jl | 2 ++ src/interior.jl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/src/Optim.jl b/src/Optim.jl index 1820bf9f1..1edc0b52e 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -17,6 +17,8 @@ module Optim Base.setindex! 
export optimize, + isfeasible, + isinterior, DifferentiableFunction, TwiceDifferentiableFunction, DifferentiableConstraintsFunction, diff --git a/src/interior.jl b/src/interior.jl index 5d6ec277f..67e1abc35 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -582,6 +582,75 @@ function equality_grad_λ!(gλ, v, target, idx) nothing end +""" + isfeasible(constraints, state) -> Bool + isfeasible(constraints, x, c) -> Bool + isfeasible(constraints, x) -> Bool + isfeasible(bounds, x, c) -> Bool + +Return `true` if point `x` is feasible, given the `constraints` which +specify bounds `lx`, `ux`, `lc`, and `uc`. `x` is feasible if + + lx[i] <= x[i] <= ux[i] + lc[i] <= c[i] <= uc[i] + +for all possible `i`. +""" +function isfeasible(bounds::ConstraintBounds, x, c) + isf = true + for (i,j) in enumerate(bounds.eqx) + isf &= x[j] == bounds.valx[i] + end + for (i,j) in enumerate(bounds.ineqx) + isf &= bounds.σx[i]*(x[j] - bounds.bx[i]) >= 0 + end + for (i,j) in enumerate(bounds.iz) + isf &= bounds.σz[i]*x[j] >= 0 + end + for (i,j) in enumerate(bounds.eqc) + isf &= c[j] == bounds.valc[i] + end + for (i,j) in enumerate(bounds.ineqc) + isf &= bounds.σc[i]*(c[j] - bounds.bc[i]) >= 0 + end + isf +end +isfeasible(constraints, state::AbstractBarrierState) = isfeasible(constraints, state.x, state.constraints_c) +isfeasible(constraints, x) = isfeasible(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc))) +isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c) + +""" + isinterior(constraints, state) -> Bool + isinterior(constraints, x, c) -> Bool + isinterior(constraints, x) -> Bool + isinterior(bounds, x, c) -> Bool + +Return `true` if point `x` is on the interior of the allowed region, +given the `constraints` which specify bounds `lx`, `ux`, `lc`, and +`uc`. `x` is in the interior if + + lx[i] < x[i] < ux[i] + lc[i] < c[i] < uc[i] + +for all possible `i`. 
+""" +function isinterior(bounds::ConstraintBounds, x, c) + isi = true + for (i,j) in enumerate(bounds.ineqx) + isi &= bounds.σx[i]*(x[j] - bounds.bx[i]) > 0 + end + for (i,j) in enumerate(bounds.iz) + isi &= bounds.σz[i]*x[j] > 0 + end + for (i,j) in enumerate(bounds.ineqc) + isi &= bounds.σc[i]*(c[j] - bounds.bc[i]) > 0 + end + isi +end +isinterior(constraints, state::AbstractBarrierState) = isinterior(constraints, state.x, state.constraints_c) +isinterior(constraints, x) = isinterior(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc))) +isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c) + ## Utilities for representing total state as single vector function pack_vec(x, b::BarrierStateVars) n = length(x) From 4f5539991e3e0a7da6cc0c5a3e7d20b2472360b7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 12 Nov 2016 07:32:29 -0600 Subject: [PATCH 12/40] Add more utilities and record more variables with extended_trace --- src/Optim.jl | 1 + src/interior.jl | 19 +++++++++++++++++++ src/utilities/trace.jl | 3 +++ 3 files changed, 23 insertions(+) diff --git a/src/Optim.jl b/src/Optim.jl index 1edc0b52e..130ac822f 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -34,6 +34,7 @@ module Optim Fminbox, GoldenSection, GradientDescent, + IPNewton, LBFGS, MomentumGradientDescent, NelderMead, diff --git a/src/interior.jl b/src/interior.jl index 67e1abc35..01e070f87 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -61,6 +61,17 @@ Base.similar(bstate::BarrierStateVars) = similar(bstate.λc), similar(bstate.λcE)) +Base.copy(bstate::BarrierStateVars) = + BarrierStateVars(copy(bstate.slack_x), + copy(bstate.slack_c), + copy(bstate.active_x), + copy(bstate.active_c), + copy(bstate.λxE), + copy(bstate.λx), + copy(bstate.λc), + copy(bstate.λcE)) + + function Base.fill!(b::BarrierStateVars, val) fill!(b.slack_x, val) fill!(b.slack_c, val) @@ -96,6 +107,14 @@ const bsv_seed = sizeof(UInt) == 64 ? 
0x145b788192d1cde3 : 0x766a2810 Base.hash(b::BarrierStateVars, u::UInt) = hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed)))))) +function Base.dot(v::BarrierStateVars, w::BarrierStateVars) + dot(v.slack_x,w.slack_x) + + dot(v.slack_c, w.slack_c) + + dot(v.λxE, w.λxE) + + dot(v.λx, w.λx) + + dot(v.λc, w.λc) + + dot(v.λcE, w.λcE) +end """ BarrierLineSearch{T} diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index 0cb27427f..90a3b27e9 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -123,6 +123,9 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt["x"] = copy(state.x) dt["g(x)"] = copy(state.g) dt["h(x)"] = copy(state.H) + dt["bstate"] = copy(state.bstate) + dt["bgrad"] = copy(state.bgrad) + dt["c"] = copy(state.constr_c) end g_norm = vecnorm(state.g, Inf) update!(tr, From 0e8477798a685a76e1512f5505abd92eeae0071b Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 12 Nov 2016 11:15:57 -0600 Subject: [PATCH 13/40] =?UTF-8?q?Adopt=20exact=20updating=20of=20slack=20t?= =?UTF-8?q?erms=20and=20=CE=BBI=20during=20linesearch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/interior.jl | 50 +++++++++++++++++++++++++++++++++------------ src/ipnewton.jl | 14 +++++++------ test/constraints.jl | 2 +- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 01e070f87..c2b3df696 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -138,13 +138,38 @@ immutable BarrierLineSearchGrad{T} bgrad::BarrierStateVars{T} # trial point's gradient end -function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α) - ls_update!(out.slack_x, base.slack_x, step.slack_x, α) - ls_update!(out.slack_c, base.slack_c, step.slack_c, α) +function ls_update!(out::BarrierStateVars, c, base::BarrierStateVars, step::BarrierStateVars, α, constraints, state, dslackc) + bounds = 
constraints.bounds + constraints.c!(state.x_ls, c) + xtarget = bounds.σx.*(state.x_ls[bounds.ineqx] - bounds.bx) + dslackx = bounds.σx.*state.s[bounds.ineqx] + ctarget = bounds.σc.*(c[bounds.ineqc] - bounds.bc) + ls_update!(out, base, step, α, state.μ, xtarget, dslackx, ctarget, dslackc) +end + +function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, μ, xtarget, dslackx, ctarget, dslackc) ls_update!(out.λxE, base.λxE, step.λxE, α) - ls_update!(out.λx, base.λx, step.λx, α) - ls_update!(out.λc, base.λc, step.λc, α) ls_update!(out.λcE, base.λcE, step.λcE, α) + # For the inequality terms, we use "exact" updating + _lsu_slack!(out.slack_x, xtarget, base.slack_x, dslackx, α) + _lsu_slack!(out.slack_c, ctarget, base.slack_c, dslackc, α) + _lsu_λ!(out.λx, out.slack_x, μ) + _lsu_λ!(out.λc, out.slack_c, μ) + out +end +function _lsu_slack!(out, target, slack, dslack, α) + for i = 1:length(out) + t = target[i] + # This handles the possible loss of precision at the boundary + # by using the gradient to extrapolate the change + out[i] = t != 0 ? 
t : slack[i]+α*dslack[i] + end + out +end +function _lsu_λ!(out, slack, μ) + for i = 1:length(out) + out[i] = μ/slack[i] + end out end @@ -397,22 +422,21 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x end # for line searches that don't use the gradient along the line -function lagrangian_linefunc(α, d, constraints, state) - _lagrangian_linefunc(α, d, constraints, state)[2] +function lagrangian_linefunc(α, d, constraints, state, dslackc) + _lagrangian_linefunc(α, d, constraints, state, dslackc)[2] end -function _lagrangian_linefunc(α, d, constraints, state) - b_ls = state.b_ls +function _lagrangian_linefunc(α, d, constraints, state, dslackc) + b_ls, bounds = state.b_ls, constraints.bounds ls_update!(state.x_ls, state.x, state.s, α) - ls_update!(b_ls.bstate, state.bstate, state.bstep, α) - constraints.c!(state.x_ls, b_ls.c) + ls_update!(b_ls.bstate, b_ls.c, state.bstate, state.bstep, α, constraints, state, dslackc) lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) end -function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) +function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}, dslackc) # For backtrack_constrained, the last evaluation is the one we # keep, so it's safe to store the results in state - f_x, L = _lagrangian_linefunc(α, d, constraints, state) + f_x, L = _lagrangian_linefunc(α, d, constraints, state, dslackc) state.f_x = f_x state.L = L L diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 12f9ce4c8..fe602afe6 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -173,7 +173,7 @@ end function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds - solve_step!(state, constraints) + state, dslackc = solve_step!(state, constraints) # If a step α=1 will not 
change any of the parameters, we can quit now. # This prevents a futile linesearch. if is_smaller_eps(state.x, state.s) && @@ -194,7 +194,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction view(state.s, bounds.iz).*bounds.σz) # Determine the actual distance of movement along the search line - ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method) + ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method, dslackc) state.alpha, f_update, g_update = method.linesearch!(ϕ, T(1), αmax, qp) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update @@ -204,10 +204,10 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Update current position # x = x + alpha * s ls_update!(state.x, state.x, state.s, state.alpha) - ls_update!(bstate, bstate, bstep, state.alpha) + ls_update!(bstate, state.constr_c, bstate, bstep, state.alpha, constraints, state, dslackc) # Evaluate the constraints at the new position - constraints.c!(state.x, state.constr_c) +# constraints.c!(state.x, state.constr_c) # already done in ls_update! 
constraints.jacobian!(state.x, state.constr_J) # Test for active inequalities, solve immediately for the corresponding s and λ @@ -226,17 +226,19 @@ function solve_step!(state::IPNewtonState, constraints) k = unpack_vec!(bstep.λcE, step, k) k == length(step) || error("exhausted targets before step") # Solve for the slack variable and λI updates + # These are only used to estimate αmax, otherwise these are updated by exact formulas for (i, j) in enumerate(bounds.ineqx) bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j] bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 end JI = view5(state.constr_J, bounds.ineqc, :) - bstep.slack_c[:] = -bgrad.λc + Diagonal(bounds.σc)*JI*s + dslackc = Diagonal(bounds.σc)*JI*s + bstep.slack_c[:] = -bgrad.λc + dslackc for i = 1:length(bstep.λc) bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 end state.stepf = step - state + state, dslackc end function is_smaller_eps(ref, step) diff --git a/test/constraints.jl b/test/constraints.jl index 0509b1f9e..6f2296953 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -299,7 +299,7 @@ ConstraintBounds: bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds αmax = Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz, state.s[bounds.iz].*bounds.σz) - ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state) + ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state, Float64[]) @test ϕ(0) ≈ qp[1] α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp) @test α > 1e-3 From b4683bee4315880a15e2aa8f1e0e018030b1d02e Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 12 Nov 2016 11:16:49 -0600 Subject: [PATCH 14/40] Update f_x_previous; use safer inversion --- src/ipnewton.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index fe602afe6..7289e605f 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -172,6 +172,7 @@ function update_h!(d, 
constraints::TwiceDifferentiableConstraintsFunction, state end function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) + state.f_x_previous = state.f_x bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds state, dslackc = solve_step!(state, constraints) # If a step α=1 will not change any of the parameters, we can quit now. @@ -218,7 +219,12 @@ end function solve_step!(state::IPNewtonState, constraints) # Solve the Newton step - step = -(state.Hf\state.gf) # do *not* force posdef + local step + try + step = -(state.Hf\state.gf) # do *not* force posdef + catch + step = -(svdfact(state.Hf)\state.gf) + end x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad k = unpack_vec!(s, step, 0) From cada26427457fdd1c3e6d19902482dc4bfa6b579 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 12 Nov 2016 11:16:07 -0600 Subject: [PATCH 15/40] Skip solve_active_inequalities; it shouldn't be necessary now. But we can't expect perfect slack precision. 
--- src/ipnewton.jl | 2 +- test/constraints.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 7289e605f..597f5f419 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -212,7 +212,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction constraints.jacobian!(state.x, state.constr_J) # Test for active inequalities, solve immediately for the corresponding s and λ - solve_active_inequalities!(d, constraints, state) + # solve_active_inequalities!(d, constraints, state) false end diff --git a/test/constraints.jl b/test/constraints.jl index 6f2296953..019cc9bdb 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -342,7 +342,7 @@ ConstraintBounds: Optim.update_h!(d, constraints, state, method) end @test state.x[1] == σ - @test state.bstate.slack_x[1] ≈ μ/abs(F) + @test state.bstate.slack_x[1] < eps(float(σ)) # x >= 1 using the linear/nonlinear constraints d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) constraints = TwiceDifferentiableConstraintsFunction( @@ -358,7 +358,7 @@ ConstraintBounds: Optim.update_h!(d, constraints, state, method) end @test state.x[1] == σ - @test state.bstate.slack_c[1] ≈ μ/abs(F) + @test state.bstate.slack_c[1] < eps(float(σ)) end end end From 16d7ac06bc3c9245d2fc794a1fdc7318b9e504c0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 13 Nov 2016 05:16:14 -0600 Subject: [PATCH 16/40] Restrict one-sided function-value convergence to monotonic methods Also require the function-value criterion to be satisfied on successive iterations, to ensure that constrained optimization doesn't satisfy it spuriously --- src/interior.jl | 11 ++++++++++- src/optimize.jl | 22 ++++++++++++++++------ src/types.jl | 8 +++++--- src/utilities/assess_convergence.jl | 11 +++++++++-- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index c2b3df696..0bc0423a0 100644 --- 
a/src/interior.jl +++ b/src/interior.jl @@ -182,7 +182,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing stopped, stopped_by_callback, stopped_by_time_limit = false, false, false - x_converged, f_converged = false, false + x_converged, f_converged, counter_f_tol = false, false, 0 g_converged = vecnorm(state.g, Inf) < options.g_tol converged = g_converged @@ -199,6 +199,15 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai update_asneeded_fg!(d, constraints, state, method) x_converged, f_converged, g_converged, converged = assess_convergence(state, options) + # With equality constraints, optimization is not necessarily + # monotonic in the value of the function. If the function + # change is approximately canceled by a change in the equality + # violation, it's possible to spuriously satisfy the f_tol + # criterion. Consequently, we require that the f_tol condition + # be satisfied a certain number of times in a row before + # declaring convergence. + counter_f_tol = f_converged ? counter_f_tol+1 : 0 + converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) # If tracing, update trace with trace!. If a callback is provided, it # should have boolean return value that controls the variable stopped_by_callback. 
diff --git a/src/optimize.jl b/src/optimize.jl index 0220166ad..7283b331b 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -9,6 +9,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -17,7 +18,7 @@ function optimize(f::Function, autodiff::Bool = false, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every, @@ -32,6 +33,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -39,7 +41,7 @@ function optimize(f::Function, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -54,6 +56,7 @@ function optimize(f::Function, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -61,7 +64,7 @@ function optimize(f::Function, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, 
callback = callback, show_every = show_every) @@ -74,6 +77,7 @@ function optimize(d::DifferentiableFunction, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -81,7 +85,7 @@ function optimize(d::DifferentiableFunction, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -94,6 +98,7 @@ function optimize(d::TwiceDifferentiableFunction, x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -101,7 +106,7 @@ function optimize(d::TwiceDifferentiableFunction, show_every::Integer = 1, callback = nothing) options = OptimizationOptions(; - x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, + x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol, iterations = iterations, store_trace = store_trace, show_trace = show_trace, extended_trace = extended_trace, callback = callback, show_every = show_every) @@ -220,7 +225,7 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing stopped, stopped_by_callback, stopped_by_time_limit = false, false, false - x_converged, f_converged = false, false + x_converged, f_converged, counter_f_tol = false, false, 0 g_converged = if typeof(method) <: NelderMead nmobjective(state.f_simplex, state.m, state.n) < options.g_tol elseif typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing @@ -242,6 +247,11 @@ function 
optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O update_g!(d, state, method) x_converged, f_converged, g_converged, converged = assess_convergence(state, options) + # See optimize in interior.jl for an explanation of the next + # two lines (given the existence of the option, we'd better + # use it here too) + counter_f_tol = f_converged ? counter_f_tol+1 : 0 + converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) # We don't use the Hessian for anything if we have declared convergence, # so we might as well not make the (expensive) update if converged == true !converged && update_h!(d, state, method) diff --git a/src/types.jl b/src/types.jl index 9c3f67247..d348e7211 100644 --- a/src/types.jl +++ b/src/types.jl @@ -8,6 +8,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}} x_tol::Float64 f_tol::Float64 g_tol::Float64 + successive_f_tol::Int iterations::Int store_trace::Bool show_trace::Bool @@ -23,6 +24,7 @@ function OptimizationOptions(; x_tol::Real = 1e-32, f_tol::Real = 1e-32, g_tol::Real = 1e-8, + successive_f_tol::Integer = 2, iterations::Integer = 1_000, store_trace::Bool = false, show_trace::Bool = false, @@ -37,9 +39,9 @@ function OptimizationOptions(; show_trace = true end OptimizationOptions{typeof(callback)}( - Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(iterations), - store_trace, show_trace, extended_trace, autodiff, Int(show_every), - callback, time_limit, μfactor) + Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol), + Int(iterations), store_trace, show_trace, extended_trace, autodiff, + Int(show_every), callback, time_limit, μfactor) end function print_header(options::OptimizationOptions) diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl index e2b284f6f..6e72fa820 100644 --- a/src/utilities/assess_convergence.jl +++ b/src/utilities/assess_convergence.jl @@ -15,7 +15,7 @@ function assess_convergence(x::Array, # 
Absolute Tolerance # if abs(f_x - f_x_previous) < f_tol # Relative Tolerance - if abs(f_x - f_x_previous) / (abs(f_x) + f_tol) < f_tol || nextfloat(f_x) >= f_x_previous + if abs(f_x - f_x_previous) < min(f_tol * (abs(f_x) + f_tol), eps(abs(f_x)+abs(f_x_previous))) f_converged = true end @@ -39,7 +39,7 @@ function assess_convergence(state, options) # Absolute Tolerance # if abs(f_x - f_x_previous) < f_tol # Relative Tolerance - if abs(state.f_x - state.f_x_previous) / (abs(state.f_x) + options.f_tol) < options.f_tol || nextfloat(state.f_x) >= state.f_x_previous + if abs(state.f_x - state.f_x_previous) < min(options.f_tol * (abs(state.f_x) + options.f_tol), eps(abs(state.f_x)+abs(state.f_x_previous))) || fconverged(state) f_converged = true end @@ -79,6 +79,13 @@ function assess_convergence(state::NewtonTrustRegionState, options) options.x_tol, options.f_tol, options.g_tol) + f_converged = fconverged(state) + converged |= f_converged end x_converged, f_converged, g_converged, converged end + +# For monotonic-decreasing problems +fconverged(state) = nextfloat(state.f_x) >= state.f_x_previous +# Constrained problems are not monotonic, so we can't add a one-sided criterion +fconverged(state::IPNewtonState) = false From 2eb3e636d25de5f943230a0b664be52b054e7ef9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 14 Nov 2016 16:41:17 -0600 Subject: [PATCH 17/40] More robust isinterior/isfeasible --- src/interior.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 0bc0423a0..4a1e75c54 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -668,7 +668,12 @@ function isfeasible(bounds::ConstraintBounds, x, c) isf end isfeasible(constraints, state::AbstractBarrierState) = isfeasible(constraints, state.x, state.constraints_c) -isfeasible(constraints, x) = isfeasible(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc))) +function isfeasible(constraints, x) + # don't assume c! 
returns c (which means this is a little more awkward) + c = Array{eltype(x)}(constraints.bounds.nc) + constraints.c!(x, c) + isfeasible(constraints, x, c) +end isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c) """ @@ -700,7 +705,11 @@ function isinterior(bounds::ConstraintBounds, x, c) isi end isinterior(constraints, state::AbstractBarrierState) = isinterior(constraints, state.x, state.constraints_c) -isinterior(constraints, x) = isinterior(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc))) +function isinterior(constraints, x) + c = Array{eltype(x)}(constraints.bounds.nc) + constraints.c!(x, c) + isinterior(constraints, x, c) +end isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c) ## Utilities for representing total state as single vector From b5366380448f104f245971b6ca3186ad5083eac5 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 14 Nov 2016 19:34:49 -0600 Subject: [PATCH 18/40] Check finiteness in linesearch and eliminate eps component --- src/iplinesearch.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index c0343b5b1..d37d576ce 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -1,15 +1,15 @@ function backtrack_constrained(ϕ, α, αmax, Lcoefsα, - c1 = 0.5, ρ=oftype(α, 0.5), itermax = 100) + c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α)))) α = min(α, 0.999*αmax) L0, L1, L2 = Lcoefsα f_calls = 0 - while f_calls < itermax + while α >= αmin f_calls += 1 val = ϕ(α) - if abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) + 100*eps(abs(val)+abs(L0)) + if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) return α, f_calls, 0 end α *= ρ end - error("failed to satisfy criterion after $f_calls iterations") + return zero(α), f_calls, 0 end From c3ca54b5356f6ba4f0f92c9029ed2c37975a7705 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 14 Nov 2016 
19:36:35 -0600 Subject: [PATCH 19/40] Allow mu decrement based on sufficient gradient decrease or lack of progress on f --- src/interior.jl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 4a1e75c54..76a332a41 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -116,6 +116,12 @@ function Base.dot(v::BarrierStateVars, w::BarrierStateVars) dot(v.λcE, w.λcE) end +function Base.vecnorm(b::BarrierStateVars, p::Real) + vecnorm(b.slack_x, p) + vecnorm(b.slack_c, p) + + vecnorm(b.λx, p) + vecnorm(b.λc, p) + + vecnorm(b.λxE, p) + vecnorm(b.λcE, p) +end + """ BarrierLineSearch{T} @@ -183,7 +189,8 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai stopped, stopped_by_callback, stopped_by_time_limit = false, false, false x_converged, f_converged, counter_f_tol = false, false, 0 - g_converged = vecnorm(state.g, Inf) < options.g_tol + gnorm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) + g_converged = gnorm < options.g_tol converged = g_converged iteration, iterationμ = 0, 0 @@ -191,6 +198,8 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai options.show_trace && print_header(method) trace!(tr, state, iteration, method, options) + Δfmax = zero(state.f_x) + while !converged && !stopped && iteration < options.iterations iteration += 1 iterationμ += 1 @@ -208,6 +217,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai # declaring convergence. counter_f_tol = f_converged ? counter_f_tol+1 : 0 converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) + gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) # If tracing, update trace with trace!. If a callback is provided, it # should have boolean return value that controls the variable stopped_by_callback. 
@@ -216,15 +226,21 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai stopped_by_callback = trace!(tr, state, iteration, method, options) end + Δf = abs(state.f_x - state.f_x_previous) + Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous)) + # Test whether we need to decrease the barrier penalty - if converged - if iterationμ > 1 - # We did real work, so it's worth decreasing the barrier penalty further - shrink_μ!(d, constraints, state, method, options) - iterationμ = 0 - converged = false - end + if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax) + # Since iterationμ > 1 we must have accomplished real + # work, so it's worth trying to decrease the barrier + # penalty further. + shrink_μ!(d, constraints, state, method, options) + iterationμ = 0 + converged = false + gnormnew = oftype(gnormnew, NaN) + Δfmax = zero(Δfmax) end + gnorm = gnormnew # We don't use the Hessian for anything if we have declared convergence, # so we might as well not make the (expensive) update if converged == true From c374b8101dda8ba4a4166d2dc49e7f2aa77893ab Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 16 Nov 2016 06:33:15 -0600 Subject: [PATCH 20/40] Trace alpha too --- src/utilities/trace.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index 90a3b27e9..3c11f1629 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -120,6 +120,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt["Lagrangian"] = state.L dt["μ"] = state.μ if options.extended_trace + dt["α"] = state.alpha dt["x"] = copy(state.x) dt["g(x)"] = copy(state.g) dt["h(x)"] = copy(state.H) From 5c0241c9b1f000f15d55f023e687671c60f9fda7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 14 Nov 2016 19:38:19 -0600 Subject: [PATCH 21/40] Support manually-supplied mu0 and fix a bug in initialization --- src/interior.jl | 19 ++++++++++++------- src/ipnewton.jl | 6 +++--- src/types.jl 
| 6 ++++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 76a332a41..ecf4d15ca 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -338,7 +338,7 @@ function set_active_params!(slack, λ, active, λtarget, μ, k) end """ - initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, β=0.01) -> μ + initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, μ0=:auto, β=0.01) -> μ Pick μ and λ to ensure that the equality constraints are satisfied locally, and that the initial gradient including the barrier would be @@ -356,9 +356,9 @@ constraints evaluated at `x`. `β` (optional) specifies the fraction of the objective's gradient that may be diminished by the barrier. In addition to setting `λxE` and `λcE`, this returns `μ`, the value of -the barrier penalty. +the barrier penalty. You can manually specify μ by supplying μ0. """ -function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1//100) +function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, μ0, β=1//100) length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x)) # Calculate the projection matrix JEx = zeros(eltype(J), length(bounds.eqx), length(x)) @@ -371,7 +371,6 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/ CEc = cholfact(Positive, CE) Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.) 
# Calculate the barrier deviation and projection onto inequality normals - Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc] JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x)) for (i,j) in enumerate([bounds.iz; bounds.ineqx]) JIx[i,j] = 1 @@ -380,15 +379,21 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/ JI = vcat(JIx, JIc) JIg = JI*Pg # Solve for μ - λtilde = 1./Δb - μden = dot(λtilde, JIg) + # Δb = [bounds.σz.*x[bounds.iz]; bounds.σx.*(x[bounds.ineqx] - bounds.bx); bounds.σc.*(c[bounds.ineqc] - bounds.bc)] + Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc] + σ = [bounds.σz; bounds.σx; bounds.σc] + λtilde = σ./Δb + μden = dot(σ.*λtilde, JIg) if μden == 0 && !isempty(Δb) μden = maximum(abs(λtilde).*abs(JIg))*length(Δb) end μ = β*dot(Pg, Pg)/abs(μden) μ = μden != 0 ? μ : oftype(μ, 1) + if μ0 != :auto + μ = μ0 + end # Solve for λE - gb = g - μ*(JI'*λtilde) + gb = g - μ*(JI'*(σ.*λtilde)) Pgb = gb - JE'*(CEc \ (JE*gb)) λE = CEc \ (JE*Pgb) k = unpack_vec!(λx, λE, 0) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 597f5f419..e75223762 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -86,9 +86,9 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hf, stepf) - state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J) - bstate.λx[:] = μ./bstate.slack_x - bstate.λc[:] = μ./bstate.slack_c + state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0) + bstate.λx[:] = state.μ./bstate.slack_x + bstate.λc[:] = state.μ./bstate.slack_c update_fg!(d, constraints, state, method) update_h!(d, constraints, state, method) end diff --git a/src/types.jl b/src/types.jl index d348e7211..be5949460 100644 --- a/src/types.jl +++ b/src/types.jl @@ -18,6 +18,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}} callback::TCallback 
time_limit::Float64 μfactor::Float64 + μ0 end function OptimizationOptions(; @@ -33,7 +34,8 @@ function OptimizationOptions(; show_every::Integer = 1, callback = nothing, time_limit = NaN, - μfactor = 0.1) + μfactor = 0.1, + μ0 = :auto) show_every = show_every > 0 ? show_every: 1 if extended_trace && callback == nothing show_trace = true @@ -41,7 +43,7 @@ function OptimizationOptions(; OptimizationOptions{typeof(callback)}( Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol), Int(iterations), store_trace, show_trace, extended_trace, autodiff, - Int(show_every), callback, time_limit, μfactor) + Int(show_every), callback, time_limit, μfactor, μ0) end function print_header(options::OptimizationOptions) From 61d68a13ad177be5bff57d0afb68a8c967af1de1 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 16 Nov 2016 06:33:03 -0600 Subject: [PATCH 22/40] =?UTF-8?q?Improve=20initialization=20of=20=CE=BC,?= =?UTF-8?q?=20=CE=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This corrects some errors in the handling of equality constraints, and also ensure that: - we approximately match the gradient (don't just make a descent direction) - the hessian (when available) is also relatively unperturbed along the direction of the initial gradient. 
--- src/interior.jl | 241 +++++++++++++++++++++++++++++++++----------- src/ipnewton.jl | 35 +++---- src/types.jl | 29 +++++- test/constraints.jl | 83 ++++++++++++++- 4 files changed, 308 insertions(+), 80 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index ecf4d15ca..cd1e56b8b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -51,6 +51,12 @@ function setslack!(slack, active, v, ineq, σ, b) slack end +slack(bstate::BarrierStateVars) = [bstate.slack_x; bstate.slack_c] +lambdaI(bstate::BarrierStateVars) = [bstate.λx; bstate.λc] +lambdaE(bstate::BarrierStateVars) = [bstate.λxE; bstate.λcE] +lambdaI(state::AbstractBarrierState) = lambdaI(state.bstate) +lambdaE(state::AbstractBarrierState) = lambdaE(state.bstate) + Base.similar(bstate::BarrierStateVars) = BarrierStateVars(similar(bstate.slack_x), similar(bstate.slack_c), @@ -338,71 +344,182 @@ function set_active_params!(slack, λ, active, λtarget, μ, k) end """ - initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, μ0=:auto, β=0.01) -> μ + initialize_μ_λ!(state, bounds, μ0=:auto, β=0.01) + initialize_μ_λ!(state, bounds, (Hobj,HcI), μ0=:auto, β=0.01) Pick μ and λ to ensure that the equality constraints are satisfied -locally, and that the initial gradient including the barrier would be -a descent direction for the problem without the barrier (μ = 0). This -ensures that the search isn't pushed out of the basin of the -user-supplied initial guess. - -`λv` and `λc` are the Lagrange multipliers for the variables and extra -(non-variable) constraints; these are pre-allocated storage for the -output, and their input values are not used. `constraints` is an -`AbstractConstraintsFunction`, `x` is the position (must be a feasible -interior point), `g` is the gradient of the objective at `x`, and -`constr_c` and `constr_J` contain the values and Jacobian of the extra -constraints evaluated at `x`. `β` (optional) specifies the fraction of -the objective's gradient that may be diminished by the barrier. 
- -In addition to setting `λxE` and `λcE`, this returns `μ`, the value of -the barrier penalty. You can manually specify μ by supplying μ0. +locally (at the current `state.x`), and that the initial gradient +including the barrier would be a descent direction for the problem +without the barrier (μ = 0). This ensures that the search isn't pushed +out of the basin of the user-supplied initial guess. + +Upon entry, the objective function gradient, constraint values, and +constraint jacobian must be set in `state.g`, `state.c`, and `state.J` +respectively. If you also wish to ensure that the projection of +Hessian is minimally-perturbed along the initial gradient, supply the +hessian of the objective (`Hobj`) and + + HcI = ∑_i (σ_i/s_i)∇∇ c_{Ii} + +for the constraints. This can be obtained as + + HcI = hessianI(state.x, constraints, 1./state.slack_c) + +You can manually specify `μ` by supplying a numerical value for +`μ0`. Whether calculated algorithmically or specified manually, the +values of `λ` are set using the chosen `μ`. 
""" -function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, μ0, β=1//100) - length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x)) - # Calculate the projection matrix - JEx = zeros(eltype(J), length(bounds.eqx), length(x)) - for (i,j) in enumerate(bounds.eqx) - JEx[i,j] = 1 - end +function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β=1//100) + if nconstraints(bounds) == 0 && nconstraints_x(bounds) == 0 + state.μ = 0 + fill!(state.bstate, 0) + return state + end + gf = state.g # must be pre-set to ∇f + # Calculate projection of ∇f into the subspace spanned by the + # equality constraint Jacobian + JE = jacobianE(state, bounds) + # QRF = qrfact(JE) + # Q = QRF[:Q] + # PEg = Q'*(Q*gf) # in the subspace of JE + C = JE*JE' + Cc = cholfact(Positive, C) + Pperpg = gf-JE'*(Cc \ (JE*gf)) # in the nullspace of JE + # Set μ + JI = jacobianI(state, bounds) + xzi = xzinv(state.x, bounds) + if μ0 == :auto + # Calculate projections of the Lagrangian's gradient, and + # possibly hessian, along (∇f)_⟂ + Dperp = dot(Pperpg, Pperpg) + σ, s = sigma(bounds), slack(state) + σdivs = σ./s + Δg = xzi + JI'*σdivs + PperpΔg = Δg - JE'*(Cc \ (JE*Δg)) + DI = dot(PperpΔg, PperpΔg) + κperp, κI = hessian_projections(Hinfo, Pperpg, (JI*Pperpg)./s) + # Calculate μ and λI + μ = β * (κperp == 0 ? 
sqrt(Dperp/DI) : min(sqrt(Dperp/DI), abs(κperp/κI))) + if !isfinite(μ) + Δgtilde = abs(xzi) + JI'*(1./s) + PperpΔgtilde = Δgtilde - JE'*(Cc \ (JE*Δgtilde)) + DItilde = dot(PperpΔgtilde, PperpΔgtilde) + μ = β*sqrt(Dperp/DItilde) + end + if !isfinite(μ) || μ == 0 + μ = one(μ) + end + else + μ = convert(eltype(state.x), μ0) + end + state.μ = μ + # Set λI + state.bstate.λx[:] = μ./state.bstate.slack_x + state.bstate.λc[:] = μ./state.bstate.slack_c + # Calculate λE + λI = lambdaI(state) + ∇bI = gf - μ*xzi - JI'*λI +# qrregularize!(QRF) # in case of any 0 eigenvalues + λE = Cc \ (JE*∇bI) + (cbar(bounds) - cE(state, bounds))/μ + k = unpack_vec!(state.bstate.λxE, λE, 0) + k = unpack_vec!(state.bstate.λcE, λE, k) + k == length(λE) || error("something is wrong") + state +end +function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β=1//100) + initialize_μ_λ!(state, bounds, nothing, μ0, β) +end + +function hessian_projections(Hinfo::Tuple{AbstractMatrix,AbstractMatrix}, Pperpg, y) + κperp = dot(Hinfo[1]*Pperpg, Pperpg) + κI = dot(Hinfo[2]*Pperpg, Pperpg) + dot(y,y) + κperp, κI +end +hessian_projections{T}(Hinfo::Void, Pperpg::AbstractVector{T}) = convert(T, Inf), zero(T) + +function jacobianE(state, bounds::ConstraintBounds) + J, x = state.constr_J, state.x + JEx = jacobianx(J, bounds.eqx) JEc = view5(J, bounds.eqc, :) JE = vcat(JEx, JEc) - CE = JE*JE' - CEc = cholfact(Positive, CE) - Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.) 
- # Calculate the barrier deviation and projection onto inequality normals - JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x)) - for (i,j) in enumerate([bounds.iz; bounds.ineqx]) - JIx[i,j] = 1 - end +end +jacobianE(state, constraints) = jacobianE(state, constraints.bounds) + +function jacobianI(state, bounds::ConstraintBounds) + J, x = state.constr_J, state.x + JIx = jacobianx(J, bounds.ineqx) # skip iz: there is no λIz, so don't put in JI JIc = view5(J, bounds.ineqc, :) JI = vcat(JIx, JIc) - JIg = JI*Pg - # Solve for μ - # Δb = [bounds.σz.*x[bounds.iz]; bounds.σx.*(x[bounds.ineqx] - bounds.bx); bounds.σc.*(c[bounds.ineqc] - bounds.bc)] - Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc] - σ = [bounds.σz; bounds.σx; bounds.σc] - λtilde = σ./Δb - μden = dot(σ.*λtilde, JIg) - if μden == 0 && !isempty(Δb) - μden = maximum(abs(λtilde).*abs(JIg))*length(Δb) - end - μ = β*dot(Pg, Pg)/abs(μden) - μ = μden != 0 ? μ : oftype(μ, 1) - if μ0 != :auto - μ = μ0 - end - # Solve for λE - gb = g - μ*(JI'*(σ.*λtilde)) - Pgb = gb - JE'*(CEc \ (JE*gb)) - λE = CEc \ (JE*Pgb) - k = unpack_vec!(λx, λE, 0) - k = unpack_vec!(λc, λE, k) - k == length(λE) || error("something is wrong") - μ end -initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) = - initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...) 
+jacobianI(state, constraints) = jacobianI(state, constraints.bounds) + +# TODO: when Optim supports sparse arrays, make a SparseMatrixCSC version +function jacobianx(J::AbstractArray, indx) + Jx = zeros(eltype(J), length(indx), size(J, 2)) + for (i,j) in enumerate(indx) + Jx[i,j] = 1 + end + Jx +end + +function sigma(bounds::ConstraintBounds) + [bounds.σx; bounds.σc] # don't include σz +end +sigma(constraints) = sigma(constraints.bounds) + +slack(state) = slack(state.bstate) +function xzinv(x, bounds::ConstraintBounds) + xzi = zero(x) + xzi[bounds.iz] = 1./x[bounds.iz] + xzi +end + +cbar(bounds::ConstraintBounds) = [bounds.valx; bounds.valc] +cbar(constraints) = cbar(constraints.bounds) +cE(state, bounds::ConstraintBounds) = [state.x[bounds.eqx]; state.constr_c[bounds.eqc]] + +function hessianI!(h, x, constraints, λcI, μ) + λ = userλ(λcI, constraints) + constraints.h!(x, λ, h) + for i in constraints.bounds.iz + h[i,i] += μ/x[i]^2 + end + h +end + +""" + hessianI(x, constraints, λcI, μ) -> h + +Compute the hessian at `x` of the `λcI`-weighted sum of user-supplied +constraint functions for just the inequalities. This also includes +contributions from any variables with bounds at 0, since those do not +cause introduction of a slack variable. Other (nonzero) box +constraints do not contribute to `h`, because the hessian of `x_i` is +zero. (They contribute indirectly via their slack variables.) +""" +hessianI(x, constraints, λcI, μ) = + hessianI!(zeros(eltype(x), length(x), length(x)), x, constraints, λcI, μ) + +""" + userλ(λcI, bounds) -> λ + +Accumulates `λcI` into a vector `λ` ordered as the user-supplied +constraint functions `c`. Upper and lower bounds are summed, weighted +by `σ`. The resulting λ includes an overall negative sign so that this +becomes the coefficient for the user-supplied hessian. + +This is relevant only for the inequalities. If you want the λ for just +the equalities, you can use `λ[bounds.ceq] = λcE` for a zero-filled `λ`. 
+""" +function userλ(λcI, bounds::ConstraintBounds) + ineqc, σc = bounds.ineqc, bounds.σc + λ = zeros(eltype(bounds), nconstraints(bounds)) + for i = 1:length(ineqc) + λ[ineqc[i]] -= λcI[i]*σc[i] + end + λ +end +userλ(λcI, constraints) = userλ(λcI, constraints.bounds) ## Computation of the Lagrangian and its gradient # This is in a parametrization that is also useful during linesearch @@ -787,3 +904,13 @@ function shrink_μ!(d, constraints, state, method, options) state.μ *= options.μfactor update_fg!(d, constraints, state, method) end + +function qrregularize!(QRF) + R = QRF[:R] + for i = 1:size(R, 1) + if R[i,i] == 0 + R[i,i] = 1 + end + end + QRF +end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index e75223762..a30c5bc9f 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -86,9 +86,10 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hf, stepf) - state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0) - bstate.λx[:] = state.μ./bstate.slack_x - bstate.λc[:] = state.μ./bstate.slack_c + d.h!(initial_x, state.H) + # state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0) + Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1)) + initialize_μ_λ!(state, constraints.bounds, Hinfo, options.μ0) update_fg!(d, constraints, state, method) update_h!(d, constraints, state, method) end @@ -108,30 +109,22 @@ function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state end function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + x = state.x μ, Hxx, J = state.μ, state.H, state.constr_J - d.h!(state.x, Hxx) - # Collect the values of the coefficients of the inequality constraints bounds = constraints.bounds - ineqc, σc, λc = bounds.ineqc, bounds.σc, state.bstate.λc m, n = size(J, 1), size(J, 2) - λ = zeros(eltype(bounds), m) - for i = 1:length(ineqc) - 
λ[ineqc[i]] -= λc[i]*σc[i] - end - # Add the weighted hessian terms from the nonlinear constraints - constraints.h!(state.x, λ, Hxx) - # Add the Jacobian terms - JI = view5(J, ineqc, :) + + d.h!(state.x, Hxx) + hessianI!(Hxx, state.x, constraints, state.bstate.λc, μ) # accumulate the inequality second derivatives + # Add the Jacobian terms (J'*S^{-2}*J) + JI = view5(J, bounds.ineqc, :) Sinv2 = Diagonal(1./state.bstate.slack_c.^2) HJ = JI'*Sinv2*JI for j = 1:n, i = 1:n Hxx[i,j] += μ*HJ[i,j] end - # Add the variable inequalities - iz, x = bounds.iz, state.x - for i in iz - Hxx[i,i] += μ/x[i]^2 - end + # Add the variable inequalities portions of J'*S^{-2}*J + # The iz terms are already in Hxx (from hessianI!) ineqx, sx = bounds.ineqx, state.bstate.slack_x for (i,j) in enumerate(ineqx) Hxx[j,j] += μ/sx[i]^2 @@ -141,7 +134,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state Hp = full(Hpc) # Now add the equality constraint hessian terms eqc, λcE = bounds.eqc, state.bstate.λcE - fill!(λ, 0) + λ = zeros(eltype(x), nconstraints(bounds)) for i = 1:length(eqc) λ[eqc[i]] -= λcE[i] end @@ -161,7 +154,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))] # Also form the total gradient bgrad = state.bgrad - gI = state.g + JI'*Diagonal(σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc) + gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc) for (i,j) in enumerate(ineqx) gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2) end diff --git a/src/types.jl b/src/types.jl index be5949460..06550fd74 100644 --- a/src/types.jl +++ b/src/types.jl @@ -284,7 +284,7 @@ end # additional variables. See `parse_constraints` for details. 
immutable ConstraintBounds{T} - nc::Int # Number of linear/nonlinear constraints + nc::Int # Number of linear/nonlinear constraints supplied by user # Box-constraints on variables (i.e., directly on x) eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...) valx::Vector{T} # value of equality-constrained x @@ -312,8 +312,35 @@ end Base.eltype{T}(::Type{ConstraintBounds{T}}) = T Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb)) +""" + nconstraints(bounds) -> nc + +The number of linear/nonlinear constraint functions supplied by the +user. This does not include bounds-constraints on variables. + +See also: nconstraints_x. +""" nconstraints(cb::ConstraintBounds) = cb.nc +""" + nconstraints_x(bounds) -> nx + +The number of "meaningful" constraints (not `±Inf`) on the x coordinates. + +See also: nconstraints. +""" +function nconstraints_x(cb::ConstraintBounds) + mz = isempty(cb.iz) ? 0 : maximum(cb.iz) + mi = isempty(cb.ineqx) ? 0 : maximum(cb.ineqx) + me = isempty(cb.eqx) ? 0 : maximum(cb.eqx) + nmax = max(mz, mi, me) + hasconstraint = falses(nmax) + hasconstraint[cb.iz] = true + hasconstraint[cb.ineqx] = true + hasconstraint[cb.eqx] = true + sum(hasconstraint) +end + function Base.show(io::IO, cb::ConstraintBounds) indent = " " print(io, "ConstraintBounds:") diff --git a/test/constraints.jl b/test/constraints.jl index 019cc9bdb..89916fc95 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -94,7 +94,7 @@ ConstraintBounds: cfun = x->Float64[] c = Float64[] J = Array{Float64}(0,0) - options = OptimizationOptions() + options = OptimizationOptions(μ0 = μ) method = Optim.IPNewton() ## In the code, variable constraints are special-cased (for ## reasons of user-convenience and efficiency). 
It's @@ -279,6 +279,87 @@ ConstraintBounds: @test state.Hf ≈ hp end + @testset "IPNewton initialization" begin + method = IPNewton() + options = OptimizationOptions() + x = [1.0,0.1,0.3,0.4] + ## A linear objective function (hessian is zero) + f_g = [1.0,2.0,3.0,4.0] + d = TwiceDifferentiableFunction(x->dot(x, f_g), (x,g)->copy!(g, f_g), (x,h)->fill!(h, 0)) + # Variable bounds + constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8]) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test norm(f_g - state.g) ≈ 0.01*norm(f_g) + # Nonlinear inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.05, 0.4], [0.15, 4.4]) + @test isinterior(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test norm(f_g - state.g) ≈ 0.01*norm(f_g) + # Mixed equalities and inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.1, 0.4], [0.1, 4.4]) + @test isfeasible(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + J = zeros(2,4) + constraints.jacobian!(x, J) + eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal) + @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint + Pfg = f_g - dot(f_g, eqnormal)*eqnormal + Pg = state.g - dot(state.g, eqnormal)*eqnormal + @test norm(Pfg - Pg) ≈ 0.01*norm(Pfg) + ## An objective function with a nonzero hessian + hd = [1.0, 100.0, 0.01, 2.0] # diagonal terms of hessian + d = TwiceDifferentiableFunction(x->sum(hd.*x.^2)/2, (x,g)->copy!(g, hd.*x), 
(x,h)->copy!(h, Diagonal(hd))) + gx = d.g!(x, zeros(4)) + hx = Diagonal(hd) + # Variable bounds + constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8]) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011 + # Nonlinear inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.05, 0.4], [0.15, 4.4]) + @test isinterior(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011 + # Mixed equalities and inequalities + constraints = TwiceDifferentiableConstraintsFunction( + (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2), + (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]), + (x,λ,h)->(h[4,4] += λ[2]*2), + [], [], [0.1, 0.4], [0.1, 4.4]) + @test isfeasible(constraints, x) + state = Optim.initial_state(method, options, d, constraints, x) + Optim.update_fg!(d, constraints, state, method) + J = zeros(2,4) + constraints.jacobian!(x, J) + eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal) + @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint + Pgx = gx - dot(gx, eqnormal)*eqnormal + @test abs(dot(Pgx, state.g)/dot(Pgx,Pgx) - 1) <= 0.011 + Optim.update_h!(d, constraints, state, method) + @test abs(dot(Pgx, state.H*Pgx)/dot(Pgx, hx*Pgx) - 1) <= 0.011 + end + @testset "IPNewton step" begin F = 1000 d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) From 
11234bc81bafc07285458411127bc89443e239d4 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 16 Nov 2016 11:22:44 -0600 Subject: [PATCH 23/40] Use Lagrangian val/grad rather than objective val/grad in assessing convergence --- src/interior.jl | 4 +++- src/ipnewton.jl | 6 ++++-- src/utilities/assess_convergence.jl | 11 +++++++++++ src/utilities/trace.jl | 4 +++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index cd1e56b8b..9b21a303b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -233,7 +233,9 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai end Δf = abs(state.f_x - state.f_x_previous) - Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous)) + if iterationμ > 1 + Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous)) + end # Test whether we need to decrease the barrier penalty if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index a30c5bc9f..73116d60b 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -16,6 +16,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState # Barrier penalty fields μ::T # coefficient of the barrier penalty L::T # value of the Lagrangian (objective + barrier + equality) + L_previous::T bstate::BarrierStateVars{T} # value of slack and λ variables (current "position") bgrad::BarrierStateVars{T} # gradient of slack and λ variables at current "position" bstep::BarrierStateVars{T} # search direction for slack and λ @@ -74,7 +75,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hd, similar(initial_x), # Maintain current x-search direction in state.s μ, - T(0), + T(NaN), + T(NaN), bstate, bgrad, bstep, @@ -165,7 +167,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state end function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) - state.f_x_previous 
= state.f_x + state.f_x_previous, state.L_previous = state.f_x, state.L bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds state, dslackc = solve_step!(state, constraints) # If a step α=1 will not change any of the parameters, we can quit now. diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl index 6e72fa820..b11800254 100644 --- a/src/utilities/assess_convergence.jl +++ b/src/utilities/assess_convergence.jl @@ -85,6 +85,17 @@ function assess_convergence(state::NewtonTrustRegionState, options) x_converged, f_converged, g_converged, converged end +function assess_convergence(state::IPNewtonState, options) + assess_convergence(state.x, + state.x_previous, + state.L, + state.L_previous, + state.gf, + options.x_tol, + options.f_tol, + options.g_tol) +end + # For monotonic-decreasing problems fconverged(state) = nextfloat(state.f_x) >= state.f_x_previous # Constrained problems are not monotonic, so we can't add a one-sided criterion diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index 3c11f1629..bb574745c 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -123,12 +123,14 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt["α"] = state.alpha dt["x"] = copy(state.x) dt["g(x)"] = copy(state.g) + dt["gf(x)"] = copy(state.gf) dt["h(x)"] = copy(state.H) + dt["hf(x)"] = copy(state.Hf) dt["bstate"] = copy(state.bstate) dt["bgrad"] = copy(state.bgrad) dt["c"] = copy(state.constr_c) end - g_norm = vecnorm(state.g, Inf) + g_norm = vecnorm(state.gf, Inf) update!(tr, iteration, state.f_x, From a3bbf90fc437a15bd9b8ddd8a11f0fad4234d128 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Nov 2016 06:09:31 -0600 Subject: [PATCH 24/40] Switch to primal-dual and clean up architecture - use slack vars even for nonnegative/nonpositive constraints (needed for dual) - eliminate "active" boolean state and delete solve_active_inequalities! 
- reorder the fields in BarrierStateVars - update slack and lambda vars using linesearch - better support for initial infeasible state --- src/interior.jl | 199 ++++++++++---------------------------------- src/iplinesearch.jl | 11 +-- src/ipnewton.jl | 68 +++++++-------- src/types.jl | 50 +++-------- test/constraints.jl | 128 ++++++++++++++-------------- 5 files changed, 162 insertions(+), 294 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 9b21a303b..0e0cc4b14 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -4,11 +4,9 @@ abstract AbstractBarrierState immutable BarrierStateVars{T} slack_x::Vector{T} # values of slack variables for x slack_c::Vector{T} # values of slack variables for c - active_x::Vector{Bool} # active constraints for x (see solve_active_inequalities) - active_c::Vector{Bool} # active constraints for c - λxE::Vector{T} # λ for equality constraints on x λx::Vector{T} # λ for equality constraints on slack_x λc::Vector{T} # λ for equality constraints on slack_c + λxE::Vector{T} # λ for equality constraints on x λcE::Vector{T} # λ for linear/nonlinear equality constraints end # Note on λxE: @@ -22,31 +20,29 @@ end @compat function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds) slack_x = Array{T}(length(bounds.ineqx)) slack_c = Array{T}(length(bounds.ineqc)) - λxE = Array{T}(length(bounds.eqx)) λx = similar(slack_x) λc = similar(slack_c) + λxE = Array{T}(length(bounds.eqx)) λcE = Array{T}(length(bounds.eqc)) - sv = BarrierStateVars{T}(slack_x, slack_c, fill(false, length(slack_x)), - fill(false, length(slack_c)), λxE, λx, λc, λcE) + sv = BarrierStateVars{T}(slack_x, slack_c, λx, λc, λxE, λcE) end BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds) function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x) sv = BarrierStateVars(bounds) - setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) sv end function 
BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c) sv = BarrierStateVars(bounds) - setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx) - setslack!(sv.slack_c, sv.active_c, c, bounds.ineqc, bounds.σc, bounds.bc) + setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx) + setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc) sv end -function setslack!(slack, active, v, ineq, σ, b) +function setslack!(slack, v, ineq, σ, b) for i = 1:length(ineq) dv = v[ineq[i]]-b[i] - slack[i] = σ[i]*dv - active[i] = dv == 0 + slack[i] = abs(σ[i]*dv) end slack end @@ -60,32 +56,26 @@ lambdaE(state::AbstractBarrierState) = lambdaE(state.bstate) Base.similar(bstate::BarrierStateVars) = BarrierStateVars(similar(bstate.slack_x), similar(bstate.slack_c), - similar(bstate.active_x), - similar(bstate.active_c), - similar(bstate.λxE), similar(bstate.λx), similar(bstate.λc), + similar(bstate.λxE), similar(bstate.λcE)) Base.copy(bstate::BarrierStateVars) = BarrierStateVars(copy(bstate.slack_x), copy(bstate.slack_c), - copy(bstate.active_x), - copy(bstate.active_c), - copy(bstate.λxE), copy(bstate.λx), copy(bstate.λc), + copy(bstate.λxE), copy(bstate.λcE)) function Base.fill!(b::BarrierStateVars, val) fill!(b.slack_x, val) fill!(b.slack_c, val) - fill!(b.active_x, false) - fill!(b.active_c, false) - fill!(b.λxE, val) fill!(b.λx, val) fill!(b.λc, val) + fill!(b.λxE, val) fill!(b.λcE, val) b end @@ -95,7 +85,7 @@ Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv)) function Base.show(io::IO, b::BarrierStateVars) print(io, "BarrierStateVars{$(eltype(b))}:") - for fn in fieldnames(b) + for fn in (:slack_x, :slack_c, :λx, :λc, :λxE, :λcE) print(io, "\n $fn: ") show(io, getfield(b, fn)) end @@ -104,21 +94,21 @@ end @compat Base.:(==)(v::BarrierStateVars, w::BarrierStateVars) = v.slack_x == w.slack_x && v.slack_c == w.slack_c && - v.λxE == w.λxE && v.λx == w.λx && v.λc == w.λc && + v.λxE == w.λxE && v.λcE == w.λcE const bsv_seed = sizeof(UInt) == 64 ? 
0x145b788192d1cde3 : 0x766a2810 Base.hash(b::BarrierStateVars, u::UInt) = - hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed)))))) + hash(b.λcE, has(b.λxE, hash(b.λc, hash(b.λx, hash(b.slack_c, hash(b.slack_x, u+bsv_seed)))))) function Base.dot(v::BarrierStateVars, w::BarrierStateVars) dot(v.slack_x,w.slack_x) + dot(v.slack_c, w.slack_c) + - dot(v.λxE, w.λxE) + dot(v.λx, w.λx) + dot(v.λc, w.λc) + + dot(v.λxE, w.λxE) + dot(v.λcE, w.λcE) end @@ -150,38 +140,13 @@ immutable BarrierLineSearchGrad{T} bgrad::BarrierStateVars{T} # trial point's gradient end -function ls_update!(out::BarrierStateVars, c, base::BarrierStateVars, step::BarrierStateVars, α, constraints, state, dslackc) - bounds = constraints.bounds - constraints.c!(state.x_ls, c) - xtarget = bounds.σx.*(state.x_ls[bounds.ineqx] - bounds.bx) - dslackx = bounds.σx.*state.s[bounds.ineqx] - ctarget = bounds.σc.*(c[bounds.ineqc] - bounds.bc) - ls_update!(out, base, step, α, state.μ, xtarget, dslackx, ctarget, dslackc) -end - -function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, μ, xtarget, dslackx, ctarget, dslackc) +function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI) + ls_update!(out.slack_x, base.slack_x, step.slack_x, α) + ls_update!(out.slack_c, base.slack_c, step.slack_c, α) ls_update!(out.λxE, base.λxE, step.λxE, α) ls_update!(out.λcE, base.λcE, step.λcE, α) - # For the inequality terms, we use "exact" updating - _lsu_slack!(out.slack_x, xtarget, base.slack_x, dslackx, α) - _lsu_slack!(out.slack_c, ctarget, base.slack_c, dslackc, α) - _lsu_λ!(out.λx, out.slack_x, μ) - _lsu_λ!(out.λc, out.slack_c, μ) - out -end -function _lsu_slack!(out, target, slack, dslack, α) - for i = 1:length(out) - t = target[i] - # This handles the possible loss of precision at the boundary - # by using the gradient to extrapolate the change - out[i] = t != 0 ? 
t : slack[i]+α*dslack[i] - end - out -end -function _lsu_λ!(out, slack, μ) - for i = 1:length(out) - out[i] = μ/slack[i] - end + ls_update!(out.λx, base.λx, step.λx, αI) + ls_update!(out.λc, base.λc, step.λc, αI) out end @@ -289,62 +254,6 @@ update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing update_asneeded_fg!(d, constraints, state, method) = update_fg!(d, constraints, state, method) update_asneeded_fg!(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) = update_g!(d, constraints, state, method) - -# Explicit solution for slack, λ when an inequality constraint is -# "active." This is necessary (or at least helpful) when c-b == 0 due -# to roundoff error, in which case the KKT equations don't have an -# exact solution within the precision. We punt on the ∂λ equation -# (which reduces to the slack, which should be small anyway), and -# focus on the ∂x and ∂slack equations (therefore setting slack and -# λ). By setting these to their exact solutions, we balance the forces -# due to the barrier. 
-function solve_active_inequalities!(d, constraints, state) - x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds - nactive, nchanged = tally_active!(bstate.active_x, 0, 0, x, bounds.ineqx, bounds.bx) - nx = nactive - nactive, nchanged = tally_active!(bstate.active_c, nactive, nchanged, c, bounds.ineqc, bounds.bc, ) - if nactive == 0 || nchanged == 0 - return nothing - end - # Calculate the necessary gradients - d.g!(state.x, state.g) - constraints.jacobian!(state.x, state.constr_J) - # Solve for the Lagrange multipliers - ic, ix = bounds.ineqc[bstate.active_c], bounds.ineqx[bstate.active_x] - Jx = view5(state.constr_J, ic, ix) - Jact = view5(state.constr_J, ic, :) - Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact'] - pactive = [view(state.g, ix); Jact*state.g] - Cactivep = cholfact(Positive, Cactive) - λactive = (Cactivep\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]] - any(x->x<=0, λactive) && error("something may be wrong, λ is zero or negative. 
Perhaps Cactive is singular?") - # Set the state - k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0) - k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k) - k == length(λactive) || error("something is wrong") - nothing -end - -function tally_active!(active, nactive, nchanged, c, ineq, b) - for (i,j) in enumerate(ineq) - isactive = c[j] == b[i] - nactive += isactive - nchanged += isactive != active[i] - active[i] = isactive - end - nactive, nchanged -end - -function set_active_params!(slack, λ, active, λtarget, μ, k) - for i = 1:length(active) - active[i] || continue - λk = λtarget[k+=1] - λ[i] = λk - slack[i] = μ/λk - end - k -end - """ initialize_μ_λ!(state, bounds, μ0=:auto, β=0.01) initialize_μ_λ!(state, bounds, (Hobj,HcI), μ0=:auto, β=0.01) @@ -389,21 +298,20 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy Pperpg = gf-JE'*(Cc \ (JE*gf)) # in the nullspace of JE # Set μ JI = jacobianI(state, bounds) - xzi = xzinv(state.x, bounds) if μ0 == :auto # Calculate projections of the Lagrangian's gradient, and # possibly hessian, along (∇f)_⟂ Dperp = dot(Pperpg, Pperpg) σ, s = sigma(bounds), slack(state) σdivs = σ./s - Δg = xzi + JI'*σdivs + Δg = JI'*σdivs PperpΔg = Δg - JE'*(Cc \ (JE*Δg)) DI = dot(PperpΔg, PperpΔg) κperp, κI = hessian_projections(Hinfo, Pperpg, (JI*Pperpg)./s) # Calculate μ and λI μ = β * (κperp == 0 ? 
sqrt(Dperp/DI) : min(sqrt(Dperp/DI), abs(κperp/κI))) if !isfinite(μ) - Δgtilde = abs(xzi) + JI'*(1./s) + Δgtilde = JI'*(1./s) PperpΔgtilde = Δgtilde - JE'*(Cc \ (JE*Δgtilde)) DItilde = dot(PperpΔgtilde, PperpΔgtilde) μ = β*sqrt(Dperp/DItilde) @@ -420,7 +328,7 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy state.bstate.λc[:] = μ./state.bstate.slack_c # Calculate λE λI = lambdaI(state) - ∇bI = gf - μ*xzi - JI'*λI + ∇bI = gf - JI'*λI # qrregularize!(QRF) # in case of any 0 eigenvalues λE = Cc \ (JE*∇bI) + (cbar(bounds) - cE(state, bounds))/μ k = unpack_vec!(state.bstate.λxE, λE, 0) @@ -449,7 +357,7 @@ jacobianE(state, constraints) = jacobianE(state, constraints.bounds) function jacobianI(state, bounds::ConstraintBounds) J, x = state.constr_J, state.x - JIx = jacobianx(J, bounds.ineqx) # skip iz: there is no λIz, so don't put in JI + JIx = jacobianx(J, bounds.ineqx) JIc = view5(J, bounds.ineqc, :) JI = vcat(JIx, JIc) end @@ -470,11 +378,6 @@ end sigma(constraints) = sigma(constraints.bounds) slack(state) = slack(state.bstate) -function xzinv(x, bounds::ConstraintBounds) - xzi = zero(x) - xzi[bounds.iz] = 1./x[bounds.iz] - xzi -end cbar(bounds::ConstraintBounds) = [bounds.valx; bounds.valc] cbar(constraints) = cbar(constraints.bounds) @@ -483,9 +386,6 @@ cE(state, bounds::ConstraintBounds) = [state.x[bounds.eqx]; state.constr_c[bound function hessianI!(h, x, constraints, λcI, μ) λ = userλ(λcI, constraints) constraints.h!(x, λ, h) - for i in constraints.bounds.iz - h[i,i] += μ/x[i]^2 - end h end @@ -571,26 +471,27 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x end # for line searches that don't use the gradient along the line -function lagrangian_linefunc(α, d, constraints, state, dslackc) - _lagrangian_linefunc(α, d, constraints, state, dslackc)[2] +function lagrangian_linefunc(α, αI, d, constraints, state) + _lagrangian_linefunc(α, αI, d, constraints, state)[2] end -function _lagrangian_linefunc(α, 
d, constraints, state, dslackc) +function _lagrangian_linefunc(α, αI, d, constraints, state) b_ls, bounds = state.b_ls, constraints.bounds ls_update!(state.x_ls, state.x, state.s, α) - ls_update!(b_ls.bstate, b_ls.c, state.bstate, state.bstep, α, constraints, state, dslackc) + ls_update!(b_ls.bstate, state.bstate, state.bstep, α, αI) + constraints.c!(state.x_ls, b_ls.c) lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) end -function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}, dslackc) +function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) # For backtrack_constrained, the last evaluation is the one we # keep, so it's safe to store the results in state - f_x, L = _lagrangian_linefunc(α, d, constraints, state, dslackc) + f_x, L = _lagrangian_linefunc(α, αI, d, constraints, state) state.f_x = f_x state.L = L L end -lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state) +lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state) ## Computation of Lagrangian terms: barrier penalty """ @@ -604,8 +505,7 @@ slack variables. `bounds` holds the parsed bounds. """ function barrier_value(bounds::ConstraintBounds, x, sx, sc, μ) # bμ is the coefficient of μ in the barrier penalty - bμ = _bv(x, bounds.iz, bounds.σz) + # coords constrained by 0 - _bv(sx) + # coords with other bounds + bμ = _bv(sx) + # coords with other bounds _bv(sc) # linear/nonlinear constr. μ*bμ end @@ -642,7 +542,6 @@ The result is *added* to `gx`, `gsx`, and `gsc`, so these vectors need to be initialized appropriately. 
""" function barrier_grad!(gx, gsx, gsc, bounds::ConstraintBounds, x, sx, sc, μ) - barrier_grad!(view(gx, bounds.iz), view(x, bounds.iz), μ) barrier_grad!(gsx, sx, μ) barrier_grad!(gsc, sc, μ) nothing @@ -662,7 +561,7 @@ end """ equality_violation([f=identity], bounds, x, c, bstate) -> val - equality_violation([f=identity], bounds, x, c, sx, sc, λxE, λx, λc, λcE) -> val + equality_violation([f=identity], bounds, x, c, sx, sc, λx, λc, λxE, λcE) -> val Compute the sum of `f(v_i)`, where `v_i = λ_i*(target - observed)` measures the difference between the current state and the @@ -673,17 +572,17 @@ variables. `c` holds the values of the linear-nonlinear constraints, and the λ arguments hold the Lagrange multipliers for `x`, `sx`, `sc`, and `c` respectively. """ -function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) - ev = equality_violation(f, x, bounds.valx, bounds.eqx, λxE) + - equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) + +function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE) + ev = equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) + equality_violation(f, sc, c, bounds.ineqc, bounds.σc, bounds.bc, λc) + + equality_violation(f, x, bounds.valx, bounds.eqx, λxE) + equality_violation(f, c, bounds.valc, bounds.eqc, λcE) end -equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) = - equality_violation(identity, bounds, x, c, sx, sc, λxE, λx, λc, λcE) +equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE) = + equality_violation(identity, bounds, x, c, sx, sc, λx, λc, λxE, λcE) function equality_violation(f, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) - equality_violation(f, bounds, x, c, - bstate.slack_x, bstate.slack_c, bstate.λxE, bstate.λx, bstate.λc, bstate.λcE) + equality_violation(f, bounds, x, c, bstate.slack_x, bstate.slack_c, + bstate.λx, bstate.λc, bstate.λxE, bstate.λcE) end 
equality_violation(bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) = equality_violation(identity, bounds, x, c, bstate) @@ -719,20 +618,20 @@ end Compute the gradient of `equality_violation`, storing the result in `gx` (an array) and `gbstate::BarrierStateVars`. """ -function equality_grad!(gx, gsx, gsc, gλxE, gλx, gλc, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λxE, λx, λc, λcE) - gx[bounds.eqx] = gx[bounds.eqx] - λxE +function equality_grad!(gx, gsx, gsc, gλx, gλc, gλxE, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λx, λc, λxE, λcE) equality_grad_var!(gsx, gx, bounds.ineqx, bounds.σx, λx) equality_grad_var!(gsc, gx, bounds.ineqc, bounds.σc, λc, J) + gx[bounds.eqx] = gx[bounds.eqx] - λxE equality_grad_var!(gx, bounds.eqc, λcE, J) - equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx) equality_grad_λ!(gλx, sx, x, bounds.ineqx, bounds.σx, bounds.bx) equality_grad_λ!(gλc, sc, c, bounds.ineqc, bounds.σc, bounds.bc) + equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx) equality_grad_λ!(gλcE, c, bounds.valc, bounds.eqc) end equality_grad!(gx, gb::BarrierStateVars, bounds::ConstraintBounds, x, c, J, b::BarrierStateVars) = - equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λxE, gb.λx, gb.λc, gb.λcE, + equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λx, gb.λc, gb.λxE, gb.λcE, bounds, x, c, J, - b.slack_x, b.slack_c, b.λxE, b.λx, b.λc, b.λcE) + b.slack_x, b.slack_c, b.λx, b.λc, b.λxE, b.λcE) # violations of s = σ*(x-b) function equality_grad_var!(gs, gx, ineq, σ, λ) @@ -796,9 +695,6 @@ function isfeasible(bounds::ConstraintBounds, x, c) for (i,j) in enumerate(bounds.ineqx) isf &= bounds.σx[i]*(x[j] - bounds.bx[i]) >= 0 end - for (i,j) in enumerate(bounds.iz) - isf &= bounds.σz[i]*x[j] >= 0 - end for (i,j) in enumerate(bounds.eqc) isf &= c[j] == bounds.valc[i] end @@ -836,9 +732,6 @@ function isinterior(bounds::ConstraintBounds, x, c) for (i,j) in enumerate(bounds.ineqx) isi &= bounds.σx[i]*(x[j] - bounds.bx[i]) > 0 end - for (i,j) in enumerate(bounds.iz) - isi 
&= bounds.σz[i]*x[j] > 0 - end for (i,j) in enumerate(bounds.ineqc) isi &= bounds.σc[i]*(c[j] - bounds.bc[i]) > 0 end diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index d37d576ce..a195f9ea6 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -1,15 +1,16 @@ -function backtrack_constrained(ϕ, α, αmax, Lcoefsα, +function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α)))) - α = min(α, 0.999*αmax) + α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) L0, L1, L2 = Lcoefsα f_calls = 0 while α >= αmin f_calls += 1 - val = ϕ(α) + val = ϕ(α, αI) if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) - return α, f_calls, 0 + return α, αI, f_calls, 0 end α *= ρ + αI *= ρ end - return zero(α), f_calls, 0 + return zero(α), zero(αI), f_calls, 0 end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 73116d60b..6028f9b54 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -34,7 +34,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct mc = nconstraints(constraints) constr_c = Array{T}(mc) constraints.c!(initial_x, constr_c) -# isfeasible(constraints, initial_x, constr_c) || error("initial guess must be feasible") + isinterior(constraints, initial_x, constr_c) || (warn("initial guess is not an interior point"); Base.show_backtrace(STDOUT, backtrace())) # Allocate fields for the objective function n = length(initial_x) @@ -89,7 +89,6 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct stepf) d.h!(initial_x, state.H) - # state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0) Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1)) initialize_μ_λ!(state, constraints.bounds, Hinfo, options.μ0) update_fg!(d, constraints, state, method) @@ -113,29 +112,27 @@ end function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) x = 
state.x μ, Hxx, J = state.μ, state.H, state.constr_J - bounds = constraints.bounds + bstate, bounds = state.bstate, constraints.bounds m, n = size(J, 1), size(J, 2) - d.h!(state.x, Hxx) - hessianI!(Hxx, state.x, constraints, state.bstate.λc, μ) # accumulate the inequality second derivatives - # Add the Jacobian terms (J'*S^{-2}*J) + d.h!(state.x, Hxx) # objective's Hessian + hessianI!(Hxx, state.x, constraints, bstate.λc, μ) # accumulate the inequality second derivatives + # Add the Jacobian terms (J'*Hss*J) JI = view5(J, bounds.ineqc, :) - Sinv2 = Diagonal(1./state.bstate.slack_c.^2) - HJ = JI'*Sinv2*JI + Hssc = Diagonal(bstate.λc./bstate.slack_c) + HJ = JI'*Hssc*JI for j = 1:n, i = 1:n - Hxx[i,j] += μ*HJ[i,j] + Hxx[i,j] += HJ[i,j] end - # Add the variable inequalities portions of J'*S^{-2}*J - # The iz terms are already in Hxx (from hessianI!) - ineqx, sx = bounds.ineqx, state.bstate.slack_x - for (i,j) in enumerate(ineqx) - Hxx[j,j] += μ/sx[i]^2 + # Add the variable inequalities portions of J'*Hssx*J + for (i,j) in enumerate(bounds.ineqx) + Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i] end # Perform a positive factorization Hpc, state.Hd = ldltfact(Positive, Hxx) Hp = full(Hpc) # Now add the equality constraint hessian terms - eqc, λcE = bounds.eqc, state.bstate.λcE + eqc, λcE = bounds.eqc, bstate.λcE λ = zeros(eltype(x), nconstraints(bounds)) for i = 1:length(eqc) λ[eqc[i]] -= λcE[i] @@ -156,9 +153,9 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))] # Also form the total gradient bgrad = state.bgrad - gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc) - for (i,j) in enumerate(ineqx) - gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2) + gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - Hssc*bgrad.λc) + for (i,j) in enumerate(bounds.ineqx) + gI[j] += -μ*bounds.σx[i]./bstate.slack_x[i] + 
bstate.λx[i]*(x[j]-bounds.bx[i])/bstate.slack_x[i] end state.gf = [gI; bgrad.λxE; @@ -169,7 +166,7 @@ end function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) state.f_x_previous, state.L_previous = state.f_x, state.L bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds - state, dslackc = solve_step!(state, constraints) + state = solve_step!(state, constraints) # If a step α=1 will not change any of the parameters, we can quit now. # This prevents a futile linesearch. if is_smaller_eps(state.x, state.s) && @@ -182,17 +179,16 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction qp = quadratic_parameters(bounds, state) # Estimate αmax, the upper bound on distance of movement along the search line - αmax = convert(eltype(bstate), Inf) + αmax = αImax = convert(eltype(bstate), Inf) αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x) αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c) - αmax = estimate_maxstep(αmax, - view(state.x, bounds.iz).*bounds.σz, - view(state.s, bounds.iz).*bounds.σz) + αImax = estimate_maxstep(αImax, bstate.λx, bstep.λx) + αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc) # Determine the actual distance of movement along the search line - ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method, dslackc) - state.alpha, f_update, g_update = - method.linesearch!(ϕ, T(1), αmax, qp) + ϕ = (α,αI)->lagrangian_linefunc!(α, αI, d, constraints, state, method) + state.alpha, αI, f_update, g_update = + method.linesearch!(ϕ, T(1), αmax, αImax, qp) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update # Maintain a record of previous position @@ -200,15 +196,12 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Update current position # x = x + alpha * s ls_update!(state.x, state.x, state.s, state.alpha) - ls_update!(bstate, state.constr_c, bstate, bstep, 
state.alpha, constraints, state, dslackc) + ls_update!(bstate, bstate, bstep, state.alpha, αI) # Evaluate the constraints at the new position -# constraints.c!(state.x, state.constr_c) # already done in ls_update! + constraints.c!(state.x, state.constr_c) constraints.jacobian!(state.x, state.constr_J) - # Test for active inequalities, solve immediately for the corresponding s and λ - # solve_active_inequalities!(d, constraints, state) - false end @@ -227,19 +220,20 @@ function solve_step!(state::IPNewtonState, constraints) k = unpack_vec!(bstep.λcE, step, k) k == length(step) || error("exhausted targets before step") # Solve for the slack variable and λI updates - # These are only used to estimate αmax, otherwise these are updated by exact formulas for (i, j) in enumerate(bounds.ineqx) bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j] - bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 + # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 + bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] end JI = view5(state.constr_J, bounds.ineqc, :) - dslackc = Diagonal(bounds.σc)*JI*s - bstep.slack_c[:] = -bgrad.λc + dslackc + SigmaJIΔx = Diagonal(bounds.σc)*(JI*state.s) for i = 1:length(bstep.λc) - bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 + bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i] + # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 + bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] end state.stepf = step - state, dslackc + state end function is_smaller_eps(ref, step) diff --git a/src/types.jl b/src/types.jl index 06550fd74..e3a7c5ffd 100644 --- a/src/types.jl +++ b/src/types.jl @@ -291,9 +291,6 @@ immutable ConstraintBounds{T} ineqx::Vector{Int} # index-vector of other inequality-constrained variables σx::Vector{Int8} # ±1, in constraints σ(v-b) ≥ 0 (sign depends on whether v>b or v eq, val, ineq, σ, b, 
[iz, σz, bz] + parse_constraints(T, l, u) -> eq, val, ineq, σ, b From user-supplied constraints of the form @@ -457,13 +451,6 @@ when `l_i == u_i`), convert into the following representation: - `ineq`, `σ`, and `b` such that the inequality constraints can be written as σ[k]*(v[ineq[k]] - b[k]) ≥ 0 where `σ[k] = ±1`. - - optionally (with `split_signed=true`), return an index-vector - `iz` of entries where one of `l`, `u` is zero, along with - whether the constraint is `≥ 0` (σz=+1) or `≤ 0` (σz=-1). Such - are removed from `ineq`, `σ`, and `b`. For coordinate variables - this can be used to reduce the number of slack variables needed, - since when one of the bounds is 0, the variable itself *is* a - slack variable. Note that since the same `v_i` might have both lower and upper bounds, `ineq` might have the same index twice (once with `σ`=-1 and once with `σ`=1). @@ -474,11 +461,11 @@ corresponding entry in `ineq`/`σ`/`b`. T is the element-type of the non-Int outputs """ -function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false) +function parse_constraints{T}(::Type{T}, l, u) size(l) == size(u) || throw(DimensionMismatch("l and u must be the same size, got $(size(l)) and $(size(u))")) - eq, ineq, iz = Int[], Int[], Int[] + eq, ineq = Int[], Int[] val, b = T[], T[] - σ, σz = Array{Int8}(0), Array{Int8}(0) + σ = Array{Int8}(0) for i = 1:length(l) li, ui = l[i], u[i] li <= ui || throw(ArgumentError("l must be smaller than u, got $li, $ui")) @@ -487,31 +474,18 @@ function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false) push!(val, ui) else if isfinite(li) - if split_signed && li == 0 - push!(iz, i) - push!(σz, 1) - else - push!(ineq, i) - push!(σ, 1) - push!(b, li) - end + push!(ineq, i) + push!(σ, 1) + push!(b, li) end ui = u[i] if isfinite(ui) - if split_signed && ui == 0 - push!(iz, i) - push!(σz, -1) - else - push!(ineq, i) - push!(σ, -1) - push!(b, ui) - end + push!(ineq, i) + push!(σ, -1) + push!(b, ui) end end end - if split_signed - 
return eq, val, ineq, σ, b, iz, σz, zeros(T, length(iz)) - end eq, val, ineq, σ, b end diff --git a/test/constraints.jl b/test/constraints.jl index 89916fc95..57f3d2b4a 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -28,11 +28,9 @@ end b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0])) @test b.eqx == [3] @test b.valx == [2.0] - @test b.ineqx == [1,2,2] - @test b.σx == [-1,1,-1] - @test b.bx == [1.0,0.5,1.0] - @test b.iz == [1] - @test b.σz == [1] + @test b.ineqx == [1,1,2,2] + @test b.σx == [1,-1,1,-1] + @test b.bx == [0.0,1.0,0.5,1.0] @test b.eqc == [1] @test b.valc == [5] @test b.ineqc == [2,2] @@ -44,14 +42,13 @@ end ConstraintBounds: Variables: x[3]=2.0 - x[1]≤1.0,x[2]≥0.5,x[2]≤1.0 - x[1]≥0.0 + x[1]≥0.0,x[1]≤1.0,x[2]≥0.5,x[2]≤1.0 Linear/nonlinear constraints: c_1=5.0 c_2≥3.8,c_2≤4.0""" b = @inferred5(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0])) - for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz) + for fn in (:eqx, :valx, :ineqx, :σx, :bx) @test isempty(getfield(b, fn)) end @test b.eqc == [1] @@ -85,10 +82,11 @@ ConstraintBounds: ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}()) @test pcmp ≈ pgrad end - # Basic setup + # Basic setup (using two objectives, one equal to zero and the other a Gaussian) μ = 0.2345678 + d0 = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0)) A = randn(3,3); H = A'*A - d = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H)) + dg = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H)) x = broadcast(clamp, randn(3), -0.99, 0.99) gx = similar(x) cfun = x->Float64[] @@ -109,41 +107,40 @@ ConstraintBounds: bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[]) bstate = Optim.BarrierStateVars(bounds, x) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) - 
@test f_x == L == d.f(x) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, dg, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) + @test f_x == L == dg.f(x) @test gx == H*x constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, dg, constraints, x) @test state.gf ≈ gx @test state.Hf ≈ H ## Pure equality constraints on variables - d = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0)) xbar = fill(0.2, length(x)) bounds = Optim.ConstraintBounds(xbar, xbar, [], []) bstate = Optim.BarrierStateVars(bounds) rand!(bstate.λxE) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) @test f_x == 0 @test L ≈ dot(bstate.λxE, xbar-x) @test gx == -bstate.λxE @test bgrad.λxE == xbar-x - check_autodiff(d, bounds, x, cfun, bstate, μ) + check_autodiff(d0, bounds, x, cfun, bstate, μ) constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λxE, bstate.λxE) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) @test state.gf ≈ [gx; xbar-x] n = length(x) @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], xbar, xbar) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λcE, bstate.λxE) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) 
@test state.gf ≈ [gx; xbar-x] n = length(x) @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] @@ -151,23 +148,25 @@ ConstraintBounds: bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], []) y = rand(length(x)) bstate = Optim.BarrierStateVars(bounds, y) + rand!(bstate.λx) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ) @test f_x == 0 @test L ≈ -μ*sum(log, y) - @test gx == -μ./y - check_autodiff(d, bounds, y, cfun, bstate, μ) + @test bgrad.slack_x == -μ./y + bstate.λx + @test gx == -bstate.λx + check_autodiff(d0, bounds, y, cfun, bstate, μ) constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) - state = Optim.initial_state(method, options, d, constraints, y) - setstate!(state, μ, d, constraints, method) + state = Optim.initial_state(method, options, d0, constraints, y) + setstate!(state, μ, d0, constraints, method) @test state.gf ≈ -μ./y @test state.Hf ≈ μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) - state = Optim.initial_state(method, options, d, constraints, y) - setstate!(state, μ, d, constraints, method) + state = Optim.initial_state(method, options, d0, constraints, y) + setstate!(state, μ, d0, constraints, method) @test state.gf ≈ -μ./y @test state.Hf ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables @@ -177,7 +176,7 @@ ConstraintBounds: rand!(bstate.slack_x) # intentionally displace from the correct value rand!(bstate.λx) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, 
Float64[], Array{Float64}(0,0), bstate, μ) @test f_x == 0 s = bounds.σx .* (x[bounds.ineqx] - bounds.bx) Ltarget = -μ*sum(log, bstate.slack_x) + @@ -189,28 +188,34 @@ ConstraintBounds: end @test gx ≈ dx @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx - check_autodiff(d, bounds, x, cfun, bstate, μ) + check_autodiff(d0, bounds, x, cfun, bstate, μ) constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.slack_x, bstate.slack_x) copy!(state.bstate.λx, bstate.λx) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) gxs, hxs = zeros(length(x)), zeros(length(x)) - s = state.bstate.slack_x + s, λ = state.bstate.slack_x, state.bstate.λx for (i,j) in enumerate(bounds.ineqx) - gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2 - hxs[j] += μ/s[i]^2 + # # Primal + # gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2 + # hxs[j] += μ/s[i]^2 + # Primal-dual + gstmp, gλtmp = -μ/s[i] + λ[i], s[i] - bounds.σx[i]*(x[j]-bounds.bx[i]) + htmp = λ[i]/s[i] + hxs[j] += htmp + gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp end @test state.gf ≈ gxs @test state.Hf ≈ Diagonal(hxs) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], lb, ub) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.slack_c, bstate.slack_x) copy!(state.bstate.λc, bstate.λx) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) @test state.gf ≈ gxs @test state.Hf ≈ Diagonal(hxs) ## Nonlinear equality constraints @@ -231,28 +236,28 @@ ConstraintBounds: bstate = 
Optim.BarrierStateVars(bounds, x, c) rand!(bstate.λcE) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) @test f_x == 0 @test L ≈ dot(bstate.λcE, cbar-c) @test gx ≈ -J'*bstate.λcE @test bgrad.λcE == cbar-c - check_autodiff(d, bounds, x, cfun, bstate, μ) + check_autodiff(d0, bounds, x, cfun, bstate, μ) constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) - state = Optim.initial_state(method, options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λcE, bstate.λcE) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) heq = zeros(length(x), length(x)) ch!(x, bstate.λcE, heq) @test state.gf ≈ [gx; cbar-c] @test state.Hf ≈ [eye(length(x))-heq -J'; -J zeros(size(J,1), size(J,1))] ## Nonlinear inequality constraints - bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1) + bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2) bstate = Optim.BarrierStateVars(bounds, x, c) rand!(bstate.slack_c) # intentionally displace from the correct value rand!(bstate.λc) bgrad = similar(bstate) - f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) + f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ) @test f_x == 0 Ltarget = -μ*sum(log, bstate.slack_c) + dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc)) @@ -260,12 +265,12 @@ ConstraintBounds: @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc) @test bgrad.slack_c == -μ./bstate.slack_c + bstate.λc @test bgrad.λc == bstate.slack_c - bounds.σc .* (c[bounds.ineqc] - bounds.bc) - check_autodiff(d, bounds, x, cfun, bstate, μ) + check_autodiff(d0, bounds, x, cfun, bstate, μ) constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds) - state = Optim.initial_state(method, 
options, d, constraints, x) + state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.slack_c, bstate.slack_c) copy!(state.bstate.λc, bstate.λc) - setstate!(state, μ, d, constraints, method) + setstate!(state, μ, d0, constraints, method) hineq = zeros(length(x), length(x)) λ = zeros(size(J, 1)) for (i,j) in enumerate(bounds.ineqc) @@ -273,9 +278,14 @@ ConstraintBounds: end ch!(x, λ, hineq) JI = J[bounds.ineqc,:] - hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq + # # Primal + # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq + # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2)) + # Primal-dual + hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq + gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) hp = full(cholfact(Positive, hxx)) - @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2)) + @test state.gf ≈ gf @test state.Hf ≈ hp end @@ -364,25 +374,24 @@ ConstraintBounds: F = 1000 d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) method = Optim.IPNewton() - options = OptimizationOptions() μ = 1e-20 + options = OptimizationOptions(μ0=μ) x0 = μ/F*10 # minimum is at μ/F # Nonnegativity (the case that doesn't require slack variables) constraints = TwiceDifferentiableConstraintsFunction([0.0], []) state = Optim.initial_state(method, options, d, constraints, [x0]) - setstate!(state, μ, d, constraints, method) Optim.solve_step!(state, constraints) - @test state.s[1] ≈ x0 - F*x0^2/μ + @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0) qp = Optim.quadratic_parameters(constraints.bounds, state) @test qp[1] ≈ F*x0-μ*log(x0) @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2 bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds - αmax = 
Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz, - state.s[bounds.iz].*bounds.σz) - ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state, Float64[]) - @test ϕ(0) ≈ qp[1] - α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp) + αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx, + state.s[bounds.ineqx].*bounds.σx) + ϕ = (α,αI)->Optim.lagrangian_linefunc(α, αI, d, constraints, state) + @test ϕ(0,0) ≈ qp[1] + α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp) @test α > 1e-3 end @@ -397,15 +406,14 @@ ConstraintBounds: # boundary). F0 = 1000 method = Optim.IPNewton() - options = OptimizationOptions() μ = 1e-20 # smaller than eps(1.0) + options = OptimizationOptions(μ0=μ) for σ in (1, -1) F = σ*F0 # Nonnegativity/nonpositivity (the case that doesn't require slack variables) d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...) state = Optim.initial_state(method, options, d, constraints, [μ/F*10]) - setstate!(state, μ, d, constraints, method) for i = 1:10 Optim.update_state!(d, constraints, state, method) Optim.update_fg!(d, constraints, state, method) @@ -416,7 +424,6 @@ ConstraintBounds: d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...) 
state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) - setstate!(state, μ, d, constraints, method) for i = 1:10 Optim.update_state!(d, constraints, state, method) Optim.update_fg!(d, constraints, state, method) @@ -424,7 +431,7 @@ ConstraintBounds: end @test state.x[1] == σ @test state.bstate.slack_x[1] < eps(float(σ)) - # x >= 1 using the linear/nonlinear constraints + # |x| >= 1 using the linear/nonlinear constraints d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) constraints = TwiceDifferentiableConstraintsFunction( (x,c)->(c[1] = x[1]), @@ -432,13 +439,12 @@ ConstraintBounds: (x,λ,h)->nothing, [], [], σswap(σ, [Float64(σ)], [])...) state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) - setstate!(state, μ, d, constraints, method) for i = 1:10 Optim.update_state!(d, constraints, state, method) Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end - @test state.x[1] == σ + @test state.x[1] ≈ σ @test state.bstate.slack_c[1] < eps(float(σ)) end end From dcfe788c2fc775c967b64acc82fe85e5e983c89f Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 19 Nov 2016 09:37:59 -0600 Subject: [PATCH 25/40] Start Optim.ConstrainedProblems --- src/Optim.jl | 4 +++- src/problems/constrained.jl | 42 +++++++++++++++++++++++++++++++++++ src/problems/multivariate.jl | 27 ++++++++++++++++++++++ src/problems/unconstrained.jl | 13 ++--------- 4 files changed, 74 insertions(+), 12 deletions(-) create mode 100644 src/problems/constrained.jl create mode 100644 src/problems/multivariate.jl diff --git a/src/Optim.jl b/src/Optim.jl index 130ac822f..c3efdf6c9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -19,6 +19,7 @@ module Optim export optimize, isfeasible, isinterior, + nconstraints, DifferentiableFunction, TwiceDifferentiableFunction, DifferentiableConstraintsFunction, @@ -110,8 +111,9 @@ module Optim include("utilities/trace.jl") # Examples for 
testing - include(joinpath("problems", "unconstrained.jl")) + include(joinpath("problems", "multivariate.jl")) include(joinpath("problems", "univariate.jl")) + using .MultivariateProblems cgdescent(args...) = error("API has changed. Please use cg.") end diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..9fc1ca70b --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,42 @@ +module ConstrainedProblems + +using ..OptimizationProblem, ...TwiceDifferentiableConstraintsFunction + +examples = Dict{AbstractString, OptimizationProblem}() + +hs9_obj(x::AbstractVector) = sin(π*x[1]/12) * cos(π*x[2]/16) +hs9_c!(x::AbstractVector, c::AbstractVector) = (c[1] = 4*x[1]-3*x[2]; c) + +function hs9_obj_g!(x::AbstractVector, g::AbstractVector) + g[1] = π/12 * cos(π*x[1]/12) * cos(π*x[2]/16) + g[2] = -π/16 * sin(π*x[1]/12) * sin(π*x[2]/16) + g +end +function hs9_obj_h!(x::AbstractVector, h::AbstractMatrix) + v = hs9_obj(x) + h[1,1] = -π^2*v/144 + h[2,2] = -π^2*v/256 + h[1,2] = h[2,1] = -π^2 * cos(π*x[1]/12) * sin(π*x[2]/16) / 192 + h +end + +function hs9_jacobian!(x, J) + J[1,1] = 4 + J[1,2] = -3 + J +end +hs9_h!(x, λ, h) = h + +examples["HS9"] = OptimizationProblem("HS9", + hs9_obj, + hs9_obj_g!, + hs9_obj_h!, + TwiceDifferentiableConstraintsFunction( + hs9_c!, hs9_jacobian!, hs9_h!, + [], [], [0.0], [0.0]), + [0.0, 0.0], + [[12k-3, 16k-4] for k in (0, 1, -1)], # any integer k will do... 
+ true, + true) + +end # module diff --git a/src/problems/multivariate.jl b/src/problems/multivariate.jl new file mode 100644 index 000000000..31d8fb729 --- /dev/null +++ b/src/problems/multivariate.jl @@ -0,0 +1,27 @@ +module MultivariateProblems + +export UnconstrainedProblems, ConstrainedProblems + +immutable OptimizationProblem + name::AbstractString + f::Function + g!::Function + h!::Function + constraints + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +function OptimizationProblem(name, f, g!, h!, + initial_x::AbstractVector, solutions, + isdifferentiable::Bool, istwicedifferentiable::Bool) + OptimizationProblem(name, f, g!, h!, nothing, + initial_x, solutions, isdifferentiable, istwicedifferentiable) +end + +include("unconstrained.jl") +include("constrained.jl") + +end diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl index d5d1ff62d..431ae0a53 100644 --- a/src/problems/unconstrained.jl +++ b/src/problems/unconstrained.jl @@ -1,5 +1,7 @@ module UnconstrainedProblems +using ..OptimizationProblem + ### Sources ### ### [1] Ali, Khompatraporn, & Zabinsky: A Numerical Evaluation of Several Stochastic Algorithms on Selected Continuous Global Optimization Test @@ -7,17 +9,6 @@ module UnconstrainedProblems ### ### [2] Fletcher & Powell: A rapidly convergent descent method for minimization, -immutable OptimizationProblem - name::AbstractString - f::Function - g!::Function - h!::Function - initial_x::Vector{Float64} - solutions::Vector - isdifferentiable::Bool - istwicedifferentiable::Bool -end - examples = Dict{AbstractString, OptimizationProblem}() ########################################################################## From fd8f0d09d89618b0440eecff17368840d2121acc Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 20 Nov 2016 14:10:02 -0600 Subject: [PATCH 26/40] Trace the total equality violation --- src/interior.jl | 44 ++++++++++++++++++++++-------------------- 
src/ipnewton.jl | 5 +++-- src/types.jl | 10 +++++----- src/utilities/trace.jl | 3 ++- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 0e0cc4b14..5e88e5c13 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -167,11 +167,16 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai iteration, iterationμ = 0, 0 options.show_trace && print_header(method) - trace!(tr, state, iteration, method, options) Δfmax = zero(state.f_x) while !converged && !stopped && iteration < options.iterations + # If tracing, update trace with trace!. If a callback is provided, it + # should have boolean return value that controls the variable stopped_by_callback. + # This allows for early stopping controlled by the callback. + if tracing + stopped_by_callback = trace!(tr, state, iteration, method, options) + end iteration += 1 iterationμ += 1 @@ -190,13 +195,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) - # If tracing, update trace with trace!. If a callback is provided, it - # should have boolean return value that controls the variable stopped_by_callback. - # This allows for early stopping controlled by the callback. - if tracing - stopped_by_callback = trace!(tr, state, iteration, method, options) - end - Δf = abs(state.f_x - state.f_x_previous) if iterationμ > 1 Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous)) @@ -228,6 +226,10 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai stopped = stopped_by_callback || stopped_by_time_limit ? 
true : false end # while + if tracing + trace!(tr, state, iteration, method, options) + end + after_while!(d, constraints, state, method, options) return MultivariateOptimizationResults(state.method_string, @@ -428,9 +430,9 @@ userλ(λcI, constraints) = userλ(λcI, constraints.bounds) function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ) f_x = d.f(x) - L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + - equality_violation(bounds, x, c, bstate) - f_x, L_xsλ + ev = equality_violation(bounds, x, c, bstate) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev + f_x, L_xsλ, ev end function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) @@ -444,28 +446,28 @@ end function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) fill!(bgrad, 0) f_x = d.fg!(x, gx) - L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + - equality_violation(bounds, x, c, bstate) + ev = equality_violation(bounds, x, c, bstate) + L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev barrier_grad!(gx, bgrad, bounds, x, bstate, μ) equality_grad!(gx, bgrad, bounds, x, c, J, bstate) - f_x, L_xsλ + f_x, L_xsλ, ev end ## Computation of Lagrangian and derivatives when passing all parameters as a single vector function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ) unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ) + f_x, L_xsλ, ev = lagrangian(d, bounds, x, c, bstate, μ) L_xsλ end function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ) # Use this version when using automatic differentiation unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ) + f_x, L_xsλ, ev = lagrangian(d, bounds, x, c(x), bstate, μ) L_xsλ end function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ) 
unpack_vec!(x, bstate, p) - f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) + f_x, L_xsλ, ev = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ) pack_vec!(storage, gx, bgrad) L_xsλ end @@ -486,10 +488,8 @@ end function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) # For backtrack_constrained, the last evaluation is the one we # keep, so it's safe to store the results in state - f_x, L = _lagrangian_linefunc(α, αI, d, constraints, state) - state.f_x = f_x - state.L = L - L + state.f_x, state.L, state.ev = _lagrangian_linefunc(α, αI, d, constraints, state) + state.L end lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state) @@ -711,6 +711,7 @@ function isfeasible(constraints, x) isfeasible(constraints, x, c) end isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c) +isfeasible(constraints::Void, x) = true """ isinterior(constraints, state) -> Bool @@ -744,6 +745,7 @@ function isinterior(constraints, x) isinterior(constraints, x, c) end isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c) +isinterior(constraints::Void, x) = true ## Utilities for representing total state as single vector function pack_vec(x, b::BarrierStateVars) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 6028f9b54..4a05b98da 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -22,6 +22,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState bstep::BarrierStateVars{T} # search direction for slack and λ constr_c::Vector{T} # value of the user-supplied constraints at x constr_J::Matrix{T} # value of the user-supplied Jacobian at x + ev::T # equality violation, ∑_i λ_Ei (c*_i - c_i) @add_linesearch_fields() b_ls::BarrierLineSearch{T} gf::Vector{T} @@ -82,6 +83,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct bstep, constr_c, constr_J, 
+ T(NaN), @initial_linesearch()..., # Maintain a cache for line search results in state.lsr b_ls, gf, @@ -96,8 +98,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct end function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) - f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) - state.f_x, state.L = f_x, L + state.f_x, state.L, state.ev = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) state.f_calls += 1 state.g_calls += 1 state diff --git a/src/types.jl b/src/types.jl index e3a7c5ffd..72a2ffcb2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -57,7 +57,7 @@ function print_header(method::Optimizer) end function print_header(method::IPOptimizer) - @printf "Iter Lagrangian value Function value Gradient norm μ\n" + @printf "Iter Lagrangian value Function value Gradient norm |==constr.| μ\n" end immutable OptimizationState{T <: Optimizer} @@ -134,10 +134,10 @@ end function Base.show{M<:IPOptimizer}(io::IO, t::OptimizationState{M}) md = t.metadata - @printf io "%6d %-14e %-14e %-14e %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["μ"] + @printf io "%6d %-14e %-14e %-14e %-14e %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["ev"] md["μ"] if !isempty(t.metadata) for (key, value) in md - key ∈ ("Lagrangian", "μ") && continue + key ∈ ("Lagrangian", "μ", "ev") && continue @printf io " * %s: %s\n" key value end end @@ -154,8 +154,8 @@ function Base.show(io::IO, tr::OptimizationTrace) end function Base.show{M<:IPOptimizer}(io::IO, tr::OptimizationTrace{M}) - @printf io "Iter Lagrangian value Function value Gradient norm μ\n" - @printf io "------ ---------------- -------------- -------------- --------\n" + @printf io "Iter Lagrangian value Function value Gradient norm |==constr.| μ\n" + @printf io "------ ---------------- 
-------------- -------------- -------------- --------\n" for state in tr show(io, state) end diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index bb574745c..4f08ae506 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -119,6 +119,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt = Dict() dt["Lagrangian"] = state.L dt["μ"] = state.μ + dt["ev"] = state.ev if options.extended_trace dt["α"] = state.alpha dt["x"] = copy(state.x) @@ -130,7 +131,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt["bgrad"] = copy(state.bgrad) dt["c"] = copy(state.constr_c) end - g_norm = vecnorm(state.gf, Inf) + g_norm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) update!(tr, iteration, state.f_x, From 9715589fbdb859eb0196234d585cb3a153f0172f Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 20 Nov 2016 14:53:31 -0600 Subject: [PATCH 27/40] Store less state for IPNewton update --- src/iplinesearch.jl | 1 + src/ipnewton.jl | 129 ++++++++++++++-------------- src/utilities/assess_convergence.jl | 4 +- src/utilities/trace.jl | 5 +- test/constraints.jl | 38 ++++---- 5 files changed, 91 insertions(+), 86 deletions(-) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index a195f9ea6..681d7682b 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -12,5 +12,6 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, α *= ρ αI *= ρ end + ϕ(zero(α), zero(αI)) # to ensure that state gets set appropriately return zero(α), zero(αI), f_calls, 0 end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 4a05b98da..9d52649af 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -25,9 +25,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState ev::T # equality violation, ∑_i λ_Ei (c*_i - c_i) @add_linesearch_fields() b_ls::BarrierLineSearch{T} - gf::Vector{T} - Hf::Matrix{T} - stepf::Vector{T} + gtilde::Vector{T} end function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, 
constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T}) @@ -52,9 +50,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct # More constraints constr_J = Array{T}(mc, n) constr_gtemp = Array{T}(n) - gf = Array{T}(0) # will be replaced - Hf = Array{T}(0,0) # " - stepf = Array{T}(0) + gtilde = similar(g) constraints.jacobian!(initial_x, constr_J) μ = T(1) bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c) @@ -86,9 +82,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct T(NaN), @initial_linesearch()..., # Maintain a cache for line search results in state.lsr b_ls, - gf, - Hf, - stepf) + gtilde) d.h!(initial_x, state.H) Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1)) @@ -101,27 +95,49 @@ function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, stat state.f_x, state.L, state.ev = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) state.f_calls += 1 state.g_calls += 1 - state + update_gtilde!(d, constraints, state, method) end function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ) state.g_calls += 1 + update_gtilde!(d, constraints, state, method) +end + +function update_gtilde!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) + # Calculate the modified x-gradient for the block-eliminated problem + gtilde, bstate, bgrad = state.gtilde, state.bstate, state.bgrad + bounds = constraints.bounds + copy!(gtilde, state.g) + JIc = view5(state.constr_J, bounds.ineqc, :) + if !isempty(JIc) + Hssc = Diagonal(bstate.λc./bstate.slack_c) + gc = JIc'*(Diagonal(bounds.σc) * (bgrad.slack_c - Hssc*bgrad.λc)) + for i = 1:length(gtilde) + gtilde[i] += gc[i] + end + end + for (i,j) 
in enumerate(bounds.ineqx) + gxi = bounds.σx[i]*(bgrad.slack_x[i] - bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i]) + gtilde[j] += gxi + end state end function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) - x = state.x - μ, Hxx, J = state.μ, state.H, state.constr_J - bstate, bounds = state.bstate, constraints.bounds + x, μ, Hxx, J = state.x, state.μ, state.H, state.constr_J + bstate, bgrad, bounds = state.bstate, state.bgrad, constraints.bounds m, n = size(J, 1), size(J, 2) d.h!(state.x, Hxx) # objective's Hessian - hessianI!(Hxx, state.x, constraints, bstate.λc, μ) # accumulate the inequality second derivatives + # accumulate the constraint second derivatives + λ = userλ(bstate.λc, constraints) + λ[bounds.eqc] = -bstate.λcE # the negative sign is from the Hessian + constraints.h!(x, λ, Hxx) # Add the Jacobian terms (J'*Hss*J) - JI = view5(J, bounds.ineqc, :) + JIc = view5(J, bounds.ineqc, :) Hssc = Diagonal(bstate.λc./bstate.slack_c) - HJ = JI'*Hssc*JI + HJ = JIc'*Hssc*JIc for j = 1:n, i = 1:n Hxx[i,j] += HJ[i,j] end @@ -129,38 +145,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state for (i,j) in enumerate(bounds.ineqx) Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i] end - # Perform a positive factorization - Hpc, state.Hd = ldltfact(Positive, Hxx) - Hp = full(Hpc) - # Now add the equality constraint hessian terms - eqc, λcE = bounds.eqc, bstate.λcE - λ = zeros(eltype(x), nconstraints(bounds)) - for i = 1:length(eqc) - λ[eqc[i]] -= λcE[i] - end - constraints.h!(state.x, λ, Hp) - # Also add these to Hxx so we have the true Hessian (the one - # without forcing positive-definiteness) - constraints.h!(state.x, λ, Hxx) - # Form the total Hessian - JEx = zeros(eltype(bounds), length(bounds.eqx), length(state.x)) - for (i,j) in enumerate(bounds.eqx) - JEx[i,j] = 1 - end - JEc = view5(J, eqc, :) - Jod = zeros(eltype(JEx), size(JEc, 1), size(JEx, 1)) - state.Hf = [Hp -JEx' -JEc'; - -JEx 
zeros(eltype(JEx), size(JEx,1), size(JEx,1)) Jod'; - -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))] - # Also form the total gradient - bgrad = state.bgrad - gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - Hssc*bgrad.λc) - for (i,j) in enumerate(bounds.ineqx) - gI[j] += -μ*bounds.σx[i]./bstate.slack_x[i] + bstate.λx[i]*(x[j]-bounds.bx[i])/bstate.slack_x[i] - end - state.gf = [gI; - bgrad.λxE; - bgrad.λcE] + state end @@ -202,38 +187,42 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Evaluate the constraints at the new position constraints.c!(state.x, state.constr_c) constraints.jacobian!(state.x, state.constr_J) + @assert state.ev == equality_violation(constraints, state) false end function solve_step!(state::IPNewtonState, constraints) - # Solve the Newton step - local step - try - step = -(state.Hf\state.gf) # do *not* force posdef - catch - step = -(svdfact(state.Hf)\state.gf) - end x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad - k = unpack_vec!(s, step, 0) - k = unpack_vec!(bstep.λxE, step, k) - k = unpack_vec!(bstep.λcE, step, k) - k == length(step) || error("exhausted targets before step") + # Solve the Newton step + Hxx = state.H + JE = jacobianE(state, bounds) + # Q, R, p = qr(JE', Val{true}) + gE = [bgrad.λxE; + bgrad.λcE] + HxxF = cholfact(Positive, Hxx, Val{true}) + M = JE*(HxxF \ JE') + MF = cholfact(Positive, M, Val{true}) + ΔλE = MF \ (gE + JE * (HxxF \ state.gtilde)) + Δx = HxxF \ (JE'*ΔλE - state.gtilde) + copy!(s, Δx) + k = unpack_vec!(bstep.λxE, ΔλE, 0) + k = unpack_vec!(bstep.λcE, ΔλE, k) + k == length(ΔλE) || error("exhausted targets before ΔλE") # Solve for the slack variable and λI updates for (i, j) in enumerate(bounds.ineqx) bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j] # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 bstep.λx[i] = -bgrad.slack_x[i] - 
bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] end - JI = view5(state.constr_J, bounds.ineqc, :) - SigmaJIΔx = Diagonal(bounds.σc)*(JI*state.s) + JIc = view5(state.constr_J, bounds.ineqc, :) + SigmaJIΔx = Diagonal(bounds.σc)*(JIc*state.s) for i = 1:length(bstep.λc) bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i] # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] end - state.stepf = step state end @@ -246,7 +235,9 @@ function is_smaller_eps(ref, step) end function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) - slope = dot(state.stepf, state.gf) + slope = dot(state.s, state.gtilde) + + dot(state.bstep.λxE, state.bgrad.λxE) + + dot(state.bstep.λcE, state.bgrad.λcE) # For the curvature, use the original hessian (before forcing # positive-definiteness) q = dot(state.s, state.H*state.s) @@ -254,3 +245,15 @@ function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE) state.L, slope, q end + +# Utility functions that assist in testing: they return the "full +# Hessian" and "full gradient" for the equation with the slack and λI +# eliminated. 
+function Hf(bounds::ConstraintBounds, state) + JE = jacobianE(state, bounds) + HxxF = cholfact(Positive, state.H) + Hf = [full(HxxF) -JE'; + -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))] +end +Hf(constraints, state) = Hf(constraints.bounds, state) +gf(state) = [state.gtilde; state.bgrad.λxE; state.bgrad.λcE] diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl index b11800254..c2551ce55 100644 --- a/src/utilities/assess_convergence.jl +++ b/src/utilities/assess_convergence.jl @@ -86,11 +86,13 @@ function assess_convergence(state::NewtonTrustRegionState, options) end function assess_convergence(state::IPNewtonState, options) + # We use the whole bstate-gradient `bgrad` + bgrad = state.bgrad assess_convergence(state.x, state.x_previous, state.L, state.L_previous, - state.gf, + [state.g; bgrad.slack_x; bgrad.slack_c; bgrad.λx; bgrad.λc; bgrad.λxE; bgrad.λcE], options.x_tol, options.f_tol, options.g_tol) diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index 4f08ae506..1ad4139dc 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -119,14 +119,13 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt = Dict() dt["Lagrangian"] = state.L dt["μ"] = state.μ - dt["ev"] = state.ev + dt["ev"] = abs(state.ev) if options.extended_trace dt["α"] = state.alpha dt["x"] = copy(state.x) dt["g(x)"] = copy(state.g) - dt["gf(x)"] = copy(state.gf) + dt["gtilde(x)"] = copy(state.gtilde) dt["h(x)"] = copy(state.H) - dt["hf(x)"] = copy(state.Hf) dt["bstate"] = copy(state.bstate) dt["bgrad"] = copy(state.bgrad) dt["c"] = copy(state.constr_c) diff --git a/test/constraints.jl b/test/constraints.jl index 57f3d2b4a..497cad4a4 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -113,8 +113,8 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, dg, constraints, x) - @test state.gf ≈ 
gx - @test state.Hf ≈ H + @test Optim.gf(state) ≈ gx + @test Optim.Hf(constraints, state) ≈ H ## Pure equality constraints on variables xbar = fill(0.2, length(x)) bounds = Optim.ConstraintBounds(xbar, xbar, [], []) @@ -132,18 +132,18 @@ ConstraintBounds: state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λxE, bstate.λxE) setstate!(state, μ, d0, constraints, method) - @test state.gf ≈ [gx; xbar-x] + @test Optim.gf(state) ≈ [gx; xbar-x] n = length(x) - @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] + @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], xbar, xbar) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λcE, bstate.λxE) setstate!(state, μ, d0, constraints, method) - @test state.gf ≈ [gx; xbar-x] + @test Optim.gf(state) ≈ [gx; xbar-x] n = length(x) - @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] + @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] ## Nonnegativity constraints bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], []) y = rand(length(x)) @@ -160,15 +160,15 @@ ConstraintBounds: (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) - @test state.gf ≈ -μ./y - @test state.Hf ≈ μ*Diagonal(1./y.^2) + @test Optim.gf(state) ≈ -μ./y + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) - @test 
state.gf ≈ -μ./y - @test state.Hf ≈ μ*Diagonal(1./y.^2) + @test Optim.gf(state) ≈ -μ./y + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables lb, ub = rand(length(x))-2, rand(length(x))+1 bounds = Optim.ConstraintBounds(lb, ub, [], []) @@ -207,8 +207,8 @@ ConstraintBounds: hxs[j] += htmp gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp end - @test state.gf ≈ gxs - @test state.Hf ≈ Diagonal(hxs) + @test Optim.gf(state) ≈ gxs + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], lb, ub) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) @@ -216,8 +216,8 @@ ConstraintBounds: copy!(state.bstate.slack_c, bstate.slack_x) copy!(state.bstate.λc, bstate.λx) setstate!(state, μ, d0, constraints, method) - @test state.gf ≈ gxs - @test state.Hf ≈ Diagonal(hxs) + @test Optim.gf(state) ≈ gxs + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) ## Nonlinear equality constraints cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] cfun! 
= (x, c) -> copy!(c, cfun(x)) @@ -248,9 +248,9 @@ ConstraintBounds: setstate!(state, μ, d0, constraints, method) heq = zeros(length(x), length(x)) ch!(x, bstate.λcE, heq) - @test state.gf ≈ [gx; cbar-c] - @test state.Hf ≈ [eye(length(x))-heq -J'; - -J zeros(size(J,1), size(J,1))] + @test Optim.gf(state) ≈ [gx; cbar-c] + @test Optim.Hf(constraints, state) ≈ [heq -J'; + -J zeros(size(J,1), size(J,1))] ## Nonlinear inequality constraints bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2) bstate = Optim.BarrierStateVars(bounds, x, c) @@ -285,8 +285,8 @@ ConstraintBounds: hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) hp = full(cholfact(Positive, hxx)) - @test state.gf ≈ gf - @test state.Hf ≈ hp + @test Optim.gf(state) ≈ gf + @test Optim.Hf(constraints, state) ≈ hp end @testset "IPNewton initialization" begin From 3772033c148b3bbe409a8f3e05c6de57a94f0d53 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 21 Nov 2016 11:12:35 -0600 Subject: [PATCH 28/40] Implement some convert methods needed to leverage ForwardDiff --- src/interior.jl | 18 +++++++++++++++++- src/ipnewton.jl | 33 +++++++++++++++++++++++++++++++++ test/constraints.jl | 13 +++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index 5e88e5c13..939a64e09 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -69,7 +69,6 @@ Base.copy(bstate::BarrierStateVars) = copy(bstate.λxE), copy(bstate.λcE)) - function Base.fill!(b::BarrierStateVars, val) fill!(b.slack_x, val) fill!(b.slack_c, val) @@ -80,6 +79,14 @@ function Base.fill!(b::BarrierStateVars, val) b end +Base.convert{T}(::Type{BarrierStateVars{T}}, bstate::BarrierStateVars) = + BarrierStateVars(convert(Array{T}, bstate.slack_x), + convert(Array{T}, bstate.slack_c), + convert(Array{T}, bstate.λx), + convert(Array{T}, bstate.λc), + convert(Array{T}, 
bstate.λxE), + convert(Array{T}, bstate.λcE)) + Base.eltype{T}(::Type{BarrierStateVars{T}}) = T Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv)) @@ -127,6 +134,9 @@ immutable BarrierLineSearch{T} c::Vector{T} # value of constraints-functions at trial point bstate::BarrierStateVars{T} # trial point for slack and λ variables end +Base.convert{T}(::Type{BarrierLineSearch{T}}, bsl::BarrierLineSearch) = + BarrierLineSearch(convert(Vector{T}, bsl.c), + convert(BarrierStateVars{T}, bsl.bstate)) """ BarrierLineSearchGrad{T} @@ -139,6 +149,12 @@ immutable BarrierLineSearchGrad{T} bstate::BarrierStateVars{T} # trial point for slack and λ variables bgrad::BarrierStateVars{T} # trial point's gradient end +Base.convert{T}(::Type{BarrierLineSearchGrad{T}}, bsl::BarrierLineSearchGrad) = + BarrierLineSearchGrad(convert(Vector{T}, bsl.c), + convert(Matrix{T}, bsl.J), + convert(BarrierStateVars{T}, bsl.bstate), + convert(BarrierStateVars{T}, bsl.bgrad)) + function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI) ls_update!(out.slack_x, base.slack_x, step.slack_x, α) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 9d52649af..08f14162c 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -28,6 +28,39 @@ type IPNewtonState{T,N} <: AbstractBarrierState gtilde::Vector{T} end +function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,N}) + IPNewtonState(state.method_string, + state.n, + convert(Array{T}, state.x), + T(state.f_x), + state.f_calls, + state.g_calls, + state.h_calls, + convert(Array{T}, state.x_previous), + convert(Array{T}, state.g), + T(state.f_x_previous), + convert(Array{T}, state.H), + state.Hd, + convert(Array{T}, state.s), + T(state.μ), + T(state.L), + T(state.L_previous), + convert(BarrierStateVars{T}, state.bstate), + convert(BarrierStateVars{T}, state.bgrad), + convert(BarrierStateVars{T}, state.bstep), + convert(Array{T}, state.constr_c), + convert(Array{T}, state.constr_J), + T(state.ev), + 
convert(Array{T}, state.x_ls), + convert(Array{T}, state.g_ls), + T(state.alpha), + state.mayterminate, + state.lsr, + convert(BarrierLineSearchGrad{T}, state.b_ls), + convert(Array{T}, state.gtilde) + ) +end + function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T}) # Check feasibility of the initial state mc = nconstraints(constraints) diff --git a/test/constraints.jl b/test/constraints.jl index 497cad4a4..28d0bbc0e 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -371,6 +371,19 @@ ConstraintBounds: end @testset "IPNewton step" begin + function autoqp(d, constraints, state) + # Note that state must be fully up-to-date, and you must + # have also called Optim.solve_step! + p = Optim.pack_vec(state.x, state.bstate) + chunksize = min(8, max(length(p), 4)) # since αs is of length 4 + TD = ForwardDiff.Dual{chunksize,eltype(p)} + TD2 = ForwardDiff.Dual{chunksize,ForwardDiff.Dual{chunksize,eltype(p)}} + stated = convert(Optim.IPNewtonState{TD,1}, state) + stated2 = convert(Optim.IPNewtonState{TD2,1}, state) + ϕd = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated) + ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2) + ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4)) + end F = 1000 d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) method = Optim.IPNewton() From d627e367f45602ffe7c63788a54c1ab5cee6064a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 21 Nov 2016 12:38:40 -0600 Subject: [PATCH 29/40] =?UTF-8?q?Support=20multi-parameter=20=CE=B1=20in?= =?UTF-8?q?=20linesearch,=20check=20slope=20during=20linesearch,=20and=20f?= =?UTF-8?q?ix=20quadratic=20parameters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the step size for the inequality λ may not be the same as the step size for the rest of the parameters, we need a more 
sophisticated model of the function's behavior during linesearch. This commit implements a 4-parameter vector (αx, αs, αI, αE) for changes to the position, slack, inequality LM, and equality LM respectively (LM=lagrange multiplier). Consequently the "slope" becomes a 4-parameter vector and the "curvature" a 4x4 matrix. In practice this helps convergence on some "hard" problems (e.g., HATFLDF). --- src/interior.jl | 74 ++++++++++++++++++++++++++++++++++++--------- src/iplinesearch.jl | 48 ++++++++++++++++++++++++++--- src/ipnewton.jl | 58 ++++++++++++++++++++++++++--------- test/constraints.jl | 15 +++++---- 4 files changed, 156 insertions(+), 39 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 939a64e09..3082a8e08 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -155,16 +155,21 @@ Base.convert{T}(::Type{BarrierLineSearchGrad{T}}, bsl::BarrierLineSearchGrad) = convert(BarrierStateVars{T}, bsl.bstate), convert(BarrierStateVars{T}, bsl.bgrad)) - -function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI) - ls_update!(out.slack_x, base.slack_x, step.slack_x, α) - ls_update!(out.slack_c, base.slack_c, step.slack_c, α) - ls_update!(out.λxE, base.λxE, step.λxE, α) - ls_update!(out.λcE, base.λcE, step.λcE, α) - ls_update!(out.λx, base.λx, step.λx, αI) - ls_update!(out.λc, base.λc, step.λc, αI) +function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::NTuple{4,Number}) + ls_update!(out.slack_x, base.slack_x, step.slack_x, αs[2]) + ls_update!(out.slack_c, base.slack_c, step.slack_c, αs[2]) + ls_update!(out.λx, base.λx, step.λx, αs[3]) + ls_update!(out.λc, base.λc, step.λc, αs[3]) + ls_update!(out.λxE, base.λxE, step.λxE, αs[4]) + ls_update!(out.λcE, base.λcE, step.λcE, αs[4]) out end +ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::Tuple{Number,Number}) = + ls_update!(out, base, step, (αs[1],αs[1],αs[2],αs[1])) 
+ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α::Number) = + ls_update!(out, base, step, (α,α,α,α)) +ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::AbstractVector) = + ls_update!(out, base, step, (αs...,)) function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions) t0 = time() # Initial time stamp used to control early stopping by options.time_limit @@ -488,18 +493,20 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x L_xsλ end -# for line searches that don't use the gradient along the line -function lagrangian_linefunc(α, αI, d, constraints, state) - _lagrangian_linefunc(α, αI, d, constraints, state)[2] +## for line searches that don't use the gradient along the line +function lagrangian_linefunc(αs, d, constraints, state) + _lagrangian_linefunc(αs, d, constraints, state)[2] end -function _lagrangian_linefunc(α, αI, d, constraints, state) +function _lagrangian_linefunc(αs, d, constraints, state) b_ls, bounds = state.b_ls, constraints.bounds - ls_update!(state.x_ls, state.x, state.s, α) - ls_update!(b_ls.bstate, state.bstate, state.bstep, α, αI) + ls_update!(state.x_ls, state.x, state.s, alphax(αs)) + ls_update!(b_ls.bstate, state.bstate, state.bstep, αs) constraints.c!(state.x_ls, b_ls.c) lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ) end +alphax(α::Number) = α +alphax(αs::Union{Tuple,AbstractVector}) = αs[1] function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) # For backtrack_constrained, the last evaluation is the one we @@ -509,6 +516,45 @@ function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimize end lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state) + +## for line searches 
that do use the gradient along the line +function lagrangian_lineslope(αs, d, constraints, state) + f_x, L, ev, slope = _lagrangian_lineslope(αs, d, constraints, state) + L, slope +end + +function _lagrangian_lineslope(αs, d, constraints, state) + b_ls, bounds = state.b_ls, constraints.bounds + bstep, bgrad = state.bstep, b_ls.bgrad + ls_update!(state.x_ls, state.x, state.s, alphax(αs)) + ls_update!(b_ls.bstate, state.bstate, bstep, αs) + constraints.c!(state.x_ls, b_ls.c) + constraints.jacobian!(state.x_ls, b_ls.J) + f_x, L, ev = lagrangian_fg!(state.g_ls, bgrad, d, bounds, state.x_ls, b_ls.c, b_ls.J, b_ls.bstate, state.μ) + slopeα = slopealpha(state.s, state.g_ls, bstep, bgrad) + f_x, L, ev, slopeα +end + +function lagrangian_lineslope!(αs, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) + # For backtrack_constrained, the last evaluation is the one we + # keep, so it's safe to store the results in state + state.f_x, state.L, state.ev, slope = _lagrangian_lineslope(αs, d, constraints, state) + state.L, slope +end +lagrangian_lineslope!(αs, d, constraints, state, method) = lagrangian_lineslope(αs, d, constraints, state) + +slopealpha(sx, gx, bstep, bgrad) = [dot(sx, gx), + dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c), + dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc), + dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)] + +function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) + αs->lagrangian_lineslope!(αs, d, constraints, state, method) +end +function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) + αs->lagrangian_linefunc!(αs, d, constraints, state, method) +end + ## Computation of Lagrangian terms: barrier penalty """ barrier_value(constraints, state) -> val diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index 681d7682b..881377a2c 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -1,17 
+1,57 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, - c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α)))) + c1 = 0.5, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α)))) α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) + αmin = αminfrac * α L0, L1, L2 = Lcoefsα f_calls = 0 while α >= αmin f_calls += 1 - val = ϕ(α, αI) - if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) + val = ϕ((α, αI)) + δ = evalgrad(L1, α, αI) + if isfinite(val) && abs(val - (L0 + δ)) <= c1*abs(val-L0) return α, αI, f_calls, 0 end α *= ρ αI *= ρ end - ϕ(zero(α), zero(αI)) # to ensure that state gets set appropriately + ϕ((zero(α), zero(αI))) # to ensure that state gets set appropriately return zero(α), zero(αI), f_calls, 0 end + +function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, + c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α)))) + α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) + αmin = αminfrac * α + L0, L1, L2 = Lcoefsα + f_calls = 0 + while α >= αmin + f_calls += 1 + val, slopeα = ϕ((α, αI)) + δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2 + δslope = mulhess(L2, α, αI) + # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1)) + # @show (α, αI, r0, r1) + if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) && + norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1) + return α, αI, f_calls, f_calls + end + α *= ρ + αI *= ρ + end + ϕ((zero(α), zero(αI))) # to ensure that state gets set appropriately + return zero(α), zero(αI), f_calls, f_calls +end + +# Evaluate for a step parametrized as [α, α, αI, α] +function evalgrad(slopeα, α, αI) + α*(slopeα[1] + slopeα[2] + slopeα[4]) + αI*slopeα[3] +end + +function mulhess(Hα, α, αI) + αv = [α, α, αI, α] + Hα*αv +end +function evalhess(Hα, α, αI) + αv = [α, α, αI, α] + dot(αv, Hα*αv) +end diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 08f14162c..9672c755a 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -2,7 +2,7 @@ 
immutable IPNewton{F} <: IPOptimizer{F} linesearch!::F end -IPNewton(; linesearch!::Function = backtrack_constrained) = +IPNewton(; linesearch!::Function = backtrack_constrained_grad) = IPNewton(linesearch!) type IPNewtonState{T,N} <: AbstractBarrierState @@ -24,7 +24,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState constr_J::Matrix{T} # value of the user-supplied Jacobian at x ev::T # equality violation, ∑_i λ_Ei (c*_i - c_i) @add_linesearch_fields() - b_ls::BarrierLineSearch{T} + b_ls::BarrierLineSearchGrad{T} gtilde::Vector{T} end @@ -89,7 +89,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c) bgrad = similar(bstate) bstep = similar(bstate) - b_ls = BarrierLineSearch(similar(constr_c), similar(bstate)) + # b_ls = BarrierLineSearch(similar(constr_c), similar(bstate)) + b_ls = BarrierLineSearchGrad(similar(constr_c), similar(constr_J), similar(bstate), similar(bstate)) state = IPNewtonState("Interior-point Newton's Method", length(initial_x), @@ -167,7 +168,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state λ = userλ(bstate.λc, constraints) λ[bounds.eqc] = -bstate.λcE # the negative sign is from the Hessian constraints.h!(x, λ, Hxx) - # Add the Jacobian terms (J'*Hss*J) + # Add the Jacobian terms (JI'*Hss*JI) JIc = view5(J, bounds.ineqc, :) Hssc = Diagonal(bstate.λc./bstate.slack_c) HJ = JIc'*Hssc*JIc @@ -205,7 +206,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc) # Determine the actual distance of movement along the search line - ϕ = (α,αI)->lagrangian_linefunc!(α, αI, d, constraints, state, method) + ϕ = linesearch_anon(d, constraints, state, method) state.alpha, αI, f_update, g_update = method.linesearch!(ϕ, T(1), αmax, αImax, qp) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update @@ -215,7 +216,7 @@ 
function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Update current position # x = x + alpha * s ls_update!(state.x, state.x, state.s, state.alpha) - ls_update!(bstate, bstate, bstep, state.alpha, αI) + ls_update!(bstate, bstate, bstep, (state.alpha, αI)) # Evaluate the constraints at the new position constraints.c!(state.x, state.constr_c) @@ -267,16 +268,43 @@ function is_smaller_eps(ref, step) ise end +""" + quadratic_parameters(bounds, state) -> val, slopeα, Hα + +Return the parameters for the quadratic fit of the behavior of the +lagrangian for positions parametrized as a function of the 4-vector +`α = (αx, αs, αI, αE)`, where the step is + + (αx * Δx, αs * Δs, αI * ΔλI, αE * ΔλE) + +and `Δx`, `Δs`, `ΔλI`, and `ΔλE` are the current search directions in +the parameters. As a function of `α`, the local model is expressed as + + val + dot(α, slopeα) + (α'*Hα*α)/2 +""" function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) - slope = dot(state.s, state.gtilde) + - dot(state.bstep.λxE, state.bgrad.λxE) + - dot(state.bstep.λcE, state.bgrad.λcE) - # For the curvature, use the original hessian (before forcing - # positive-definiteness) - q = dot(state.s, state.H*state.s) - JE = view5(state.constr_J, bounds.eqc, :) - q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE) - state.L, slope, q + bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad + slopeα = slopealpha(state.s, state.g, bstep, bgrad) + # For the curvature, use the original hessian (before adding the JI'*Hss*JI term) + # This undoes the dual correction. However, for linesearch we need + # primal, so calculate both. 
+ jic = view5(state.constr_J, bounds.ineqc, :)*state.s + HsscD = Diagonal(bstate.λc./bstate.slack_c) + HsscP = Diagonal(state.μ./bstate.slack_c.^2) + jix = view(state.s, bounds.ineqx) + HssxD = Diagonal(bstate.λx./bstate.slack_x) + HssxP = Diagonal(state.μ./bstate.slack_x.^2) + jHj = dot(jic, HsscD*jic) + dot(jix, HssxD*jix) + ji = dot(bstep.λc, Diagonal(bounds.σc)*jic) + dot(bstep.λx, Diagonal(bounds.σx)*jix) + je = dot(bstep.λcE, view5(state.constr_J, bounds.eqc, :)*state.s) + + dot(bstep.λxE, view(state.s, bounds.eqx)) + hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x) + si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx) + Hα = [state.s'*state.H*state.s - jHj 0 -ji -je; + 0 hss si 0; + -ji si 0 0; + -je 0 0 0] + state.L, slopeα, Hα end # Utility functions that assist in testing: they return the "full diff --git a/test/constraints.jl b/test/constraints.jl index 28d0bbc0e..abc48c0cf 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -249,7 +249,7 @@ ConstraintBounds: heq = zeros(length(x), length(x)) ch!(x, bstate.λcE, heq) @test Optim.gf(state) ≈ [gx; cbar-c] - @test Optim.Hf(constraints, state) ≈ [heq -J'; + @test Optim.Hf(constraints, state) ≈ [full(cholfact(Positive, heq)) -J'; -J zeros(size(J,1), size(J,1))] ## Nonlinear inequality constraints bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2) @@ -396,15 +396,18 @@ ConstraintBounds: Optim.solve_step!(state, constraints) @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0) qp = Optim.quadratic_parameters(constraints.bounds, state) + g0, H0 = autoqp(d, constraints, state) @test qp[1] ≈ F*x0-μ*log(x0) - @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ - @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2 + @test qp[2] ≈ g0 #-(F-μ/x0)^2*x0^2/μ + @test qp[3] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2 bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx, 
state.s[bounds.ineqx].*bounds.σx) - ϕ = (α,αI)->Optim.lagrangian_linefunc(α, αI, d, constraints, state) - @test ϕ(0,0) ≈ qp[1] - α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp) + ϕ = Optim.linesearch_anon(d, constraints, state, method) + val0 = ϕ((0,0)) + val0 = isa(val0, Tuple) ? val0[1] : val0 + @test val0 ≈ qp[1] + α, αI, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp) @test α > 1e-3 end From b91eac6bce67eeeef3e61d564b8a189adb60ea49 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 21 Nov 2016 12:38:56 -0600 Subject: [PATCH 30/40] Check that solution has enough precision to count --- src/ipnewton.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 9672c755a..2a654f386 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -240,6 +240,13 @@ function solve_step!(state::IPNewtonState, constraints) MF = cholfact(Positive, M, Val{true}) ΔλE = MF \ (gE + JE * (HxxF \ state.gtilde)) Δx = HxxF \ (JE'*ΔλE - state.gtilde) + if norm(gE) < norm(gE - JE*Δx) # || + # norm(state.gtilde) < norm(full(HxxF)*Δx - JE'*ΔλE + state.gtilde) + # Precision problems gave us a worse solution than the one we started with, abort + fill!(s, 0) + fill!(bstep, 0) + return state + end copy!(s, Δx) k = unpack_vec!(bstep.λxE, ΔλE, 0) k = unpack_vec!(bstep.λcE, ΔλE, k) From 69bd212cd807c1ea3ab7c19a99862e4fa11c2fba Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 22 Nov 2016 08:26:58 -0600 Subject: [PATCH 31/40] Add the Beale unconstrained problem This one revealed problems with the interior-point Newton method, specifically the linesearch --- src/problems/unconstrained.jl | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl index 431ae0a53..fae520350 100644 --- a/src/problems/unconstrained.jl +++ b/src/problems/unconstrained.jl @@ -8,6 +8,8 @@ using ..OptimizationProblem ### Link: 
www.researchgate.net/profile/Montaz_Ali/publication/226654862_A_Numerical_Evaluation_of_Several_Stochastic_Algorithms_on_Selected_Continuous_Global_Optimization_Test_Problems/links/00b4952bef133a1a6b000000.pdf ### ### [2] Fletcher & Powell: A rapidly convergent descent method for minimization, +### +### [3] More, Garbow, Hillstrom (1981): Testing Unconstrained Optimization Software, ACM Trans. Math. Soft. 7: 17-41. examples = Dict{AbstractString, OptimizationProblem}() @@ -349,4 +351,65 @@ examples["Rosenbrock"] = OptimizationProblem("Rosenbrock", true, true) +########################################################################## +### +### Beale (2D) +### +### Problem 5 in [3] +### +### Sum-of-squares objective, non-convex with g'*inv(H)*g == 0 at the +### initial position. +### +########################################################################## + +const beale_y = [1.5, 2.25, 2.625] + +beale_f(x) = [beale_y[i] - x[1]*(1-x[2]^i) for i = 1:3] +beale_J(x) = hcat([-(1-x[2]^i) for i = 1:3], + [i*x[1]*x[2]^(i-1) for i = 1:3]) +function beale_H(x, i) + od = i*x[2]^(i-1) + d2 = i > 1 ? 
i*(i-1)*x[1]*x[2]^(i-2) : zero(x[2]) + [0 od; od d2] +end + +beale(x::AbstractVector) = sumsq_obj(beale_f, x) + +function beale_gradient!(x::AbstractVector, g::AbstractVector) + sumsq_gradient!(beale_f, beale_J, x, g) +end + +function beale_hessian!(x::AbstractVector, h::AbstractMatrix) + sumsq_hessian!(beale_f, beale_J, beale_H, x, h) +end + +examples["Beale"] = OptimizationProblem("Beale", + beale, + beale_gradient!, + beale_hessian!, + [1.0, 1.0], + [3.0, 0.5], + true, + true) + +### General utilities for sum-of-squares functions +# Requires f(x) and J(x) computes the values and jacobian at x of a set of functions, and +# that H(x, i) computes the hessian of the ith function + +sumsq_obj(f, x) = sum(f(x).^2) + +function sumsq_gradient!(f, J, x::AbstractVector, g::AbstractVector) + copy!(g, sum((2*f(x)).*J(x), 1)) +end + +function sumsq_hessian!(f, J, H, x::AbstractVector, h::AbstractMatrix) + fx = f(x) + Jx = J(x) + htmp = 2*(Jx'*Jx) + for i = 1:length(fx) + htmp += (2*fx[i])*H(x, i) + end + copy!(h, htmp) +end + end # module From d4e5192075c19a0b082f0fa1908f1ca647a398ed Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 22 Nov 2016 08:27:37 -0600 Subject: [PATCH 32/40] WIP --- src/iplinesearch.jl | 3 +++ src/ipnewton.jl | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index 881377a2c..f43dc43d7 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -23,6 +23,7 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) αmin = αminfrac * α L0, L1, L2 = Lcoefsα + # @show L2 f_calls = 0 while α >= αmin f_calls += 1 @@ -30,6 +31,8 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2 δslope = mulhess(L2, α, αI) # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1)) + # @show val L0 L0+δval + # @show slopeα L1 L1+δslope # 
@show (α, αI, r0, r1) if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) && norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 2a654f386..0982d9ec8 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -307,10 +307,11 @@ function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) dot(bstep.λxE, view(state.s, bounds.eqx)) hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x) si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx) - Hα = [state.s'*state.H*state.s - jHj 0 -ji -je; - 0 hss si 0; - -ji si 0 0; - -je 0 0 0] + hxx = dot(state.s, state.H*state.s) - jHj + Hα = [hxx 0 -ji -je; + 0 hss si 0; + -ji si 0 0; + -je 0 0 0] state.L, slopeα, Hα end From 0aee5158c280258c065368ae10b01fc4c2953cc0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 22 Nov 2016 23:26:58 -0600 Subject: [PATCH 33/40] ipnewton linesearch: switch to one-sided tests This appears to fix many fail-to-converge problems. In some cases, the Hessian prediction of the slope change is approximately or exactly 0; in that case the slope condition is impossible to satisfy. A cure is to use the positive-definite modified Hessian (which will never give 0 for the estimated Hessian effect on the slope) in the computation of the linesearch local-model estimate. This only makes sense in the context of switching the line search slope criterion to an upper bound (which is sensible in its own right). An unfortunate consequence is that we have to compute 3 cholesky factorizations rather than 2. The 3rd is necessary only for the linesearch, which is unfortunate. 
--- src/iplinesearch.jl | 6 +++--- src/ipnewton.jl | 45 ++++++++++++++++++++++++--------------------- test/constraints.jl | 13 ++++++------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index f43dc43d7..b1b3bd240 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -8,7 +8,7 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, f_calls += 1 val = ϕ((α, αI)) δ = evalgrad(L1, α, αI) - if isfinite(val) && abs(val - (L0 + δ)) <= c1*abs(val-L0) + if isfinite(val) && val - (L0 + δ) <= c1*abs(val-L0) return α, αI, f_calls, 0 end α *= ρ @@ -34,8 +34,8 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, # @show val L0 L0+δval # @show slopeα L1 L1+δslope # @show (α, αI, r0, r1) - if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) && - norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1) + if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) && + all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1)) return α, αI, f_calls, f_calls end α *= ρ diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 0982d9ec8..66ba9829f 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -11,6 +11,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState g::Array{T,N} f_x_previous::T H::Matrix{T} + HP Hd::Vector{Int8} s::Array{T,N} # step for x # Barrier penalty fields @@ -26,6 +27,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState @add_linesearch_fields() b_ls::BarrierLineSearchGrad{T} gtilde::Vector{T} + Htilde end function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,N}) @@ -40,6 +42,7 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S, convert(Array{T}, state.g), T(state.f_x_previous), convert(Array{T}, state.H), + state.HP, state.Hd, convert(Array{T}, state.s), T(state.μ), @@ -57,7 +60,8 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S, state.mayterminate, state.lsr, convert(BarrierLineSearchGrad{T}, 
state.b_ls), - convert(Array{T}, state.gtilde) + convert(Array{T}, state.gtilde), + state.Htilde, ) end @@ -103,6 +107,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous H, + 0, # will be replaced Hd, similar(initial_x), # Maintain current x-search direction in state.s μ, @@ -116,7 +121,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct T(NaN), @initial_linesearch()..., # Maintain a cache for line search results in state.lsr b_ls, - gtilde) + gtilde, + 0) d.h!(initial_x, state.H) Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1)) @@ -168,17 +174,20 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state λ = userλ(bstate.λc, constraints) λ[bounds.eqc] = -bstate.λcE # the negative sign is from the Hessian constraints.h!(x, λ, Hxx) + state.HP = cholfact(Positive, Hxx, Val{true}) # Add the Jacobian terms (JI'*Hss*JI) + Htilde = full(state.HP) JIc = view5(J, bounds.ineqc, :) Hssc = Diagonal(bstate.λc./bstate.slack_c) HJ = JIc'*Hssc*JIc for j = 1:n, i = 1:n - Hxx[i,j] += HJ[i,j] + Htilde[i,j] += HJ[i,j] end # Add the variable inequalities portions of J'*Hssx*J for (i,j) in enumerate(bounds.ineqx) - Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i] + Htilde[j,j] += bstate.λx[i]/bstate.slack_x[i] end + state.Htilde = cholfact(Hermitian(Htilde)) state end @@ -230,18 +239,18 @@ function solve_step!(state::IPNewtonState, constraints) x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad # Solve the Newton step - Hxx = state.H + Htilde = state.Htilde JE = jacobianE(state, bounds) # Q, R, p = qr(JE', Val{true}) gE = [bgrad.λxE; bgrad.λcE] - HxxF = cholfact(Positive, Hxx, Val{true}) - M = JE*(HxxF \ JE') + M = JE*(Htilde \ JE') MF = cholfact(Positive, M, Val{true}) - ΔλE = MF \ (gE + JE * (HxxF \ 
state.gtilde)) - Δx = HxxF \ (JE'*ΔλE - state.gtilde) - if norm(gE) < norm(gE - JE*Δx) # || - # norm(state.gtilde) < norm(full(HxxF)*Δx - JE'*ΔλE + state.gtilde) + ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde)) + Δx = Htilde \ (JE'*ΔλE - state.gtilde) + # TODO: don't require full here + if norm(gE) + norm(state.gtilde) < max(norm(gE - JE*Δx), + norm(full(Htilde)*Δx - JE'*ΔλE + state.gtilde)) # Precision problems gave us a worse solution than the one we started with, abort fill!(s, 0) fill!(bstep, 0) @@ -292,22 +301,17 @@ the parameters. As a function of `α`, the local model is expressed as function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState) bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad slopeα = slopealpha(state.s, state.g, bstep, bgrad) - # For the curvature, use the original hessian (before adding the JI'*Hss*JI term) - # This undoes the dual correction. However, for linesearch we need - # primal, so calculate both. + jic = view5(state.constr_J, bounds.ineqc, :)*state.s - HsscD = Diagonal(bstate.λc./bstate.slack_c) - HsscP = Diagonal(state.μ./bstate.slack_c.^2) + HsscP = Diagonal(state.μ./bstate.slack_c.^2) # for linesearch we need primal jix = view(state.s, bounds.ineqx) - HssxD = Diagonal(bstate.λx./bstate.slack_x) HssxP = Diagonal(state.μ./bstate.slack_x.^2) - jHj = dot(jic, HsscD*jic) + dot(jix, HssxD*jix) ji = dot(bstep.λc, Diagonal(bounds.σc)*jic) + dot(bstep.λx, Diagonal(bounds.σx)*jix) je = dot(bstep.λcE, view5(state.constr_J, bounds.eqc, :)*state.s) + dot(bstep.λxE, view(state.s, bounds.eqx)) hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x) si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx) - hxx = dot(state.s, state.H*state.s) - jHj + hxx = dot(state.s, full(state.HP)*state.s) # TODO: don't require full here Hα = [hxx 0 -ji -je; 0 hss si 0; -ji si 0 0; @@ -320,8 +324,7 @@ end # eliminated. 
function Hf(bounds::ConstraintBounds, state) JE = jacobianE(state, bounds) - HxxF = cholfact(Positive, state.H) - Hf = [full(HxxF) -JE'; + Hf = [full(state.Htilde) -JE'; -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))] end Hf(constraints, state) = Hf(constraints.bounds, state) diff --git a/test/constraints.jl b/test/constraints.jl index abc48c0cf..ecd072a29 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -161,14 +161,14 @@ ConstraintBounds: state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ -μ./y - @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) + @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ -μ./y - @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) + @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2) ## General inequality constraints on variables lb, ub = rand(length(x))-2, rand(length(x))+1 bounds = Optim.ConstraintBounds(lb, ub, [], []) @@ -208,7 +208,7 @@ ConstraintBounds: gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp end @test Optim.gf(state) ≈ gxs - @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) + @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], lb, ub) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) @@ -217,7 +217,7 @@ ConstraintBounds: copy!(state.bstate.λc, bstate.λx) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ gxs - @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) + @test 
Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs) ## Nonlinear equality constraints cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] cfun! = (x, c) -> copy!(c, cfun(x)) @@ -282,11 +282,10 @@ ConstraintBounds: # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2)) # Primal-dual - hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq + hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) - hp = full(cholfact(Positive, hxx)) @test Optim.gf(state) ≈ gf - @test Optim.Hf(constraints, state) ≈ hp + @test Optim.Hf(constraints, state) ≈ hxx end @testset "IPNewton initialization" begin From 3b5d08b30144064e0606205cb0fe505b629fa082 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 00:00:08 -0600 Subject: [PATCH 34/40] Add option to show linesearch progress --- src/interior.jl | 2 +- src/iplinesearch.jl | 19 +++++++++++++------ src/ipnewton.jl | 11 +++++++---- src/types.jl | 6 ++++-- test/constraints.jl | 6 +++--- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 3082a8e08..497b142b8 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -201,7 +201,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai iteration += 1 iterationμ += 1 - update_state!(d, constraints, state, method) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS) + update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! 
to stop (eg dx_dg == 0.0 in BFGS) update_asneeded_fg!(d, constraints, state, method) x_converged, f_converged, g_converged, converged = assess_convergence(state, options) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index b1b3bd240..4c2d690c2 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -19,21 +19,28 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, end function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, - c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α)))) + c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), + αminfrac = sqrt(eps(one(α))); show_linesearch::Bool=false) α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) αmin = αminfrac * α L0, L1, L2 = Lcoefsα - # @show L2 + if show_linesearch + println("L0 = $L0, L1 = $L1, L2 = ") + Base.showarray(STDOUT, L2, false) + end f_calls = 0 while α >= αmin f_calls += 1 val, slopeα = ϕ((α, αI)) δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2 δslope = mulhess(L2, α, αI) - # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1)) - # @show val L0 L0+δval - # @show slopeα L1 L1+δslope - # @show (α, αI, r0, r1) + if show_linesearch + @show (α, αI) + @show val L0 L0+δval + @show slopeα L1 L1+δslope + r0, r1 = (val - (L0 + δval)) / (c1*abs(val-L0)), (slopeα - (L1 + δslope))./(c2*(slopeα-L1)) + @show (r0, r1) + end if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) && all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1)) return α, αI, f_calls, f_calls diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 66ba9829f..ebccf1035 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -70,8 +70,11 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct mc = nconstraints(constraints) constr_c = Array{T}(mc) constraints.c!(initial_x, constr_c) - isinterior(constraints, initial_x, constr_c) || (warn("initial guess is not an interior point"); Base.show_backtrace(STDOUT, backtrace())) - + if 
!isinterior(constraints, initial_x, constr_c) + warn("initial guess is not an interior point") + Base.show_backtrace(STDERR, backtrace()) + println(STDERR) + end # Allocate fields for the objective function n = length(initial_x) g = Array(T, n) @@ -192,7 +195,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state state end -function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton) +function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton, options) state.f_x_previous, state.L_previous = state.f_x, state.L bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds state = solve_step!(state, constraints) @@ -217,7 +220,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Determine the actual distance of movement along the search line ϕ = linesearch_anon(d, constraints, state, method) state.alpha, αI, f_update, g_update = - method.linesearch!(ϕ, T(1), αmax, αImax, qp) + method.linesearch!(ϕ, T(1), αmax, αImax, qp; show_linesearch=options.show_linesearch) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update # Maintain a record of previous position diff --git a/src/types.jl b/src/types.jl index 72a2ffcb2..aab457e8d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -13,6 +13,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}} store_trace::Bool show_trace::Bool extended_trace::Bool + show_linesearch::Bool autodiff::Bool show_every::Int callback::TCallback @@ -30,6 +31,7 @@ function OptimizationOptions(; store_trace::Bool = false, show_trace::Bool = false, extended_trace::Bool = false, + show_linesearch::Bool = false, autodiff::Bool = false, show_every::Integer = 1, callback = nothing, @@ -42,8 +44,8 @@ function OptimizationOptions(; end OptimizationOptions{typeof(callback)}( Float64(x_tol), Float64(f_tol), 
Float64(g_tol), Int(successive_f_tol), - Int(iterations), store_trace, show_trace, extended_trace, autodiff, - Int(show_every), callback, time_limit, μfactor, μ0) + Int(iterations), store_trace, show_trace, extended_trace, show_linesearch, + autodiff, Int(show_every), callback, time_limit, μfactor, μ0) end function print_header(options::OptimizationOptions) diff --git a/test/constraints.jl b/test/constraints.jl index ecd072a29..3f0cd3830 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -430,7 +430,7 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...) state = Optim.initial_state(method, options, d, constraints, [μ/F*10]) for i = 1:10 - Optim.update_state!(d, constraints, state, method) + Optim.update_state!(d, constraints, state, method, options) Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end @@ -440,7 +440,7 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...) state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) for i = 1:10 - Optim.update_state!(d, constraints, state, method) + Optim.update_state!(d, constraints, state, method, options) Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end @@ -455,7 +455,7 @@ ConstraintBounds: [], [], σswap(σ, [Float64(σ)], [])...) 
state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) for i = 1:10 - Optim.update_state!(d, constraints, state, method) + Optim.update_state!(d, constraints, state, method, options) Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end From b250e77166fff32a2760837f32ec6a9c01794c9d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 10:12:02 -0600 Subject: [PATCH 35/40] Switch back to single-component alpha in linesearch The slope criterion is easier, and there are fewer factorizations needed this way This also improves printing during linesearch --- src/interior.jl | 13 +++++++----- src/iplinesearch.jl | 29 +++++++++++-------------- src/ipnewton.jl | 48 ++++++++++++++++++++++++------------------ src/utilities/trace.jl | 10 +++++---- test/constraints.jl | 27 ++++++++++++------------ 5 files changed, 67 insertions(+), 60 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 497b142b8..eef0d72b1 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -87,6 +87,9 @@ Base.convert{T}(::Type{BarrierStateVars{T}}, bstate::BarrierStateVars) = convert(Array{T}, bstate.λxE), convert(Array{T}, bstate.λcE)) +Base.isempty(bstate::BarrierStateVars) = isempty(bstate.slack_x) & + isempty(bstate.slack_c) & isempty(bstate.λxE) & isempty(bstate.λcE) + Base.eltype{T}(::Type{BarrierStateVars{T}}) = T Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv)) @@ -169,7 +172,7 @@ ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α::Number) = ls_update!(out, base, step, (α,α,α,α)) ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::AbstractVector) = - ls_update!(out, base, step, (αs...,)) + ls_update!(out, base, step, αs[1]) # (αs...,)) function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, 
initial_x::Array{T}, method::M, options::OptimizationOptions) t0 = time() # Initial time stamp used to control early stopping by options.time_limit @@ -543,10 +546,10 @@ function lagrangian_lineslope!(αs, d, constraints, state, method::IPOptimizer{t end lagrangian_lineslope!(αs, d, constraints, state, method) = lagrangian_lineslope(αs, d, constraints, state) -slopealpha(sx, gx, bstep, bgrad) = [dot(sx, gx), - dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c), - dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc), - dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)] +slopealpha(sx, gx, bstep, bgrad) = dot(sx, gx) + + dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c) + + dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc) + + dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE) function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) αs->lagrangian_lineslope!(αs, d, constraints, state, method) diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl index 4c2d690c2..647f6a7a9 100644 --- a/src/iplinesearch.jl +++ b/src/iplinesearch.jl @@ -18,38 +18,33 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα, return zero(α), zero(αI), f_calls, 0 end -function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα, +function backtrack_constrained_grad(ϕ, α, αmax, Lcoefsα, c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α))); show_linesearch::Bool=false) - α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax) + α = min(α, 0.999*αmax) αmin = αminfrac * α L0, L1, L2 = Lcoefsα if show_linesearch - println("L0 = $L0, L1 = $L1, L2 = ") - Base.showarray(STDOUT, L2, false) + println("L0 = $L0, L1 = $L1, L2 = $L2") end f_calls = 0 while α >= αmin f_calls += 1 - val, slopeα = ϕ((α, αI)) - δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2 - δslope = mulhess(L2, α, αI) + val, slopeα = ϕ(α) + δval = L1*α + δslope = L2*α if show_linesearch - @show (α, αI) - @show val L0 L0+δval - @show 
slopeα L1 L1+δslope - r0, r1 = (val - (L0 + δval)) / (c1*abs(val-L0)), (slopeα - (L1 + δslope))./(c2*(slopeα-L1)) - @show (r0, r1) + println("α = $α, value: ($L0, $val, $(L0+δval)), slope: ($L1, $slopeα, $(L1+δslope))") end if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) && - all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1)) - return α, αI, f_calls, f_calls + (slopeα < c2*abs(L1) || + slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1)) + return α, f_calls, f_calls end α *= ρ - αI *= ρ end - ϕ((zero(α), zero(αI))) # to ensure that state gets set appropriately - return zero(α), zero(αI), f_calls, f_calls + ϕ(zero(α)) # to ensure that state gets set appropriately + return zero(α), f_calls, f_calls end # Evaluate for a step parametrized as [α, α, αI, α] diff --git a/src/ipnewton.jl b/src/ipnewton.jl index ebccf1035..d25e08f06 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -177,20 +177,18 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state λ = userλ(bstate.λc, constraints) λ[bounds.eqc] = -bstate.λcE # the negative sign is from the Hessian constraints.h!(x, λ, Hxx) - state.HP = cholfact(Positive, Hxx, Val{true}) # Add the Jacobian terms (JI'*Hss*JI) - Htilde = full(state.HP) JIc = view5(J, bounds.ineqc, :) Hssc = Diagonal(bstate.λc./bstate.slack_c) HJ = JIc'*Hssc*JIc for j = 1:n, i = 1:n - Htilde[i,j] += HJ[i,j] + Hxx[i,j] += HJ[i,j] end # Add the variable inequalities portions of J'*Hssx*J for (i,j) in enumerate(bounds.ineqx) - Htilde[j,j] += bstate.λx[i]/bstate.slack_x[i] + Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i] end - state.Htilde = cholfact(Hermitian(Htilde)) + state.Htilde = cholfact(Positive, state.H, Val{true}) state end @@ -198,7 +196,7 @@ end function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton, options) state.f_x_previous, state.L_previous = state.f_x, state.L bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds - state = 
solve_step!(state, constraints) + qp = solve_step!(state, constraints, options) # If a step α=1 will not change any of the parameters, we can quit now. # This prevents a futile linesearch. if is_smaller_eps(state.x, state.s) && @@ -208,19 +206,19 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction is_smaller_eps(bstate.λc, bstep.λc) return false end - qp = quadratic_parameters(bounds, state) + # qp = quadratic_parameters(bounds, state) # Estimate αmax, the upper bound on distance of movement along the search line - αmax = αImax = convert(eltype(bstate), Inf) + αmax = convert(eltype(bstate), Inf) αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x) αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c) - αImax = estimate_maxstep(αImax, bstate.λx, bstep.λx) - αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc) + αmax = estimate_maxstep(αmax, bstate.λx, bstep.λx) + αmax = estimate_maxstep(αmax, bstate.λc, bstep.λc) # Determine the actual distance of movement along the search line ϕ = linesearch_anon(d, constraints, state, method) - state.alpha, αI, f_update, g_update = - method.linesearch!(ϕ, T(1), αmax, αImax, qp; show_linesearch=options.show_linesearch) + state.alpha, f_update, g_update = + method.linesearch!(ϕ, T(1), αmax, qp; show_linesearch=options.show_linesearch) state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update # Maintain a record of previous position @@ -228,7 +226,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction # Update current position # x = x + alpha * s ls_update!(state.x, state.x, state.s, state.alpha) - ls_update!(bstate, bstate, bstep, (state.alpha, αI)) + ls_update!(bstate, bstate, bstep, state.alpha) # Evaluate the constraints at the new position constraints.c!(state.x, state.constr_c) @@ -238,22 +236,28 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction false end -function 
solve_step!(state::IPNewtonState, constraints) +function solve_step!(state::IPNewtonState, constraints, options) x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad - # Solve the Newton step Htilde = state.Htilde + # Solve the Newton step JE = jacobianE(state, bounds) - # Q, R, p = qr(JE', Val{true}) gE = [bgrad.λxE; bgrad.λcE] M = JE*(Htilde \ JE') MF = cholfact(Positive, M, Val{true}) ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde)) Δx = Htilde \ (JE'*ΔλE - state.gtilde) - # TODO: don't require full here - if norm(gE) + norm(state.gtilde) < max(norm(gE - JE*Δx), - norm(full(Htilde)*Δx - JE'*ΔλE + state.gtilde)) + # Use the real H in estimating the linesearch quadratic parameters + Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx + # Also check that the solution to the linear equations represents an improvement + Hpstepx = full(Htilde)*Δx - JE'*ΔλE # TODO: don't use full here + if options.show_linesearch + println("|gx| = $(norm(state.gtilde)), |Hstepx + gx| = $(norm(Hpstepx+state.gtilde))") + println("|gE| = $(norm(gE)), |HstepλE + gE| = $(norm(HstepλE+gE))") + end + if norm(gE) + norm(state.gtilde) < max(norm(HstepλE + gE), + norm(Hpstepx + state.gtilde)) # Precision problems gave us a worse solution than the one we started with, abort fill!(s, 0) fill!(bstep, 0) @@ -276,7 +280,9 @@ function solve_step!(state::IPNewtonState, constraints) # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] end - state + # Solve for the quadratic parameters + qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE) + qp end function is_smaller_eps(ref, step) @@ -290,7 +296,7 @@ end """ quadratic_parameters(bounds, state) -> val, slopeα, Hα -Return the parameters for the quadratic fit of the behavior of the +OUTDATED! 
Return the parameters for the quadratic fit of the behavior of the lagrangian for positions parametrized as a function of the 4-vector `α = (αx, αs, αI, αE)`, where the step is diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl index 1ad4139dc..e62e7829f 100644 --- a/src/utilities/trace.jl +++ b/src/utilities/trace.jl @@ -124,11 +124,13 @@ function trace!(tr, state, iteration, method::IPOptimizer, options) dt["α"] = state.alpha dt["x"] = copy(state.x) dt["g(x)"] = copy(state.g) - dt["gtilde(x)"] = copy(state.gtilde) dt["h(x)"] = copy(state.H) - dt["bstate"] = copy(state.bstate) - dt["bgrad"] = copy(state.bgrad) - dt["c"] = copy(state.constr_c) + if !isempty(state.bstate) + dt["gtilde(x)"] = copy(state.gtilde) + dt["bstate"] = copy(state.bstate) + dt["bgrad"] = copy(state.bgrad) + dt["c"] = copy(state.constr_c) + end end g_norm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) update!(tr, diff --git a/test/constraints.jl b/test/constraints.jl index 3f0cd3830..7449c03cb 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -161,14 +161,14 @@ ConstraintBounds: state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ -μ./y - @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2) + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ -μ./y - @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2) + @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables lb, ub = rand(length(x))-2, rand(length(x))+1 bounds = Optim.ConstraintBounds(lb, ub, 
[], []) @@ -208,7 +208,7 @@ ConstraintBounds: gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp end @test Optim.gf(state) ≈ gxs - @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs) + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], lb, ub) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) @@ -217,7 +217,7 @@ ConstraintBounds: copy!(state.bstate.λc, bstate.λx) setstate!(state, μ, d0, constraints, method) @test Optim.gf(state) ≈ gxs - @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs) + @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) ## Nonlinear equality constraints cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] cfun! = (x, c) -> copy!(c, cfun(x)) @@ -282,10 +282,11 @@ ConstraintBounds: # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2)) # Primal-dual - hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI +# hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI + hxx = -hineq + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) @test Optim.gf(state) ≈ gf - @test Optim.Hf(constraints, state) ≈ hxx + @test Optim.Hf(constraints, state) ≈ full(cholfact(Positive, hxx, Val{true})) end @testset "IPNewton initialization" begin @@ -374,14 +375,15 @@ ConstraintBounds: # Note that state must be fully up-to-date, and you must # have also called Optim.solve_step! 
p = Optim.pack_vec(state.x, state.bstate) - chunksize = min(8, max(length(p), 4)) # since αs is of length 4 + chunksize = 1 #min(8, length(p)) TD = ForwardDiff.Dual{chunksize,eltype(p)} TD2 = ForwardDiff.Dual{chunksize,ForwardDiff.Dual{chunksize,eltype(p)}} stated = convert(Optim.IPNewtonState{TD,1}, state) stated2 = convert(Optim.IPNewtonState{TD2,1}, state) ϕd = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated) ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2) - ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4)) +# ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4)) + ForwardDiff.gradient(ϕd, [0.0]), ForwardDiff.hessian(ϕd2, [0.0]) end F = 1000 d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) @@ -392,13 +394,12 @@ ConstraintBounds: # Nonnegativity (the case that doesn't require slack variables) constraints = TwiceDifferentiableConstraintsFunction([0.0], []) state = Optim.initial_state(method, options, d, constraints, [x0]) - Optim.solve_step!(state, constraints) + qp = Optim.solve_step!(state, constraints) @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0) - qp = Optim.quadratic_parameters(constraints.bounds, state) g0, H0 = autoqp(d, constraints, state) @test qp[1] ≈ F*x0-μ*log(x0) - @test qp[2] ≈ g0 #-(F-μ/x0)^2*x0^2/μ - @test qp[3] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2 + @test [qp[2]] ≈ g0 #-(F-μ/x0)^2*x0^2/μ + @test [qp[3]] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2 bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx, state.s[bounds.ineqx].*bounds.σx) @@ -406,7 +407,7 @@ ConstraintBounds: val0 = ϕ((0,0)) val0 = isa(val0, Tuple) ? 
val0[1] : val0 @test val0 ≈ qp[1] - α, αI, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp) + α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp) @test α > 1e-3 end From 5bade6574c2de42d76076adf3de65b6854a62110 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 11:04:03 -0600 Subject: [PATCH 36/40] Adopt an adaptive barrier penalty based on complementarity --- src/interior.jl | 51 ++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index eef0d72b1..725e17f4c 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -184,16 +184,13 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai stopped, stopped_by_callback, stopped_by_time_limit = false, false, false x_converged, f_converged, counter_f_tol = false, false, 0 - gnorm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) - g_converged = gnorm < options.g_tol + g_converged = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) < options.g_tol converged = g_converged - iteration, iterationμ = 0, 0 + iteration = 0 options.show_trace && print_header(method) - Δfmax = zero(state.f_x) - while !converged && !stopped && iteration < options.iterations # If tracing, update trace with trace!. If a callback is provided, it # should have boolean return value that controls the variable stopped_by_callback. @@ -202,10 +199,14 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai stopped_by_callback = trace!(tr, state, iteration, method, options) end iteration += 1 - iterationμ += 1 update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! 
to stop (eg dx_dg == 0.0 in BFGS) - update_asneeded_fg!(d, constraints, state, method) + + # Adaptive μ + μ, ξ = complementarity_μ(state.bstate) + state.μ = μ + update_fg!(d, constraints, state, method) + x_converged, f_converged, g_converged, converged = assess_convergence(state, options) # With equality constraints, optimization is not necessarily @@ -217,25 +218,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai # declaring convergence. counter_f_tol = f_converged ? counter_f_tol+1 : 0 converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol) - gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) - - Δf = abs(state.f_x - state.f_x_previous) - if iterationμ > 1 - Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous)) - end - - # Test whether we need to decrease the barrier penalty - if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax) - # Since iterationμ > 1 we must have accomplished real - # work, so it's worth trying to decrease the barrier - # penalty further. - shrink_μ!(d, constraints, state, method, options) - iterationμ = 0 - converged = false - gnormnew = oftype(gnormnew, NaN) - Δfmax = zero(Δfmax) - end - gnorm = gnormnew # We don't use the Hessian for anything if we have declared convergence, # so we might as well not make the (expensive) update if converged == true @@ -867,6 +849,23 @@ function shrink_μ!(d, constraints, state, method, options) update_fg!(d, constraints, state, method) end +function complementarity_μ(bstate) + # Adaptively update μ using the complementarity condition and the + # coordinate-by-coordinate deviation from the mean. See Nodecal & + # Wright, 2nd ed., section 19.3. 
+ m = max(length(bstate.λx) + length(bstate.λc), 1) + μmean = (dot(bstate.λx, bstate.slack_x) + dot(bstate.λc, bstate.slack_c))/m + ξ = oftype(μmean, 1) + if !isempty(bstate.slack_x) + ξ = min(ξ, Base.minimum(bstate.λx .* bstate.slack_x)/μmean) + end + if !isempty(bstate.slack_c) + ξ = min(ξ, Base.minimum(bstate.λc .* bstate.slack_c)/μmean) + end + μ = (min((1-ξ)/ξ/20, 2))^3/10 * μmean + μ, ξ +end + function qrregularize!(QRF) R = QRF[:R] for i = 1:size(R, 1) From 39eb0019e85326ae79a0f32db91db282cab714ce Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 11:04:17 -0600 Subject: [PATCH 37/40] Introduce a primal-dual guard condition --- src/ipnewton.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index d25e08f06..eaa4df201 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -228,6 +228,18 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction ls_update!(state.x, state.x, state.s, state.alpha) ls_update!(bstate, bstate, bstep, state.alpha) + # Ensure that the primal-dual approach does not deviate too much from primal + # (See Waechter & Biegler 2006, eq. 
16) + μ = state.μ + for i = 1:length(bstate.slack_x) + p = μ/bstate.slack_x[i] + bstate.λx[i] = max(min(bstate.λx[i], 10^10*p), p/10^10) + end + for i = 1:length(bstate.slack_c) + p = μ/bstate.slack_c[i] + bstate.λc[i] = max(min(bstate.λc[i], 10^10*p), p/10^10) + end + # Evaluate the constraints at the new position constraints.c!(state.x, state.constr_c) constraints.jacobian!(state.x, state.constr_J) From e79a3c7e464c6091f4fcc79cd08624593f9f3fda Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 14:13:39 -0600 Subject: [PATCH 38/40] =?UTF-8?q?Switch=20to=20a=20predictor=20algorithm?= =?UTF-8?q?=20for=20computing=20=CE=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was motivated by the observation that the deviation-based algorithm doesn't work when there's only one constraint. The predictor algorithm has a little trouble with this case too, but it's not nearly so severe (it *can* increase μ, you just have to prevent it from decreasing it to 0). Moreover, it seems a little more regular in its changes. --- src/interior.jl | 20 --------------- src/ipnewton.jl | 67 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 725e17f4c..3b99df757 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -202,9 +202,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! 
to stop (eg dx_dg == 0.0 in BFGS) - # Adaptive μ - μ, ξ = complementarity_μ(state.bstate) - state.μ = μ update_fg!(d, constraints, state, method) x_converged, f_converged, @@ -849,23 +846,6 @@ function shrink_μ!(d, constraints, state, method, options) update_fg!(d, constraints, state, method) end -function complementarity_μ(bstate) - # Adaptively update μ using the complementarity condition and the - # coordinate-by-coordinate deviation from the mean. See Nodecal & - # Wright, 2nd ed., section 19.3. - m = max(length(bstate.λx) + length(bstate.λc), 1) - μmean = (dot(bstate.λx, bstate.slack_x) + dot(bstate.λc, bstate.slack_c))/m - ξ = oftype(μmean, 1) - if !isempty(bstate.slack_x) - ξ = min(ξ, Base.minimum(bstate.λx .* bstate.slack_x)/μmean) - end - if !isempty(bstate.slack_c) - ξ = min(ξ, Base.minimum(bstate.λc .* bstate.slack_c)/μmean) - end - μ = (min((1-ξ)/ξ/20, 2))^3/10 * μmean - μ, ξ -end - function qrregularize!(QRF) R = QRF[:R] for i = 1:size(R, 1) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index eaa4df201..71b019e1d 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -16,6 +16,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState s::Array{T,N} # step for x # Barrier penalty fields μ::T # coefficient of the barrier penalty + μnext::T # μ for the next iteration L::T # value of the Lagrangian (objective + barrier + equality) L_previous::T bstate::BarrierStateVars{T} # value of slack and λ variables (current "position") @@ -46,6 +47,7 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S, state.Hd, convert(Array{T}, state.s), T(state.μ), + T(state.μnext), T(state.L), T(state.L_previous), convert(BarrierStateVars{T}, state.bstate), @@ -114,6 +116,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct Hd, similar(initial_x), # Maintain current x-search direction in state.s μ, + μ, T(NaN), T(NaN), bstate, @@ -149,19 +152,21 @@ end function update_gtilde!(d, 
constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton) # Calculate the modified x-gradient for the block-eliminated problem + # gtilde is the gradient for the affine-scaling problem, i.e., + # with μ=0, used in the adaptive setting of μ. Once we calculate μ we'll correct it gtilde, bstate, bgrad = state.gtilde, state.bstate, state.bgrad bounds = constraints.bounds copy!(gtilde, state.g) JIc = view5(state.constr_J, bounds.ineqc, :) if !isempty(JIc) Hssc = Diagonal(bstate.λc./bstate.slack_c) - gc = JIc'*(Diagonal(bounds.σc) * (bgrad.slack_c - Hssc*bgrad.λc)) + gc = JIc'*(Diagonal(bounds.σc) * (bstate.λc - Hssc*bgrad.λc)) # NOT bgrad.slack_c for i = 1:length(gtilde) gtilde[i] += gc[i] end end for (i,j) in enumerate(bounds.ineqx) - gxi = bounds.σx[i]*(bgrad.slack_x[i] - bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i]) + gxi = bounds.σx[i]*(bstate.λx[i] - bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i]) gtilde[j] += gxi end state @@ -239,11 +244,12 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction p = μ/bstate.slack_c[i] bstate.λc[i] = max(min(bstate.λc[i], 10^10*p), p/10^10) end + state.μ = state.μnext # Evaluate the constraints at the new position constraints.c!(state.x, state.constr_c) constraints.jacobian!(state.x, state.constr_J) - @assert state.ev == equality_violation(constraints, state) + state.ev == equality_violation(constraints, state) false end @@ -251,19 +257,18 @@ end function solve_step!(state::IPNewtonState, constraints, options) x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad - Htilde = state.Htilde + J, Htilde = state.constr_J, state.Htilde # Solve the Newton step JE = jacobianE(state, bounds) gE = [bgrad.λxE; bgrad.λcE] M = JE*(Htilde \ JE') MF = cholfact(Positive, M, Val{true}) - ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde)) - Δx = Htilde \ (JE'*ΔλE - state.gtilde) - # Use the real H in estimating the linesearch quadratic 
parameters - Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx - # Also check that the solution to the linear equations represents an improvement - Hpstepx = full(Htilde)*Δx - JE'*ΔλE # TODO: don't use full here + # These are a solution to the affine-scaling problem (with μ=0) + ΔλE0 = MF \ (gE + JE * (Htilde \ state.gtilde)) + Δx0 = Htilde \ (JE'*ΔλE0 - state.gtilde) + # Check that the solution to the linear equations represents an improvement + Hpstepx, HstepλE = full(Htilde)*Δx0 - JE'*ΔλE0, -JE*Δx0 # TODO: don't use full here if options.show_linesearch println("|gx| = $(norm(state.gtilde)), |Hstepx + gx| = $(norm(Hpstepx+state.gtilde))") println("|gE| = $(norm(gE)), |HstepλE + gE| = $(norm(HstepλE+gE))") @@ -275,26 +280,54 @@ function solve_step!(state::IPNewtonState, constraints, options) fill!(bstep, 0) return state end + # Set μ (see the predictor strategy in Nocedal & Wright, 2nd ed., section 19.3) + solve_slack!(bstep, Δx0, bounds, bstate, bgrad, J, zero(state.μ)) # store temporarily in bstep + αs = convert(eltype(bstate), 1.0) + αs = estimate_maxstep(αs, bstate.slack_x, bstep.slack_x) + αs = estimate_maxstep(αs, bstate.slack_c, bstep.slack_c) + αλ = convert(eltype(bstate), 1.0) + αλ = estimate_maxstep(αλ, bstate.λx, bstep.λx) + αλ = estimate_maxstep(αλ, bstate.λc, bstep.λc) + m = max(1, length(bstate.slack_x) + length(bstate.slack_c)) + μaff = (dot(bstate.slack_x + αs*bstep.slack_x, bstate.λx + αλ*bstep.λx) + + dot(bstate.slack_c + αs*bstep.slack_c, bstate.λc + αλ*bstep.λc))/m + μmean = (dot(bstate.slack_x, bstate.λx) + dot(bstate.slack_c, bstate.λc))/m + # When there's only one constraint, μaff can be exactly zero. So limit the decrease. 
+ state.μnext = max((μaff/μmean)^3 * μmean, μmean/10) + μ = state.μ + # Solve for the *real* step (including μ) + μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c] + gtildeμ = state.gtilde - jacobianI(state, bounds)' * μsinv + ΔλE = MF \ (gE + JE * (Htilde \ gtildeμ)) + Δx = Htilde \ (JE'*ΔλE - gtildeμ) copy!(s, Δx) k = unpack_vec!(bstep.λxE, ΔλE, 0) k = unpack_vec!(bstep.λcE, ΔλE, k) k == length(ΔλE) || error("exhausted targets before ΔλE") + solve_slack!(bstep, Δx, bounds, bstate, bgrad, J, μ) + # Solve for the quadratic parameters (use the real H, not the posdef H) + Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx + qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE) + qp +end + +function solve_slack!(bstep, s, bounds, bstate, bgrad, J, μ) # Solve for the slack variable and λI updates for (i, j) in enumerate(bounds.ineqx) bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j] # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2 - bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] + # bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] + bstep.λx[i] = -(-μ/bstate.slack_x[i] + bstate.λx[i]) - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i] end - JIc = view5(state.constr_J, bounds.ineqc, :) - SigmaJIΔx = Diagonal(bounds.σc)*(JIc*state.s) + JIc = view5(J, bounds.ineqc, :) + SigmaJIΔx = Diagonal(bounds.σc)*(JIc*s) for i = 1:length(bstep.λc) bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i] # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2 - bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] + # bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] + bstep.λc[i] = -(-μ/bstate.slack_c[i] + bstate.λc[i]) - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i] end - # Solve for the quadratic parameters - qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), 
dot(Δx, Hstepx) + dot(ΔλE, HstepλE) - qp + bstep end function is_smaller_eps(ref, step) From ad06e8a5cfc11bf354cd6eb99f497346485d027f Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 14:46:31 -0600 Subject: [PATCH 39/40] Fix tests --- src/ipnewton.jl | 8 +++++++- test/constraints.jl | 24 +++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/ipnewton.jl b/src/ipnewton.jl index 71b019e1d..d71e2bd2c 100644 --- a/src/ipnewton.jl +++ b/src/ipnewton.jl @@ -382,4 +382,10 @@ function Hf(bounds::ConstraintBounds, state) -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))] end Hf(constraints, state) = Hf(constraints.bounds, state) -gf(state) = [state.gtilde; state.bgrad.λxE; state.bgrad.λcE] +function gf(bounds::ConstraintBounds, state) + bstate, μ = state.bstate, state.μ + μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c] + gtildeμ = state.gtilde - jacobianI(state, bounds)' * μsinv + [gtildeμ; state.bgrad.λxE; state.bgrad.λcE] +end +gf(constraints, state) = gf(constraints.bounds, state) diff --git a/test/constraints.jl b/test/constraints.jl index 7449c03cb..6646e53e7 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -113,7 +113,7 @@ ConstraintBounds: constraints = TwiceDifferentiableConstraintsFunction( (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, dg, constraints, x) - @test Optim.gf(state) ≈ gx + @test Optim.gf(bounds, state) ≈ gx @test Optim.Hf(constraints, state) ≈ H ## Pure equality constraints on variables xbar = fill(0.2, length(x)) @@ -132,7 +132,7 @@ ConstraintBounds: state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λxE, bstate.λxE) setstate!(state, μ, d0, constraints, method) - @test Optim.gf(state) ≈ [gx; xbar-x] + @test Optim.gf(bounds, state) ≈ [gx; xbar-x] n = length(x) @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] # Now again using the generic machinery @@ 
-141,7 +141,7 @@ ConstraintBounds: state = Optim.initial_state(method, options, d0, constraints, x) copy!(state.bstate.λcE, bstate.λxE) setstate!(state, μ, d0, constraints, method) - @test Optim.gf(state) ≈ [gx; xbar-x] + @test Optim.gf(bounds, state) ≈ [gx; xbar-x] n = length(x) @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)] ## Nonnegativity constraints @@ -160,14 +160,14 @@ ConstraintBounds: (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) - @test Optim.gf(state) ≈ -μ./y + @test Optim.gf(bounds, state) ≈ -μ./y @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x))) constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds) state = Optim.initial_state(method, options, d0, constraints, y) setstate!(state, μ, d0, constraints, method) - @test Optim.gf(state) ≈ -μ./y + @test Optim.gf(bounds, state) ≈ -μ./y @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2) ## General inequality constraints on variables lb, ub = rand(length(x))-2, rand(length(x))+1 @@ -207,7 +207,7 @@ ConstraintBounds: hxs[j] += htmp gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp end - @test Optim.gf(state) ≈ gxs + @test Optim.gf(bounds, state) ≈ gxs @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) # Now again using the generic machinery bounds = Optim.ConstraintBounds([], [], lb, ub) @@ -216,7 +216,7 @@ ConstraintBounds: copy!(state.bstate.slack_c, bstate.slack_x) copy!(state.bstate.λc, bstate.λx) setstate!(state, μ, d0, constraints, method) - @test Optim.gf(state) ≈ gxs + @test Optim.gf(bounds, state) ≈ gxs @test Optim.Hf(constraints, state) ≈ Diagonal(hxs) ## Nonlinear equality constraints cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2] @@ -248,7 +248,7 @@ ConstraintBounds: setstate!(state, 
μ, d0, constraints, method) heq = zeros(length(x), length(x)) ch!(x, bstate.λcE, heq) - @test Optim.gf(state) ≈ [gx; cbar-c] + @test Optim.gf(bounds, state) ≈ [gx; cbar-c] @test Optim.Hf(constraints, state) ≈ [full(cholfact(Positive, heq)) -J'; -J zeros(size(J,1), size(J,1))] ## Nonlinear inequality constraints @@ -285,7 +285,7 @@ ConstraintBounds: # hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI hxx = -hineq + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c)) - @test Optim.gf(state) ≈ gf + @test Optim.gf(bounds, state) ≈ gf @test Optim.Hf(constraints, state) ≈ full(cholfact(Positive, hxx, Val{true})) end @@ -394,7 +394,7 @@ ConstraintBounds: # Nonnegativity (the case that doesn't require slack variables) constraints = TwiceDifferentiableConstraintsFunction([0.0], []) state = Optim.initial_state(method, options, d, constraints, [x0]) - qp = Optim.solve_step!(state, constraints) + qp = Optim.solve_step!(state, constraints, options) @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0) g0, H0 = autoqp(d, constraints, state) @test qp[1] ≈ F*x0-μ*log(x0) @@ -432,16 +432,18 @@ ConstraintBounds: state = Optim.initial_state(method, options, d, constraints, [μ/F*10]) for i = 1:10 Optim.update_state!(d, constraints, state, method, options) + state.μ = μ Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end - @test state.x[1] ≈ μ/F + @test isapprox(state.x[1], μ/F, rtol=1e-4) # |x| ≥ 1, and check that we get slack precision better than eps(1.0) d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0)) constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...) 
state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ]) for i = 1:10 Optim.update_state!(d, constraints, state, method, options) + state.μ = μ Optim.update_fg!(d, constraints, state, method) Optim.update_h!(d, constraints, state, method) end From 1cbad4872ddefc483989996719f4b5e34fa83b8e Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Nov 2016 15:00:57 -0600 Subject: [PATCH 40/40] Fix ambiguities, tests on julia 0.4 --- src/interior.jl | 33 +++++++++++++++++++++++---------- test/constraints.jl | 6 +++--- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 3b99df757..4a2e5ee2c 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -285,7 +285,7 @@ You can manually specify `μ` by supplying a numerical value for `μ0`. Whether calculated algorithmically or specified manually, the values of `λ` are set using the chosen `μ`. """ -function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β=1//100) +function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β::Number=1//100) if nconstraints(bounds) == 0 && nconstraints_x(bounds) == 0 state.μ = 0 fill!(state.bstate, 0) @@ -341,7 +341,7 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy k == length(λE) || error("something is wrong") state end -function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β=1//100) +function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β::Number=1//100) initialize_μ_λ!(state, bounds, nothing, μ0, β) end @@ -490,13 +490,13 @@ end alphax(α::Number) = α alphax(αs::Union{Tuple,AbstractVector}) = αs[1] -function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) +function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) # For backtrack_constrained, the last evaluation is the 
one we # keep, so it's safe to store the results in state - state.f_x, state.L, state.ev = _lagrangian_linefunc(α, αI, d, constraints, state) + state.f_x, state.L, state.ev = _lagrangian_linefunc(α, d, constraints, state) state.L end -lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state) +lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state) ## for line searches that do use the gradient along the line @@ -530,11 +530,22 @@ slopealpha(sx, gx, bstep, bgrad) = dot(sx, gx) + dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc) + dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE) -function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) - αs->lagrangian_lineslope!(αs, d, constraints, state, method) -end -function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) - αs->lagrangian_linefunc!(αs, d, constraints, state, method) +if VERSION >= v"0.5.0" + function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)}) + αs->lagrangian_lineslope!(αs, d, constraints, state, method) + end + function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) + αs->lagrangian_linefunc!(αs, d, constraints, state, method) + end +else + # 0.4 can't dispatch on a particular function + function linesearch_anon(d, constraints, state, method::IPOptimizer) + ls = method.linesearch! 
+ if ls == backtrack_constrained_grad + return αs->lagrangian_lineslope!(αs, d, constraints, state, method) + end + αs->lagrangian_linefunc!(αs, d, constraints, state, method) + end end ## Computation of Lagrangian terms: barrier penalty @@ -755,6 +766,7 @@ function isfeasible(constraints, x) isfeasible(constraints, x, c) end isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c) +isfeasible(constraints::Void, state::AbstractBarrierState) = true isfeasible(constraints::Void, x) = true """ @@ -789,6 +801,7 @@ function isinterior(constraints, x) isinterior(constraints, x, c) end isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c) +isinterior(constraints::Void, state::AbstractBarrierState) = true isinterior(constraints::Void, x) = true ## Utilities for representing total state as single vector diff --git a/test/constraints.jl b/test/constraints.jl index 6646e53e7..3521cbc40 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -322,7 +322,7 @@ ConstraintBounds: Optim.update_fg!(d, constraints, state, method) J = zeros(2,4) constraints.jacobian!(x, J) - eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal) + eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal) @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint Pfg = f_g - dot(f_g, eqnormal)*eqnormal Pg = state.g - dot(state.g, eqnormal)*eqnormal @@ -362,7 +362,7 @@ ConstraintBounds: Optim.update_fg!(d, constraints, state, method) J = zeros(2,4) constraints.jacobian!(x, J) - eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal) + eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal) @test abs(dot(state.g, eqnormal)) < 1e-12 # orthogonal to equality constraint Pgx = gx - dot(gx, eqnormal)*eqnormal @test abs(dot(Pgx, state.g)/dot(Pgx,Pgx) - 1) <= 0.011 @@ -404,7 +404,7 @@ ConstraintBounds: αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx, 
state.s[bounds.ineqx].*bounds.σx) ϕ = Optim.linesearch_anon(d, constraints, state, method) - val0 = ϕ((0,0)) + val0 = ϕ(0.0) val0 = isa(val0, Tuple) ? val0[1] : val0 @test val0 ≈ qp[1] α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)