From 034f2cf5d4faaebcb86d0b38cb00b6beb2b69c14 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Fri, 4 Nov 2016 05:14:35 -0500
Subject: [PATCH 01/40] Add constraints and parsing tests

---
 src/Optim.jl        |   2 +
 src/types.jl        | 177 ++++++++++++++++++++++++++++++++++++++++++++
 test/constraints.jl |  33 +++++++++
 3 files changed, 212 insertions(+)
 create mode 100644 test/constraints.jl

diff --git a/src/Optim.jl b/src/Optim.jl
index 4a4272e0c..c66a68e3e 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -19,6 +19,8 @@ module Optim
     export optimize,
            DifferentiableFunction,
            TwiceDifferentiableFunction,
+           DifferentiableConstraintsFunction,
+           TwiceDifferentiableConstraintsFunction,
            OptimizationOptions,
            OptimizationState,
            OptimizationTrace,
diff --git a/src/types.jl b/src/types.jl
index ec072fa94..b940763c5 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -228,3 +228,180 @@ function TwiceDifferentiableFunction(f::Function,
     end
     return TwiceDifferentiableFunction(f, g!, fg!, h!)
 end
+
+### Constraints
+#
+# Constraints are specified by the user as
+#    lx_i ≤   x[i]  ≤ ux_i  # variable (box) constraints
+#    lc_i ≤ c(x)[i] ≤ uc_i  # linear/nonlinear constraints
+# and become equality constraints when l_i = u_i. ±∞ are allowed for l
+# and u, in which case the relevant side(s) are unbounded.
+#
+# The user supplies functions to calculate c(x) and its derivatives.
+#
+# Of course we could unify the box-constraints into the
+# linear/nonlinear constraints, but that would force the user to
+# provide the variable-derivatives manually, which would be silly.
+#
+# This parametrization of the constraints gets "parsed" into a form
+# that speeds and simplifies the algorithm, at the cost of many
+# additional variables. See `parse_constraints` for details.
+
+immutable ConstraintBounds{T}
+    # Box-constraints on variables (i.e., directly on x)
+    eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...)
+    valx::Vector{T}  # value of equality-constrained x
+    ineqx::Vector{Int}  # index-vector of other inequality-constrained variables
+    σx::Vector{Int8}    # ±1, in constraints σ(v-b) ≥ 0 (sign depends on whether v>b or v<b)
+    bx::Vector{T}       # bound (upper or lower) on variable
+    iz::Vector{Int}     # index-vector of nonnegative or nonpositive variables
+    σz::Vector{Int8}    # ±1 depending on whether nonnegative or nonpositive
+    bz::Vector{T}       # all-zeros, convenience for evaluation of barrier penalty
+    # Linear/nonlinear constraint functions and bounds
+    eqc::Vector{Int}    # index-vector equality-constrained entries in c
+    valc::Vector{T}     # value of the equality-constraint
+    ineqc::Vector{Int}  # index-vector of inequality-constraints
+    σc::Vector{Int8}    # same as σx, bx except for the nonlinear constraints
+    bc::Vector{T}
+end
+function ConstraintBounds(lx, ux, lc, uc)
+    _cb(symmetrize(lx, ux)..., symmetrize(lc, uc)...)
+end
+function _cb{Tx,Tc}(lx::AbstractArray{Tx}, ux::AbstractArray{Tx}, lc::AbstractVector{Tc}, uc::AbstractVector{Tc})
+    T = promote_type(Tx,Tc)
+    ConstraintBounds{T}(parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...)
+end
+
+Base.eltype{T}(::Type{ConstraintBounds{T}}) = T
+Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb))
+
+abstract AbstractConstraintsFunction
+
+immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction
+    bounds::ConstraintBounds{T}
+    c!::F         # c!(x, storage) stores the value of the constraint-functions at x
+    jacobian!::J  # jacobian!(x, storage) stores the Jacobian of the constraint-functions
+end
+
+function DifferentiableConstraintsFunction(c!, jacobian!, lx, ux, lc, uc)
+    b = ConstraintBounds(lx, ux, lc, uc)
+    DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(b, c!, jacobian!)
+end
+
+immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction
+    bounds::ConstraintBounds{T}  # parsed box and linear/nonlinear bounds
+    c!::F         # constraint values, as in DifferentiableConstraintsFunction
+    jacobian!::J  # constraint Jacobian, as in DifferentiableConstraintsFunction
+    h!::H         # Hessian of the barrier terms
+end
+function TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, lx, ux, lc, uc)
+    b = ConstraintBounds(lx, ux, lc, uc)
+    TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(b, c!, jacobian!, h!)
+end
+
+## Utilities
+
+function symmetrize(l, u)
+    if isempty(l) && !isempty(u)
+        l = fill!(similar(u), -Inf)
+    end
+    if !isempty(l) && isempty(u)
+        u = fill!(similar(l), Inf)
+    end
+    # TODO: change to indices?
+    size(l) == size(u) || throw(DimensionMismatch("bounds arrays must be consistent, got sizes $(size(l)) and $(size(u))"))
+    _symmetrize(l, u)
+end
+_symmetrize{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = l, u
+_symmetrize(l::Vector{Any}, u::Vector{Any}) = _symm(l, u)
+_symmetrize(l, u) = _symm(l, u)
+
+# Designed to ensure that bounds supplied as [] don't cause
+# unnecessary broadening of the eltype. Note this isn't type-stable; if
+# the user cares, it can be avoided by supplying the same concrete
+# type for both l and u.
+function _symm(l, u)
+    if isempty(l) && isempty(u)
+        if eltype(l) == Any
+            # prevent promotion from returning eltype Any
+            l = Array{Union{}}(0)
+        end
+        if eltype(u) == Any
+            u = Array{Union{}}(0)
+        end
+    end
+    promote(l, u)
+end
+
+"""
+    parse_constraints(T, l, u, split_signed=false) -> eq, val, ineq, σ, b, [iz, σz, bz]
+
+From user-supplied constraints of the form
+
+    l_i ≤  v_i  ≤ u_i
+
+(which include both inequality and equality constraints, the latter
+when `l_i == u_i`), convert into the following representation:
+
+    - `eq`, a vector of the indices for which `l[eq] == u[eq]`
+    - `val = l[eq] = u[eq]`
+    - `ineq`, `σ`, and `b` such that the inequality constraints can be written as
+             σ[k]*(v[ineq[k]] - b[k]) ≥ 0
+       where `σ[k] = ±1`.
+    - optionally (with `split_signed=true`), return an index-vector
+      `iz` of entries where one of `l`, `u` is zero, along with
+      whether the constraint is `≥ 0` (σz=+1) or `≤ 0` (σz=-1). Such
+      are removed from `ineq`, `σ`, and `b`. For coordinate variables
+      this can be used to reduce the number of slack variables needed,
+      since when one of the bounds is 0, the variable itself *is* a
+      slack variable.
+
+Note that since the same `v_i` might have both lower and upper bounds,
+`ineq` might have the same index twice (once with `σ`=-1 and once with `σ`=1).
+
+Supplying `±Inf` for elements of `l` and/or `u` implies that `v_i` is
+unbounded in the corresponding direction. In such cases there is no
+corresponding entry in `ineq`/`σ`/`b`.
+
+`T` is the element type of the non-integer outputs (`val`, `b`, and, with `split_signed=true`, `bz`).
+"""
+function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false)
+    size(l) == size(u) || throw(DimensionMismatch("l and u must be the same size, got $(size(l)) and $(size(u))"))
+    eq, ineq, iz = Int[], Int[], Int[]
+    val, b = T[], T[]
+    σ, σz = Array{Int8}(0), Array{Int8}(0)
+    for i = 1:length(l)
+        li, ui = l[i], u[i]
+        li <= ui || throw(ArgumentError("l must be smaller than u, got $li, $ui"))
+        if li == ui
+            push!(eq, i)
+            push!(val, ui)
+        else
+            if isfinite(li)
+                if split_signed && li == 0
+                    push!(iz, i)
+                    push!(σz, 1)
+                else
+                    push!(ineq, i)
+                    push!(σ, 1)
+                    push!(b, li)
+                end
+            end
+            ui = u[i]
+            if isfinite(ui)
+                if split_signed && ui == 0
+                    push!(iz, i)
+                    push!(σz, -1)
+                else
+                    push!(ineq, i)
+                    push!(σ, -1)
+                    push!(b, ui)
+                end
+            end
+        end
+    end
+    if split_signed
+        return eq, val, ineq, σ, b, iz, σz, zeros(T, length(iz))
+    end
+    eq, val, ineq, σ, b
+end
diff --git a/test/constraints.jl b/test/constraints.jl
new file mode 100644
index 000000000..3f503364f
--- /dev/null
+++ b/test/constraints.jl
@@ -0,0 +1,33 @@
+using Optim, Base.Test
+
+b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
+@test b.eqx == [3]
+@test b.valx == [2.0]
+@test b.ineqx == [1,2,2]
+@test b.σx == [-1,1,-1]
+@test b.bx == [1.0,0.5,1.0]
+@test b.iz == [1]
+@test b.σz == [1]
+@test b.eqc == [1]
+@test b.valc == [5]
+@test b.ineqc == [2,2]
+@test b.σc == [1,-1]
+@test b.bc == [3.8,4.0]
+
+b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
+for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz)
+    @test isempty(getfield(b, fn))
+end
+@test b.eqc == [1]
+@test b.valc == [5]
+@test b.ineqc == [2,2]
+@test b.σc == [1,-1]
+@test b.bc == [3.8,4.0]
+
+ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0])
+@test eltype(ba) == Float64
+
+@test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0])
+@test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
+
+nothing

From 0c7a70f54b7dfe9c379e8321cdebbd4a8dc8b007 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 5 Nov 2016 04:32:01 -0500
Subject: [PATCH 02/40] Add pretty-printing of ConstraintBounds

---
 src/types.jl        | 47 +++++++++++++++++++++++++++++++
 test/constraints.jl | 67 +++++++++++++++++++++++++++------------------
 2 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/src/types.jl b/src/types.jl
index b940763c5..06b90463d 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -275,6 +275,19 @@ end
 Base.eltype{T}(::Type{ConstraintBounds{T}}) = T
 Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb))
 
+function Base.show(io::IO, cb::ConstraintBounds)
+    indent = "    "
+    print(io, "ConstraintBounds:")
+    print(io, "\n  Variables:")
+    showeq(io, indent, cb.eqx, cb.valx, 'x', :bracket)
+    showineq(io, indent, cb.ineqx, cb.σx, cb.bx, 'x', :bracket)
+    showineq(io, indent, cb.iz, cb.σz, cb.bz, 'x', :bracket)
+    print(io, "\n  Linear/nonlinear constraints:")
+    showeq(io, indent, cb.eqc, cb.valc, 'c', :subscript)
+    showineq(io, indent, cb.ineqc, cb.σc, cb.bc, 'c', :subscript)
+    nothing
+end
+
 abstract AbstractConstraintsFunction
 
 immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction
@@ -405,3 +418,37 @@ function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false)
     end
     eq, val, ineq, σ, b
 end
+
+### Compact printing of constraints
+
+immutable UnquotedString
+    str::AbstractString
+end
+Base.show(io::IO, uqstr::UnquotedString) = print(io, uqstr.str)
+
+Base.array_eltype_show_how(a::Vector{UnquotedString}) = false, ""
+
+function showeq(io, indent, eq, val, chr, style)
+    if !isempty(eq)
+        print(io, '\n', indent)
+        if style == :bracket
+            eqstrs = map((i,v) -> UnquotedString("$chr[$i]=$v"), eq, val)
+        else
+            eqstrs = map((i,v) -> UnquotedString("$(chr)_$i=$v"), eq, val)
+        end
+        Base.show_vector(IOContext(io, limit=true), eqstrs, "", "")
+    end
+end
+
+function showineq(io, indent, ineqs, σs, bs, chr, style)
+    if !isempty(ineqs)
+        print(io, '\n', indent)
+        if style == :bracket
+            ineqstrs = map((i,σ,b) -> UnquotedString(string("$chr[$i]", ineqstr(σ,b))), ineqs, σs, bs)
+        else
+            ineqstrs = map((i,σ,b) -> UnquotedString(string("$(chr)_$i", ineqstr(σ,b))), ineqs, σs, bs)
+        end
+        Base.show_vector(IOContext(io, limit=true), ineqstrs, "", "")
+    end
+end
+ineqstr(σ,b) = σ>0 ? "≥$b" : "≤$b"
diff --git a/test/constraints.jl b/test/constraints.jl
index 3f503364f..642c5ae46 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -1,33 +1,48 @@
 using Optim, Base.Test
 
-b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
-@test b.eqx == [3]
-@test b.valx == [2.0]
-@test b.ineqx == [1,2,2]
-@test b.σx == [-1,1,-1]
-@test b.bx == [1.0,0.5,1.0]
-@test b.iz == [1]
-@test b.σz == [1]
-@test b.eqc == [1]
-@test b.valc == [5]
-@test b.ineqc == [2,2]
-@test b.σc == [1,-1]
-@test b.bc == [3.8,4.0]
+@testset "Constraints" begin
+    @testset "Bounds parsing" begin
+        b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
+        @test b.eqx == [3]
+        @test b.valx == [2.0]
+        @test b.ineqx == [1,2,2]
+        @test b.σx == [-1,1,-1]
+        @test b.bx == [1.0,0.5,1.0]
+        @test b.iz == [1]
+        @test b.σz == [1]
+        @test b.eqc == [1]
+        @test b.valc == [5]
+        @test b.ineqc == [2,2]
+        @test b.σc == [1,-1]
+        @test b.bc == [3.8,4.0]
+        io = IOBuffer()
+        show(io, b)
+        @test takebuf_string(io) == """
+ConstraintBounds:
+  Variables:
+    x[3]=2.0
+    x[1]≤1.0,x[2]≥0.5,x[2]≤1.0
+    x[1]≥0.0
+  Linear/nonlinear constraints:
+    c_1=5.0
+    c_2≥3.8,c_2≤4.0"""
 
-b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
-for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz)
-    @test isempty(getfield(b, fn))
-end
-@test b.eqc == [1]
-@test b.valc == [5]
-@test b.ineqc == [2,2]
-@test b.σc == [1,-1]
-@test b.bc == [3.8,4.0]
+        b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
+        for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz)
+            @test isempty(getfield(b, fn))
+        end
+        @test b.eqc == [1]
+        @test b.valc == [5]
+        @test b.ineqc == [2,2]
+        @test b.σc == [1,-1]
+        @test b.bc == [3.8,4.0]
 
-ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0])
-@test eltype(ba) == Float64
+        ba = Optim.ConstraintBounds([], [], [5.0, 3.8], [5.0, 4.0])
+        @test eltype(ba) == Float64
 
-@test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0])
-@test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
+        @test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0])
+        @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
+    end
+end
 
 nothing

From 8b1e03bda0ada1a8d58a377bb82d5d8efd79ef2c Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 5 Nov 2016 04:33:31 -0500
Subject: [PATCH 03/40] Implement barrier function and equality-constraints

Computes and tests the gradient, too. The Hessian will come later.
---
 src/Optim.jl        |   1 +
 src/interior.jl     | 345 ++++++++++++++++++++++++++++++++++++++++++++
 test/constraints.jl | 107 ++++++++++++++
 3 files changed, 453 insertions(+)
 create mode 100644 src/interior.jl

diff --git a/src/Optim.jl b/src/Optim.jl
index c66a68e3e..e96940c8f 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -78,6 +78,7 @@ module Optim
 
     # Constrained optimization
     include("fminbox.jl")
+    include("interior.jl")
 
     # trust region methods
     include("levenberg_marquardt.jl")
diff --git a/src/interior.jl b/src/interior.jl
new file mode 100644
index 000000000..301bb9a98
--- /dev/null
+++ b/src/interior.jl
@@ -0,0 +1,345 @@
+abstract AbstractBarrierState
+
+# These are used not only for the current state, but also for the step and the gradient
+immutable BarrierStateVars{T}
+    slack_x::Vector{T}    # values of slack variables for x
+    slack_c::Vector{T}    # values of slack variables for c
+    λxE::Vector{T}        # λ for equality constraints on x
+    λx::Vector{T}         # λ for equality constraints on slack_x
+    λc::Vector{T}         # λ for equality constraints on slack_c
+    λcE::Vector{T}        # λ for linear/nonlinear equality constraints
+end
+# Note on λxE:
+# We could just set equality-constrained variables to their
+# constraint values at the beginning of optimization, but this
+# might make the initial guess infeasible in terms of its
+# inequality constraints. This would be a much bigger problem than
+# not matching the equality constraints.  So we allow them to
+# differ, and require that the algorithm can cope with it.
+
+function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds)
+    slack_x = Array{T}(length(bounds.ineqx))
+    slack_c = Array{T}(length(bounds.ineqc))
+    λxE = Array{T}(length(bounds.eqx))
+    λx = similar(slack_x)
+    λc = similar(slack_c)
+    λcE = Array{T}(length(bounds.eqc))
+    sv = BarrierStateVars{T}(slack_x, slack_c, λxE, λx, λc, λcE)
+end
+BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds)
+
+function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x)
+    sv = BarrierStateVars(bounds)
+    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    sv
+end
+function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c)
+    sv = BarrierStateVars(bounds)
+    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc)
+    sv
+end
+function setslack!(slack, v, ineq, σ, b)
+    for i = 1:length(ineq)
+        slack[i] = σ[i]*(v[ineq[i]]-b[i])
+    end
+    slack
+end
+
+Base.similar(bstate::BarrierStateVars) =
+    BarrierStateVars(similar(bstate.slack_x),
+                     similar(bstate.slack_c),
+                     similar(bstate.λxE),
+                     similar(bstate.λx),
+                     similar(bstate.λc),
+                     similar(bstate.λcE))
+
+function Base.fill!(b::BarrierStateVars, val)
+    fill!(b.slack_x, val)
+    fill!(b.slack_c, val)
+    fill!(b.λxE, val)
+    fill!(b.λx, val)
+    fill!(b.λc, val)
+    fill!(b.λcE, val)
+    b
+end
+
+Base.eltype{T}(::Type{BarrierStateVars{T}}) = T
+Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv))
+
+function Base.show(io::IO, b::BarrierStateVars)
+    print(io, "BarrierStateVars{$(eltype(b))}:")
+    for fn in fieldnames(b)
+        print(io, "\n  $fn: ")
+        show(io, getfield(b, fn))
+    end
+end
+
+
+## Computation of the Lagrangian and its gradient
+# This is in a parametrization that is also useful during linesearch
+
+function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ, method)
+    f_x = d.f(x)
+    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
+            equality_violation(bounds, x, c, bstate)
+    f_x, L_xsλ
+end
+
+function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+    fill!(bgrad, 0)
+    d.g!(x, gx)
+    barrier_grad!(gx, bgrad, bounds, x, bstate, μ)
+    equality_grad!(gx, bgrad, bounds, x, c, J, bstate)
+    nothing
+end
+
+function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+    fill!(bgrad, 0)
+    f_x = d.fg!(x, gx)
+    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
+        equality_violation(bounds, x, c, bstate)
+    barrier_grad!(gx, bgrad, bounds, x, bstate, μ)
+    equality_grad!(gx, bgrad, bounds, x, c, J, bstate)
+    f_x, L_xsλ
+end
+
+## Computation of Lagrangian and derivatives when passing all parameters as a single vector
+function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ, method)
+    unpack_vec!(x, bstate, p)
+    f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ, method)
+    L_xsλ
+end
+function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ, method)
+    # Use this version when using automatic differentiation
+    unpack_vec!(x, bstate, p)
+    f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ, method)
+    L_xsλ
+end
+function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+    unpack_vec!(x, bstate, p)
+    f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, method)
+    pack_vec!(storage, gx, bgrad)
+    L_xsλ
+end
+
+## Computation of Lagrangian terms: barrier penalty
+"""
+    barrier_value(constraints, state) -> val
+    barrier_value(bounds, x, sx, sc, μ) -> val
+
+Compute the value of the barrier penalty at the current `state`, or at
+a position (`x`,`sx`,`sc`), where `x` is the current position, `sx`
+are the coordinate slack variables, and `sc` are the linear/nonlinear
+slack variables. `bounds` holds the parsed bounds.
+"""
+function barrier_value(bounds::ConstraintBounds, x, sx, sc, μ)
+    # bμ is the coefficient of μ in the barrier penalty
+    bμ = _bv(x, bounds.iz, bounds.σz) +      # coords constrained by 0
+         _bv(sx) +  # coords with other bounds
+         _bv(sc)    # linear/nonlinear constr.
+    μ*bμ
+end
+barrier_value(bounds::ConstraintBounds, x, bstate::BarrierStateVars, μ) =
+    barrier_value(bounds, x, bstate.slack_x, bstate.slack_c, μ)
+barrier_value(bounds::ConstraintBounds, state) =
+    barrier_value(bounds, state.x, state.bstate.slack_x, state.bstate.slack_c, state.μ)
+barrier_value(constraints::AbstractConstraintsFunction, state) =
+    barrier_value(constraints.bounds, state)
+
+# don't call this barrier_value because it lacks μ
+function _bv(v, idx, σ)
+    ret = loginf(one(eltype(σ))*one(eltype(v)))
+    for (i,iv) in enumerate(idx)
+        ret += loginf(σ[i]*v[iv])
+    end
+    -ret
+end
+
+_bv(v) = isempty(v) ? loginf(one(eltype(v))) : -sum(loginf, v)
+
+loginf(δ) = δ > 0 ? log(δ) : -oftype(δ, Inf)
+
+"""
+    barrier_grad!(gx, bgrad, bounds, x, bstate, μ)
+    barrier_grad!(gx, gsx, gsc, bounds, x, sx, sc, μ)
+
+Compute the gradient of the barrier penalty at (`x`,`sx`,`sc`), where
+`x` is the current position, `sx` are the coordinate slack variables,
+and `sc` are the linear/nonlinear slack
+variables. `bounds::ConstraintBounds` holds the parsed bounds.
+
+The result is *added* to `gx`, `gsx`, and `gsc`, so these vectors
+need to be initialized appropriately.
+"""
+function barrier_grad!(gx, gsx, gsc, bounds::ConstraintBounds, x, sx, sc, μ)
+    barrier_grad!(view(gx, bounds.iz), view(x, bounds.iz), μ)
+    barrier_grad!(gsx, sx, μ)
+    barrier_grad!(gsc, sc, μ)
+    nothing
+end
+barrier_grad!(gx, bgrad, bounds::ConstraintBounds, x, bstate, μ) =
+    barrier_grad!(gx, bgrad.slack_x, bgrad.slack_c, bounds, x, bstate.slack_x, bstate.slack_c, μ)
+
+function barrier_grad!(out, v, μ)
+    for i = 1:length(out)
+        out[i] -= μ/v[i]
+    end
+    nothing
+end
+
+
+## Computation of Lagrangian terms: equality constraints penalty
+
+"""
+    equality_violation([f=identity], bounds, x, c, bstate) -> val
+    equality_violation([f=identity], bounds, x, c, sx, sc, λxE, λx, λc, λcE) -> val
+
+Compute the sum of `f(v_i)`, where `v_i = λ_i*(target - observed)`
+measures the difference between the current state and the
+equality-constrained state. `bounds::ConstraintBounds` holds the
+parsed bounds. `x` is the current position, `sx` are the coordinate
+slack variables, and `sc` are the linear/nonlinear slack
+variables. `c` holds the values of the linear/nonlinear constraints,
+and the λ arguments hold the Lagrange multipliers for `x`, `sx`, `sc`, and
+`c` respectively.
+"""
+function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE)
+    ev = equality_violation(f, x, bounds.valx, bounds.eqx, λxE) +
+         equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) +
+         equality_violation(f, sc, c, bounds.ineqc, bounds.σc, bounds.bc, λc) +
+         equality_violation(f, c, bounds.valc, bounds.eqc, λcE)
+end
+equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) =
+    equality_violation(identity, bounds, x, c, sx, sc, λxE, λx, λc, λcE)
+function equality_violation(f, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars)
+    equality_violation(f, bounds, x, c,
+                       bstate.slack_x, bstate.slack_c, bstate.λxE, bstate.λx, bstate.λc, bstate.λcE)
+end
+equality_violation(bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) =
+    equality_violation(identity, bounds, x, c, bstate)
+equality_violation(f, bounds::ConstraintBounds, state::AbstractBarrierState) =
+    equality_violation(f, bounds, state.x, state.constr_c, state.bstate)
+equality_violation(bounds::ConstraintBounds, state::AbstractBarrierState) =
+    equality_violation(identity, bounds, state)
+equality_violation(f, constraints::AbstractConstraintsFunction, state::AbstractBarrierState) =
+    equality_violation(f, constraints.bounds, state)
+equality_violation(constraints::AbstractConstraintsFunction, state::AbstractBarrierState) =
+    equality_violation(constraints.bounds, state)
+
+# violations of s = σ*(v-b)
+function equality_violation(f, s, v, ineq, σ, b, λ)
+    ret = f(zero(eltype(λ))*(zero(eltype(s))-zero(eltype(σ))*(zero(eltype(v))-zero(eltype(b)))))
+    for (i,iv) in enumerate(ineq)
+        ret += f(λ[i]*(s[i] - σ[i]*(v[iv]-b[i])))
+    end
+    ret
+end
+
+# violations of v = target
+function equality_violation(f, v, target, idx, λ)
+    ret = f(zero(eltype(λ))*(zero(eltype(v))-zero(eltype(target))))
+    for (i,iv) in enumerate(idx)
+        ret += f(λ[i]*(target[i] - v[iv]))
+    end
+    ret
+end
+
+"""
+    equality_grad!(gx, gbstate, bounds, x, c, J, bstate)
+
+Compute the gradient of `equality_violation`, *adding* the result to `gx` (an array) and `gbstate::BarrierStateVars`.
+"""
+function equality_grad!(gx, gsx, gsc, gλxE, gλx, gλc, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λxE, λx, λc, λcE)
+    gx[bounds.eqx] = gx[bounds.eqx] - λxE
+    equality_grad_var!(gsx, gx, bounds.ineqx, bounds.σx, λx)
+    equality_grad_var!(gsc, gx, bounds.ineqc, bounds.σc, λc, J)
+    equality_grad_var!(gx, bounds.eqc, λcE, J)
+    equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx)
+    equality_grad_λ!(gλx, sx, x, bounds.ineqx, bounds.σx, bounds.bx)
+    equality_grad_λ!(gλc, sc, c, bounds.ineqc, bounds.σc, bounds.bc)
+    equality_grad_λ!(gλcE, c, bounds.valc, bounds.eqc)
+end
+equality_grad!(gx, gb::BarrierStateVars, bounds::ConstraintBounds, x, c, J, b::BarrierStateVars) =
+    equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λxE, gb.λx, gb.λc, gb.λcE,
+                   bounds, x, c, J,
+                   b.slack_x, b.slack_c, b.λxE, b.λx, b.λc, b.λcE)
+
+# violations of s = σ*(x-b)
+function equality_grad_var!(gs, gx, ineq, σ, λ)
+    for (i,ix) in enumerate(ineq)
+        λi = λ[i]
+        gs[i] += λi
+        gx[ix] -= λi*σ[i]
+    end
+    nothing
+end
+
+function equality_grad_var!(gs, gx, ineq, σ, λ, J)
+    gs[:] = gs + λ
+    if !isempty(ineq)
+        gx[:] = gx - view(J, ineq, :)'*(λ.*σ)
+    end
+    nothing
+end
+
+function equality_grad_λ!(gλ, s, v, ineq, σ, b)
+    for (i,iv) in enumerate(ineq)
+        gλ[i] += s[i] - σ[i]*(v[iv]-b[i])
+    end
+    nothing
+end
+
+# violations of v = target
+function equality_grad_var!(gx, idx, λ, J)
+    if !isempty(idx)
+        gx[:] = gx - view(J, idx, :)'*λ
+    end
+    nothing
+end
+
+function equality_grad_λ!(gλ, v, target, idx)
+    for (i,iv) in enumerate(idx)
+        gλ[i] += target[i] - v[iv]
+    end
+    nothing
+end
+
+## Utilities for representing total state as single vector
+function pack_vec(x, b::BarrierStateVars)
+    n = length(x)
+    for fn in fieldnames(b)
+        n += length(getfield(b, fn))
+    end
+    vec = Array{eltype(x)}(n)
+    pack_vec!(vec, x, b)
+end
+
+function pack_vec!(vec, x, b::BarrierStateVars)
+    k = pack_vec!(vec, x, 0)
+    for fn in fieldnames(b)
+        k = pack_vec!(vec, getfield(b, fn), k)
+    end
+    k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))"))
+    vec
+end
+function pack_vec!(vec, x, k::Int)
+    for i = 1:length(x)
+        vec[k+=1] = x[i]
+    end
+    k
+end
+function unpack_vec!(x, b::BarrierStateVars, vec::Vector)
+    k = unpack_vec!(x, vec, 0)
+    for fn in fieldnames(b)
+        k = unpack_vec!(getfield(b, fn), vec, k)
+    end
+    k == length(vec) || throw(DimensionMismatch("vec should have length $k, got $(length(vec))"))
+    x, b
+end
+function unpack_vec!(x, vec::Vector, k::Int)
+    for i = 1:length(x)
+        x[i] = vec[k+=1]
+    end
+    k
+end
diff --git a/test/constraints.jl b/test/constraints.jl
index 642c5ae46..c3eab9de3 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -43,6 +43,113 @@ ConstraintBounds:
         @test_throws ArgumentError Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 4.8], [5.0, 4.0])
         @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
     end
+
+    @testset "Lagrangian val/grad" begin
+        function check_autodiff(d, bounds, x, cfun::Function, bstate, μ)
+            c = cfun(x)
+            J = ForwardDiff.jacobian(cfun, x)
+            # Using real-valued inputs
+            p = Optim.pack_vec(x, bstate)
+            ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+            pgrad = similar(p)
+            ftot!(p, pgrad)
+            # Compute with ForwardDiff
+            chunksize = min(8, length(p))
+            TD = ForwardDiff.Dual{chunksize,eltype(p)}
+            xd = Array{TD}(length(x))
+            bstated = Optim.BarrierStateVars{TD}(bounds)
+            pcmp = similar(p)
+            ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ, nothing)
+            ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}())
+            @test pcmp ≈ pgrad
+        end
+        # Basic setup
+        μ = 0.2345678
+        A = randn(3,3); H = A'*A
+        d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x))
+        x = clamp.(randn(3), -0.99, 0.99)
+        gx = similar(x)
+        cfun = x->Float64[]
+        c = Float64[]
+        J = Array{Float64}(0,0)
+        ## No constraints
+        bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[])
+        bstate = Optim.BarrierStateVars(bounds, x)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        @test f_x == L == d.f(x)
+        @test gx == H*x
+        ## Pure equality constraints on variables
+        d = DifferentiableFunction(x->0.0, (x,storage)->fill!(storage, 0))
+        xbar = fill(0.2, length(x))
+        bounds = Optim.ConstraintBounds(xbar, xbar, [], [])
+        bstate = Optim.BarrierStateVars(bounds)
+        rand!(bstate.λxE)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        @test f_x == 0
+        @test L ≈ dot(bstate.λxE, xbar-x)
+        @test gx == -bstate.λxE
+        @test bgrad.λxE == xbar-x
+        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        ## Nonnegativity constraints
+        bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], [])
+        y = rand(length(x))
+        bstate = Optim.BarrierStateVars(bounds, y)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        @test f_x == 0
+        @test L ≈ -μ*sum(log, y)
+        @test gx == -μ./y
+        check_autodiff(d, bounds, y, cfun, bstate, μ)
+        ## General inequality constraints on variables
+        bounds = Optim.ConstraintBounds(rand(length(x))-2, rand(length(x))+1, [], [])
+        bstate = Optim.BarrierStateVars(bounds, x)
+        rand!(bstate.slack_x)  # intentionally displace from the correct value
+        rand!(bstate.λx)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        @test f_x == 0
+        Ltarget = -μ*sum(log, bstate.slack_x) +
+            dot(bstate.λx, bstate.slack_x - bounds.σx.*(x[bounds.ineqx]-bounds.bx))
+        @test L ≈ Ltarget
+        dx = similar(gx); fill!(dx, 0)
+        for (i,j) in enumerate(bounds.ineqx)
+            dx[j] -= bounds.σx[i]*bstate.λx[i]
+        end
+        @test gx ≈ dx
+        @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx
+        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        ## Nonlinear equality constraints
+        cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
+        c = cfun(x)
+        J = ForwardDiff.jacobian(cfun, x)
+        cbar = rand(length(c))
+        bounds = Optim.ConstraintBounds([], [], cbar, cbar)
+        bstate = Optim.BarrierStateVars(bounds, x, c)
+        rand!(bstate.λcE)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        @test f_x == 0
+        @test L ≈ dot(bstate.λcE, cbar-c)
+        @test gx ≈ -J'*bstate.λcE
+        @test bgrad.λcE == cbar-c
+        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        ## Nonlinear inequality constraints
+        bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1)
+        bstate = Optim.BarrierStateVars(bounds, x, c)
+        rand!(bstate.slack_c)  # intentionally displace from the correct value
+        rand!(bstate.λc)
+        bgrad = similar(bstate)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        @test f_x == 0
+        Ltarget = -μ*sum(log, bstate.slack_c) +
+            dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc))
+        @test L ≈ Ltarget
+        @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc)
+        @test bgrad.slack_c == -μ./bstate.slack_c + bstate.λc
+        check_autodiff(d, bounds, x, cfun, bstate, μ)
+    end
 end
 
 nothing

From 1ba0abf80d584d825525aabe2f84e23d20eb4b21 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 5 Nov 2016 05:11:30 -0500
Subject: [PATCH 04/40] Fixes for julia-0.4

---
 src/interior.jl     | 12 +++++++++---
 src/types.jl        |  4 ++++
 test/constraints.jl | 24 ++++++++++++++++++++----
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 301bb9a98..269eac003 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -17,7 +17,7 @@ end
 # not matching the equality constraints.  So we allow them to
 # differ, and require that the algorithm can cope with it.
 
-function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds)
+@compat function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds)
     slack_x = Array{T}(length(bounds.ineqx))
     slack_c = Array{T}(length(bounds.ineqc))
     λxE = Array{T}(length(bounds.eqx))
@@ -278,7 +278,7 @@ end
 function equality_grad_var!(gs, gx, ineq, σ, λ, J)
     gs[:] = gs + λ
     if !isempty(ineq)
-        gx[:] = gx - view(J, ineq, :)'*(λ.*σ)
+        gx[:] = gx - view5(J, ineq, :)'*(λ.*σ)
     end
     nothing
 end
@@ -293,7 +293,7 @@ end
 # violations of v = target
 function equality_grad_var!(gx, idx, λ, J)
     if !isempty(idx)
-        gx[:] = gx - view(J, idx, :)'*λ
+        gx[:] = gx - view5(J, idx, :)'*λ
     end
     nothing
 end
@@ -343,3 +343,9 @@ function unpack_vec!(x, vec::Vector, k::Int)
     end
     k
 end
+
+if VERSION >= v"0.5.0"
+    view5(A, i, j) = view(A, i, j)
+else
+    view5(A, i, j) = A[i,j]
+end
diff --git a/src/types.jl b/src/types.jl
index 06b90463d..699fc3492 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -428,6 +428,10 @@ Base.show(io::IO, uqstr::UnquotedString) = print(io, uqstr.str)
 
 Base.array_eltype_show_how(a::Vector{UnquotedString}) = false, ""
 
+if !isdefined(Base, :IOContext)
+    IOContext(io; kwargs...) = io
+end
+
 function showeq(io, indent, eq, val, chr, style)
     if !isempty(eq)
         print(io, '\n', indent)
diff --git a/test/constraints.jl b/test/constraints.jl
index c3eab9de3..07fb4a91d 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -1,8 +1,24 @@
-using Optim, Base.Test
+using Optim
+if VERSION >= v"0.5.0-dev+7720"
+    using Base.Test
+else
+    using BaseTestNext
+    const Test = BaseTestNext
+end
+
+if VERSION >= v"0.5.0-dev+2396"
+    macro inferred5(ex)
+        Expr(:macrocall, Symbol("@inferred"), esc(ex))
+    end
+else
+    macro inferred5(ex)
+        esc(ex)
+    end
+end
 
 @testset "Constraints" begin
     @testset "Bounds parsing" begin
-        b = @inferred(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
+        b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
         @test b.eqx == [3]
         @test b.valx == [2.0]
         @test b.ineqx == [1,2,2]
@@ -27,7 +43,7 @@ ConstraintBounds:
     c_1=5.0
     c_2≥3.8,c_2≤4.0"""
 
-        b = @inferred(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
+        b = @inferred5(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
         for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz)
             @test isempty(getfield(b, fn))
         end
@@ -67,7 +83,7 @@ ConstraintBounds:
         μ = 0.2345678
         A = randn(3,3); H = A'*A
         d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x))
-        x = clamp.(randn(3), -0.99, 0.99)
+        x = broadcast(clamp, randn(3), -0.99, 0.99)
         gx = similar(x)
         cfun = x->Float64[]
         c = Float64[]

From 7d2ac1eccc7a960075dc588b95a21174cfcde644 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 6 Nov 2016 11:43:00 -0600
Subject: [PATCH 05/40] Add interior point Newton method state, setup of Newton
 update equation

---
 src/Optim.jl        |   1 +
 src/deprecate.jl    |   6 ++
 src/interior.jl     |  44 +++++++++++--
 src/ipnewton.jl     | 151 ++++++++++++++++++++++++++++++++++++++++++++
 src/types.jl        |  23 +++++--
 test/constraints.jl | 123 ++++++++++++++++++++++++++++++++++--
 6 files changed, 330 insertions(+), 18 deletions(-)
 create mode 100644 src/ipnewton.jl

diff --git a/src/Optim.jl b/src/Optim.jl
index e96940c8f..c900b5825 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -79,6 +79,7 @@ module Optim
     # Constrained optimization
     include("fminbox.jl")
     include("interior.jl")
+    include("ipnewton.jl")
 
     # trust region methods
     include("levenberg_marquardt.jl")
diff --git a/src/deprecate.jl b/src/deprecate.jl
index 17ee6f2be..b76fb4a94 100644
--- a/src/deprecate.jl
+++ b/src/deprecate.jl
@@ -23,3 +23,9 @@ end
 @deprecate interpolating_linesearch! LineSearches.strongwolfe!
 @deprecate backtracking_linesearch! LineSearches.backtracking!
 @deprecate interpbacktracking_linesearch! LineSearches.interpbacktracking!
+
+if VERSION >= v"0.5.0"
+    view5(A, i, j) = view(A, i, j)
+else
+    view5(A, i, j) = A[i,j]
+end
diff --git a/src/interior.jl b/src/interior.jl
index 269eac003..2f339f694 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -75,6 +75,44 @@ function Base.show(io::IO, b::BarrierStateVars)
     end
 end
 
+@compat Base.:(==)(v::BarrierStateVars, w::BarrierStateVars) =
+    v.slack_x == w.slack_x &&
+    v.slack_c == w.slack_c &&
+    v.λxE == w.λxE &&
+    v.λx == w.λx &&
+    v.λc == w.λc &&
+    v.λcE == w.λcE
+
+const bsv_seed = sizeof(UInt) == 8 ? 0x145b788192d1cde3 : 0x766a2810  # sizeof counts bytes, not bits
+Base.hash(b::BarrierStateVars, u::UInt) =  # mixes in the same six fields that == compares
+    hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed))))))
+
+
+"""
+    BarrierLineSearch{T}
+
+Parameters for interior-point line search methods that use only the value
+"""
+immutable BarrierLineSearch{T}
+    c::Vector{T}                  # value of constraints-functions at trial point
+    bstate::BarrierStateVars{T}   # trial point for slack and λ variables
+end
+
+"""
+    BarrierLineSearchGrad{T}
+
+Parameters for interior-point line search methods that exploit the slope.
+"""
+immutable BarrierLineSearchGrad{T}
+    c::Vector{T}                  # value of constraints-functions at trial point
+    J::Matrix{T}                  # constraints-Jacobian at trial point
+    bstate::BarrierStateVars{T}   # trial point for slack and λ variables
+    bgrad::BarrierStateVars{T}    # trial point's gradient
+end
+
+# Fallbacks (for methods that don't need these)
+after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing
+update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
 
 ## Computation of the Lagrangian and its gradient
 # This is in a parametrization that is also useful during linesearch
@@ -343,9 +381,3 @@ function unpack_vec!(x, vec::Vector, k::Int)
     end
     k
 end
-
-if VERSION >= v"0.5.0"
-    view5(A, i, j) = view(A, i, j)
-else
-    view5(A, i, j) = A[i,j]
-end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
new file mode 100644
index 000000000..db501f930
--- /dev/null
+++ b/src/ipnewton.jl
@@ -0,0 +1,151 @@
+immutable IPNewton <: IPOptimizer
+    linesearch!::Function
+end
+
+IPNewton(; linesearch!::Function = backtrack_constrained!) =
+  IPNewton(linesearch!)
+
+type IPNewtonState{T,N} <: AbstractBarrierState
+    @add_generic_fields()
+    x_previous::Array{T,N}
+    g::Array{T,N}
+    f_x_previous::T
+    H::Matrix{T}
+    Hd::Vector{Int8}
+    s::Array{T,N}  # step for x
+    # Barrier penalty fields
+    μ::T                  # coefficient of the barrier penalty
+    bstate::BarrierStateVars{T}   # value of slack and λ variables (current "position")
+    bgrad::BarrierStateVars{T}    # gradient of slack and λ variables at current "position"
+    constr_c::Vector{T}   # value of the user-supplied constraints at x
+    constr_J::Matrix{T}   # value of the user-supplied Jacobian at x
+    @add_linesearch_fields()
+    b_ls::BarrierLineSearch{T}
+    gf::Vector{T}
+    Hf::Matrix{T}
+end
+
+function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T})
+    # Check feasibility of the initial state
+    mc = nconstraints(constraints)
+    constr_c = Array{T}(mc)
+    constraints.c!(initial_x, constr_c)
+#    isfeasible(constraints, initial_x, constr_c) || error("initial guess must be feasible")
+
+    # Allocate fields for the objective function
+    n = length(initial_x)
+    g = Array(T, n)
+    s = Array(T, n)
+    x_ls, g_ls = Array(T, n), Array(T, n)
+    f_x_previous, f_x = NaN, d.fg!(initial_x, g)
+    f_calls, g_calls = 1, 1
+    H = Array(T, n, n)
+    Hd = Array{Int8}(n)
+    d.h!(initial_x, H)
+    h_calls = 1
+
+    # More constraints
+    constr_J = Array{T}(mc, n)
+    constr_gtemp = Array{T}(n)
+    gf = Array{T}(0)    # will be replaced
+    Hf = Array{T}(0,0)  #   "
+    constraints.jacobian!(initial_x, constr_J)
+    μ = T(1)
+    bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c)
+    bgrad = similar(bstate)
+    b_ls = BarrierLineSearch(similar(constr_c), similar(bstate))
+
+    state = IPNewtonState("Interior-point Newton's Method",
+        length(initial_x),
+        copy(initial_x), # Maintain current state in state.x
+        f_x, # Store current f in state.f_x
+        f_calls, # Track f calls in state.f_calls
+        g_calls, # Track g calls in state.g_calls
+        h_calls,
+        copy(initial_x), # Maintain current state in state.x_previous
+        g, # Store current gradient in state.g
+        T(NaN), # Store previous f in state.f_x_previous
+        H,
+        Hd,
+        similar(initial_x), # Maintain current x-search direction in state.s
+        μ,
+        bstate,
+        bgrad,
+        constr_c,
+        constr_J,
+        @initial_linesearch()..., # Maintain a cache for line search results in state.lsr
+        b_ls,
+        gf,
+        Hf)
+    #    μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J)
+    update_g!(d, constraints, state, method)
+    update_h!(d, constraints, state, method)
+end
+
+function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
+    lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ, method)
+end
+
+function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
+    μ, Hxx, J = state.μ, state.H, state.constr_J
+    d.h!(state.x, Hxx)
+    # Collect the values of the coefficients of the inequality constraints
+    bounds = constraints.bounds
+    ineqc, σc, λc = bounds.ineqc, bounds.σc, state.bstate.λc
+    m, n = size(J, 1), size(J, 2)
+    λ = zeros(eltype(bounds), m)
+    for i = 1:length(ineqc)
+        λ[ineqc[i]] -= λc[i]*σc[i]
+    end
+    # Add the weighted hessian terms from the nonlinear constraints
+    constraints.h!(state.x, λ, Hxx)
+    # Add the Jacobian terms
+    JI = view5(J, ineqc, :)
+    Sinv2 = Diagonal(1./state.bstate.slack_c.^2)
+    HJ = JI'*Sinv2*JI
+    for j = 1:n, i = 1:n
+        Hxx[i,j] += μ*HJ[i,j]
+    end
+    # Add the variable inequalities
+    iz, x = bounds.iz, state.x
+    for i in iz
+        Hxx[i,i] += μ/x[i]^2
+    end
+    ineqx, sx = bounds.ineqx, state.bstate.slack_x
+    for (i,j) in enumerate(ineqx)
+        Hxx[j,j] += μ/sx[i]^2
+    end
+    # Perform a positive factorization
+    Hpc, state.Hd = ldltfact(Positive, Hxx)
+    Hp = full(Hpc)
+    # Now add the equality constraint hessian terms
+    eqc, λcE = bounds.eqc, state.bstate.λcE
+    fill!(λ, 0)
+    for i = 1:length(eqc)
+        λ[eqc[i]] -= λcE[i]
+    end
+    constraints.h!(state.x, λ, Hp)
+    # Also add these to Hxx so we have the true Hessian (the one
+    # without forcing positive-definiteness)
+    constraints.h!(state.x, λ, Hxx)
+    # Form the total Hessian
+    JEx = zeros(eltype(bounds), length(bounds.eqx), length(state.x))
+    for (i,j) in enumerate(bounds.eqx)
+        JEx[i,j] = 1
+    end
+    JEc = view5(J, eqc, :)
+    Jod = zeros(eltype(JEx), size(JEc, 1), size(JEx, 1))
+    state.Hf = [Hp -JEx' -JEc';
+                -JEx zeros(eltype(JEx), size(JEx,1), size(JEx,1)) Jod';
+                -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))]
+    # Also form the total gradient
+    bgrad = state.bgrad
+    gI = state.g + JI'*Diagonal(σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc)
+    for (i,j) in enumerate(ineqx)
+        gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2)
+    end
+    state.gf = [gI;
+                bgrad.λxE;
+                bgrad.λcE]
+    state
+end
diff --git a/src/types.jl b/src/types.jl
index 699fc3492..6e710fd61 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,4 +1,6 @@
 abstract Optimizer
+abstract ConstrainedOptimizer <: Optimizer
+abstract IPOptimizer <: ConstrainedOptimizer
 immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     x_tol::Float64
     f_tol::Float64
@@ -248,6 +250,7 @@ end
 # additional variables. See `parse_constraints` for details.
 
 immutable ConstraintBounds{T}
+    nc::Int          # Number of linear/nonlinear constraints
     # Box-constraints on variables (i.e., directly on x)
     eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...)
     valx::Vector{T}  # value of equality-constrained x
@@ -269,12 +272,14 @@ function ConstraintBounds(lx, ux, lc, uc)
 end
 function _cb{Tx,Tc}(lx::AbstractArray{Tx}, ux::AbstractArray{Tx}, lc::AbstractVector{Tc}, uc::AbstractVector{Tc})
     T = promote_type(Tx,Tc)
-    ConstraintBounds{T}(parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...)
+    ConstraintBounds{T}(length(lc), parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...)
 end
 
 Base.eltype{T}(::Type{ConstraintBounds{T}}) = T
 Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb))
 
+nconstraints(cb::ConstraintBounds) = cb.nc
+
 function Base.show(io::IO, cb::ConstraintBounds)
     indent = "    "
     print(io, "ConstraintBounds:")
@@ -290,27 +295,33 @@ end
 
 abstract AbstractConstraintsFunction
 
+nconstraints(constraints::AbstractConstraintsFunction) = nconstraints(constraints.bounds)
+
 immutable DifferentiableConstraintsFunction{F,J,T} <: AbstractConstraintsFunction
-    bounds::ConstraintBounds{T}
     c!::F         # c!(x, storage) stores the value of the constraint-functions at x
     jacobian!::J  # jacobian!(x, storage) stores the Jacobian of the constraint-functions
+    bounds::ConstraintBounds{T}
 end
 
 function DifferentiableConstraintsFunction(c!, jacobian!, lx, ux, lc, uc)
     b = ConstraintBounds(lx, ux, lc, uc)
-    DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(b, c!, jacobian!)
+    DifferentiableConstraintsFunction(c!, jacobian!, b)
 end
+DifferentiableConstraintsFunction(c!, jacobian!, bounds::ConstraintBounds) =  # build from already-parsed bounds
+    DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(bounds)}(c!, jacobian!, bounds)
 
-immutable TwiceDifferentiableConstraintsFunction{F,J,H,T,N} <: AbstractConstraintsFunction
-    bounds::ConstraintBounds{T}
+immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction
     c!::F
     jacobian!::J
     h!::H   # Hessian of the barrier terms
+    bounds::ConstraintBounds{T}
 end
 function TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, lx, ux, lc, uc)
     b = ConstraintBounds(lx, ux, lc, uc)
-    TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(b, c!, jacobian!, h!)
+    TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, b)
 end
+TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, bounds::ConstraintBounds) =  # build from already-parsed bounds
+    TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(bounds)}(c!, jacobian!, h!, bounds)
 
 ## Utilities
 
diff --git a/test/constraints.jl b/test/constraints.jl
index 07fb4a91d..6397ad824 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -1,4 +1,4 @@
-using Optim
+using Optim, PositiveFactorizations
 if VERSION >= v"0.5.0-dev+7720"
     using Base.Test
 else
@@ -60,7 +60,7 @@ ConstraintBounds:
         @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
     end
 
-    @testset "Lagrangian val/grad" begin
+    @testset "IPNewton" begin
         function check_autodiff(d, bounds, x, cfun::Function, bstate, μ)
             c = cfun(x)
             J = ForwardDiff.jacobian(cfun, x)
@@ -79,15 +79,31 @@ ConstraintBounds:
             ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}())
             @test pcmp ≈ pgrad
         end
+        function setstate!(state, μ)
+            state.μ = μ
+            Optim.update_g!(d, constraints, state, method)
+            Optim.update_h!(d, constraints, state, method)
+        end
         # Basic setup
         μ = 0.2345678
         A = randn(3,3); H = A'*A
-        d = DifferentiableFunction(x->(x'*H*x)[1]/2, (x,storage)->(storage[:] = H*x))
+        d = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H))
         x = broadcast(clamp, randn(3), -0.99, 0.99)
         gx = similar(x)
         cfun = x->Float64[]
         c = Float64[]
         J = Array{Float64}(0,0)
+        method = Optim.IPNewton(identity)
+        options = OptimizationOptions()
+        ## In the code, variable constraints are special-cased (for
+        ## reasons of user-convenience and efficiency).  It's
+        ## important to check that the special-casing yields the same
+        ## result as the general case. So in the first three
+        ## constrained cases below, we compare variable constraints
+        ## against the same kind of constraint applied generically.
+        cvar! = (x, c) -> copy!(c, x)
+        cvarJ! = (x, J) -> copy!(J, eye(size(J)...))
+        cvarh! = (x, λ, h) -> h  # h! adds to h, it doesn't replace it
         ## No constraints
         bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[])
         bstate = Optim.BarrierStateVars(bounds, x)
@@ -95,8 +111,13 @@ ConstraintBounds:
         f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
         @test f_x == L == d.f(x)
         @test gx == H*x
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        @test state.gf ≈ gx
+        @test state.Hf ≈ H
         ## Pure equality constraints on variables
-        d = DifferentiableFunction(x->0.0, (x,storage)->fill!(storage, 0))
+        d = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0))
         xbar = fill(0.2, length(x))
         bounds = Optim.ConstraintBounds(xbar, xbar, [], [])
         bstate = Optim.BarrierStateVars(bounds)
@@ -108,6 +129,23 @@ ConstraintBounds:
         @test gx == -bstate.λxE
         @test bgrad.λxE == xbar-x
         check_autodiff(d, bounds, x, cfun, bstate, μ)
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.λxE, bstate.λxE)
+        setstate!(state, μ)
+        @test state.gf ≈ [gx; xbar-x]
+        n = length(x)
+        @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
+        # Now again using the generic machinery
+        bounds = Optim.ConstraintBounds([], [], xbar, xbar)
+        constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.λcE, bstate.λxE)
+        setstate!(state, μ)
+        @test state.gf ≈ [gx; xbar-x]
+        n = length(x)
+        @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         ## Nonnegativity constraints
         bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], [])
         y = rand(length(x))
@@ -118,16 +156,31 @@ ConstraintBounds:
         @test L ≈ -μ*sum(log, y)
         @test gx == -μ./y
         check_autodiff(d, bounds, y, cfun, bstate, μ)
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
+        state = Optim.initial_state(method, options, d, constraints, y)
+        setstate!(state, μ)
+        @test state.gf ≈ -μ./y
+        @test state.Hf ≈ μ*Diagonal(1./y.^2)
+        # Now again using the generic machinery
+        bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
+        constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
+        state = Optim.initial_state(method, options, d, constraints, y)
+        setstate!(state, μ)
+        @test state.gf ≈ -μ./y
+        @test state.Hf ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
-        bounds = Optim.ConstraintBounds(rand(length(x))-2, rand(length(x))+1, [], [])
+        lb, ub = rand(length(x))-2, rand(length(x))+1
+        bounds = Optim.ConstraintBounds(lb, ub, [], [])
         bstate = Optim.BarrierStateVars(bounds, x)
         rand!(bstate.slack_x)  # intentionally displace from the correct value
         rand!(bstate.λx)
         bgrad = similar(bstate)
         f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
         @test f_x == 0
+        s = bounds.σx .* (x[bounds.ineqx] - bounds.bx)
         Ltarget = -μ*sum(log, bstate.slack_x) +
-            dot(bstate.λx, bstate.slack_x - bounds.σx.*(x[bounds.ineqx]-bounds.bx))
+            dot(bstate.λx, bstate.slack_x - s)
         @test L ≈ Ltarget
         dx = similar(gx); fill!(dx, 0)
         for (i,j) in enumerate(bounds.ineqx)
@@ -136,10 +189,42 @@ ConstraintBounds:
         @test gx ≈ dx
         @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx
         check_autodiff(d, bounds, x, cfun, bstate, μ)
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.slack_x, bstate.slack_x)
+        copy!(state.bstate.λx, bstate.λx)
+        setstate!(state, μ)
+        gxs, hxs = zeros(length(x)), zeros(length(x))
+        s = state.bstate.slack_x
+        for (i,j) in enumerate(bounds.ineqx)
+            gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2
+            hxs[j] += μ/s[i]^2
+        end
+        @test state.gf ≈ gxs
+        @test state.Hf ≈ Diagonal(hxs)
+        # Now again using the generic machinery
+        bounds = Optim.ConstraintBounds([], [], lb, ub)
+        constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.slack_c, bstate.slack_x)
+        copy!(state.bstate.λc, bstate.λx)
+        setstate!(state, μ)
+        @test state.gf ≈ gxs
+        @test state.Hf ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
         cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
+        cfun! = (x, c) -> copy!(c, cfun(x))
+        cJ! = (x, J) -> copy!(J, [2*x[1] 2*x[2] 0;
+                                  0 x[3]^2 2*x[2]*x[3]])
+        ch! = function(x, λ, h)
+            h[1,1] += 2*λ[1]
+            h[2,2] += 2*λ[1]
+            h[3,3] += 2*λ[2]*x[2]
+        end
         c = cfun(x)
         J = ForwardDiff.jacobian(cfun, x)
+        Jtmp = similar(J); @test cJ!(x, Jtmp) ≈ J  # just to check we did it right
         cbar = rand(length(c))
         bounds = Optim.ConstraintBounds([], [], cbar, cbar)
         bstate = Optim.BarrierStateVars(bounds, x, c)
@@ -151,6 +236,15 @@ ConstraintBounds:
         @test gx ≈ -J'*bstate.λcE
         @test bgrad.λcE == cbar-c
         check_autodiff(d, bounds, x, cfun, bstate, μ)
+        constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.λcE, bstate.λcE)
+        setstate!(state, μ)
+        heq = zeros(length(x), length(x))
+        ch!(x, bstate.λcE, heq)
+        @test state.gf ≈ [gx; cbar-c]
+        @test state.Hf ≈ [eye(length(x))-heq -J';
+                          -J zeros(size(J,1), size(J,1))]
         ## Nonlinear inequality constraints
         bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1)
         bstate = Optim.BarrierStateVars(bounds, x, c)
@@ -164,7 +258,24 @@ ConstraintBounds:
         @test L ≈ Ltarget
         @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc)
         @test bgrad.slack_c == -μ./bstate.slack_c + bstate.λc
+        @test bgrad.λc == bstate.slack_c - bounds.σc .* (c[bounds.ineqc] - bounds.bc)
         check_autodiff(d, bounds, x, cfun, bstate, μ)
+        constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        copy!(state.bstate.slack_c, bstate.slack_c)
+        copy!(state.bstate.λc, bstate.λc)
+        setstate!(state, μ)
+        hineq = zeros(length(x), length(x))
+        λ = zeros(size(J, 1))
+        for (i,j) in enumerate(bounds.ineqc)
+            λ[j] += bstate.λc[i]*bounds.σc[i]
+        end
+        ch!(x, λ, hineq)
+        JI = J[bounds.ineqc,:]
+        hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq
+        hp = full(cholfact(Positive, hxx))
+        @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2))
+        @test state.Hf ≈ hp
     end
 end
 

From 6441534d85cb7790a2e5a081c84920e5b98f15a5 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 8 Nov 2016 03:54:18 -0600
Subject: [PATCH 06/40] Add interior-point Newton step update and backtracking
 linesearch

---
 src/Optim.jl            |   1 +
 src/interior.jl         | 134 +++++++++++++++++++++++++++++++++-------
 src/iplinesearch.jl     |  15 +++++
 src/ipnewton.jl         | 110 +++++++++++++++++++++++++++++++--
 src/types.jl            |  24 +++++++
 src/utilities/update.jl |   7 +++
 test/constraints.jl     | 134 ++++++++++++++++++++++++++++++++--------
 7 files changed, 375 insertions(+), 50 deletions(-)
 create mode 100644 src/iplinesearch.jl

diff --git a/src/Optim.jl b/src/Optim.jl
index c900b5825..1afe15fed 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -80,6 +80,7 @@ module Optim
     include("fminbox.jl")
     include("interior.jl")
     include("ipnewton.jl")
+    include("iplinesearch.jl")
 
     # trust region methods
     include("levenberg_marquardt.jl")
diff --git a/src/interior.jl b/src/interior.jl
index 2f339f694..b9b5b62c5 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -2,12 +2,14 @@ abstract AbstractBarrierState
 
 # These are used not only for the current state, but also for the step and the gradient
 immutable BarrierStateVars{T}
-    slack_x::Vector{T}    # values of slack variables for x
-    slack_c::Vector{T}    # values of slack variables for c
-    λxE::Vector{T}        # λ for equality constraints on x
-    λx::Vector{T}         # λ for equality constraints on slack_x
-    λc::Vector{T}         # λ for equality constraints on slack_c
-    λcE::Vector{T}        # λ for linear/nonlinear equality constraints
+    slack_x::Vector{T}     # values of slack variables for x
+    slack_c::Vector{T}     # values of slack variables for c
+    active_x::Vector{Bool} # active constraints for x (see solve_active_inequalities)
+    active_c::Vector{Bool} # active constraints for c
+    λxE::Vector{T}         # λ for equality constraints on x
+    λx::Vector{T}          # λ for equality constraints on slack_x
+    λc::Vector{T}          # λ for equality constraints on slack_c
+    λcE::Vector{T}         # λ for linear/nonlinear equality constraints
 end
 # Note on λxE:
 # We could just set equality-constrained variables to their
@@ -24,24 +26,27 @@ end
     λx = similar(slack_x)
     λc = similar(slack_c)
     λcE = Array{T}(length(bounds.eqc))
-    sv = BarrierStateVars{T}(slack_x, slack_c, λxE, λx, λc, λcE)
+    sv = BarrierStateVars{T}(slack_x, slack_c, fill(false, length(slack_x)),
+                             fill(false, length(slack_c)), λxE, λx, λc, λcE)
 end
 BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds)
 
 function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x)
     sv = BarrierStateVars(bounds)
-    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx)
     sv
 end
 function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c)
     sv = BarrierStateVars(bounds)
-    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
-    setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc)
+    setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    setslack!(sv.slack_c, sv.active_c, c, bounds.ineqc, bounds.σc, bounds.bc)
     sv
 end
-function setslack!(slack, v, ineq, σ, b)
+function setslack!(slack, active, v, ineq, σ, b)
     for i = 1:length(ineq)
-        slack[i] = σ[i]*(v[ineq[i]]-b[i])
+        dv = v[ineq[i]]-b[i]
+        slack[i] = σ[i]*dv
+        active[i] = dv == 0
     end
     slack
 end
@@ -49,6 +54,8 @@ end
 Base.similar(bstate::BarrierStateVars) =
     BarrierStateVars(similar(bstate.slack_x),
                      similar(bstate.slack_c),
+                     similar(bstate.active_x),
+                     similar(bstate.active_c),
                      similar(bstate.λxE),
                      similar(bstate.λx),
                      similar(bstate.λc),
@@ -57,6 +64,8 @@ Base.similar(bstate::BarrierStateVars) =
 function Base.fill!(b::BarrierStateVars, val)
     fill!(b.slack_x, val)
     fill!(b.slack_c, val)
+    fill!(b.active_x, false)
+    fill!(b.active_c, false)
     fill!(b.λxE, val)
     fill!(b.λx, val)
     fill!(b.λc, val)
@@ -110,6 +119,69 @@ immutable BarrierLineSearchGrad{T}
     bgrad::BarrierStateVars{T}    # trial point's gradient
 end
 
+function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α)
+    ls_update!(out.slack_x, base.slack_x, step.slack_x, α)
+    ls_update!(out.slack_c, base.slack_c, step.slack_c, α)
+    ls_update!(out.λxE, base.λxE, step.λxE, α)
+    ls_update!(out.λx, base.λx, step.λx, α)
+    ls_update!(out.λc, base.λc, step.λc, α)
+    ls_update!(out.λcE, base.λcE, step.λcE, α)
+    out
+end
+
+# Explicit solution for slack, λ when an inequality constraint is
+# "active." This is necessary (or at least helpful) when c-b == 0 due
+# to roundoff error, in which case the KKT equations don't have an
+# exact solution within the precision.  We punt on the ∂λ equation
+# (which reduces to the slack, which should be small anyway), and
+# focus on the ∂x and ∂slack equations (therefore setting slack and
+# λ). By setting these to their exact solutions, we balance the forces
+# due to the barrier.
+function solve_active_inequalities!(d, constraints, state)
+    x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds
+    nactive, nchanged = tally_active!(bstate.active_x, 0, 0, x, bounds.ineqx, bounds.bx)
+    nx = nactive  # count of active constraints on x alone (before tallying c)
+    nactive, nchanged = tally_active!(bstate.active_c, nactive, nchanged, c, bounds.ineqc, bounds.bc)
+    if nactive == 0 || nchanged == 0  # nothing active, or active flags unchanged by this tally
+        return nothing
+    end
+    # Calculate the necessary gradients
+    d.g!(state.x, state.g)
+    constraints.jacobian!(state.x, state.constr_J)
+    # Solve for the Lagrange multipliers
+    ic, ix = bounds.ineqc[bstate.active_c], bounds.ineqx[bstate.active_x]
+    Jx = view5(state.constr_J, ic, ix)
+    Jact = view5(state.constr_J, ic, :)
+    Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact']
+    pactive = [state.g[ix]; Jact*state.g]  # plain indexing: bare `view` is avoided elsewhere via the view5 shim
+    λactive = (Cactive\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]]
+    # Set the state
+    k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0)
+    k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k)
+    k == length(λactive) || error("something is wrong")  # every solved multiplier must have been consumed
+    nothing
+end
+
+function tally_active!(active, nactive, nchanged, c, ineq, b)
+    for (i,j) in enumerate(ineq)
+        isactive = c[j] == b[i]
+        nactive += isactive
+        nchanged += isactive != active[i]
+        active[i] = isactive
+    end
+    nactive, nchanged
+end
+
+function set_active_params!(slack, λ, active, λtarget, μ, k)
+    for i = 1:length(active)
+        active[i] || continue
+        λk = λtarget[k+=1]
+        λ[i] = λk
+        slack[i] = μ/λk
+    end
+    k
+end
+
 # Fallbacks (for methods that don't need these)
 after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing
 update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
@@ -117,14 +189,14 @@ update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
 ## Computation of the Lagrangian and its gradient
 # This is in a parametrization that is also useful during linesearch
 
-function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ, method)
+function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ)
     f_x = d.f(x)
     L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
             equality_violation(bounds, x, c, bstate)
     f_x, L_xsλ
 end
 
-function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
     fill!(bgrad, 0)
     d.g!(x, gx)
     barrier_grad!(gx, bgrad, bounds, x, bstate, μ)
@@ -132,7 +204,7 @@ function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::
     nothing
 end
 
-function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
     fill!(bgrad, 0)
     f_x = d.fg!(x, gx)
     L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
@@ -143,24 +215,33 @@ function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate:
 end
 
 ## Computation of Lagrangian and derivatives when passing all parameters as a single vector
-function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ, method)
+function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ)
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ, method)
+    f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ)
     L_xsλ
 end
-function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ, method)
+function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ)
     # Use this version when using automatic differentiation
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ, method)
+    f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ)
     L_xsλ
 end
-function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ, method)
+function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, method)
+    f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
     pack_vec!(storage, gx, bgrad)
     L_xsλ
 end
 
+# For line searches that don't use the gradient along the line: returns the Lagrangian at x + α*s
+function lagrangian_linefunc(α, d, constraints, state)
+    b_ls = state.b_ls
+    ls_update!(state.x_ls, state.x, state.s, α)
+    ls_update!(b_ls.bstate, state.bstate, state.bstep, α)
+    constraints.c!(state.x_ls, b_ls.c)  # evaluate constraints at the trial point, not the base point
+    lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)[2]
+end
+
 ## Computation of Lagrangian terms: barrier penalty
 """
     barrier_value(constraints, state) -> val
@@ -381,3 +462,14 @@ function unpack_vec!(x, vec::Vector, k::Int)
     end
     k
 end
+
+## More utilities
+function estimate_maxstep(αmax, x, s)
+    for i = 1:length(s)
+        si = s[i]
+        if si < 0
+            αmax = min(αmax, -x[i]/si)
+        end
+    end
+    αmax
+end
diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
new file mode 100644
index 000000000..c0343b5b1
--- /dev/null
+++ b/src/iplinesearch.jl
@@ -0,0 +1,15 @@
+function backtrack_constrained(ϕ, α, αmax, Lcoefsα,
+                               c1 = 0.5, ρ=oftype(α, 0.5), itermax = 100)
+    α = min(α, 0.999*αmax)
+    L0, L1, L2 = Lcoefsα
+    f_calls = 0
+    while f_calls < itermax
+        f_calls += 1
+        val = ϕ(α)
+        if abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) + 100*eps(abs(val)+abs(L0))
+            return α, f_calls, 0
+        end
+        α *= ρ
+    end
+    error("failed to satisfy criterion after $f_calls iterations")
+end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index db501f930..1bbbc3d1d 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -2,7 +2,7 @@ immutable IPNewton <: IPOptimizer
     linesearch!::Function
 end
 
-IPNewton(; linesearch!::Function = backtrack_constrained!) =
+IPNewton(; linesearch!::Function = backtrack_constrained) =
   IPNewton(linesearch!)
 
 type IPNewtonState{T,N} <: AbstractBarrierState
@@ -15,14 +15,17 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     s::Array{T,N}  # step for x
     # Barrier penalty fields
     μ::T                  # coefficient of the barrier penalty
+    L::T                  # value of the Lagrangian (objective + barrier + equality)
     bstate::BarrierStateVars{T}   # value of slack and λ variables (current "position")
     bgrad::BarrierStateVars{T}    # gradient of slack and λ variables at current "position"
+    bstep::BarrierStateVars{T}    # search direction for slack and λ
     constr_c::Vector{T}   # value of the user-supplied constraints at x
     constr_J::Matrix{T}   # value of the user-supplied Jacobian at x
     @add_linesearch_fields()
     b_ls::BarrierLineSearch{T}
     gf::Vector{T}
     Hf::Matrix{T}
+    stepf::Vector{T}
 end
 
 function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T})
@@ -49,10 +52,12 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
     constr_gtemp = Array{T}(n)
     gf = Array{T}(0)    # will be replaced
     Hf = Array{T}(0,0)  #   "
+    stepf = Array{T}(0)
     constraints.jacobian!(initial_x, constr_J)
     μ = T(1)
     bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c)
     bgrad = similar(bstate)
+    bstep = similar(bstate)
     b_ls = BarrierLineSearch(similar(constr_c), similar(bstate))
 
     state = IPNewtonState("Interior-point Newton's Method",
@@ -69,21 +74,32 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hd,
         similar(initial_x), # Maintain current x-search direction in state.s
         μ,
+        T(0),
         bstate,
         bgrad,
+        bstep,
         constr_c,
         constr_J,
         @initial_linesearch()..., # Maintain a cache for line search results in state.lsr
         b_ls,
         gf,
-        Hf)
+        Hf,
+        stepf)
+
     #    μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J)
-    update_g!(d, constraints, state, method)
+    update_fg!(d, constraints, state, method)
     update_h!(d, constraints, state, method)
 end
 
+function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
+    f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
+    state.f_x, state.L = f_x, L
+    state
+end
+
 function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
-    lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ, method)
+    lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
+    state
 end
 
 function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
@@ -149,3 +165,89 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
                 bgrad.λcE]
     state
 end
+
+function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
+    bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
+    solve_step!(state, constraints)
+    # If a step α=1 will not change any of the parameters, we can quit now.
+    # This prevents a futile linesearch.
+    if is_smaller_eps(state.x, state.s) &&
+        is_smaller_eps(bstate.slack_x, bstep.slack_x) &&
+        is_smaller_eps(bstate.slack_c, bstep.slack_c) &&
+        is_smaller_eps(bstate.λx, bstep.λx) &&
+        is_smaller_eps(bstate.λc, bstep.λc)
+        return false
+    end
+    qp = quadratic_parameters(bounds, state)
+
+    # Estimate αmax, the upper bound on distance of movement along the search line
+    αmax = convert(eltype(bstate), Inf)
+    αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x)
+    αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c)
+    αmax = estimate_maxstep(αmax,
+                            view(state.x, bounds.iz).*bounds.σz,
+                            view(state.s, bounds.iz).*bounds.σz)
+
+    # Determine the actual distance of movement along the search line
+    ϕ = α->lagrangian_linefunc(α, d, constraints, state)
+    state.alpha, f_update, g_update =
+        method.linesearch!(ϕ, T(1), αmax, qp)
+    state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
+
+    # Maintain a record of previous position
+    copy!(state.x_previous, state.x)
+
+    # Update current position # x = x + alpha * s
+    ls_update!(state.x, state.x, state.s, state.alpha)
+    ls_update!(bstate, bstate, bstep, state.alpha)
+
+    # Evaluate the constraints at the new position
+    constraints.c!(state.x, state.constr_c)
+    constraints.jacobian!(state.x, state.constr_J)
+
+    # Test for active inequalities, solve immediately for the corresponding s and λ
+    solve_active_inequalities!(d, constraints, state)
+
+    false
+end
+
+function solve_step!(state::IPNewtonState, constraints)
+    # Solve the Newton step
+    step = -(state.Hf\state.gf)  # do *not* force posdef
+    x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
+    bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
+    k = unpack_vec!(s, step, 0)
+    k = unpack_vec!(bstep.λxE, step, k)
+    k = unpack_vec!(bstep.λcE, step, k)
+    k == length(step) || error("exhausted targets before step")
+    # Solve for the slack variable and λI updates
+    for (i, j) in enumerate(bounds.ineqx)
+        bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j]
+        bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
+    end
+    JI = view5(state.constr_J, bounds.ineqc, :)
+    bstep.slack_c[:] = -bgrad.λc + Diagonal(bounds.σc)*JI*s
+    for i = 1:length(bstep.λc)
+        bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
+    end
+    state.stepf = step
+    state
+end
+
+function is_smaller_eps(ref, step)
+    ise = true
+    for (r, s) in zip(ref, step)
+        ise &= (s == 0) | (abs(s) < eps(r))
+    end
+    ise
+end
+
+function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
+    slope = dot(state.stepf, state.gf)
+    # For the curvature, use the original hessian (before forcing
+    # positive-definiteness)
+    q = dot(state.s, state.H*state.s)
+    JE = view5(state.constr_J, bounds.eqc, :)
+    q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE)
+    state.L, slope, q
+end
diff --git a/src/types.jl b/src/types.jl
index 6e710fd61..3ee044f7b 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -310,6 +310,17 @@ end
 DifferentiableConstraintsFunction(c!, jacobian!, bounds::ConstraintBounds) =
     DifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), eltype(b)}(c!, jacobian!, b)
 
+function DifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray)
+    bounds = ConstraintBounds(lx, ux, [], [])
+    DifferentiableConstraintsFunction(bounds)
+end
+
+function DifferentiableConstraintsFunction(bounds::ConstraintBounds)
+    c! = (x,c)->nothing
+    J! = (x,J)->nothing
+    DifferentiableConstraintsFunction(c!, J!, bounds)
+end
+
 immutable TwiceDifferentiableConstraintsFunction{F,J,H,T} <: AbstractConstraintsFunction
     c!::F
     jacobian!::J
@@ -323,6 +334,19 @@ end
 TwiceDifferentiableConstraintsFunction(c!, jacobian!, h!, bounds::ConstraintBounds) =
     TwiceDifferentiableConstraintsFunction{typeof(c!), typeof(jacobian!), typeof(h!), eltype(b)}(c!, jacobian!, h!, b)
 
+function TwiceDifferentiableConstraintsFunction(lx::AbstractArray, ux::AbstractArray)
+    bounds = ConstraintBounds(lx, ux, [], [])
+    TwiceDifferentiableConstraintsFunction(bounds)
+end
+
+function TwiceDifferentiableConstraintsFunction(bounds::ConstraintBounds)
+    c! = (x,c)->nothing
+    J! = (x,J)->nothing
+    h! = (x,λ,h)->nothing
+    TwiceDifferentiableConstraintsFunction(c!, J!, h!, bounds)
+end
+
+
 ## Utilities
 
 function symmetrize(l, u)
diff --git a/src/utilities/update.jl b/src/utilities/update.jl
index 8b81dbf35..3912dab4a 100644
--- a/src/utilities/update.jl
+++ b/src/utilities/update.jl
@@ -27,3 +27,10 @@ function update!{T}(tr::OptimizationTrace{T},
     end
     stopped
 end
+
+function ls_update!(out::AbstractArray, base::AbstractArray, step::AbstractArray, α)
+    length(out) == length(base) == length(step) || throw(DimensionMismatch("all arrays must have the same length, got $(length(out)), $(length(base)), $(length(step))"))
+    for i = 1:length(base)
+        out[i] = base[i]+α*step[i]
+    end
+end
diff --git a/test/constraints.jl b/test/constraints.jl
index 6397ad824..0509b1f9e 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -17,6 +17,13 @@ else
 end
 
 @testset "Constraints" begin
+    # Utility function for hand-setting the μ parameter
+    function setstate!(state, μ, d, constraints, method)
+        state.μ = μ
+        Optim.update_fg!(d, constraints, state, method)
+        Optim.update_h!(d, constraints, state, method)
+    end
+
     @testset "Bounds parsing" begin
         b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
         @test b.eqx == [3]
@@ -60,30 +67,24 @@ ConstraintBounds:
         @test_throws DimensionMismatch Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0], [5.0, 4.8], [5.0, 4.0])
     end
 
-    @testset "IPNewton" begin
+    @testset "IPNewton computations" begin
+        # Compare hand-computed gradient against that from automatic differentiation
         function check_autodiff(d, bounds, x, cfun::Function, bstate, μ)
             c = cfun(x)
             J = ForwardDiff.jacobian(cfun, x)
-            # Using real-valued inputs
             p = Optim.pack_vec(x, bstate)
-            ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+            ftot! = (p,storage)->Optim.lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds, x, c, J, bstate, μ)
             pgrad = similar(p)
             ftot!(p, pgrad)
-            # Compute with ForwardDiff
             chunksize = min(8, length(p))
             TD = ForwardDiff.Dual{chunksize,eltype(p)}
             xd = Array{TD}(length(x))
             bstated = Optim.BarrierStateVars{TD}(bounds)
             pcmp = similar(p)
-            ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ, nothing)
+            ftot = p->Optim.lagrangian_vec(p, d, bounds, xd, cfun, bstated, μ)
             ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}())
             @test pcmp ≈ pgrad
         end
-        function setstate!(state, μ)
-            state.μ = μ
-            Optim.update_g!(d, constraints, state, method)
-            Optim.update_h!(d, constraints, state, method)
-        end
         # Basic setup
         μ = 0.2345678
         A = randn(3,3); H = A'*A
@@ -93,8 +94,8 @@ ConstraintBounds:
         cfun = x->Float64[]
         c = Float64[]
         J = Array{Float64}(0,0)
-        method = Optim.IPNewton(identity)
         options = OptimizationOptions()
+        method = Optim.IPNewton()
         ## In the code, variable constraints are special-cased (for
         ## reasons of user-convenience and efficiency).  It's
         ## important to check that the special-casing yields the same
@@ -108,7 +109,7 @@ ConstraintBounds:
         bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[])
         bstate = Optim.BarrierStateVars(bounds, x)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
         @test f_x == L == d.f(x)
         @test gx == H*x
         constraints = TwiceDifferentiableConstraintsFunction(
@@ -123,7 +124,7 @@ ConstraintBounds:
         bstate = Optim.BarrierStateVars(bounds)
         rand!(bstate.λxE)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         @test L ≈ dot(bstate.λxE, xbar-x)
         @test gx == -bstate.λxE
@@ -133,7 +134,7 @@ ConstraintBounds:
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.λxE, bstate.λxE)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         @test state.gf ≈ [gx; xbar-x]
         n = length(x)
         @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
@@ -142,7 +143,7 @@ ConstraintBounds:
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.λcE, bstate.λxE)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         @test state.gf ≈ [gx; xbar-x]
         n = length(x)
         @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
@@ -151,7 +152,7 @@ ConstraintBounds:
         y = rand(length(x))
         bstate = Optim.BarrierStateVars(bounds, y)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ)
         @test f_x == 0
         @test L ≈ -μ*sum(log, y)
         @test gx == -μ./y
@@ -159,14 +160,14 @@ ConstraintBounds:
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, d, constraints, y)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         @test state.gf ≈ -μ./y
         @test state.Hf ≈ μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d, constraints, y)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         @test state.gf ≈ -μ./y
         @test state.Hf ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
@@ -176,7 +177,7 @@ ConstraintBounds:
         rand!(bstate.slack_x)  # intentionally displace from the correct value
         rand!(bstate.λx)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
         @test f_x == 0
         s = bounds.σx .* (x[bounds.ineqx] - bounds.bx)
         Ltarget = -μ*sum(log, bstate.slack_x) +
@@ -194,7 +195,7 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.slack_x, bstate.slack_x)
         copy!(state.bstate.λx, bstate.λx)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         gxs, hxs = zeros(length(x)), zeros(length(x))
         s = state.bstate.slack_x
         for (i,j) in enumerate(bounds.ineqx)
@@ -209,7 +210,7 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.slack_c, bstate.slack_x)
         copy!(state.bstate.λc, bstate.λx)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         @test state.gf ≈ gxs
         @test state.Hf ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
@@ -230,7 +231,7 @@ ConstraintBounds:
         bstate = Optim.BarrierStateVars(bounds, x, c)
         rand!(bstate.λcE)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         @test L ≈ dot(bstate.λcE, cbar-c)
         @test gx ≈ -J'*bstate.λcE
@@ -239,7 +240,7 @@ ConstraintBounds:
         constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds)
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.λcE, bstate.λcE)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         heq = zeros(length(x), length(x))
         ch!(x, bstate.λcE, heq)
         @test state.gf ≈ [gx; cbar-c]
@@ -251,7 +252,7 @@ ConstraintBounds:
         rand!(bstate.slack_c)  # intentionally displace from the correct value
         rand!(bstate.λc)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ, nothing)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         Ltarget = -μ*sum(log, bstate.slack_c) +
             dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc))
@@ -264,7 +265,7 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d, constraints, x)
         copy!(state.bstate.slack_c, bstate.slack_c)
         copy!(state.bstate.λc, bstate.λc)
-        setstate!(state, μ)
+        setstate!(state, μ, d, constraints, method)
         hineq = zeros(length(x), length(x))
         λ = zeros(size(J, 1))
         for (i,j) in enumerate(bounds.ineqc)
@@ -277,6 +278,89 @@ ConstraintBounds:
         @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2))
         @test state.Hf ≈ hp
     end
+
+    @testset "IPNewton step" begin
+        F = 1000
+        d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
+        method = Optim.IPNewton()
+        options = OptimizationOptions()
+        μ = 1e-20
+        x0 = μ/F*10  # minimum is at μ/F
+        # Nonnegativity (the case that doesn't require slack variables)
+        constraints = TwiceDifferentiableConstraintsFunction([0.0], [])
+        state = Optim.initial_state(method, options, d, constraints, [x0])
+        setstate!(state, μ, d, constraints, method)
+        Optim.solve_step!(state, constraints)
+        @test state.s[1] ≈ x0 - F*x0^2/μ
+        qp = Optim.quadratic_parameters(constraints.bounds, state)
+        @test qp[1] ≈ F*x0-μ*log(x0)
+        @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ
+        @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2
+        bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
+        αmax = Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz,
+                                      state.s[bounds.iz].*bounds.σz)
+        ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state)
+        @test ϕ(0) ≈ qp[1]
+        α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)
+        @test α > 1e-3
+    end
+
+    @testset "Slack" begin
+        σswap(σ, a, b) = σ == 1 ? (a, b) : (b, a)
+        # Test that we achieve a high-precision minimum for fixed
+        # μ. For anything other than nonnegativity/nonpositivity
+        # constraints, this tests whether the slack variables are
+        # solving the problem they were designed to address (the
+        # possibility that adjacent floating-point numbers are too
+        # widely spaced to accurately satisfy the KKT equations near a
+        # boundary).
+        F0 = 1000
+        method = Optim.IPNewton()
+        options = OptimizationOptions()
+        μ = 1e-20   # smaller than eps(1.0)
+        for σ in (1, -1)
+            F = σ*F0
+            # Nonnegativity/nonpositivity (the case that doesn't require slack variables)
+            d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
+            constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...)
+            state = Optim.initial_state(method, options, d, constraints, [μ/F*10])
+            setstate!(state, μ, d, constraints, method)
+            for i = 1:10
+                Optim.update_state!(d, constraints, state, method)
+                Optim.update_fg!(d, constraints, state, method)
+                Optim.update_h!(d, constraints, state, method)
+            end
+            @test state.x[1] ≈ μ/F
+            # |x| ≥ 1, and check that we get slack precision better than eps(1.0)
+            d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
+            constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...)
+            state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
+            setstate!(state, μ, d, constraints, method)
+            for i = 1:10
+                Optim.update_state!(d, constraints, state, method)
+                Optim.update_fg!(d, constraints, state, method)
+                Optim.update_h!(d, constraints, state, method)
+            end
+            @test state.x[1] == σ
+            @test state.bstate.slack_x[1] ≈ μ/abs(F)
+            # x >= 1 using the linear/nonlinear constraints
+            d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
+            constraints = TwiceDifferentiableConstraintsFunction(
+                (x,c)->(c[1] = x[1]),
+                (x,J)->(J[1,1] = 1.0),
+                (x,λ,h)->nothing,
+                [], [], σswap(σ, [Float64(σ)], [])...)
+            state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
+            setstate!(state, μ, d, constraints, method)
+            for i = 1:10
+                Optim.update_state!(d, constraints, state, method)
+                Optim.update_fg!(d, constraints, state, method)
+                Optim.update_h!(d, constraints, state, method)
+            end
+            @test state.x[1] == σ
+            @test state.bstate.slack_c[1] ≈ μ/abs(F)
+        end
+    end
 end
 
 nothing

From 50b158293b2983347e343574f8239b420921db52 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 8 Nov 2016 08:31:47 -0600
Subject: [PATCH 07/40] =?UTF-8?q?Add=20a=20principled=20initialization=20f?=
 =?UTF-8?q?or=20=CE=BC=20and=20=CE=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on the notion that we want to (largely) preserve the objective function's initial descent direction.
---
 src/interior.jl | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/ipnewton.jl |  5 +++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/interior.jl b/src/interior.jl
index b9b5b62c5..839e12783 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -182,6 +182,67 @@ function set_active_params!(slack, λ, active, λtarget, μ, k)
     k
 end
 
+"""
+    initialize_μ_λ!(λxE, λcE, constraints, x, g, constr_c, constr_J, β=0.01) -> μ
+
+Pick μ and λ to ensure that the equality constraints are satisfied
+locally, and that the initial gradient including the barrier would be
+a descent direction for the problem without the barrier (μ = 0). This
+ensures that the search isn't pushed out of the basin of the
+user-supplied initial guess.
+
+`λxE` and `λcE` are the Lagrange multipliers for the variables and extra
+(non-variable) constraints; these are pre-allocated storage for the
+output, and their input values are not used. `constraints` is an
+`AbstractConstraintsFunction`, `x` is the position (must be a feasible
+interior point), `g` is the gradient of the objective at `x`, and
+`constr_c` and `constr_J` contain the values and Jacobian of the extra
+constraints evaluated at `x`. `β` (optional) specifies the fraction of
+the objective's gradient that may be diminished by the barrier.
+
+In addition to setting `λxE` and `λcE`, this returns `μ`, the value of
+the barrier penalty.
+"""
+function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1//100)
+    length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x))
+    # Calculate the projection matrix
+    JEx = zeros(eltype(J), length(bounds.eqx), length(x))
+    for (i,j) in enumerate(bounds.eqx)
+        JEx[i,j] = 1
+    end
+    JEc = view5(J, bounds.eqc, :)
+    JE = vcat(JEx, JEc)
+    CE = JE*JE'
+    CEc = cholfact(Positive, CE)
+    Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.)
+    # Calculate the barrier deviation and projection onto inequality normals
+    Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc]
+    JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x))
+    for (i,j) in enumerate([bounds.iz; bounds.ineqx])
+        JIx[i,j] = 1
+    end
+    JIc = view5(J, bounds.ineqc, :)
+    JI = vcat(JIx, JIc)
+    JIg = JI*Pg
+    # Solve for μ
+    λtilde = 1./Δb
+    μden = dot(λtilde, JIg)
+    if μden == 0
+        μden = maximum(abs(λtilde).*abs(JIg))*length(Δb)
+    end
+    μ = β*dot(Pg, Pg)/abs(μden)
+    μ = μden != 0 ? μ : oftype(μ, 1)
+    # Solve for λE
+    gb = g - μ*(JI'*λtilde)
+    Pgb = gb - JE'*(CEc \ (JE*gb))
+    λE = CEc \ (JE*Pgb)
+    k = unpack_vec!(λx, λE, 0)
+    k = unpack_vec!(λc, λE, k)
+    k == length(λE) || error("something is wrong")
+    μ
+end
+initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) =
+    initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...)
 # Fallbacks (for methods that don't need these)
 after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing
 update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 1bbbc3d1d..9a080dc25 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -86,7 +86,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hf,
         stepf)
 
-    #    μ = initialize_μ_λ!(λv, λc, constraints, initial_x, g, constr_c, constr_J)
+    state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J)
     update_fg!(d, constraints, state, method)
     update_h!(d, constraints, state, method)
 end
@@ -94,11 +94,14 @@ end
 function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
     f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
     state.f_x, state.L = f_x, L
+    state.f_calls += 1
+    state.g_calls += 1
     state
 end
 
 function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
     lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
+    state.g_calls += 1
     state
 end
 

From 71029578ebaff46b5f8a3e942b5f25d923dca82d Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 8 Nov 2016 08:38:20 -0600
Subject: [PATCH 08/40] Add optimize for interior-point methods

Also implements tracing
---
 src/Optim.jl           |   2 +-
 src/interior.jl        | 116 ++++++++++++++++++++++++++++++++++++++---
 src/ipnewton.jl        |   6 +--
 src/types.jl           |  44 +++++++++++++---
 src/utilities/trace.jl |  21 ++++++++
 test/runtests.jl       |   1 +
 6 files changed, 172 insertions(+), 18 deletions(-)

diff --git a/src/Optim.jl b/src/Optim.jl
index 1afe15fed..1820bf9f1 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -78,9 +78,9 @@ module Optim
 
     # Constrained optimization
     include("fminbox.jl")
+    include("iplinesearch.jl")
     include("interior.jl")
     include("ipnewton.jl")
-    include("iplinesearch.jl")
 
     # trust region methods
     include("levenberg_marquardt.jl")
diff --git a/src/interior.jl b/src/interior.jl
index 839e12783..a29be2319 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -129,13 +129,97 @@ function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::Barrier
     out
 end
 
+function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions)
+    t0 = time() # Initial time stamp used to control early stopping by options.time_limit
+
+    state = initial_state(method, options, d, constraints, initial_x)
+
+    tr = OptimizationTrace{typeof(method)}()
+    tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
+    stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
+
+    x_converged, f_converged = false, false
+    g_converged = vecnorm(state.g, Inf) < options.g_tol
+
+    converged = g_converged
+    iteration, iterationμ = 0, 0
+
+    options.show_trace && print_header(method)
+    trace!(tr, state, iteration, method, options)
+
+    while !converged && !stopped && iteration < options.iterations
+        iteration += 1
+        iterationμ += 1
+
+        update_state!(d, constraints, state, method) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS)
+        update_asneeded_fg!(d, constraints, state, method)
+        x_converged, f_converged,
+        g_converged, converged = assess_convergence(state, options)
+
+        # If tracing, update trace with trace!. If a callback is provided, it
+        # should have boolean return value that controls the variable stopped_by_callback.
+        # This allows for early stopping controlled by the callback.
+        if tracing
+            stopped_by_callback = trace!(tr, state, iteration, method, options)
+        end
+
+        # Test whether we need to decrease the barrier penalty
+        if converged
+            if iterationμ > 1
+                # We did real work, so it's worth decreasing the barrier penalty further
+                shrink_μ!(d, constraints, state, method, options)
+                iterationμ = 0
+                converged = false
+            end
+        end
+
+        # We don't use the Hessian for anything if we have declared convergence,
+        # so we might as well not make the (expensive) update if converged == true
+        !converged && update_h!(d, constraints, state, method)
+
+        # Check time_limit; if none is provided it is NaN and the comparison
+        # will always return false.
+        stopped_by_time_limit = time()-t0 > options.time_limit ? true : false
+
+        # Combine the two, so see if the stopped flag should be changed to true
+        # and stop the while loop
+        stopped = stopped_by_callback || stopped_by_time_limit ? true : false
+    end # while
+
+    after_while!(d, constraints, state, method, options)
+
+    return MultivariateOptimizationResults(state.method_string,
+                                            initial_x,
+                                            state.x,
+                                            Float64(state.f_x),
+                                            iteration,
+                                            iteration == options.iterations,
+                                            x_converged,
+                                            options.x_tol,
+                                            f_converged,
+                                            options.f_tol,
+                                            g_converged,
+                                            options.g_tol,
+                                            tr,
+                                            state.f_calls,
+                                            state.g_calls,
+                                            state.h_calls)
+end
+
+# Fallbacks (for methods that don't need these)
+after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing
+update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
+update_asneeded_fg!(d, constraints, state, method) = update_fg!(d, constraints, state, method)
+update_asneeded_fg!(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) = update_g!(d, constraints, state, method)
+
+
 # Explicit solution for slack, λ when an inequality constraint is
 # "active." This is necessary (or at least helpful) when c-b == 0 due
 # to roundoff error, in which case the KKT equations don't have an
 # exact solution within the precision.  We punt on the ∂λ equation
 # (which reduces to the slack, which should be small anyway), and
 # focus on the ∂x and ∂slack equations (therefore setting slack and
-# λ). By setting these to their exact solutions, we blance the forces
+# λ). By setting these to their exact solutions, we balance the forces
 # due to the barrier.
 function solve_active_inequalities!(d, constraints, state)
     x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds
@@ -154,7 +238,9 @@ function solve_active_inequalities!(d, constraints, state)
     Jact = view5(state.constr_J, ic, :)
     Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact']
     pactive = [view(state.g, ix); Jact*state.g]
-    λactive = (Cactive\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]]
+    Cactivep = cholfact(Positive, Cactive)
+    λactive = (Cactivep\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]]
+    any(x->x<=0, λactive) && error("something may be wrong, λ is zero or negative. Perhaps Cactive is singular?")
     # Set the state
     k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0)
     k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k)
@@ -227,7 +313,7 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/
     # Solve for μ
     λtilde = 1./Δb
     μden = dot(λtilde, JIg)
-    if μden == 0
+    if μden == 0 && !isempty(Δb)
         μden = maximum(abs(λtilde).*abs(JIg))*length(Δb)
     end
     μ = β*dot(Pg, Pg)/abs(μden)
@@ -243,9 +329,6 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/
 end
 initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) =
     initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...)
-# Fallbacks (for methods that don't need these)
-after_while!(d, constraints::AbstractConstraintsFunction, state, method, options) = nothing
-update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
 
 ## Computation of the Lagrangian and its gradient
 # This is in a parametrization that is also useful during linesearch
@@ -296,13 +379,27 @@ end
 
 # for line searches that don't use the gradient along the line
 function lagrangian_linefunc(α, d, constraints, state)
+    _lagrangian_linefunc(α, d, constraints, state)[2]
+end
+
+function _lagrangian_linefunc(α, d, constraints, state)
     b_ls = state.b_ls
     ls_update!(state.x_ls, state.x, state.s, α)
     ls_update!(b_ls.bstate, state.bstate, state.bstep, α)
     constraints.c!(state.x, b_ls.c)
-    lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)[2]
+    lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)
 end
 
+function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
+    # For backtrack_constrained, the last evaluation is the one we
+    # keep, so it's safe to store the results in state
+    f_x, L = _lagrangian_linefunc(α, d, constraints, state)
+    state.f_x = f_x
+    state.L = L
+    L
+end
+lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state)
+
 ## Computation of Lagrangian terms: barrier penalty
 """
     barrier_value(constraints, state) -> val
@@ -534,3 +631,8 @@ function estimate_maxstep(αmax, x, s)
     end
     αmax
 end
+
+function shrink_μ!(d, constraints, state, method, options)
+    state.μ *= options.μfactor
+    update_fg!(d, constraints, state, method)
+end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 9a080dc25..2c19de36c 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -1,5 +1,5 @@
-immutable IPNewton <: IPOptimizer
-    linesearch!::Function
+immutable IPNewton{F} <: IPOptimizer{F}
+    linesearch!::F
 end
 
 IPNewton(; linesearch!::Function = backtrack_constrained) =
@@ -192,7 +192,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
                             view(state.s, bounds.iz).*bounds.σz)
 
     # Determine the actual distance of movement along the search line
-    ϕ = α->lagrangian_linefunc(α, d, constraints, state)
+    ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method)
     state.alpha, f_update, g_update =
         method.linesearch!(ϕ, T(1), αmax, qp)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
diff --git a/src/types.jl b/src/types.jl
index 3ee044f7b..9c3f67247 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,6 +1,9 @@
 abstract Optimizer
-abstract ConstrainedOptimizer <: Optimizer
-abstract IPOptimizer <: ConstrainedOptimizer
+abstract ConstrainedOptimizer{T} <: Optimizer
+abstract IPOptimizer{T} <: ConstrainedOptimizer  # interior point methods
+
+abstract AbstractOptimFunction
+
 immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     x_tol::Float64
     f_tol::Float64
@@ -13,6 +16,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     show_every::Int
     callback::TCallback
     time_limit::Float64
+    μfactor::Float64
 end
 
 function OptimizationOptions(;
@@ -26,7 +30,8 @@ function OptimizationOptions(;
         autodiff::Bool = false,
         show_every::Integer = 1,
         callback = nothing,
-        time_limit = NaN)
+        time_limit = NaN,
+        μfactor = 0.1)
     show_every = show_every > 0 ? show_every: 1
     if extended_trace && callback == nothing
         show_trace = true
@@ -34,7 +39,7 @@ function OptimizationOptions(;
     OptimizationOptions{typeof(callback)}(
         Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(iterations),
         store_trace, show_trace, extended_trace, autodiff, Int(show_every),
-        callback, time_limit)
+        callback, time_limit, μfactor)
 end
 
 function print_header(options::OptimizationOptions)
@@ -47,6 +52,10 @@ function print_header(method::Optimizer)
         @printf "Iter     Function value   Gradient norm \n"
 end
 
+function print_header(method::IPOptimizer)
+        @printf "Iter     Lagrangian value Function value   Gradient norm    μ\n"
+end
+
 immutable OptimizationState{T <: Optimizer}
     iteration::Int
     value::Float64
@@ -92,17 +101,17 @@ type UnivariateOptimizationResults{T,M} <: OptimizationResults
     f_calls::Int
 end
 
-immutable NonDifferentiableFunction
+immutable NonDifferentiableFunction <: AbstractOptimFunction
     f::Function
 end
 
-immutable DifferentiableFunction
+immutable DifferentiableFunction <: AbstractOptimFunction
     f::Function
     g!::Function
     fg!::Function
 end
 
-immutable TwiceDifferentiableFunction
+immutable TwiceDifferentiableFunction <: AbstractOptimFunction
     f::Function
     g!::Function
     fg!::Function
@@ -119,6 +128,18 @@ function Base.show(io::IO, t::OptimizationState)
     return
 end
 
+function Base.show{M<:IPOptimizer}(io::IO, t::OptimizationState{M})
+    md = t.metadata
+    @printf io "%6d   %-14e   %-14e   %-14e   %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["μ"]
+    if !isempty(t.metadata)
+        for (key, value) in md
+            key ∈ ("Lagrangian", "μ") && continue
+            @printf io " * %s: %s\n" key value
+        end
+    end
+    return
+end
+
 function Base.show(io::IO, tr::OptimizationTrace)
     @printf io "Iter     Function value   Gradient norm \n"
     @printf io "------   --------------   --------------\n"
@@ -128,6 +149,15 @@ function Base.show(io::IO, tr::OptimizationTrace)
     return
 end
 
+function Base.show{M<:IPOptimizer}(io::IO, tr::OptimizationTrace{M})
+    @printf io "Iter     Lagrangian value Function value   Gradient norm    μ\n"
+    @printf io "------   ---------------- --------------   --------------   --------\n"
+    for state in tr
+        show(io, state)
+    end
+    return
+end
+
 function Base.show(io::IO, r::MultivariateOptimizationResults)
     @printf io "Results of Optimization Algorithm\n"
     @printf io " * Algorithm: %s\n" method(r)
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index cda25b3b7..0cb27427f 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -114,3 +114,24 @@ function trace!(tr, state, iteration, method::NewtonTrustRegion, options)
             options.show_every,
             options.callback)
 end
+
+function trace!(tr, state, iteration, method::IPOptimizer, options)
+    dt = Dict()
+    dt["Lagrangian"] = state.L
+    dt["μ"] = state.μ
+    if options.extended_trace
+        dt["x"] = copy(state.x)
+        dt["g(x)"] = copy(state.g)
+        dt["h(x)"] = copy(state.H)
+    end
+    g_norm = vecnorm(state.g, Inf)
+    update!(tr,
+            iteration,
+            state.f_x,
+            g_norm,
+            dt,
+            options.store_trace,
+            options.show_trace,
+            options.show_every,
+            options.callback)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7f11d94a1..973122069 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -27,6 +27,7 @@ my_tests = [
     "brent.jl",
     "type_stability.jl",
     "array.jl",
+    "constraints.jl",
     "constrained.jl",
     "callbacks.jl",
     "precon.jl",

From 11b047017c97344daaaa2e7aeb349fe23f0ce2ff Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 8 Nov 2016 09:13:52 -0600
Subject: [PATCH 09/40] Add BaseTestNext to test/REQUIRE

---
 test/REQUIRE | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 test/REQUIRE

diff --git a/test/REQUIRE b/test/REQUIRE
new file mode 100644
index 000000000..94e516f56
--- /dev/null
+++ b/test/REQUIRE
@@ -0,0 +1 @@
+BaseTestNext

From 2cf1421a3792d6479b0916b05d194ee06f08e074 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Thu, 10 Nov 2016 03:54:08 -0600
Subject: [PATCH 10/40] Fix state bugs in linesearch and initialization

---
 src/interior.jl | 2 +-
 src/ipnewton.jl | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/interior.jl b/src/interior.jl
index a29be2319..5d6ec277f 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -386,7 +386,7 @@ function _lagrangian_linefunc(α, d, constraints, state)
     b_ls = state.b_ls
     ls_update!(state.x_ls, state.x, state.s, α)
     ls_update!(b_ls.bstate, state.bstate, state.bstep, α)
-    constraints.c!(state.x, b_ls.c)
+    constraints.c!(state.x_ls, b_ls.c)
     lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)
 end
 
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 2c19de36c..12f9ce4c8 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -87,6 +87,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         stepf)
 
     state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J)
+    bstate.λx[:] = μ./bstate.slack_x
+    bstate.λc[:] = μ./bstate.slack_c
     update_fg!(d, constraints, state, method)
     update_h!(d, constraints, state, method)
 end

From f4bb08a813aac3f6400e267f66e1b85388be03f0 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Fri, 11 Nov 2016 13:52:36 -0600
Subject: [PATCH 11/40] Add isfeasible and isinterior

---
 src/Optim.jl    |  2 ++
 src/interior.jl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/src/Optim.jl b/src/Optim.jl
index 1820bf9f1..1edc0b52e 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -17,6 +17,8 @@ module Optim
            Base.setindex!
 
     export optimize,
+           isfeasible,
+           isinterior,
            DifferentiableFunction,
            TwiceDifferentiableFunction,
            DifferentiableConstraintsFunction,
diff --git a/src/interior.jl b/src/interior.jl
index 5d6ec277f..67e1abc35 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -582,6 +582,75 @@ function equality_grad_λ!(gλ, v, target, idx)
     nothing
 end
 
+"""
+    isfeasible(constraints, state) -> Bool
+    isfeasible(constraints, x, c) -> Bool
+    isfeasible(constraints, x) -> Bool
+    isfeasible(bounds, x, c) -> Bool
+
+Return `true` if point `x` is feasible, given the `constraints` which
+specify bounds `lx`, `ux`, `lc`, and `uc`. `x` is feasible if
+
+    lx[i] <= x[i] <= ux[i]
+    lc[i] <= c[i] <= uc[i]
+
+for all possible `i`.
+"""
+function isfeasible(bounds::ConstraintBounds, x, c)
+    isf = true
+    for (i,j) in enumerate(bounds.eqx)
+        isf &= x[j] == bounds.valx[i]
+    end
+    for (i,j) in enumerate(bounds.ineqx)
+        isf &= bounds.σx[i]*(x[j] - bounds.bx[i]) >= 0
+    end
+    for (i,j) in enumerate(bounds.iz)
+        isf &= bounds.σz[i]*x[j] >= 0
+    end
+    for (i,j) in enumerate(bounds.eqc)
+        isf &= c[j] == bounds.valc[i]
+    end
+    for (i,j) in enumerate(bounds.ineqc)
+        isf &= bounds.σc[i]*(c[j] - bounds.bc[i]) >= 0
+    end
+    isf
+end
+isfeasible(constraints, state::AbstractBarrierState) = isfeasible(constraints, state.x, state.constr_c)
+isfeasible(constraints, x) = isfeasible(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc)))
+isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c)
+
+"""
+    isinterior(constraints, state) -> Bool
+    isinterior(constraints, x, c) -> Bool
+    isinterior(constraints, x) -> Bool
+    isinterior(bounds, x, c) -> Bool
+
+Return `true` if point `x` is on the interior of the allowed region,
+given the `constraints` which specify bounds `lx`, `ux`, `lc`, and
+`uc`. `x` is in the interior if
+
+    lx[i] < x[i] < ux[i]
+    lc[i] < c[i] < uc[i]
+
+for all possible `i`.
+"""
+function isinterior(bounds::ConstraintBounds, x, c)
+    isi = true
+    for (i,j) in enumerate(bounds.ineqx)
+        isi &= bounds.σx[i]*(x[j] - bounds.bx[i]) > 0
+    end
+    for (i,j) in enumerate(bounds.iz)
+        isi &= bounds.σz[i]*x[j] > 0
+    end
+    for (i,j) in enumerate(bounds.ineqc)
+        isi &= bounds.σc[i]*(c[j] - bounds.bc[i]) > 0
+    end
+    isi
+end
+isinterior(constraints, state::AbstractBarrierState) = isinterior(constraints, state.x, state.constr_c)
+isinterior(constraints, x) = isinterior(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc)))
+isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c)
+
 ## Utilities for representing total state as single vector
 function pack_vec(x, b::BarrierStateVars)
     n = length(x)

From 4f5539991e3e0a7da6cc0c5a3e7d20b2472360b7 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 12 Nov 2016 07:32:29 -0600
Subject: [PATCH 12/40] Add more utilities and record more variables with
 extended_trace

---
 src/Optim.jl           |  1 +
 src/interior.jl        | 19 +++++++++++++++++++
 src/utilities/trace.jl |  3 +++
 3 files changed, 23 insertions(+)

diff --git a/src/Optim.jl b/src/Optim.jl
index 1edc0b52e..130ac822f 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -34,6 +34,7 @@ module Optim
            Fminbox,
            GoldenSection,
            GradientDescent,
+           IPNewton,
            LBFGS,
            MomentumGradientDescent,
            NelderMead,
diff --git a/src/interior.jl b/src/interior.jl
index 67e1abc35..01e070f87 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -61,6 +61,17 @@ Base.similar(bstate::BarrierStateVars) =
                      similar(bstate.λc),
                      similar(bstate.λcE))
 
+Base.copy(bstate::BarrierStateVars) =
+    BarrierStateVars(copy(bstate.slack_x),
+                     copy(bstate.slack_c),
+                     copy(bstate.active_x),
+                     copy(bstate.active_c),
+                     copy(bstate.λxE),
+                     copy(bstate.λx),
+                     copy(bstate.λc),
+                     copy(bstate.λcE))
+
+
 function Base.fill!(b::BarrierStateVars, val)
     fill!(b.slack_x, val)
     fill!(b.slack_c, val)
@@ -96,6 +107,14 @@ const bsv_seed = sizeof(UInt) == 64 ? 0x145b788192d1cde3 : 0x766a2810
 Base.hash(b::BarrierStateVars, u::UInt) =
     hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed))))))
 
+function Base.dot(v::BarrierStateVars, w::BarrierStateVars)
+    dot(v.slack_x,w.slack_x) +
+        dot(v.slack_c, w.slack_c) +
+        dot(v.λxE, w.λxE) +
+        dot(v.λx, w.λx) +
+        dot(v.λc, w.λc) +
+        dot(v.λcE, w.λcE)
+end
 
 """
     BarrierLineSearch{T}
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 0cb27427f..90a3b27e9 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -123,6 +123,9 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
         dt["x"] = copy(state.x)
         dt["g(x)"] = copy(state.g)
         dt["h(x)"] = copy(state.H)
+        dt["bstate"] = copy(state.bstate)
+        dt["bgrad"] = copy(state.bgrad)
+        dt["c"] = copy(state.constr_c)
     end
     g_norm = vecnorm(state.g, Inf)
     update!(tr,

From 0e8477798a685a76e1512f5505abd92eeae0071b Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 12 Nov 2016 11:15:57 -0600
Subject: [PATCH 13/40] =?UTF-8?q?Adopt=20exact=20updating=20of=20slack=20t?=
 =?UTF-8?q?erms=20and=20=CE=BBI=20during=20linesearch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/interior.jl     | 50 +++++++++++++++++++++++++++++++++------------
 src/ipnewton.jl     | 14 +++++++------
 test/constraints.jl |  2 +-
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 01e070f87..c2b3df696 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -138,13 +138,38 @@ immutable BarrierLineSearchGrad{T}
     bgrad::BarrierStateVars{T}    # trial point's gradient
 end
 
-function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α)
-    ls_update!(out.slack_x, base.slack_x, step.slack_x, α)
-    ls_update!(out.slack_c, base.slack_c, step.slack_c, α)
+function ls_update!(out::BarrierStateVars, c, base::BarrierStateVars, step::BarrierStateVars, α, constraints, state, dslackc)
+    bounds = constraints.bounds
+    constraints.c!(state.x_ls, c)
+    xtarget = bounds.σx.*(state.x_ls[bounds.ineqx] - bounds.bx)
+    dslackx = bounds.σx.*state.s[bounds.ineqx]
+    ctarget = bounds.σc.*(c[bounds.ineqc] - bounds.bc)
+    ls_update!(out, base, step, α, state.μ, xtarget, dslackx, ctarget, dslackc)
+end
+
+function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, μ, xtarget, dslackx, ctarget, dslackc)
     ls_update!(out.λxE, base.λxE, step.λxE, α)
-    ls_update!(out.λx, base.λx, step.λx, α)
-    ls_update!(out.λc, base.λc, step.λc, α)
     ls_update!(out.λcE, base.λcE, step.λcE, α)
+    # For the inequality terms, we use "exact" updating
+    _lsu_slack!(out.slack_x, xtarget, base.slack_x, dslackx, α)
+    _lsu_slack!(out.slack_c, ctarget, base.slack_c, dslackc, α)
+    _lsu_λ!(out.λx, out.slack_x, μ)
+    _lsu_λ!(out.λc, out.slack_c, μ)
+    out
+end
+function _lsu_slack!(out, target, slack, dslack, α)
+    for i = 1:length(out)
+        t = target[i]
+        # This handles the possible loss of precision at the boundary
+        # by using the gradient to extrapolate the change
+        out[i] = t != 0 ? t : slack[i]+α*dslack[i]
+    end
+    out
+end
+function _lsu_λ!(out, slack, μ)
+    for i = 1:length(out)
+        out[i] = μ/slack[i]
+    end
     out
 end
 
@@ -397,22 +422,21 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x
 end
 
 # for line searches that don't use the gradient along the line
-function lagrangian_linefunc(α, d, constraints, state)
-    _lagrangian_linefunc(α, d, constraints, state)[2]
+function lagrangian_linefunc(α, d, constraints, state, dslackc)
+    _lagrangian_linefunc(α, d, constraints, state, dslackc)[2]
 end
 
-function _lagrangian_linefunc(α, d, constraints, state)
-    b_ls = state.b_ls
+function _lagrangian_linefunc(α, d, constraints, state, dslackc)
+    b_ls, bounds = state.b_ls, constraints.bounds
     ls_update!(state.x_ls, state.x, state.s, α)
-    ls_update!(b_ls.bstate, state.bstate, state.bstep, α)
-    constraints.c!(state.x_ls, b_ls.c)
+    ls_update!(b_ls.bstate, b_ls.c, state.bstate, state.bstep, α, constraints, state, dslackc)
     lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)
 end
 
-function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
+function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}, dslackc)
     # For backtrack_constrained, the last evaluation is the one we
     # keep, so it's safe to store the results in state
-    f_x, L = _lagrangian_linefunc(α, d, constraints, state)
+    f_x, L = _lagrangian_linefunc(α, d, constraints, state, dslackc)
     state.f_x = f_x
     state.L = L
     L
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 12f9ce4c8..fe602afe6 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -173,7 +173,7 @@ end
 
 function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
-    solve_step!(state, constraints)
+    state, dslackc = solve_step!(state, constraints)
     # If a step α=1 will not change any of the parameters, we can quit now.
     # This prevents a futile linesearch.
     if is_smaller_eps(state.x, state.s) &&
@@ -194,7 +194,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
                             view(state.s, bounds.iz).*bounds.σz)
 
     # Determine the actual distance of movement along the search line
-    ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method)
+    ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method, dslackc)
     state.alpha, f_update, g_update =
         method.linesearch!(ϕ, T(1), αmax, qp)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
@@ -204,10 +204,10 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
 
     # Update current position # x = x + alpha * s
     ls_update!(state.x, state.x, state.s, state.alpha)
-    ls_update!(bstate, bstate, bstep, state.alpha)
+    ls_update!(bstate, state.constr_c, bstate, bstep, state.alpha, constraints, state, dslackc)
 
     # Evaluate the constraints at the new position
-    constraints.c!(state.x, state.constr_c)
+#    constraints.c!(state.x, state.constr_c)  # already done in ls_update!
     constraints.jacobian!(state.x, state.constr_J)
 
     # Test for active inequalities, solve immediately for the corresponding s and λ
@@ -226,17 +226,19 @@ function solve_step!(state::IPNewtonState, constraints)
     k = unpack_vec!(bstep.λcE, step, k)
     k == length(step) || error("exhausted targets before step")
     # Solve for the slack variable and λI updates
+    # These are only used to estimate αmax, otherwise these are updated by exact formulas
     for (i, j) in enumerate(bounds.ineqx)
         bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j]
         bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
     end
     JI = view5(state.constr_J, bounds.ineqc, :)
-    bstep.slack_c[:] = -bgrad.λc + Diagonal(bounds.σc)*JI*s
+    dslackc = Diagonal(bounds.σc)*JI*s
+    bstep.slack_c[:] = -bgrad.λc + dslackc
     for i = 1:length(bstep.λc)
         bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
     end
     state.stepf = step
-    state
+    state, dslackc
 end
 
 function is_smaller_eps(ref, step)
diff --git a/test/constraints.jl b/test/constraints.jl
index 0509b1f9e..6f2296953 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -299,7 +299,7 @@ ConstraintBounds:
         bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
         αmax = Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz,
                                       state.s[bounds.iz].*bounds.σz)
-        ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state)
+        ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state, Float64[])
         @test ϕ(0) ≈ qp[1]
         α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)
         @test α > 1e-3

From b4683bee4315880a15e2aa8f1e0e018030b1d02e Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 12 Nov 2016 11:16:49 -0600
Subject: [PATCH 14/40] Update f_x_previous; use safer inversion

---
 src/ipnewton.jl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index fe602afe6..7289e605f 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -172,6 +172,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
 end
 
 function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
+    state.f_x_previous = state.f_x
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
     state, dslackc = solve_step!(state, constraints)
     # If a step α=1 will not change any of the parameters, we can quit now.
@@ -218,7 +219,12 @@ end
 
 function solve_step!(state::IPNewtonState, constraints)
     # Solve the Newton step
-    step = -(state.Hf\state.gf)  # do *not* force posdef
+    local step
+    try
+        step = -(state.Hf\state.gf)  # do *not* force posdef
+    catch
+        step = -(svdfact(state.Hf)\state.gf)
+    end
     x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
     k = unpack_vec!(s, step, 0)

From cada26427457fdd1c3e6d19902482dc4bfa6b579 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 12 Nov 2016 11:16:07 -0600
Subject: [PATCH 15/40] Skip solve_active_inequalities; it shouldn't be
 necessary now.

But we can't expect perfect slack precision.
---
 src/ipnewton.jl     | 2 +-
 test/constraints.jl | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 7289e605f..597f5f419 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -212,7 +212,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     constraints.jacobian!(state.x, state.constr_J)
 
     # Test for active inequalities, solve immediately for the corresponding s and λ
-    solve_active_inequalities!(d, constraints, state)
+    # solve_active_inequalities!(d, constraints, state)
 
     false
 end
diff --git a/test/constraints.jl b/test/constraints.jl
index 6f2296953..019cc9bdb 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -342,7 +342,7 @@ ConstraintBounds:
                 Optim.update_h!(d, constraints, state, method)
             end
             @test state.x[1] == σ
-            @test state.bstate.slack_x[1] ≈ μ/abs(F)
+            @test state.bstate.slack_x[1] < eps(float(σ))
             # x >= 1 using the linear/nonlinear constraints
             d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
             constraints = TwiceDifferentiableConstraintsFunction(
@@ -358,7 +358,7 @@ ConstraintBounds:
                 Optim.update_h!(d, constraints, state, method)
             end
             @test state.x[1] == σ
-            @test state.bstate.slack_c[1] ≈ μ/abs(F)
+            @test state.bstate.slack_c[1] < eps(float(σ))
         end
     end
 end

From 16d7ac06bc3c9245d2fc794a1fdc7318b9e504c0 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 13 Nov 2016 05:16:14 -0600
Subject: [PATCH 16/40] Restrict one-sided function-value convergence to
 monotonic methods

Also require the function-value criterion to be satisfied on successive iterations, to ensure that constrained optimization doesn't satisfy it spuriously
---
 src/interior.jl                     | 11 ++++++++++-
 src/optimize.jl                     | 22 ++++++++++++++++------
 src/types.jl                        |  8 +++++---
 src/utilities/assess_convergence.jl | 11 +++++++++--
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index c2b3df696..0bc0423a0 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -182,7 +182,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
     tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
 
-    x_converged, f_converged = false, false
+    x_converged, f_converged, counter_f_tol = false, false, 0
     g_converged = vecnorm(state.g, Inf) < options.g_tol
 
     converged = g_converged
@@ -199,6 +199,15 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         update_asneeded_fg!(d, constraints, state, method)
         x_converged, f_converged,
         g_converged, converged = assess_convergence(state, options)
+        # With equality constraints, optimization is not necessarily
+        # monotonic in the value of the function. If the function
+        # change is approximately canceled by a change in the equality
+        # violation, it's possible to spuriously satisfy the f_tol
+        # criterion. Consequently, we require that the f_tol condition
+        # be satisfied a certain number of times in a row before
+        # declaring convergence.
+        counter_f_tol = f_converged ? counter_f_tol+1 : 0
+        converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol)
 
         # If tracing, update trace with trace!. If a callback is provided, it
         # should have boolean return value that controls the variable stopped_by_callback.
diff --git a/src/optimize.jl b/src/optimize.jl
index 0220166ad..7283b331b 100644
--- a/src/optimize.jl
+++ b/src/optimize.jl
@@ -9,6 +9,7 @@ function optimize(f::Function,
                   x_tol::Real = 1e-32,
                   f_tol::Real = 1e-32,
                   g_tol::Real = 1e-8,
+                  successive_f_tol::Integer = 2,
                   iterations::Integer = 1_000,
                   store_trace::Bool = false,
                   show_trace::Bool = false,
@@ -17,7 +18,7 @@ function optimize(f::Function,
                   autodiff::Bool = false,
                   callback = nothing)
     options = OptimizationOptions(;
-        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol,
+        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol,
         iterations = iterations, store_trace = store_trace,
         show_trace = show_trace, extended_trace = extended_trace,
         callback = callback, show_every = show_every,
@@ -32,6 +33,7 @@ function optimize(f::Function,
                   x_tol::Real = 1e-32,
                   f_tol::Real = 1e-32,
                   g_tol::Real = 1e-8,
+                  successive_f_tol::Integer = 2,
                   iterations::Integer = 1_000,
                   store_trace::Bool = false,
                   show_trace::Bool = false,
@@ -39,7 +41,7 @@ function optimize(f::Function,
                   show_every::Integer = 1,
                   callback = nothing)
     options = OptimizationOptions(;
-        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol,
+        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol,
         iterations = iterations, store_trace = store_trace,
         show_trace = show_trace, extended_trace = extended_trace,
         callback = callback, show_every = show_every)
@@ -54,6 +56,7 @@ function optimize(f::Function,
                   x_tol::Real = 1e-32,
                   f_tol::Real = 1e-32,
                   g_tol::Real = 1e-8,
+                  successive_f_tol::Integer = 2,
                   iterations::Integer = 1_000,
                   store_trace::Bool = false,
                   show_trace::Bool = false,
@@ -61,7 +64,7 @@ function optimize(f::Function,
                   show_every::Integer = 1,
                   callback = nothing)
     options = OptimizationOptions(;
-        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol,
+        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol,
         iterations = iterations, store_trace = store_trace,
         show_trace = show_trace, extended_trace = extended_trace,
         callback = callback, show_every = show_every)
@@ -74,6 +77,7 @@ function optimize(d::DifferentiableFunction,
                   x_tol::Real = 1e-32,
                   f_tol::Real = 1e-32,
                   g_tol::Real = 1e-8,
+                  successive_f_tol::Integer = 2,
                   iterations::Integer = 1_000,
                   store_trace::Bool = false,
                   show_trace::Bool = false,
@@ -81,7 +85,7 @@ function optimize(d::DifferentiableFunction,
                   show_every::Integer = 1,
                   callback = nothing)
     options = OptimizationOptions(;
-        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol,
+        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol,
         iterations = iterations, store_trace = store_trace,
         show_trace = show_trace, extended_trace = extended_trace,
         callback = callback, show_every = show_every)
@@ -94,6 +98,7 @@ function optimize(d::TwiceDifferentiableFunction,
                   x_tol::Real = 1e-32,
                   f_tol::Real = 1e-32,
                   g_tol::Real = 1e-8,
+                  successive_f_tol::Integer = 2,
                   iterations::Integer = 1_000,
                   store_trace::Bool = false,
                   show_trace::Bool = false,
@@ -101,7 +106,7 @@ function optimize(d::TwiceDifferentiableFunction,
                   show_every::Integer = 1,
                   callback = nothing)
     options = OptimizationOptions(;
-        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol,
+        x_tol = x_tol, f_tol = f_tol, g_tol = g_tol, successive_f_tol = successive_f_tol,
         iterations = iterations, store_trace = store_trace,
         show_trace = show_trace, extended_trace = extended_trace,
         callback = callback, show_every = show_every)
@@ -220,7 +225,7 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O
     tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
 
-    x_converged, f_converged = false, false
+    x_converged, f_converged, counter_f_tol = false, false, 0
     g_converged = if typeof(method) <: NelderMead
         nmobjective(state.f_simplex, state.m, state.n) < options.g_tol
     elseif  typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing
@@ -242,6 +247,11 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M, options::O
         update_g!(d, state, method)
         x_converged, f_converged,
         g_converged, converged = assess_convergence(state, options)
+        # See optimize in interior.jl for an explanation of the next
+        # two lines (given the existence of the option, we'd better
+        # use it here too)
+        counter_f_tol = f_converged ? counter_f_tol+1 : 0
+        converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol)
         # We don't use the Hessian for anything if we have declared convergence,
         # so we might as well not make the (expensive) update if converged == true
         !converged && update_h!(d, state, method)
diff --git a/src/types.jl b/src/types.jl
index 9c3f67247..d348e7211 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -8,6 +8,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     x_tol::Float64
     f_tol::Float64
     g_tol::Float64
+    successive_f_tol::Int
     iterations::Int
     store_trace::Bool
     show_trace::Bool
@@ -23,6 +24,7 @@ function OptimizationOptions(;
         x_tol::Real = 1e-32,
         f_tol::Real = 1e-32,
         g_tol::Real = 1e-8,
+        successive_f_tol::Integer = 2,
         iterations::Integer = 1_000,
         store_trace::Bool = false,
         show_trace::Bool = false,
@@ -37,9 +39,9 @@ function OptimizationOptions(;
         show_trace = true
     end
     OptimizationOptions{typeof(callback)}(
-        Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(iterations),
-        store_trace, show_trace, extended_trace, autodiff, Int(show_every),
-        callback, time_limit, μfactor)
+        Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol),
+        Int(iterations), store_trace, show_trace, extended_trace, autodiff,
+        Int(show_every), callback, time_limit, μfactor)
 end
 
 function print_header(options::OptimizationOptions)
diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl
index e2b284f6f..6e72fa820 100644
--- a/src/utilities/assess_convergence.jl
+++ b/src/utilities/assess_convergence.jl
@@ -15,7 +15,7 @@ function assess_convergence(x::Array,
     # Absolute Tolerance
     # if abs(f_x - f_x_previous) < f_tol
     # Relative Tolerance
-    if abs(f_x - f_x_previous) / (abs(f_x) + f_tol) < f_tol || nextfloat(f_x) >= f_x_previous
+    if abs(f_x - f_x_previous) < min(f_tol * (abs(f_x) + f_tol), eps(abs(f_x)+abs(f_x_previous)))
         f_converged = true
     end
 
@@ -39,7 +39,7 @@ function assess_convergence(state, options)
     # Absolute Tolerance
     # if abs(f_x - f_x_previous) < f_tol
     # Relative Tolerance
-    if abs(state.f_x - state.f_x_previous) / (abs(state.f_x) + options.f_tol) < options.f_tol || nextfloat(state.f_x) >= state.f_x_previous
+    if abs(state.f_x - state.f_x_previous) < min(options.f_tol * (abs(state.f_x) + options.f_tol), eps(abs(state.f_x)+abs(state.f_x_previous))) || fconverged(state)
         f_converged = true
     end
 
@@ -79,6 +79,13 @@ function assess_convergence(state::NewtonTrustRegionState, options)
                                        options.x_tol,
                                        options.f_tol,
                                        options.g_tol)
+        f_converged = fconverged(state)
+        converged |= f_converged
     end
     x_converged, f_converged, g_converged, converged
 end
+
+# For monotonic-decreasing problems
+fconverged(state) = nextfloat(state.f_x) >= state.f_x_previous
+# Constrained problems are not monotonic, so we can't add a one-sided criterion
+fconverged(state::IPNewtonState) = false

From 2eb3e636d25de5f943230a0b664be52b054e7ef9 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 14 Nov 2016 16:41:17 -0600
Subject: [PATCH 17/40] More robust isinterior/isfeasible

---
 src/interior.jl | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 0bc0423a0..4a1e75c54 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -668,7 +668,12 @@ function isfeasible(bounds::ConstraintBounds, x, c)
     isf
 end
 isfeasible(constraints, state::AbstractBarrierState) = isfeasible(constraints, state.x, state.constraints_c)
-isfeasible(constraints, x) = isfeasible(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc)))
+function isfeasible(constraints, x)
+    # don't assume c! returns c (which means this is a little more awkward)
+    c = Array{eltype(x)}(constraints.bounds.nc)
+    constraints.c!(x, c)
+    isfeasible(constraints, x, c)
+end
 isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c)
 
 """
@@ -700,7 +705,11 @@ function isinterior(bounds::ConstraintBounds, x, c)
     isi
 end
 isinterior(constraints, state::AbstractBarrierState) = isinterior(constraints, state.x, state.constraints_c)
-isinterior(constraints, x) = isinterior(constraints, x, constraints.c!(x, Array{eltype(x)}(constraints.bounds.nc)))
+function isinterior(constraints, x)
+    c = Array{eltype(x)}(constraints.bounds.nc)
+    constraints.c!(x, c)
+    isinterior(constraints, x, c)
+end
 isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c)
 
 ## Utilities for representing total state as single vector

From b5366380448f104f245971b6ca3186ad5083eac5 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 14 Nov 2016 19:34:49 -0600
Subject: [PATCH 18/40] Check finiteness in linesearch and eliminate eps
 component

---
 src/iplinesearch.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index c0343b5b1..d37d576ce 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -1,15 +1,15 @@
 function backtrack_constrained(ϕ, α, αmax, Lcoefsα,
-                               c1 = 0.5, ρ=oftype(α, 0.5), itermax = 100)
+                               c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α))))
     α = min(α, 0.999*αmax)
     L0, L1, L2 = Lcoefsα
     f_calls = 0
-    while f_calls < itermax
+    while α >= αmin
         f_calls += 1
         val = ϕ(α)
-        if abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0) + 100*eps(abs(val)+abs(L0))
+        if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0)
             return α, f_calls, 0
         end
         α *= ρ
     end
-    error("failed to satisfy criterion after $f_calls iterations")
+    return zero(α), f_calls, 0
 end

From c3ca54b5356f6ba4f0f92c9029ed2c37975a7705 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 14 Nov 2016 19:36:35 -0600
Subject: [PATCH 19/40] Allow mu decrement based on sufficient gradient
 decrease or lack of progress on f

---
 src/interior.jl | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 4a1e75c54..76a332a41 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -116,6 +116,12 @@ function Base.dot(v::BarrierStateVars, w::BarrierStateVars)
         dot(v.λcE, w.λcE)
 end
 
+function Base.vecnorm(b::BarrierStateVars, p::Real)
+    vecnorm(b.slack_x, p) + vecnorm(b.slack_c, p) +
+        vecnorm(b.λx, p) + vecnorm(b.λc, p) +
+        vecnorm(b.λxE, p) + vecnorm(b.λcE, p)
+end
+
 """
     BarrierLineSearch{T}
 
@@ -183,7 +189,8 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
 
     x_converged, f_converged, counter_f_tol = false, false, 0
-    g_converged = vecnorm(state.g, Inf) < options.g_tol
+    gnorm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
+    g_converged = gnorm < options.g_tol
 
     converged = g_converged
     iteration, iterationμ = 0, 0
@@ -191,6 +198,8 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
     options.show_trace && print_header(method)
     trace!(tr, state, iteration, method, options)
 
+    Δfmax = zero(state.f_x)
+
     while !converged && !stopped && iteration < options.iterations
         iteration += 1
         iterationμ += 1
@@ -208,6 +217,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         # declaring convergence.
         counter_f_tol = f_converged ? counter_f_tol+1 : 0
         converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol)
+        gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
 
         # If tracing, update trace with trace!. If a callback is provided, it
         # should have boolean return value that controls the variable stopped_by_callback.
@@ -216,15 +226,21 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
             stopped_by_callback = trace!(tr, state, iteration, method, options)
         end
 
+        Δf = abs(state.f_x - state.f_x_previous)
+        Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous))
+
         # Test whether we need to decrease the barrier penalty
-        if converged
-            if iterationμ > 1
-                # We did real work, so it's worth decreasing the barrier penalty further
-                shrink_μ!(d, constraints, state, method, options)
-                iterationμ = 0
-                converged = false
-            end
+        if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax)
+            # Since iterationμ > 1 we must have accomplished real
+            # work, so it's worth trying to decrease the barrier
+            # penalty further.
+            shrink_μ!(d, constraints, state, method, options)
+            iterationμ = 0
+            converged = false
+            gnormnew = oftype(gnormnew, NaN)
+            Δfmax = zero(Δfmax)
         end
+        gnorm = gnormnew
 
         # We don't use the Hessian for anything if we have declared convergence,
         # so we might as well not make the (expensive) update if converged == true

From c374b8101dda8ba4a4166d2dc49e7f2aa77893ab Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 16 Nov 2016 06:33:15 -0600
Subject: [PATCH 20/40] Trace alpha too

---
 src/utilities/trace.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 90a3b27e9..3c11f1629 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -120,6 +120,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
     dt["Lagrangian"] = state.L
     dt["μ"] = state.μ
     if options.extended_trace
+        dt["α"] = state.alpha
         dt["x"] = copy(state.x)
         dt["g(x)"] = copy(state.g)
         dt["h(x)"] = copy(state.H)

From 5c0241c9b1f000f15d55f023e687671c60f9fda7 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 14 Nov 2016 19:38:19 -0600
Subject: [PATCH 21/40] Support manually-supplied mu0 and fix a bug in
 initialization

---
 src/interior.jl | 19 ++++++++++++-------
 src/ipnewton.jl |  6 +++---
 src/types.jl    |  6 ++++--
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 76a332a41..ecf4d15ca 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -338,7 +338,7 @@ function set_active_params!(slack, λ, active, λtarget, μ, k)
 end
 
 """
-    initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, β=0.01) -> μ
+    initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, μ0=:auto, β=0.01) -> μ
 
 Pick μ and λ to ensure that the equality constraints are satisfied
 locally, and that the initial gradient including the barrier would be
@@ -356,9 +356,9 @@ constraints evaluated at `x`. `β` (optional) specifies the fraction of
 the objective's gradient that may be diminished by the barrier.
 
 In addition to setting `λxE` and `λcE`, this returns `μ`, the value of
-the barrier penalty.
+the barrier penalty. You can manually specify μ by supplying μ0.
 """
-function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1//100)
+function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, μ0, β=1//100)
     length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x))
     # Calculate the projection matrix
     JEx = zeros(eltype(J), length(bounds.eqx), length(x))
@@ -371,7 +371,6 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/
     CEc = cholfact(Positive, CE)
     Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.)
     # Calculate the barrier deviation and projection onto inequality normals
-    Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc]
     JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x))
     for (i,j) in enumerate([bounds.iz; bounds.ineqx])
         JIx[i,j] = 1
@@ -380,15 +379,21 @@ function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, β=1/
     JI = vcat(JIx, JIc)
     JIg = JI*Pg
     # Solve for μ
-    λtilde = 1./Δb
-    μden = dot(λtilde, JIg)
+    # Δb = [bounds.σz.*x[bounds.iz]; bounds.σx.*(x[bounds.ineqx] - bounds.bx); bounds.σc.*(c[bounds.ineqc] - bounds.bc)]
+    Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc]
+    σ = [bounds.σz; bounds.σx; bounds.σc]
+    λtilde = σ./Δb
+    μden = dot(σ.*λtilde, JIg)
     if μden == 0 && !isempty(Δb)
         μden = maximum(abs(λtilde).*abs(JIg))*length(Δb)
     end
     μ = β*dot(Pg, Pg)/abs(μden)
     μ = μden != 0 ? μ : oftype(μ, 1)
+    if μ0 != :auto
+        μ = μ0
+    end
     # Solve for λE
-    gb = g - μ*(JI'*λtilde)
+    gb = g - μ*(JI'*(σ.*λtilde))
     Pgb = gb - JE'*(CEc \ (JE*gb))
     λE = CEc \ (JE*Pgb)
     k = unpack_vec!(λx, λE, 0)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 597f5f419..e75223762 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -86,9 +86,9 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hf,
         stepf)
 
-    state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J)
-    bstate.λx[:] = μ./bstate.slack_x
-    bstate.λc[:] = μ./bstate.slack_c
+    state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0)
+    bstate.λx[:] = state.μ./bstate.slack_x
+    bstate.λc[:] = state.μ./bstate.slack_c
     update_fg!(d, constraints, state, method)
     update_h!(d, constraints, state, method)
 end
diff --git a/src/types.jl b/src/types.jl
index d348e7211..be5949460 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -18,6 +18,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     callback::TCallback
     time_limit::Float64
     μfactor::Float64
+    μ0
 end
 
 function OptimizationOptions(;
@@ -33,7 +34,8 @@ function OptimizationOptions(;
         show_every::Integer = 1,
         callback = nothing,
         time_limit = NaN,
-        μfactor = 0.1)
+        μfactor = 0.1,
+        μ0 = :auto)
     show_every = show_every > 0 ? show_every: 1
     if extended_trace && callback == nothing
         show_trace = true
@@ -41,7 +43,7 @@ function OptimizationOptions(;
     OptimizationOptions{typeof(callback)}(
         Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol),
         Int(iterations), store_trace, show_trace, extended_trace, autodiff,
-        Int(show_every), callback, time_limit, μfactor)
+        Int(show_every), callback, time_limit, μfactor, μ0)
 end
 
 function print_header(options::OptimizationOptions)

From 61d68a13ad177be5bff57d0afb68a8c967af1de1 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 16 Nov 2016 06:33:03 -0600
Subject: [PATCH 22/40] =?UTF-8?q?Improve=20initialization=20of=20=CE=BC,?=
 =?UTF-8?q?=20=CE=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This corrects some errors in the handling of equality constraints, and also ensures that:
- we approximately match the gradient (don't just make a descent direction)
- the hessian (when available) is also relatively unperturbed along the direction of the initial gradient.
---
 src/interior.jl     | 241 +++++++++++++++++++++++++++++++++-----------
 src/ipnewton.jl     |  35 +++----
 src/types.jl        |  29 +++++-
 test/constraints.jl |  83 ++++++++++++++-
 4 files changed, 308 insertions(+), 80 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index ecf4d15ca..cd1e56b8b 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -51,6 +51,12 @@ function setslack!(slack, active, v, ineq, σ, b)
     slack
 end
 
+slack(bstate::BarrierStateVars) = [bstate.slack_x; bstate.slack_c]
+lambdaI(bstate::BarrierStateVars) = [bstate.λx; bstate.λc]
+lambdaE(bstate::BarrierStateVars) = [bstate.λxE; bstate.λcE]
+lambdaI(state::AbstractBarrierState) = lambdaI(state.bstate)
+lambdaE(state::AbstractBarrierState) = lambdaE(state.bstate)
+
 Base.similar(bstate::BarrierStateVars) =
     BarrierStateVars(similar(bstate.slack_x),
                      similar(bstate.slack_c),
@@ -338,71 +344,182 @@ function set_active_params!(slack, λ, active, λtarget, μ, k)
 end
 
 """
-    initialize_μ_λE!(λxE, λcE, constraints, x, g, constr_c, constr_J, μ0=:auto, β=0.01) -> μ
+    initialize_μ_λ!(state, bounds, μ0=:auto, β=0.01)
+    initialize_μ_λ!(state, bounds, (Hobj,HcI), μ0=:auto, β=0.01)
 
 Pick μ and λ to ensure that the equality constraints are satisfied
-locally, and that the initial gradient including the barrier would be
-a descent direction for the problem without the barrier (μ = 0). This
-ensures that the search isn't pushed out of the basin of the
-user-supplied initial guess.
-
-`λv` and `λc` are the Lagrange multipliers for the variables and extra
-(non-variable) constraints; these are pre-allocated storage for the
-output, and their input values are not used. `constraints` is an
-`AbstractConstraintsFunction`, `x` is the position (must be a feasible
-interior point), `g` is the gradient of the objective at `x`, and
-`constr_c` and `constr_J` contain the values and Jacobian of the extra
-constraints evaluated at `x`. `β` (optional) specifies the fraction of
-the objective's gradient that may be diminished by the barrier.
-
-In addition to setting `λxE` and `λcE`, this returns `μ`, the value of
-the barrier penalty. You can manually specify μ by supplying μ0.
+locally (at the current `state.x`), and that the initial gradient
+including the barrier would be a descent direction for the problem
+without the barrier (μ = 0). This ensures that the search isn't pushed
+out of the basin of the user-supplied initial guess.
+
+Upon entry, the objective function gradient, constraint values, and
+constraint jacobian must be set in `state.g`, `state.constr_c`, and
+`state.constr_J` respectively. If you also wish to ensure that the
+projection of the Hessian is minimally perturbed along the initial
+gradient, supply the hessian of the objective (`Hobj`) and
+
+    HcI = ∑_i (σ_i/s_i)∇∇ c_{Ii}
+
+for the constraints. This can be obtained as
+
+    HcI = hessianI(state.x, constraints, 1./state.bstate.slack_c, 1)
+
+You can manually specify `μ` by supplying a numerical value for
+`μ0`. Whether calculated algorithmically or specified manually, the
+values of `λ` are set using the chosen `μ`.
 """
-function initialize_μ_λ!(λx, λc, bounds::ConstraintBounds, x, g, c, J, μ0, β=1//100)
-    length(c) + length(bounds.iz) + length(bounds.ineqx) == 0 && return zero(eltype(x))
-    # Calculate the projection matrix
-    JEx = zeros(eltype(J), length(bounds.eqx), length(x))
-    for (i,j) in enumerate(bounds.eqx)
-        JEx[i,j] = 1
-    end
+function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β=1//100)
+    if nconstraints(bounds) == 0 && nconstraints_x(bounds) == 0
+        state.μ = 0
+        fill!(state.bstate, 0)
+        return state
+    end
+    gf = state.g  # must be pre-set to ∇f
+    # Calculate the projection of ∇f onto the nullspace of the
+    # equality constraint Jacobian (removing the component in its row space)
+    JE = jacobianE(state, bounds)
+    # QRF = qrfact(JE)
+    # Q = QRF[:Q]
+    # PEg = Q'*(Q*gf)   # in the subspace of JE
+    C = JE*JE'
+    Cc = cholfact(Positive, C)
+    Pperpg = gf-JE'*(Cc \ (JE*gf))   # in the nullspace of JE
+    # Set μ
+    JI = jacobianI(state, bounds)
+    xzi = xzinv(state.x, bounds)
+    if μ0 == :auto
+        # Calculate projections of the Lagrangian's gradient, and
+        # possibly hessian, along (∇f)_⟂
+        Dperp = dot(Pperpg, Pperpg)
+        σ, s = sigma(bounds), slack(state)
+        σdivs = σ./s
+        Δg = xzi + JI'*σdivs
+        PperpΔg = Δg - JE'*(Cc \ (JE*Δg))
+        DI = dot(PperpΔg, PperpΔg)
+        κperp, κI = hessian_projections(Hinfo, Pperpg, (JI*Pperpg)./s)
+        # Calculate μ and λI
+        μ = β * (κperp == 0 ? sqrt(Dperp/DI) : min(sqrt(Dperp/DI), abs(κperp/κI)))
+        if !isfinite(μ)
+            Δgtilde = abs(xzi) + JI'*(1./s)
+            PperpΔgtilde = Δgtilde - JE'*(Cc \ (JE*Δgtilde))
+            DItilde = dot(PperpΔgtilde, PperpΔgtilde)
+            μ = β*sqrt(Dperp/DItilde)
+        end
+        if !isfinite(μ) || μ == 0
+            μ = one(μ)
+        end
+    else
+        μ = convert(eltype(state.x), μ0)
+    end
+    state.μ = μ
+    # Set λI
+    state.bstate.λx[:] = μ./state.bstate.slack_x
+    state.bstate.λc[:] = μ./state.bstate.slack_c
+    # Calculate λE
+    λI = lambdaI(state)
+    ∇bI = gf - μ*xzi - JI'*λI
+#    qrregularize!(QRF)  # in case of any 0 eigenvalues
+    λE = Cc \ (JE*∇bI) + (cbar(bounds) - cE(state, bounds))/μ
+    k = unpack_vec!(state.bstate.λxE, λE, 0)
+    k = unpack_vec!(state.bstate.λcE, λE, k)
+    k == length(λE) || error("something is wrong")
+    state
+end
+function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β=1//100)
+    initialize_μ_λ!(state, bounds, nothing, μ0, β)
+end
+
+function hessian_projections(Hinfo::Tuple{AbstractMatrix,AbstractMatrix}, Pperpg, y)
+    κperp = dot(Hinfo[1]*Pperpg, Pperpg)
+    κI = dot(Hinfo[2]*Pperpg, Pperpg) + dot(y,y)
+    κperp, κI
+end
+hessian_projections{T}(Hinfo::Void, Pperpg::AbstractVector{T}, y) = convert(T, Inf), zero(T)  # no Hessian supplied; accepts (and ignores) `y` to match the 3-argument call in initialize_μ_λ!
+
+function jacobianE(state, bounds::ConstraintBounds)
+    J, x = state.constr_J, state.x
+    JEx = jacobianx(J, bounds.eqx)
     JEc = view5(J, bounds.eqc, :)
     JE = vcat(JEx, JEc)
-    CE = JE*JE'
-    CEc = cholfact(Positive, CE)
-    Pg = g - JE'*(CEc \ (JE*g)) # the projected gradient of the objective (orthog to all == constr.)
-    # Calculate the barrier deviation and projection onto inequality normals
-    JIx = zeros(eltype(J), length(bounds.iz)+length(bounds.ineqx), length(x))
-    for (i,j) in enumerate([bounds.iz; bounds.ineqx])
-        JIx[i,j] = 1
-    end
+end
+jacobianE(state, constraints) = jacobianE(state, constraints.bounds)
+
+function jacobianI(state, bounds::ConstraintBounds)
+    J, x = state.constr_J, state.x
+    JIx = jacobianx(J, bounds.ineqx)  # skip iz: there is no λIz, so don't put in JI
     JIc = view5(J, bounds.ineqc, :)
     JI = vcat(JIx, JIc)
-    JIg = JI*Pg
-    # Solve for μ
-    # Δb = [bounds.σz.*x[bounds.iz]; bounds.σx.*(x[bounds.ineqx] - bounds.bx); bounds.σc.*(c[bounds.ineqc] - bounds.bc)]
-    Δb = [x[bounds.iz]; x[bounds.ineqx] - bounds.bx; c[bounds.ineqc] - bounds.bc]
-    σ = [bounds.σz; bounds.σx; bounds.σc]
-    λtilde = σ./Δb
-    μden = dot(σ.*λtilde, JIg)
-    if μden == 0 && !isempty(Δb)
-        μden = maximum(abs(λtilde).*abs(JIg))*length(Δb)
-    end
-    μ = β*dot(Pg, Pg)/abs(μden)
-    μ = μden != 0 ? μ : oftype(μ, 1)
-    if μ0 != :auto
-        μ = μ0
-    end
-    # Solve for λE
-    gb = g - μ*(JI'*(σ.*λtilde))
-    Pgb = gb - JE'*(CEc \ (JE*gb))
-    λE = CEc \ (JE*Pgb)
-    k = unpack_vec!(λx, λE, 0)
-    k = unpack_vec!(λc, λE, k)
-    k == length(λE) || error("something is wrong")
-    μ
 end
-initialize_μ_λ!(λx, λc, constraints::AbstractConstraintsFunction, x, g, c, J, args...) =
-    initialize_μ_λ!(λx, λc, constraints.bounds, x, g, c, J, args...)
+jacobianI(state, constraints) = jacobianI(state, constraints.bounds)
+
+# TODO: when Optim supports sparse arrays, make a SparseMatrixCSC version
+function jacobianx(J::AbstractArray, indx)
+    Jx = zeros(eltype(J), length(indx), size(J, 2))
+    for (i,j) in enumerate(indx)
+        Jx[i,j] = 1
+    end
+    Jx
+end
+
+function sigma(bounds::ConstraintBounds)
+    [bounds.σx; bounds.σc]  # don't include σz
+end
+sigma(constraints) = sigma(constraints.bounds)
+
+slack(state) = slack(state.bstate)
+function xzinv(x, bounds::ConstraintBounds)
+    xzi = zero(x)
+    xzi[bounds.iz] = 1./x[bounds.iz]
+    xzi
+end
+
+cbar(bounds::ConstraintBounds) = [bounds.valx; bounds.valc]
+cbar(constraints) = cbar(constraints.bounds)
+cE(state, bounds::ConstraintBounds) = [state.x[bounds.eqx]; state.constr_c[bounds.eqc]]
+
+function hessianI!(h, x, constraints, λcI, μ)
+    λ = userλ(λcI, constraints)
+    constraints.h!(x, λ, h)
+    for i in constraints.bounds.iz
+        h[i,i] += μ/x[i]^2
+    end
+    h
+end
+
+"""
+    hessianI(x, constraints, λcI, μ) -> h
+
+Compute the hessian at `x` of the `λcI`-weighted sum of user-supplied
+constraint functions for just the inequalities.  This also includes
+contributions from any variables with bounds at 0, since those do not
+cause introduction of a slack variable. Other (nonzero) box
+constraints do not contribute to `h`, because the hessian of `x_i` is
+zero. (They contribute indirectly via their slack variables.)
+"""
+hessianI(x, constraints, λcI, μ) =
+    hessianI!(zeros(eltype(x), length(x), length(x)), x, constraints, λcI, μ)
+
+"""
+    userλ(λcI, bounds) -> λ
+
+Accumulates `λcI` into a vector `λ` ordered as the user-supplied
+constraint functions `c`. Upper and lower bounds are summed, weighted
+by `σ`. The resulting λ includes an overall negative sign so that this
+becomes the coefficient for the user-supplied hessian.
+
+This is relevant only for the inequalities. If you want the λ for just
+the equalities, you can use `λ[bounds.eqc] = λcE` for a zero-filled `λ`.
+"""
+function userλ(λcI, bounds::ConstraintBounds)
+    ineqc, σc = bounds.ineqc, bounds.σc
+    λ = zeros(eltype(bounds), nconstraints(bounds))
+    for i = 1:length(ineqc)
+        λ[ineqc[i]] -= λcI[i]*σc[i]
+    end
+    λ
+end
+userλ(λcI, constraints) = userλ(λcI, constraints.bounds)
 
 ## Computation of the Lagrangian and its gradient
 # This is in a parametrization that is also useful during linesearch
@@ -787,3 +904,13 @@ function shrink_μ!(d, constraints, state, method, options)
     state.μ *= options.μfactor
     update_fg!(d, constraints, state, method)
 end
+
+function qrregularize!(QRF)
+    F = QRF.factors  # QRF[:R] is a copy; R's diagonal lives on the diagonal of the stored factors
+    for i = 1:min(size(F, 1), size(F, 2))
+        if F[i,i] == 0
+            F[i,i] = 1  # replace exact zeros so later solves with R are not singular
+        end
+    end
+    QRF
+end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index e75223762..a30c5bc9f 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -86,9 +86,10 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hf,
         stepf)
 
-    state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0)
-    bstate.λx[:] = state.μ./bstate.slack_x
-    bstate.λc[:] = state.μ./bstate.slack_c
+    d.h!(initial_x, state.H)
+    # state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0)
+    Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1))
+    initialize_μ_λ!(state, constraints.bounds, Hinfo, options.μ0)
     update_fg!(d, constraints, state, method)
     update_h!(d, constraints, state, method)
 end
@@ -108,30 +109,22 @@ function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state
 end
 
 function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
+    x = state.x
     μ, Hxx, J = state.μ, state.H, state.constr_J
-    d.h!(state.x, Hxx)
-    # Collect the values of the coefficients of the inequality constraints
     bounds = constraints.bounds
-    ineqc, σc, λc = bounds.ineqc, bounds.σc, state.bstate.λc
     m, n = size(J, 1), size(J, 2)
-    λ = zeros(eltype(bounds), m)
-    for i = 1:length(ineqc)
-        λ[ineqc[i]] -= λc[i]*σc[i]
-    end
-    # Add the weighted hessian terms from the nonlinear constraints
-    constraints.h!(state.x, λ, Hxx)
-    # Add the Jacobian terms
-    JI = view5(J, ineqc, :)
+
+    d.h!(state.x, Hxx)
+    hessianI!(Hxx, state.x, constraints, state.bstate.λc, μ)  # accumulate the inequality second derivatives
+    # Add the Jacobian terms (J'*S^{-2}*J)
+    JI = view5(J, bounds.ineqc, :)
     Sinv2 = Diagonal(1./state.bstate.slack_c.^2)
     HJ = JI'*Sinv2*JI
     for j = 1:n, i = 1:n
         Hxx[i,j] += μ*HJ[i,j]
     end
-    # Add the variable inequalities
-    iz, x = bounds.iz, state.x
-    for i in iz
-        Hxx[i,i] += μ/x[i]^2
-    end
+    # Add the variable inequalities portions of J'*S^{-2}*J
+    # The iz terms are already in Hxx (from hessianI!)
     ineqx, sx = bounds.ineqx, state.bstate.slack_x
     for (i,j) in enumerate(ineqx)
         Hxx[j,j] += μ/sx[i]^2
@@ -141,7 +134,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     Hp = full(Hpc)
     # Now add the equality constraint hessian terms
     eqc, λcE = bounds.eqc, state.bstate.λcE
-    fill!(λ, 0)
+    λ = zeros(eltype(x), nconstraints(bounds))
     for i = 1:length(eqc)
         λ[eqc[i]] -= λcE[i]
     end
@@ -161,7 +154,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
                 -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))]
     # Also form the total gradient
     bgrad = state.bgrad
-    gI = state.g + JI'*Diagonal(σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc)
+    gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc)
     for (i,j) in enumerate(ineqx)
         gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2)
     end
diff --git a/src/types.jl b/src/types.jl
index be5949460..06550fd74 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -284,7 +284,7 @@ end
 # additional variables. See `parse_constraints` for details.
 
 immutable ConstraintBounds{T}
-    nc::Int          # Number of linear/nonlinear constraints
+    nc::Int          # Number of linear/nonlinear constraints supplied by user
     # Box-constraints on variables (i.e., directly on x)
     eqx::Vector{Int} # index-vector of equality-constrained x (not actually variable...)
     valx::Vector{T}  # value of equality-constrained x
@@ -312,8 +312,35 @@ end
 Base.eltype{T}(::Type{ConstraintBounds{T}}) = T
 Base.eltype(cb::ConstraintBounds) = eltype(typeof(cb))
 
+"""
+    nconstraints(bounds) -> nc
+
+The number of linear/nonlinear constraint functions supplied by the
+user. This does not include bounds-constraints on variables.
+
+See also: nconstraints_x.
+"""
 nconstraints(cb::ConstraintBounds) = cb.nc
 
+"""
+    nconstraints_x(bounds) -> nx
+
+The number of "meaningful" constraints (not `±Inf`) on the x coordinates.
+
+See also: nconstraints.
+"""
+function nconstraints_x(cb::ConstraintBounds)
+    mz = isempty(cb.iz) ? 0 : maximum(cb.iz)
+    mi = isempty(cb.ineqx) ? 0 : maximum(cb.ineqx)
+    me = isempty(cb.eqx) ? 0 : maximum(cb.eqx)
+    nmax = max(mz, mi, me)
+    hasconstraint = falses(nmax)
+    hasconstraint[cb.iz] = true
+    hasconstraint[cb.ineqx] = true
+    hasconstraint[cb.eqx] = true
+    sum(hasconstraint)
+end
+
 function Base.show(io::IO, cb::ConstraintBounds)
     indent = "    "
     print(io, "ConstraintBounds:")
diff --git a/test/constraints.jl b/test/constraints.jl
index 019cc9bdb..89916fc95 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -94,7 +94,7 @@ ConstraintBounds:
         cfun = x->Float64[]
         c = Float64[]
         J = Array{Float64}(0,0)
-        options = OptimizationOptions()
+        options = OptimizationOptions(μ0 = μ)
         method = Optim.IPNewton()
         ## In the code, variable constraints are special-cased (for
         ## reasons of user-convenience and efficiency).  It's
@@ -279,6 +279,87 @@ ConstraintBounds:
         @test state.Hf ≈ hp
     end
 
+    @testset "IPNewton initialization" begin
+        method = IPNewton()
+        options = OptimizationOptions()
+        x = [1.0,0.1,0.3,0.4]
+        ## A linear objective function (hessian is zero)
+        f_g = [1.0,2.0,3.0,4.0]
+        d = TwiceDifferentiableFunction(x->dot(x, f_g), (x,g)->copy!(g, f_g), (x,h)->fill!(h, 0))
+        # Variable bounds
+        constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8])
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        @test norm(f_g - state.g) ≈ 0.01*norm(f_g)
+        # Nonlinear inequalities
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2),
+            (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]),
+            (x,λ,h)->(h[4,4] += λ[2]*2),
+            [], [], [0.05, 0.4], [0.15, 4.4])
+        @test isinterior(constraints, x)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        @test norm(f_g - state.g) ≈ 0.01*norm(f_g)
+        # Mixed equalities and inequalities
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2),
+            (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]),
+            (x,λ,h)->(h[4,4] += λ[2]*2),
+            [], [], [0.1, 0.4], [0.1, 4.4])
+        @test isfeasible(constraints, x)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        J = zeros(2,4)
+        constraints.jacobian!(x, J)
+        eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal)
+        @test abs(dot(state.g, eqnormal)) < 1e-12  # orthogonal to equality constraint
+        Pfg = f_g - dot(f_g, eqnormal)*eqnormal
+        Pg = state.g - dot(state.g, eqnormal)*eqnormal
+        @test norm(Pfg - Pg) ≈ 0.01*norm(Pfg)
+        ## An objective function with a nonzero hessian
+        hd = [1.0, 100.0, 0.01, 2.0]   # diagonal terms of hessian
+        d = TwiceDifferentiableFunction(x->sum(hd.*x.^2)/2, (x,g)->copy!(g, hd.*x), (x,h)->copy!(h, Diagonal(hd)))
+        gx = d.g!(x, zeros(4))
+        hx = Diagonal(hd)
+        # Variable bounds
+        constraints = TwiceDifferentiableConstraintsFunction([0.5, 0.0, -Inf, -Inf], [Inf, Inf, 1.0, 0.8])
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011
+        Optim.update_h!(d, constraints, state, method)
+        @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011
+        # Nonlinear inequalities
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2),
+            (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]),
+            (x,λ,h)->(h[4,4] += λ[2]*2),
+            [], [], [0.05, 0.4], [0.15, 4.4])
+        @test isinterior(constraints, x)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        @test abs(dot(gx, state.g)/dot(gx,gx) - 1) <= 0.011
+        Optim.update_h!(d, constraints, state, method)
+        @test abs(dot(gx, state.H*gx)/dot(gx, hx*gx) - 1) <= 0.011
+        # Mixed equalities and inequalities
+        constraints = TwiceDifferentiableConstraintsFunction(
+            (x,c)->(c[1]=x[1]*x[2]; c[2]=3*x[3]+x[4]^2),
+            (x,J)->(J[:,:] = [x[2] x[1] 0 0; 0 0 3 2*x[4]]),
+            (x,λ,h)->(h[4,4] += λ[2]*2),
+            [], [], [0.1, 0.4], [0.1, 4.4])
+        @test isfeasible(constraints, x)
+        state = Optim.initial_state(method, options, d, constraints, x)
+        Optim.update_fg!(d, constraints, state, method)
+        J = zeros(2,4)
+        constraints.jacobian!(x, J)
+        eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal)
+        @test abs(dot(state.g, eqnormal)) < 1e-12  # orthogonal to equality constraint
+        Pgx = gx - dot(gx, eqnormal)*eqnormal
+        @test abs(dot(Pgx, state.g)/dot(Pgx,Pgx) - 1) <= 0.011
+        Optim.update_h!(d, constraints, state, method)
+        @test abs(dot(Pgx, state.H*Pgx)/dot(Pgx, hx*Pgx) - 1) <= 0.011
+    end
+
     @testset "IPNewton step" begin
         F = 1000
         d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))

From 11234bc81bafc07285458411127bc89443e239d4 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 16 Nov 2016 11:22:44 -0600
Subject: [PATCH 23/40] Use Lagrangian val/grad rather than objective val/grad
 in assessing convergence

---
 src/interior.jl                     |  4 +++-
 src/ipnewton.jl                     |  6 ++++--
 src/utilities/assess_convergence.jl | 11 +++++++++++
 src/utilities/trace.jl              |  4 +++-
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index cd1e56b8b..9b21a303b 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -233,7 +233,9 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         end
 
         Δf = abs(state.f_x - state.f_x_previous)
-        Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous))
+        if iterationμ > 1
+            Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous))
+        end
 
         # Test whether we need to decrease the barrier penalty
         if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index a30c5bc9f..73116d60b 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -16,6 +16,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     # Barrier penalty fields
     μ::T                  # coefficient of the barrier penalty
     L::T                  # value of the Lagrangian (objective + barrier + equality)
+    L_previous::T
     bstate::BarrierStateVars{T}   # value of slack and λ variables (current "position")
     bgrad::BarrierStateVars{T}    # gradient of slack and λ variables at current "position"
     bstep::BarrierStateVars{T}    # search direction for slack and λ
@@ -74,7 +75,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hd,
         similar(initial_x), # Maintain current x-search direction in state.s
         μ,
-        T(0),
+        T(NaN),
+        T(NaN),
         bstate,
         bgrad,
         bstep,
@@ -165,7 +167,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
 end
 
 function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
-    state.f_x_previous = state.f_x
+    state.f_x_previous, state.L_previous = state.f_x, state.L
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
     state, dslackc = solve_step!(state, constraints)
     # If a step α=1 will not change any of the parameters, we can quit now.
diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl
index 6e72fa820..b11800254 100644
--- a/src/utilities/assess_convergence.jl
+++ b/src/utilities/assess_convergence.jl
@@ -85,6 +85,17 @@ function assess_convergence(state::NewtonTrustRegionState, options)
     x_converged, f_converged, g_converged, converged
 end
 
+function assess_convergence(state::IPNewtonState, options)
+    assess_convergence(state.x,
+                       state.x_previous,
+                       state.L,
+                       state.L_previous,
+                       state.gf,
+                       options.x_tol,
+                       options.f_tol,
+                       options.g_tol)
+end
+
 # For monotonic-decreasing problems
 fconverged(state) = nextfloat(state.f_x) >= state.f_x_previous
 # Constrained problems are not monotonic, so we can't add a one-sided criterion
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 3c11f1629..bb574745c 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -123,12 +123,14 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
         dt["α"] = state.alpha
         dt["x"] = copy(state.x)
         dt["g(x)"] = copy(state.g)
+        dt["gf(x)"] = copy(state.gf)
         dt["h(x)"] = copy(state.H)
+        dt["hf(x)"] = copy(state.Hf)
         dt["bstate"] = copy(state.bstate)
         dt["bgrad"] = copy(state.bgrad)
         dt["c"] = copy(state.constr_c)
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(state.gf, Inf)
     update!(tr,
             iteration,
             state.f_x,

From a3bbf90fc437a15bd9b8ddd8a11f0fad4234d128 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Fri, 18 Nov 2016 06:09:31 -0600
Subject: [PATCH 24/40] Switch to primal-dual and clean up architecture

- use slack vars even for nonnegative/nonpositive constraints (needed for dual)
- eliminate "active" boolean state and delete solve_active_inequalities!
- reorder the fields in BarrierStateVars
- update slack and lambda vars using linesearch
- better support for initial infeasible state
---
 src/interior.jl     | 199 ++++++++++----------------------------------
 src/iplinesearch.jl |  11 +--
 src/ipnewton.jl     |  68 +++++++--------
 src/types.jl        |  50 +++--------
 test/constraints.jl | 128 ++++++++++++++--------------
 5 files changed, 162 insertions(+), 294 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 9b21a303b..0e0cc4b14 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -4,11 +4,9 @@ abstract AbstractBarrierState
 immutable BarrierStateVars{T}
     slack_x::Vector{T}     # values of slack variables for x
     slack_c::Vector{T}     # values of slack variables for c
-    active_x::Vector{Bool} # active constraints for x (see solve_active_inequalities)
-    active_c::Vector{Bool} # active constraints for c
-    λxE::Vector{T}         # λ for equality constraints on x
     λx::Vector{T}          # λ for equality constraints on slack_x
     λc::Vector{T}          # λ for equality constraints on slack_c
+    λxE::Vector{T}         # λ for equality constraints on x
     λcE::Vector{T}         # λ for linear/nonlinear equality constraints
 end
 # Note on λxE:
@@ -22,31 +20,29 @@ end
 @compat function (::Type{BarrierStateVars{T}}){T}(bounds::ConstraintBounds)
     slack_x = Array{T}(length(bounds.ineqx))
     slack_c = Array{T}(length(bounds.ineqc))
-    λxE = Array{T}(length(bounds.eqx))
     λx = similar(slack_x)
     λc = similar(slack_c)
+    λxE = Array{T}(length(bounds.eqx))
     λcE = Array{T}(length(bounds.eqc))
-    sv = BarrierStateVars{T}(slack_x, slack_c, fill(false, length(slack_x)),
-                             fill(false, length(slack_c)), λxE, λx, λc, λcE)
+    sv = BarrierStateVars{T}(slack_x, slack_c, λx, λc, λxE, λcE)
 end
 BarrierStateVars{T}(bounds::ConstraintBounds{T}) = BarrierStateVars{T}(bounds)
 
 function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x)
     sv = BarrierStateVars(bounds)
-    setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
     sv
 end
 function BarrierStateVars{T}(bounds::ConstraintBounds{T}, x, c)
     sv = BarrierStateVars(bounds)
-    setslack!(sv.slack_x, sv.active_x, x, bounds.ineqx, bounds.σx, bounds.bx)
-    setslack!(sv.slack_c, sv.active_c, c, bounds.ineqc, bounds.σc, bounds.bc)
+    setslack!(sv.slack_x, x, bounds.ineqx, bounds.σx, bounds.bx)
+    setslack!(sv.slack_c, c, bounds.ineqc, bounds.σc, bounds.bc)
     sv
 end
-function setslack!(slack, active, v, ineq, σ, b)
+function setslack!(slack, v, ineq, σ, b)
     for i = 1:length(ineq)
         dv = v[ineq[i]]-b[i]
-        slack[i] = σ[i]*dv
-        active[i] = dv == 0
+        slack[i] = abs(σ[i]*dv)
     end
     slack
 end
@@ -60,32 +56,26 @@ lambdaE(state::AbstractBarrierState) = lambdaE(state.bstate)
 Base.similar(bstate::BarrierStateVars) =
     BarrierStateVars(similar(bstate.slack_x),
                      similar(bstate.slack_c),
-                     similar(bstate.active_x),
-                     similar(bstate.active_c),
-                     similar(bstate.λxE),
                      similar(bstate.λx),
                      similar(bstate.λc),
+                     similar(bstate.λxE),
                      similar(bstate.λcE))
 
 Base.copy(bstate::BarrierStateVars) =
     BarrierStateVars(copy(bstate.slack_x),
                      copy(bstate.slack_c),
-                     copy(bstate.active_x),
-                     copy(bstate.active_c),
-                     copy(bstate.λxE),
                      copy(bstate.λx),
                      copy(bstate.λc),
+                     copy(bstate.λxE),
                      copy(bstate.λcE))
 
 
 function Base.fill!(b::BarrierStateVars, val)
     fill!(b.slack_x, val)
     fill!(b.slack_c, val)
-    fill!(b.active_x, false)
-    fill!(b.active_c, false)
-    fill!(b.λxE, val)
     fill!(b.λx, val)
     fill!(b.λc, val)
+    fill!(b.λxE, val)
     fill!(b.λcE, val)
     b
 end
@@ -95,7 +85,7 @@ Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv))
 
 function Base.show(io::IO, b::BarrierStateVars)
     print(io, "BarrierStateVars{$(eltype(b))}:")
-    for fn in fieldnames(b)
+    for fn in (:slack_x, :slack_c, :λx, :λc, :λxE, :λcE)
         print(io, "\n  $fn: ")
         show(io, getfield(b, fn))
     end
@@ -104,21 +94,21 @@ end
 @compat Base.:(==)(v::BarrierStateVars, w::BarrierStateVars) =
     v.slack_x == w.slack_x &&
     v.slack_c == w.slack_c &&
-    v.λxE == w.λxE &&
     v.λx == w.λx &&
     v.λc == w.λc &&
+    v.λxE == w.λxE &&
     v.λcE == w.λcE
 
 const bsv_seed = sizeof(UInt) == 64 ? 0x145b788192d1cde3 : 0x766a2810
 Base.hash(b::BarrierStateVars, u::UInt) =
-    hash(b.λcE, hash(b.λc, hash(b.λx, hash(b.λxE, hash(b.slack_c, hash(b.slack_x, u+bsv_seed))))))
+    hash(b.λcE, hash(b.λxE, hash(b.λc, hash(b.λx, hash(b.slack_c, hash(b.slack_x, u+bsv_seed))))))
 
 function Base.dot(v::BarrierStateVars, w::BarrierStateVars)
     dot(v.slack_x,w.slack_x) +
         dot(v.slack_c, w.slack_c) +
-        dot(v.λxE, w.λxE) +
         dot(v.λx, w.λx) +
         dot(v.λc, w.λc) +
+        dot(v.λxE, w.λxE) +
         dot(v.λcE, w.λcE)
 end
 
@@ -150,38 +140,13 @@ immutable BarrierLineSearchGrad{T}
     bgrad::BarrierStateVars{T}    # trial point's gradient
 end
 
-function ls_update!(out::BarrierStateVars, c, base::BarrierStateVars, step::BarrierStateVars, α, constraints, state, dslackc)
-    bounds = constraints.bounds
-    constraints.c!(state.x_ls, c)
-    xtarget = bounds.σx.*(state.x_ls[bounds.ineqx] - bounds.bx)
-    dslackx = bounds.σx.*state.s[bounds.ineqx]
-    ctarget = bounds.σc.*(c[bounds.ineqc] - bounds.bc)
-    ls_update!(out, base, step, α, state.μ, xtarget, dslackx, ctarget, dslackc)
-end
-
-function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, μ, xtarget, dslackx, ctarget, dslackc)
+function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI)
+    ls_update!(out.slack_x, base.slack_x, step.slack_x, α)
+    ls_update!(out.slack_c, base.slack_c, step.slack_c, α)
     ls_update!(out.λxE, base.λxE, step.λxE, α)
     ls_update!(out.λcE, base.λcE, step.λcE, α)
-    # For the inequality terms, we use "exact" updating
-    _lsu_slack!(out.slack_x, xtarget, base.slack_x, dslackx, α)
-    _lsu_slack!(out.slack_c, ctarget, base.slack_c, dslackc, α)
-    _lsu_λ!(out.λx, out.slack_x, μ)
-    _lsu_λ!(out.λc, out.slack_c, μ)
-    out
-end
-function _lsu_slack!(out, target, slack, dslack, α)
-    for i = 1:length(out)
-        t = target[i]
-        # This handles the possible loss of precision at the boundary
-        # by using the gradient to extrapolate the change
-        out[i] = t != 0 ? t : slack[i]+α*dslack[i]
-    end
-    out
-end
-function _lsu_λ!(out, slack, μ)
-    for i = 1:length(out)
-        out[i] = μ/slack[i]
-    end
+    ls_update!(out.λx, base.λx, step.λx, αI)
+    ls_update!(out.λc, base.λc, step.λc, αI)
     out
 end
 
@@ -289,62 +254,6 @@ update_h!(d, constraints::AbstractConstraintsFunction, state, method) = nothing
 update_asneeded_fg!(d, constraints, state, method) = update_fg!(d, constraints, state, method)
 update_asneeded_fg!(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}) = update_g!(d, constraints, state, method)
 
-
-# Explicit solution for slack, λ when an inequality constraint is
-# "active." This is necessary (or at least helpful) when c-b == 0 due
-# to roundoff error, in which case the KKT equations don't have an
-# exact solution within the precision.  We punt on the ∂λ equation
-# (which reduces to the slack, which should be small anyway), and
-# focus on the ∂x and ∂slack equations (therefore setting slack and
-# λ). By setting these to their exact solutions, we balance the forces
-# due to the barrier.
-function solve_active_inequalities!(d, constraints, state)
-    x, c, bstate, bounds = state.x, state.constr_c, state.bstate, constraints.bounds
-    nactive, nchanged = tally_active!(bstate.active_x, 0, 0, x, bounds.ineqx, bounds.bx)
-    nx = nactive
-    nactive, nchanged = tally_active!(bstate.active_c, nactive, nchanged, c, bounds.ineqc, bounds.bc, )
-    if nactive == 0 || nchanged == 0
-        return nothing
-    end
-    # Calculate the necessary gradients
-    d.g!(state.x, state.g)
-    constraints.jacobian!(state.x, state.constr_J)
-    # Solve for the Lagrange multipliers
-    ic, ix = bounds.ineqc[bstate.active_c], bounds.ineqx[bstate.active_x]
-    Jx = view5(state.constr_J, ic, ix)
-    Jact = view5(state.constr_J, ic, :)
-    Cactive = [eye(eltype(Jx), nx, nx) Jx'; Jx Jact*Jact']
-    pactive = [view(state.g, ix); Jact*state.g]
-    Cactivep = cholfact(Positive, Cactive)
-    λactive = (Cactivep\pactive).*[bounds.σx[bstate.active_x]; bounds.σc[bstate.active_c]]
-    any(x->x<=0, λactive) && error("something may be wrong, λ is zero or negative. Perhaps Cactive is singular?")
-    # Set the state
-    k = set_active_params!(bstate.slack_x, bstate.λx, bstate.active_x, λactive, state.μ, 0)
-    k = set_active_params!(bstate.slack_c, bstate.λc, bstate.active_c, λactive, state.μ, k)
-    k == length(λactive) || error("something is wrong")
-    nothing
-end
-
-function tally_active!(active, nactive, nchanged, c, ineq, b)
-    for (i,j) in enumerate(ineq)
-        isactive = c[j] == b[i]
-        nactive += isactive
-        nchanged += isactive != active[i]
-        active[i] = isactive
-    end
-    nactive, nchanged
-end
-
-function set_active_params!(slack, λ, active, λtarget, μ, k)
-    for i = 1:length(active)
-        active[i] || continue
-        λk = λtarget[k+=1]
-        λ[i] = λk
-        slack[i] = μ/λk
-    end
-    k
-end
-
 """
     initialize_μ_λ!(state, bounds, μ0=:auto, β=0.01)
     initialize_μ_λ!(state, bounds, (Hobj,HcI), μ0=:auto, β=0.01)
@@ -389,21 +298,20 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy
     Pperpg = gf-JE'*(Cc \ (JE*gf))   # in the nullspace of JE
     # Set μ
     JI = jacobianI(state, bounds)
-    xzi = xzinv(state.x, bounds)
     if μ0 == :auto
         # Calculate projections of the Lagrangian's gradient, and
         # possibly hessian, along (∇f)_⟂
         Dperp = dot(Pperpg, Pperpg)
         σ, s = sigma(bounds), slack(state)
         σdivs = σ./s
-        Δg = xzi + JI'*σdivs
+        Δg = JI'*σdivs
         PperpΔg = Δg - JE'*(Cc \ (JE*Δg))
         DI = dot(PperpΔg, PperpΔg)
         κperp, κI = hessian_projections(Hinfo, Pperpg, (JI*Pperpg)./s)
         # Calculate μ and λI
         μ = β * (κperp == 0 ? sqrt(Dperp/DI) : min(sqrt(Dperp/DI), abs(κperp/κI)))
         if !isfinite(μ)
-            Δgtilde = abs(xzi) + JI'*(1./s)
+            Δgtilde = JI'*(1./s)
             PperpΔgtilde = Δgtilde - JE'*(Cc \ (JE*Δgtilde))
             DItilde = dot(PperpΔgtilde, PperpΔgtilde)
             μ = β*sqrt(Dperp/DItilde)
@@ -420,7 +328,7 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy
     state.bstate.λc[:] = μ./state.bstate.slack_c
     # Calculate λE
     λI = lambdaI(state)
-    ∇bI = gf - μ*xzi - JI'*λI
+    ∇bI = gf - JI'*λI
 #    qrregularize!(QRF)  # in case of any 0 eigenvalues
     λE = Cc \ (JE*∇bI) + (cbar(bounds) - cE(state, bounds))/μ
     k = unpack_vec!(state.bstate.λxE, λE, 0)
@@ -449,7 +357,7 @@ jacobianE(state, constraints) = jacobianE(state, constraints.bounds)
 
 function jacobianI(state, bounds::ConstraintBounds)
     J, x = state.constr_J, state.x
-    JIx = jacobianx(J, bounds.ineqx)  # skip iz: there is no λIz, so don't put in JI
+    JIx = jacobianx(J, bounds.ineqx)
     JIc = view5(J, bounds.ineqc, :)
     JI = vcat(JIx, JIc)
 end
@@ -470,11 +378,6 @@ end
 sigma(constraints) = sigma(constraints.bounds)
 
 slack(state) = slack(state.bstate)
-function xzinv(x, bounds::ConstraintBounds)
-    xzi = zero(x)
-    xzi[bounds.iz] = 1./x[bounds.iz]
-    xzi
-end
 
 cbar(bounds::ConstraintBounds) = [bounds.valx; bounds.valc]
 cbar(constraints) = cbar(constraints.bounds)
@@ -483,9 +386,6 @@ cE(state, bounds::ConstraintBounds) = [state.x[bounds.eqx]; state.constr_c[bound
 function hessianI!(h, x, constraints, λcI, μ)
     λ = userλ(λcI, constraints)
     constraints.h!(x, λ, h)
-    for i in constraints.bounds.iz
-        h[i,i] += μ/x[i]^2
-    end
     h
 end
 
@@ -571,26 +471,27 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x
 end
 
 # for line searches that don't use the gradient along the line
-function lagrangian_linefunc(α, d, constraints, state, dslackc)
-    _lagrangian_linefunc(α, d, constraints, state, dslackc)[2]
+function lagrangian_linefunc(α, αI, d, constraints, state)
+    _lagrangian_linefunc(α, αI, d, constraints, state)[2]
 end
 
-function _lagrangian_linefunc(α, d, constraints, state, dslackc)
+function _lagrangian_linefunc(α, αI, d, constraints, state)
     b_ls, bounds = state.b_ls, constraints.bounds
     ls_update!(state.x_ls, state.x, state.s, α)
-    ls_update!(b_ls.bstate, b_ls.c, state.bstate, state.bstep, α, constraints, state, dslackc)
+    ls_update!(b_ls.bstate, state.bstate, state.bstep, α, αI)
+    constraints.c!(state.x_ls, b_ls.c)
     lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)
 end
 
-function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)}, dslackc)
+function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
     # For backtrack_constrained, the last evaluation is the one we
     # keep, so it's safe to store the results in state
-    f_x, L = _lagrangian_linefunc(α, d, constraints, state, dslackc)
+    f_x, L = _lagrangian_linefunc(α, αI, d, constraints, state)
     state.f_x = f_x
     state.L = L
     L
 end
-lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state)
+lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state)
 
 ## Computation of Lagrangian terms: barrier penalty
 """
@@ -604,8 +505,7 @@ slack variables. `bounds` holds the parsed bounds.
 """
 function barrier_value(bounds::ConstraintBounds, x, sx, sc, μ)
     # bμ is the coefficient of μ in the barrier penalty
-    bμ = _bv(x, bounds.iz, bounds.σz) +      # coords constrained by 0
-         _bv(sx) +  # coords with other bounds
+    bμ = _bv(sx) +  # coords with other bounds
          _bv(sc)    # linear/nonlinear constr.
     μ*bμ
 end
@@ -642,7 +542,6 @@ The result is *added* to `gx`, `gsx`, and `gsc`, so these vectors
 need to be initialized appropriately.
 """
 function barrier_grad!(gx, gsx, gsc, bounds::ConstraintBounds, x, sx, sc, μ)
-    barrier_grad!(view(gx, bounds.iz), view(x, bounds.iz), μ)
     barrier_grad!(gsx, sx, μ)
     barrier_grad!(gsc, sc, μ)
     nothing
@@ -662,7 +561,7 @@ end
 
 """
     equality_violation([f=identity], bounds, x, c, bstate) -> val
-    equality_violation([f=identity], bounds, x, c, sx, sc, λxE, λx, λc, λcE) -> val
+    equality_violation([f=identity], bounds, x, c, sx, sc, λx, λc, λxE, λcE) -> val
 
 Compute the sum of `f(v_i)`, where `v_i = λ_i*(target - observed)`
 measures the difference between the current state and the
@@ -673,17 +572,17 @@ variables. `c` holds the values of the linear-nonlinear constraints,
 and the λ arguments hold the Lagrange multipliers for `x`, `sx`, `sc`, and
 `c` respectively.
 """
-function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE)
-    ev = equality_violation(f, x, bounds.valx, bounds.eqx, λxE) +
-         equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) +
+function equality_violation(f, bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE)
+    ev = equality_violation(f, sx, x, bounds.ineqx, bounds.σx, bounds.bx, λx) +
          equality_violation(f, sc, c, bounds.ineqc, bounds.σc, bounds.bc, λc) +
+         equality_violation(f, x, bounds.valx, bounds.eqx, λxE) +
          equality_violation(f, c, bounds.valc, bounds.eqc, λcE)
 end
-equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λxE, λx, λc, λcE) =
-    equality_violation(identity, bounds, x, c, sx, sc, λxE, λx, λc, λcE)
+equality_violation(bounds::ConstraintBounds, x, c, sx, sc, λx, λc, λxE, λcE) =
+    equality_violation(identity, bounds, x, c, sx, sc, λx, λc, λxE, λcE)
 function equality_violation(f, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars)
-    equality_violation(f, bounds, x, c,
-                       bstate.slack_x, bstate.slack_c, bstate.λxE, bstate.λx, bstate.λc, bstate.λcE)
+    equality_violation(f, bounds, x, c, bstate.slack_x, bstate.slack_c,
+                       bstate.λx, bstate.λc, bstate.λxE, bstate.λcE)
 end
 equality_violation(bounds::ConstraintBounds, x, c, bstate::BarrierStateVars) =
     equality_violation(identity, bounds, x, c, bstate)
@@ -719,20 +618,20 @@ end
 
 Compute the gradient of `equality_violation`, storing the result in `gx` (an array) and `gbstate::BarrierStateVars`.
 """
-function equality_grad!(gx, gsx, gsc, gλxE, gλx, gλc, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λxE, λx, λc, λcE)
-    gx[bounds.eqx] = gx[bounds.eqx] - λxE
+function equality_grad!(gx, gsx, gsc, gλx, gλc, gλxE, gλcE, bounds::ConstraintBounds, x, c, J, sx, sc, λx, λc, λxE, λcE)
     equality_grad_var!(gsx, gx, bounds.ineqx, bounds.σx, λx)
     equality_grad_var!(gsc, gx, bounds.ineqc, bounds.σc, λc, J)
+    gx[bounds.eqx] = gx[bounds.eqx] - λxE
     equality_grad_var!(gx, bounds.eqc, λcE, J)
-    equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx)
     equality_grad_λ!(gλx, sx, x, bounds.ineqx, bounds.σx, bounds.bx)
     equality_grad_λ!(gλc, sc, c, bounds.ineqc, bounds.σc, bounds.bc)
+    equality_grad_λ!(gλxE, x, bounds.valx, bounds.eqx)
     equality_grad_λ!(gλcE, c, bounds.valc, bounds.eqc)
 end
 equality_grad!(gx, gb::BarrierStateVars, bounds::ConstraintBounds, x, c, J, b::BarrierStateVars) =
-    equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λxE, gb.λx, gb.λc, gb.λcE,
+    equality_grad!(gx, gb.slack_x, gb.slack_c, gb.λx, gb.λc, gb.λxE, gb.λcE,
                    bounds, x, c, J,
-                   b.slack_x, b.slack_c, b.λxE, b.λx, b.λc, b.λcE)
+                   b.slack_x, b.slack_c, b.λx, b.λc, b.λxE, b.λcE)
 
 # violations of s = σ*(x-b)
 function equality_grad_var!(gs, gx, ineq, σ, λ)
@@ -796,9 +695,6 @@ function isfeasible(bounds::ConstraintBounds, x, c)
     for (i,j) in enumerate(bounds.ineqx)
         isf &= bounds.σx[i]*(x[j] - bounds.bx[i]) >= 0
     end
-    for (i,j) in enumerate(bounds.iz)
-        isf &= bounds.σz[i]*x[j] >= 0
-    end
     for (i,j) in enumerate(bounds.eqc)
         isf &= c[j] == bounds.valc[i]
     end
@@ -836,9 +732,6 @@ function isinterior(bounds::ConstraintBounds, x, c)
     for (i,j) in enumerate(bounds.ineqx)
         isi &= bounds.σx[i]*(x[j] - bounds.bx[i]) > 0
     end
-    for (i,j) in enumerate(bounds.iz)
-        isi &= bounds.σz[i]*x[j] > 0
-    end
     for (i,j) in enumerate(bounds.ineqc)
         isi &= bounds.σc[i]*(c[j] - bounds.bc[i]) > 0
     end
diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index d37d576ce..a195f9ea6 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -1,15 +1,16 @@
-function backtrack_constrained(ϕ, α, αmax, Lcoefsα,
+function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
                                c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α))))
-    α = min(α, 0.999*αmax)
+    α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
     L0, L1, L2 = Lcoefsα
     f_calls = 0
     while α >= αmin
         f_calls += 1
-        val = ϕ(α)
+        val = ϕ(α, αI)
         if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0)
-            return α, f_calls, 0
+            return α, αI, f_calls, 0
         end
         α *= ρ
+        αI *= ρ
     end
-    return zero(α), f_calls, 0
+    return zero(α), zero(αI), f_calls, 0
 end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 73116d60b..6028f9b54 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -34,7 +34,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
     mc = nconstraints(constraints)
     constr_c = Array{T}(mc)
     constraints.c!(initial_x, constr_c)
-#    isfeasible(constraints, initial_x, constr_c) || error("initial guess must be feasible")
+    isinterior(constraints, initial_x, constr_c) || (warn("initial guess is not an interior point"); Base.show_backtrace(STDOUT, backtrace()))
 
     # Allocate fields for the objective function
     n = length(initial_x)
@@ -89,7 +89,6 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         stepf)
 
     d.h!(initial_x, state.H)
-    # state.μ = initialize_μ_λ!(bstate.λxE, bstate.λcE, constraints, initial_x, g, constr_c, constr_J, options.μ0)
     Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1))
     initialize_μ_λ!(state, constraints.bounds, Hinfo, options.μ0)
     update_fg!(d, constraints, state, method)
@@ -113,29 +112,27 @@ end
 function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
     x = state.x
     μ, Hxx, J = state.μ, state.H, state.constr_J
-    bounds = constraints.bounds
+    bstate, bounds = state.bstate, constraints.bounds
     m, n = size(J, 1), size(J, 2)
 
-    d.h!(state.x, Hxx)
-    hessianI!(Hxx, state.x, constraints, state.bstate.λc, μ)  # accumulate the inequality second derivatives
-    # Add the Jacobian terms (J'*S^{-2}*J)
+    d.h!(state.x, Hxx)  # objective's Hessian
+    hessianI!(Hxx, state.x, constraints, bstate.λc, μ)  # accumulate the inequality second derivatives
+    # Add the Jacobian terms (J'*Hss*J)
     JI = view5(J, bounds.ineqc, :)
-    Sinv2 = Diagonal(1./state.bstate.slack_c.^2)
-    HJ = JI'*Sinv2*JI
+    Hssc = Diagonal(bstate.λc./bstate.slack_c)
+    HJ = JI'*Hssc*JI
     for j = 1:n, i = 1:n
-        Hxx[i,j] += μ*HJ[i,j]
+        Hxx[i,j] += HJ[i,j]
     end
-    # Add the variable inequalities portions of J'*S^{-2}*J
-    # The iz terms are already in Hxx (from hessianI!)
-    ineqx, sx = bounds.ineqx, state.bstate.slack_x
-    for (i,j) in enumerate(ineqx)
-        Hxx[j,j] += μ/sx[i]^2
+    # Add the variable inequalities portions of J'*Hssx*J
+    for (i,j) in enumerate(bounds.ineqx)
+        Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i]
     end
     # Perform a positive factorization
     Hpc, state.Hd = ldltfact(Positive, Hxx)
     Hp = full(Hpc)
     # Now add the equality constraint hessian terms
-    eqc, λcE = bounds.eqc, state.bstate.λcE
+    eqc, λcE = bounds.eqc, bstate.λcE
     λ = zeros(eltype(x), nconstraints(bounds))
     for i = 1:length(eqc)
         λ[eqc[i]] -= λcE[i]
@@ -156,9 +153,9 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
                 -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))]
     # Also form the total gradient
     bgrad = state.bgrad
-    gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*Sinv2*bgrad.λc)
-    for (i,j) in enumerate(ineqx)
-        gI[j] += bounds.σx[i]*(bgrad.slack_x[i] - μ*bgrad.λx[i]/sx[i]^2)
+    gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - Hssc*bgrad.λc)
+    for (i,j) in enumerate(bounds.ineqx)
+        gI[j] += -μ*bounds.σx[i]./bstate.slack_x[i] + bstate.λx[i]*(x[j]-bounds.bx[i])/bstate.slack_x[i]
     end
     state.gf = [gI;
                 bgrad.λxE;
@@ -169,7 +166,7 @@ end
 function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
     state.f_x_previous, state.L_previous = state.f_x, state.L
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
-    state, dslackc = solve_step!(state, constraints)
+    state = solve_step!(state, constraints)
     # If a step α=1 will not change any of the parameters, we can quit now.
     # This prevents a futile linesearch.
     if is_smaller_eps(state.x, state.s) &&
@@ -182,17 +179,16 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     qp = quadratic_parameters(bounds, state)
 
     # Estimate αmax, the upper bound on distance of movement along the search line
-    αmax = convert(eltype(bstate), Inf)
+    αmax = αImax = convert(eltype(bstate), Inf)
     αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x)
     αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c)
-    αmax = estimate_maxstep(αmax,
-                            view(state.x, bounds.iz).*bounds.σz,
-                            view(state.s, bounds.iz).*bounds.σz)
+    αImax = estimate_maxstep(αImax, bstate.λx, bstep.λx)
+    αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc)
 
     # Determine the actual distance of movement along the search line
-    ϕ = α->lagrangian_linefunc!(α, d, constraints, state, method, dslackc)
-    state.alpha, f_update, g_update =
-        method.linesearch!(ϕ, T(1), αmax, qp)
+    ϕ = (α,αI)->lagrangian_linefunc!(α, αI, d, constraints, state, method)
+    state.alpha, αI, f_update, g_update =
+        method.linesearch!(ϕ, T(1), αmax, αImax, qp)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
 
     # Maintain a record of previous position
@@ -200,15 +196,12 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
 
     # Update current position # x = x + alpha * s
     ls_update!(state.x, state.x, state.s, state.alpha)
-    ls_update!(bstate, state.constr_c, bstate, bstep, state.alpha, constraints, state, dslackc)
+    ls_update!(bstate, bstate, bstep, state.alpha, αI)
 
     # Evaluate the constraints at the new position
-#    constraints.c!(state.x, state.constr_c)  # already done in ls_update!
+    constraints.c!(state.x, state.constr_c)
     constraints.jacobian!(state.x, state.constr_J)
 
-    # Test for active inequalities, solve immediately for the corresponding s and λ
-    # solve_active_inequalities!(d, constraints, state)
-
     false
 end
 
@@ -227,19 +220,20 @@ function solve_step!(state::IPNewtonState, constraints)
     k = unpack_vec!(bstep.λcE, step, k)
     k == length(step) || error("exhausted targets before step")
     # Solve for the slack variable and λI updates
-    # These are only used to estimate αmax, otherwise these are updated by exact formulas
     for (i, j) in enumerate(bounds.ineqx)
         bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j]
-        bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
+        # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
+        bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i]
     end
     JI = view5(state.constr_J, bounds.ineqc, :)
-    dslackc = Diagonal(bounds.σc)*JI*s
-    bstep.slack_c[:] = -bgrad.λc + dslackc
+    SigmaJIΔx = Diagonal(bounds.σc)*(JI*state.s)
     for i = 1:length(bstep.λc)
-        bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
+        bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i]
+        # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
+        bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
     end
     state.stepf = step
-    state, dslackc
+    state
 end
 
 function is_smaller_eps(ref, step)
diff --git a/src/types.jl b/src/types.jl
index 06550fd74..e3a7c5ffd 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -291,9 +291,6 @@ immutable ConstraintBounds{T}
     ineqx::Vector{Int}  # index-vector of other inequality-constrained variables
     σx::Vector{Int8}    # ±1, in constraints σ(v-b) ≥ 0 (sign depends on whether v>b or v<b)
     bx::Vector{T}       # bound (upper or lower) on variable
-    iz::Vector{Int}     # index-vector of nonnegative or nonpositive variables
-    σz::Vector{Int8}    # ±1 depending on whether nonnegative or nonpositive
-    bz::Vector{T}       # all-zeros, convenience for evaluation of barrier penalty
     # Linear/nonlinear constraint functions and bounds
     eqc::Vector{Int}    # index-vector equality-constrained entries in c
     valc::Vector{T}     # value of the equality-constraint
@@ -306,7 +303,7 @@ function ConstraintBounds(lx, ux, lc, uc)
 end
 function _cb{Tx,Tc}(lx::AbstractArray{Tx}, ux::AbstractArray{Tx}, lc::AbstractVector{Tc}, uc::AbstractVector{Tc})
     T = promote_type(Tx,Tc)
-    ConstraintBounds{T}(length(lc), parse_constraints(T, lx, ux, true)..., parse_constraints(T, lc, uc)...)
+    ConstraintBounds{T}(length(lc), parse_constraints(T, lx, ux)..., parse_constraints(T, lc, uc)...)
 end
 
 Base.eltype{T}(::Type{ConstraintBounds{T}}) = T
@@ -330,12 +327,10 @@ The number of "meaningful" constraints (not `±Inf`) on the x coordinates.
 See also: nconstraints.
 """
 function nconstraints_x(cb::ConstraintBounds)
-    mz = isempty(cb.iz) ? 0 : maximum(cb.iz)
     mi = isempty(cb.ineqx) ? 0 : maximum(cb.ineqx)
     me = isempty(cb.eqx) ? 0 : maximum(cb.eqx)
-    nmax = max(mz, mi, me)
+    nmax = max(mi, me)
     hasconstraint = falses(nmax)
-    hasconstraint[cb.iz] = true
     hasconstraint[cb.ineqx] = true
     hasconstraint[cb.eqx] = true
     sum(hasconstraint)
@@ -347,7 +342,6 @@ function Base.show(io::IO, cb::ConstraintBounds)
     print(io, "\n  Variables:")
     showeq(io, indent, cb.eqx, cb.valx, 'x', :bracket)
     showineq(io, indent, cb.ineqx, cb.σx, cb.bx, 'x', :bracket)
-    showineq(io, indent, cb.iz, cb.σz, cb.bz, 'x', :bracket)
     print(io, "\n  Linear/nonlinear constraints:")
     showeq(io, indent, cb.eqc, cb.valc, 'c', :subscript)
     showineq(io, indent, cb.ineqc, cb.σc, cb.bc, 'c', :subscript)
@@ -443,7 +437,7 @@ function _symm(l, u)
 end
 
 """
-    parse_constraints(T, l, u, split_signed=false) -> eq, val, ineq, σ, b, [iz, σz, bz]
+    parse_constraints(T, l, u) -> eq, val, ineq, σ, b
 
 From user-supplied constraints of the form
 
@@ -457,13 +451,6 @@ when `l_i == u_i`), convert into the following representation:
     - `ineq`, `σ`, and `b` such that the inequality constraints can be written as
              σ[k]*(v[ineq[k]] - b[k]) ≥ 0
        where `σ[k] = ±1`.
-    - optionally (with `split_signed=true`), return an index-vector
-      `iz` of entries where one of `l`, `u` is zero, along with
-      whether the constraint is `≥ 0` (σz=+1) or `≤ 0` (σz=-1). Such
-      are removed from `ineq`, `σ`, and `b`. For coordinate variables
-      this can be used to reduce the number of slack variables needed,
-      since when one of the bounds is 0, the variable itself *is* a
-      slack variable.
 
 Note that since the same `v_i` might have both lower and upper bounds,
 `ineq` might have the same index twice (once with `σ`=-1 and once with `σ`=1).
@@ -474,11 +461,11 @@ corresponding entry in `ineq`/`σ`/`b`.
 
 T is the element-type of the non-Int outputs
 """
-function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false)
+function parse_constraints{T}(::Type{T}, l, u)
     size(l) == size(u) || throw(DimensionMismatch("l and u must be the same size, got $(size(l)) and $(size(u))"))
-    eq, ineq, iz = Int[], Int[], Int[]
+    eq, ineq = Int[], Int[]
     val, b = T[], T[]
-    σ, σz = Array{Int8}(0), Array{Int8}(0)
+    σ = Array{Int8}(0)
     for i = 1:length(l)
         li, ui = l[i], u[i]
         li <= ui || throw(ArgumentError("l must be smaller than u, got $li, $ui"))
@@ -487,31 +474,18 @@ function parse_constraints{T}(::Type{T}, l, u, split_signed::Bool=false)
             push!(val, ui)
         else
             if isfinite(li)
-                if split_signed && li == 0
-                    push!(iz, i)
-                    push!(σz, 1)
-                else
-                    push!(ineq, i)
-                    push!(σ, 1)
-                    push!(b, li)
-                end
+                push!(ineq, i)
+                push!(σ, 1)
+                push!(b, li)
             end
             ui = u[i]
             if isfinite(ui)
-                if split_signed && ui == 0
-                    push!(iz, i)
-                    push!(σz, -1)
-                else
-                    push!(ineq, i)
-                    push!(σ, -1)
-                    push!(b, ui)
-                end
+                push!(ineq, i)
+                push!(σ, -1)
+                push!(b, ui)
             end
         end
     end
-    if split_signed
-        return eq, val, ineq, σ, b, iz, σz, zeros(T, length(iz))
-    end
     eq, val, ineq, σ, b
 end
 
diff --git a/test/constraints.jl b/test/constraints.jl
index 89916fc95..57f3d2b4a 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -28,11 +28,9 @@ end
         b = @inferred5(Optim.ConstraintBounds([0.0, 0.5, 2.0], [1.0, 1.0, 2.0], [5.0, 3.8], [5.0, 4.0]))
         @test b.eqx == [3]
         @test b.valx == [2.0]
-        @test b.ineqx == [1,2,2]
-        @test b.σx == [-1,1,-1]
-        @test b.bx == [1.0,0.5,1.0]
-        @test b.iz == [1]
-        @test b.σz == [1]
+        @test b.ineqx == [1,1,2,2]
+        @test b.σx == [1,-1,1,-1]
+        @test b.bx == [0.0,1.0,0.5,1.0]
         @test b.eqc == [1]
         @test b.valc == [5]
         @test b.ineqc == [2,2]
@@ -44,14 +42,13 @@ end
 ConstraintBounds:
   Variables:
     x[3]=2.0
-    x[1]≤1.0,x[2]≥0.5,x[2]≤1.0
-    x[1]≥0.0
+    x[1]≥0.0,x[1]≤1.0,x[2]≥0.5,x[2]≤1.0
   Linear/nonlinear constraints:
     c_1=5.0
     c_2≥3.8,c_2≤4.0"""
 
         b = @inferred5(Optim.ConstraintBounds(Float64[], Float64[], [5.0, 3.8], [5.0, 4.0]))
-        for fn in (:eqx, :valx, :ineqx, :σx, :bx, :iz, :σz)
+        for fn in (:eqx, :valx, :ineqx, :σx, :bx)
             @test isempty(getfield(b, fn))
         end
         @test b.eqc == [1]
@@ -85,10 +82,11 @@ ConstraintBounds:
             ForwardDiff.gradient!(pcmp, ftot, p, ForwardDiff.Chunk{chunksize}())
             @test pcmp ≈ pgrad
         end
-        # Basic setup
+        # Basic setup (two objectives: d0 is identically zero, dg is the Gaussian-style quadratic form x'*H*x/2)
         μ = 0.2345678
+        d0 = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0))
         A = randn(3,3); H = A'*A
-        d = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H))
+        dg = TwiceDifferentiableFunction(x->(x'*H*x)[1]/2, (x,g)->(g[:] = H*x), (x,h)->(h[:,:]=H))
         x = broadcast(clamp, randn(3), -0.99, 0.99)
         gx = similar(x)
         cfun = x->Float64[]
@@ -109,41 +107,40 @@ ConstraintBounds:
         bounds = Optim.ConstraintBounds(Float64[], Float64[], Float64[], Float64[])
         bstate = Optim.BarrierStateVars(bounds, x)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
-        @test f_x == L == d.f(x)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, dg, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
+        @test f_x == L == dg.f(x)
         @test gx == H*x
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, dg, constraints, x)
         @test state.gf ≈ gx
         @test state.Hf ≈ H
         ## Pure equality constraints on variables
-        d = TwiceDifferentiableFunction(x->0.0, (x,g)->fill!(g, 0), (x,h)->fill!(h,0))
         xbar = fill(0.2, length(x))
         bounds = Optim.ConstraintBounds(xbar, xbar, [], [])
         bstate = Optim.BarrierStateVars(bounds)
         rand!(bstate.λxE)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         @test L ≈ dot(bstate.λxE, xbar-x)
         @test gx == -bstate.λxE
         @test bgrad.λxE == xbar-x
-        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        check_autodiff(d0, bounds, x, cfun, bstate, μ)
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λxE, bstate.λxE)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         @test state.gf ≈ [gx; xbar-x]
         n = length(x)
         @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], xbar, xbar)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λcE, bstate.λxE)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         @test state.gf ≈ [gx; xbar-x]
         n = length(x)
         @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
@@ -151,23 +148,25 @@ ConstraintBounds:
         bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], [])
         y = rand(length(x))
         bstate = Optim.BarrierStateVars(bounds, y)
+        rand!(bstate.λx)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, y, Float64[], Array{Float64}(0,0), bstate, μ)
         @test f_x == 0
         @test L ≈ -μ*sum(log, y)
-        @test gx == -μ./y
-        check_autodiff(d, bounds, y, cfun, bstate, μ)
+        @test bgrad.slack_x == -μ./y + bstate.λx
+        @test gx == -bstate.λx
+        check_autodiff(d0, bounds, y, cfun, bstate, μ)
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
-        state = Optim.initial_state(method, options, d, constraints, y)
-        setstate!(state, μ, d, constraints, method)
+        state = Optim.initial_state(method, options, d0, constraints, y)
+        setstate!(state, μ, d0, constraints, method)
         @test state.gf ≈ -μ./y
         @test state.Hf ≈ μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
-        state = Optim.initial_state(method, options, d, constraints, y)
-        setstate!(state, μ, d, constraints, method)
+        state = Optim.initial_state(method, options, d0, constraints, y)
+        setstate!(state, μ, d0, constraints, method)
         @test state.gf ≈ -μ./y
         @test state.Hf ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
@@ -177,7 +176,7 @@ ConstraintBounds:
         rand!(bstate.slack_x)  # intentionally displace from the correct value
         rand!(bstate.λx)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, Float64[], Array{Float64}(0,0), bstate, μ)
         @test f_x == 0
         s = bounds.σx .* (x[bounds.ineqx] - bounds.bx)
         Ltarget = -μ*sum(log, bstate.slack_x) +
@@ -189,28 +188,34 @@ ConstraintBounds:
         end
         @test gx ≈ dx
         @test bgrad.slack_x == -μ./bstate.slack_x + bstate.λx
-        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        check_autodiff(d0, bounds, x, cfun, bstate, μ)
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.slack_x, bstate.slack_x)
         copy!(state.bstate.λx, bstate.λx)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         gxs, hxs = zeros(length(x)), zeros(length(x))
-        s = state.bstate.slack_x
+        s, λ = state.bstate.slack_x, state.bstate.λx
         for (i,j) in enumerate(bounds.ineqx)
-            gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2
-            hxs[j] += μ/s[i]^2
+            # # Primal
+            # gxs[j] += -2*μ*bounds.σx[i]/s[i] + μ*(x[j]-bounds.bx[i])/s[i]^2
+            # hxs[j] += μ/s[i]^2
+            # Primal-dual
+            gstmp, gλtmp = -μ/s[i] + λ[i], s[i] - bounds.σx[i]*(x[j]-bounds.bx[i])
+            htmp = λ[i]/s[i]
+            hxs[j] += htmp
+            gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp
         end
         @test state.gf ≈ gxs
         @test state.Hf ≈ Diagonal(hxs)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], lb, ub)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.slack_c, bstate.slack_x)
         copy!(state.bstate.λc, bstate.λx)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         @test state.gf ≈ gxs
         @test state.Hf ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
@@ -231,28 +236,28 @@ ConstraintBounds:
         bstate = Optim.BarrierStateVars(bounds, x, c)
         rand!(bstate.λcE)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         @test L ≈ dot(bstate.λcE, cbar-c)
         @test gx ≈ -J'*bstate.λcE
         @test bgrad.λcE == cbar-c
-        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        check_autodiff(d0, bounds, x, cfun, bstate, μ)
         constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λcE, bstate.λcE)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         heq = zeros(length(x), length(x))
         ch!(x, bstate.λcE, heq)
         @test state.gf ≈ [gx; cbar-c]
         @test state.Hf ≈ [eye(length(x))-heq -J';
                           -J zeros(size(J,1), size(J,1))]
         ## Nonlinear inequality constraints
-        bounds = Optim.ConstraintBounds([], [], rand(length(c))-1, rand(length(c))+1)
+        bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2)
         bstate = Optim.BarrierStateVars(bounds, x, c)
         rand!(bstate.slack_c)  # intentionally displace from the correct value
         rand!(bstate.λc)
         bgrad = similar(bstate)
-        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
+        f_x, L = Optim.lagrangian_fg!(gx, bgrad, d0, bounds, x, c, J, bstate, μ)
         @test f_x == 0
         Ltarget = -μ*sum(log, bstate.slack_c) +
             dot(bstate.λc, bstate.slack_c - bounds.σc.*(c[bounds.ineqc]-bounds.bc))
@@ -260,12 +265,12 @@ ConstraintBounds:
         @test gx ≈ -J[bounds.ineqc,:]'*(bstate.λc.*bounds.σc)
         @test bgrad.slack_c == -μ./bstate.slack_c + bstate.λc
         @test bgrad.λc == bstate.slack_c - bounds.σc .* (c[bounds.ineqc] - bounds.bc)
-        check_autodiff(d, bounds, x, cfun, bstate, μ)
+        check_autodiff(d0, bounds, x, cfun, bstate, μ)
         constraints = TwiceDifferentiableConstraintsFunction(cfun!, cJ!, ch!, bounds)
-        state = Optim.initial_state(method, options, d, constraints, x)
+        state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.slack_c, bstate.slack_c)
         copy!(state.bstate.λc, bstate.λc)
-        setstate!(state, μ, d, constraints, method)
+        setstate!(state, μ, d0, constraints, method)
         hineq = zeros(length(x), length(x))
         λ = zeros(size(J, 1))
         for (i,j) in enumerate(bounds.ineqc)
@@ -273,9 +278,14 @@ ConstraintBounds:
         end
         ch!(x, λ, hineq)
         JI = J[bounds.ineqc,:]
-        hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq
+        # # Primal
+        # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq
+        # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2))
+        # Primal-dual
+        hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq
+        gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c))
         hp = full(cholfact(Positive, hxx))
-        @test state.gf ≈ -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ*(bgrad.λc ./ bstate.slack_c.^2))
+        @test state.gf ≈ gf
         @test state.Hf ≈ hp
     end
 
@@ -364,25 +374,24 @@ ConstraintBounds:
         F = 1000
         d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
         method = Optim.IPNewton()
-        options = OptimizationOptions()
         μ = 1e-20
+        options = OptimizationOptions(μ0=μ)
         x0 = μ/F*10  # minimum is at μ/F
         # Nonnegativity (the case that doesn't require slack variables)
         constraints = TwiceDifferentiableConstraintsFunction([0.0], [])
         state = Optim.initial_state(method, options, d, constraints, [x0])
-        setstate!(state, μ, d, constraints, method)
         Optim.solve_step!(state, constraints)
-        @test state.s[1] ≈ x0 - F*x0^2/μ
+        @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0)
         qp = Optim.quadratic_parameters(constraints.bounds, state)
         @test qp[1] ≈ F*x0-μ*log(x0)
         @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ
         @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2
         bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
-        αmax = Optim.estimate_maxstep(Inf, state.x[bounds.iz].*bounds.σz,
-                                      state.s[bounds.iz].*bounds.σz)
-        ϕ = α->Optim.lagrangian_linefunc(α, d, constraints, state, Float64[])
-        @test ϕ(0) ≈ qp[1]
-        α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)
+        αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx,
+                                           state.s[bounds.ineqx].*bounds.σx)
+        ϕ = (α,αI)->Optim.lagrangian_linefunc(α, αI, d, constraints, state)
+        @test ϕ(0,0) ≈ qp[1]
+        α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp)
         @test α > 1e-3
     end
 
@@ -397,15 +406,14 @@ ConstraintBounds:
         # boundary).
         F0 = 1000
         method = Optim.IPNewton()
-        options = OptimizationOptions()
         μ = 1e-20   # smaller than eps(1.0)
+        options = OptimizationOptions(μ0=μ)
         for σ in (1, -1)
             F = σ*F0
             # Nonnegativity/nonpositivity (the case that doesn't require slack variables)
             d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
             constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...)
             state = Optim.initial_state(method, options, d, constraints, [μ/F*10])
-            setstate!(state, μ, d, constraints, method)
             for i = 1:10
                 Optim.update_state!(d, constraints, state, method)
                 Optim.update_fg!(d, constraints, state, method)
@@ -416,7 +424,6 @@ ConstraintBounds:
             d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
             constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...)
             state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
-            setstate!(state, μ, d, constraints, method)
             for i = 1:10
                 Optim.update_state!(d, constraints, state, method)
                 Optim.update_fg!(d, constraints, state, method)
@@ -424,7 +431,7 @@ ConstraintBounds:
             end
             @test state.x[1] == σ
             @test state.bstate.slack_x[1] < eps(float(σ))
-            # x >= 1 using the linear/nonlinear constraints
+            # |x| >= 1 using the linear/nonlinear constraints
             d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
             constraints = TwiceDifferentiableConstraintsFunction(
                 (x,c)->(c[1] = x[1]),
@@ -432,13 +439,12 @@ ConstraintBounds:
                 (x,λ,h)->nothing,
                 [], [], σswap(σ, [Float64(σ)], [])...)
             state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
-            setstate!(state, μ, d, constraints, method)
             for i = 1:10
                 Optim.update_state!(d, constraints, state, method)
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end
-            @test state.x[1] == σ
+            @test state.x[1] ≈ σ
             @test state.bstate.slack_c[1] < eps(float(σ))
         end
     end

From dcfe788c2fc775c967b64acc82fe85e5e983c89f Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sat, 19 Nov 2016 09:37:59 -0600
Subject: [PATCH 25/40] Start Optim.ConstrainedProblems

---
 src/Optim.jl                  |  4 +++-
 src/problems/constrained.jl   | 42 +++++++++++++++++++++++++++++++++++
 src/problems/multivariate.jl  | 27 ++++++++++++++++++++++
 src/problems/unconstrained.jl | 13 ++---------
 4 files changed, 74 insertions(+), 12 deletions(-)
 create mode 100644 src/problems/constrained.jl
 create mode 100644 src/problems/multivariate.jl

diff --git a/src/Optim.jl b/src/Optim.jl
index 130ac822f..c3efdf6c9 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -19,6 +19,7 @@ module Optim
     export optimize,
            isfeasible,
            isinterior,
+           nconstraints,
            DifferentiableFunction,
            TwiceDifferentiableFunction,
            DifferentiableConstraintsFunction,
@@ -110,8 +111,9 @@ module Optim
     include("utilities/trace.jl")
 
     # Examples for testing
-    include(joinpath("problems", "unconstrained.jl"))
+    include(joinpath("problems", "multivariate.jl"))
     include(joinpath("problems", "univariate.jl"))
+    using .MultivariateProblems
 
     cgdescent(args...) = error("API has changed. Please use cg.")
 end
diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl
new file mode 100644
index 000000000..9fc1ca70b
--- /dev/null
+++ b/src/problems/constrained.jl
@@ -0,0 +1,42 @@
+module ConstrainedProblems
+
+using ..OptimizationProblem, ...TwiceDifferentiableConstraintsFunction
+
+examples = Dict{AbstractString, OptimizationProblem}()  # problem name => problem
+# "HS9" (presumably Hock-Schittkowski problem 9 -- confirm): objective and constraint c(x)
+hs9_obj(x::AbstractVector) = sin(π*x[1]/12) * cos(π*x[2]/16)
+hs9_c!(x::AbstractVector, c::AbstractVector) = (c[1] = 4*x[1]-3*x[2]; c)
+# Gradient of hs9_obj, written into g (which is also returned)
+function hs9_obj_g!(x::AbstractVector, g::AbstractVector)
+    g[1] = π/12 * cos(π*x[1]/12) * cos(π*x[2]/16)
+    g[2] = -π/16 * sin(π*x[1]/12) * sin(π*x[2]/16)
+    g
+end
+function hs9_obj_h!(x::AbstractVector, h::AbstractMatrix)
+    v = hs9_obj(x)  # the diagonal second derivatives are multiples of the objective value
+    h[1,1] = -π^2*v/144
+    h[2,2] = -π^2*v/256
+    h[1,2] = h[2,1] = -π^2 * cos(π*x[1]/12) * sin(π*x[2]/16) / 192
+    h
+end
+# Jacobian of hs9_c!: a single row, constant because the constraint is linear
+function hs9_jacobian!(x, J)
+    J[1,1] = 4
+    J[1,2] = -3
+    J
+end
+hs9_h!(x, λ, h) = h  # linear constraint: no second-derivative contribution, h is untouched
+
+examples["HS9"] = OptimizationProblem("HS9",
+                                      hs9_obj,
+                                      hs9_obj_g!,
+                                      hs9_obj_h!,
+                                      TwiceDifferentiableConstraintsFunction(
+                                          hs9_c!, hs9_jacobian!, hs9_h!,
+                                          [], [], [0.0], [0.0]),  # no box bounds; lc = uc = 0, i.e. c(x) = 0
+                                      [0.0, 0.0],  # initial_x (satisfies the constraint)
+                                      [[12k-3, 16k-4] for k in (0, 1, -1)], # any integer k will do...
+                                      true,   # isdifferentiable
+                                      true)   # istwicedifferentiable
+
+end  # module
diff --git a/src/problems/multivariate.jl b/src/problems/multivariate.jl
new file mode 100644
index 000000000..31d8fb729
--- /dev/null
+++ b/src/problems/multivariate.jl
@@ -0,0 +1,27 @@
+module MultivariateProblems
+
+export UnconstrainedProblems, ConstrainedProblems
+# Description of one multivariate test problem, shared by the included example collections
+immutable OptimizationProblem
+    name::AbstractString
+    f::Function                  # objective, called as f(x)
+    g!::Function                 # gradient of f; the HS9 example uses the form g!(x, g)
+    h!::Function                 # Hessian of f; the HS9 example uses the form h!(x, h)
+    constraints                  # `nothing` when the problem is unconstrained
+    initial_x::Vector{Float64}   # starting point
+    solutions::Vector            # known solution(s)
+    isdifferentiable::Bool
+    istwicedifferentiable::Bool
+end
+# Constructor for unconstrained problems: stores `nothing` in the `constraints` field
+function OptimizationProblem(name, f, g!, h!,
+                             initial_x::AbstractVector, solutions,
+                             isdifferentiable::Bool, istwicedifferentiable::Bool)
+    OptimizationProblem(name, f, g!, h!, nothing,
+                        initial_x, solutions, isdifferentiable, istwicedifferentiable)
+end
+# Both example collections are submodules that import OptimizationProblem from this module
+include("unconstrained.jl")
+include("constrained.jl")
+
+end
diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl
index d5d1ff62d..431ae0a53 100644
--- a/src/problems/unconstrained.jl
+++ b/src/problems/unconstrained.jl
@@ -1,5 +1,7 @@
 module UnconstrainedProblems
 
+using ..OptimizationProblem
+
 ### Sources
 ###
 ### [1] Ali, Khompatraporn, & Zabinsky: A Numerical Evaluation of Several Stochastic Algorithms on Selected Continuous Global Optimization Test
@@ -7,17 +9,6 @@ module UnconstrainedProblems
 ###
 ### [2] Fletcher & Powell: A rapidly convergent descent method for minimization,
 
-immutable OptimizationProblem
-    name::AbstractString
-    f::Function
-    g!::Function
-    h!::Function
-    initial_x::Vector{Float64}
-    solutions::Vector
-    isdifferentiable::Bool
-    istwicedifferentiable::Bool
-end
-
 examples = Dict{AbstractString, OptimizationProblem}()
 
 ##########################################################################

From fd8f0d09d89618b0440eecff17368840d2121acc Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 20 Nov 2016 14:10:02 -0600
Subject: [PATCH 26/40] Trace the total equality violation

---
 src/interior.jl        | 44 ++++++++++++++++++++++--------------------
 src/ipnewton.jl        |  5 +++--
 src/types.jl           | 10 +++++-----
 src/utilities/trace.jl |  3 ++-
 4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 0e0cc4b14..5e88e5c13 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -167,11 +167,16 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
     iteration, iterationμ = 0, 0
 
     options.show_trace && print_header(method)
-    trace!(tr, state, iteration, method, options)
 
     Δfmax = zero(state.f_x)
 
     while !converged && !stopped && iteration < options.iterations
+        # If tracing, update trace with trace!. If a callback is provided, it
+        # should have boolean return value that controls the variable stopped_by_callback.
+        # This allows for early stopping controlled by the callback.
+        if tracing
+            stopped_by_callback = trace!(tr, state, iteration, method, options)
+        end
         iteration += 1
         iterationμ += 1
 
@@ -190,13 +195,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol)
         gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
 
-        # If tracing, update trace with trace!. If a callback is provided, it
-        # should have boolean return value that controls the variable stopped_by_callback.
-        # This allows for early stopping controlled by the callback.
-        if tracing
-            stopped_by_callback = trace!(tr, state, iteration, method, options)
-        end
-
         Δf = abs(state.f_x - state.f_x_previous)
         if iterationμ > 1
             Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous))
@@ -228,6 +226,10 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         stopped = stopped_by_callback || stopped_by_time_limit ? true : false
     end # while
 
+    if tracing
+        trace!(tr, state, iteration, method, options)
+    end
+
     after_while!(d, constraints, state, method, options)
 
     return MultivariateOptimizationResults(state.method_string,
@@ -428,9 +430,9 @@ userλ(λcI, constraints) = userλ(λcI, constraints.bounds)
 
 function lagrangian(d, bounds::ConstraintBounds, x, c, bstate::BarrierStateVars, μ)
     f_x = d.f(x)
-    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
-            equality_violation(bounds, x, c, bstate)
-    f_x, L_xsλ
+    ev = equality_violation(bounds, x, c, bstate)  # kept in its own variable so it can be returned
+    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev
+    f_x, L_xsλ, ev  # (objective value, Lagrangian value, equality-violation term)
 end
 
 function lagrangian_g!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
@@ -444,28 +446,28 @@ end
 function lagrangian_fg!(gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
     fill!(bgrad, 0)
     f_x = d.fg!(x, gx)
-    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) +
-        equality_violation(bounds, x, c, bstate)
+    ev = equality_violation(bounds, x, c, bstate)  # kept in its own variable so it can be returned
+    L_xsλ = f_x + barrier_value(bounds, x, bstate, μ) + ev
     barrier_grad!(gx, bgrad, bounds, x, bstate, μ)
     equality_grad!(gx, bgrad, bounds, x, c, J, bstate)
-    f_x, L_xsλ
+    f_x, L_xsλ, ev  # (objective value, Lagrangian value, equality-violation term)
 end
 
 ## Computation of Lagrangian and derivatives when passing all parameters as a single vector
 function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::AbstractArray, bstate::BarrierStateVars, μ)
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian(d, bounds, x, c, bstate, μ)
+    f_x, L_xsλ, ev = lagrangian(d, bounds, x, c, bstate, μ)  # f_x and ev are not used below
     L_xsλ
 end
 function lagrangian_vec(p, d, bounds::ConstraintBounds, x, c::Function, bstate::BarrierStateVars, μ)
     # Use this version when using automatic differentiation
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian(d, bounds, x, c(x), bstate, μ)
+    f_x, L_xsλ, ev = lagrangian(d, bounds, x, c(x), bstate, μ)  # f_x and ev are not used below
     L_xsλ
 end
 function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x, c, J, bstate::BarrierStateVars, μ)
     unpack_vec!(x, bstate, p)
-    f_x, L_xsλ = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)
+    f_x, L_xsλ, ev = lagrangian_fg!(gx, bgrad, d, bounds, x, c, J, bstate, μ)  # f_x, ev unused below
     pack_vec!(storage, gx, bgrad)
     L_xsλ
 end
@@ -486,10 +488,8 @@ end
 function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
     # For backtrack_constrained, the last evaluation is the one we
     # keep, so it's safe to store the results in state
-    f_x, L = _lagrangian_linefunc(α, αI, d, constraints, state)
-    state.f_x = f_x
-    state.L = L
-    L
+    state.f_x, state.L, state.ev = _lagrangian_linefunc(α, αI, d, constraints, state)
+    state.L  # return value: the Lagrangian just stored for this (α, αI)
 end
 lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state)
 
@@ -711,6 +711,7 @@ function isfeasible(constraints, x)
     isfeasible(constraints, x, c)
 end
 isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c)
+isfeasible(constraints::Void, x) = true  # `nothing` as constraints: every x counts as feasible
 
 """
     isinterior(constraints, state) -> Bool
@@ -744,6 +745,7 @@ function isinterior(constraints, x)
     isinterior(constraints, x, c)
 end
 isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c)
+isinterior(constraints::Void, x) = true  # `nothing` as constraints: every x counts as interior
 
 ## Utilities for representing total state as single vector
 function pack_vec(x, b::BarrierStateVars)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 6028f9b54..4a05b98da 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -22,6 +22,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     bstep::BarrierStateVars{T}    # search direction for slack and λ
     constr_c::Vector{T}   # value of the user-supplied constraints at x
     constr_J::Matrix{T}   # value of the user-supplied Jacobian at x
+    ev::T                 # equality violation, ∑_i λ_Ei (c*_i - c_i)
     @add_linesearch_fields()
     b_ls::BarrierLineSearch{T}
     gf::Vector{T}
@@ -82,6 +83,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         bstep,
         constr_c,
         constr_J,
+        T(NaN),
         @initial_linesearch()..., # Maintain a cache for line search results in state.lsr
         b_ls,
         gf,
@@ -96,8 +98,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
 end
 
 function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
-    f_x, L = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
-    state.f_x, state.L = f_x, L
+    state.f_x, state.L, state.ev = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
     state.f_calls += 1
     state.g_calls += 1
     state
diff --git a/src/types.jl b/src/types.jl
index e3a7c5ffd..72a2ffcb2 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -57,7 +57,7 @@ function print_header(method::Optimizer)
 end
 
 function print_header(method::IPOptimizer)
-        @printf "Iter     Lagrangian value Function value   Gradient norm    μ\n"
+        @printf "Iter     Lagrangian value Function value   Gradient norm    |==constr.|      μ\n"  # "|==constr.|" heads the traced "ev" column
 end
 
 immutable OptimizationState{T <: Optimizer}
@@ -134,10 +134,10 @@ end
 
 function Base.show{M<:IPOptimizer}(io::IO, t::OptimizationState{M})
     md = t.metadata
-    @printf io "%6d   %-14e   %-14e   %-14e   %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["μ"]
+    @printf io "%6d   %-14e   %-14e   %-14e   %-14e   %-6.2e\n" t.iteration md["Lagrangian"] t.value t.g_norm md["ev"] md["μ"]
     if !isempty(t.metadata)
         for (key, value) in md
-            key ∈ ("Lagrangian", "μ") && continue
+            key ∈ ("Lagrangian", "μ", "ev") && continue
             @printf io " * %s: %s\n" key value
         end
     end
@@ -154,8 +154,8 @@ function Base.show(io::IO, tr::OptimizationTrace)
 end
 
 function Base.show{M<:IPOptimizer}(io::IO, tr::OptimizationTrace{M})
-    @printf io "Iter     Lagrangian value Function value   Gradient norm    μ\n"
-    @printf io "------   ---------------- --------------   --------------   --------\n"
+    @printf io "Iter     Lagrangian value Function value   Gradient norm    |==constr.|      μ\n"
+    @printf io "------   ---------------- --------------   --------------   --------------   --------\n"
     for state in tr
         show(io, state)
     end
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index bb574745c..4f08ae506 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -119,6 +119,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
     dt = Dict()
     dt["Lagrangian"] = state.L
     dt["μ"] = state.μ
+    dt["ev"] = state.ev
     if options.extended_trace
         dt["α"] = state.alpha
         dt["x"] = copy(state.x)
@@ -130,7 +131,7 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
         dt["bgrad"] = copy(state.bgrad)
         dt["c"] = copy(state.constr_c)
     end
-    g_norm = vecnorm(state.gf, Inf)
+    g_norm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
     update!(tr,
             iteration,
             state.f_x,

From 9715589fbdb859eb0196234d585cb3a153f0172f Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Sun, 20 Nov 2016 14:53:31 -0600
Subject: [PATCH 27/40] Store less state for IPNewton update

---
 src/iplinesearch.jl                 |   1 +
 src/ipnewton.jl                     | 129 ++++++++++++++--------------
 src/utilities/assess_convergence.jl |   4 +-
 src/utilities/trace.jl              |   5 +-
 test/constraints.jl                 |  38 ++++----
 5 files changed, 91 insertions(+), 86 deletions(-)

diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index a195f9ea6..681d7682b 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -12,5 +12,6 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
         α *= ρ
         αI *= ρ
     end
+    ϕ(zero(α), zero(αI))  # to ensure that state gets set appropriately
     return zero(α), zero(αI), f_calls, 0
 end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 4a05b98da..9d52649af 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -25,9 +25,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     ev::T                 # equality violation, ∑_i λ_Ei (c*_i - c_i)
     @add_linesearch_fields()
     b_ls::BarrierLineSearch{T}
-    gf::Vector{T}
-    Hf::Matrix{T}
-    stepf::Vector{T}
+    gtilde::Vector{T}
 end
 
 function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T})
@@ -52,9 +50,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
     # More constraints
     constr_J = Array{T}(mc, n)
     constr_gtemp = Array{T}(n)
-    gf = Array{T}(0)    # will be replaced
-    Hf = Array{T}(0,0)  #   "
-    stepf = Array{T}(0)
+    gtilde = similar(g)
     constraints.jacobian!(initial_x, constr_J)
     μ = T(1)
     bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c)
@@ -86,9 +82,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         T(NaN),
         @initial_linesearch()..., # Maintain a cache for line search results in state.lsr
         b_ls,
-        gf,
-        Hf,
-        stepf)
+        gtilde)
 
     d.h!(initial_x, state.H)
     Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1))
@@ -101,27 +95,49 @@ function update_fg!(d, constraints::TwiceDifferentiableConstraintsFunction, stat
     state.f_x, state.L, state.ev = lagrangian_fg!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
     state.f_calls += 1
     state.g_calls += 1
-    state
+    update_gtilde!(d, constraints, state, method)
 end
 
 function update_g!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
     lagrangian_g!(state.g, state.bgrad, d, constraints.bounds, state.x, state.constr_c, state.constr_J, state.bstate, state.μ)
     state.g_calls += 1
+    update_gtilde!(d, constraints, state, method)  # refresh gtilde from the new g/bgrad; returns state
+end
+
+function update_gtilde!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
+    # Calculate the modified x-gradient for the block-eliminated problem (stored in state.gtilde)
+    gtilde, bstate, bgrad = state.gtilde, state.bstate, state.bgrad
+    bounds = constraints.bounds
+    copy!(gtilde, state.g)  # start from the plain x-gradient, then add the corrections below
+    JIc = view5(state.constr_J, bounds.ineqc, :)  # Jacobian rows selected by bounds.ineqc
+    if !isempty(JIc)  # nothing to add when that selection is empty
+        Hssc = Diagonal(bstate.λc./bstate.slack_c)
+        gc = JIc'*(Diagonal(bounds.σc) * (bgrad.slack_c - Hssc*bgrad.λc))
+        for i = 1:length(gtilde)
+            gtilde[i] += gc[i]
+        end
+    end
+    for (i,j) in enumerate(bounds.ineqx)  # i-th entry of bounds.ineqx refers to variable x[j]
+        gxi = bounds.σx[i]*(bgrad.slack_x[i] -  bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i])
+        gtilde[j] += gxi
+    end
     state
 end
 
 function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
-    x = state.x
-    μ, Hxx, J = state.μ, state.H, state.constr_J
-    bstate, bounds = state.bstate, constraints.bounds
+    x, μ, Hxx, J = state.x, state.μ, state.H, state.constr_J
+    bstate, bgrad, bounds = state.bstate, state.bgrad, constraints.bounds
     m, n = size(J, 1), size(J, 2)
 
     d.h!(state.x, Hxx)  # objective's Hessian
-    hessianI!(Hxx, state.x, constraints, bstate.λc, μ)  # accumulate the inequality second derivatives
+    # accumulate the constraint second derivatives
+    λ = userλ(bstate.λc, constraints)
+    λ[bounds.eqc] = -bstate.λcE  # the negative sign is from the Hessian
+    constraints.h!(x, λ, Hxx)
     # Add the Jacobian terms (J'*Hss*J)
-    JI = view5(J, bounds.ineqc, :)
+    JIc = view5(J, bounds.ineqc, :)
     Hssc = Diagonal(bstate.λc./bstate.slack_c)
-    HJ = JI'*Hssc*JI
+    HJ = JIc'*Hssc*JIc
     for j = 1:n, i = 1:n
         Hxx[i,j] += HJ[i,j]
     end
@@ -129,38 +145,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     for (i,j) in enumerate(bounds.ineqx)
         Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i]
     end
-    # Perform a positive factorization
-    Hpc, state.Hd = ldltfact(Positive, Hxx)
-    Hp = full(Hpc)
-    # Now add the equality constraint hessian terms
-    eqc, λcE = bounds.eqc, bstate.λcE
-    λ = zeros(eltype(x), nconstraints(bounds))
-    for i = 1:length(eqc)
-        λ[eqc[i]] -= λcE[i]
-    end
-    constraints.h!(state.x, λ, Hp)
-    # Also add these to Hxx so we have the true Hessian (the one
-    # without forcing positive-definiteness)
-    constraints.h!(state.x, λ, Hxx)
-    # Form the total Hessian
-    JEx = zeros(eltype(bounds), length(bounds.eqx), length(state.x))
-    for (i,j) in enumerate(bounds.eqx)
-        JEx[i,j] = 1
-    end
-    JEc = view5(J, eqc, :)
-    Jod = zeros(eltype(JEx), size(JEc, 1), size(JEx, 1))
-    state.Hf = [Hp -JEx' -JEc';
-                -JEx zeros(eltype(JEx), size(JEx,1), size(JEx,1)) Jod';
-                -JEc Jod zeros(eltype(JEc), size(JEc,1), size(JEc,1))]
-    # Also form the total gradient
-    bgrad = state.bgrad
-    gI = state.g + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - Hssc*bgrad.λc)
-    for (i,j) in enumerate(bounds.ineqx)
-        gI[j] += -μ*bounds.σx[i]./bstate.slack_x[i] + bstate.λx[i]*(x[j]-bounds.bx[i])/bstate.slack_x[i]
-    end
-    state.gf = [gI;
-                bgrad.λxE;
-                bgrad.λcE]
+
     state
 end
 
@@ -202,38 +187,42 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     # Evaluate the constraints at the new position
     constraints.c!(state.x, state.constr_c)
     constraints.jacobian!(state.x, state.constr_J)
+    @assert state.ev == equality_violation(constraints, state)
 
     false
 end
 
 function solve_step!(state::IPNewtonState, constraints)
-    # Solve the Newton step
-    local step
-    try
-        step = -(state.Hf\state.gf)  # do *not* force posdef
-    catch
-        step = -(svdfact(state.Hf)\state.gf)
-    end
     x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
-    k = unpack_vec!(s, step, 0)
-    k = unpack_vec!(bstep.λxE, step, k)
-    k = unpack_vec!(bstep.λcE, step, k)
-    k == length(step) || error("exhausted targets before step")
+    # Solve the Newton step by eliminating x first: solve for ΔλE, then back-substitute for Δx
+    Hxx = state.H
+    JE = jacobianE(state, bounds)
+    # Q, R, p = qr(JE', Val{true})
+    gE = [bgrad.λxE;   # gradient entries for the equality multipliers (variables, then constraints)
+          bgrad.λcE]
+    HxxF = cholfact(Positive, Hxx, Val{true})  # factorization that forces positive-definiteness
+    M = JE*(HxxF \ JE')  # Schur complement JE*H⁻¹*JE', with H the factorized Hessian
+    MF = cholfact(Positive, M, Val{true})
+    ΔλE = MF \ (gE + JE * (HxxF \ state.gtilde))  # step in the equality multipliers
+    Δx = HxxF \ (JE'*ΔλE - state.gtilde)  # back-substitute for the x step
+    copy!(s, Δx)  # store the x step in state.s
+    k = unpack_vec!(bstep.λxE, ΔλE, 0)  # split ΔλE into its λxE part ...
+    k = unpack_vec!(bstep.λcE, ΔλE, k)  # ... and its λcE part
+    k == length(ΔλE) || error("exhausted targets before ΔλE")
     # Solve for the slack variable and λI updates
     for (i, j) in enumerate(bounds.ineqx)
         bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j]
         # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
         bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i]
     end
-    JI = view5(state.constr_J, bounds.ineqc, :)
-    SigmaJIΔx = Diagonal(bounds.σc)*(JI*state.s)
+    JIc = view5(state.constr_J, bounds.ineqc, :)  # Jacobian rows selected by bounds.ineqc
+    SigmaJIΔx = Diagonal(bounds.σc)*(JIc*state.s)
     for i = 1:length(bstep.λc)
         bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i]
         # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
         bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
     end
-    state.stepf = step
     state
 end
 
@@ -246,7 +235,9 @@ function is_smaller_eps(ref, step)
 end
 
 function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
-    slope = dot(state.stepf, state.gf)
+    slope = dot(state.s, state.gtilde) +
+        dot(state.bstep.λxE, state.bgrad.λxE) +
+        dot(state.bstep.λcE, state.bgrad.λcE)
     # For the curvature, use the original hessian (before forcing
     # positive-definiteness)
     q = dot(state.s, state.H*state.s)
@@ -254,3 +245,15 @@ function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
     q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE)
     state.L, slope, q
 end
+
+# Testing helpers: assemble the "full Hessian" and "full gradient" of
+# the reduced Newton system, i.e. the one that remains once the slack
+# and λI variables have been eliminated.
+function Hf(bounds::ConstraintBounds, state)
+    Hpos = full(cholfact(Positive, state.H))  # positive-definite version of state.H
+    JE = jacobianE(state, bounds)
+    nE = size(JE, 1)
+    return [Hpos -JE'; -JE zeros(eltype(JE), nE, nE)]
+end
+Hf(constraints, state) = Hf(constraints.bounds, state)
+gf(state) = vcat(state.gtilde, state.bgrad.λxE, state.bgrad.λcE)
diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl
index b11800254..c2551ce55 100644
--- a/src/utilities/assess_convergence.jl
+++ b/src/utilities/assess_convergence.jl
@@ -86,11 +86,13 @@ function assess_convergence(state::NewtonTrustRegionState, options)
 end
 
 function assess_convergence(state::IPNewtonState, options)
+    # We use the whole bstate-gradient `bgrad`
+    bgrad = state.bgrad
     assess_convergence(state.x,
                        state.x_previous,
                        state.L,
                        state.L_previous,
-                       state.gf,
+                       [state.g; bgrad.slack_x; bgrad.slack_c; bgrad.λx; bgrad.λc; bgrad.λxE; bgrad.λcE],
                        options.x_tol,
                        options.f_tol,
                        options.g_tol)
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 4f08ae506..1ad4139dc 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -119,14 +119,13 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
     dt = Dict()
     dt["Lagrangian"] = state.L
     dt["μ"] = state.μ
-    dt["ev"] = state.ev
+    dt["ev"] = abs(state.ev)
     if options.extended_trace
         dt["α"] = state.alpha
         dt["x"] = copy(state.x)
         dt["g(x)"] = copy(state.g)
-        dt["gf(x)"] = copy(state.gf)
+        dt["gtilde(x)"] = copy(state.gtilde)
         dt["h(x)"] = copy(state.H)
-        dt["hf(x)"] = copy(state.Hf)
         dt["bstate"] = copy(state.bstate)
         dt["bgrad"] = copy(state.bgrad)
         dt["c"] = copy(state.constr_c)
diff --git a/test/constraints.jl b/test/constraints.jl
index 57f3d2b4a..497cad4a4 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -113,8 +113,8 @@ ConstraintBounds:
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, dg, constraints, x)
-        @test state.gf ≈ gx
-        @test state.Hf ≈ H
+        @test Optim.gf(state) ≈ gx
+        @test Optim.Hf(constraints, state) ≈ H
         ## Pure equality constraints on variables
         xbar = fill(0.2, length(x))
         bounds = Optim.ConstraintBounds(xbar, xbar, [], [])
@@ -132,18 +132,18 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λxE, bstate.λxE)
         setstate!(state, μ, d0, constraints, method)
-        @test state.gf ≈ [gx; xbar-x]
+        @test Optim.gf(state) ≈ [gx; xbar-x]
         n = length(x)
-        @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
+        @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], xbar, xbar)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λcE, bstate.λxE)
         setstate!(state, μ, d0, constraints, method)
-        @test state.gf ≈ [gx; xbar-x]
+        @test Optim.gf(state) ≈ [gx; xbar-x]
         n = length(x)
-        @test state.Hf ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
+        @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         ## Nonnegativity constraints
         bounds = Optim.ConstraintBounds(zeros(length(x)), fill(Inf,length(x)), [], [])
         y = rand(length(x))
@@ -160,15 +160,15 @@ ConstraintBounds:
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
-        @test state.gf ≈ -μ./y
-        @test state.Hf ≈ μ*Diagonal(1./y.^2)
+        @test Optim.gf(state) ≈ -μ./y
+        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
-        @test state.gf ≈ -μ./y
-        @test state.Hf ≈ μ*Diagonal(1./y.^2)
+        @test Optim.gf(state) ≈ -μ./y
+        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
         lb, ub = rand(length(x))-2, rand(length(x))+1
         bounds = Optim.ConstraintBounds(lb, ub, [], [])
@@ -207,8 +207,8 @@ ConstraintBounds:
             hxs[j] += htmp
             gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp
         end
-        @test state.gf ≈ gxs
-        @test state.Hf ≈ Diagonal(hxs)
+        @test Optim.gf(state) ≈ gxs
+        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], lb, ub)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
@@ -216,8 +216,8 @@ ConstraintBounds:
         copy!(state.bstate.slack_c, bstate.slack_x)
         copy!(state.bstate.λc, bstate.λx)
         setstate!(state, μ, d0, constraints, method)
-        @test state.gf ≈ gxs
-        @test state.Hf ≈ Diagonal(hxs)
+        @test Optim.gf(state) ≈ gxs
+        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
         cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
         cfun! = (x, c) -> copy!(c, cfun(x))
@@ -248,9 +248,9 @@ ConstraintBounds:
         setstate!(state, μ, d0, constraints, method)
         heq = zeros(length(x), length(x))
         ch!(x, bstate.λcE, heq)
-        @test state.gf ≈ [gx; cbar-c]
-        @test state.Hf ≈ [eye(length(x))-heq -J';
-                          -J zeros(size(J,1), size(J,1))]
+        @test Optim.gf(state) ≈ [gx; cbar-c]
+        @test Optim.Hf(constraints, state) ≈ [heq -J';
+                                              -J zeros(size(J,1), size(J,1))]
         ## Nonlinear inequality constraints
         bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2)
         bstate = Optim.BarrierStateVars(bounds, x, c)
@@ -285,8 +285,8 @@ ConstraintBounds:
         hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq
         gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c))
         hp = full(cholfact(Positive, hxx))
-        @test state.gf ≈ gf
-        @test state.Hf ≈ hp
+        @test Optim.gf(state) ≈ gf
+        @test Optim.Hf(constraints, state) ≈ hp
     end
 
     @testset "IPNewton initialization" begin

From 3772033c148b3bbe409a8f3e05c6de57a94f0d53 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 21 Nov 2016 11:12:35 -0600
Subject: [PATCH 28/40] Implement some convert methods needed to leverage
 ForwardDiff

---
 src/interior.jl     | 18 +++++++++++++++++-
 src/ipnewton.jl     | 33 +++++++++++++++++++++++++++++++++
 test/constraints.jl | 13 +++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/src/interior.jl b/src/interior.jl
index 5e88e5c13..939a64e09 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -69,7 +69,6 @@ Base.copy(bstate::BarrierStateVars) =
                      copy(bstate.λxE),
                      copy(bstate.λcE))
 
-
 function Base.fill!(b::BarrierStateVars, val)
     fill!(b.slack_x, val)
     fill!(b.slack_c, val)
@@ -80,6 +79,14 @@ function Base.fill!(b::BarrierStateVars, val)
     b
 end
 
+Base.convert{T}(::Type{BarrierStateVars{T}}, bstate::BarrierStateVars) =
+    BarrierStateVars(convert(Array{T}, bstate.slack_x),
+                     convert(Array{T}, bstate.slack_c),
+                     convert(Array{T}, bstate.λx),
+                     convert(Array{T}, bstate.λc),
+                     convert(Array{T}, bstate.λxE),
+                     convert(Array{T}, bstate.λcE))
+
 Base.eltype{T}(::Type{BarrierStateVars{T}}) = T
 Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv))
 
@@ -127,6 +134,9 @@ immutable BarrierLineSearch{T}
     c::Vector{T}                  # value of constraints-functions at trial point
     bstate::BarrierStateVars{T}   # trial point for slack and λ variables
 end
+Base.convert{T}(::Type{BarrierLineSearch{T}}, bsl::BarrierLineSearch) =
+    BarrierLineSearch(convert(Vector{T}, bsl.c),
+                      convert(BarrierStateVars{T}, bsl.bstate))
 
 """
     BarrierLineSearchGrad{T}
@@ -139,6 +149,12 @@ immutable BarrierLineSearchGrad{T}
     bstate::BarrierStateVars{T}   # trial point for slack and λ variables
     bgrad::BarrierStateVars{T}    # trial point's gradient
 end
+Base.convert{T}(::Type{BarrierLineSearchGrad{T}}, bsl::BarrierLineSearchGrad) =
+    BarrierLineSearchGrad(convert(Vector{T}, bsl.c),
+                          convert(Matrix{T}, bsl.J),
+                          convert(BarrierStateVars{T}, bsl.bstate),
+                          convert(BarrierStateVars{T}, bsl.bgrad))
+
 
 function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI)
     ls_update!(out.slack_x, base.slack_x, step.slack_x, α)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 9d52649af..08f14162c 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -28,6 +28,39 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     gtilde::Vector{T}
 end
 
+function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,N})
+    IPNewtonState(state.method_string,
+                  state.n,
+                  convert(Array{T}, state.x),
+                  T(state.f_x),
+                  state.f_calls,
+                  state.g_calls,
+                  state.h_calls,
+                  convert(Array{T}, state.x_previous),
+                  convert(Array{T}, state.g),
+                  T(state.f_x_previous),
+                  convert(Array{T}, state.H),
+                  state.Hd,
+                  convert(Array{T}, state.s),
+                  T(state.μ),
+                  T(state.L),
+                  T(state.L_previous),
+                  convert(BarrierStateVars{T}, state.bstate),
+                  convert(BarrierStateVars{T}, state.bgrad),
+                  convert(BarrierStateVars{T}, state.bstep),
+                  convert(Array{T}, state.constr_c),
+                  convert(Array{T}, state.constr_J),
+                  T(state.ev),
+                  convert(Array{T}, state.x_ls),
+                  convert(Array{T}, state.g_ls),
+                  T(state.alpha),
+                  state.mayterminate,
+                  state.lsr,
+                  convert(BarrierLineSearchGrad{T}, state.b_ls),
+                  convert(Array{T}, state.gtilde)
+                  )
+end
+
 function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunction, constraints::TwiceDifferentiableConstraintsFunction, initial_x::Array{T})
     # Check feasibility of the initial state
     mc = nconstraints(constraints)
diff --git a/test/constraints.jl b/test/constraints.jl
index 497cad4a4..28d0bbc0e 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -371,6 +371,19 @@ ConstraintBounds:
     end
 
     @testset "IPNewton step" begin
+        function autoqp(d, constraints, state)
+            # Note that state must be fully up-to-date, and you must
+            # have also called Optim.solve_step!
+            p = Optim.pack_vec(state.x, state.bstate)
+            chunksize = min(8, max(length(p), 4))  # since αs is of length 4
+            TD = ForwardDiff.Dual{chunksize,eltype(p)}
+            TD2 = ForwardDiff.Dual{chunksize,ForwardDiff.Dual{chunksize,eltype(p)}}
+            stated = convert(Optim.IPNewtonState{TD,1}, state)
+            stated2 = convert(Optim.IPNewtonState{TD2,1}, state)
+            ϕd = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated)
+            ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2)
+            ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4))
+        end
         F = 1000
         d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
         method = Optim.IPNewton()

From d627e367f45602ffe7c63788a54c1ab5cee6064a Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 21 Nov 2016 12:38:40 -0600
Subject: [PATCH 29/40] =?UTF-8?q?Support=20multi-parameter=20=CE=B1=20in?=
 =?UTF-8?q?=20linesearch,=20check=20slope=20during=20linesearch,=20and=20f?=
 =?UTF-8?q?ix=20quadratic=20parameters?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the step size for the inequality λ may not be the same as the step size for the rest of the parameters, we need a more sophisticated model of the function's behavior during linesearch. This commit implements a 4-parameter vector (αx, αs, αI, αE) for changes to the position, slack, inequality LM, and equality LM, respectively (LM = Lagrange multiplier). Consequently the "slope" becomes a 4-vector and the "curvature" a 4×4 matrix.


In practice this helps convergence on some "hard" problems (e.g., HATFLDF).
---
 src/interior.jl     | 74 ++++++++++++++++++++++++++++++++++++---------
 src/iplinesearch.jl | 48 ++++++++++++++++++++++++++---
 src/ipnewton.jl     | 58 ++++++++++++++++++++++++++---------
 test/constraints.jl | 15 +++++----
 4 files changed, 156 insertions(+), 39 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 939a64e09..3082a8e08 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -155,16 +155,21 @@ Base.convert{T}(::Type{BarrierLineSearchGrad{T}}, bsl::BarrierLineSearchGrad) =
                           convert(BarrierStateVars{T}, bsl.bstate),
                           convert(BarrierStateVars{T}, bsl.bgrad))
 
-
-function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α, αI)
-    ls_update!(out.slack_x, base.slack_x, step.slack_x, α)
-    ls_update!(out.slack_c, base.slack_c, step.slack_c, α)
-    ls_update!(out.λxE, base.λxE, step.λxE, α)
-    ls_update!(out.λcE, base.λcE, step.λcE, α)
-    ls_update!(out.λx, base.λx, step.λx, αI)
-    ls_update!(out.λc, base.λc, step.λc, αI)
+function ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::NTuple{4,Number})
+    ls_update!(out.slack_x, base.slack_x, step.slack_x, αs[2])
+    ls_update!(out.slack_c, base.slack_c, step.slack_c, αs[2])
+    ls_update!(out.λx, base.λx, step.λx, αs[3])
+    ls_update!(out.λc, base.λc, step.λc, αs[3])
+    ls_update!(out.λxE, base.λxE, step.λxE, αs[4])
+    ls_update!(out.λcE, base.λcE, step.λcE, αs[4])
     out
 end
+ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::Tuple{Number,Number}) =
+    ls_update!(out, base, step, (αs[1],αs[1],αs[2],αs[1]))
+ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α::Number) =
+    ls_update!(out, base, step, (α,α,α,α))
+ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::AbstractVector) =
+    ls_update!(out, base, step, (αs...,))
 
 function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions)
     t0 = time() # Initial time stamp used to control early stopping by options.time_limit
@@ -488,18 +493,20 @@ function lagrangian_fgvec!(p, storage, gx, bgrad, d, bounds::ConstraintBounds, x
     L_xsλ
 end
 
-# for line searches that don't use the gradient along the line
-function lagrangian_linefunc(α, αI, d, constraints, state)
-    _lagrangian_linefunc(α, αI, d, constraints, state)[2]
+## for line searches that don't use the gradient along the line
+function lagrangian_linefunc(αs, d, constraints, state)
+    _lagrangian_linefunc(αs, d, constraints, state)[2]
 end
 
-function _lagrangian_linefunc(α, αI, d, constraints, state)
+function _lagrangian_linefunc(αs, d, constraints, state)
     b_ls, bounds = state.b_ls, constraints.bounds
-    ls_update!(state.x_ls, state.x, state.s, α)
-    ls_update!(b_ls.bstate, state.bstate, state.bstep, α, αI)
+    ls_update!(state.x_ls, state.x, state.s, alphax(αs))
+    ls_update!(b_ls.bstate, state.bstate, state.bstep, αs)
     constraints.c!(state.x_ls, b_ls.c)
     lagrangian(d, constraints.bounds, state.x_ls, b_ls.c, b_ls.bstate, state.μ)
 end
+alphax(α::Number) = α
+alphax(αs::Union{Tuple,AbstractVector}) = αs[1]
 
 function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
     # For backtrack_constrained, the last evaluation is the one we
@@ -509,6 +516,45 @@ function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimize
 end
 lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state)
 
+
+## for line searches that do use the gradient along the line
+function lagrangian_lineslope(αs, d, constraints, state)
+    f_x, L, ev, slope = _lagrangian_lineslope(αs, d, constraints, state)
+    L, slope
+end
+
+function _lagrangian_lineslope(αs, d, constraints, state)
+    b_ls, bounds = state.b_ls, constraints.bounds
+    bstep, bgrad = state.bstep, b_ls.bgrad
+    ls_update!(state.x_ls, state.x, state.s, alphax(αs))
+    ls_update!(b_ls.bstate, state.bstate, bstep, αs)
+    constraints.c!(state.x_ls, b_ls.c)
+    constraints.jacobian!(state.x_ls, b_ls.J)
+    f_x, L, ev = lagrangian_fg!(state.g_ls, bgrad, d, bounds, state.x_ls, b_ls.c, b_ls.J, b_ls.bstate, state.μ)
+    slopeα = slopealpha(state.s, state.g_ls, bstep, bgrad)
+    f_x, L, ev, slopeα
+end
+
+function lagrangian_lineslope!(αs, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)})
+    # For backtrack_constrained, the last evaluation is the one we
+    # keep, so it's safe to store the results in state
+    state.f_x, state.L, state.ev, slope = _lagrangian_lineslope(αs, d, constraints, state)
+     state.L, slope
+end
+lagrangian_lineslope!(αs, d, constraints, state, method) = lagrangian_lineslope(αs, d, constraints, state)
+
+slopealpha(sx, gx, bstep, bgrad) = [dot(sx, gx),
+                                    dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c),
+                                    dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc),
+                                    dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)]
+
+function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)})
+    αs->lagrangian_lineslope!(αs, d, constraints, state, method)
+end
+function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
+    αs->lagrangian_linefunc!(αs, d, constraints, state, method)
+end
+
 ## Computation of Lagrangian terms: barrier penalty
 """
     barrier_value(constraints, state) -> val
diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index 681d7682b..881377a2c 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -1,17 +1,57 @@
 function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
-                               c1 = 0.5, ρ=oftype(α, 0.5), αmin = sqrt(eps(one(α))))
+                               c1 = 0.5, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α))))
     α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
+    αmin = αminfrac * α
     L0, L1, L2 = Lcoefsα
     f_calls = 0
     while α >= αmin
         f_calls += 1
-        val = ϕ(α, αI)
-        if isfinite(val) && abs(val - (L0 + L1*α + L2*α^2/2)) <= c1*abs(val-L0)
+        val = ϕ((α, αI))
+        δ = evalgrad(L1, α, αI)
+        if isfinite(val) && abs(val - (L0 + δ)) <= c1*abs(val-L0)
             return α, αI, f_calls, 0
         end
         α *= ρ
         αI *= ρ
     end
-    ϕ(zero(α), zero(αI))  # to ensure that state gets set appropriately
+    ϕ((zero(α), zero(αI)))  # to ensure that state gets set appropriately
     return zero(α), zero(αI), f_calls, 0
 end
+
+function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
+                                    c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α))))
+    α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
+    αmin = αminfrac * α
+    L0, L1, L2 = Lcoefsα
+    f_calls = 0
+    while α >= αmin
+        f_calls += 1
+        val, slopeα = ϕ((α, αI))
+        δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2
+        δslope = mulhess(L2, α, αI)
+        # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1))
+        # @show (α, αI, r0, r1)
+        if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) &&
+                            norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1)
+            return α, αI, f_calls, f_calls
+        end
+        α *= ρ
+        αI *= ρ
+    end
+    ϕ((zero(α), zero(αI)))  # to ensure that state gets set appropriately
+    return zero(α), zero(αI), f_calls, f_calls
+end
+
+# Evaluate for a step parametrized as [α, α, αI, α]
+function evalgrad(slopeα, α, αI)
+    α*(slopeα[1] + slopeα[2] + slopeα[4]) + αI*slopeα[3]
+end
+
+function mulhess(Hα, α, αI)
+    αv = [α, α, αI, α]
+    Hα*αv
+end
+function evalhess(Hα, α, αI)
+    αv = [α, α, αI, α]
+    dot(αv, Hα*αv)
+end
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 08f14162c..9672c755a 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -2,7 +2,7 @@ immutable IPNewton{F} <: IPOptimizer{F}
     linesearch!::F
 end
 
-IPNewton(; linesearch!::Function = backtrack_constrained) =
+IPNewton(; linesearch!::Function = backtrack_constrained_grad) =
   IPNewton(linesearch!)
 
 type IPNewtonState{T,N} <: AbstractBarrierState
@@ -24,7 +24,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     constr_J::Matrix{T}   # value of the user-supplied Jacobian at x
     ev::T                 # equality violation, ∑_i λ_Ei (c*_i - c_i)
     @add_linesearch_fields()
-    b_ls::BarrierLineSearch{T}
+    b_ls::BarrierLineSearchGrad{T}
     gtilde::Vector{T}
 end
 
@@ -89,7 +89,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
     bstate = BarrierStateVars(constraints.bounds, initial_x, constr_c)
     bgrad = similar(bstate)
     bstep = similar(bstate)
-    b_ls = BarrierLineSearch(similar(constr_c), similar(bstate))
+    # b_ls = BarrierLineSearch(similar(constr_c), similar(bstate))
+    b_ls = BarrierLineSearchGrad(similar(constr_c), similar(constr_J), similar(bstate), similar(bstate))
 
     state = IPNewtonState("Interior-point Newton's Method",
         length(initial_x),
@@ -167,7 +168,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     λ = userλ(bstate.λc, constraints)
     λ[bounds.eqc] = -bstate.λcE  # the negative sign is from the Hessian
     constraints.h!(x, λ, Hxx)
-    # Add the Jacobian terms (J'*Hss*J)
+    # Add the Jacobian terms (JI'*Hss*JI)
     JIc = view5(J, bounds.ineqc, :)
     Hssc = Diagonal(bstate.λc./bstate.slack_c)
     HJ = JIc'*Hssc*JIc
@@ -205,7 +206,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc)
 
     # Determine the actual distance of movement along the search line
-    ϕ = (α,αI)->lagrangian_linefunc!(α, αI, d, constraints, state, method)
+    ϕ = linesearch_anon(d, constraints, state, method)
     state.alpha, αI, f_update, g_update =
         method.linesearch!(ϕ, T(1), αmax, αImax, qp)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
@@ -215,7 +216,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
 
     # Update current position # x = x + alpha * s
     ls_update!(state.x, state.x, state.s, state.alpha)
-    ls_update!(bstate, bstate, bstep, state.alpha, αI)
+    ls_update!(bstate, bstate, bstep, (state.alpha, αI))
 
     # Evaluate the constraints at the new position
     constraints.c!(state.x, state.constr_c)
@@ -267,16 +268,43 @@ function is_smaller_eps(ref, step)
     ise
 end
 
+"""
+    quadratic_parameters(bounds, state) -> val, slopeα, Hα
+
+Return the parameters for the quadratic fit of the behavior of the
+lagrangian for positions parametrized as a function of the 4-vector
+`α = (αx, αs, αI, αE)`, where the step is
+
+    (αx * Δx, αs * Δs, αI * ΔλI, αE * ΔλE)
+
+and `Δx`, `Δs`, `ΔλI`, and `ΔλE` are the current search directions in
+the parameters. As a function of `α`, the local model is expressed as
+
+    val + dot(α, slopeα) + (α'*Hα*α)/2
+"""
 function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
-    slope = dot(state.s, state.gtilde) +
-        dot(state.bstep.λxE, state.bgrad.λxE) +
-        dot(state.bstep.λcE, state.bgrad.λcE)
-    # For the curvature, use the original hessian (before forcing
-    # positive-definiteness)
-    q = dot(state.s, state.H*state.s)
-    JE = view5(state.constr_J, bounds.eqc, :)
-    q -= 2*dot(state.s[bounds.eqx], state.bstep.λxE) + 2*dot(state.s, JE'*state.bstep.λcE)
-    state.L, slope, q
+    bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
+    slopeα = slopealpha(state.s, state.g, bstep, bgrad)
+    # For the curvature, use the original hessian (before adding the JI'*Hss*JI term)
+    # This undoes the dual correction. However, for linesearch we need
+    # primal, so calculate both.
+    jic = view5(state.constr_J, bounds.ineqc, :)*state.s
+    HsscD = Diagonal(bstate.λc./bstate.slack_c)
+    HsscP = Diagonal(state.μ./bstate.slack_c.^2)
+    jix = view(state.s, bounds.ineqx)
+    HssxD = Diagonal(bstate.λx./bstate.slack_x)
+    HssxP = Diagonal(state.μ./bstate.slack_x.^2)
+    jHj = dot(jic, HsscD*jic) + dot(jix, HssxD*jix)
+    ji = dot(bstep.λc, Diagonal(bounds.σc)*jic) + dot(bstep.λx, Diagonal(bounds.σx)*jix)
+    je = dot(bstep.λcE, view5(state.constr_J, bounds.eqc, :)*state.s) +
+         dot(bstep.λxE, view(state.s, bounds.eqx))
+    hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x)
+    si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx)
+    Hα = [state.s'*state.H*state.s - jHj 0    -ji   -je;
+          0                              hss  si    0;
+          -ji                            si   0     0;
+          -je                            0    0     0]
+    state.L, slopeα, Hα
 end
 
 # Utility functions that assist in testing: they return the "full
diff --git a/test/constraints.jl b/test/constraints.jl
index 28d0bbc0e..abc48c0cf 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -249,7 +249,7 @@ ConstraintBounds:
         heq = zeros(length(x), length(x))
         ch!(x, bstate.λcE, heq)
         @test Optim.gf(state) ≈ [gx; cbar-c]
-        @test Optim.Hf(constraints, state) ≈ [heq -J';
+        @test Optim.Hf(constraints, state) ≈ [full(cholfact(Positive, heq)) -J';
                                               -J zeros(size(J,1), size(J,1))]
         ## Nonlinear inequality constraints
         bounds = Optim.ConstraintBounds([], [], -rand(length(c))-1, rand(length(c))+2)
@@ -396,15 +396,18 @@ ConstraintBounds:
         Optim.solve_step!(state, constraints)
         @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0)
         qp = Optim.quadratic_parameters(constraints.bounds, state)
+        g0, H0 = autoqp(d, constraints, state)
         @test qp[1] ≈ F*x0-μ*log(x0)
-        @test qp[2] ≈ -(F-μ/x0)^2*x0^2/μ
-        @test qp[3] ≈ μ/x0^2*(x0 - F*x0^2/μ)^2
+        @test qp[2] ≈ g0 #-(F-μ/x0)^2*x0^2/μ
+        @test qp[3] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2
         bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
         αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx,
                                            state.s[bounds.ineqx].*bounds.σx)
-        ϕ = (α,αI)->Optim.lagrangian_linefunc(α, αI, d, constraints, state)
-        @test ϕ(0,0) ≈ qp[1]
-        α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp)
+        ϕ = Optim.linesearch_anon(d, constraints, state, method)
+        val0 = ϕ((0,0))
+        val0 = isa(val0, Tuple) ? val0[1] : val0
+        @test val0 ≈ qp[1]
+        α, αI, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp)
         @test α > 1e-3
     end
 

From b91eac6bce67eeeef3e61d564b8a189adb60ea49 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 21 Nov 2016 12:38:56 -0600
Subject: [PATCH 30/40] Check that solution has enough precision to count

---
 src/ipnewton.jl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 9672c755a..2a654f386 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -240,6 +240,13 @@ function solve_step!(state::IPNewtonState, constraints)
     MF = cholfact(Positive, M, Val{true})
     ΔλE = MF \ (gE + JE * (HxxF \ state.gtilde))
     Δx = HxxF \ (JE'*ΔλE - state.gtilde)
+    if norm(gE) < norm(gE - JE*Δx) # ||
+        # norm(state.gtilde) < norm(full(HxxF)*Δx - JE'*ΔλE + state.gtilde)
+        # Precision problems gave us a worse solution than the one we started with, abort
+        fill!(s, 0)
+        fill!(bstep, 0)
+        return state
+    end
     copy!(s, Δx)
     k = unpack_vec!(bstep.λxE, ΔλE, 0)
     k = unpack_vec!(bstep.λcE, ΔλE, k)

From 69bd212cd807c1ea3ab7c19a99862e4fa11c2fba Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 22 Nov 2016 08:26:58 -0600
Subject: [PATCH 31/40] Add the Beale unconstrained problem

This one revealed problems with the interior-point Newton method, specifically in the linesearch.
---
 src/problems/unconstrained.jl | 63 +++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl
index 431ae0a53..fae520350 100644
--- a/src/problems/unconstrained.jl
+++ b/src/problems/unconstrained.jl
@@ -8,6 +8,8 @@ using ..OptimizationProblem
 ### Link: www.researchgate.net/profile/Montaz_Ali/publication/226654862_A_Numerical_Evaluation_of_Several_Stochastic_Algorithms_on_Selected_Continuous_Global_Optimization_Test_Problems/links/00b4952bef133a1a6b000000.pdf
 ###
 ### [2] Fletcher & Powell: A rapidly convergent descent method for minimization,
+###
+### [3] More, Garbow, Hillstrom (1981): Testing Unconstrained Optimization Software, ACM Trans. Math. Soft. 7: 17-41.
 
 examples = Dict{AbstractString, OptimizationProblem}()
 
@@ -349,4 +351,65 @@ examples["Rosenbrock"] = OptimizationProblem("Rosenbrock",
                                              true,
                                              true)
 
+##########################################################################
+###
+### Beale (2D)
+###
+### Problem 5 in [3]
+###
+### Sum-of-squares objective, non-convex with g'*inv(H)*g == 0 at the
+### initial position.
+###
+##########################################################################
+
+const beale_y = [1.5, 2.25, 2.625]
+
+beale_f(x) = [beale_y[i] - x[1]*(1-x[2]^i) for i = 1:3]
+beale_J(x) = hcat([-(1-x[2]^i) for i = 1:3],
+                 [i*x[1]*x[2]^(i-1) for i = 1:3])
+function beale_H(x, i)
+    od = i*x[2]^(i-1)
+    d2 = i > 1 ? i*(i-1)*x[1]*x[2]^(i-2) : zero(x[2])
+    [0 od; od d2]
+end
+
+beale(x::AbstractVector) = sumsq_obj(beale_f, x)
+
+function beale_gradient!(x::AbstractVector, g::AbstractVector)
+    sumsq_gradient!(beale_f, beale_J, x, g)
+end
+
+function beale_hessian!(x::AbstractVector, h::AbstractMatrix)
+    sumsq_hessian!(beale_f, beale_J, beale_H, x, h)
+end
+
+examples["Beale"] = OptimizationProblem("Beale",
+                                       beale,
+                                       beale_gradient!,
+                                       beale_hessian!,
+                                       [1.0, 1.0],
+                                       [3.0, 0.5],
+                                       true,
+                                       true)
+
+### General utilities for sum-of-squares functions
+# Requires f(x) and J(x) computes the values and jacobian at x of a set of functions, and
+# that H(x, i) computes the hessian of the ith function
+
+sumsq_obj(f, x) = sum(f(x).^2)
+
+function sumsq_gradient!(f, J, x::AbstractVector, g::AbstractVector)
+    copy!(g, sum((2*f(x)).*J(x), 1))
+end
+
+function sumsq_hessian!(f, J, H, x::AbstractVector, h::AbstractMatrix)
+    fx = f(x)
+    Jx = J(x)
+    htmp = 2*(Jx'*Jx)
+    for i = 1:length(fx)
+        htmp += (2*fx[i])*H(x, i)
+    end
+    copy!(h, htmp)
+end
+
 end # module

From d4e5192075c19a0b082f0fa1908f1ca647a398ed Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 22 Nov 2016 08:27:37 -0600
Subject: [PATCH 32/40] WIP

---
 src/iplinesearch.jl | 3 +++
 src/ipnewton.jl     | 9 +++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index 881377a2c..f43dc43d7 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -23,6 +23,7 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
     α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
     αmin = αminfrac * α
     L0, L1, L2 = Lcoefsα
+    # @show L2
     f_calls = 0
     while α >= αmin
         f_calls += 1
@@ -30,6 +31,8 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
         δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2
         δslope = mulhess(L2, α, αI)
         # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1))
+        # @show val L0 L0+δval
+        # @show slopeα L1 L1+δslope
         # @show (α, αI, r0, r1)
         if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) &&
                             norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 2a654f386..0982d9ec8 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -307,10 +307,11 @@ function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
          dot(bstep.λxE, view(state.s, bounds.eqx))
     hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x)
     si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx)
-    Hα = [state.s'*state.H*state.s - jHj 0    -ji   -je;
-          0                              hss  si    0;
-          -ji                            si   0     0;
-          -je                            0    0     0]
+    hxx = dot(state.s, state.H*state.s) - jHj
+    Hα = [hxx    0    -ji   -je;
+          0      hss  si    0;
+          -ji    si   0     0;
+          -je    0    0     0]
     state.L, slopeα, Hα
 end
 

From 0aee5158c280258c065368ae10b01fc4c2953cc0 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Tue, 22 Nov 2016 23:26:58 -0600
Subject: [PATCH 33/40] ipnewton linesearch: switch to one-sided tests

This appears to fix many fail-to-converge problems. In some cases, the Hessian's prediction of the slope change is approximately or exactly 0, which makes the slope condition impossible to satisfy. A cure is to use the positive-definite modified Hessian (which never predicts a zero change in slope) when computing the linesearch's local-model estimate. This only makes sense together with switching the linesearch slope criterion to an upper bound (which is sensible in its own right).

An unfortunate consequence is that we have to compute 3 Cholesky factorizations rather than 2; the 3rd is needed only for the linesearch.
---
 src/iplinesearch.jl |  6 +++---
 src/ipnewton.jl     | 45 ++++++++++++++++++++++++---------------------
 test/constraints.jl | 13 ++++++-------
 3 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index f43dc43d7..b1b3bd240 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -8,7 +8,7 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
         f_calls += 1
         val = ϕ((α, αI))
         δ = evalgrad(L1, α, αI)
-        if isfinite(val) && abs(val - (L0 + δ)) <= c1*abs(val-L0)
+        if isfinite(val) && val - (L0 + δ) <= c1*abs(val-L0)
             return α, αI, f_calls, 0
         end
         α *= ρ
@@ -34,8 +34,8 @@ function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
         # @show val L0 L0+δval
         # @show slopeα L1 L1+δslope
         # @show (α, αI, r0, r1)
-        if isfinite(val) && abs(val - (L0 + δval)) <= c1*abs(val-L0) &&
-                            norm(slopeα - (L1 + δslope)) <= c2*norm(slopeα-L1)
+        if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) &&
+                            all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1))
             return α, αI, f_calls, f_calls
         end
         α *= ρ
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 0982d9ec8..66ba9829f 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -11,6 +11,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     g::Array{T,N}
     f_x_previous::T
     H::Matrix{T}
+    HP
     Hd::Vector{Int8}
     s::Array{T,N}  # step for x
     # Barrier penalty fields
@@ -26,6 +27,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     @add_linesearch_fields()
     b_ls::BarrierLineSearchGrad{T}
     gtilde::Vector{T}
+    Htilde
 end
 
 function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,N})
@@ -40,6 +42,7 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,
                   convert(Array{T}, state.g),
                   T(state.f_x_previous),
                   convert(Array{T}, state.H),
+                  state.HP,
                   state.Hd,
                   convert(Array{T}, state.s),
                   T(state.μ),
@@ -57,7 +60,8 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,
                   state.mayterminate,
                   state.lsr,
                   convert(BarrierLineSearchGrad{T}, state.b_ls),
-                  convert(Array{T}, state.gtilde)
+                  convert(Array{T}, state.gtilde),
+                  state.Htilde,
                   )
 end
 
@@ -103,6 +107,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         g, # Store current gradient in state.g
         T(NaN), # Store previous f in state.f_x_previous
         H,
+        0,    # will be replaced
         Hd,
         similar(initial_x), # Maintain current x-search direction in state.s
         μ,
@@ -116,7 +121,8 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         T(NaN),
         @initial_linesearch()..., # Maintain a cache for line search results in state.lsr
         b_ls,
-        gtilde)
+        gtilde,
+        0)
 
     d.h!(initial_x, state.H)
     Hinfo = (state.H, hessianI(initial_x, constraints, 1./bstate.slack_c, 1))
@@ -168,17 +174,20 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     λ = userλ(bstate.λc, constraints)
     λ[bounds.eqc] = -bstate.λcE  # the negative sign is from the Hessian
     constraints.h!(x, λ, Hxx)
+    state.HP = cholfact(Positive, Hxx, Val{true})
     # Add the Jacobian terms (JI'*Hss*JI)
+    Htilde = full(state.HP)
     JIc = view5(J, bounds.ineqc, :)
     Hssc = Diagonal(bstate.λc./bstate.slack_c)
     HJ = JIc'*Hssc*JIc
     for j = 1:n, i = 1:n
-        Hxx[i,j] += HJ[i,j]
+        Htilde[i,j] += HJ[i,j]
     end
     # Add the variable inequalities portions of J'*Hssx*J
     for (i,j) in enumerate(bounds.ineqx)
-        Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i]
+        Htilde[j,j] += bstate.λx[i]/bstate.slack_x[i]
     end
+    state.Htilde = cholfact(Hermitian(Htilde))
 
     state
 end
@@ -230,18 +239,18 @@ function solve_step!(state::IPNewtonState, constraints)
     x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
     # Solve the Newton step
-    Hxx = state.H
+    Htilde = state.Htilde
     JE = jacobianE(state, bounds)
     # Q, R, p = qr(JE', Val{true})
     gE = [bgrad.λxE;
           bgrad.λcE]
-    HxxF = cholfact(Positive, Hxx, Val{true})
-    M = JE*(HxxF \ JE')
+    M = JE*(Htilde \ JE')
     MF = cholfact(Positive, M, Val{true})
-    ΔλE = MF \ (gE + JE * (HxxF \ state.gtilde))
-    Δx = HxxF \ (JE'*ΔλE - state.gtilde)
-    if norm(gE) < norm(gE - JE*Δx) # ||
-        # norm(state.gtilde) < norm(full(HxxF)*Δx - JE'*ΔλE + state.gtilde)
+    ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde))
+    Δx = Htilde \ (JE'*ΔλE - state.gtilde)
+    # TODO: don't require full here
+    if norm(gE) + norm(state.gtilde) < max(norm(gE - JE*Δx),
+                                           norm(full(Htilde)*Δx - JE'*ΔλE + state.gtilde))
         # Precision problems gave us a worse solution than the one we started with, abort
         fill!(s, 0)
         fill!(bstep, 0)
@@ -292,22 +301,17 @@ the parameters. As a function of `α`, the local model is expressed as
 function quadratic_parameters(bounds::ConstraintBounds, state::IPNewtonState)
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
     slopeα = slopealpha(state.s, state.g, bstep, bgrad)
-    # For the curvature, use the original hessian (before adding the JI'*Hss*JI term)
-    # This undoes the dual correction. However, for linesearch we need
-    # primal, so calculate both.
+
     jic = view5(state.constr_J, bounds.ineqc, :)*state.s
-    HsscD = Diagonal(bstate.λc./bstate.slack_c)
-    HsscP = Diagonal(state.μ./bstate.slack_c.^2)
+    HsscP = Diagonal(state.μ./bstate.slack_c.^2)  # for linesearch we need primal
     jix = view(state.s, bounds.ineqx)
-    HssxD = Diagonal(bstate.λx./bstate.slack_x)
     HssxP = Diagonal(state.μ./bstate.slack_x.^2)
-    jHj = dot(jic, HsscD*jic) + dot(jix, HssxD*jix)
     ji = dot(bstep.λc, Diagonal(bounds.σc)*jic) + dot(bstep.λx, Diagonal(bounds.σx)*jix)
     je = dot(bstep.λcE, view5(state.constr_J, bounds.eqc, :)*state.s) +
          dot(bstep.λxE, view(state.s, bounds.eqx))
     hss = dot(bstep.slack_c, HsscP*bstep.slack_c) + dot(bstep.slack_x, HssxP*bstep.slack_x)
     si = dot(bstep.slack_c, bstep.λc) + dot(bstep.slack_x, bstep.λx)
-    hxx = dot(state.s, state.H*state.s) - jHj
+    hxx = dot(state.s, full(state.HP)*state.s)  # TODO: don't require full here
     Hα = [hxx    0    -ji   -je;
           0      hss  si    0;
           -ji    si   0     0;
@@ -320,8 +324,7 @@ end
 # eliminated.
 function Hf(bounds::ConstraintBounds, state)
     JE = jacobianE(state, bounds)
-    HxxF = cholfact(Positive, state.H)
-    Hf = [full(HxxF) -JE';
+    Hf = [full(state.Htilde) -JE';
           -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))]
 end
 Hf(constraints, state) = Hf(constraints.bounds, state)
diff --git a/test/constraints.jl b/test/constraints.jl
index abc48c0cf..ecd072a29 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -161,14 +161,14 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ -μ./y
-        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
+        @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ -μ./y
-        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
+        @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
         lb, ub = rand(length(x))-2, rand(length(x))+1
         bounds = Optim.ConstraintBounds(lb, ub, [], [])
@@ -208,7 +208,7 @@ ConstraintBounds:
             gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp
         end
         @test Optim.gf(state) ≈ gxs
-        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
+        @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], lb, ub)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
@@ -217,7 +217,7 @@ ConstraintBounds:
         copy!(state.bstate.λc, bstate.λx)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ gxs
-        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
+        @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs)
         ## Nonlinear equality constraints
         cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
         cfun! = (x, c) -> copy!(c, cfun(x))
@@ -282,11 +282,10 @@ ConstraintBounds:
         # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq
         # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2))
         # Primal-dual
-        hxx = JI'*Diagonal(bstate.λc./bstate.slack_c)*JI - hineq
+        hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
         gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c))
-        hp = full(cholfact(Positive, hxx))
         @test Optim.gf(state) ≈ gf
-        @test Optim.Hf(constraints, state) ≈ hp
+        @test Optim.Hf(constraints, state) ≈ hxx
     end
 
     @testset "IPNewton initialization" begin

From 3b5d08b30144064e0606205cb0fe505b629fa082 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 00:00:08 -0600
Subject: [PATCH 34/40] Add option to show linesearch progress

---
 src/interior.jl     |  2 +-
 src/iplinesearch.jl | 19 +++++++++++++------
 src/ipnewton.jl     | 11 +++++++----
 src/types.jl        |  6 ++++--
 test/constraints.jl |  6 +++---
 5 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 3082a8e08..497b142b8 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -201,7 +201,7 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         iteration += 1
         iterationμ += 1
 
-        update_state!(d, constraints, state, method) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS)
+        update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS)
         update_asneeded_fg!(d, constraints, state, method)
         x_converged, f_converged,
         g_converged, converged = assess_convergence(state, options)
diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index b1b3bd240..4c2d690c2 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -19,21 +19,28 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
 end
 
 function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
-                                    c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5), αminfrac = sqrt(eps(one(α))))
+                                    c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5),
+                                    αminfrac = sqrt(eps(one(α))); show_linesearch::Bool=false)
     α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
     αmin = αminfrac * α
     L0, L1, L2 = Lcoefsα
-    # @show L2
+    if show_linesearch
+        println("L0 = $L0, L1 = $L1, L2 = ")
+        Base.showarray(STDOUT, L2, false)
+    end
     f_calls = 0
     while α >= αmin
         f_calls += 1
         val, slopeα = ϕ((α, αI))
         δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2
         δslope = mulhess(L2, α, αI)
-        # r0, r1 = abs(val - (L0 + δval)) / (c1*abs(val-L0)), norm(slopeα - (L1 + δslope))/(c2*norm(slopeα-L1))
-        # @show val L0 L0+δval
-        # @show slopeα L1 L1+δslope
-        # @show (α, αI, r0, r1)
+        if show_linesearch
+            @show (α, αI)
+            @show val L0 L0+δval
+            @show slopeα L1 L1+δslope
+            r0, r1 = (val - (L0 + δval)) / (c1*abs(val-L0)), (slopeα - (L1 + δslope))./(c2*(slopeα-L1))
+            @show (r0, r1)
+        end
         if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) &&
                             all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1))
             return α, αI, f_calls, f_calls
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 66ba9829f..ebccf1035 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -70,8 +70,11 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
     mc = nconstraints(constraints)
     constr_c = Array{T}(mc)
     constraints.c!(initial_x, constr_c)
-    isinterior(constraints, initial_x, constr_c) || (warn("initial guess is not an interior point"); Base.show_backtrace(STDOUT, backtrace()))
-
+    if !isinterior(constraints, initial_x, constr_c)
+        warn("initial guess is not an interior point")
+        Base.show_backtrace(STDERR, backtrace())
+        println(STDERR)
+    end
     # Allocate fields for the objective function
     n = length(initial_x)
     g = Array(T, n)
@@ -192,7 +195,7 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     state
 end
 
-function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton)
+function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton, options)
     state.f_x_previous, state.L_previous = state.f_x, state.L
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
     state = solve_step!(state, constraints)
@@ -217,7 +220,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     # Determine the actual distance of movement along the search line
     ϕ = linesearch_anon(d, constraints, state, method)
     state.alpha, αI, f_update, g_update =
-        method.linesearch!(ϕ, T(1), αmax, αImax, qp)
+        method.linesearch!(ϕ, T(1), αmax, αImax, qp; show_linesearch=options.show_linesearch)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
 
     # Maintain a record of previous position
diff --git a/src/types.jl b/src/types.jl
index 72a2ffcb2..aab457e8d 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -13,6 +13,7 @@ immutable OptimizationOptions{TCallback <: Union{Void, Function}}
     store_trace::Bool
     show_trace::Bool
     extended_trace::Bool
+    show_linesearch::Bool
     autodiff::Bool
     show_every::Int
     callback::TCallback
@@ -30,6 +31,7 @@ function OptimizationOptions(;
         store_trace::Bool = false,
         show_trace::Bool = false,
         extended_trace::Bool = false,
+        show_linesearch::Bool = false,
         autodiff::Bool = false,
         show_every::Integer = 1,
         callback = nothing,
@@ -42,8 +44,8 @@ function OptimizationOptions(;
     end
     OptimizationOptions{typeof(callback)}(
         Float64(x_tol), Float64(f_tol), Float64(g_tol), Int(successive_f_tol),
-        Int(iterations), store_trace, show_trace, extended_trace, autodiff,
-        Int(show_every), callback, time_limit, μfactor, μ0)
+        Int(iterations), store_trace, show_trace, extended_trace, show_linesearch,
+        autodiff, Int(show_every), callback, time_limit, μfactor, μ0)
 end
 
 function print_header(options::OptimizationOptions)
diff --git a/test/constraints.jl b/test/constraints.jl
index ecd072a29..3f0cd3830 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -430,7 +430,7 @@ ConstraintBounds:
             constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [0.0], [])...)
             state = Optim.initial_state(method, options, d, constraints, [μ/F*10])
             for i = 1:10
-                Optim.update_state!(d, constraints, state, method)
+                Optim.update_state!(d, constraints, state, method, options)
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end
@@ -440,7 +440,7 @@ ConstraintBounds:
             constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...)
             state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
             for i = 1:10
-                Optim.update_state!(d, constraints, state, method)
+                Optim.update_state!(d, constraints, state, method, options)
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end
@@ -455,7 +455,7 @@ ConstraintBounds:
                 [], [], σswap(σ, [Float64(σ)], [])...)
             state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
             for i = 1:10
-                Optim.update_state!(d, constraints, state, method)
+                Optim.update_state!(d, constraints, state, method, options)
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end

From b250e77166fff32a2760837f32ec6a9c01794c9d Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 10:12:02 -0600
Subject: [PATCH 35/40] Switch back to single-component alpha in linesearch

The slope criterion is easier, and there are fewer factorizations needed this way.
This also improves printing during linesearch.
---
 src/interior.jl        | 13 +++++++-----
 src/iplinesearch.jl    | 29 +++++++++++--------------
 src/ipnewton.jl        | 48 ++++++++++++++++++++++++------------------
 src/utilities/trace.jl | 10 +++++----
 test/constraints.jl    | 27 ++++++++++++------------
 5 files changed, 67 insertions(+), 60 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 497b142b8..eef0d72b1 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -87,6 +87,9 @@ Base.convert{T}(::Type{BarrierStateVars{T}}, bstate::BarrierStateVars) =
                      convert(Array{T}, bstate.λxE),
                      convert(Array{T}, bstate.λcE))
 
+Base.isempty(bstate::BarrierStateVars) = isempty(bstate.slack_x) &
+    isempty(bstate.slack_c) & isempty(bstate.λxE) & isempty(bstate.λcE)
+
 Base.eltype{T}(::Type{BarrierStateVars{T}}) = T
 Base.eltype(sv::BarrierStateVars) = eltype(typeof(sv))
 
@@ -169,7 +172,7 @@ ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars
 ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, α::Number) =
     ls_update!(out, base, step, (α,α,α,α))
 ls_update!(out::BarrierStateVars, base::BarrierStateVars, step::BarrierStateVars, αs::AbstractVector) =
-    ls_update!(out, base, step, (αs...,))
+    ls_update!(out, base, step, αs[1]) # (αs...,))
 
 function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constraints::AbstractConstraintsFunction, initial_x::Array{T}, method::M, options::OptimizationOptions)
     t0 = time() # Initial time stamp used to control early stopping by options.time_limit
@@ -543,10 +546,10 @@ function lagrangian_lineslope!(αs, d, constraints, state, method::IPOptimizer{t
 end
 lagrangian_lineslope!(αs, d, constraints, state, method) = lagrangian_lineslope(αs, d, constraints, state)
 
-slopealpha(sx, gx, bstep, bgrad) = [dot(sx, gx),
-                                    dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c),
-                                    dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc),
-                                    dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)]
+slopealpha(sx, gx, bstep, bgrad) = dot(sx, gx) +
+    dot(bstep.slack_x, bgrad.slack_x) + dot(bstep.slack_c, bgrad.slack_c) +
+    dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc) +
+    dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)
 
 function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)})
     αs->lagrangian_lineslope!(αs, d, constraints, state, method)
diff --git a/src/iplinesearch.jl b/src/iplinesearch.jl
index 4c2d690c2..647f6a7a9 100644
--- a/src/iplinesearch.jl
+++ b/src/iplinesearch.jl
@@ -18,38 +18,33 @@ function backtrack_constrained(ϕ, α, αmax, αImax, Lcoefsα,
     return zero(α), zero(αI), f_calls, 0
 end
 
-function backtrack_constrained_grad(ϕ, α, αmax, αImax, Lcoefsα,
+function backtrack_constrained_grad(ϕ, α, αmax, Lcoefsα,
                                     c1 = 0.9, c2 = 0.9, ρ=oftype(α, 0.5),
                                     αminfrac = sqrt(eps(one(α))); show_linesearch::Bool=false)
-    α, αI = min(α, 0.999*αmax), min(α, 0.999*αImax)
+    α = min(α, 0.999*αmax)
     αmin = αminfrac * α
     L0, L1, L2 = Lcoefsα
     if show_linesearch
-        println("L0 = $L0, L1 = $L1, L2 = ")
-        Base.showarray(STDOUT, L2, false)
+        println("L0 = $L0, L1 = $L1, L2 = $L2")
     end
     f_calls = 0
     while α >= αmin
         f_calls += 1
-        val, slopeα = ϕ((α, αI))
-        δval = evalgrad(L1, α, αI) + evalhess(L2, α, αI)/2
-        δslope = mulhess(L2, α, αI)
+        val, slopeα = ϕ(α)
+        δval = L1*α
+        δslope = L2*α
         if show_linesearch
-            @show (α, αI)
-            @show val L0 L0+δval
-            @show slopeα L1 L1+δslope
-            r0, r1 = (val - (L0 + δval)) / (c1*abs(val-L0)), (slopeα - (L1 + δslope))./(c2*(slopeα-L1))
-            @show (r0, r1)
+            println("α = $α, value: ($L0, $val, $(L0+δval)), slope: ($L1, $slopeα, $(L1+δslope))")
         end
         if isfinite(val) && val - (L0 + δval) <= c1*abs(val-L0) &&
-                            all(slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1))
-            return α, αI, f_calls, f_calls
+            (slopeα < c2*abs(L1) ||
+             slopeα - (L1 + δslope) .<= c2*abs.(slopeα-L1))
+            return α, f_calls, f_calls
         end
         α *= ρ
-        αI *= ρ
     end
-    ϕ((zero(α), zero(αI)))  # to ensure that state gets set appropriately
-    return zero(α), zero(αI), f_calls, f_calls
+    ϕ(zero(α))  # to ensure that state gets set appropriately
+    return zero(α), f_calls, f_calls
 end
 
 # Evaluate for a step parametrized as [α, α, αI, α]
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index ebccf1035..d25e08f06 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -177,20 +177,18 @@ function update_h!(d, constraints::TwiceDifferentiableConstraintsFunction, state
     λ = userλ(bstate.λc, constraints)
     λ[bounds.eqc] = -bstate.λcE  # the negative sign is from the Hessian
     constraints.h!(x, λ, Hxx)
-    state.HP = cholfact(Positive, Hxx, Val{true})
     # Add the Jacobian terms (JI'*Hss*JI)
-    Htilde = full(state.HP)
     JIc = view5(J, bounds.ineqc, :)
     Hssc = Diagonal(bstate.λc./bstate.slack_c)
     HJ = JIc'*Hssc*JIc
     for j = 1:n, i = 1:n
-        Htilde[i,j] += HJ[i,j]
+        Hxx[i,j] += HJ[i,j]
     end
     # Add the variable inequalities portions of J'*Hssx*J
     for (i,j) in enumerate(bounds.ineqx)
-        Htilde[j,j] += bstate.λx[i]/bstate.slack_x[i]
+        Hxx[j,j] += bstate.λx[i]/bstate.slack_x[i]
     end
-    state.Htilde = cholfact(Hermitian(Htilde))
+    state.Htilde = cholfact(Positive, state.H, Val{true})
 
     state
 end
@@ -198,7 +196,7 @@ end
 function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction, state::IPNewtonState{T}, method::IPNewton, options)
     state.f_x_previous, state.L_previous = state.f_x, state.L
     bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
-    state = solve_step!(state, constraints)
+    qp = solve_step!(state, constraints, options)
     # If a step α=1 will not change any of the parameters, we can quit now.
     # This prevents a futile linesearch.
     if is_smaller_eps(state.x, state.s) &&
@@ -208,19 +206,19 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
         is_smaller_eps(bstate.λc, bstep.λc)
         return false
     end
-    qp = quadratic_parameters(bounds, state)
+    # qp = quadratic_parameters(bounds, state)
 
     # Estimate αmax, the upper bound on distance of movement along the search line
-    αmax = αImax = convert(eltype(bstate), Inf)
+    αmax = convert(eltype(bstate), Inf)
     αmax = estimate_maxstep(αmax, bstate.slack_x, bstep.slack_x)
     αmax = estimate_maxstep(αmax, bstate.slack_c, bstep.slack_c)
-    αImax = estimate_maxstep(αImax, bstate.λx, bstep.λx)
-    αImax = estimate_maxstep(αImax, bstate.λc, bstep.λc)
+    αmax = estimate_maxstep(αmax, bstate.λx, bstep.λx)
+    αmax = estimate_maxstep(αmax, bstate.λc, bstep.λc)
 
     # Determine the actual distance of movement along the search line
     ϕ = linesearch_anon(d, constraints, state, method)
-    state.alpha, αI, f_update, g_update =
-        method.linesearch!(ϕ, T(1), αmax, αImax, qp; show_linesearch=options.show_linesearch)
+    state.alpha, f_update, g_update =
+        method.linesearch!(ϕ, T(1), αmax, qp; show_linesearch=options.show_linesearch)
     state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
 
     # Maintain a record of previous position
@@ -228,7 +226,7 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
 
     # Update current position # x = x + alpha * s
     ls_update!(state.x, state.x, state.s, state.alpha)
-    ls_update!(bstate, bstate, bstep, (state.alpha, αI))
+    ls_update!(bstate, bstate, bstep, state.alpha)
 
     # Evaluate the constraints at the new position
     constraints.c!(state.x, state.constr_c)
@@ -238,22 +236,28 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     false
 end
 
-function solve_step!(state::IPNewtonState, constraints)
+function solve_step!(state::IPNewtonState, constraints, options)
     x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
-    # Solve the Newton step
     Htilde = state.Htilde
+    # Solve the Newton step
     JE = jacobianE(state, bounds)
-    # Q, R, p = qr(JE', Val{true})
     gE = [bgrad.λxE;
           bgrad.λcE]
     M = JE*(Htilde \ JE')
     MF = cholfact(Positive, M, Val{true})
     ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde))
     Δx = Htilde \ (JE'*ΔλE - state.gtilde)
-    # TODO: don't require full here
-    if norm(gE) + norm(state.gtilde) < max(norm(gE - JE*Δx),
-                                           norm(full(Htilde)*Δx - JE'*ΔλE + state.gtilde))
+    # Use the real H in estimating the linesearch quadratic parameters
+    Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx
+    # Also check that the solution to the linear equations represents an improvement
+    Hpstepx = full(Htilde)*Δx - JE'*ΔλE  # TODO: don't use full here
+    if options.show_linesearch
+        println("|gx| = $(norm(state.gtilde)), |Hstepx + gx| = $(norm(Hpstepx+state.gtilde))")
+        println("|gE| = $(norm(gE)), |HstepλE + gE| = $(norm(HstepλE+gE))")
+    end
+    if norm(gE) + norm(state.gtilde) < max(norm(HstepλE + gE),
+                                           norm(Hpstepx  + state.gtilde))
         # Precision problems gave us a worse solution than the one we started with, abort
         fill!(s, 0)
         fill!(bstep, 0)
@@ -276,7 +280,9 @@ function solve_step!(state::IPNewtonState, constraints)
         # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
         bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
     end
-    state
+    # Solve for the quadratic parameters
+    qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE)
+    qp
 end
 
 function is_smaller_eps(ref, step)
@@ -290,7 +296,7 @@ end
 """
     quadratic_parameters(bounds, state) -> val, slopeα, Hα
 
-Return the parameters for the quadratic fit of the behavior of the
+OUTDATED! Return the parameters for the quadratic fit of the behavior of the
 lagrangian for positions parametrized as a function of the 4-vector
 `α = (αx, αs, αI, αE)`, where the step is
 
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 1ad4139dc..e62e7829f 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -124,11 +124,13 @@ function trace!(tr, state, iteration, method::IPOptimizer, options)
         dt["α"] = state.alpha
         dt["x"] = copy(state.x)
         dt["g(x)"] = copy(state.g)
-        dt["gtilde(x)"] = copy(state.gtilde)
         dt["h(x)"] = copy(state.H)
-        dt["bstate"] = copy(state.bstate)
-        dt["bgrad"] = copy(state.bgrad)
-        dt["c"] = copy(state.constr_c)
+        if !isempty(state.bstate)
+            dt["gtilde(x)"] = copy(state.gtilde)
+            dt["bstate"] = copy(state.bstate)
+            dt["bgrad"] = copy(state.bgrad)
+            dt["c"] = copy(state.constr_c)
+        end
     end
     g_norm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
     update!(tr,
diff --git a/test/constraints.jl b/test/constraints.jl
index 3f0cd3830..7449c03cb 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -161,14 +161,14 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ -μ./y
-        @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2)
+        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ -μ./y
-        @test Optim.Hf(constraints, state) ≈ eye(length(y),length(y)) + μ*Diagonal(1./y.^2)
+        @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
         lb, ub = rand(length(x))-2, rand(length(x))+1
         bounds = Optim.ConstraintBounds(lb, ub, [], [])
@@ -208,7 +208,7 @@ ConstraintBounds:
             gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp
         end
         @test Optim.gf(state) ≈ gxs
-        @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs)
+        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], lb, ub)
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
@@ -217,7 +217,7 @@ ConstraintBounds:
         copy!(state.bstate.λc, bstate.λx)
         setstate!(state, μ, d0, constraints, method)
         @test Optim.gf(state) ≈ gxs
-        @test Optim.Hf(constraints, state) ≈ Diagonal(1 + hxs)
+        @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
         cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
         cfun! = (x, c) -> copy!(c, cfun(x))
@@ -282,10 +282,11 @@ ConstraintBounds:
         # hxx = μ*JI'*Diagonal(1./bstate.slack_c.^2)*JI - hineq
         # gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - μ(bgrad.λc ./ bstate.slack_c.^2))
         # Primal-dual
-        hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
+#        hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
+        hxx = -hineq + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
         gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c))
         @test Optim.gf(state) ≈ gf
-        @test Optim.Hf(constraints, state) ≈ hxx
+        @test Optim.Hf(constraints, state) ≈ full(cholfact(Positive, hxx, Val{true}))
     end
 
     @testset "IPNewton initialization" begin
@@ -374,14 +375,15 @@ ConstraintBounds:
             # Note that state must be fully up-to-date, and you must
             # have also called Optim.solve_step!
             p = Optim.pack_vec(state.x, state.bstate)
-            chunksize = min(8, max(length(p), 4))  # since αs is of length 4
+            chunksize = 1 #min(8, length(p))
             TD = ForwardDiff.Dual{chunksize,eltype(p)}
             TD2 = ForwardDiff.Dual{chunksize,ForwardDiff.Dual{chunksize,eltype(p)}}
             stated = convert(Optim.IPNewtonState{TD,1}, state)
             stated2 = convert(Optim.IPNewtonState{TD2,1}, state)
             ϕd = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated)
             ϕd2 = αs->Optim.lagrangian_linefunc(αs, d, constraints, stated2)
-            ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4))
+#            ForwardDiff.gradient(ϕd, zeros(4)), ForwardDiff.hessian(ϕd2, zeros(4))
+            ForwardDiff.gradient(ϕd, [0.0]), ForwardDiff.hessian(ϕd2, [0.0])
         end
         F = 1000
         d = TwiceDifferentiableFunction(x->F*x[1], (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
@@ -392,13 +394,12 @@ ConstraintBounds:
         # Nonnegativity (the case that doesn't require slack variables)
         constraints = TwiceDifferentiableConstraintsFunction([0.0], [])
         state = Optim.initial_state(method, options, d, constraints, [x0])
-        Optim.solve_step!(state, constraints)
+        qp = Optim.solve_step!(state, constraints)
         @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0)
-        qp = Optim.quadratic_parameters(constraints.bounds, state)
         g0, H0 = autoqp(d, constraints, state)
         @test qp[1] ≈ F*x0-μ*log(x0)
-        @test qp[2] ≈ g0 #-(F-μ/x0)^2*x0^2/μ
-        @test qp[3] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2
+        @test [qp[2]] ≈ g0 #-(F-μ/x0)^2*x0^2/μ
+        @test [qp[3]] ≈ H0 # μ/x0^2*(x0 - F*x0^2/μ)^2
         bstate, bstep, bounds = state.bstate, state.bstep, constraints.bounds
         αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx,
                                            state.s[bounds.ineqx].*bounds.σx)
@@ -406,7 +407,7 @@ ConstraintBounds:
         val0 = ϕ((0,0))
         val0 = isa(val0, Tuple) ? val0[1] : val0
         @test val0 ≈ qp[1]
-        α, αI, nf, ng = method.linesearch!(ϕ, 1.0, αmax, Inf, qp)
+        α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)
         @test α > 1e-3
     end
 

From 5bade6574c2de42d76076adf3de65b6854a62110 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 11:04:03 -0600
Subject: [PATCH 36/40] Adopt an adaptive barrier penalty based on
 complementarity

---
 src/interior.jl | 51 ++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index eef0d72b1..725e17f4c 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -184,16 +184,13 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
 
     x_converged, f_converged, counter_f_tol = false, false, 0
-    gnorm = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
-    g_converged = gnorm < options.g_tol
+    g_converged = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf) < options.g_tol
 
     converged = g_converged
-    iteration, iterationμ = 0, 0
+    iteration = 0
 
     options.show_trace && print_header(method)
 
-    Δfmax = zero(state.f_x)
-
     while !converged && !stopped && iteration < options.iterations
         # If tracing, update trace with trace!. If a callback is provided, it
         # should have boolean return value that controls the variable stopped_by_callback.
@@ -202,10 +199,14 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
             stopped_by_callback = trace!(tr, state, iteration, method, options)
         end
         iteration += 1
-        iterationμ += 1
 
         update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS)
-        update_asneeded_fg!(d, constraints, state, method)
+
+        # Adaptive μ
+        μ, ξ = complementarity_μ(state.bstate)
+        state.μ = μ
+        update_fg!(d, constraints, state, method)
+
         x_converged, f_converged,
         g_converged, converged = assess_convergence(state, options)
         # With equality constraints, optimization is not necessarily
@@ -217,25 +218,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
         # declaring convergence.
         counter_f_tol = f_converged ? counter_f_tol+1 : 0
         converged = x_converged | g_converged | (counter_f_tol > options.successive_f_tol)
-        gnormnew = vecnorm(state.g, Inf) + vecnorm(state.bgrad, Inf)
-
-        Δf = abs(state.f_x - state.f_x_previous)
-        if iterationμ > 1
-            Δfmax = max(Δfmax, abs(state.f_x - state.f_x_previous))
-        end
-
-        # Test whether we need to decrease the barrier penalty
-        if iterationμ > 1 && (converged || 100*gnormnew < gnorm || 100*Δf < Δfmax)
-            # Since iterationμ > 1 we must have accomplished real
-            # work, so it's worth trying to decrease the barrier
-            # penalty further.
-            shrink_μ!(d, constraints, state, method, options)
-            iterationμ = 0
-            converged = false
-            gnormnew = oftype(gnormnew, NaN)
-            Δfmax = zero(Δfmax)
-        end
-        gnorm = gnormnew
 
         # We don't use the Hessian for anything if we have declared convergence,
         # so we might as well not make the (expensive) update if converged == true
@@ -867,6 +849,23 @@ function shrink_μ!(d, constraints, state, method, options)
     update_fg!(d, constraints, state, method)
 end
 
+function complementarity_μ(bstate)
+    # Adaptively update μ using the complementarity condition and the
+    # coordinate-by-coordinate deviation from the mean. See Nodecal &
+    # Wright, 2nd ed., section 19.3.
+    m = max(length(bstate.λx) + length(bstate.λc), 1)
+    μmean = (dot(bstate.λx, bstate.slack_x) + dot(bstate.λc, bstate.slack_c))/m
+    ξ = oftype(μmean, 1)
+    if !isempty(bstate.slack_x)
+        ξ = min(ξ, Base.minimum(bstate.λx .* bstate.slack_x)/μmean)
+    end
+    if !isempty(bstate.slack_c)
+        ξ = min(ξ, Base.minimum(bstate.λc .* bstate.slack_c)/μmean)
+    end
+    μ = (min((1-ξ)/ξ/20, 2))^3/10 * μmean
+    μ, ξ
+end
+
 function qrregularize!(QRF)
     R = QRF[:R]
     for i = 1:size(R, 1)

From 39eb0019e85326ae79a0f32db91db282cab714ce Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 11:04:17 -0600
Subject: [PATCH 37/40] Introduce a primal-dual guard condition

---
 src/ipnewton.jl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index d25e08f06..eaa4df201 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -228,6 +228,18 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
     ls_update!(state.x, state.x, state.s, state.alpha)
     ls_update!(bstate, bstate, bstep, state.alpha)
 
+    # Ensure that the primal-dual approach does not deviate too much from primal
+    # (See Waechter & Biegler 2006, eq. 16)
+    μ = state.μ
+    for i = 1:length(bstate.slack_x)
+        p = μ/bstate.slack_x[i]
+        bstate.λx[i] = max(min(bstate.λx[i], 10^10*p), p/10^10)
+    end
+    for i = 1:length(bstate.slack_c)
+        p = μ/bstate.slack_c[i]
+        bstate.λc[i] = max(min(bstate.λc[i], 10^10*p), p/10^10)
+    end
+
     # Evaluate the constraints at the new position
     constraints.c!(state.x, state.constr_c)
     constraints.jacobian!(state.x, state.constr_J)

From e79a3c7e464c6091f4fcc79cd08624593f9f3fda Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 14:13:39 -0600
Subject: [PATCH 38/40] =?UTF-8?q?Switch=20to=20a=20predictor=20algorithm?=
 =?UTF-8?q?=20for=20computing=20=CE=BC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was motivated by the observation that the deviation-based algorithm doesn't work when there's only one constraint. The predictor algorithm has a little trouble with this case too, but the problem is not nearly so severe: it *can* increase μ, so you just have to prevent it from decreasing μ all the way to 0. Moreover, it seems a little more regular in its changes.
---
 src/interior.jl | 20 ---------------
 src/ipnewton.jl | 67 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 725e17f4c..3b99df757 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -202,9 +202,6 @@ function optimize{T, M<:ConstrainedOptimizer}(d::AbstractOptimFunction, constrai
 
         update_state!(d, constraints, state, method, options) && break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS)
 
-        # Adaptive μ
-        μ, ξ = complementarity_μ(state.bstate)
-        state.μ = μ
         update_fg!(d, constraints, state, method)
 
         x_converged, f_converged,
@@ -849,23 +846,6 @@ function shrink_μ!(d, constraints, state, method, options)
     update_fg!(d, constraints, state, method)
 end
 
-function complementarity_μ(bstate)
-    # Adaptively update μ using the complementarity condition and the
-    # coordinate-by-coordinate deviation from the mean. See Nodecal &
-    # Wright, 2nd ed., section 19.3.
-    m = max(length(bstate.λx) + length(bstate.λc), 1)
-    μmean = (dot(bstate.λx, bstate.slack_x) + dot(bstate.λc, bstate.slack_c))/m
-    ξ = oftype(μmean, 1)
-    if !isempty(bstate.slack_x)
-        ξ = min(ξ, Base.minimum(bstate.λx .* bstate.slack_x)/μmean)
-    end
-    if !isempty(bstate.slack_c)
-        ξ = min(ξ, Base.minimum(bstate.λc .* bstate.slack_c)/μmean)
-    end
-    μ = (min((1-ξ)/ξ/20, 2))^3/10 * μmean
-    μ, ξ
-end
-
 function qrregularize!(QRF)
     R = QRF[:R]
     for i = 1:size(R, 1)
diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index eaa4df201..71b019e1d 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -16,6 +16,7 @@ type IPNewtonState{T,N} <: AbstractBarrierState
     s::Array{T,N}  # step for x
     # Barrier penalty fields
     μ::T                  # coefficient of the barrier penalty
+    μnext::T              # μ for the next iteration
     L::T                  # value of the Lagrangian (objective + barrier + equality)
     L_previous::T
     bstate::BarrierStateVars{T}   # value of slack and λ variables (current "position")
@@ -46,6 +47,7 @@ function Base.convert{T,S,N}(::Type{IPNewtonState{T,N}}, state::IPNewtonState{S,
                   state.Hd,
                   convert(Array{T}, state.s),
                   T(state.μ),
+                  T(state.μnext),
                   T(state.L),
                   T(state.L_previous),
                   convert(BarrierStateVars{T}, state.bstate),
@@ -114,6 +116,7 @@ function initial_state{T}(method::IPNewton, options, d::TwiceDifferentiableFunct
         Hd,
         similar(initial_x), # Maintain current x-search direction in state.s
         μ,
+        μ,
         T(NaN),
         T(NaN),
         bstate,
@@ -149,19 +152,21 @@ end
 
 function update_gtilde!(d, constraints::TwiceDifferentiableConstraintsFunction, state, method::IPNewton)
     # Calculate the modified x-gradient for the block-eliminated problem
+    # gtilde is the gradient for the affine-scaling problem, i.e.,
+    # with μ=0, used in the adaptive setting of μ. Once we calculate μ we'll correct it
     gtilde, bstate, bgrad = state.gtilde, state.bstate, state.bgrad
     bounds = constraints.bounds
     copy!(gtilde, state.g)
     JIc = view5(state.constr_J, bounds.ineqc, :)
     if !isempty(JIc)
         Hssc = Diagonal(bstate.λc./bstate.slack_c)
-        gc = JIc'*(Diagonal(bounds.σc) * (bgrad.slack_c - Hssc*bgrad.λc))
+        gc = JIc'*(Diagonal(bounds.σc) * (bstate.λc - Hssc*bgrad.λc))  # NOT bgrad.slack_c
         for i = 1:length(gtilde)
             gtilde[i] += gc[i]
         end
     end
     for (i,j) in enumerate(bounds.ineqx)
-        gxi = bounds.σx[i]*(bgrad.slack_x[i] -  bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i])
+        gxi = bounds.σx[i]*(bstate.λx[i] -  bgrad.λx[i]*bstate.λx[i]/bstate.slack_x[i])
         gtilde[j] += gxi
     end
     state
@@ -239,11 +244,12 @@ function update_state!{T}(d, constraints::TwiceDifferentiableConstraintsFunction
         p = μ/bstate.slack_c[i]
         bstate.λc[i] = max(min(bstate.λc[i], 10^10*p), p/10^10)
     end
+    state.μ = state.μnext
 
     # Evaluate the constraints at the new position
     constraints.c!(state.x, state.constr_c)
     constraints.jacobian!(state.x, state.constr_J)
-    @assert state.ev == equality_violation(constraints, state)
+    state.ev == equality_violation(constraints, state)
 
     false
 end
@@ -251,19 +257,18 @@ end
 function solve_step!(state::IPNewtonState, constraints, options)
     x, s, μ, bounds = state.x, state.s, state.μ, constraints.bounds
     bstate, bstep, bgrad = state.bstate, state.bstep, state.bgrad
-    Htilde = state.Htilde
+    J, Htilde = state.constr_J, state.Htilde
     # Solve the Newton step
     JE = jacobianE(state, bounds)
     gE = [bgrad.λxE;
           bgrad.λcE]
     M = JE*(Htilde \ JE')
     MF = cholfact(Positive, M, Val{true})
-    ΔλE = MF \ (gE + JE * (Htilde \ state.gtilde))
-    Δx = Htilde \ (JE'*ΔλE - state.gtilde)
-    # Use the real H in estimating the linesearch quadratic parameters
-    Hstepx, HstepλE = state.H*Δx - JE'*ΔλE, -JE*Δx
-    # Also check that the solution to the linear equations represents an improvement
-    Hpstepx = full(Htilde)*Δx - JE'*ΔλE  # TODO: don't use full here
+    # These are a solution to the affine-scaling problem (with μ=0)
+    ΔλE0 = MF \ (gE + JE * (Htilde \ state.gtilde))
+    Δx0 = Htilde \ (JE'*ΔλE0 - state.gtilde)
+    # Check that the solution to the linear equations represents an improvement
+    Hpstepx, HstepλE = full(Htilde)*Δx0 - JE'*ΔλE0, -JE*Δx0  # TODO: don't use full here
     if options.show_linesearch
         println("|gx| = $(norm(state.gtilde)), |Hstepx + gx| = $(norm(Hpstepx+state.gtilde))")
         println("|gE| = $(norm(gE)), |HstepλE + gE| = $(norm(HstepλE+gE))")
@@ -275,26 +280,54 @@ function solve_step!(state::IPNewtonState, constraints, options)
         fill!(bstep, 0)
         return state
     end
+    # Set μ (see the predictor strategy in Nocedal & Wright, 2nd ed., section 19.3)
+    solve_slack!(bstep, Δx0, bounds, bstate, bgrad, J, zero(state.μ)) # store temporarily in bstep
+    αs = convert(eltype(bstate), 1.0)
+    αs = estimate_maxstep(αs, bstate.slack_x, bstep.slack_x)
+    αs = estimate_maxstep(αs, bstate.slack_c, bstep.slack_c)
+    αλ = convert(eltype(bstate), 1.0)
+    αλ = estimate_maxstep(αλ, bstate.λx, bstep.λx)
+    αλ = estimate_maxstep(αλ, bstate.λc, bstep.λc)
+    m = max(1, length(bstate.slack_x) + length(bstate.slack_c))
+    μaff = (dot(bstate.slack_x + αs*bstep.slack_x, bstate.λx + αλ*bstep.λx) +
+            dot(bstate.slack_c + αs*bstep.slack_c, bstate.λc + αλ*bstep.λc))/m
+    μmean = (dot(bstate.slack_x, bstate.λx) + dot(bstate.slack_c, bstate.λc))/m
+    # When there's only one constraint, μaff can be exactly zero. So limit the decrease.
+    state.μnext = max((μaff/μmean)^3 * μmean, μmean/10)
+    μ = state.μ
+    # Solve for the *real* step (including μ)
+    μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c]
+    gtildeμ = state.gtilde  - jacobianI(state, bounds)' * μsinv
+    ΔλE = MF \ (gE + JE * (Htilde \ gtildeμ))
+    Δx = Htilde \ (JE'*ΔλE - gtildeμ)
     copy!(s, Δx)
     k = unpack_vec!(bstep.λxE, ΔλE, 0)
     k = unpack_vec!(bstep.λcE, ΔλE, k)
     k == length(ΔλE) || error("exhausted targets before ΔλE")
+    solve_slack!(bstep, Δx, bounds, bstate, bgrad, J, μ)
+    # Solve for the quadratic parameters (use the real H, not the posdef H)
+    Hstepx, HstepλE  = state.H*Δx - JE'*ΔλE, -JE*Δx
+    qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE)
+    qp
+end
+
+function solve_slack!(bstep, s, bounds, bstate, bgrad, J, μ)
     # Solve for the slack variable and λI updates
     for (i, j) in enumerate(bounds.ineqx)
         bstep.slack_x[i] = -bgrad.λx[i] + bounds.σx[i]*s[j]
         # bstep.λx[i] = -bgrad.slack_x[i] - μ*bstep.slack_x[i]/bstate.slack_x[i]^2
-        bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i]
+        # bstep.λx[i] = -bgrad.slack_x[i] - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i]
+        bstep.λx[i] = -(-μ/bstate.slack_x[i] + bstate.λx[i]) - bstate.λx[i]*bstep.slack_x[i]/bstate.slack_x[i]
     end
-    JIc = view5(state.constr_J, bounds.ineqc, :)
-    SigmaJIΔx = Diagonal(bounds.σc)*(JIc*state.s)
+    JIc = view5(J, bounds.ineqc, :)
+    SigmaJIΔx = Diagonal(bounds.σc)*(JIc*s)
     for i = 1:length(bstep.λc)
         bstep.slack_c[i] = -bgrad.λc[i] + SigmaJIΔx[i]
         # bstep.λc[i] = -bgrad.slack_c[i] - μ*bstep.slack_c[i]/bstate.slack_c[i]^2
-        bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
+        # bstep.λc[i] = -bgrad.slack_c[i] - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
+        bstep.λc[i] = -(-μ/bstate.slack_c[i] + bstate.λc[i]) - bstate.λc[i]*bstep.slack_c[i]/bstate.slack_c[i]
     end
-    # Solve for the quadratic parameters
-    qp = state.L, slopealpha(state.s, state.g, bstep, bgrad), dot(Δx, Hstepx) + dot(ΔλE, HstepλE)
-    qp
+    bstep
 end
 
 function is_smaller_eps(ref, step)

From ad06e8a5cfc11bf354cd6eb99f497346485d027f Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 14:46:31 -0600
Subject: [PATCH 39/40] Fix tests

---
 src/ipnewton.jl     |  8 +++++++-
 test/constraints.jl | 24 +++++++++++++-----------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/ipnewton.jl b/src/ipnewton.jl
index 71b019e1d..d71e2bd2c 100644
--- a/src/ipnewton.jl
+++ b/src/ipnewton.jl
@@ -382,4 +382,10 @@ function Hf(bounds::ConstraintBounds, state)
           -JE zeros(eltype(JE), size(JE, 1), size(JE, 1))]
 end
 Hf(constraints, state) = Hf(constraints.bounds, state)
-gf(state) = [state.gtilde; state.bgrad.λxE; state.bgrad.λcE]
+function gf(bounds::ConstraintBounds, state)
+    bstate, μ = state.bstate, state.μ
+    μsinv = μ * [bounds.σx./bstate.slack_x; bounds.σc./bstate.slack_c]
+    gtildeμ = state.gtilde  - jacobianI(state, bounds)' * μsinv
+    [gtildeμ; state.bgrad.λxE; state.bgrad.λcE]
+end
+gf(constraints, state) = gf(constraints.bounds, state)
diff --git a/test/constraints.jl b/test/constraints.jl
index 7449c03cb..6646e53e7 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -113,7 +113,7 @@ ConstraintBounds:
         constraints = TwiceDifferentiableConstraintsFunction(
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, dg, constraints, x)
-        @test Optim.gf(state) ≈ gx
+        @test Optim.gf(bounds, state) ≈ gx
         @test Optim.Hf(constraints, state) ≈ H
         ## Pure equality constraints on variables
         xbar = fill(0.2, length(x))
@@ -132,7 +132,7 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λxE, bstate.λxE)
         setstate!(state, μ, d0, constraints, method)
-        @test Optim.gf(state) ≈ [gx; xbar-x]
+        @test Optim.gf(bounds, state) ≈ [gx; xbar-x]
         n = length(x)
         @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         # Now again using the generic machinery
@@ -141,7 +141,7 @@ ConstraintBounds:
         state = Optim.initial_state(method, options, d0, constraints, x)
         copy!(state.bstate.λcE, bstate.λxE)
         setstate!(state, μ, d0, constraints, method)
-        @test Optim.gf(state) ≈ [gx; xbar-x]
+        @test Optim.gf(bounds, state) ≈ [gx; xbar-x]
         n = length(x)
         @test Optim.Hf(constraints, state) ≈ [eye(n,n) -eye(n,n); -eye(n,n) zeros(n,n)]
         ## Nonnegativity constraints
@@ -160,14 +160,14 @@ ConstraintBounds:
             (x,c)->nothing, (x,J)->nothing, (x,λ,H)->nothing, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
-        @test Optim.gf(state) ≈ -μ./y
+        @test Optim.gf(bounds, state) ≈ -μ./y
         @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], zeros(length(x)), fill(Inf,length(x)))
         constraints = TwiceDifferentiableConstraintsFunction(cvar!, cvarJ!, cvarh!, bounds)
         state = Optim.initial_state(method, options, d0, constraints, y)
         setstate!(state, μ, d0, constraints, method)
-        @test Optim.gf(state) ≈ -μ./y
+        @test Optim.gf(bounds, state) ≈ -μ./y
         @test Optim.Hf(constraints, state) ≈ μ*Diagonal(1./y.^2)
         ## General inequality constraints on variables
         lb, ub = rand(length(x))-2, rand(length(x))+1
@@ -207,7 +207,7 @@ ConstraintBounds:
             hxs[j] += htmp
             gxs[j] += bounds.σx[i]*(gstmp - λ[i]) - bounds.σx[i]*htmp*gλtmp
         end
-        @test Optim.gf(state) ≈ gxs
+        @test Optim.gf(bounds, state) ≈ gxs
         @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         # Now again using the generic machinery
         bounds = Optim.ConstraintBounds([], [], lb, ub)
@@ -216,7 +216,7 @@ ConstraintBounds:
         copy!(state.bstate.slack_c, bstate.slack_x)
         copy!(state.bstate.λc, bstate.λx)
         setstate!(state, μ, d0, constraints, method)
-        @test Optim.gf(state) ≈ gxs
+        @test Optim.gf(bounds, state) ≈ gxs
         @test Optim.Hf(constraints, state) ≈ Diagonal(hxs)
         ## Nonlinear equality constraints
         cfun = x->[x[1]^2+x[2]^2, x[2]*x[3]^2]
@@ -248,7 +248,7 @@ ConstraintBounds:
         setstate!(state, μ, d0, constraints, method)
         heq = zeros(length(x), length(x))
         ch!(x, bstate.λcE, heq)
-        @test Optim.gf(state) ≈ [gx; cbar-c]
+        @test Optim.gf(bounds, state) ≈ [gx; cbar-c]
         @test Optim.Hf(constraints, state) ≈ [full(cholfact(Positive, heq)) -J';
                                               -J zeros(size(J,1), size(J,1))]
         ## Nonlinear inequality constraints
@@ -285,7 +285,7 @@ ConstraintBounds:
 #        hxx = full(cholfact(Positive, -hineq)) + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
         hxx = -hineq + JI'*Diagonal(bstate.λc./bstate.slack_c)*JI
         gf = -JI'*(bounds.σc .* bstate.λc) + JI'*Diagonal(bounds.σc)*(bgrad.slack_c - (bgrad.λc .* bstate.λc ./ bstate.slack_c))
-        @test Optim.gf(state) ≈ gf
+        @test Optim.gf(bounds, state) ≈ gf
         @test Optim.Hf(constraints, state) ≈ full(cholfact(Positive, hxx, Val{true}))
     end
 
@@ -394,7 +394,7 @@ ConstraintBounds:
         # Nonnegativity (the case that doesn't require slack variables)
         constraints = TwiceDifferentiableConstraintsFunction([0.0], [])
         state = Optim.initial_state(method, options, d, constraints, [x0])
-        qp = Optim.solve_step!(state, constraints)
+        qp = Optim.solve_step!(state, constraints, options)
         @test state.s[1] ≈ -(F-μ/x0)/(state.bstate.λx[1]/x0)
         g0, H0 = autoqp(d, constraints, state)
         @test qp[1] ≈ F*x0-μ*log(x0)
@@ -432,16 +432,18 @@ ConstraintBounds:
             state = Optim.initial_state(method, options, d, constraints, [μ/F*10])
             for i = 1:10
                 Optim.update_state!(d, constraints, state, method, options)
+                state.μ = μ
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end
-            @test state.x[1] ≈ μ/F
+            @test isapprox(state.x[1], μ/F, rtol=1e-4)
             # |x| ≥ 1, and check that we get slack precision better than eps(1.0)
             d = TwiceDifferentiableFunction(x->F*(x[1]-σ), (x,g) -> (g[1] = F), (x,h) -> (h[1,1] = 0))
             constraints = TwiceDifferentiableConstraintsFunction(σswap(σ, [Float64(σ)], [])...)
             state = Optim.initial_state(method, options, d, constraints, [(1+eps(1.0))*σ])
             for i = 1:10
                 Optim.update_state!(d, constraints, state, method, options)
+                state.μ = μ
                 Optim.update_fg!(d, constraints, state, method)
                 Optim.update_h!(d, constraints, state, method)
             end

From 1cbad4872ddefc483989996719f4b5e34fa83b8e Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 23 Nov 2016 15:00:57 -0600
Subject: [PATCH 40/40] Fix ambiguities, tests on julia 0.4

---
 src/interior.jl     | 33 +++++++++++++++++++++++----------
 test/constraints.jl |  6 +++---
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/interior.jl b/src/interior.jl
index 3b99df757..4a2e5ee2c 100644
--- a/src/interior.jl
+++ b/src/interior.jl
@@ -285,7 +285,7 @@ You can manually specify `μ` by supplying a numerical value for
 `μ0`. Whether calculated algorithmically or specified manually, the
 values of `λ` are set using the chosen `μ`.
 """
-function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β=1//100)
+function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Symbol,Number}, β::Number=1//100)
     if nconstraints(bounds) == 0 && nconstraints_x(bounds) == 0
         state.μ = 0
         fill!(state.bstate, 0)
@@ -341,7 +341,7 @@ function initialize_μ_λ!(state, bounds::ConstraintBounds, Hinfo, μ0::Union{Sy
     k == length(λE) || error("something is wrong")
     state
 end
-function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β=1//100)
+function initialize_μ_λ!(state, bounds::ConstraintBounds, μ0::Union{Number,Symbol}, β::Number=1//100)
     initialize_μ_λ!(state, bounds, nothing, μ0, β)
 end
 
@@ -490,13 +490,13 @@ end
 alphax(α::Number) = α
 alphax(αs::Union{Tuple,AbstractVector}) = αs[1]
 
-function lagrangian_linefunc!(α, αI, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
+function lagrangian_linefunc!(α, d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
     # For backtrack_constrained, the last evaluation is the one we
     # keep, so it's safe to store the results in state
-    state.f_x, state.L, state.ev = _lagrangian_linefunc(α, αI, d, constraints, state)
+    state.f_x, state.L, state.ev = _lagrangian_linefunc(α, d, constraints, state)
     state.L
 end
-lagrangian_linefunc!(α, αI, d, constraints, state, method) = lagrangian_linefunc(α, αI, d, constraints, state)
+lagrangian_linefunc!(α, d, constraints, state, method) = lagrangian_linefunc(α, d, constraints, state)
 
 
 ## for line searches that do use the gradient along the line
@@ -530,11 +530,22 @@ slopealpha(sx, gx, bstep, bgrad) = dot(sx, gx) +
     dot(bstep.λx, bgrad.λx) + dot(bstep.λc, bgrad.λc) +
     dot(bstep.λxE, bgrad.λxE) + dot(bstep.λcE, bgrad.λcE)
 
-function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)})
-    αs->lagrangian_lineslope!(αs, d, constraints, state, method)
-end
-function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
-    αs->lagrangian_linefunc!(αs, d, constraints, state, method)
+if VERSION >= v"0.5.0"
+    function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained_grad)})
+        αs->lagrangian_lineslope!(αs, d, constraints, state, method)
+    end
+    function linesearch_anon(d, constraints, state, method::IPOptimizer{typeof(backtrack_constrained)})
+        αs->lagrangian_linefunc!(αs, d, constraints, state, method)
+    end
+else
+    # 0.4 can't dispatch on a particular function
+    function linesearch_anon(d, constraints, state, method::IPOptimizer)
+        ls = method.linesearch!
+        if ls == backtrack_constrained_grad
+            return αs->lagrangian_lineslope!(αs, d, constraints, state, method)
+        end
+        αs->lagrangian_linefunc!(αs, d, constraints, state, method)
+    end
 end
 
 ## Computation of Lagrangian terms: barrier penalty
@@ -755,6 +766,7 @@ function isfeasible(constraints, x)
     isfeasible(constraints, x, c)
 end
 isfeasible(constraints::AbstractConstraintsFunction, x, c) = isfeasible(constraints.bounds, x, c)
+isfeasible(constraints::Void, state::AbstractBarrierState) = true
 isfeasible(constraints::Void, x) = true
 
 """
@@ -789,6 +801,7 @@ function isinterior(constraints, x)
     isinterior(constraints, x, c)
 end
 isinterior(constraints::AbstractConstraintsFunction, x, c) = isinterior(constraints.bounds, x, c)
+isinterior(constraints::Void, state::AbstractBarrierState) = true
 isinterior(constraints::Void, x) = true
 
 ## Utilities for representing total state as single vector
diff --git a/test/constraints.jl b/test/constraints.jl
index 6646e53e7..3521cbc40 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -322,7 +322,7 @@ ConstraintBounds:
         Optim.update_fg!(d, constraints, state, method)
         J = zeros(2,4)
         constraints.jacobian!(x, J)
-        eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal)
+        eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal)
         @test abs(dot(state.g, eqnormal)) < 1e-12  # orthogonal to equality constraint
         Pfg = f_g - dot(f_g, eqnormal)*eqnormal
         Pg = state.g - dot(state.g, eqnormal)*eqnormal
@@ -362,7 +362,7 @@ ConstraintBounds:
         Optim.update_fg!(d, constraints, state, method)
         J = zeros(2,4)
         constraints.jacobian!(x, J)
-        eqnormal = J[1,:]; eqnormal = eqnormal/norm(eqnormal)
+        eqnormal = vec(J[1,:]); eqnormal = eqnormal/norm(eqnormal)
         @test abs(dot(state.g, eqnormal)) < 1e-12  # orthogonal to equality constraint
         Pgx = gx - dot(gx, eqnormal)*eqnormal
         @test abs(dot(Pgx, state.g)/dot(Pgx,Pgx) - 1) <= 0.011
@@ -404,7 +404,7 @@ ConstraintBounds:
         αmax = Optim.estimate_maxstep(Inf, state.x[bounds.ineqx].*bounds.σx,
                                            state.s[bounds.ineqx].*bounds.σx)
         ϕ = Optim.linesearch_anon(d, constraints, state, method)
-        val0 = ϕ((0,0))
+        val0 = ϕ(0.0)
         val0 = isa(val0, Tuple) ? val0[1] : val0
         @test val0 ≈ qp[1]
         α, nf, ng = method.linesearch!(ϕ, 1.0, αmax, qp)