From 680edc760617a8e5ad93c9260fe791c9908a9e9d Mon Sep 17 00:00:00 2001 From: timholy Date: Sat, 8 Mar 2014 13:48:39 -0600 Subject: [PATCH 01/28] Add support for constraints and a interior-point methods --- src/Optim.jl | 11 +- src/cg.jl | 21 ++- src/constraints.jl | 121 ++++++++++++++ src/interior.jl | 275 ++++++++++++++++++++++++++++++++ src/linesearch/hz_linesearch.jl | 133 +++++++-------- 5 files changed, 488 insertions(+), 73 deletions(-) create mode 100644 src/constraints.jl create mode 100644 src/interior.jl diff --git a/src/Optim.jl b/src/Optim.jl index 64d10de41..37fa65c25 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -9,12 +9,18 @@ module Optim Base.setindex! export optimize, + interior, + linlsq, DifferentiableFunction, - TwiceDifferentiableFunction + TwiceDifferentiableFunction, + ConstraintsBox # Types include("types.jl") + # Types for constrained optimization + include("constraints.jl") + # Automatic differentiation utilities include("autodiff.jl") @@ -65,6 +71,9 @@ module Optim include("golden_section.jl") include("brent.jl") + # Constrained optimization algorithms + include("interior.jl") + # End-User Facing Wrapper Functions include("optimize.jl") diff --git a/src/cg.jl b/src/cg.jl index fb2834344..b175bc5f4 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -104,6 +104,8 @@ end function cg{T}(df::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -117,7 +119,9 @@ function cg{T}(df::Union(DifferentiableFunction, precondprep::Function = (P, x) -> nothing) # Maintain current state in x and previous state in x_previous - x, x_previous = copy(initial_x), copy(initial_x) + x = copy(initial_x) + project!(x, constraints) + x_previous = copy(x) # Count the total number of iterations iteration = 0 @@ -208,23 +212,28 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : inf(T) + # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = - alphatry(alpha, df, x, s, x_ls, gr_ls, lsr) + alphatry(alpha, df, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update + if alpha == zero(T) + x_converged = true + break + end + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position copy!(x_previous, x) # Update current position - for i in 1:n - @inbounds x[i] = x[i] + alpha * s[i] - end + step!(x, x, s, alpha, constraints) # Maintain a record of the previous gradient copy!(gr_previous, gr) diff --git a/src/constraints.jl b/src/constraints.jl new file mode 100644 index 000000000..9a4e30504 --- /dev/null +++ b/src/constraints.jl @@ -0,0 +1,121 @@ +abstract AbstractConstraints + +immutable ConstraintsNone <: AbstractConstraints end + +immutable ConstraintsBox{T,N} <: AbstractConstraints + lower::Array{T,N} + upper::Array{T,N} + + function ConstraintsBox(l::AbstractArray{T}, u::AbstractArray{T}) + size(l) == size(u) || error("The sizes of the bounds must match") + for i = 1:length(l) + l[i] <= u[i] || error("The lower bound must be smaller than the upper bound") + end + new(l, u) + end +end +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, infs(T, size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u::AbstractArray{T,N}) + +# FIXME: need a way to indicate equality/inequality +immutable ConstraintsL{T,M<:AbstractMatrix,V<:AbstractVector} <: AbstractConstraints + A::M + b::V +end + +# Generic constraints +immutable ConstraintsNL{F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints + funcs::Vector{F} +end + +type Constraints + bounds::AbstractConstraints + leq::AbstractConstraints + lineq::AbstractConstraints + nleq::AbstractConstraints + nlineq::AbstractConstraints + mu::Real +end +Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone()) + + +## feasible +feasible(x, constraints::ConstraintsNone, ineq::Bool) = true + +function feasible(x, constraints::ConstraintsBox) + l = constraints.lower + u = constraints.upper + for i = 1:length(x) + l[i] <= x[i] <= u[i] || return false + end + true +end + + +## project! +project!(x, constraints::AbstractConstraints) = x + +function project!(x::Array, bounds::ConstraintsBox) + for i = 1:length(x) + x[i] = max(bounds.lower[i], min(bounds.upper[i], x[i])) + end + x +end + +## step! 
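The step! helper defined just below folds projection into a trial step: it forms xtmp = x + alpha*s and then clamps the result back into the feasible set. A usage sketch with hypothetical values, assuming the definitions in this file:

    bounds = ConstraintsBox([-2.0, -2.0], [2.0, 2.0])
    x = [0.0, 0.0]; s = [5.0, 1.0]
    xtmp = similar(x)
    step!(xtmp, x, s, 1.0, bounds)   # xtmp == [2.0, 1.0]; the first coordinate was clamped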
+function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone{T}()) + (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree") + for i = 1:length(xtmp) + @inbounds xtmp[i] = x[i] + alpha*s[i] + end + project!(xtmp, constraints) +end + + +## toedge and tocorner +# For bounds constraints, +# - toedge returns the "distance along s" (the value of the coefficient alpha) to reach the boundary +# - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active) +# The first is the farthest you might consider searching in a barrier method. The second is the farthest that it +# makes sense to search in a projection method. +toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) + +function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the first coordinate to leave the box + alphamax = inf(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = min(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = min(alphamax, (ui-x[i])/si) + end + end + convert(T, alphamax) +end + +toedge(x, s, constraints::Constraints) = toedge(x, s, constraints.bounds) + +function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the last coordinate to leave the box + alphamax = zero(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = max(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = max(alphamax, (ui-x[i])/si) + else + return inf(T) + end + end + convert(T, alphamax) +end + +tocorner(x, s, constraints::Constraints) = tocorner(x, s, constraints.bounds) diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..be1835830 --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,275 @@ +############ +## +## Barriers +## +############ +# Note: barrier_g! and barrier_h! 
simply add to an existing g or H +# (they do not initialize with zeros) + +## Box constraints +function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return inf(T) + phi += log(xi-li) + end + if isfinite(ui) + xi <= ui || return inf(T) + phi += log(ui-xi) + end + end + -phi +end + +function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + g[i] -= 1/(xi-li) + g[i] += 1/(ui-xi) + end +end + +function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return inf(T) # no need to compute additional gradient terms + phi += log(xi-li) + g[i] -= 1/(xi-li) + end + if isfinite(ui) + xi <= ui || return inf(T) + phi += log(ui-xi) + g[i] += 1/(ui-xi) + end + end + -phi +end + +function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + dx = xi-li + H[i,i] += 1/(dx*dx) + dx = ui-xi + H[i,i] += 1/(dx*dx) + end +end + +## Linear inequalities + +## Nonlinear inequalities + + +######################### +## +## Combined cost function +## +######################### +combined_f(x, objective_f::Function, constraints::AbstractConstraints, t) = + t*objective_f(x) + barrier_f(x, constraints) + +function combined_g!(x, g, objective_g!::Function, constraints::AbstractConstraints, t) + objective_g!(x, g) + scale!(g, t) + barrier_fg!(x, g, constraints) +end + +function combined_fg!(x, g, objective_fg!::Function, constraints::AbstractConstraints, t) + f_x = t*objective_fg!(x, g) + scale!(g, t) + f_x += barrier_fg!(x, g, constraints) + f_x +end + +function combined_h!(x, H, objective_h!::Function, constraints::AbstractConstraints, t) + objective_h!(x, H) + scale!(H, t) + barrier_h!(x, H, constraints) +end + +# combined_f(x, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_f(x, objective.f, constraints, t) +# +# combined_g!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_g!(x, g, objective.g, constraints, t) +# +# combined_fg!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_fg!(x, g, objective.fg!, constraints, t) +# +# combined_h!(x, H, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = +# combined_h!(x, H, objective.h!, constraints, t) + + +######################################### +## +## Cost function for linear least squares +## +######################################### + +linlsq(A::Matrix, b::Vector) = + TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,H) -> linlsq_h!(x, H, A, b)) + +dummy(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) + +function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) + A_mul_B!(scratch, A, x) + f_x = 
zero(typeof(one(T)*one(T)+one(T)*one(T))) + for i = 1:length(g) + tmp = scratch[i] + tmp -= b[i] + f_x += tmp*tmp + scratch[i] = tmp + end + if !isempty(g) + At_mul_B!(g, A, scratch) + end + f_x/2 +end + +linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) + + +######################### +## +## Interior point methods +## +######################### +function interior_newton{T}(objective::TwiceDifferentiableFunction, + initial_x::Vector{T}, + constraints::AbstractConstraints; + t = one(T), mu = 10, eps_gap = 1e-12, + xtol::Real = convert(T,1e-32), + ftol::Real = convert(T,1e-8), + grtol::Real = convert(T,1e-8), + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + xtmp = similar(x) + x_previous = similar(x) + m = length(x) + gr = similar(x) + H = Array(T, m, m) + lsr = LineSearchResults(T) + + tr = OptimizationTrace() + tracing = store_trace || show_trace || extended_trace + + df = DifferentiableFunction(dummy, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + iteration, f_calls, g_calls = 0, 0, 0 + f_x = f_x_previous = inf(T) + while m/t > eps_gap && iteration < iterations + f_x_previous = f_x + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + combined_h!(x, H, objective.h!, constraints, t) + @newtontrace + cH = cholfact!(H) + s = -(cH\gr) + clear!(lsr) + push!(lsr, zero(T), f_x, dot(s,gr)) + alphamax = toedge(x, s, constraints) + alpha = alphamax < 1 ? 0.9*alphamax : 1.0 + alpha, _f_calls, _g_calls = + hz_linesearch!(df, x, s, xtmp, gr, lsr, alpha, true, constraints, alphamax) + copy!(x_previous, x) + step!(x, x, s, alpha, constraints) + iteration += 1 + f_calls += _f_calls + g_calls += _g_calls + f_x /= t + if alphamax >= 1 + t *= mu + df = DifferentiableFunction(dummy, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + end + end + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + grtol) + MultivariateOptimizationResults("Interior/Newton", + initial_x, + x, + float64(f_x), + iteration, + iteration == iterations, + x_converged, + xtol, + f_converged, + ftol, + gr_converged, + grtol, + tr, + f_calls, + g_calls) +end + +function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), + initial_x::Vector{T}, + constraints::AbstractConstraints; + method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) + if method == :newton + return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) + end + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + m = length(x) + local results + iteration, f_calls, g_calls = 0, 0, 0 + while m/t > eps_gap + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), + (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) + results = optimize(df, x, method=method) + copy!(x, results.minimum) + iteration += results.iterations + f_calls += results.f_calls + g_calls += results.g_calls + results.f_minimum /= t + t *= mu + end + results.iterations, results.f_calls, results.g_calls = iteration, f_calls, g_calls + copy!(results.initial_x, initial_x) + results +end diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 815a1112a..27d24f433 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -35,23 +35,17 @@ display_nextbit = 14 # paper (these may or may not be extensions of the cg_descent code # that can be downloaded from Hager's site; his code has undergone # numerous revisions since publication of the paper): -# cgdescent: the termination condition employs a "unit-correct" -# expression rather than a condition on gradient -# components---whether this is a good or bad idea will require -# additional experience, but preliminary evidence seems to suggest -# that it makes "reasonable" choices over a wider range of problem -# types. -# linesearch: the Wolfe conditions are checked only after alpha is +# - The Wolfe conditions are checked only after alpha is # generated either by quadratic interpolation or secant # interpolation, not when alpha is generated by bisection or # expansion. This increases the likelihood that alpha will be a # good approximation of the minimum. -# linesearch: In step I2, we multiply by psi2 only if the convexity +# - In step I2, we multiply by psi2 only if the convexity # test failed, not if the function-value test failed. This # prevents one from going uphill further when you already know # you're already higher than the point at alpha=0. -# both: checks for Inf/NaN function values -# both: support maximum value of alpha (equivalently, c). This +# - Checks for Inf/NaN function values +# - Support maximum value of alpha (equivalently, c). This # facilitates using these routines for constrained minimization # when you can calculate the distance along the path to the # disallowed region. (When you can't easily calculate that @@ -62,6 +56,7 @@ display_nextbit = 14 # which a finite value will be returned. See, e.g., limits_box # below. The default value for alphamax is Inf. See alphamaxfunc # for cgdescent and alphamax for linesearch_hz. 
+# - Support for projected-gradient methods const DEFAULTDELTA = 0.1 const DEFAULTSIGMA = 0.9 @@ -97,12 +92,13 @@ function alphatry{T}(alpha::T, xtmp::Array, gtmp::Array, lsr::LineSearchResults, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = inf(T), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), iterfinitemax::Integer = iceil(-log2(eps(T))), - alphamax::Real = inf(T), - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -110,17 +106,16 @@ function alphatry{T}(alpha::T, dphi0 = lsr.slope[1] alphatest = psi1 * alpha + alphamax = min(alphamax, tocorner(x, s, constraints)) alphatest = min(alphatest, alphamax) - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 iterfinite = 1 while !isfinite(phitest) alphatest = psi3 * alphatest - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 lsr.nfailures += 1 iterfinite += 1 @@ -130,7 +125,7 @@ function alphatry{T}(alpha::T, end end a = ((phitest-phi0)/alphatest - dphi0)/alphatest # quadratic fit - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("quadfit: alphatest = ", alphatest, ", phi0 = ", phi0, ", phitest = ", phitest, @@ -148,7 +143,7 @@ function alphatry{T}(alpha::T, alpha = alphamax mayterminate = false end - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (quadratic): ", alpha, ",(mayterminate = ", mayterminate, ")") end @@ -160,7 +155,7 @@ function alphatry{T}(alpha::T, end end alpha = min(alphamax, alpha) - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (expand): ", alpha) end return alpha, mayterminate, f_calls, g_calls @@ -175,29 +170,32 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr::LineSearchResults{T}, c::Real, mayterminate::Bool, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = inf(T), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - alphamax::Real = inf(T), rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), linesearchmax::Integer = 50, psi3::Real = convert(T,0.1), iterfinitemax::Integer = iceil(-log2(eps(T))), - display::Integer = 0) - if display & LINESEARCH > 0 + detailed_trace::Integer = 0) + if detailed_trace & LINESEARCH > 0 println("New linesearch") end f_calls = 0 g_calls = 0 + alphamax = min(alphamax, tocorner(x, s, constraints)) + phi0 = lsr.value[1] dphi0 = lsr.slope[1] (isfinite(phi0) && isfinite(dphi0)) || error("Initial value and slope must be finite") philim = phi0 + epsilon * abs(phi0) @assert c > 0 @assert isfinite(c) && c <= alphamax - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -206,7 +204,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr.nfailures += 1 iterfinite += 1 c *= psi3 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -219,7 +217,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, # satisfies the Wolfe conditions if mayterminate && satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 
println("Wolfe condition satisfied on point alpha = ", c) end return c, f_calls, g_calls # phic @@ -232,7 +230,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, iter = 1 cold = -one(T) while !isbracketed && iter < linesearchmax - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracketing: ia = ", ia, ", ib = ", ib, ", c = ", c, @@ -259,7 +257,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, error("c = ", c, ", lsr = ", lsr) end # ia, ib = bisect(phi, lsr, ia, ib, philim) # TODO: Pass options - ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, display) + ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up isbracketed = true @@ -269,7 +267,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c *= rho if c > alphamax c = (alphamax + cold)/2 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax, ", cold = ", cold, ", new c = ", c) end @@ -277,7 +275,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, return cold, f_calls, g_calls end end - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -285,11 +283,11 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, alphamax = c lsr.nfailures += 1 iterfinite += 1 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: non-finite value, bisection") end c = (cold + c) / 2 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -319,18 +317,19 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, a = lsr.alpha[ia] b = lsr.alpha[ib] @assert b > a - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, ", ib = ", ib, ", a = ", a, ", b = ", b, ", phi(a) = ", lsr.value[ia], ", phi(b) = ", lsr.value[ib]) + println("phi(a) == phi(b) ? 
", lsr.value[ia] == lsr.value[ib]) end if b - a <= eps(b) return a, f_calls, g_calls # lsr.value[ia] end - iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display) + iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, delta, sigma, detailed_trace) f_calls += f_up g_calls += g_up if iswolfe @@ -340,12 +339,12 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, B = lsr.alpha[iB] @assert B > A if B - A < gamma * (b - a) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant succeeded") end if nextfloat(lsr.value[ia]) >= lsr.value[ib] && nextfloat(lsr.value[iA]) >= lsr.value[iB] # It's so flat, secant didn't do anything useful, time to quit - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant suggests it's flat") end return A, f_calls, g_calls @@ -354,18 +353,18 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, ib = iB else # Secant is converging too slowly, use bisection - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant failed, using bisection") end c = (A + B) / convert(T, 2) # phic = phi(gphi, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) # ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options - ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display) + ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end @@ -408,9 +407,10 @@ function secant2!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, + constraints::AbstractConstraints = ConstraintsNone(), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -423,28 +423,28 @@ function secant2!{T}(df::Union(DifferentiableFunction, @assert dphia < 0 @assert dphib >= 0 c = secant(a, b, dphia, dphib) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) end @assert isfinite(c) # phic = phi(tmpc, c) # Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: first c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end # iA, iB = update(phi, lsr, ia, ib, ic, philim) - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic) end a = lsr.alpha[iA] @@ -458,11 +458,11 @@ function secant2!{T}(df::Union(DifferentiableFunction, c = secant(lsr, ia, iA) end if a <= c <= b - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 
println("secant2: second c = ", c) end # phic = phi(tmpc, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) @@ -470,16 +470,16 @@ function secant2!{T}(df::Union(DifferentiableFunction, ic = length(lsr) # Check arguments here if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB]) end return false, iA, iB, f_calls, g_calls @@ -500,7 +500,8 @@ function update!(df::Union(DifferentiableFunction, ib::Integer, ic::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, eq. 4.4): @@ -511,7 +512,7 @@ function update!(df::Union(DifferentiableFunction, c = lsr.alpha[ic] phic = lsr.value[ic] dphic = lsr.slope[ic] - if display & UPDATE > 0 + if detailed_trace & UPDATE > 0 println("update: ia = ", ia, ", a = ", a, ", ib = ", ib, @@ -536,7 +537,7 @@ function update!(df::Union(DifferentiableFunction, end # phic is bigger than phi0, which implies that the minimum # lies between a and c. Find it via bisection. - return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display) + return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, constraints, detailed_trace) end # HZ, stage U3 (with theta=0.5) @@ -550,7 +551,8 @@ function bisect!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 gphi = nan(T) @@ -563,11 +565,11 @@ function bisect!{T}(df::Union(DifferentiableFunction, @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) - if display & BISECT > 0 + if detailed_trace & BISECT > 0 println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a) end d = (a + b) / convert(T, 2) - phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true) + phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phid) && isfinite(gphi) @@ -588,19 +590,18 @@ function bisect!{T}(df::Union(DifferentiableFunction, end # Define one-parameter function for line searches -function linefunc!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), - x::Array, - s::Array, - alpha::Real, - xtmp::Array, - g::Array, - calc_grad::Bool) +function linefunc!{T}(df::Union(DifferentiableFunction, + TwiceDifferentiableFunction), + x::Array{T}, + s::Array, + alpha::Real, + xtmp::Array, + g::Array, + calc_grad::Bool, + constraints::AbstractConstraints = ConstraintsNone()) f_calls = 0 g_calls = 0 - for i = 1:length(x) - xtmp[i] = x[i] + alpha * s[i] - end + step!(xtmp, x, s, alpha, constraints) gphi = nan(eltype(g)) if calc_grad val = df.fg!(xtmp, g) From 809be9637c825eb4f3545e75c9af48fdcb827a13 Mon Sep 17 00:00:00 2001 From: timholy Date: Mon, 10 Mar 2014 22:00:53 
-0500 Subject: [PATCH 02/28] Add constrained tests, and don't load problems unless running tests --- src/Optim.jl | 9 ++-- src/problems/constrained.jl | 77 +++++++++++++++++++++++++++++++ test/cg.jl | 34 +++++++++----- test/momentum_gradient_descent.jl | 2 +- test/runtests.jl | 4 ++ 5 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 src/problems/constrained.jl diff --git a/src/Optim.jl b/src/Optim.jl index 37fa65c25..4fc17d2af 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -77,8 +77,11 @@ module Optim # End-User Facing Wrapper Functions include("optimize.jl") - # Examples for testing - include(joinpath("problems", "unconstrained.jl")) - cgdescent(args...) = error("API has changed. Please use cg.") + + # Tests + const basedir = dirname(Base.source_path()) + const testpaths = [joinpath(basedir, "problems", "unconstrained.jl"), + joinpath(basedir, "problems", "constrained.jl")] + end diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..37f8710a6 --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,77 @@ +module ConstrainedProblems + +using Optim + +immutable ConstrainedProblem{C<:Optim.AbstractConstraints} + name::ASCIIString + f::Function + g!::Function + h!::Function + constraints::C + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +pexamples = Dict{ASCIIString, ConstrainedProblem}() + +##################################### +### +### Minimum distance, box constraints +### +##################################### + +function sqrdist(x) + dx1 = x[1] - 5 + dx2 = x[2] - 3 + (dx1*dx1 + dx2*dx2)/2 +end + +function sqrdist_gradient!(x, g) + g[1] = x[1] - 5 + g[2] = x[2] - 3 +end + +function sqrdist_hessian!(x, H) + H[1,1] = H[2,2] = 1 + H[1,2] = H[2,1] = 0 +end + +constraints = Optim.ConstraintsBox([-2.0,-2.0],[2.0,2.0]) + +pexamples["SqrdistBox"] = ConstrainedProblem("SqrdistBox", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[2.0, 2.0]}, + true, + true) + +constraints = Optim.ConstraintsBox(nothing,[2.0,2.0]) + +pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[2.0, 2.0]}, + true, + true) + +constraints = Optim.ConstraintsBox([-2.0,-2.0],nothing) + +pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + {[5.0, 3.0]}, + true, + true) + +end # module diff --git a/test/cg.jl b/test/cg.jl index fa99fd27a..22742be8a 100644 --- a/test/cg.jl +++ b/test/cg.jl @@ -1,18 +1,30 @@ using Optim -for (name, prob) in Optim.UnconstrainedProblems.examples - if prob.isdifferentiable - if name == "Himmelbrau" - continue - end - df = DifferentiableFunction(prob.f, prob.g!) - res = Optim.cg(df, prob.initial_x) - if length(prob.solutions) == 1 - @assert norm(res.minimum - prob.solutions[1]) < 1e-2 - end - end +for (name, prob) in UnconstrainedProblems.examples + if prob.isdifferentiable + if name == "Himmelbrau" + continue + end + df = DifferentiableFunction(prob.f, prob.g!) + res = Optim.cg(df, prob.initial_x) + if length(prob.solutions) == 1 + @assert norm(res.minimum - prob.solutions[1]) < 1e-2 + end + end end +for (name, prob) in ConstrainedProblems.pexamples + if prob.isdifferentiable + df = DifferentiableFunction(prob.f, prob.g!) 
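For reference, the expected solutions asserted by this loop are simply the unconstrained optimum of sqrdist, [5.0, 3.0], projected onto each feasible box; the loop resumes just below this sketch (which assumes the problem definitions in src/problems/constrained.jl):

    xstar = [5.0, 3.0]    # unconstrained optimum of sqrdist
    Optim.project!(xstar, Optim.ConstraintsBox([-2.0, -2.0], [2.0, 2.0]))
    # xstar == [2.0, 2.0], matching the expected solution of "SqrdistBox"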
+ res = Optim.cg(df, prob.initial_x, constraints=prob.constraints) + if length(prob.solutions) == 1 + @assert norm(res.minimum - prob.solutions[1]) < 1e-2 + end + end +end + + +# Functions of arrays let objective(X, B) = sum((X.-B).^2)/2 diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl index abe0fe7a3..e79c3bfb3 100644 --- a/test/momentum_gradient_descent.jl +++ b/test/momentum_gradient_descent.jl @@ -1,4 +1,4 @@ -p = Optim.UnconstrainedProblems.examples["Rosenbrock"] +p = UnconstrainedProblems.examples["Rosenbrock"] df = DifferentiableFunction(p.f, p.g!) diff --git a/test/runtests.jl b/test/runtests.jl index 85ba62b63..d20712504 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,10 @@ using Optim +for path in Optim.testpaths + require(path) +end + my_tests = ["bfgs.jl", "gradient_descent.jl", "momentum_gradient_descent.jl", From 83a0bc33077545109fa66d67843d9c69877a0aba Mon Sep 17 00:00:00 2001 From: timholy Date: Fri, 14 Mar 2014 07:27:32 -0500 Subject: [PATCH 03/28] Implement linear constraints --- src/constraints.jl | 55 ++++++++++++++++++++++++++++++++++--------- src/interior.jl | 58 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 9a4e30504..434a439e7 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -14,27 +14,50 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints new(l, u) end end -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l::AbstractArray{T,N}, infs(T, size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u::AbstractArray{T,N}) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, infs(T, size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u) -# FIXME: need a way to indicate equality/inequality -immutable ConstraintsL{T,M<:AbstractMatrix,V<:AbstractVector} <: AbstractConstraints +immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M - b::V + lower::Array{T,N} + upper::Array{T,N} + scratch1::Array{T,N} + scratch2::Array{T,N} + scratch3::Array{T,N} + + function ConstraintsL(A, l::Array{T,N}, u::Array{T,N}, + scratch1 = similar(l), scratch2 = similar(l), scratch3 = Array(T, ndims(l) == 1 ? 
size(A, 2) : (size(A,2),size(l,2))))
+        size(A, 1) == size(l,1) == size(u,1) || error("The sizes of the bounds must match the size of A")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(A, l, u, scratch1, scratch2, scratch3)
+    end
+end
+ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Union(Vector{T},Matrix{T})) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u)
+ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Nothing) = ConstraintsL(A, l, infs(T, size(l)))
+ConstraintsL{T}(A::AbstractMatrix, l::Nothing, u::Union(Vector{T},Matrix{T})) = ConstraintsL(A, -infs(T,size(u)), u)
 
 # Generic constraints
-immutable ConstraintsNL{F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints
+immutable ConstraintsNL{T,F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints
     funcs::Vector{F}
+    lower::Vector{T}
+    upper::Vector{T}
+
+    function ConstraintsNL(funcs::Vector, l::AbstractVector{T}, u::AbstractVector{T})
+        length(funcs) == length(l) == length(u) || error("The sizes of the bounds must match the number of constraint functions")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(funcs, l, u)
+    end
+end
 
 type Constraints
     bounds::AbstractConstraints
-    leq::AbstractConstraints
-    lineq::AbstractConstraints
-    nleq::AbstractConstraints
-    nlineq::AbstractConstraints
+    linear::AbstractConstraints
+    nonlinear::AbstractConstraints
     mu::Real
 end
-Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), ConstraintsNone())
+Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), 1.0)
@@ -52,6 +75,16 @@ function feasible(x, constraints::ConstraintsBox)
     true
 end
 
+function feasible(x, constraints::ConstraintsL)
+    l = constraints.lower
+    u = constraints.upper
+    y = constraints.scratch1
+    A_mul_B!(y, constraints.A, x)
+    for i = 1:length(y)
+        l[i] <= y[i] <= u[i] || return false
+    end
+    true
+end
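Feasibility for linear constraints thus reduces to checking l <= A*x <= u componentwise. A small sketch with hypothetical data, assuming the ConstraintsL definitions above:

    A  = [1.0 1.0; 1.0 -1.0]
    lc = ConstraintsL(A, [-1.0, -1.0], [1.0, 1.0])
    feasible([0.2, 0.3], lc)    # true:  A*x = [0.5, -0.1] lies within the bounds
    feasible([2.0, 0.0], lc)    # false: A*x = [2.0,  2.0] exceeds both upper bounds

 ## project!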
project!(x, constraints::AbstractConstraints) = x diff --git a/src/interior.jl b/src/interior.jl index be1835830..87e0f0ca4 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -75,7 +75,52 @@ function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) end end -## Linear inequalities +## Linear constraints +function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) + m, n = size(constraints.A) + y1 = constraints.scratch1 + A_mul_B!(y1, constraints.A, x) + phi = zero(T) + l = constraints.lower + u = constraints.upper + lenx = length(x) + lenc = length(y1) + for i = 1:lenc + li, ui = l[i], u[i] + yi = y1[i] + if isfinite(li) + yi >= li || return inf(T) # no need to compute additional gradient terms + phi += log(yi-li) + end + if isfinite(ui) + yi <= ui || return inf(T) + phi += log(ui-yi) + end + end + if isempty(g) + return -phi + end + y2 = constraints.scratch2 + y3 = constraints.scratch3 + for i = 1:lenc + y2[i] = 1/(y1[i] - l[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] -= y3[i] + end + for i = 1:lenc + y2[i] = 1/(u[i] - y1[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] += y3[i] + end + -phi +end + +barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) + ## Nonlinear inequalities @@ -132,7 +177,7 @@ linlsq(A::Matrix, b::Vector) = (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), (x,H) -> linlsq_h!(x, H, A, b)) -dummy(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_f (x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) @@ -184,7 +229,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace - df = DifferentiableFunction(dummy, + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 @@ -210,7 +255,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x /= t if alphamax >= 1 t *= mu - df = DifferentiableFunction(dummy, + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) end @@ -243,8 +288,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, g_calls) end + function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), - initial_x::Vector{T}, + initial_x::Array{T}, constraints::AbstractConstraints; method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) if method == :newton @@ -258,7 +304,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl local results iteration, f_calls, g_calls = 0, 0, 0 while m/t > eps_gap - df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) results = optimize(df, x, method=method) From f83256245e72a715cef388787ca392107ed70f90 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 2 Apr 2014 16:23:00 -0500 Subject: [PATCH 04/28] Improve initial value of barrier coefficient Also support keyword arguments --- src/interior.jl | 54 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 87e0f0ca4..7313e57f1 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -292,9 +292,41 @@ end function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = one(T), mu = 10, eps_gap = 1e-12) + method = :cg, t = nan(T), mu = 10, eps_gap = 1e-12, + xtol::Real = 1e-32, + ftol::Real = 1e-8, + grtol::Real = 1e-8, + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + if isnan(t) + vo, vc, go, gc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && go < grtol + return MultivariateOptimizationResults("Interior", + initial_x, + initial_x, + float64(vo), + 0, + false, + false, + xtol, + false, + ftol, + true, + grtol, + OptimizationTrace(), + 0, + 0) + end + if gc == 0 + t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. + else + t = gc/(0.1*go) + end + end if method == :newton - return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) + return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME end if !feasible(initial_x, constraints) error("Initial guess must be feasible") @@ -303,11 +335,11 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl m = length(x) local results iteration, f_calls, g_calls = 0, 0, 0 - while m/t > eps_gap + while m/t > eps_gap || iteration == 0 df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) - results = optimize(df, x, method=method) + results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) iteration += results.iterations f_calls += results.f_calls @@ -319,3 +351,17 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl copy!(results.initial_x, initial_x) results end + +function gnorm(objective, constraints, initial_x) + go = similar(initial_x) + gc = similar(initial_x) + fill!(gc, 0) + vo = objective.fg!(initial_x, go) + vc = barrier_fg!(initial_x, gc, constraints) + gom = gcm = zero(eltype(go)) + for i = 1:length(go) + gom += abs(go[i]) + gcm += abs(gc[i]) + end + vo, vc, gom, gcm +end From 5fabfe819acbad2fcd61ddd8939d01d81c8393d9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 4 Apr 2014 15:41:09 -0500 Subject: [PATCH 05/28] Update for deprecation of infs --- src/constraints.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 434a439e7..954c0392f 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, infs(T, size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(-infs(T,size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(inf(T), size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-inf(T),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M From 8f56008a4d00bfc5aeb291279efdfdccbe2b3a6b Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 4 Apr 2014 15:45:17 -0500 Subject: [PATCH 06/28] Check type of function value, and fix type of initial t in interior-point When the parameters are Float32, the return value of the function had better be, too, or we could be fooled about the precision. --- src/interior.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 7313e57f1..8081b3ec5 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -24,7 +24,7 @@ function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) phi += log(ui-xi) end end - -phi + -phi::T end function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) @@ -58,7 +58,7 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) g[i] += 1/(ui-xi) end end - -phi + -phi::T end function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) @@ -116,7 +116,7 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) for i = 1:lenx g[i] += y3[i] end - -phi + -phi::T end barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) @@ -322,7 +322,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl if gc == 0 t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. 
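The conditional resumes just below: in its else branch, the initial barrier coefficient is chosen so that the scaled objective gradient dominates the barrier gradient by roughly a factor of ten at the starting point, since t = gc/(0.1*go) gives t*go = 10*gc. A hypothetical numeric check:

    go, gc = 14.0, 0.7      # gradient norms of objective and barrier, as from gnorm()
    t = gc/(0.1*go)         # = 0.5, so t*go = 7.0 == 10*gc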
else - t = gc/(0.1*go) + t = gc/(convert(T,0.1)*go) end end if method == :newton From de95ece90360934e3df08c214e81c376c3c4d4fc Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sat, 5 Apr 2014 07:54:21 -0500 Subject: [PATCH 07/28] Modify some asserts that were causing trouble for non-convex domains --- src/linesearch/hz_linesearch.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 27d24f433..e5171c302 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -420,8 +420,8 @@ function secant2!{T}(df::Union(DifferentiableFunction, b = lsr.alpha[ib] dphia = lsr.slope[ia] dphib = lsr.slope[ib] - @assert dphia < 0 - @assert dphib >= 0 + @assert dphia < 0 && isfinite(dphia) + @assert dphib >= 0 && isfinite(dphib) c = secant(a, b, dphia, dphib) if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) @@ -431,7 +431,6 @@ function secant2!{T}(df::Union(DifferentiableFunction, phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) @@ -561,7 +560,12 @@ function bisect!{T}(df::Union(DifferentiableFunction, # Debugging (HZ, conditions shown following U3) @assert lsr.slope[ia] < 0 @assert lsr.value[ia] <= philim - @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here + # if !(lsr.slope[ib] < 0) + # @show a, b + # @show lsr.value[ia], lsr.value[ib] + # @show lsr.slope[ia], lsr.slope[ib] + # end + # @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) @@ -572,7 +576,6 @@ function bisect!{T}(df::Union(DifferentiableFunction, phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phid) && isfinite(gphi) push!(lsr, d, phid, gphi) id = length(lsr) if gphi >= 0 From 72f18eabae0fcdc9b2f152c129fa9f5386947351 Mon Sep 17 00:00:00 2001 From: timholy Date: Fri, 23 May 2014 13:28:55 -0500 Subject: [PATCH 08/28] nelder-mead: check that at least one starting point has finite value --- src/nelder_mead.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index f29afa619..4952d6295 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -69,6 +69,7 @@ function nelder_mead{T}(f::Function, for i in 1:n @inbounds y[i] = f(p[:, i]) end + any(isfinite(y)) || error("At least one of the starting points must have finite penalty") f_calls += n f_x_previous, f_x = NaN, nmobjective(y, m, n) From 048394a7be294b6f6f17208b39fe2175e54b10e4 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 10 Sep 2014 21:15:48 -0500 Subject: [PATCH 09/28] Fix call to norm on high-dimensional objects --- src/cg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cg.jl b/src/cg.jl index b175bc5f4..94b471d3e 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -89,7 +89,7 @@ macro cgtrace() dt["g(x)"] = copy(gr) dt["Current step size"] = alpha end - grnorm = norm(gr, Inf) + grnorm = norm(gr[:], Inf) update!(tr, iteration, f_x, From 4d536b7e77b6e2d6c5025cf5041b4a2ccf4a8545 Mon Sep 17 00:00:00 2001 From: timholy Date: Wed, 10 Sep 2014 21:16:22 -0500 Subject: [PATCH 10/28] fminbox: remove unused tol argument and add mu0 --- src/fminbox.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/src/fminbox.jl b/src/fminbox.jl index 19da30a36..e675f7eb2 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -105,20 +105,20 @@ end const PARAMETERS_MU = one64< precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -145,7 +145,7 @@ function fminbox{T}(df::DifferentiableFunction, gbarrier[i] = (isfinite(thisl) ? one(T)/(thisx-thisl) : zero(T)) + (isfinite(thisu) ? one(T)/(thisu-thisx) : zero(T)) end valfunc = df.fg!(x, gfunc) # is this used?? - mu = initialize_mu(gfunc, gbarrier; mu0factor=mufactor) + mu = isnan(mu0) ? initialize_mu(gfunc, gbarrier; mu0factor=mufactor) : mu0 if show_trace > 0 println("######## fminbox ########") println("Initial mu = ", mu) From 7f48aeb1749ff79dd033c3e5475c303f6e01a0db Mon Sep 17 00:00:00 2001 From: Cody Date: Wed, 4 Feb 2015 09:00:56 -0600 Subject: [PATCH 11/28] fixed some deprecation warnings thrown by julia 0.4. Used Compat package to ensure compatibility with earlier Julia versions. --- REQUIRE | 1 + src/Optim.jl | 3 ++- src/cg.jl | 4 ++-- src/levenberg_marquardt.jl | 4 ++-- src/linesearch/hz_linesearch.jl | 10 +++++----- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/REQUIRE b/REQUIRE index 3ef4db562..0da2d52b7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,4 @@ julia 0.2- Calculus DualNumbers +Compat diff --git a/src/Optim.jl b/src/Optim.jl index 4fc17d2af..bc3afb7c7 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,6 +1,7 @@ module Optim using Calculus - + using Compat + import Base.dot, Base.length, Base.push!, diff --git a/src/cg.jl b/src/cg.jl index 94b471d3e..8d42a5569 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -150,7 +150,7 @@ function cg{T}(df::Union(DifferentiableFunction, # Store f(x) in f_x f_x = df.fg!(x, gr) @assert typeof(f_x) == T - f_x_previous = nan(T) + f_x_previous = convert(T,NaN) f_calls, g_calls = f_calls + 1, g_calls + 1 copy!(gr_previous, gr) @@ -212,7 +212,7 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) - alphamax = interior ? toedge(x, s, constraints) : inf(T) + alphamax = interior ? toedge(x, s, constraints) : convert(T,Inf) # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 178b585ee..69e001811 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -38,7 +38,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # Maintain a trace of the system. 
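The hunks below replace Julia 0.3's curly-brace Dict literals, which are deprecated on 0.4, with the Compat macro form. Both spellings build the same Dict{Any,Any}; a minimal sketch with a hypothetical value:

    using Compat
    lambda = 10.0
    d_old = {"lambda" => lambda}                        # 0.3 literal, deprecated on 0.4
    d_new = @compat Dict{Any,Any}("lambda" => lambda)   # accepted on both 0.3 and 0.4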
tr = OptimizationTrace() if show_trace - d = {"lambda" => lambda} + d = @compat Dict{Any,Any}("lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), NaN, d) push!(tr, os) println(os) @@ -87,7 +87,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # show state if show_trace gradnorm = norm(J'*fcur, Inf) - d = {"g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda} + d = @compat Dict{Any,Any}("g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), gradnorm, d) push!(tr, os) println(os) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index e5171c302..938b9ec01 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -93,7 +93,7 @@ function alphatry{T}(alpha::T, gtmp::Array, lsr::LineSearchResults, constraints::AbstractConstraints = ConstraintsNone(), - alphamax::Real = inf(T), + alphamax::Real = convert(T,Inf), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), @@ -171,7 +171,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c::Real, mayterminate::Bool, constraints::AbstractConstraints = ConstraintsNone(), - alphamax::Real = inf(T), + alphamax::Real = convert(T,Inf), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, rho::Real = convert(T,5), @@ -179,7 +179,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, gamma::Real = convert(T,0.66), linesearchmax::Integer = 50, psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), + iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), detailed_trace::Integer = 0) if detailed_trace & LINESEARCH > 0 println("New linesearch") @@ -554,7 +554,7 @@ function bisect!{T}(df::Union(DifferentiableFunction, detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 - gphi = nan(T) + gphi = convert(T,NaN) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, conditions shown following U3) @@ -605,7 +605,7 @@ function linefunc!{T}(df::Union(DifferentiableFunction, f_calls = 0 g_calls = 0 step!(xtmp, x, s, alpha, constraints) - gphi = nan(eltype(g)) + gphi = convert(eltype(g),NaN) if calc_grad val = df.fg!(xtmp, g) f_calls += 1 From 0f352923d58637e5c671011fb8af2abdbe28a7d7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 9 Apr 2015 03:47:58 -0500 Subject: [PATCH 12/28] Fix ConstraintsNone parameter problem in step! --- src/constraints.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constraints.jl b/src/constraints.jl index 954c0392f..836bd5b3b 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -97,7 +97,7 @@ function project!(x::Array, bounds::ConstraintsBox) end ## step! 
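For context on the one-line fix below: ConstraintsNone is declared without type parameters, so the old default argument ConstraintsNone{T}() fails as soon as step! is called without an explicit constraints argument. Roughly:

    abstract AbstractConstraints
    immutable ConstraintsNone <: AbstractConstraints end
    ConstraintsNone()            # fine
    ConstraintsNone{Float64}()   # throws: ConstraintsNone accepts no type parameters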
-function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone{T}()) +function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone()) (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree") for i = 1:length(xtmp) @inbounds xtmp[i] = x[i] + alpha*s[i] From 03c5e04e0e53b69f6957a645224cca9e86c35c3a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 9 Apr 2015 04:32:35 -0500 Subject: [PATCH 13/28] Fixes for 0.4 deprecations --- src/accelerated_gradient_descent.jl | 2 +- src/bfgs.jl | 4 ++-- src/brent.jl | 12 ++++++------ src/cg.jl | 2 +- src/constraints.jl | 12 ++++++------ src/fminbox.jl | 12 ++++++------ src/golden_section.jl | 10 +++++----- src/gradient_descent.jl | 2 +- src/interior.jl | 30 ++++++++++++++--------------- src/l_bfgs.jl | 2 +- src/linesearch/hz_linesearch.jl | 6 +++--- src/momentum_gradient_descent.jl | 2 +- src/nelder_mead.jl | 2 +- src/newton.jl | 2 +- src/nnls.jl | 4 ++-- src/optimize.jl | 14 +++++++------- src/problems/unconstrained.jl | 4 ++-- src/simulated_annealing.jl | 2 +- src/types.jl | 4 ++-- test/grid_search.jl | 2 +- test/type_stability.jl | 6 +++--- 21 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl index e9de5b121..1f0f41bca 100644 --- a/src/accelerated_gradient_descent.jl +++ b/src/accelerated_gradient_descent.jl @@ -139,7 +139,7 @@ function accelerated_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Accelerated Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/bfgs.jl b/src/bfgs.jl index 5a6073b3e..c2216bc70 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -97,7 +97,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, # Increment the number of steps we've had to perform iteration += 1 - # Set the search direction + # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian A_mul_B!(s, invH, gr) for i in 1:n @@ -178,7 +178,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/brent.jl b/src/brent.jl index b3d4fa0b6..71d0a7356 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -33,13 +33,13 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) f_minimum = f(x_minimum) f_calls = 1 # Number of calls to f - + step = zero(T) step_old = zero(T) @@ -48,7 +48,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; f_minimum_old = f_minimum f_minimum_old_old = f_minimum - + it = 0 converged = false @@ -72,12 +72,12 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if abs(step_old) > tolx # Compute parabola interpolation # x_minimum + p/q is the optimum of the parabola # Also, q is guaranteed to be positive - + r = (x_minimum - x_minimum_old) * (f_minimum - f_minimum_old_old) q = (x_minimum - x_minimum_old_old) * (f_minimum - f_minimum_old) p = (x_minimum - x_minimum_old_old) * q - (x_minimum - x_minimum_old) * r @@ -150,7 +150,7 @@ 
function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/cg.jl b/src/cg.jl index 8d42a5569..f3a599292 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -282,7 +282,7 @@ function cg{T}(df::Union(DifferentiableFunction, return MultivariateOptimizationResults("Conjugate Gradient", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/constraints.jl b/src/constraints.jl index 836bd5b3b..5a382ae67 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(inf(T), size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-inf(T),size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M @@ -112,12 +112,12 @@ end # - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active) # The first is the farthest you might consider searching in a barrier method. The second is the farthest that it # makes sense to search in a projection method. -toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) -tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = inf(T) +toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) # Stop at the first coordinate to leave the box - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) si = s[i] li = bounds.lower[i] @@ -145,7 +145,7 @@ function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) elseif si > 0 && isfinite(ui) alphamax = max(alphamax, (ui-x[i])/si) else - return inf(T) + return convert(T,Inf) end end convert(T, alphamax) diff --git a/src/fminbox.jl b/src/fminbox.jl index e675f7eb2..861316433 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -1,7 +1,7 @@ # Attempt to compute a reasonable default mu: at the starting # position, the gradient of the input function should dominate the # gradient of the barrier. 
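The heuristic in the comment above can be stated compactly: scale the barrier so that its gradient is a small fraction, mu0factor, of the objective gradient at the starting point. A hedged standalone sketch (default_mu is a hypothetical name, and the fallback when no barrier term is active is an assumption); for the box barrier used here, -sum(log(x-l) + log(u-x)), gbarrier holds that barrier's gradient at the starting point:

function default_mu(gfunc::Vector{Float64}, gbarrier::Vector{Float64},
                    mu0factor::Float64 = 0.001)
    gbarriernorm = sum(abs(gbarrier))
    gbarriernorm > 0 || return mu0factor          # assumed fallback: no finite bounds
    mu0factor * sum(abs(gfunc)) / gbarriernorm    # barrier gradient stays ~0.1% of objective's
end

default_mu([10.0, -4.0], [1.0, 1.0])  # 0.001 * (14/2) = 0.007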
-function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = nan(T), mu0factor::T = 0.001) +function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = convert(T,NaN), mu0factor::T = 0.001) if isnan(mu0) gbarriernorm = sum(abs(gbarrier)) if gbarriernorm > 0 @@ -26,7 +26,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisl) dx = x[i] - thisl if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -41,7 +41,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisu) dx = thisu - x[i] if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -76,7 +76,7 @@ function barrier_combined{T}(x::Array{T}, g, gfunc, gbarrier, val_each::Vector{T end function limits_box{T}(x::Array{T}, d::Array{T}, l::Array{T}, u::Array{T}) - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) if d[i] < 0 alphamax = min(alphamax, ((l[i]-x[i])+eps(l[i]))/d[i]) @@ -118,7 +118,7 @@ function fminbox{T<:FloatingPoint}(df::DifferentiableFunction, extended_trace::Bool = false, linesearch!::Function = hz_linesearch!, eta::Real = convert(T,0.4), - mu0::T = nan(T), + mu0::T = convert(T,NaN), mufactor::T = convert(T, 0.001), precondprep = (P, x, l, u, mu) -> precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -152,7 +152,7 @@ function fminbox{T<:FloatingPoint}(df::DifferentiableFunction, end g = similar(x) - valboth = Array(T, 2) + valboth = Array(T, 2) fval_all = Array(Vector{T}, 0) fcount_all = 0 xold = similar(x) diff --git a/src/golden_section.jl b/src/golden_section.jl index dce3da090..e4ed75029 100644 --- a/src/golden_section.jl +++ b/src/golden_section.jl @@ -28,11 +28,11 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; if !(x_lower < x_upper) error("x_lower must be less than x_upper") end - + # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) @@ -46,7 +46,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace @goldensectiontrace - + while it < iterations tolx = rel_tol * abs(x_minimum) + abs_tol @@ -59,7 +59,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if x_upper - x_minimum > x_minimum - x_lower x_new = x_minimum + golden_ratio*(x_upper - x_minimum) f_new = f(x_new) @@ -91,7 +91,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index fc996f4d4..dfba19fa3 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -124,7 +124,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/interior.jl b/src/interior.jl index 8081b3ec5..295176a7b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -16,11 +16,11 @@ function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) li, ui = l[i], u[i] xi = x[i] if isfinite(li) - xi >= li || return inf(T) + xi >= li || return convert(T,Inf) phi += log(xi-li) end if isfinite(ui) - xi <= ui || return inf(T) + xi <= 
ui || return convert(T,Inf) phi += log(ui-xi) end end @@ -48,12 +48,12 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) li, ui = l[i], u[i] xi = x[i] if isfinite(li) - xi >= li || return inf(T) # no need to compute additional gradient terms + xi >= li || return convert(T,Inf) # no need to compute additional gradient terms phi += log(xi-li) g[i] -= 1/(xi-li) end if isfinite(ui) - xi <= ui || return inf(T) + xi <= ui || return convert(T,Inf) phi += log(ui-xi) g[i] += 1/(ui-xi) end @@ -89,11 +89,11 @@ function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) li, ui = l[i], u[i] yi = y1[i] if isfinite(li) - yi >= li || return inf(T) # no need to compute additional gradient terms + yi >= li || return convert(T,Inf) # no need to compute additional gradient terms phi += log(yi-li) end if isfinite(ui) - yi <= ui || return inf(T) + yi <= ui || return convert(T,Inf) phi += log(ui-yi) end end @@ -154,13 +154,13 @@ end # combined_f(x, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_f(x, objective.f, constraints, t) -# +# # combined_g!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_g!(x, g, objective.g, constraints, t) -# +# # combined_fg!(x, g, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_fg!(x, g, objective.fg!, constraints, t) -# +# # combined_h!(x, H, objective::Union(DifferentiableFunction,TwiceDifferentiableFunction), constraints::AbstractConstraints, t) = # combined_h!(x, H, objective.h!, constraints, t) @@ -170,7 +170,7 @@ end ## Cost function for linear least squares ## ######################################### - + linlsq(A::Matrix, b::Vector) = TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), @@ -228,12 +228,12 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace - + df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 - f_x = f_x_previous = inf(T) + f_x = f_x_previous = convert(T,Inf) while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) @@ -274,7 +274,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, MultivariateOptimizationResults("Interior/Newton", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, @@ -292,7 +292,7 @@ end function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiableFunction), initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = nan(T), mu = 10, eps_gap = 1e-12, + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = 1e-12, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -306,7 +306,7 @@ function interior{T}(objective::Union(DifferentiableFunction, TwiceDifferentiabl return MultivariateOptimizationResults("Interior", initial_x, initial_x, - float64(vo), + @compat(Float64(vo)), 0, false, false, diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 941d52b58..e439d23a8 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -215,7 +215,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("L-BFGS", initial_x, x, 
- float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 938b9ec01..a4a1a4a87 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -59,7 +59,7 @@ display_nextbit = 14 # - Support for projected-gradient methods const DEFAULTDELTA = 0.1 -const DEFAULTSIGMA = 0.9 +const DEFAULTSIGMA = 0.9 # Generate initial guess for step size (HZ, stage I0) function alphainit{T}(alpha::Real, @@ -97,7 +97,7 @@ function alphatry{T}(alpha::T, psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), + iterfinitemax::Integer = ceil(Int,-log2(eps(T))), detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -315,7 +315,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end while iter < linesearchmax a = lsr.alpha[ia] - b = lsr.alpha[ib] + b = lsr.alpha[ib] @assert b > a if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index 927e451ac..eca0f9944 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -125,7 +125,7 @@ function momentum_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Momentum Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index 4952d6295..9dcf8280f 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -187,7 +187,7 @@ function nelder_mead{T}(f::Function, return MultivariateOptimizationResults("Nelder-Mead", initial_x, minimum, - float64(f(minimum)), + @compat(Float64(f(minimum))), iteration, iteration == iterations, false, diff --git a/src/newton.jl b/src/newton.jl index 64cc33370..8f8245b50 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -130,7 +130,7 @@ function newton{T}(d::TwiceDifferentiableFunction, return MultivariateOptimizationResults("Newton's Method", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nnls.jl b/src/nnls.jl index f34f0ffd0..239e40389 100644 --- a/src/nnls.jl +++ b/src/nnls.jl @@ -6,8 +6,8 @@ function nnls(A::AbstractMatrix, b::AbstractVector) x = fill(one(T), size(A, 2)) # Set up constraints l = zeros(eltype(x), length(x)) - u = fill(inf(eltype(x)), length(x)) - # Perform the optimization + u = fill(convert(eltype(x),Inf), length(x)) + # Perform the optimization func = (x, g) -> nnlsobjective(x, g, A, b) df = DifferentiableFunction(x->func(x,nothing), func, func) fminbox(df, x, l, u, precondprep=(P, x, l, u, mu)->precondprepnnls(P, x, mu, a)) diff --git a/src/optimize.jl b/src/optimize.jl index 4ea952398..a96b6c37c 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -52,7 +52,7 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -145,7 +145,7 @@ function optimize(d::DifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -259,7 +259,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) 
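A usage sketch of the two-argument constructor being invoked here, assuming (as these call sites suggest) that it synthesizes fg! from f and g!:

rosenbrock(x) = (1.0 - x[1])^2 + 100.0*(x[2] - x[1]^2)^2
function rosenbrock_g!(x, g)
    g[1] = -2.0*(1.0 - x[1]) - 400.0*x[1]*(x[2] - x[1]^2)
    g[2] = 200.0*(x[2] - x[1]^2)
    g
end
d = DifferentiableFunction(rosenbrock, rosenbrock_g!)
res = optimize(d, [0.0, 0.0], method = :bfgs)   # minimizer should approach [1.0, 1.0]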
bfgs(d, initial_x, @@ -362,7 +362,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -469,7 +469,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -514,7 +514,7 @@ function optimize{T <: Real}(f::Function, @printf "Iter Function value Gradient norm \n" end if method == :brent - brent(f, float64(lower), float64(upper); + brent(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, @@ -522,7 +522,7 @@ function optimize{T <: Real}(f::Function, show_trace = show_trace, extended_trace = extended_trace) elseif method == :golden_section - golden_section(f, float64(lower), float64(upper); + golden_section(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl index 7fbcf3c50..e2f1054ab 100644 --- a/src/problems/unconstrained.jl +++ b/src/problems/unconstrained.jl @@ -180,7 +180,7 @@ examples["Large Polynomial"] = OptimizationProblem("Large Polynomial", large_polynomial_gradient!, large_polynomial_hessian!, zeros(250), - {float([1:250])}, + {float([1:250;])}, true, true) @@ -268,7 +268,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial", ########################################################################## function powell(x::Vector) - return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + + return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + (x[2] - 2.0 * x[3])^4 + 10.0 * (x[1] - x[4])^4 end diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl index 5fbb27764..879cae802 100644 --- a/src/simulated_annealing.jl +++ b/src/simulated_annealing.jl @@ -103,7 +103,7 @@ function simulated_annealing{T}(cost::Function, return MultivariateOptimizationResults("Simulated Annealing", initial_x, best_x, - float64(best_f_x), + @compat(Float64(best_f_x)), iterations, iteration == iterations, false, diff --git a/src/types.jl b/src/types.jl index 2b2dedb2e..3295c07bc 100644 --- a/src/types.jl +++ b/src/types.jl @@ -6,11 +6,11 @@ immutable OptimizationState end function OptimizationState(i::Integer, f::Real) - OptimizationState(int(i), float64(f), NaN, Dict()) + OptimizationState(int(i), @compat(Float64(f)), NaN, Dict()) end function OptimizationState(i::Integer, f::Real, g::Real) - OptimizationState(int(i), float64(f), float64(g), Dict()) + OptimizationState(int(i), @compat(Float64(f)), @compat(Float64(g)), Dict()) end immutable OptimizationTrace diff --git a/test/grid_search.jl b/test/grid_search.jl index 5f34c4da0..ff0b4f7ad 100644 --- a/test/grid_search.jl +++ b/test/grid_search.jl @@ -1,4 +1,4 @@ -Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0]) +Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0;]) # Cartesian product over 2 dimensions. 
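A hedged sketch of what the commented-out two-dimensional call below intends: evaluate the objective on the Cartesian product of two ranges and keep the best point (grid_search2 is an illustrative name only):

function grid_search2(f, xs, ys)
    bestval, bestpt = Inf, (first(xs), first(ys))
    for x in xs, y in ys
        v = f([x, y])
        if v < bestval
            bestval, bestpt = v, (x, y)
        end
    end
    bestval, bestpt
end

grid_search2(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, -1:0.1:1.0, 0:0.1:3.0)  # ≈ (0.0, (1.0, 2.0))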
#grid_search(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, product[-1:0.1:1.0, -1:0.1:1.0]) diff --git a/test/type_stability.jl b/test/type_stability.jl index da72b8956..56482cb8c 100644 --- a/test/type_stability.jl +++ b/test/type_stability.jl @@ -30,7 +30,7 @@ d3 = TwiceDifferentiableFunction(rosenbrock, for method in (:nelder_mead, :simulated_annealing) optimize(rosenbrock, [0.0,0,.0], method = method) - optimize(rosenbrock, float32([0.0,0.0]), method = method) + optimize(rosenbrock, [0.0f0,0.0f0], method = method) end for method in (:bfgs, @@ -40,10 +40,10 @@ for method in (:bfgs, # :accelerated_gradient_descent, :l_bfgs) optimize(d2, [0.0,0,.0], method = method) - optimize(d2, float32([0.0,0.0]), method = method) + optimize(d2, [0.0f0,0.0f0], method = method) end for method in (:newton,) optimize(d3, [0.0,0.0], method = method) - optimize(d3, float32([0.0,0.0]), method = method) + optimize(d3, [0.0f0,0.0f0], method = method) end From 4d1cdf435b3cceab2a267408a61974d2b353ab2d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 14 Apr 2015 06:37:16 -0500 Subject: [PATCH 14/28] Bump the default linesearchmax Rarely, I see perfectly valid line searches take more than 50 iterations. --- src/linesearch/hz_linesearch.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index a4a1a4a87..28254e53f 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -177,7 +177,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), - linesearchmax::Integer = 50, + linesearchmax::Integer = 100, psi3::Real = convert(T,0.1), iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), detailed_trace::Integer = 0) @@ -370,6 +370,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end iter += 1 end + @show lsr error("Linesearch failed to converge") end From c0a3a3fef1cac89df917c6c34aa8942852fdb723 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 23 Sep 2015 16:57:02 -0500 Subject: [PATCH 15/28] Julia 0.4 update Binding deprecations, mostly. 
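The renames in question, in brief (0.3-era form on the left, 0.4 form on the right, as applied throughout the diffs below):

    Union(A, B)     ->  Union{A, B}
    Nothing         ->  Void            (the type of nothing)
    FloatingPoint   ->  AbstractFloat

So a schematic 0.3 signature such as

    f{T<:FloatingPoint}(x::Union(Vector{T}, Matrix{T}), u::Nothing)

becomes

    f{T<:AbstractFloat}(x::Union{Vector{T}, Matrix{T}}, u::Void)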
--- REQUIRE | 2 +- src/bfgs.jl | 4 ++-- src/brent.jl | 2 +- src/cg.jl | 10 ++++----- src/constraints.jl | 12 +++++----- src/fminbox.jl | 2 +- src/golden_section.jl | 2 +- src/gradient_descent.jl | 4 ++-- src/interior.jl | 10 ++++----- src/l_bfgs.jl | 4 ++-- src/linesearch/backtracking_linesearch.jl | 4 ++-- src/linesearch/hz_linesearch.jl | 26 +++++++++++----------- src/linesearch/interpolating_linesearch.jl | 4 ++-- src/linesearch/mt_linesearch.jl | 4 ++-- src/problems/constrained.jl | 6 ++--- src/problems/unconstrained.jl | 18 +++++++-------- 16 files changed, 57 insertions(+), 57 deletions(-) diff --git a/REQUIRE b/REQUIRE index 0da2d52b7..da0d390b7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.2- +julia 0.4 Calculus DualNumbers Compat diff --git a/src/bfgs.jl b/src/bfgs.jl index c2216bc70..cf2331c1e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -23,8 +23,8 @@ macro bfgstrace() end end -function bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; initial_invH::Matrix = eye(length(initial_x)), xtol::Real = 1e-32, diff --git a/src/brent.jl b/src/brent.jl index 71d0a7356..32ea74d8b 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -18,7 +18,7 @@ macro brenttrace() end end -function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function brent{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, diff --git a/src/cg.jl b/src/cg.jl index f3a599292..700cf0805 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner -cg_precondfwd(out::Array, P::Nothing, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Nothing, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Nothing, B::Array) = dot(A, B) +cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) +cg_precondfwddot(A::Array, P::Void, B::Array) = dot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = dot(A, B) # Diagonal preconditioner function cg_precondfwd(out::Array, p::Vector, A::Array) @@ -101,8 +101,8 @@ macro cgtrace() end end -function cg{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function cg{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; constraints::AbstractConstraints = ConstraintsNone(), interior::Bool = false, diff --git a/src/constraints.jl b/src/constraints.jl index 5a382ae67..365b7a5e8 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -15,8 +15,8 @@ immutable ConstraintsBox{T,N} <: AbstractConstraints end end ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) -ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Nothing) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) -ConstraintsBox{T,N}(l::Nothing, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Void) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Void, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints A::M @@ -35,12 +35,12 @@ immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints new(A, l, u, scratch1, scratch2, scratch3) end end -ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Union(Vector{T},Matrix{T})) = 
ConstraintsL{T,typeof(A),ndims(l)}(A, l, u) -ConstraintsL{T}(A::AbstractMatrix, l::Union(Vector{T},Matrix{T}), u::Nothing) = ConstraintsL(A, l, infs(T, size(l))) -ConstraintsL{T}(A::AbstractMatrix, l::Nothing, u::Union(Vector{T},Matrix{T})) = ConstraintsL(A, -infs(T,size(u)), u) +ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Union{Vector{T},Matrix{T}}) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u) +ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Void) = ConstraintsL(A, l, infs(T, size(l))) +ConstraintsL{T}(A::AbstractMatrix, l::Void, u::Union{Vector{T},Matrix{T}}) = ConstraintsL(A, -infs(T,size(u)), u) # Generic constraints -immutable ConstraintsNL{T,F<:Union(Function,DifferentiableFunction,TwiceDifferentiableFunction)} <: AbstractConstraints +immutable ConstraintsNL{T,F<:Union{Function,DifferentiableFunction,TwiceDifferentiableFunction}} <: AbstractConstraints funcs::Vector{F} lower::Vector{T} upper::Vector{T} diff --git a/src/fminbox.jl b/src/fminbox.jl index 861316433..1362f4c02 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -105,7 +105,7 @@ end const PARAMETERS_MU = one64< Date: Wed, 23 Sep 2015 18:39:33 -0500 Subject: [PATCH 16/28] More 0.4 fixes --- REQUIRE | 2 +- src/Optim.jl | 4 +++- src/interior.jl | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/REQUIRE b/REQUIRE index da0d390b7..b55322abe 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.4 +julia 0.4- Calculus DualNumbers Compat diff --git a/src/Optim.jl b/src/Optim.jl index bc3afb7c7..29ea243fb 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,7 +1,9 @@ +isdefined(Base, :__precompile__) && __precompile__() + module Optim using Calculus using Compat - + import Base.dot, Base.length, Base.push!, diff --git a/src/interior.jl b/src/interior.jl index 71e939b1b..29592502a 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -177,7 +177,7 @@ linlsq(A::Matrix, b::Vector) = (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), (x,H) -> linlsq_h!(x, H, A, b)) -dummy_f (x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_f(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) @@ -336,8 +336,8 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl local results iteration, f_calls, g_calls = 0, 0, 0 while m/t > eps_gap || iteration == 0 - df = DifferentiableFunction( x -> combined_f (x, objective.f, constraints, t), - (x,g) -> combined_g! 
(x, g, objective.g!, constraints, t), + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g!(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) From c2b7cda18ab4134201d1ec9b8a1ca4beff8d38a8 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 21 Oct 2015 12:55:10 -0500 Subject: [PATCH 17/28] Guarantee that the initial direction is a descent direction for objective w/o constraints This prevents the barrier penalty from kicking the solution out of the starting basin --- src/interior.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 29592502a..f4be826ce 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -203,6 +203,11 @@ linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) ## Interior point methods ## ######################### +# Follows the notation of +# H. Hindi (2006), "A Tutorial on Convex Optimization II: Duality and +# Interior Point Methods," Proc. 2006 American Control Conference. +# particularly his "Algorithm: Barrier method." + function interior_newton{T}(objective::TwiceDifferentiableFunction, initial_x::Vector{T}, constraints::AbstractConstraints; @@ -301,8 +306,8 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl show_trace::Bool = false, extended_trace::Bool = false) if isnan(t) - vo, vc, go, gc = gnorm(objective, constraints, initial_x) - if isfinite(vo) && isfinite(vc) && go < grtol + vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && sqrt(gogo) < grtol return MultivariateOptimizationResults("Interior", initial_x, initial_x, @@ -319,10 +324,10 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl 0, 0) end - if gc == 0 + if gogc == 0 t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. else - t = gc/(convert(T,0.1)*go) + t = convert(T, 10*abs(gogc)/gogo) end end if method == :newton @@ -352,6 +357,18 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl results end +""" +`vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x)` +computes the values `vo`, `vc` of the `objective` and `constraints` at +position `initial_x`, as well as their gradients. `gogo` is the dot +product of the objective gradient with itself; `gogc` is the dot +product of the objective gradient and the constraint gradient. + +The latter two can be used to initialize the tradeoff between +objective and constraints so that the total gradient `t*go + gc` is +still a descent direction for the objective. This ensures that the +barrier penalty will not push the solution out of the starting basin. 
+""" function gnorm(objective, constraints, initial_x) go = similar(initial_x) gc = similar(initial_x) From 92a1c08a8be435d79d6f51862872167c874453d9 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 21 Oct 2015 12:57:08 -0500 Subject: [PATCH 18/28] Pass constraints through to minimizers For box constraints, this allows the algorithm to pick alphamax so the constraints will not be violated --- src/interior.jl | 2 +- src/l_bfgs.jl | 18 +++++++++++++++++- src/optimize.jl | 12 ++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index f4be826ce..262ab4f0e 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -344,7 +344,7 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), (x,g) -> combined_g!(x, g, objective.g!, constraints, t), (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) - results = optimize(df, x, method=method, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + results = optimize(df, x, method=method, constraints=constraints, interior=true, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) copy!(x, results.minimum) iteration += results.iterations f_calls += results.f_calls diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 0a2aacb22..a6321c20c 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -83,6 +83,8 @@ end function l_bfgs{T}(d::Union{DifferentiableFunction, TwiceDifferentiableFunction}, initial_x::Vector{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, m::Integer = 10, xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -158,12 +160,26 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Refresh the line search cache dphi0 = dot(gr, s) + if dphi0 > 0 + iteration = 1 + for i = 1:n + @inbounds s[i] = -gr[i] + end + dphi0 = dot(gr, s) + end clear!(lsr) push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) + + # Pick the initial step size (HZ #I1-I2) + alpha, mayterminate, f_update, g_update = + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + f_calls, g_calls = f_calls + f_update, g_calls + g_update + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position diff --git a/src/optimize.jl b/src/optimize.jl index a96b6c37c..d2a3589d3 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,6 +1,8 @@ function optimize(d::TwiceDifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -41,6 +43,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -67,6 +71,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -94,6 +100,8 @@ end function optimize(d::DifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -134,6 +142,8 @@ function optimize(d::DifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -160,6 +170,8 @@ function optimize(d::DifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, From ce8df0ade5793824cf72611caef20786e685871e Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 22 Oct 2015 13:05:53 -0500 Subject: [PATCH 19/28] Automatically try a restart if we think we've converged This should cover cases where dphi < 0 "by a hair," i.e., where the search direction is almost orthogonal to the gradient. It's an extra layer of security beyond commit 50b9d60a9788798e68f010cd5a84d2b89626e582 to increase the likelihood that when we terminate, we really have reached a minimum. --- src/l_bfgs.jl | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index a6321c20c..cd11f8052 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -150,6 +150,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Iterate until convergence converged = false + converged_iteration = typemin(Int) while !converged && iteration < iterations # Increment the number of steps we've had to perform iteration += 1 @@ -160,21 +161,28 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, # Refresh the line search cache dphi0 = dot(gr, s) - if dphi0 > 0 + if dphi0 >= 0 + # The search direction is not a descent direction. Something + # is wrong, so restart the search-direction algorithm. + # See also the "restart" below. iteration = 1 for i = 1:n @inbounds s[i] = -gr[i] end dphi0 = dot(gr, s) end + dphi0 == 0 && break # we're at a stationary point clear!(lsr) push!(lsr, zero(T), f_x, dphi0) alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) - # Pick the initial step size (HZ #I1-I2) + # Pick the initial step size (HZ #I1-I2). Even though we might + # guess alpha=1 for l_bfgs most of the time, testing suggests + # this is still usually a good idea (fewer total function and + # gradient evaluations). alpha, mayterminate, f_update, g_update = - alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Determine the distance of movement along the search line @@ -226,8 +234,28 @@ grtol) @lbfgstrace + + # If we think we've converged, restart and make sure we don't + # have another long descent. This should catch the case where + # dphi < 0 "by a hair," meaning that the chosen search direction + # happened to be nearly orthogonal to the gradient. + if converged + if iteration <= 3 || converged_iteration >= 0 + if iteration <= max(m, 3) + break # we converged quickly after previous restart + end + converged_iteration += iteration + else + converged_iteration = iteration + end + converged = false + iteration = 0 # the next search direction will be -gr + end end + if converged_iteration >= 0 + iteration += converged_iteration + end return MultivariateOptimizationResults("L-BFGS", initial_x, x, From 28ce67613e8be3a750d2811c6991a8d4e795bf63 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 30 Nov 2015 20:05:15 -0600 Subject: [PATCH 20/28] Ensure that the search direction is a descent direction This no longer assumes that H is positive-definite, instead introducing a check and fix for cases where it does not already hold. --- src/interior.jl | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index 262ab4f0e..1c9d26764 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -244,7 +244,30 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - cH = cholfact!(H) + # To ensure descent, the Hessian must be positive-definite. + # As needed, add positive elements to the diagonal.
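Why this guarantees descent: for positive-definite A, the step s = -(A \ gr) satisfies dot(s, gr) = -dot(gr, A \ gr) < 0. A standalone sketch of the diagonal-boosting idea, using isposdef in place of the direct LAPACK potrf! call in the implementation below (make_descent_step is an illustrative name only):

function make_descent_step(H::Matrix{Float64}, gr::Vector{Float64})
    A = copy(H)
    lambda = sqrt(eps(Float64))
    while !isposdef(A)                          # boost the diagonal, retrying with
        for i = 1:size(H, 1)                    # a 10x larger boost each pass
            h = abs(H[i, i])
            A[i, i] = h > 0 ? (1 + lambda)*h : lambda
        end
        lambda *= 10
    end
    -(A \ gr)
end

H = [1.0 2.0; 2.0 1.0]            # indefinite: eigenvalues 3 and -1
s = make_descent_step(H, [1.0, 0.0])
dot(s, [1.0, 0.0]) < 0            # true: a descent direction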
+ # cH = cholfact!(H) + A = copy(H) + retry = true + λ = sqrt(eps(eltype(H))) + while retry + retry = false + C, info = Base.LinAlg.LAPACK.potrf!('U', A) + if info != 0 + copy!(A, H) + for i = 1:size(H,1) + h = abs(H[i,i]) + if h > 0 + A[i,i] = (1+λ)*h + else + A[i,i] = λ + end + end + λ *= 10 + retry = true + end + end + cH = Base.LinAlg.Cholesky(A, :U) s = -(cH\gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) From dc4a4c9f88336e1319e725c50babe792b170045a Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:42:25 -0600 Subject: [PATCH 21/28] interior_newton: increment t if the step converged --- src/interior.jl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index 1c9d26764..e40b51f73 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -239,6 +239,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, f_calls, g_calls = 0, 0, 0 f_x = f_x_previous = convert(T,Inf) + x_converged = f_converged = gr_converged = converged = true while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) @@ -281,24 +282,24 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_calls += _f_calls g_calls += _g_calls f_x /= t - if alphamax >= 1 + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + grtol) + if alphamax >= 1 || converged t *= mu df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) end end - x_converged, - f_converged, - gr_converged, - converged = assess_convergence(x, - x_previous, - f_x, - f_x_previous, - gr, - xtol, - ftol, - grtol) MultivariateOptimizationResults("Interior/Newton", initial_x, x, From 67c9996d54410528a1800c2f72e4eaf7dc43e9a2 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:44:15 -0600 Subject: [PATCH 22/28] Fix returned function value when there are 0 parameters --- src/interior.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/interior.jl b/src/interior.jl index e40b51f73..bda1de043 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -240,6 +240,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, iteration, f_calls, g_calls = 0, 0, 0 f_x = f_x_previous = convert(T,Inf) x_converged = f_converged = gr_converged = converged = true + if m/t < eps_gap + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + end while m/t > eps_gap && iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) From be3324acf0345a8abea250b2b888e23c815831b6 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Fri, 18 Dec 2015 08:44:48 -0600 Subject: [PATCH 23/28] Improvements to positive-definite newton step --- src/interior.jl | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index bda1de043..b3d6ed810 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,31 +248,9 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - # To ensure descent, the Hessian must be positive-definite. - # As needed, add positive elements to the diagonal. 
# cH = cholfact!(H) - A = copy(H) - retry = true - λ = sqrt(eps(eltype(H))) - while retry - retry = false - C, info = Base.LinAlg.LAPACK.potrf!('U', A) - if info != 0 - copy!(A, H) - for i = 1:size(H,1) - h = abs(H[i,i]) - if h > 0 - A[i,i] = (1+λ)*h - else - A[i,i] = λ - end - end - λ *= 10 - retry = true - end - end - cH = Base.LinAlg.Cholesky(A, :U) - s = -(cH\gr) + # To ensure descent, the Hessian must be positive-definite. + s = newtonstep(H, gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) alphamax = toedge(x, s, constraints) @@ -409,3 +387,26 @@ function gnorm(objective, constraints, initial_x) end vo, vc, gom, gcm end + +function newtonstep(H, gr) + A = copy(H) + # Direct use of potrf means we don't have to compute the Cholesky + # decomposition twice (once in `isposdef` and again by `cholfact`) + _, info = Base.LinAlg.LAPACK.potrf!('L', A) + if info == 0 + cH = Base.LinAlg.Cholesky(A, :L) + return -(cH\gr) + end + D, V = eig(H) # very expensive, but see Optim #153 + Dabs = abs(D) + Dmax = maximum(Dabs) + if Dmax == 0 + return -gr + end + δ = sqrt(eps(eltype(D))) + reliable = Dabs .> δ*Dmax + Dinv = similar(D) + Dinv[reliable] = 1./Dabs[reliable] + Dinv[!reliable] = 1/Dmax + return -(V*(Dinv .* (V'*gr))) +end From afccefce94d686aecb64bdb3ed012a1a530d07ba Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 26 Jan 2016 08:24:20 -0600 Subject: [PATCH 24/28] Use PositiveFactorizations for the newton step --- REQUIRE | 3 ++- src/Optim.jl | 2 +- src/interior.jl | 25 +------------------------ 3 files changed, 4 insertions(+), 26 deletions(-) diff --git a/REQUIRE b/REQUIRE index b55322abe..01c9f80aa 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,5 @@ -julia 0.4- +julia 0.4 Calculus DualNumbers Compat +PositiveFactorizations diff --git a/src/Optim.jl b/src/Optim.jl index 29ea243fb..d100b3871 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,7 +1,7 @@ isdefined(Base, :__precompile__) && __precompile__() module Optim - using Calculus + using Calculus, PositiveFactorizations using Compat import Base.dot, diff --git a/src/interior.jl b/src/interior.jl index b3d6ed810..ec9fde3ea 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,8 +248,6 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) @newtontrace - # cH = cholfact!(H) - # To ensure descent, the Hessian must be positive-definite. 
s = newtonstep(H, gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) @@ -388,25 +386,4 @@ function gnorm(objective, constraints, initial_x) vo, vc, gom, gcm end -function newtonstep(H, gr) - A = copy(H) - # Direct use of potrf means we don't have to compute the Cholesky - # decomposition twice (once in `isposdef` and again by `cholfact`) - _, info = Base.LinAlg.LAPACK.potrf!('L', A) - if info == 0 - cH = Base.LinAlg.Cholesky(A, :L) - return -(cH\gr) - end - D, V = eig(H) # very expensive, but see Optim #153 - Dabs = abs(D) - Dmax = maximum(Dabs) - if Dmax == 0 - return -gr - end - δ = sqrt(eps(eltype(D))) - reliable = Dabs .> δ*Dmax - Dinv = similar(D) - Dinv[reliable] = 1./Dabs[reliable] - Dinv[!reliable] = 1/Dmax - return -(V*(Dinv .* (V'*gr))) -end +newtonstep(H, gr) = cholfact(Positive, H)\(-gr) From dad89d22bc10f4167530d0bd628c003e1e7ed28c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 07:51:59 -0600 Subject: [PATCH 25/28] Change convergence criteria in interior_newton to monitor solution, not eps_gap eps_gap introduced an absolute scale for convergence, which seemed problematic. This should be more robust, as it monitors the characteristics of the solution. It should also avoid making changes to the barrier that no longer have a consequence for the solution; previous approaches introduced numerical stability problems when the barrier penalty became extreme. --- src/interior.jl | 36 ++++++++++++++++++++++++++---------- test/constrained.jl | 16 +++++++++++++++- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index ec9fde3ea..c5d93942b 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -211,7 +211,7 @@ linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) function interior_newton{T}(objective::TwiceDifferentiableFunction, initial_x::Vector{T}, constraints::AbstractConstraints; - t = one(T), mu = 10, eps_gap = 1e-12, + t = one(T), mu = 10, eps_gap = convert(T,NaN), xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -237,18 +237,19 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) - iteration, f_calls, g_calls = 0, 0, 0 + iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 f_x = f_x_previous = convert(T,Inf) x_converged = f_converged = gr_converged = converged = true if m/t < eps_gap f_x = combined_fg!(x, gr, objective.fg!, constraints, t) end - while m/t > eps_gap && iteration < iterations +# while m/t > eps_gap && iteration < iterations + while iteration < iterations f_x_previous = f_x f_x = combined_fg!(x, gr, objective.fg!, constraints, t) combined_h!(x, H, objective.h!, constraints, t) - @newtontrace - s = newtonstep(H, gr) + F, Hd = ldltfact!(Positive, H) + s = F\(-gr) clear!(lsr) push!(lsr, zero(T), f_x, dot(s,gr)) alphamax = toedge(x, s, constraints) @@ -258,9 +259,11 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, copy!(x_previous, x) step!(x, x, s, alpha, constraints) iteration += 1 + iter_t += 1 f_calls += _f_calls g_calls += _g_calls f_x /= t + @newtontrace x_converged, f_converged, gr_converged, @@ -272,8 +275,20 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, xtol, ftol, grtol) - if alphamax >= 1 || converged + # It's time to change the barrier penalty if we're + # sufficiently "in the basin." 
That means (1) the Hessian is + # positive (semi)definite (all(Hd .> 0)) and that the Hessian + # step is accepted (alpha is approximately 1). The latter + # suggests that the Hessian is a reliable predictor of the + # function shape, and that within a few iterations we'd be at + # the minimum. + if (alpha >= 0.9 && all(Hd .>= 0)) || converged + if iter_t == 1 && converged + # if changing t didn't change the solution, quit now + break + end t *= mu + iter_t = 0 df = DifferentiableFunction(dummy_f, dummy_g!, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) @@ -300,7 +315,7 @@ end function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiableFunction}, initial_x::Array{T}, constraints::AbstractConstraints; - method = :cg, t = convert(T,NaN), mu = 10, eps_gap = 1e-12, + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = convert(T,NaN), xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -333,8 +348,11 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl t = convert(T, 10*abs(gogc)/gogo) end end + if isnan(eps_gap) + eps_gap = (cbrt(eps(T)))^2/t + end if method == :newton - return iterior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME end if !feasible(initial_x, constraints) error("Initial guess must be feasible") @@ -385,5 +403,3 @@ function gnorm(objective, constraints, initial_x) end vo, vc, gom, gcm end - -newtonstep(H, gr) = cholfact(Positive, H)\(-gr) diff --git a/test/constrained.jl b/test/constrained.jl index 8814afa2d..6a3b2d081 100644 --- a/test/constrained.jl +++ b/test/constrained.jl @@ -1,4 +1,4 @@ -import Optim +using Optim using Base.Test # Quadratic objective function @@ -55,3 +55,17 @@ A = [1.0 2.0; 3.0 4.0] b = [1.0,1.0] results = Optim.nnls(A, b) @test norm(results.minimum - [0,0.3]) < 1e-6 + +# 1d newton +let f(x) = (z = x[1]; -2z^2 + z^4), + g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x), + h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H), + objective = TwiceDifferentiableFunction(f, g!, (x,gr) -> (g!(x, gr); f(x)), h!), + constr = ConstraintsBox([-0.9],[0.9]) + + for x0 in (0.1,0.2,0.3,0.5,0.7) + x = [x0] + soln = interior(objective, x, constr, method=:newton) + @test_approx_eq_eps soln.minimum[1] 0.9 1e-6 + end +end From b66e6b6b23b4f886e298cf6160420d78a139c163 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 09:06:26 -0600 Subject: [PATCH 26/28] dot->vecdot --- src/Optim.jl | 3 +-- src/cg.jl | 16 ++++++++-------- src/gradient_descent.jl | 2 +- src/interior.jl | 2 +- src/l_bfgs.jl | 6 +++--- src/linesearch/hz_linesearch.jl | 6 +++--- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/Optim.jl b/src/Optim.jl index d100b3871..0fcb088e9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -4,8 +4,7 @@ module Optim using Calculus, PositiveFactorizations using Compat - import Base.dot, - Base.length, + import Base.length, Base.push!, Base.show, Base.getindex, diff --git a/src/cg.jl b/src/cg.jl index 700cf0805..f827528ea 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Void, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Void, B::Array) = dot(A, B) +cg_precondfwddot(A::Array, P::Void, B::Array) = vecdot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = vecdot(A, B) # Diagonal preconditioner function 
cg_precondfwd(out::Array, p::Vector, A::Array) @@ -195,12 +195,12 @@ function cg{T}(df::Union{DifferentiableFunction, iteration += 1 # Reset the search direction if it becomes corrupted - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 for i in 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 < 0 break end @@ -263,14 +263,14 @@ function cg{T}(df::Union{DifferentiableFunction, # Calculate the beta factor (HZ2012) precondprep(P, x) dPd = cg_precondinvdot(s, P, s) - etak::T = eta * dot(s, gr_previous) / dPd + etak::T = eta * vecdot(s, gr_previous) / dPd for i in 1:n @inbounds y[i] = gr[i] - gr_previous[i] end - ydots = dot(y, s) + ydots = vecdot(y, s) cg_precondfwd(pgr, P, gr) - betak = (dot(y, pgr) - cg_precondfwddot(y, P, y) * - dot(gr, s) / ydots) / ydots + betak = (vecdot(y, pgr) - cg_precondfwddot(y, P, y) * + vecdot(gr, s) / ydots) / ydots beta = max(betak, etak) for i in 1:n @inbounds s[i] = beta * s[i] - pgr[i] diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index d884354f4..acdd32fa8 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -85,7 +85,7 @@ function gradient_descent{T}(d::Union{DifferentiableFunction, end # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) clear!(lsr) push!(lsr, zero(T), f_x, dphi0) diff --git a/src/interior.jl b/src/interior.jl index c5d93942b..d6b0e73a3 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -251,7 +251,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, F, Hd = ldltfact!(Positive, H) s = F\(-gr) clear!(lsr) - push!(lsr, zero(T), f_x, dot(s,gr)) + push!(lsr, zero(T), f_x, vecdot(s,gr)) alphamax = toedge(x, s, constraints) alpha = alphamax < 1 ? 0.9*alphamax : 1.0 alpha, _f_calls, _g_calls = diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index cd11f8052..f3c5dade6 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -160,7 +160,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, twoloop_alpha, twoloop_q) # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 # The search direction is not a descent direction. Something # is wrong, so restart the search-direction algorithm. @@ -169,7 +169,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, for i = 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) end dphi0 == 0 && break # we're at a stationary point clear!(lsr) @@ -212,7 +212,7 @@ function l_bfgs{T}(d::Union{DifferentiableFunction, end # Update the L-BFGS history of positions and gradients - rho_iteration = 1 / dot(dx, dgr) + rho_iteration = 1 / vecdot(dx, dgr) if isinf(rho_iteration) # TODO: Introduce a formal error? 
There was a warning here previously break diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index a250b3cb4..7282780e8 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -1,7 +1,7 @@ # phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g) # Dot product of two "vectors", even if they don't have a vector shape -function dot(x::Array, y::Array) +function vecdot(x::Array, y::Array) d = x[1] * y[1] for i = 2:length(x) d += x[i] * y[i] @@ -10,7 +10,7 @@ function dot(x::Array, y::Array) end # Vector-norm-squared, even if it doesn't have a vector shape -norm2(x::Array) = dot(x, x) +vecnorm2(x::Array) = vecdot(x, x) # Display flags are represented as a bitfield # (not exported, but can use via OptimizeMod.ITER, for example) @@ -612,7 +612,7 @@ function linefunc!{T}(df::Union{DifferentiableFunction, f_calls += 1 g_calls += 1 if isfinite(val) - gphi = dot(g, s) + gphi = vecdot(g, s) end else val = df.f(xtmp) From db76ca0922f458dbf2da14169d4daf696b9a1282 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 13:16:34 -0600 Subject: [PATCH 27/28] interior_newton: pass options, bail if encounter numeric instability Also cleans up a bit of the tracing & convergence-testing --- src/interior.jl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/interior.jl b/src/interior.jl index d6b0e73a3..4e2ac3e27 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -239,10 +239,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 f_x = f_x_previous = convert(T,Inf) - x_converged = f_converged = gr_converged = converged = true - if m/t < eps_gap - f_x = combined_fg!(x, gr, objective.fg!, constraints, t) - end + x_converged = f_converged = gr_converged = converged = false # while m/t > eps_gap && iteration < iterations while iteration < iterations f_x_previous = f_x @@ -250,8 +247,16 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, combined_h!(x, H, objective.h!, constraints, t) F, Hd = ldltfact!(Positive, H) s = F\(-gr) + sgr = vecdot(s, gr) + if !(sgr <= 0) + # Presumably due to numeric instability + converged = x_converged = f_converged = gr_converged = false + break + end clear!(lsr) - push!(lsr, zero(T), f_x, vecdot(s,gr)) + push!(lsr, zero(T), f_x, sgr) + f_x /= t + @newtontrace alphamax = toedge(x, s, constraints) alpha = alphamax < 1 ? 0.9*alphamax : 1.0 alpha, _f_calls, _g_calls = @@ -262,8 +267,6 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, iter_t += 1 f_calls += _f_calls g_calls += _g_calls - f_x /= t - @newtontrace x_converged, f_converged, gr_converged, @@ -274,7 +277,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, gr, xtol, ftol, - grtol) + t*grtol) # It's time to change the barrier penalty if we're # sufficiently "in the basin." 
That means (1) the Hessian is # positive (semi)definite (all(Hd .> 0)) and that the Hessian @@ -352,7 +355,7 @@ function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiabl eps_gap = (cbrt(eps(T)))^2/t end if method == :newton - return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap) # FIXME + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) end if !feasible(initial_x, constraints) error("Initial guess must be feasible") From 2943c3f9b6886e527bb5b3e982f010661d7b9b1c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 27 Jan 2016 15:44:48 -0600 Subject: [PATCH 28/28] interior_newton: also check for finiteness --- src/interior.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interior.jl b/src/interior.jl index 4e2ac3e27..6f25434c9 100644 --- a/src/interior.jl +++ b/src/interior.jl @@ -248,7 +248,7 @@ function interior_newton{T}(objective::TwiceDifferentiableFunction, F, Hd = ldltfact!(Positive, H) s = F\(-gr) sgr = vecdot(s, gr) - if !(sgr <= 0) + if !(sgr <= 0) || isinf(sgr) # Presumably due to numeric instability converged = x_converged = f_converged = gr_converged = false break
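Finally, an end-to-end usage sketch drawn from the test/constrained.jl addition above: minimize f(z) = -2z^2 + z^4, whose unconstrained minima sit at z = ±1, inside the box [-0.9, 0.9]; the interior-point Newton path should terminate on the active bound at 0.9, to within the convergence tolerances:

f(x) = (z = x[1]; -2z^2 + z^4)
g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x)
h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H)
objective = TwiceDifferentiableFunction(f, g!, (x, gr) -> (g!(x, gr); f(x)), h!)
constr = ConstraintsBox([-0.9], [0.9])
soln = interior(objective, [0.2], constr, method = :newton)
soln.minimum[1]   # ≈ 0.9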