diff --git a/REQUIRE b/REQUIRE index 3ef4db562..01c9f80aa 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,5 @@ -julia 0.2- +julia 0.4 Calculus DualNumbers +Compat +PositiveFactorizations diff --git a/src/Optim.jl b/src/Optim.jl index 64d10de41..0fcb088e9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,20 +1,28 @@ +isdefined(Base, :__precompile__) && __precompile__() + module Optim - using Calculus + using Calculus, PositiveFactorizations + using Compat - import Base.dot, - Base.length, + import Base.length, Base.push!, Base.show, Base.getindex, Base.setindex! export optimize, + interior, + linlsq, DifferentiableFunction, - TwiceDifferentiableFunction + TwiceDifferentiableFunction, + ConstraintsBox # Types include("types.jl") + # Types for constrained optimization + include("constraints.jl") + # Automatic differentiation utilities include("autodiff.jl") @@ -65,11 +73,17 @@ module Optim include("golden_section.jl") include("brent.jl") + # Constrained optimization algorithms + include("interior.jl") + # End-User Facing Wrapper Functions include("optimize.jl") - # Examples for testing - include(joinpath("problems", "unconstrained.jl")) - cgdescent(args...) = error("API has changed. Please use cg.") + + # Tests + const basedir = dirname(Base.source_path()) + const testpaths = [joinpath(basedir, "problems", "unconstrained.jl"), + joinpath(basedir, "problems", "constrained.jl")] + end diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl index e9de5b121..1f0f41bca 100644 --- a/src/accelerated_gradient_descent.jl +++ b/src/accelerated_gradient_descent.jl @@ -139,7 +139,7 @@ function accelerated_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Accelerated Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/bfgs.jl b/src/bfgs.jl index 5a6073b3e..cf2331c1e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -23,8 +23,8 @@ macro bfgstrace() end end -function bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; initial_invH::Matrix = eye(length(initial_x)), xtol::Real = 1e-32, @@ -97,7 +97,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, # Increment the number of steps we've had to perform iteration += 1 - # Set the search direction + # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian A_mul_B!(s, invH, gr) for i in 1:n @@ -178,7 +178,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/brent.jl b/src/brent.jl index b3d4fa0b6..32ea74d8b 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -18,7 +18,7 @@ macro brenttrace() end end -function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function brent{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, @@ -33,13 +33,13 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) f_minimum = f(x_minimum) f_calls = 1 # Number of calls to f - + step = zero(T) step_old = zero(T) @@ 
-48,7 +48,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; f_minimum_old = f_minimum f_minimum_old_old = f_minimum - + it = 0 converged = false @@ -72,12 +72,12 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if abs(step_old) > tolx # Compute parabola interpolation # x_minimum + p/q is the optimum of the parabola # Also, q is guaranteed to be positive - + r = (x_minimum - x_minimum_old) * (f_minimum - f_minimum_old_old) q = (x_minimum - x_minimum_old_old) * (f_minimum - f_minimum_old) p = (x_minimum - x_minimum_old_old) * q - (x_minimum - x_minimum_old) * r @@ -150,7 +150,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/cg.jl b/src/cg.jl index fb2834344..f827528ea 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner -cg_precondfwd(out::Array, P::Nothing, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Nothing, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Nothing, B::Array) = dot(A, B) +cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) +cg_precondfwddot(A::Array, P::Void, B::Array) = vecdot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = vecdot(A, B) # Diagonal preconditioner function cg_precondfwd(out::Array, p::Vector, A::Array) @@ -89,7 +89,7 @@ macro cgtrace() dt["g(x)"] = copy(gr) dt["Current step size"] = alpha end - grnorm = norm(gr, Inf) + grnorm = norm(gr[:], Inf) update!(tr, iteration, f_x, @@ -101,9 +101,11 @@ macro cgtrace() end end -function cg{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function cg{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -117,7 +119,9 @@ function cg{T}(df::Union(DifferentiableFunction, precondprep::Function = (P, x) -> nothing) # Maintain current state in x and previous state in x_previous - x, x_previous = copy(initial_x), copy(initial_x) + x = copy(initial_x) + project!(x, constraints) + x_previous = copy(x) # Count the total number of iterations iteration = 0 @@ -146,7 +150,7 @@ function cg{T}(df::Union(DifferentiableFunction, # Store f(x) in f_x f_x = df.fg!(x, gr) @assert typeof(f_x) == T - f_x_previous = nan(T) + f_x_previous = convert(T,NaN) f_calls, g_calls = f_calls + 1, g_calls + 1 copy!(gr_previous, gr) @@ -191,12 +195,12 @@ function cg{T}(df::Union(DifferentiableFunction, iteration += 1 # Reset the search direction if it becomes corrupted - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 for i in 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 < 0 break end @@ -208,23 +212,28 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) + # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = - alphatry(alpha, df, x, s, x_ls, gr_ls, lsr) + alphatry(alpha, df, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update + if alpha == zero(T) + x_converged = true + break + end + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position copy!(x_previous, x) # Update current position - for i in 1:n - @inbounds x[i] = x[i] + alpha * s[i] - end + step!(x, x, s, alpha, constraints) # Maintain a record of the previous gradient copy!(gr_previous, gr) @@ -254,14 +263,14 @@ function cg{T}(df::Union(DifferentiableFunction, # Calculate the beta factor (HZ2012) precondprep(P, x) dPd = cg_precondinvdot(s, P, s) - etak::T = eta * dot(s, gr_previous) / dPd + etak::T = eta * vecdot(s, gr_previous) / dPd for i in 1:n @inbounds y[i] = gr[i] - gr_previous[i] end - ydots = dot(y, s) + ydots = vecdot(y, s) cg_precondfwd(pgr, P, gr) - betak = (dot(y, pgr) - cg_precondfwddot(y, P, y) * - dot(gr, s) / ydots) / ydots + betak = (vecdot(y, pgr) - cg_precondfwddot(y, P, y) * + vecdot(gr, s) / ydots) / ydots beta = max(betak, etak) for i in 1:n @inbounds s[i] = beta * s[i] - pgr[i] @@ -273,7 +282,7 @@ function cg{T}(df::Union(DifferentiableFunction, return MultivariateOptimizationResults("Conjugate Gradient", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/constraints.jl b/src/constraints.jl new file mode 100644 index 000000000..365b7a5e8 --- /dev/null +++ b/src/constraints.jl @@ -0,0 +1,154 @@ +abstract AbstractConstraints + +immutable ConstraintsNone <: AbstractConstraints end + +immutable ConstraintsBox{T,N} <: AbstractConstraints + lower::Array{T,N} + upper::Array{T,N} + + function ConstraintsBox(l::AbstractArray{T}, u::AbstractArray{T}) + size(l) == size(u) || error("The sizes of the bounds must match") + for i = 1:length(l) + l[i] <= u[i] || error("The lower bound must be smaller than the upper bound") + end + new(l, u) + end +end +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Void) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Void, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) + +immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints + A::M + lower::Array{T,N} + upper::Array{T,N} + scratch1::Array{T,N} + scratch2::Array{T,N} + scratch3::Array{T,N} + + function ConstraintsL(A, l::Array{T,N}, u::Array{T,N}, + scratch1 = similar(l), scratch2 = similar(l), scratch3 = Array(T, ndims(l) == 1 ? 
size(A, 2) : (size(A,2),size(l,2))))
+        size(A, 1) == size(l,1) == size(u,1) || error("The sizes of the bounds must match the size of A")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(A, l, u, scratch1, scratch2, scratch3)
+    end
+end
+ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Union{Vector{T},Matrix{T}}) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u)
+ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Void) = ConstraintsL(A, l, fill(convert(T,Inf), size(l)))
+ConstraintsL{T}(A::AbstractMatrix, l::Void, u::Union{Vector{T},Matrix{T}}) = ConstraintsL(A, fill(-convert(T,Inf), size(u)), u)
+
+# Generic constraints
+immutable ConstraintsNL{T,F<:Union{Function,DifferentiableFunction,TwiceDifferentiableFunction}} <: AbstractConstraints
+    funcs::Vector{F}
+    lower::Vector{T}
+    upper::Vector{T}
+
+    function ConstraintsNL(funcs::Vector, l::AbstractVector{T}, u::AbstractVector{T})
+        length(funcs) == length(l) == length(u) || error("The number of constraint functions must match the lengths of the bounds")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(funcs, l, u)
+    end
+end
+
+type Constraints
+    bounds::AbstractConstraints
+    linear::AbstractConstraints
+    nonlinear::AbstractConstraints
+    mu::Real
+end
+Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), 1.0)
+
+
+## feasible
+feasible(x, constraints::ConstraintsNone) = true
+
+function feasible(x, constraints::ConstraintsBox)
+    l = constraints.lower
+    u = constraints.upper
+    for i = 1:length(x)
+        l[i] <= x[i] <= u[i] || return false
+    end
+    true
+end
+
+function feasible(x, constraints::ConstraintsL)
+    l = constraints.lower
+    u = constraints.upper
+    y = constraints.scratch1
+    A_mul_B!(y, constraints.A, x)
+    for i = 1:length(y)
+        l[i] <= y[i] <= u[i] || return false
+    end
+    true
+end
+
+## project!
+project!(x, constraints::AbstractConstraints) = x
+
+function project!(x::Array, bounds::ConstraintsBox)
+    for i = 1:length(x)
+        x[i] = max(bounds.lower[i], min(bounds.upper[i], x[i]))
+    end
+    x
+end
+
+## step!
+function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone())
+    (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree")
+    for i = 1:length(xtmp)
+        @inbounds xtmp[i] = x[i] + alpha*s[i]
+    end
+    project!(xtmp, constraints)
+end
+
+
+## toedge and tocorner
+# For bounds constraints,
+# - toedge returns the "distance along s" (the value of the coefficient alpha) to reach the boundary
+# - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active)
+# The first is the farthest you might consider searching in a barrier method. The second is the farthest that it
+# makes sense to search in a projection method.
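To make these two conventions concrete, here is a small worked example (an editor's illustration, not part of the patch; it uses the `ConstraintsBox`, `toedge`, and `tocorner` definitions from this file):

    using Optim
    # From x = [0.0, 0.0], moving along s = [1.0, 2.0] inside the box
    # lower = [-1.0, -1.0], upper = [1.0, 4.0]:
    #   coordinate 1 reaches its upper bound at alpha = (1.0 - 0.0)/1.0 = 1.0
    #   coordinate 2 reaches its upper bound at alpha = (4.0 - 0.0)/2.0 = 2.0
    bounds = Optim.ConstraintsBox([-1.0, -1.0], [1.0, 4.0])
    Optim.toedge([0.0, 0.0], [1.0, 2.0], bounds)    # 1.0: first bound hit, the limit for a barrier method
    Optim.tocorner([0.0, 0.0], [1.0, 2.0], bounds)  # 2.0: last bound hit, the limit for a projection method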
+toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) + +function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the first coordinate to leave the box + alphamax = convert(T,Inf) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = min(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = min(alphamax, (ui-x[i])/si) + end + end + convert(T, alphamax) +end + +toedge(x, s, constraints::Constraints) = toedge(x, s, constraints.bounds) + +function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the last coordinate to leave the box + alphamax = zero(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = max(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = max(alphamax, (ui-x[i])/si) + else + return convert(T,Inf) + end + end + convert(T, alphamax) +end + +tocorner(x, s, constraints::Constraints) = tocorner(x, s, constraints.bounds) diff --git a/src/fminbox.jl b/src/fminbox.jl index 19da30a36..1362f4c02 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -1,7 +1,7 @@ # Attempt to compute a reasonable default mu: at the starting # position, the gradient of the input function should dominate the # gradient of the barrier. -function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = nan(T), mu0factor::T = 0.001) +function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = convert(T,NaN), mu0factor::T = 0.001) if isnan(mu0) gbarriernorm = sum(abs(gbarrier)) if gbarriernorm > 0 @@ -26,7 +26,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisl) dx = x[i] - thisl if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -41,7 +41,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisu) dx = thisu - x[i] if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -76,7 +76,7 @@ function barrier_combined{T}(x::Array{T}, g, gfunc, gbarrier, val_each::Vector{T end function limits_box{T}(x::Array{T}, d::Array{T}, l::Array{T}, u::Array{T}) - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) if d[i] < 0 alphamax = min(alphamax, ((l[i]-x[i])+eps(l[i]))/d[i]) @@ -105,20 +105,20 @@ end const PARAMETERS_MU = one64< precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -145,14 +145,14 @@ function fminbox{T}(df::DifferentiableFunction, gbarrier[i] = (isfinite(thisl) ? one(T)/(thisx-thisl) : zero(T)) + (isfinite(thisu) ? one(T)/(thisu-thisx) : zero(T)) end valfunc = df.fg!(x, gfunc) # is this used?? - mu = initialize_mu(gfunc, gbarrier; mu0factor=mufactor) + mu = isnan(mu0) ? 
initialize_mu(gfunc, gbarrier; mu0factor=mufactor) : mu0 if show_trace > 0 println("######## fminbox ########") println("Initial mu = ", mu) end g = similar(x) - valboth = Array(T, 2) + valboth = Array(T, 2) fval_all = Array(Vector{T}, 0) fcount_all = 0 xold = similar(x) diff --git a/src/golden_section.jl b/src/golden_section.jl index dce3da090..55b1a2e0d 100644 --- a/src/golden_section.jl +++ b/src/golden_section.jl @@ -18,7 +18,7 @@ macro goldensectiontrace() end end -function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function golden_section{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, @@ -28,11 +28,11 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; if !(x_lower < x_upper) error("x_lower must be less than x_upper") end - + # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) @@ -46,7 +46,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace @goldensectiontrace - + while it < iterations tolx = rel_tol * abs(x_minimum) + abs_tol @@ -59,7 +59,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if x_upper - x_minimum > x_minimum - x_lower x_new = x_minimum + golden_ratio*(x_upper - x_minimum) f_new = f(x_new) @@ -91,7 +91,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index fc996f4d4..acdd32fa8 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -18,8 +18,8 @@ macro gdtrace() end end -function gradient_descent{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function gradient_descent{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -85,7 +85,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, end # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) clear!(lsr) push!(lsr, zero(T), f_x, dphi0) @@ -124,7 +124,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..6f25434c9 --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,408 @@ +############ +## +## Barriers +## +############ +# Note: barrier_g! and barrier_h! 
simply add to an existing g or H +# (they do not initialize with zeros) + +## Box constraints +function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return convert(T,Inf) + phi += log(xi-li) + end + if isfinite(ui) + xi <= ui || return convert(T,Inf) + phi += log(ui-xi) + end + end + -phi::T +end + +function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + g[i] -= 1/(xi-li) + g[i] += 1/(ui-xi) + end +end + +function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return convert(T,Inf) # no need to compute additional gradient terms + phi += log(xi-li) + g[i] -= 1/(xi-li) + end + if isfinite(ui) + xi <= ui || return convert(T,Inf) + phi += log(ui-xi) + g[i] += 1/(ui-xi) + end + end + -phi::T +end + +function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + dx = xi-li + H[i,i] += 1/(dx*dx) + dx = ui-xi + H[i,i] += 1/(dx*dx) + end +end + +## Linear constraints +function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) + m, n = size(constraints.A) + y1 = constraints.scratch1 + A_mul_B!(y1, constraints.A, x) + phi = zero(T) + l = constraints.lower + u = constraints.upper + lenx = length(x) + lenc = length(y1) + for i = 1:lenc + li, ui = l[i], u[i] + yi = y1[i] + if isfinite(li) + yi >= li || return convert(T,Inf) # no need to compute additional gradient terms + phi += log(yi-li) + end + if isfinite(ui) + yi <= ui || return convert(T,Inf) + phi += log(ui-yi) + end + end + if isempty(g) + return -phi + end + y2 = constraints.scratch2 + y3 = constraints.scratch3 + for i = 1:lenc + y2[i] = 1/(y1[i] - l[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] -= y3[i] + end + for i = 1:lenc + y2[i] = 1/(u[i] - y1[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] += y3[i] + end + -phi::T +end + +barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) + + +## Nonlinear inequalities + + +######################### +## +## Combined cost function +## +######################### +combined_f(x, objective_f::Function, constraints::AbstractConstraints, t) = + t*objective_f(x) + barrier_f(x, constraints) + +function combined_g!(x, g, objective_g!::Function, constraints::AbstractConstraints, t) + objective_g!(x, g) + scale!(g, t) + barrier_fg!(x, g, constraints) +end + +function combined_fg!(x, g, objective_fg!::Function, constraints::AbstractConstraints, t) + f_x = t*objective_fg!(x, g) + scale!(g, t) + f_x += barrier_fg!(x, g, constraints) + f_x +end + +function combined_h!(x, H, objective_h!::Function, constraints::AbstractConstraints, t) + objective_h!(x, H) + scale!(H, t) + barrier_h!(x, H, constraints) +end + +# combined_f(x, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_f(x, objective.f, constraints, t) +# +# combined_g!(x, g, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_g!(x, g, objective.g, constraints, t) +# +# 
combined_fg!(x, g, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_fg!(x, g, objective.fg!, constraints, t) +# +# combined_h!(x, H, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_h!(x, H, objective.h!, constraints, t) + + +######################################### +## +## Cost function for linear least squares +## +######################################### + +linlsq(A::Matrix, b::Vector) = + TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,H) -> linlsq_h!(x, H, A, b)) + +dummy_f(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) + +function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) + A_mul_B!(scratch, A, x) + f_x = zero(typeof(one(T)*one(T)+one(T)*one(T))) + for i = 1:length(g) + tmp = scratch[i] + tmp -= b[i] + f_x += tmp*tmp + scratch[i] = tmp + end + if !isempty(g) + At_mul_B!(g, A, scratch) + end + f_x/2 +end + +linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) + + +######################### +## +## Interior point methods +## +######################### +# Follows the notation of +# H. Hindi (2006), "A Tutorial on Convex Optimization II: Duality and +# Interior Point Methods," Proc. 2006 American Control Conference. +# particularly his "Algorithm: Barrier method." + +function interior_newton{T}(objective::TwiceDifferentiableFunction, + initial_x::Vector{T}, + constraints::AbstractConstraints; + t = one(T), mu = 10, eps_gap = convert(T,NaN), + xtol::Real = convert(T,1e-32), + ftol::Real = convert(T,1e-8), + grtol::Real = convert(T,1e-8), + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + xtmp = similar(x) + x_previous = similar(x) + m = length(x) + gr = similar(x) + H = Array(T, m, m) + lsr = LineSearchResults(T) + + tr = OptimizationTrace() + tracing = store_trace || show_trace || extended_trace + + df = DifferentiableFunction(dummy_f, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 + f_x = f_x_previous = convert(T,Inf) + x_converged = f_converged = gr_converged = converged = false +# while m/t > eps_gap && iteration < iterations + while iteration < iterations + f_x_previous = f_x + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + combined_h!(x, H, objective.h!, constraints, t) + F, Hd = ldltfact!(Positive, H) + s = F\(-gr) + sgr = vecdot(s, gr) + if !(sgr <= 0) || isinf(sgr) + # Presumably due to numeric instability + converged = x_converged = f_converged = gr_converged = false + break + end + clear!(lsr) + push!(lsr, zero(T), f_x, sgr) + f_x /= t + @newtontrace + alphamax = toedge(x, s, constraints) + alpha = alphamax < 1 ? 
0.9*alphamax : 1.0 + alpha, _f_calls, _g_calls = + hz_linesearch!(df, x, s, xtmp, gr, lsr, alpha, true, constraints, alphamax) + copy!(x_previous, x) + step!(x, x, s, alpha, constraints) + iteration += 1 + iter_t += 1 + f_calls += _f_calls + g_calls += _g_calls + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + t*grtol) + # It's time to change the barrier penalty if we're + # sufficiently "in the basin." That means (1) the Hessian is + # positive (semi)definite (all(Hd .> 0)) and that the Hessian + # step is accepted (alpha is approximately 1). The latter + # suggests that the Hessian is a reliable predictor of the + # function shape, and that within a few iterations we'd be at + # the minimum. + if (alpha >= 0.9 && all(Hd .>= 0)) || converged + if iter_t == 1 && converged + # if changing t didn't change the solution, quit now + break + end + t *= mu + iter_t = 0 + df = DifferentiableFunction(dummy_f, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + end + end + MultivariateOptimizationResults("Interior/Newton", + initial_x, + x, + @compat(Float64(f_x)), + iteration, + iteration == iterations, + x_converged, + xtol, + f_converged, + ftol, + gr_converged, + grtol, + tr, + f_calls, + g_calls) +end + + +function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiableFunction}, + initial_x::Array{T}, + constraints::AbstractConstraints; + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = convert(T,NaN), + xtol::Real = 1e-32, + ftol::Real = 1e-8, + grtol::Real = 1e-8, + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + if isnan(t) + vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && sqrt(gogo) < grtol + return MultivariateOptimizationResults("Interior", + initial_x, + initial_x, + @compat(Float64(vo)), + 0, + false, + false, + xtol, + false, + ftol, + true, + grtol, + OptimizationTrace(), + 0, + 0) + end + if gogc == 0 + t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. 
+ else + t = convert(T, 10*abs(gogc)/gogo) + end + end + if isnan(eps_gap) + eps_gap = (cbrt(eps(T)))^2/t + end + if method == :newton + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + end + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + m = length(x) + local results + iteration, f_calls, g_calls = 0, 0, 0 + while m/t > eps_gap || iteration == 0 + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g!(x, g, objective.g!, constraints, t), + (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) + results = optimize(df, x, method=method, constraints=constraints, interior=true, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + copy!(x, results.minimum) + iteration += results.iterations + f_calls += results.f_calls + g_calls += results.g_calls + results.f_minimum /= t + t *= mu + end + results.iterations, results.f_calls, results.g_calls = iteration, f_calls, g_calls + copy!(results.initial_x, initial_x) + results +end + +""" +`vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x)` +computes the values `vo`, `vc` of the `objective` and `constraints` at +position `initial_x`, as well as their gradients. `gogo` is the dot +product of the objective gradient with itself; `gogc` is the dot +product of the objective gradient and the constraint gradient. + +The latter two can be used to initialize the tradeoff between +objective and constraints so that the total gradient `t*go + gc` is +still a descent direction for the objective. This ensures that the +barrier penalty will not push the solution out of the starting basin. +""" +function gnorm(objective, constraints, initial_x) + go = similar(initial_x) + gc = similar(initial_x) + fill!(gc, 0) + vo = objective.fg!(initial_x, go) + vc = barrier_fg!(initial_x, gc, constraints) + gom = gcm = zero(eltype(go)) + for i = 1:length(go) + gom += abs(go[i]) + gcm += abs(gc[i]) + end + vo, vc, gom, gcm +end diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 941d52b58..f3c5dade6 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -80,9 +80,11 @@ macro lbfgstrace() end end -function l_bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function l_bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, m::Integer = 10, xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -148,6 +150,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, # Iterate until convergence converged = false + converged_iteration = typemin(Int) while !converged && iteration < iterations # Increment the number of steps we've had to perform iteration += 1 @@ -157,13 +160,34 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, twoloop_alpha, twoloop_q) # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) + if dphi0 >= 0 + # The search direction is not a descent direction. Something + # is wrong, so restart the search-direction algorithm. + # See also the "restart" below. 
+ iteration = 1 + for i = 1:n + @inbounds s[i] = -gr[i] + end + dphi0 = vecdot(gr, s) + end + dphi0 == 0 && break # we're at a stationary point clear!(lsr) push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? toedge(x, s, constraints) : convert(T,Inf) + + # Pick the initial step size (HZ #I1-I2). Even though we might + # guess alpha=1 for l_bfgs most of the time, testing suggests + # this is still usually a good idea (fewer total function and + # gradient evaluations). + alpha, mayterminate, f_update, g_update = + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + f_calls, g_calls = f_calls + f_update, g_calls + g_update + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position @@ -188,7 +212,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, end # Update the L-BFGS history of positions and gradients - rho_iteration = 1 / dot(dx, dgr) + rho_iteration = 1 / vecdot(dx, dgr) if isinf(rho_iteration) # TODO: Introduce a formal error? There was a warning here previously break @@ -210,12 +234,32 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, grtol) @lbfgstrace + + # If we think we've converged, restart and make sure we don't + # have another long descent. This should catch the case where + # dphi < 0 "by a hair," meaning that the chosen search direction + # happened to be nearly orthogonoal to the gradient. + if converged + if iteration <= 3 || converged_iteration >= 0 + if iteration <= max(m, 3) + break # we converged quickly after previous restart + end + converged_iteration += iteration + else + converged_iteration = iteration + end + converged = false + iteration = 0 # the next search direction will be -gr + end end + if converged_iteration >= 0 + iteration += converged_iteration + end return MultivariateOptimizationResults("L-BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 178b585ee..69e001811 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -38,7 +38,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # Maintain a trace of the system. 
tr = OptimizationTrace() if show_trace - d = {"lambda" => lambda} + d = @compat Dict{Any,Any}("lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), NaN, d) push!(tr, os) println(os) @@ -87,7 +87,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # show state if show_trace gradnorm = norm(J'*fcur, Inf) - d = {"g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda} + d = @compat Dict{Any,Any}("g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), gradnorm, d) push!(tr, os) println(os) diff --git a/src/linesearch/backtracking_linesearch.jl b/src/linesearch/backtracking_linesearch.jl index c8e498601..ed4810a60 100644 --- a/src/linesearch/backtracking_linesearch.jl +++ b/src/linesearch/backtracking_linesearch.jl @@ -1,5 +1,5 @@ -function backtracking_linesearch!{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function backtracking_linesearch!{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector{T}, s::Vector, x_scratch::Vector, diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 815a1112a..7282780e8 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -1,7 +1,7 @@ # phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g) # Dot product of two "vectors", even if they don't have a vector shape -function dot(x::Array, y::Array) +function vecdot(x::Array, y::Array) d = x[1] * y[1] for i = 2:length(x) d += x[i] * y[i] @@ -10,11 +10,11 @@ function dot(x::Array, y::Array) end # Vector-norm-squared, even if it doesn't have a vector shape -norm2(x::Array) = dot(x, x) +vecnorm2(x::Array) = vecdot(x, x) # Display flags are represented as a bitfield # (not exported, but can use via OptimizeMod.ITER, for example) -const one64 = convert(Uint64, 1) +const one64 = convert(UInt64, 1) const FINAL = one64 const ITER = one64 << 1 const PARAMETERS = one64 << 2 @@ -35,23 +35,17 @@ display_nextbit = 14 # paper (these may or may not be extensions of the cg_descent code # that can be downloaded from Hager's site; his code has undergone # numerous revisions since publication of the paper): -# cgdescent: the termination condition employs a "unit-correct" -# expression rather than a condition on gradient -# components---whether this is a good or bad idea will require -# additional experience, but preliminary evidence seems to suggest -# that it makes "reasonable" choices over a wider range of problem -# types. -# linesearch: the Wolfe conditions are checked only after alpha is +# - The Wolfe conditions are checked only after alpha is # generated either by quadratic interpolation or secant # interpolation, not when alpha is generated by bisection or # expansion. This increases the likelihood that alpha will be a # good approximation of the minimum. -# linesearch: In step I2, we multiply by psi2 only if the convexity +# - In step I2, we multiply by psi2 only if the convexity # test failed, not if the function-value test failed. This # prevents one from going uphill further when you already know # you're already higher than the point at alpha=0. -# both: checks for Inf/NaN function values -# both: support maximum value of alpha (equivalently, c). This +# - Checks for Inf/NaN function values +# - Support maximum value of alpha (equivalently, c). This # facilitates using these routines for constrained minimization # when you can calculate the distance along the path to the # disallowed region. 
(When you can't easily calculate that @@ -62,9 +56,10 @@ display_nextbit = 14 # which a finite value will be returned. See, e.g., limits_box # below. The default value for alphamax is Inf. See alphamaxfunc # for cgdescent and alphamax for linesearch_hz. +# - Support for projected-gradient methods const DEFAULTDELTA = 0.1 -const DEFAULTSIGMA = 0.9 +const DEFAULTSIGMA = 0.9 # Generate initial guess for step size (HZ, stage I0) function alphainit{T}(alpha::Real, @@ -90,19 +85,20 @@ end # Code used to use phi, a 1-parameter function induced by f and s # Need to pass s as an explicit parameter function alphatry{T}(alpha::T, - d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), + d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, gtmp::Array, lsr::LineSearchResults, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = convert(T,Inf), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), - alphamax::Real = inf(T), - display::Integer = 0) + iterfinitemax::Integer = ceil(Int,-log2(eps(T))), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -110,17 +106,16 @@ function alphatry{T}(alpha::T, dphi0 = lsr.slope[1] alphatest = psi1 * alpha + alphamax = min(alphamax, tocorner(x, s, constraints)) alphatest = min(alphatest, alphamax) - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 iterfinite = 1 while !isfinite(phitest) alphatest = psi3 * alphatest - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 lsr.nfailures += 1 iterfinite += 1 @@ -130,7 +125,7 @@ function alphatry{T}(alpha::T, end end a = ((phitest-phi0)/alphatest - dphi0)/alphatest # quadratic fit - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("quadfit: alphatest = ", alphatest, ", phi0 = ", phi0, ", phitest = ", phitest, @@ -148,7 +143,7 @@ function alphatry{T}(alpha::T, alpha = alphamax mayterminate = false end - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (quadratic): ", alpha, ",(mayterminate = ", mayterminate, ")") end @@ -160,14 +155,14 @@ function alphatry{T}(alpha::T, end end alpha = min(alphamax, alpha) - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (expand): ", alpha) end return alpha, mayterminate, f_calls, g_calls end -function hz_linesearch!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function hz_linesearch!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array{T}, s::Array, xtmp::Array, @@ -175,29 +170,32 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr::LineSearchResults{T}, c::Real, mayterminate::Bool, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = convert(T,Inf), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - alphamax::Real = inf(T), rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), - linesearchmax::Integer = 50, + linesearchmax::Integer = 100, psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), - display::Integer = 0) - if display & LINESEARCH > 0 + iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), + detailed_trace::Integer = 0) + if detailed_trace & LINESEARCH > 0 println("New linesearch") end f_calls = 0 g_calls = 0 + 
alphamax = min(alphamax, tocorner(x, s, constraints)) + phi0 = lsr.value[1] dphi0 = lsr.slope[1] (isfinite(phi0) && isfinite(dphi0)) || error("Initial value and slope must be finite") philim = phi0 + epsilon * abs(phi0) @assert c > 0 @assert isfinite(c) && c <= alphamax - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -206,7 +204,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr.nfailures += 1 iterfinite += 1 c *= psi3 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -219,7 +217,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, # satisfies the Wolfe conditions if mayterminate && satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Wolfe condition satisfied on point alpha = ", c) end return c, f_calls, g_calls # phic @@ -232,7 +230,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, iter = 1 cold = -one(T) while !isbracketed && iter < linesearchmax - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracketing: ia = ", ia, ", ib = ", ib, ", c = ", c, @@ -259,7 +257,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, error("c = ", c, ", lsr = ", lsr) end # ia, ib = bisect(phi, lsr, ia, ib, philim) # TODO: Pass options - ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, display) + ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up isbracketed = true @@ -269,7 +267,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c *= rho if c > alphamax c = (alphamax + cold)/2 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax, ", cold = ", cold, ", new c = ", c) end @@ -277,7 +275,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, return cold, f_calls, g_calls end end - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -285,11 +283,11 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, alphamax = c lsr.nfailures += 1 iterfinite += 1 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: non-finite value, bisection") end c = (cold + c) / 2 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -317,20 +315,21 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end while iter < linesearchmax a = lsr.alpha[ia] - b = lsr.alpha[ib] + b = lsr.alpha[ib] @assert b > a - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, ", ib = ", ib, ", a = ", a, ", b = ", b, ", phi(a) = ", lsr.value[ia], ", phi(b) = ", lsr.value[ib]) + println("phi(a) == phi(b) ? 
", lsr.value[ia] == lsr.value[ib]) end if b - a <= eps(b) return a, f_calls, g_calls # lsr.value[ia] end - iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display) + iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, delta, sigma, detailed_trace) f_calls += f_up g_calls += g_up if iswolfe @@ -340,12 +339,12 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, B = lsr.alpha[iB] @assert B > A if B - A < gamma * (b - a) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant succeeded") end if nextfloat(lsr.value[ia]) >= lsr.value[ib] && nextfloat(lsr.value[iA]) >= lsr.value[iB] # It's so flat, secant didn't do anything useful, time to quit - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant suggests it's flat") end return A, f_calls, g_calls @@ -354,23 +353,24 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, ib = iB else # Secant is converging too slowly, use bisection - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant failed, using bisection") end c = (A + B) / convert(T, 2) # phic = phi(gphi, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) # ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options - ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display) + ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end iter += 1 end + @show lsr error("Linesearch failed to converge") end @@ -398,8 +398,8 @@ function secant(lsr::LineSearchResults, ia::Integer, ib::Integer) return secant(lsr.alpha[ia], lsr.alpha[ib], lsr.slope[ia], lsr.slope[ib]) end # phi -function secant2!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function secant2!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -408,9 +408,10 @@ function secant2!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, + constraints::AbstractConstraints = ConstraintsNone(), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -420,31 +421,30 @@ function secant2!{T}(df::Union(DifferentiableFunction, b = lsr.alpha[ib] dphia = lsr.slope[ia] dphib = lsr.slope[ib] - @assert dphia < 0 - @assert dphib >= 0 + @assert dphia < 0 && isfinite(dphia) + @assert dphib >= 0 && isfinite(dphib) c = secant(a, b, dphia, dphib) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) end @assert isfinite(c) # phic = phi(tmpc, c) # Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: first c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end # iA, iB = 
update(phi, lsr, ia, ib, ic, philim) - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic) end a = lsr.alpha[iA] @@ -458,11 +458,11 @@ function secant2!{T}(df::Union(DifferentiableFunction, c = secant(lsr, ia, iA) end if a <= c <= b - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c = ", c) end # phic = phi(tmpc, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) @@ -470,16 +470,16 @@ function secant2!{T}(df::Union(DifferentiableFunction, ic = length(lsr) # Check arguments here if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB]) end return false, iA, iB, f_calls, g_calls @@ -489,8 +489,8 @@ end # Given a third point, pick the best two that retain the bracket # around the minimum (as defined by HZ, eq. 29) # b will be the upper bound, and a the lower bound -function update!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function update!(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -500,7 +500,8 @@ function update!(df::Union(DifferentiableFunction, ib::Integer, ic::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, eq. 4.4): @@ -511,7 +512,7 @@ function update!(df::Union(DifferentiableFunction, c = lsr.alpha[ic] phic = lsr.value[ic] dphic = lsr.slope[ic] - if display & UPDATE > 0 + if detailed_trace & UPDATE > 0 println("update: ia = ", ia, ", a = ", a, ", ib = ", ib, @@ -536,12 +537,12 @@ function update!(df::Union(DifferentiableFunction, end # phic is bigger than phi0, which implies that the minimum # lies between a and c. Find it via bisection. 
- return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display) + return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, constraints, detailed_trace) end # HZ, stage U3 (with theta=0.5) -function bisect!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bisect!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -550,27 +551,32 @@ function bisect!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 - gphi = nan(T) + gphi = convert(T,NaN) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, conditions shown following U3) @assert lsr.slope[ia] < 0 @assert lsr.value[ia] <= philim - @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here + # if !(lsr.slope[ib] < 0) + # @show a, b + # @show lsr.value[ia], lsr.value[ib] + # @show lsr.slope[ia], lsr.slope[ib] + # end + # @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) - if display & BISECT > 0 + if detailed_trace & BISECT > 0 println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a) end d = (a + b) / convert(T, 2) - phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true) + phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phid) && isfinite(gphi) push!(lsr, d, phid, gphi) id = length(lsr) if gphi >= 0 @@ -588,26 +594,25 @@ function bisect!{T}(df::Union(DifferentiableFunction, end # Define one-parameter function for line searches -function linefunc!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), - x::Array, - s::Array, - alpha::Real, - xtmp::Array, - g::Array, - calc_grad::Bool) +function linefunc!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, + x::Array{T}, + s::Array, + alpha::Real, + xtmp::Array, + g::Array, + calc_grad::Bool, + constraints::AbstractConstraints = ConstraintsNone()) f_calls = 0 g_calls = 0 - for i = 1:length(x) - xtmp[i] = x[i] + alpha * s[i] - end - gphi = nan(eltype(g)) + step!(xtmp, x, s, alpha, constraints) + gphi = convert(eltype(g),NaN) if calc_grad val = df.fg!(xtmp, g) f_calls += 1 g_calls += 1 if isfinite(val) - gphi = dot(g, s) + gphi = vecdot(g, s) end else val = df.f(xtmp) diff --git a/src/linesearch/interpolating_linesearch.jl b/src/linesearch/interpolating_linesearch.jl index 2c3486c4e..f17e12176 100644 --- a/src/linesearch/interpolating_linesearch.jl +++ b/src/linesearch/interpolating_linesearch.jl @@ -1,8 +1,8 @@ # TODO: Optimize for fg! 
calls # TODO: Implement safeguards -function interpolating_linesearch!{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function interpolating_linesearch!{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector, p::Vector, x_new::Vector, diff --git a/src/linesearch/mt_linesearch.jl b/src/linesearch/mt_linesearch.jl index a8d4511fa..b4b488e01 100644 --- a/src/linesearch/mt_linesearch.jl +++ b/src/linesearch/mt_linesearch.jl @@ -132,8 +132,8 @@ # TODO: Decide whether to update x, f, g and info # or just return step and nfev and let existing code do its job -function mt_linesearch!{T}(fcn::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function mt_linesearch!{T}(fcn::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector, s::Vector, new_x::Vector, diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index 927e451ac..eca0f9944 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -125,7 +125,7 @@ function momentum_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Momentum Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index f29afa619..9dcf8280f 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -69,6 +69,7 @@ function nelder_mead{T}(f::Function, for i in 1:n @inbounds y[i] = f(p[:, i]) end + any(isfinite(y)) || error("At least one of the starting points must have finite penalty") f_calls += n f_x_previous, f_x = NaN, nmobjective(y, m, n) @@ -186,7 +187,7 @@ function nelder_mead{T}(f::Function, return MultivariateOptimizationResults("Nelder-Mead", initial_x, minimum, - float64(f(minimum)), + @compat(Float64(f(minimum))), iteration, iteration == iterations, false, diff --git a/src/newton.jl b/src/newton.jl index 64cc33370..8f8245b50 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -130,7 +130,7 @@ function newton{T}(d::TwiceDifferentiableFunction, return MultivariateOptimizationResults("Newton's Method", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nnls.jl b/src/nnls.jl index f34f0ffd0..239e40389 100644 --- a/src/nnls.jl +++ b/src/nnls.jl @@ -6,8 +6,8 @@ function nnls(A::AbstractMatrix, b::AbstractVector) x = fill(one(T), size(A, 2)) # Set up constraints l = zeros(eltype(x), length(x)) - u = fill(inf(eltype(x)), length(x)) - # Perform the optimization + u = fill(convert(eltype(x),Inf), length(x)) + # Perform the optimization func = (x, g) -> nnlsobjective(x, g, A, b) df = DifferentiableFunction(x->func(x,nothing), func, func) fminbox(df, x, l, u, precondprep=(P, x, l, u, mu)->precondprepnnls(P, x, mu, a)) diff --git a/src/optimize.jl b/src/optimize.jl index 4ea952398..d2a3589d3 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,6 +1,8 @@ function optimize(d::TwiceDifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -41,6 +43,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -52,7 +56,7 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = 
eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -67,6 +71,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -94,6 +100,8 @@ end function optimize(d::DifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -134,6 +142,8 @@ function optimize(d::DifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -145,7 +155,7 @@ function optimize(d::DifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -160,6 +170,8 @@ function optimize(d::DifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -259,7 +271,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -362,7 +374,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -469,7 +481,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -514,7 +526,7 @@ function optimize{T <: Real}(f::Function, @printf "Iter Function value Gradient norm \n" end if method == :brent - brent(f, float64(lower), float64(upper); + brent(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, @@ -522,7 +534,7 @@ function optimize{T <: Real}(f::Function, show_trace = show_trace, extended_trace = extended_trace) elseif method == :golden_section - golden_section(f, float64(lower), float64(upper); + golden_section(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..7683e08ff --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,77 @@ +module ConstrainedProblems + +using Optim + +immutable ConstrainedProblem{C<:Optim.AbstractConstraints} + name::ASCIIString + f::Function + g!::Function + h!::Function + constraints::C + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +pexamples = Dict{ASCIIString, ConstrainedProblem}() + +##################################### +### +### Minimum distance, box constraints +### +##################################### + +function sqrdist(x) + dx1 = x[1] - 5 + dx2 = x[2] - 3 + (dx1*dx1 + dx2*dx2)/2 +end + +function sqrdist_gradient!(x, g) + g[1] = x[1] - 5 + g[2] = x[2] - 3 +end + +function sqrdist_hessian!(x, H) + H[1,1] = H[2,2] = 1 + H[1,2] = H[2,1] = 0 +end + +constraints = Optim.ConstraintsBox([-2.0,-2.0],[2.0,2.0]) + +pexamples["SqrdistBox"] = ConstrainedProblem("SqrdistBox", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + Any[[2.0, 2.0]], + true, + true) + 
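A quick sanity check on the expected solution recorded just above (an editor's aside, not part of the patch): because `sqrdist` is an isotropic quadratic centered at (5, 3), its minimizer over the box is simply that center projected onto the box, which the `project!` helper from constraints.jl reproduces:

    using Optim
    x = Optim.project!([5.0, 3.0], Optim.ConstraintsBox([-2.0, -2.0], [2.0, 2.0]))
    # x == [2.0, 2.0], matching the solution listed for "SqrdistBox"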
+constraints = Optim.ConstraintsBox(nothing,[2.0,2.0])
+
+pexamples["SqrdistUpr"] = ConstrainedProblem("SqrdistUpr",
+                                             sqrdist,
+                                             sqrdist_gradient!,
+                                             sqrdist_hessian!,
+                                             constraints,
+                                             [0.0, 0.0],
+                                             Any[[2.0, 2.0]],
+                                             true,
+                                             true)
+
+constraints = Optim.ConstraintsBox([-2.0,-2.0],nothing)
+
+pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr",
+                                             sqrdist,
+                                             sqrdist_gradient!,
+                                             sqrdist_hessian!,
+                                             constraints,
+                                             [0.0, 0.0],
+                                             Any[[5.0, 3.0]],
+                                             true,
+                                             true)
+
+end # module
diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl
index 7fbcf3c50..9d12d2ae1 100644
--- a/src/problems/unconstrained.jl
+++ b/src/problems/unconstrained.jl
@@ -40,7 +40,7 @@ examples["Exponential"] = OptimizationProblem("Exponential",
                                               exponential_gradient!,
                                               exponential_hessian!,
                                               [0.0, 0.0],
-                                              {[2.0, 3.0]},
+                                              Any[[2.0, 3.0]],
                                               true,
                                               true)
 
@@ -78,7 +78,7 @@ examples["Fletcher-Powell"] = OptimizationProblem("Fletcher-Powell",
                                                   fletcher_powell_gradient!,
                                                   fletcher_powell_hessian!,
                                                   [0.0, 0.0, 0.0],
-                                                  {[0.0, 0.0, 0.0]}, # TODO: Fix
+                                                  Any[[0.0, 0.0, 0.0]], # TODO: Fix
                                                   false,
                                                   false)
 
@@ -109,7 +109,7 @@ examples["Himmelbrau"] = OptimizationProblem("Himmelbrau",
                                              himmelbrau_gradient!,
                                              himmelbrau_hessian!,
                                              [0.0, 0.0],
-                                             {[1.0, 0.0]}, # TODO: Fix
+                                             Any[[1.0, 0.0]], # TODO: Fix
                                              true,
                                              false)
 ##########################################################################
@@ -138,7 +138,7 @@ examples["Hosaki"] = OptimizationProblem("Hosaki",
                                          hosaki_gradient!,
                                          hosaki_hessian!,
                                          [0.0, 0.0],
-                                         {[4.0, 2.0]},
+                                         Any[[4.0, 2.0]],
                                          false,
                                          false)
 
@@ -180,7 +180,7 @@ examples["Large Polynomial"] = OptimizationProblem("Large Polynomial",
                                                    large_polynomial_gradient!,
                                                    large_polynomial_hessian!,
                                                    zeros(250),
-                                                   {float([1:250])},
+                                                   Any[float([1:250;])],
                                                    true,
                                                    true)
 
@@ -220,7 +220,7 @@ examples["Parabola"] = OptimizationProblem("Parabola",
                                            parabola_gradient!,
                                            parabola_hessian!,
                                            [0.0, 0.0, 0.0, 0.0, 0.0],
-                                           {[1.0, 2.0, 3.0, 5.0, 8.0]},
+                                           Any[[1.0, 2.0, 3.0, 5.0, 8.0]],
                                            true,
                                            true)
 
@@ -257,7 +257,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial",
                                              polynomial_gradient!,
                                              polynomial_hessian!,
                                              [0.0, 0.0, 0.0],
-                                             {[10.0, 7.0, 108.0]},
+                                             Any[[10.0, 7.0, 108.0]],
                                              true,
                                              true)
 
@@ -268,7 +268,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial",
 ##########################################################################
 
 function powell(x::Vector)
-    return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + 
+    return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 +
         (x[2] - 2.0 * x[3])^4 + 10.0 * (x[1] - x[4])^4
 end
@@ -303,7 +303,7 @@ examples["Powell"] = OptimizationProblem("Powell",
                                          powell_gradient!,
                                          powell_hessian!,
                                          [3.0, -1.0, 0.0, 1.0],
-                                         {[0.0, 0.0, 0.0, 0.0]}, # TODO: Fix
+                                         Any[[0.0, 0.0, 0.0, 0.0]], # TODO: Fix
                                          true,
                                          true)
 
@@ -334,7 +334,7 @@ examples["Rosenbrock"] = OptimizationProblem("Rosenbrock",
                                              rosenbrock_gradient!,
                                              rosenbrock_hessian!,
                                              [0.0, 0.0],
-                                             {[1.0, 1.0]},
+                                             Any[[1.0, 1.0]],
                                              true,
                                              true)
 
diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl
index 5fbb27764..879cae802 100644
--- a/src/simulated_annealing.jl
+++ b/src/simulated_annealing.jl
@@ -103,7 +103,7 @@ function simulated_annealing{T}(cost::Function,
     return MultivariateOptimizationResults("Simulated Annealing",
                                            initial_x,
                                            best_x,
-                                           float64(best_f_x),
+                                           @compat(Float64(best_f_x)),
                                            iterations,
                                            iteration == iterations,
                                            false,
diff --git a/src/types.jl b/src/types.jl
index 2b2dedb2e..3295c07bc 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -6,11 +6,11 @@ immutable OptimizationState
 end
 
 function OptimizationState(i::Integer, f::Real)
-    OptimizationState(int(i), float64(f), NaN, Dict())
+    OptimizationState(int(i), @compat(Float64(f)), NaN, Dict())
 end
 
 function OptimizationState(i::Integer, f::Real, g::Real)
-    OptimizationState(int(i), float64(f), float64(g), Dict())
+    OptimizationState(int(i), @compat(Float64(f)), @compat(Float64(g)), Dict())
 end
 
 immutable OptimizationTrace
diff --git a/test/cg.jl b/test/cg.jl
index fa99fd27a..22742be8a 100644
--- a/test/cg.jl
+++ b/test/cg.jl
@@ -1,18 +1,30 @@
 using Optim
 
-for (name, prob) in Optim.UnconstrainedProblems.examples
-	if prob.isdifferentiable
-		if name == "Himmelbrau"
-			continue
-		end
-		df = DifferentiableFunction(prob.f, prob.g!)
-		res = Optim.cg(df, prob.initial_x)
-		if length(prob.solutions) == 1
-			@assert norm(res.minimum - prob.solutions[1]) < 1e-2
-		end
-	end
+for (name, prob) in UnconstrainedProblems.examples
+    if prob.isdifferentiable
+        if name == "Himmelbrau"
+            continue
+        end
+        df = DifferentiableFunction(prob.f, prob.g!)
+        res = Optim.cg(df, prob.initial_x)
+        if length(prob.solutions) == 1
+            @assert norm(res.minimum - prob.solutions[1]) < 1e-2
+        end
+    end
 end
+for (name, prob) in ConstrainedProblems.pexamples
+    if prob.isdifferentiable
+        df = DifferentiableFunction(prob.f, prob.g!)
+        res = Optim.cg(df, prob.initial_x, constraints=prob.constraints)
+        if length(prob.solutions) == 1
+            @assert norm(res.minimum - prob.solutions[1]) < 1e-2
+        end
+    end
+end
+
+
+# Functions of arrays
 let objective(X, B) = sum((X.-B).^2)/2
diff --git a/test/constrained.jl b/test/constrained.jl
index 8814afa2d..6a3b2d081 100644
--- a/test/constrained.jl
+++ b/test/constrained.jl
@@ -1,4 +1,4 @@
-import Optim
+using Optim
 using Base.Test
 
 # Quadratic objective function
@@ -55,3 +55,17 @@ A = [1.0 2.0; 3.0 4.0]
 b = [1.0,1.0]
 results = Optim.nnls(A, b)
 @test norm(results.minimum - [0,0.3]) < 1e-6
+
+# 1d newton
+let f(x) = (z = x[1]; -2z^2 + z^4),
+    g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x),
+    h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H),
+    objective = TwiceDifferentiableFunction(f, g!, (x,gr) -> (g!(x, gr); f(x)), h!),
+    constr = ConstraintsBox([-0.9],[0.9])
+
+    for x0 in (0.1,0.2,0.3,0.5,0.7)
+        x = [x0]
+        soln = interior(objective, x, constr, method=:newton)
+        @test_approx_eq_eps soln.minimum[1] 0.9 1e-6
+    end
+end
diff --git a/test/grid_search.jl b/test/grid_search.jl
index 5f34c4da0..ff0b4f7ad 100644
--- a/test/grid_search.jl
+++ b/test/grid_search.jl
@@ -1,4 +1,4 @@
-Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0])
+Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0;])
 
 # Cartesian product over 2 dimensions.
 #grid_search(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, product[-1:0.1:1.0, -1:0.1:1.0])
diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl
index abe0fe7a3..e79c3bfb3 100644
--- a/test/momentum_gradient_descent.jl
+++ b/test/momentum_gradient_descent.jl
@@ -1,4 +1,4 @@
-p = Optim.UnconstrainedProblems.examples["Rosenbrock"]
+p = UnconstrainedProblems.examples["Rosenbrock"]
 
 df = DifferentiableFunction(p.f, p.g!)
diff --git a/test/runtests.jl b/test/runtests.jl
index 85ba62b63..d20712504 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,10 @@
 
 using Optim
 
+for path in Optim.testpaths
+    require(path)
+end
+
 my_tests = ["bfgs.jl",
             "gradient_descent.jl",
             "momentum_gradient_descent.jl",
diff --git a/test/type_stability.jl b/test/type_stability.jl
index da72b8956..56482cb8c 100644
--- a/test/type_stability.jl
+++ b/test/type_stability.jl
@@ -30,7 +30,7 @@ d3 = TwiceDifferentiableFunction(rosenbrock,
 
 for method in (:nelder_mead, :simulated_annealing)
     optimize(rosenbrock, [0.0,0,.0], method = method)
-    optimize(rosenbrock, float32([0.0,0.0]), method = method)
+    optimize(rosenbrock, [0.0f0,0.0f0], method = method)
 end
 
 for method in (:bfgs,
@@ -40,10 +40,10 @@ for method in (:bfgs,
                # :accelerated_gradient_descent,
                :l_bfgs)
     optimize(d2, [0.0,0,.0], method = method)
-    optimize(d2, float32([0.0,0.0]), method = method)
+    optimize(d2, [0.0f0,0.0f0], method = method)
 end
 
 for method in (:newton,)
     optimize(d3, [0.0,0.0], method = method)
-    optimize(d3, float32([0.0,0.0]), method = method)
+    optimize(d3, [0.0f0,0.0f0], method = method)
 end
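
Note: the constrained-optimization API introduced by this patch (ConstraintsBox, the constraints=/interior= keywords on cg and optimize, and the interior entry point) can be exercised as in the sketch below. This is an illustration only, not part of the patch: the objective, bounds, starting point, and expected result are chosen to mirror the SqrdistBox problem from src/problems/constrained.jl.

using Optim

# Quadratic "squared distance" objective with unconstrained minimizer (5, 3);
# the gradient writes into gr in place, matching the g!(x, gr) convention used
# in the patch's test problems.
f(x) = ((x[1] - 5)^2 + (x[2] - 3)^2) / 2
function g!(x, gr)
    gr[1] = x[1] - 5
    gr[2] = x[2] - 3
end
df = DifferentiableFunction(f, g!)

# Box constraints -2 <= x[i] <= 2; the unconstrained minimizer lies outside the
# box, so the constrained solution should land at the corner [2.0, 2.0].
constraints = ConstraintsBox([-2.0, -2.0], [2.0, 2.0])

# Conjugate gradient with the new constraints keyword; passing interior=true
# instead routes the solve through the interior-point path added in src/interior.jl.
res = Optim.cg(df, [0.0, 0.0], constraints = constraints)
println(res.minimum)   # expected to be approximately [2.0, 2.0]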