WIP: constrained optimization #50

Closed
timholy wants to merge 29 commits
Changes from all commits (29 commits)
680edc7
Add support for constraints and interior-point methods
timholy Mar 8, 2014
809be96
Add constrained tests, and don't load problems unless running tests
timholy Mar 11, 2014
83a0bc3
Implement linear constraints
timholy Mar 14, 2014
f832562
Improve initial value of barrier coefficient
timholy Apr 2, 2014
5fabfe8
Update for deprecation of infs
timholy Apr 4, 2014
8f56008
Check type of function value, and fix type of initial t in interior-p…
timholy Apr 4, 2014
de95ece
Modify some asserts that were causing trouble for non-convex domains
timholy Apr 5, 2014
72f18ea
nelder-mead: check that at least one starting point has finite value
timholy May 23, 2014
048394a
Fix call to norm on high-dimensional objects
timholy Sep 11, 2014
4d536b7
fminbox: remove unused tol argument and add mu0
timholy Sep 11, 2014
7f48aeb
fixed some deprecation warnings thrown by julia 0.4. Used Compat pac…
Cody-G Feb 4, 2015
7ed8c2a
Merge pull request #99 from Cody-G/fix_deprecations
timholy Feb 14, 2015
0f35292
Fix ConstraintsNone parameter problem in step!
timholy Apr 9, 2015
03c5e04
Fixes for 0.4 deprecations
timholy Apr 9, 2015
4d1cdf4
Bump the default linesearchmax
timholy Apr 14, 2015
c0a3a3f
Julia 0.4 update
timholy Sep 23, 2015
5c1e65e
More 0.4 fixes
timholy Sep 23, 2015
c2b7cda
Guarantee that the initial direction is a descent direction for objec…
timholy Oct 21, 2015
92a1c08
Pass constraints through to minimizers
timholy Oct 21, 2015
ce8df0a
Automatically try a restart if we think we've converged
timholy Oct 22, 2015
28ce676
Ensure that the search direction is a descent direction
timholy Dec 1, 2015
dc4a4c9
interior_newton: increment t if the step converged
timholy Dec 18, 2015
67c9996
Fix returned function value when there are 0 parameters
timholy Dec 18, 2015
be3324a
Improvements to positive-definite newton step
timholy Dec 18, 2015
afccefc
Use PositiveFactorizations for the newton step
timholy Jan 26, 2016
dad89d2
Change convergence criteria in interior_newton to monitor solution, n…
timholy Jan 27, 2016
b66e6b6
dot->vecdot
timholy Jan 27, 2016
db76ca0
interior_newton: pass options, bail if encounter numeric instability
timholy Jan 27, 2016
2943c3f
interior_newton: also check for finiteness
timholy Jan 27, 2016
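Several of the commits above (adding interior-point methods, tuning the initial barrier coefficient, incrementing t in interior_newton) revolve around a log-barrier formulation. As a rough sketch of that general idea only (not the code in this PR; the function name and box-constraint form are assumed for illustration):

```julia
# Log-barrier surrogate for minimizing f(x) subject to l .< x .< u.
# For a fixed barrier coefficient t > 0 the log terms keep iterates away
# from the bounds; as t grows, the minimizer of the surrogate approaches
# the constrained minimizer of f.
function barrier_objective(f, x, l, u, t)
    b = zero(eltype(x))
    for i in eachindex(x)
        b -= log(x[i] - l[i]) + log(u[i] - x[i])  # requires l[i] < x[i] < u[i]
    end
    return f(x) + b / t
end
```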
4 changes: 3 additions & 1 deletion REQUIRE
@@ -1,3 +1,5 @@
julia 0.2-
julia 0.4
Calculus
DualNumbers
Compat
PositiveFactorizations
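The new PositiveFactorizations dependency is what the commit "Use PositiveFactorizations for the newton step" refers to: it computes a modified Cholesky factorization that is positive definite even when the Hessian is not, so the resulting Newton step is always a descent direction. A minimal sketch of that pattern, assuming the package's cholfact(Positive, ...) entry point from this era; the helper name is hypothetical:

```julia
using PositiveFactorizations

# The Hessian H may be indefinite far from a minimum; factoring a
# positive-definite modification of it keeps the Newton step downhill.
function descent_newton_step(H::Matrix, g::Vector)
    F = cholfact(Positive, H)  # modified Cholesky, always positive definite
    return -(F \ g)            # descent direction whenever g is nonzero
end
```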
28 changes: 21 additions & 7 deletions src/Optim.jl
@@ -1,20 +1,28 @@
isdefined(Base, :__precompile__) && __precompile__()

module Optim
using Calculus
using Calculus, PositiveFactorizations
using Compat

import Base.dot,
Base.length,
import Base.length,
Base.push!,
Base.show,
Base.getindex,
Base.setindex!

export optimize,
interior,
linlsq,
DifferentiableFunction,
TwiceDifferentiableFunction
TwiceDifferentiableFunction,
ConstraintsBox

# Types
include("types.jl")

# Types for constrained optimization
include("constraints.jl")

# Automatic differentiation utilities
include("autodiff.jl")

@@ -65,11 +73,17 @@ module Optim
include("golden_section.jl")
include("brent.jl")

# Constrained optimization algorithms
include("interior.jl")

# End-User Facing Wrapper Functions
include("optimize.jl")

# Examples for testing
include(joinpath("problems", "unconstrained.jl"))

cgdescent(args...) = error("API has changed. Please use cg.")

# Tests
const basedir = dirname(Base.source_path())
const testpaths = [joinpath(basedir, "problems", "unconstrained.jl"),
joinpath(basedir, "problems", "constrained.jl")]

end
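The new exports (interior, linlsq, ConstraintsBox) together with the constraints.jl and interior.jl includes define the user-facing surface of the constrained solver. The exact call signatures are not shown in this hunk, so the following is only a hypothetical sketch built from the exported names; the argument order and the ConstraintsBox constructor are assumptions:

```julia
using Optim

# Hypothetical use of the constrained API exported above.
f(x) = (x[1] - 2.0)^2 + (x[2] + 1.0)^2
d = DifferentiableFunction(f)                  # gradient via finite differences
box = ConstraintsBox([0.0, 0.0], [1.0, 1.0])   # assumed: lower bounds, upper bounds
result = interior(d, [0.5, 0.5], box)          # assumed argument order
```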
2 changes: 1 addition & 1 deletion src/accelerated_gradient_descent.jl
@@ -139,7 +139,7 @@ function accelerated_gradient_descent{T}(d::DifferentiableFunction,
return MultivariateOptimizationResults("Accelerated Gradient Descent",
initial_x,
x,
float64(f_x),
@compat(Float64(f_x)),
iteration,
iteration == iterations,
x_converged,
8 changes: 4 additions & 4 deletions src/bfgs.jl
@@ -23,8 +23,8 @@ macro bfgstrace()
end
end

function bfgs{T}(d::Union(DifferentiableFunction,
TwiceDifferentiableFunction),
function bfgs{T}(d::Union{DifferentiableFunction,
TwiceDifferentiableFunction},
initial_x::Vector{T};
initial_invH::Matrix = eye(length(initial_x)),
xtol::Real = 1e-32,
@@ -97,7 +97,7 @@ function bfgs{T}(d::Union(DifferentiableFunction,
# Increment the number of steps we've had to perform
iteration += 1

# Set the search direction
# Set the search direction
# Search direction is the negative gradient divided by the approximate Hessian
A_mul_B!(s, invH, gr)
for i in 1:n
@@ -178,7 +178,7 @@ function bfgs{T}(d::Union(DifferentiableFunction,
return MultivariateOptimizationResults("BFGS",
initial_x,
x,
float64(f_x),
@compat(Float64(f_x)),
iteration,
iteration == iterations,
x_converged,
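The recurring mechanical edits in these hunks (here, in accelerated_gradient_descent.jl above, and in the files below) track the Julia 0.3 to 0.4 transition: Union(A, B) became Union{A, B}, float64(x) became Float64(x) (wrapped here in Compat's @compat macro), FloatingPoint was renamed AbstractFloat, the type of nothing changed from Nothing to Void, and dot on general arrays gave way to vecdot. A minimal illustration of the two patterns visible in this file, written for Julia 0.4:

```julia
using Compat

# Old 0.3 spellings, now deprecated:
#   bar(x::Union(Int, Float64)) = float64(x)
# The 0.4 spellings used throughout this PR:
bar(x::Union{Int, Float64}) = @compat(Float64(x))

bar(3)    # -> 3.0
bar(2.5)  # -> 2.5
```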
14 changes: 7 additions & 7 deletions src/brent.jl
@@ -18,7 +18,7 @@ macro brenttrace()
end
end

function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T;
function brent{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T;
rel_tol::T = sqrt(eps(T)),
abs_tol::T = eps(T),
iterations::Integer = 1_000,
@@ -33,13 +33,13 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T;
# Save for later
initial_lower = x_lower
initial_upper = x_upper

const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0))

x_minimum = x_lower + golden_ratio*(x_upper-x_lower)
f_minimum = f(x_minimum)
f_calls = 1 # Number of calls to f

step = zero(T)
step_old = zero(T)

@@ -48,7 +48,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T;

f_minimum_old = f_minimum
f_minimum_old_old = f_minimum

it = 0
converged = false

@@ -72,12 +72,12 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T;
end

it += 1

if abs(step_old) > tolx
# Compute parabola interpolation
# x_minimum + p/q is the optimum of the parabola
# Also, q is guaranteed to be positive

r = (x_minimum - x_minimum_old) * (f_minimum - f_minimum_old_old)
q = (x_minimum - x_minimum_old_old) * (f_minimum - f_minimum_old)
p = (x_minimum - x_minimum_old_old) * q - (x_minimum - x_minimum_old) * r
@@ -150,7 +150,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T;
initial_lower,
initial_upper,
x_minimum,
float64(f_minimum),
@compat(Float64(f_minimum)),
it,
converged,
rel_tol,
49 changes: 29 additions & 20 deletions src/cg.jl
@@ -1,8 +1,8 @@
# Preconditioners
# * Empty preconditioner
cg_precondfwd(out::Array, P::Nothing, A::Array) = copy!(out, A)
cg_precondfwddot(A::Array, P::Nothing, B::Array) = dot(A, B)
cg_precondinvdot(A::Array, P::Nothing, B::Array) = dot(A, B)
cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A)
cg_precondfwddot(A::Array, P::Void, B::Array) = vecdot(A, B)
cg_precondinvdot(A::Array, P::Void, B::Array) = vecdot(A, B)

# Diagonal preconditioner
function cg_precondfwd(out::Array, p::Vector, A::Array)
@@ -89,7 +89,7 @@ macro cgtrace()
dt["g(x)"] = copy(gr)
dt["Current step size"] = alpha
end
grnorm = norm(gr, Inf)
grnorm = norm(gr[:], Inf)
Contributor: Can you use vecnorm(gr, Inf) here?

Contributor Author: Yeah. Thanks for the comment!
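The distinction behind this exchange: on Julia 0.4, norm(A, Inf) on a matrix is the induced operator norm (the maximum absolute row sum) and errors for arrays with more than two dimensions, whereas vecnorm(A, Inf) treats any array as a flat collection of entries, which is what a gradient-magnitude check wants, and it avoids the copy made by gr[:]. A small illustration:

```julia
G = [1.0 -3.0; 2.0 0.5]

norm(G, Inf)     # 4.0 -- induced matrix norm (max absolute row sum)
vecnorm(G, Inf)  # 3.0 -- largest |entry|, what the convergence check needs
norm(G[:], Inf)  # 3.0 -- same value, but allocates a copy of G
```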

update!(tr,
iteration,
f_x,
@@ -101,9 +101,11 @@ macro cgtrace()
end
end

function cg{T}(df::Union(DifferentiableFunction,
TwiceDifferentiableFunction),
function cg{T}(df::Union{DifferentiableFunction,
TwiceDifferentiableFunction},
initial_x::Array{T};
constraints::AbstractConstraints = ConstraintsNone(),
interior::Bool = false,
xtol::Real = convert(T,1e-32),
ftol::Real = convert(T,1e-8),
grtol::Real = convert(T,1e-8),
@@ -117,7 +119,9 @@ function cg{T}(df::Union(DifferentiableFunction,
precondprep::Function = (P, x) -> nothing)

# Maintain current state in x and previous state in x_previous
x, x_previous = copy(initial_x), copy(initial_x)
x = copy(initial_x)
project!(x, constraints)
x_previous = copy(x)

# Count the total number of iterations
iteration = 0
@@ -146,7 +150,7 @@ function cg{T}(df::Union(DifferentiableFunction,
# Store f(x) in f_x
f_x = df.fg!(x, gr)
@assert typeof(f_x) == T
f_x_previous = nan(T)
f_x_previous = convert(T,NaN)
f_calls, g_calls = f_calls + 1, g_calls + 1
copy!(gr_previous, gr)

@@ -191,12 +195,12 @@ function cg{T}(df::Union(DifferentiableFunction,
iteration += 1

# Reset the search direction if it becomes corrupted
dphi0 = dot(gr, s)
dphi0 = vecdot(gr, s)
if dphi0 >= 0
for i in 1:n
@inbounds s[i] = -gr[i]
end
dphi0 = dot(gr, s)
dphi0 = vecdot(gr, s)
if dphi0 < 0
break
end
@@ -208,23 +212,28 @@ function cg{T}(df::Union(DifferentiableFunction,
@assert typeof(dphi0) == T
push!(lsr, zero(T), f_x, dphi0)

alphamax = interior ? toedge(x, s, constraints) : convert(T,Inf)

# Pick the initial step size (HZ #I1-I2)
alpha, mayterminate, f_update, g_update =
alphatry(alpha, df, x, s, x_ls, gr_ls, lsr)
alphatry(alpha, df, x, s, x_ls, gr_ls, lsr, constraints, alphamax)
f_calls, g_calls = f_calls + f_update, g_calls + g_update

if alpha == zero(T)
x_converged = true
break
end

# Determine the distance of movement along the search line
alpha, f_update, g_update =
linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate)
linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax)
f_calls, g_calls = f_calls + f_update, g_calls + g_update

# Maintain a record of previous position
copy!(x_previous, x)

# Update current position
for i in 1:n
@inbounds x[i] = x[i] + alpha * s[i]
end
step!(x, x, s, alpha, constraints)

# Maintain a record of the previous gradient
copy!(gr_previous, gr)
@@ -254,14 +263,14 @@ function cg{T}(df::Union(DifferentiableFunction,
# Calculate the beta factor (HZ2012)
precondprep(P, x)
dPd = cg_precondinvdot(s, P, s)
etak::T = eta * dot(s, gr_previous) / dPd
etak::T = eta * vecdot(s, gr_previous) / dPd
for i in 1:n
@inbounds y[i] = gr[i] - gr_previous[i]
end
ydots = dot(y, s)
ydots = vecdot(y, s)
cg_precondfwd(pgr, P, gr)
betak = (dot(y, pgr) - cg_precondfwddot(y, P, y) *
dot(gr, s) / ydots) / ydots
betak = (vecdot(y, pgr) - cg_precondfwddot(y, P, y) *
vecdot(gr, s) / ydots) / ydots
beta = max(betak, etak)
for i in 1:n
@inbounds s[i] = beta * s[i] - pgr[i]
@@ -273,7 +282,7 @@ function cg{T}(df::Union(DifferentiableFunction,
return MultivariateOptimizationResults("Conjugate Gradient",
initial_x,
x,
float64(f_x),
@compat(Float64(f_x)),
iteration,
iteration == iterations,
x_converged,
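The constraint hooks threaded through cg above (project!, toedge, step!) live in the new constraints.jl and interior.jl files, which this excerpt does not show. Their roles can be read off the call sites: project! maps the starting point into the feasible set, toedge bounds the line search by the largest feasible step along s, and step! applies the update while honoring the constraints. A rough sketch of what such primitives could look like for a simple box constraint; the names and clamping behavior here are illustrative assumptions, not the PR's implementation:

```julia
# Hypothetical box-constraint counterparts of the primitives used in cg.
function project_box!(x, l, u)
    for i in eachindex(x)
        x[i] = clamp(x[i], l[i], u[i])  # pull each coordinate into [l[i], u[i]]
    end
    return x
end

# Largest alpha such that x + alpha*s stays inside the box [l, u].
function toedge_box(x, s, l, u)
    alphamax = Inf
    for i in eachindex(x)
        if s[i] > 0
            alphamax = min(alphamax, (u[i] - x[i]) / s[i])
        elseif s[i] < 0
            alphamax = min(alphamax, (l[i] - x[i]) / s[i])
        end
    end
    return alphamax
end
```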