diff --git a/REQUIRE b/REQUIRE index 3ef4db562..01c9f80aa 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,5 @@ -julia 0.2- +julia 0.4 Calculus DualNumbers +Compat +PositiveFactorizations diff --git a/src/Optim.jl b/src/Optim.jl index 64d10de41..0fcb088e9 100644 --- a/src/Optim.jl +++ b/src/Optim.jl @@ -1,20 +1,28 @@ +isdefined(Base, :__precompile__) && __precompile__() + module Optim - using Calculus + using Calculus, PositiveFactorizations + using Compat - import Base.dot, - Base.length, + import Base.length, Base.push!, Base.show, Base.getindex, Base.setindex! export optimize, + interior, + linlsq, DifferentiableFunction, - TwiceDifferentiableFunction + TwiceDifferentiableFunction, + ConstraintsBox # Types include("types.jl") + # Types for constrained optimization + include("constraints.jl") + # Automatic differentiation utilities include("autodiff.jl") @@ -65,11 +73,17 @@ module Optim include("golden_section.jl") include("brent.jl") + # Constrained optimization algorithms + include("interior.jl") + # End-User Facing Wrapper Functions include("optimize.jl") - # Examples for testing - include(joinpath("problems", "unconstrained.jl")) - cgdescent(args...) = error("API has changed. Please use cg.") + + # Tests + const basedir = dirname(Base.source_path()) + const testpaths = [joinpath(basedir, "problems", "unconstrained.jl"), + joinpath(basedir, "problems", "constrained.jl")] + end diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl index e9de5b121..1f0f41bca 100644 --- a/src/accelerated_gradient_descent.jl +++ b/src/accelerated_gradient_descent.jl @@ -139,7 +139,7 @@ function accelerated_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Accelerated Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/bfgs.jl b/src/bfgs.jl index 5a6073b3e..cf2331c1e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -23,8 +23,8 @@ macro bfgstrace() end end -function bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; initial_invH::Matrix = eye(length(initial_x)), xtol::Real = 1e-32, @@ -97,7 +97,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, # Increment the number of steps we've had to perform iteration += 1 - # Set the search direction + # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian A_mul_B!(s, invH, gr) for i in 1:n @@ -178,7 +178,7 @@ function bfgs{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/brent.jl b/src/brent.jl index b3d4fa0b6..32ea74d8b 100644 --- a/src/brent.jl +++ b/src/brent.jl @@ -18,7 +18,7 @@ macro brenttrace() end end -function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function brent{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, @@ -33,13 +33,13 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) f_minimum = f(x_minimum) f_calls = 1 # Number of calls to f - + step = zero(T) step_old = zero(T) @@ 
-48,7 +48,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; f_minimum_old = f_minimum f_minimum_old_old = f_minimum - + it = 0 converged = false @@ -72,12 +72,12 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if abs(step_old) > tolx # Compute parabola interpolation # x_minimum + p/q is the optimum of the parabola # Also, q is guaranteed to be positive - + r = (x_minimum - x_minimum_old) * (f_minimum - f_minimum_old_old) q = (x_minimum - x_minimum_old_old) * (f_minimum - f_minimum_old) p = (x_minimum - x_minimum_old_old) * q - (x_minimum - x_minimum_old) * r @@ -150,7 +150,7 @@ function brent{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/cg.jl b/src/cg.jl index fb2834344..f827528ea 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -1,8 +1,8 @@ # Preconditioners # * Empty preconditioner -cg_precondfwd(out::Array, P::Nothing, A::Array) = copy!(out, A) -cg_precondfwddot(A::Array, P::Nothing, B::Array) = dot(A, B) -cg_precondinvdot(A::Array, P::Nothing, B::Array) = dot(A, B) +cg_precondfwd(out::Array, P::Void, A::Array) = copy!(out, A) +cg_precondfwddot(A::Array, P::Void, B::Array) = vecdot(A, B) +cg_precondinvdot(A::Array, P::Void, B::Array) = vecdot(A, B) # Diagonal preconditioner function cg_precondfwd(out::Array, p::Vector, A::Array) @@ -89,7 +89,7 @@ macro cgtrace() dt["g(x)"] = copy(gr) dt["Current step size"] = alpha end - grnorm = norm(gr, Inf) + grnorm = norm(gr[:], Inf) update!(tr, iteration, f_x, @@ -101,9 +101,11 @@ macro cgtrace() end end -function cg{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function cg{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = convert(T,1e-32), ftol::Real = convert(T,1e-8), grtol::Real = convert(T,1e-8), @@ -117,7 +119,9 @@ function cg{T}(df::Union(DifferentiableFunction, precondprep::Function = (P, x) -> nothing) # Maintain current state in x and previous state in x_previous - x, x_previous = copy(initial_x), copy(initial_x) + x = copy(initial_x) + project!(x, constraints) + x_previous = copy(x) # Count the total number of iterations iteration = 0 @@ -146,7 +150,7 @@ function cg{T}(df::Union(DifferentiableFunction, # Store f(x) in f_x f_x = df.fg!(x, gr) @assert typeof(f_x) == T - f_x_previous = nan(T) + f_x_previous = convert(T,NaN) f_calls, g_calls = f_calls + 1, g_calls + 1 copy!(gr_previous, gr) @@ -191,12 +195,12 @@ function cg{T}(df::Union(DifferentiableFunction, iteration += 1 # Reset the search direction if it becomes corrupted - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 >= 0 for i in 1:n @inbounds s[i] = -gr[i] end - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) if dphi0 < 0 break end @@ -208,23 +212,28 @@ function cg{T}(df::Union(DifferentiableFunction, @assert typeof(dphi0) == T push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? 
toedge(x, s, constraints) : convert(T,Inf) + # Pick the initial step size (HZ #I1-I2) alpha, mayterminate, f_update, g_update = - alphatry(alpha, df, x, s, x_ls, gr_ls, lsr) + alphatry(alpha, df, x, s, x_ls, gr_ls, lsr, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update + if alpha == zero(T) + x_converged = true + break + end + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(df, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position copy!(x_previous, x) # Update current position - for i in 1:n - @inbounds x[i] = x[i] + alpha * s[i] - end + step!(x, x, s, alpha, constraints) # Maintain a record of the previous gradient copy!(gr_previous, gr) @@ -254,14 +263,14 @@ function cg{T}(df::Union(DifferentiableFunction, # Calculate the beta factor (HZ2012) precondprep(P, x) dPd = cg_precondinvdot(s, P, s) - etak::T = eta * dot(s, gr_previous) / dPd + etak::T = eta * vecdot(s, gr_previous) / dPd for i in 1:n @inbounds y[i] = gr[i] - gr_previous[i] end - ydots = dot(y, s) + ydots = vecdot(y, s) cg_precondfwd(pgr, P, gr) - betak = (dot(y, pgr) - cg_precondfwddot(y, P, y) * - dot(gr, s) / ydots) / ydots + betak = (vecdot(y, pgr) - cg_precondfwddot(y, P, y) * + vecdot(gr, s) / ydots) / ydots beta = max(betak, etak) for i in 1:n @inbounds s[i] = beta * s[i] - pgr[i] @@ -273,7 +282,7 @@ function cg{T}(df::Union(DifferentiableFunction, return MultivariateOptimizationResults("Conjugate Gradient", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/constraints.jl b/src/constraints.jl new file mode 100644 index 000000000..365b7a5e8 --- /dev/null +++ b/src/constraints.jl @@ -0,0 +1,154 @@ +abstract AbstractConstraints + +immutable ConstraintsNone <: AbstractConstraints end + +immutable ConstraintsBox{T,N} <: AbstractConstraints + lower::Array{T,N} + upper::Array{T,N} + + function ConstraintsBox(l::AbstractArray{T}, u::AbstractArray{T}) + size(l) == size(u) || error("The sizes of the bounds must match") + for i = 1:length(l) + l[i] <= u[i] || error("The lower bound must be smaller than the upper bound") + end + new(l, u) + end +end +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(l, u) +ConstraintsBox{T,N}(l::AbstractArray{T,N}, u::Void) = ConstraintsBox{T,N}(l, fill(convert(T,Inf), size(l))) +ConstraintsBox{T,N}(l::Void, u::AbstractArray{T,N}) = ConstraintsBox{T,N}(fill(-convert(T,Inf),size(u)), u) + +immutable ConstraintsL{T,M<:AbstractMatrix,N} <: AbstractConstraints + A::M + lower::Array{T,N} + upper::Array{T,N} + scratch1::Array{T,N} + scratch2::Array{T,N} + scratch3::Array{T,N} + + function ConstraintsL(A, l::Array{T,N}, u::Array{T,N}, + scratch1 = similar(l), scratch2 = similar(l), scratch3 = Array(T, ndims(l) == 1 ? 
size(A, 2) : (size(A,2),size(l,2))))
+        size(A, 1) == size(l,1) == size(u,1) || error("The sizes of the bounds must match the size of A")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(A, l, u, scratch1, scratch2, scratch3)
+    end
+end
+ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Union{Vector{T},Matrix{T}}) = ConstraintsL{T,typeof(A),ndims(l)}(A, l, u)
+ConstraintsL{T}(A::AbstractMatrix, l::Union{Vector{T},Matrix{T}}, u::Void) = ConstraintsL(A, l, fill(convert(T,Inf), size(l)))
+ConstraintsL{T}(A::AbstractMatrix, l::Void, u::Union{Vector{T},Matrix{T}}) = ConstraintsL(A, fill(-convert(T,Inf), size(u)), u)
+
+# Generic constraints
+immutable ConstraintsNL{T,F<:Union{Function,DifferentiableFunction,TwiceDifferentiableFunction}} <: AbstractConstraints
+    funcs::Vector{F}
+    lower::Vector{T}
+    upper::Vector{T}
+
+    function ConstraintsNL(funcs::Vector, l::AbstractVector{T}, u::AbstractVector{T})
+        length(funcs) == length(l) == length(u) || error("The number of constraint functions must match the lengths of the bounds")
+        for i = 1:length(l)
+            l[i] <= u[i] || error("The lower bound must be smaller than the upper bound")
+        end
+        new(funcs, l, u)
+    end
+end
+
+type Constraints
+    bounds::AbstractConstraints
+    linear::AbstractConstraints
+    nonlinear::AbstractConstraints
+    mu::Real
+end
+Constraints() = Constraints(ConstraintsNone(), ConstraintsNone(), ConstraintsNone(), 1.0)
+
+
+## feasible
+feasible(x, constraints::ConstraintsNone) = true
+
+function feasible(x, constraints::ConstraintsBox)
+    l = constraints.lower
+    u = constraints.upper
+    for i = 1:length(x)
+        l[i] <= x[i] <= u[i] || return false
+    end
+    true
+end
+
+function feasible(x, constraints::ConstraintsL)
+    l = constraints.lower
+    u = constraints.upper
+    y = constraints.scratch1
+    A_mul_B!(y, constraints.A, x)
+    for i = 1:length(y)
+        l[i] <= y[i] <= u[i] || return false
+    end
+    true
+end
+
+## project!
+project!(x, constraints::AbstractConstraints) = x
+
+function project!(x::Array, bounds::ConstraintsBox)
+    for i = 1:length(x)
+        x[i] = max(bounds.lower[i], min(bounds.upper[i], x[i]))
+    end
+    x
+end
+
+## step!
+function step!{T}(xtmp::Array{T}, x, s, alpha::Real, constraints::AbstractConstraints = ConstraintsNone())
+    (length(xtmp) == length(x) == length(s)) || error("lengths of xtmp ($(length(xtmp))), x ($(length(x))), and s ($(length(s))) disagree")
+    for i = 1:length(xtmp)
+        @inbounds xtmp[i] = x[i] + alpha*s[i]
+    end
+    project!(xtmp, constraints)
+end
+
+
+## toedge and tocorner
+# For bounds constraints,
+# - toedge returns the "distance along s" (the value of the coefficient alpha) to reach the boundary
+# - tocorner returns the "distance along s" needed to be projected into the "corner" (all constraints are active)
+# The first is the farthest you might consider searching in a barrier method. The second is the farthest that it
+# makes sense to search in a projection method.
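To make these two conventions concrete, here is a small worked example (an editor's illustration, not part of the patch; it uses the `ConstraintsBox`, `toedge`, and `tocorner` definitions from this file):

    using Optim
    # From x = [0.0, 0.0], moving along s = [1.0, 2.0] inside the box
    # lower = [-1.0, -1.0], upper = [1.0, 4.0]:
    #   coordinate 1 reaches its upper bound at alpha = (1.0 - 0.0)/1.0 = 1.0
    #   coordinate 2 reaches its upper bound at alpha = (4.0 - 0.0)/2.0 = 2.0
    bounds = Optim.ConstraintsBox([-1.0, -1.0], [1.0, 4.0])
    Optim.toedge([0.0, 0.0], [1.0, 2.0], bounds)    # 1.0: first bound hit, the limit for a barrier method
    Optim.tocorner([0.0, 0.0], [1.0, 2.0], bounds)  # 2.0: last bound hit, the limit for a projection method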
+toedge{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) +tocorner{T}(x::AbstractArray{T}, s, constraints::AbstractConstraints) = convert(T, Inf) + +function toedge{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the first coordinate to leave the box + alphamax = convert(T,Inf) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = min(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = min(alphamax, (ui-x[i])/si) + end + end + convert(T, alphamax) +end + +toedge(x, s, constraints::Constraints) = toedge(x, s, constraints.bounds) + +function tocorner{T}(x::AbstractArray{T}, s, bounds::ConstraintsBox) + # Stop at the last coordinate to leave the box + alphamax = zero(T) + for i = 1:length(x) + si = s[i] + li = bounds.lower[i] + ui = bounds.upper[i] + if si < 0 && isfinite(li) + alphamax = max(alphamax, (li-x[i])/si) + elseif si > 0 && isfinite(ui) + alphamax = max(alphamax, (ui-x[i])/si) + else + return convert(T,Inf) + end + end + convert(T, alphamax) +end + +tocorner(x, s, constraints::Constraints) = tocorner(x, s, constraints.bounds) diff --git a/src/fminbox.jl b/src/fminbox.jl index 19da30a36..1362f4c02 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -1,7 +1,7 @@ # Attempt to compute a reasonable default mu: at the starting # position, the gradient of the input function should dominate the # gradient of the barrier. -function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = nan(T), mu0factor::T = 0.001) +function initialize_mu{T}(gfunc::Array{T}, gbarrier::Array{T}; mu0::T = convert(T,NaN), mu0factor::T = 0.001) if isnan(mu0) gbarriernorm = sum(abs(gbarrier)) if gbarriernorm > 0 @@ -26,7 +26,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisl) dx = x[i] - thisl if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -41,7 +41,7 @@ function barrier_box{T}(x::Array{T}, g, l::Array{T}, u::Array{T}) if isfinite(thisu) dx = thisu - x[i] if dx <= 0 - return inf(T) + return convert(T,Inf) end v -= log(dx) if calc_grad @@ -76,7 +76,7 @@ function barrier_combined{T}(x::Array{T}, g, gfunc, gbarrier, val_each::Vector{T end function limits_box{T}(x::Array{T}, d::Array{T}, l::Array{T}, u::Array{T}) - alphamax = inf(T) + alphamax = convert(T,Inf) for i = 1:length(x) if d[i] < 0 alphamax = min(alphamax, ((l[i]-x[i])+eps(l[i]))/d[i]) @@ -105,20 +105,20 @@ end const PARAMETERS_MU = one64< precondprepbox(P, x, l, u, mu), optimizer = cg) @@ -145,14 +145,14 @@ function fminbox{T}(df::DifferentiableFunction, gbarrier[i] = (isfinite(thisl) ? one(T)/(thisx-thisl) : zero(T)) + (isfinite(thisu) ? one(T)/(thisu-thisx) : zero(T)) end valfunc = df.fg!(x, gfunc) # is this used?? - mu = initialize_mu(gfunc, gbarrier; mu0factor=mufactor) + mu = isnan(mu0) ? 
initialize_mu(gfunc, gbarrier; mu0factor=mufactor) : mu0 if show_trace > 0 println("######## fminbox ########") println("Initial mu = ", mu) end g = similar(x) - valboth = Array(T, 2) + valboth = Array(T, 2) fval_all = Array(Vector{T}, 0) fcount_all = 0 xold = similar(x) diff --git a/src/golden_section.jl b/src/golden_section.jl index dce3da090..55b1a2e0d 100644 --- a/src/golden_section.jl +++ b/src/golden_section.jl @@ -18,7 +18,7 @@ macro goldensectiontrace() end end -function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; +function golden_section{T <: AbstractFloat}(f::Function, x_lower::T, x_upper::T; rel_tol::T = sqrt(eps(T)), abs_tol::T = eps(T), iterations::Integer = 1_000, @@ -28,11 +28,11 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; if !(x_lower < x_upper) error("x_lower must be less than x_upper") end - + # Save for later initial_lower = x_lower initial_upper = x_upper - + const golden_ratio::T = 0.5 * (3.0 - sqrt(5.0)) x_minimum = x_lower + golden_ratio*(x_upper-x_lower) @@ -46,7 +46,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; tr = OptimizationTrace() tracing = store_trace || show_trace || extended_trace @goldensectiontrace - + while it < iterations tolx = rel_tol * abs(x_minimum) + abs_tol @@ -59,7 +59,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; end it += 1 - + if x_upper - x_minimum > x_minimum - x_lower x_new = x_minimum + golden_ratio*(x_upper - x_minimum) f_new = f(x_new) @@ -91,7 +91,7 @@ function golden_section{T <: FloatingPoint}(f::Function, x_lower::T, x_upper::T; initial_lower, initial_upper, x_minimum, - float64(f_minimum), + @compat(Float64(f_minimum)), it, converged, rel_tol, diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index fc996f4d4..acdd32fa8 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -18,8 +18,8 @@ macro gdtrace() end end -function gradient_descent{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function gradient_descent{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Array{T}; xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -85,7 +85,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, end # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) clear!(lsr) push!(lsr, zero(T), f_x, dphi0) @@ -124,7 +124,7 @@ function gradient_descent{T}(d::Union(DifferentiableFunction, return MultivariateOptimizationResults("Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/interior.jl b/src/interior.jl new file mode 100644 index 000000000..6f25434c9 --- /dev/null +++ b/src/interior.jl @@ -0,0 +1,408 @@ +############ +## +## Barriers +## +############ +# Note: barrier_g! and barrier_h! 
simply add to an existing g or H +# (they do not initialize with zeros) + +## Box constraints +function barrier_f{T}(x::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return convert(T,Inf) + phi += log(xi-li) + end + if isfinite(ui) + xi <= ui || return convert(T,Inf) + phi += log(ui-xi) + end + end + -phi::T +end + +function barrier_g!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + g[i] -= 1/(xi-li) + g[i] += 1/(ui-xi) + end +end + +function barrier_fg!{T}(x::Array{T}, g::Array{T}, bounds::ConstraintsBox) + n = length(x) + phi = zero(T) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + if isfinite(li) + xi >= li || return convert(T,Inf) # no need to compute additional gradient terms + phi += log(xi-li) + g[i] -= 1/(xi-li) + end + if isfinite(ui) + xi <= ui || return convert(T,Inf) + phi += log(ui-xi) + g[i] += 1/(ui-xi) + end + end + -phi::T +end + +function barrier_h!{T}(x::Vector{T}, H::Matrix{T}, bounds::ConstraintsBox) + n = length(x) + l = bounds.lower + u = bounds.upper + for i = 1:n + li, ui = l[i], u[i] + xi = x[i] + dx = xi-li + H[i,i] += 1/(dx*dx) + dx = ui-xi + H[i,i] += 1/(dx*dx) + end +end + +## Linear constraints +function barrier_fg!{T}(x::Array{T}, g::Array{T}, constraints::ConstraintsL) + m, n = size(constraints.A) + y1 = constraints.scratch1 + A_mul_B!(y1, constraints.A, x) + phi = zero(T) + l = constraints.lower + u = constraints.upper + lenx = length(x) + lenc = length(y1) + for i = 1:lenc + li, ui = l[i], u[i] + yi = y1[i] + if isfinite(li) + yi >= li || return convert(T,Inf) # no need to compute additional gradient terms + phi += log(yi-li) + end + if isfinite(ui) + yi <= ui || return convert(T,Inf) + phi += log(ui-yi) + end + end + if isempty(g) + return -phi + end + y2 = constraints.scratch2 + y3 = constraints.scratch3 + for i = 1:lenc + y2[i] = 1/(y1[i] - l[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] -= y3[i] + end + for i = 1:lenc + y2[i] = 1/(u[i] - y1[i]) + end + At_mul_B!(y3, constraints.A, y2) + for i = 1:lenx + g[i] += y3[i] + end + -phi::T +end + +barrier_f{T}(x::Array{T}, constraints::ConstraintsL) = barrier_fg!(x, T[], constraints) + + +## Nonlinear inequalities + + +######################### +## +## Combined cost function +## +######################### +combined_f(x, objective_f::Function, constraints::AbstractConstraints, t) = + t*objective_f(x) + barrier_f(x, constraints) + +function combined_g!(x, g, objective_g!::Function, constraints::AbstractConstraints, t) + objective_g!(x, g) + scale!(g, t) + barrier_fg!(x, g, constraints) +end + +function combined_fg!(x, g, objective_fg!::Function, constraints::AbstractConstraints, t) + f_x = t*objective_fg!(x, g) + scale!(g, t) + f_x += barrier_fg!(x, g, constraints) + f_x +end + +function combined_h!(x, H, objective_h!::Function, constraints::AbstractConstraints, t) + objective_h!(x, H) + scale!(H, t) + barrier_h!(x, H, constraints) +end + +# combined_f(x, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_f(x, objective.f, constraints, t) +# +# combined_g!(x, g, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_g!(x, g, objective.g, constraints, t) +# +# 
combined_fg!(x, g, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_fg!(x, g, objective.fg!, constraints, t) +# +# combined_h!(x, H, objective::Union{DifferentiableFunction,TwiceDifferentiableFunction}, constraints::AbstractConstraints, t) = +# combined_h!(x, H, objective.h!, constraints, t) + + +######################################### +## +## Cost function for linear least squares +## +######################################### + +linlsq(A::Matrix, b::Vector) = + TwiceDifferentiableFunction( x -> linlsq_fg!(x, Array(eltype(x),0), A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,g) -> linlsq_fg!(x, g, A, b, similar(b)), + (x,H) -> linlsq_h!(x, H, A, b)) + +dummy_f(x) = (Base.show_backtrace(STDOUT, backtrace()); error("No f")) +dummy_g!(x,g) = (Base.show_backtrace(STDOUT, backtrace()); error("No g!")) + +function linlsq_fg!{T}(x::AbstractArray{T}, g, A, b, scratch) + A_mul_B!(scratch, A, x) + f_x = zero(typeof(one(T)*one(T)+one(T)*one(T))) + for i = 1:length(g) + tmp = scratch[i] + tmp -= b[i] + f_x += tmp*tmp + scratch[i] = tmp + end + if !isempty(g) + At_mul_B!(g, A, scratch) + end + f_x/2 +end + +linlsq_h!(x, H, A, b) = At_mul_B!(H, A, A) + + +######################### +## +## Interior point methods +## +######################### +# Follows the notation of +# H. Hindi (2006), "A Tutorial on Convex Optimization II: Duality and +# Interior Point Methods," Proc. 2006 American Control Conference. +# particularly his "Algorithm: Barrier method." + +function interior_newton{T}(objective::TwiceDifferentiableFunction, + initial_x::Vector{T}, + constraints::AbstractConstraints; + t = one(T), mu = 10, eps_gap = convert(T,NaN), + xtol::Real = convert(T,1e-32), + ftol::Real = convert(T,1e-8), + grtol::Real = convert(T,1e-8), + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + xtmp = similar(x) + x_previous = similar(x) + m = length(x) + gr = similar(x) + H = Array(T, m, m) + lsr = LineSearchResults(T) + + tr = OptimizationTrace() + tracing = store_trace || show_trace || extended_trace + + df = DifferentiableFunction(dummy_f, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + iteration, iter_t, f_calls, g_calls = 0, 0, 0, 0 + f_x = f_x_previous = convert(T,Inf) + x_converged = f_converged = gr_converged = converged = false +# while m/t > eps_gap && iteration < iterations + while iteration < iterations + f_x_previous = f_x + f_x = combined_fg!(x, gr, objective.fg!, constraints, t) + combined_h!(x, H, objective.h!, constraints, t) + F, Hd = ldltfact!(Positive, H) + s = F\(-gr) + sgr = vecdot(s, gr) + if !(sgr <= 0) || isinf(sgr) + # Presumably due to numeric instability + converged = x_converged = f_converged = gr_converged = false + break + end + clear!(lsr) + push!(lsr, zero(T), f_x, sgr) + f_x /= t + @newtontrace + alphamax = toedge(x, s, constraints) + alpha = alphamax < 1 ? 
0.9*alphamax : 1.0 + alpha, _f_calls, _g_calls = + hz_linesearch!(df, x, s, xtmp, gr, lsr, alpha, true, constraints, alphamax) + copy!(x_previous, x) + step!(x, x, s, alpha, constraints) + iteration += 1 + iter_t += 1 + f_calls += _f_calls + g_calls += _g_calls + x_converged, + f_converged, + gr_converged, + converged = assess_convergence(x, + x_previous, + f_x, + f_x_previous, + gr, + xtol, + ftol, + t*grtol) + # It's time to change the barrier penalty if we're + # sufficiently "in the basin." That means (1) the Hessian is + # positive (semi)definite (all(Hd .> 0)) and that the Hessian + # step is accepted (alpha is approximately 1). The latter + # suggests that the Hessian is a reliable predictor of the + # function shape, and that within a few iterations we'd be at + # the minimum. + if (alpha >= 0.9 && all(Hd .>= 0)) || converged + if iter_t == 1 && converged + # if changing t didn't change the solution, quit now + break + end + t *= mu + iter_t = 0 + df = DifferentiableFunction(dummy_f, + dummy_g!, + (x,gr) -> combined_fg!(x, gr, objective.fg!, constraints, t)) + end + end + MultivariateOptimizationResults("Interior/Newton", + initial_x, + x, + @compat(Float64(f_x)), + iteration, + iteration == iterations, + x_converged, + xtol, + f_converged, + ftol, + gr_converged, + grtol, + tr, + f_calls, + g_calls) +end + + +function interior{T}(objective::Union{DifferentiableFunction, TwiceDifferentiableFunction}, + initial_x::Array{T}, + constraints::AbstractConstraints; + method = :cg, t = convert(T,NaN), mu = 10, eps_gap = convert(T,NaN), + xtol::Real = 1e-32, + ftol::Real = 1e-8, + grtol::Real = 1e-8, + iterations::Integer = 1_000, + store_trace::Bool = false, + show_trace::Bool = false, + extended_trace::Bool = false) + if isnan(t) + vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x) + if isfinite(vo) && isfinite(vc) && sqrt(gogo) < grtol + return MultivariateOptimizationResults("Interior", + initial_x, + initial_x, + @compat(Float64(vo)), + 0, + false, + false, + xtol, + false, + ftol, + true, + grtol, + OptimizationTrace(), + 0, + 0) + end + if gogc == 0 + t = one(T) # FIXME: bounds constraints, starting at exact center. This is probably not the right guess. 
+ else + t = convert(T, 10*abs(gogc)/gogo) + end + end + if isnan(eps_gap) + eps_gap = (cbrt(eps(T)))^2/t + end + if method == :newton + return interior_newton(objective, initial_x, constraints; t=t, mu=mu, eps_gap=eps_gap, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + end + if !feasible(initial_x, constraints) + error("Initial guess must be feasible") + end + x = copy(initial_x) + m = length(x) + local results + iteration, f_calls, g_calls = 0, 0, 0 + while m/t > eps_gap || iteration == 0 + df = DifferentiableFunction( x -> combined_f(x, objective.f, constraints, t), + (x,g) -> combined_g!(x, g, objective.g!, constraints, t), + (x,g) -> combined_fg!(x, g, objective.fg!, constraints, t)) + results = optimize(df, x, method=method, constraints=constraints, interior=true, xtol=xtol, ftol=ftol, grtol=grtol, iterations=iterations, store_trace=store_trace, show_trace=show_trace, extended_trace=extended_trace) + copy!(x, results.minimum) + iteration += results.iterations + f_calls += results.f_calls + g_calls += results.g_calls + results.f_minimum /= t + t *= mu + end + results.iterations, results.f_calls, results.g_calls = iteration, f_calls, g_calls + copy!(results.initial_x, initial_x) + results +end + +""" +`vo, vc, gogo, gogc = gnorm(objective, constraints, initial_x)` +computes the values `vo`, `vc` of the `objective` and `constraints` at +position `initial_x`, as well as their gradients. `gogo` is the dot +product of the objective gradient with itself; `gogc` is the dot +product of the objective gradient and the constraint gradient. + +The latter two can be used to initialize the tradeoff between +objective and constraints so that the total gradient `t*go + gc` is +still a descent direction for the objective. This ensures that the +barrier penalty will not push the solution out of the starting basin. +""" +function gnorm(objective, constraints, initial_x) + go = similar(initial_x) + gc = similar(initial_x) + fill!(gc, 0) + vo = objective.fg!(initial_x, go) + vc = barrier_fg!(initial_x, gc, constraints) + gom = gcm = zero(eltype(go)) + for i = 1:length(go) + gom += abs(go[i]) + gcm += abs(gc[i]) + end + vo, vc, gom, gcm +end diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 941d52b58..f3c5dade6 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -80,9 +80,11 @@ macro lbfgstrace() end end -function l_bfgs{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function l_bfgs{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, initial_x::Vector{T}; + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, m::Integer = 10, xtol::Real = 1e-32, ftol::Real = 1e-8, @@ -148,6 +150,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, # Iterate until convergence converged = false + converged_iteration = typemin(Int) while !converged && iteration < iterations # Increment the number of steps we've had to perform iteration += 1 @@ -157,13 +160,34 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, twoloop_alpha, twoloop_q) # Refresh the line search cache - dphi0 = dot(gr, s) + dphi0 = vecdot(gr, s) + if dphi0 >= 0 + # The search direction is not a descent direction. Something + # is wrong, so restart the search-direction algorithm. + # See also the "restart" below. 
+ iteration = 1 + for i = 1:n + @inbounds s[i] = -gr[i] + end + dphi0 = vecdot(gr, s) + end + dphi0 == 0 && break # we're at a stationary point clear!(lsr) push!(lsr, zero(T), f_x, dphi0) + alphamax = interior ? toedge(x, s, constraints) : convert(T,Inf) + + # Pick the initial step size (HZ #I1-I2). Even though we might + # guess alpha=1 for l_bfgs most of the time, testing suggests + # this is still usually a good idea (fewer total function and + # gradient evaluations). + alpha, mayterminate, f_update, g_update = + alphatry(alpha, d, x, s, x_ls, gr_ls, lsr, constraints, alphamax) + f_calls, g_calls = f_calls + f_update, g_calls + g_update + # Determine the distance of movement along the search line alpha, f_update, g_update = - linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate) + linesearch!(d, x, s, x_ls, gr_ls, lsr, alpha, mayterminate, constraints, alphamax) f_calls, g_calls = f_calls + f_update, g_calls + g_update # Maintain a record of previous position @@ -188,7 +212,7 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, end # Update the L-BFGS history of positions and gradients - rho_iteration = 1 / dot(dx, dgr) + rho_iteration = 1 / vecdot(dx, dgr) if isinf(rho_iteration) # TODO: Introduce a formal error? There was a warning here previously break @@ -210,12 +234,32 @@ function l_bfgs{T}(d::Union(DifferentiableFunction, grtol) @lbfgstrace + + # If we think we've converged, restart and make sure we don't + # have another long descent. This should catch the case where + # dphi < 0 "by a hair," meaning that the chosen search direction + # happened to be nearly orthogonoal to the gradient. + if converged + if iteration <= 3 || converged_iteration >= 0 + if iteration <= max(m, 3) + break # we converged quickly after previous restart + end + converged_iteration += iteration + else + converged_iteration = iteration + end + converged = false + iteration = 0 # the next search direction will be -gr + end end + if converged_iteration >= 0 + iteration += converged_iteration + end return MultivariateOptimizationResults("L-BFGS", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 178b585ee..69e001811 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -38,7 +38,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # Maintain a trace of the system. 
tr = OptimizationTrace() if show_trace - d = {"lambda" => lambda} + d = @compat Dict{Any,Any}("lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), NaN, d) push!(tr, os) println(os) @@ -87,7 +87,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12 # show state if show_trace gradnorm = norm(J'*fcur, Inf) - d = {"g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda} + d = @compat Dict{Any,Any}("g(x)" => gradnorm, "dx" => delta_x, "lambda" => lambda) os = OptimizationState(iterCt, sse(fcur), gradnorm, d) push!(tr, os) println(os) diff --git a/src/linesearch/backtracking_linesearch.jl b/src/linesearch/backtracking_linesearch.jl index c8e498601..ed4810a60 100644 --- a/src/linesearch/backtracking_linesearch.jl +++ b/src/linesearch/backtracking_linesearch.jl @@ -1,5 +1,5 @@ -function backtracking_linesearch!{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function backtracking_linesearch!{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector{T}, s::Vector, x_scratch::Vector, diff --git a/src/linesearch/hz_linesearch.jl b/src/linesearch/hz_linesearch.jl index 815a1112a..7282780e8 100644 --- a/src/linesearch/hz_linesearch.jl +++ b/src/linesearch/hz_linesearch.jl @@ -1,7 +1,7 @@ # phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g) # Dot product of two "vectors", even if they don't have a vector shape -function dot(x::Array, y::Array) +function vecdot(x::Array, y::Array) d = x[1] * y[1] for i = 2:length(x) d += x[i] * y[i] @@ -10,11 +10,11 @@ function dot(x::Array, y::Array) end # Vector-norm-squared, even if it doesn't have a vector shape -norm2(x::Array) = dot(x, x) +vecnorm2(x::Array) = vecdot(x, x) # Display flags are represented as a bitfield # (not exported, but can use via OptimizeMod.ITER, for example) -const one64 = convert(Uint64, 1) +const one64 = convert(UInt64, 1) const FINAL = one64 const ITER = one64 << 1 const PARAMETERS = one64 << 2 @@ -35,23 +35,17 @@ display_nextbit = 14 # paper (these may or may not be extensions of the cg_descent code # that can be downloaded from Hager's site; his code has undergone # numerous revisions since publication of the paper): -# cgdescent: the termination condition employs a "unit-correct" -# expression rather than a condition on gradient -# components---whether this is a good or bad idea will require -# additional experience, but preliminary evidence seems to suggest -# that it makes "reasonable" choices over a wider range of problem -# types. -# linesearch: the Wolfe conditions are checked only after alpha is +# - The Wolfe conditions are checked only after alpha is # generated either by quadratic interpolation or secant # interpolation, not when alpha is generated by bisection or # expansion. This increases the likelihood that alpha will be a # good approximation of the minimum. -# linesearch: In step I2, we multiply by psi2 only if the convexity +# - In step I2, we multiply by psi2 only if the convexity # test failed, not if the function-value test failed. This # prevents one from going uphill further when you already know # you're already higher than the point at alpha=0. -# both: checks for Inf/NaN function values -# both: support maximum value of alpha (equivalently, c). This +# - Checks for Inf/NaN function values +# - Support maximum value of alpha (equivalently, c). This # facilitates using these routines for constrained minimization # when you can calculate the distance along the path to the # disallowed region. 
(When you can't easily calculate that @@ -62,9 +56,10 @@ display_nextbit = 14 # which a finite value will be returned. See, e.g., limits_box # below. The default value for alphamax is Inf. See alphamaxfunc # for cgdescent and alphamax for linesearch_hz. +# - Support for projected-gradient methods const DEFAULTDELTA = 0.1 -const DEFAULTSIGMA = 0.9 +const DEFAULTSIGMA = 0.9 # Generate initial guess for step size (HZ, stage I0) function alphainit{T}(alpha::Real, @@ -90,19 +85,20 @@ end # Code used to use phi, a 1-parameter function induced by f and s # Need to pass s as an explicit parameter function alphatry{T}(alpha::T, - d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), + d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, gtmp::Array, lsr::LineSearchResults, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = convert(T,Inf), psi1::Real = convert(T,0.2), psi2::Real = convert(T,2), psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), - alphamax::Real = inf(T), - display::Integer = 0) + iterfinitemax::Integer = ceil(Int,-log2(eps(T))), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -110,17 +106,16 @@ function alphatry{T}(alpha::T, dphi0 = lsr.slope[1] alphatest = psi1 * alpha + alphamax = min(alphamax, tocorner(x, s, constraints)) alphatest = min(alphatest, alphamax) - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 iterfinite = 1 while !isfinite(phitest) alphatest = psi3 * alphatest - # Use xtmp here - phitest = d.f(x + alphatest * s) + phitest = d.f(step!(xtmp, x, s, alphatest, constraints)) f_calls += 1 lsr.nfailures += 1 iterfinite += 1 @@ -130,7 +125,7 @@ function alphatry{T}(alpha::T, end end a = ((phitest-phi0)/alphatest - dphi0)/alphatest # quadratic fit - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("quadfit: alphatest = ", alphatest, ", phi0 = ", phi0, ", phitest = ", phitest, @@ -148,7 +143,7 @@ function alphatry{T}(alpha::T, alpha = alphamax mayterminate = false end - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (quadratic): ", alpha, ",(mayterminate = ", mayterminate, ")") end @@ -160,14 +155,14 @@ function alphatry{T}(alpha::T, end end alpha = min(alphamax, alpha) - if display & ALPHAGUESS > 0 + if detailed_trace & ALPHAGUESS > 0 println("alpha guess (expand): ", alpha) end return alpha, mayterminate, f_calls, g_calls end -function hz_linesearch!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function hz_linesearch!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array{T}, s::Array, xtmp::Array, @@ -175,29 +170,32 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr::LineSearchResults{T}, c::Real, mayterminate::Bool, + constraints::AbstractConstraints = ConstraintsNone(), + alphamax::Real = convert(T,Inf), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - alphamax::Real = inf(T), rho::Real = convert(T,5), epsilon::Real = convert(T,1e-6), gamma::Real = convert(T,0.66), - linesearchmax::Integer = 50, + linesearchmax::Integer = 100, psi3::Real = convert(T,0.1), - iterfinitemax::Integer = iceil(-log2(eps(T))), - display::Integer = 0) - if display & LINESEARCH > 0 + iterfinitemax::Integer = (@compat ceil(Integer, -log2(eps(T)))), + detailed_trace::Integer = 0) + if detailed_trace & LINESEARCH > 0 println("New linesearch") end f_calls = 0 g_calls = 0 + 
alphamax = min(alphamax, tocorner(x, s, constraints)) + phi0 = lsr.value[1] dphi0 = lsr.slope[1] (isfinite(phi0) && isfinite(dphi0)) || error("Initial value and slope must be finite") philim = phi0 + epsilon * abs(phi0) @assert c > 0 @assert isfinite(c) && c <= alphamax - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -206,7 +204,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, lsr.nfailures += 1 iterfinite += 1 c *= psi3 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -219,7 +217,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, # satisfies the Wolfe conditions if mayterminate && satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Wolfe condition satisfied on point alpha = ", c) end return c, f_calls, g_calls # phic @@ -232,7 +230,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, iter = 1 cold = -one(T) while !isbracketed && iter < linesearchmax - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracketing: ia = ", ia, ", ib = ", ib, ", c = ", c, @@ -259,7 +257,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, error("c = ", c, ", lsr = ", lsr) end # ia, ib = bisect(phi, lsr, ia, ib, philim) # TODO: Pass options - ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, display) + ia, ib, f_up, g_up = bisect!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up isbracketed = true @@ -269,7 +267,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, c *= rho if c > alphamax c = (alphamax + cold)/2 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax, ", cold = ", cold, ", new c = ", c) end @@ -277,7 +275,7 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, return cold, f_calls, g_calls end end - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up iterfinite = 1 @@ -285,11 +283,11 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, alphamax = c lsr.nfailures += 1 iterfinite += 1 - if display & BRACKET > 0 + if detailed_trace & BRACKET > 0 println("bracket: non-finite value, bisection") end c = (cold + c) / 2 - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up end @@ -317,20 +315,21 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, end while iter < linesearchmax a = lsr.alpha[ia] - b = lsr.alpha[ib] + b = lsr.alpha[ib] @assert b > a - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("linesearch: ia = ", ia, ", ib = ", ib, ", a = ", a, ", b = ", b, ", phi(a) = ", lsr.value[ia], ", phi(b) = ", lsr.value[ib]) + println("phi(a) == phi(b) ? 
", lsr.value[ia] == lsr.value[ib]) end if b - a <= eps(b) return a, f_calls, g_calls # lsr.value[ia] end - iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display) + iswolfe, iA, iB, f_up, g_up = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, constraints, delta, sigma, detailed_trace) f_calls += f_up g_calls += g_up if iswolfe @@ -340,12 +339,12 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, B = lsr.alpha[iB] @assert B > A if B - A < gamma * (b - a) - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant succeeded") end if nextfloat(lsr.value[ia]) >= lsr.value[ib] && nextfloat(lsr.value[iA]) >= lsr.value[iB] # It's so flat, secant didn't do anything useful, time to quit - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant suggests it's flat") end return A, f_calls, g_calls @@ -354,23 +353,24 @@ function hz_linesearch!{T}(df::Union(DifferentiableFunction, ib = iB else # Secant is converging too slowly, use bisection - if display & LINESEARCH > 0 + if detailed_trace & LINESEARCH > 0 println("Linesearch: secant failed, using bisection") end c = (A + B) / convert(T, 2) # phic = phi(gphi, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) # ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options - ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display) + ia, ib, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end iter += 1 end + @show lsr error("Linesearch failed to converge") end @@ -398,8 +398,8 @@ function secant(lsr::LineSearchResults, ia::Integer, ib::Integer) return secant(lsr.alpha[ia], lsr.alpha[ib], lsr.slope[ia], lsr.slope[ib]) end # phi -function secant2!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function secant2!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -408,9 +408,10 @@ function secant2!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, + constraints::AbstractConstraints = ConstraintsNone(), delta::Real = DEFAULTDELTA, sigma::Real = DEFAULTSIGMA, - display::Integer = 0) + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 @@ -420,31 +421,30 @@ function secant2!{T}(df::Union(DifferentiableFunction, b = lsr.alpha[ib] dphia = lsr.slope[ia] dphib = lsr.slope[ib] - @assert dphia < 0 - @assert dphib >= 0 + @assert dphia < 0 && isfinite(dphia) + @assert dphib >= 0 && isfinite(dphib) c = secant(a, b, dphia, dphib) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: a = ", a, ", b = ", b, ", c = ", c) end @assert isfinite(c) # phic = phi(tmpc, c) # Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phic) && isfinite(dphic) push!(lsr, c, phic, dphic) ic = length(lsr) if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: first c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end # iA, iB = 
update(phi, lsr, ia, ib, ic, philim) - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic) end a = lsr.alpha[iA] @@ -458,11 +458,11 @@ function secant2!{T}(df::Union(DifferentiableFunction, c = secant(lsr, ia, iA) end if a <= c <= b - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c = ", c) end # phic = phi(tmpc, c) # TODO: Replace - phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true) + phic, dphic, f_up, g_up = linefunc!(df, x, s, c, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up @assert isfinite(phic) && isfinite(dphic) @@ -470,16 +470,16 @@ function secant2!{T}(df::Union(DifferentiableFunction, ic = length(lsr) # Check arguments here if satisfies_wolfe(c, phic, dphic, phi0, dphi0, philim, delta, sigma) - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2: second c satisfied Wolfe conditions") end return true, ic, ic, f_calls, g_calls end - iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display) + iA, iB, f_up, g_up = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, constraints, detailed_trace) f_calls += f_up g_calls += g_up end - if display & SECANT2 > 0 + if detailed_trace & SECANT2 > 0 println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB]) end return false, iA, iB, f_calls, g_calls @@ -489,8 +489,8 @@ end # Given a third point, pick the best two that retain the bracket # around the minimum (as defined by HZ, eq. 29) # b will be the upper bound, and a the lower bound -function update!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function update!(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -500,7 +500,8 @@ function update!(df::Union(DifferentiableFunction, ib::Integer, ic::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, eq. 4.4): @@ -511,7 +512,7 @@ function update!(df::Union(DifferentiableFunction, c = lsr.alpha[ic] phic = lsr.value[ic] dphic = lsr.slope[ic] - if display & UPDATE > 0 + if detailed_trace & UPDATE > 0 println("update: ia = ", ia, ", a = ", a, ", ib = ", ib, @@ -536,12 +537,12 @@ function update!(df::Union(DifferentiableFunction, end # phic is bigger than phi0, which implies that the minimum # lies between a and c. Find it via bisection. 
- return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display) + return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, constraints, detailed_trace) end # HZ, stage U3 (with theta=0.5) -function bisect!{T}(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function bisect!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Array, s::Array, xtmp::Array, @@ -550,27 +551,32 @@ function bisect!{T}(df::Union(DifferentiableFunction, ia::Integer, ib::Integer, philim::Real, - display::Integer = 0) + constraints::AbstractConstraints = ConstraintsNone(), + detailed_trace::Integer = 0) f_calls = 0 g_calls = 0 - gphi = nan(T) + gphi = convert(T,NaN) a = lsr.alpha[ia] b = lsr.alpha[ib] # Debugging (HZ, conditions shown following U3) @assert lsr.slope[ia] < 0 @assert lsr.value[ia] <= philim - @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here + # if !(lsr.slope[ib] < 0) + # @show a, b + # @show lsr.value[ia], lsr.value[ib] + # @show lsr.slope[ia], lsr.slope[ib] + # end + # @assert lsr.slope[ib] < 0 # otherwise we wouldn't be here @assert lsr.value[ib] > philim @assert b > a while b - a > eps(b) - if display & BISECT > 0 + if detailed_trace & BISECT > 0 println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a) end d = (a + b) / convert(T, 2) - phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true) + phid, gphi, f_up, g_up = linefunc!(df, x, s, d, xtmp, g, true, constraints) f_calls += f_up g_calls += g_up - @assert isfinite(phid) && isfinite(gphi) push!(lsr, d, phid, gphi) id = length(lsr) if gphi >= 0 @@ -588,26 +594,25 @@ function bisect!{T}(df::Union(DifferentiableFunction, end # Define one-parameter function for line searches -function linefunc!(df::Union(DifferentiableFunction, - TwiceDifferentiableFunction), - x::Array, - s::Array, - alpha::Real, - xtmp::Array, - g::Array, - calc_grad::Bool) +function linefunc!{T}(df::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, + x::Array{T}, + s::Array, + alpha::Real, + xtmp::Array, + g::Array, + calc_grad::Bool, + constraints::AbstractConstraints = ConstraintsNone()) f_calls = 0 g_calls = 0 - for i = 1:length(x) - xtmp[i] = x[i] + alpha * s[i] - end - gphi = nan(eltype(g)) + step!(xtmp, x, s, alpha, constraints) + gphi = convert(eltype(g),NaN) if calc_grad val = df.fg!(xtmp, g) f_calls += 1 g_calls += 1 if isfinite(val) - gphi = dot(g, s) + gphi = vecdot(g, s) end else val = df.f(xtmp) diff --git a/src/linesearch/interpolating_linesearch.jl b/src/linesearch/interpolating_linesearch.jl index 2c3486c4e..f17e12176 100644 --- a/src/linesearch/interpolating_linesearch.jl +++ b/src/linesearch/interpolating_linesearch.jl @@ -1,8 +1,8 @@ # TODO: Optimize for fg! 
calls # TODO: Implement safeguards -function interpolating_linesearch!{T}(d::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function interpolating_linesearch!{T}(d::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector, p::Vector, x_new::Vector, diff --git a/src/linesearch/mt_linesearch.jl b/src/linesearch/mt_linesearch.jl index a8d4511fa..b4b488e01 100644 --- a/src/linesearch/mt_linesearch.jl +++ b/src/linesearch/mt_linesearch.jl @@ -132,8 +132,8 @@ # TODO: Decide whether to update x, f, g and info # or just return step and nfev and let existing code do its job -function mt_linesearch!{T}(fcn::Union(DifferentiableFunction, - TwiceDifferentiableFunction), +function mt_linesearch!{T}(fcn::Union{DifferentiableFunction, + TwiceDifferentiableFunction}, x::Vector, s::Vector, new_x::Vector, diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index 927e451ac..eca0f9944 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -125,7 +125,7 @@ function momentum_gradient_descent{T}(d::DifferentiableFunction, return MultivariateOptimizationResults("Momentum Gradient Descent", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index f29afa619..9dcf8280f 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -69,6 +69,7 @@ function nelder_mead{T}(f::Function, for i in 1:n @inbounds y[i] = f(p[:, i]) end + any(isfinite(y)) || error("At least one of the starting points must have finite penalty") f_calls += n f_x_previous, f_x = NaN, nmobjective(y, m, n) @@ -186,7 +187,7 @@ function nelder_mead{T}(f::Function, return MultivariateOptimizationResults("Nelder-Mead", initial_x, minimum, - float64(f(minimum)), + @compat(Float64(f(minimum))), iteration, iteration == iterations, false, diff --git a/src/newton.jl b/src/newton.jl index 64cc33370..8f8245b50 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -130,7 +130,7 @@ function newton{T}(d::TwiceDifferentiableFunction, return MultivariateOptimizationResults("Newton's Method", initial_x, x, - float64(f_x), + @compat(Float64(f_x)), iteration, iteration == iterations, x_converged, diff --git a/src/nnls.jl b/src/nnls.jl index f34f0ffd0..239e40389 100644 --- a/src/nnls.jl +++ b/src/nnls.jl @@ -6,8 +6,8 @@ function nnls(A::AbstractMatrix, b::AbstractVector) x = fill(one(T), size(A, 2)) # Set up constraints l = zeros(eltype(x), length(x)) - u = fill(inf(eltype(x)), length(x)) - # Perform the optimization + u = fill(convert(eltype(x),Inf), length(x)) + # Perform the optimization func = (x, g) -> nnlsobjective(x, g, A, b) df = DifferentiableFunction(x->func(x,nothing), func, func) fminbox(df, x, l, u, precondprep=(P, x, l, u, mu)->precondprepnnls(P, x, mu, a)) diff --git a/src/optimize.jl b/src/optimize.jl index 4ea952398..d2a3589d3 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,6 +1,8 @@ function optimize(d::TwiceDifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -41,6 +43,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -52,7 +56,7 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = 
eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -67,6 +71,8 @@ function optimize(d::TwiceDifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -94,6 +100,8 @@ end function optimize(d::DifferentiableFunction, initial_x::Array; method::Symbol = :l_bfgs, + constraints::AbstractConstraints = ConstraintsNone(), + interior::Bool = false, xtol::Real = 1e-32, ftol::Real = 1e-8, grtol::Real = 1e-8, @@ -134,6 +142,8 @@ function optimize(d::DifferentiableFunction, elseif method == :cg cg(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -145,7 +155,7 @@ function optimize(d::DifferentiableFunction, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -160,6 +170,8 @@ function optimize(d::DifferentiableFunction, elseif method == :l_bfgs l_bfgs(d, initial_x, + constraints = constraints, + interior = interior, xtol = xtol, ftol = ftol, grtol = grtol, @@ -259,7 +271,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -362,7 +374,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end d = DifferentiableFunction(f, g!) bfgs(d, initial_x, @@ -469,7 +481,7 @@ function optimize(f::Function, elseif method == :bfgs if bfgs_initial_invH == nothing bfgs_initial_invH = eye(length(initial_x)) - end + end bfgs(d, initial_x, xtol = xtol, @@ -514,7 +526,7 @@ function optimize{T <: Real}(f::Function, @printf "Iter Function value Gradient norm \n" end if method == :brent - brent(f, float64(lower), float64(upper); + brent(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, @@ -522,7 +534,7 @@ function optimize{T <: Real}(f::Function, show_trace = show_trace, extended_trace = extended_trace) elseif method == :golden_section - golden_section(f, float64(lower), float64(upper); + golden_section(f, @compat(Float64(lower)), @compat(Float64(upper)); rel_tol = rel_tol, abs_tol = abs_tol, iterations = iterations, diff --git a/src/problems/constrained.jl b/src/problems/constrained.jl new file mode 100644 index 000000000..7683e08ff --- /dev/null +++ b/src/problems/constrained.jl @@ -0,0 +1,77 @@ +module ConstrainedProblems + +using Optim + +immutable ConstrainedProblem{C<:Optim.AbstractConstraints} + name::ASCIIString + f::Function + g!::Function + h!::Function + constraints::C + initial_x::Vector{Float64} + solutions::Vector + isdifferentiable::Bool + istwicedifferentiable::Bool +end + +pexamples = Dict{ASCIIString, ConstrainedProblem}() + +##################################### +### +### Minimum distance, box constraints +### +##################################### + +function sqrdist(x) + dx1 = x[1] - 5 + dx2 = x[2] - 3 + (dx1*dx1 + dx2*dx2)/2 +end + +function sqrdist_gradient!(x, g) + g[1] = x[1] - 5 + g[2] = x[2] - 3 +end + +function sqrdist_hessian!(x, H) + H[1,1] = H[2,2] = 1 + H[1,2] = H[2,1] = 0 +end + +constraints = Optim.ConstraintsBox([-2.0,-2.0],[2.0,2.0]) + +pexamples["SqrdistBox"] = ConstrainedProblem("SqrdistBox", + sqrdist, + sqrdist_gradient!, + sqrdist_hessian!, + constraints, + [0.0, 0.0], + Any[[2.0, 2.0]], + true, + true) + 
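A quick sanity check on the expected solution recorded just above (an editor's aside, not part of the patch): because `sqrdist` is an isotropic quadratic centered at (5, 3), its minimizer over the box is simply that center projected onto the box, which the `project!` helper from constraints.jl reproduces:

    using Optim
    x = Optim.project!([5.0, 3.0], Optim.ConstraintsBox([-2.0, -2.0], [2.0, 2.0]))
    # x == [2.0, 2.0], matching the solution listed for "SqrdistBox"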
+constraints = Optim.ConstraintsBox(nothing,[2.0,2.0])
+
+pexamples["SqrdistUpr"] = ConstrainedProblem("SqrdistUpr",
+                                             sqrdist,
+                                             sqrdist_gradient!,
+                                             sqrdist_hessian!,
+                                             constraints,
+                                             [0.0, 0.0],
+                                             Any[[2.0, 2.0]],
+                                             true,
+                                             true)
+
+constraints = Optim.ConstraintsBox([-2.0,-2.0],nothing)
+
+pexamples["SqrdistLwr"] = ConstrainedProblem("SqrdistLwr",
+                                             sqrdist,
+                                             sqrdist_gradient!,
+                                             sqrdist_hessian!,
+                                             constraints,
+                                             [0.0, 0.0],
+                                             Any[[5.0, 3.0]],
+                                             true,
+                                             true)
+
+end # module
diff --git a/src/problems/unconstrained.jl b/src/problems/unconstrained.jl
index 7fbcf3c50..9d12d2ae1 100644
--- a/src/problems/unconstrained.jl
+++ b/src/problems/unconstrained.jl
@@ -40,7 +40,7 @@ examples["Exponential"] = OptimizationProblem("Exponential",
                                               exponential_gradient!,
                                               exponential_hessian!,
                                               [0.0, 0.0],
-                                              {[2.0, 3.0]},
+                                              Any[[2.0, 3.0]],
                                               true,
                                               true)
 
@@ -78,7 +78,7 @@ examples["Fletcher-Powell"] = OptimizationProblem("Fletcher-Powell",
                                                   fletcher_powell_gradient!,
                                                   fletcher_powell_hessian!,
                                                   [0.0, 0.0, 0.0],
-                                                  {[0.0, 0.0, 0.0]}, # TODO: Fix
+                                                  Any[[0.0, 0.0, 0.0]], # TODO: Fix
                                                   false,
                                                   false)
 
@@ -109,7 +109,7 @@ examples["Himmelbrau"] = OptimizationProblem("Himmelbrau",
                                              himmelbrau_gradient!,
                                              himmelbrau_hessian!,
                                              [0.0, 0.0],
-                                             {[1.0, 0.0]}, # TODO: Fix
+                                             Any[[1.0, 0.0]], # TODO: Fix
                                              true,
                                              false)
 ##########################################################################
@@ -138,7 +138,7 @@ examples["Hosaki"] = OptimizationProblem("Hosaki",
                                          hosaki_gradient!,
                                          hosaki_hessian!,
                                          [0.0, 0.0],
-                                         {[4.0, 2.0]},
+                                         Any[[4.0, 2.0]],
                                          false,
                                          false)
 
@@ -180,7 +180,7 @@ examples["Large Polynomial"] = OptimizationProblem("Large Polynomial",
                                                    large_polynomial_gradient!,
                                                    large_polynomial_hessian!,
                                                    zeros(250),
-                                                   {float([1:250])},
+                                                   Any[float([1:250;])],
                                                    true,
                                                    true)
 
@@ -220,7 +220,7 @@ examples["Parabola"] = OptimizationProblem("Parabola",
                                            parabola_gradient!,
                                            parabola_hessian!,
                                            [0.0, 0.0, 0.0, 0.0, 0.0],
-                                           {[1.0, 2.0, 3.0, 5.0, 8.0]},
+                                           Any[[1.0, 2.0, 3.0, 5.0, 8.0]],
                                            true,
                                            true)
 
@@ -257,7 +257,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial",
                                              polynomial_gradient!,
                                              polynomial_hessian!,
                                              [0.0, 0.0, 0.0],
-                                             {[10.0, 7.0, 108.0]},
+                                             Any[[10.0, 7.0, 108.0]],
                                              true,
                                              true)
 
@@ -268,7 +268,7 @@ examples["Polynomial"] = OptimizationProblem("Polynomial",
 ##########################################################################
 
 function powell(x::Vector)
-    return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 + 
+    return (x[1] + 10.0 * x[2])^2 + 5.0 * (x[3] - x[4])^2 +
         (x[2] - 2.0 * x[3])^4 + 10.0 * (x[1] - x[4])^4
 end
@@ -303,7 +303,7 @@ examples["Powell"] = OptimizationProblem("Powell",
                                          powell_gradient!,
                                          powell_hessian!,
                                          [3.0, -1.0, 0.0, 1.0],
-                                         {[0.0, 0.0, 0.0, 0.0]}, # TODO: Fix
+                                         Any[[0.0, 0.0, 0.0, 0.0]], # TODO: Fix
                                          true,
                                          true)
 
@@ -334,7 +334,7 @@ examples["Rosenbrock"] = OptimizationProblem("Rosenbrock",
                                              rosenbrock_gradient!,
                                              rosenbrock_hessian!,
                                              [0.0, 0.0],
-                                             {[1.0, 1.0]},
+                                             Any[[1.0, 1.0]],
                                              true,
                                              true)
 
diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl
index 5fbb27764..879cae802 100644
--- a/src/simulated_annealing.jl
+++ b/src/simulated_annealing.jl
@@ -103,7 +103,7 @@ function simulated_annealing{T}(cost::Function,
     return MultivariateOptimizationResults("Simulated Annealing",
                                            initial_x,
                                            best_x,
-                                           float64(best_f_x),
+                                           @compat(Float64(best_f_x)),
                                            iterations,
                                            iteration == iterations,
                                            false,
diff --git a/src/types.jl b/src/types.jl
index 2b2dedb2e..3295c07bc 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -6,11 +6,11 @@ immutable OptimizationState
 end
 
 function OptimizationState(i::Integer, f::Real)
-    OptimizationState(int(i), float64(f), NaN, Dict())
+    OptimizationState(int(i), @compat(Float64(f)), NaN, Dict())
 end
 
 function OptimizationState(i::Integer, f::Real, g::Real)
-    OptimizationState(int(i), float64(f), float64(g), Dict())
+    OptimizationState(int(i), @compat(Float64(f)), @compat(Float64(g)), Dict())
 end
 
 immutable OptimizationTrace
diff --git a/test/cg.jl b/test/cg.jl
index fa99fd27a..22742be8a 100644
--- a/test/cg.jl
+++ b/test/cg.jl
@@ -1,18 +1,30 @@
 using Optim
 
-for (name, prob) in Optim.UnconstrainedProblems.examples
-	if prob.isdifferentiable
-		if name == "Himmelbrau"
-			continue
-		end
-		df = DifferentiableFunction(prob.f, prob.g!)
-		res = Optim.cg(df, prob.initial_x)
-		if length(prob.solutions) == 1
-			@assert norm(res.minimum - prob.solutions[1]) < 1e-2
-		end
-	end
+for (name, prob) in UnconstrainedProblems.examples
+    if prob.isdifferentiable
+        if name == "Himmelbrau"
+            continue
+        end
+        df = DifferentiableFunction(prob.f, prob.g!)
+        res = Optim.cg(df, prob.initial_x)
+        if length(prob.solutions) == 1
+            @assert norm(res.minimum - prob.solutions[1]) < 1e-2
+        end
+    end
 end
+for (name, prob) in ConstrainedProblems.pexamples
+    if prob.isdifferentiable
+        df = DifferentiableFunction(prob.f, prob.g!)
+        res = Optim.cg(df, prob.initial_x, constraints=prob.constraints)
+        if length(prob.solutions) == 1
+            @assert norm(res.minimum - prob.solutions[1]) < 1e-2
+        end
+    end
+end
+
+
+# Functions of arrays
 let objective(X, B) = sum((X.-B).^2)/2
diff --git a/test/constrained.jl b/test/constrained.jl
index 8814afa2d..6a3b2d081 100644
--- a/test/constrained.jl
+++ b/test/constrained.jl
@@ -1,4 +1,4 @@
-import Optim
+using Optim
 using Base.Test
 
 # Quadratic objective function
@@ -55,3 +55,17 @@ A = [1.0 2.0; 3.0 4.0]
 b = [1.0,1.0]
 results = Optim.nnls(A, b)
 @test norm(results.minimum - [0,0.3]) < 1e-6
+
+# 1d newton
+let f(x) = (z = x[1]; -2z^2 + z^4),
+    g!(x, gr) = (z = x[1]; gr[1] = -4z + 4z^3; x),
+    h!(x, H) = (z = x[1]; H[1,1] = -4 + 12z^2; H),
+    objective = TwiceDifferentiableFunction(f, g!, (x,gr) -> (g!(x, gr); f(x)), h!),
+    constr = ConstraintsBox([-0.9],[0.9])
+
+    for x0 in (0.1,0.2,0.3,0.5,0.7)
+        x = [x0]
+        soln = interior(objective, x, constr, method=:newton)
+        @test_approx_eq_eps soln.minimum[1] 0.9 1e-6
+    end
+end
diff --git a/test/grid_search.jl b/test/grid_search.jl
index 5f34c4da0..ff0b4f7ad 100644
--- a/test/grid_search.jl
+++ b/test/grid_search.jl
@@ -1,4 +1,4 @@
-Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0])
+Optim.grid_search(x -> (1.0 - x)^2, [-1:0.1:1.0;])
 
 # Cartesian product over 2 dimensions.
 #grid_search(x -> (1.0 - x[1])^2 + (2.0 - x[2])^2, product[-1:0.1:1.0, -1:0.1:1.0])
diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl
index abe0fe7a3..e79c3bfb3 100644
--- a/test/momentum_gradient_descent.jl
+++ b/test/momentum_gradient_descent.jl
@@ -1,4 +1,4 @@
-p = Optim.UnconstrainedProblems.examples["Rosenbrock"]
+p = UnconstrainedProblems.examples["Rosenbrock"]
 
 df = DifferentiableFunction(p.f, p.g!)
diff --git a/test/runtests.jl b/test/runtests.jl
index 85ba62b63..d20712504 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,10 @@
 
 using Optim
 
+for path in Optim.testpaths
+    require(path)
+end
+
 my_tests = ["bfgs.jl",
             "gradient_descent.jl",
             "momentum_gradient_descent.jl",
diff --git a/test/type_stability.jl b/test/type_stability.jl
index da72b8956..56482cb8c 100644
--- a/test/type_stability.jl
+++ b/test/type_stability.jl
@@ -30,7 +30,7 @@ d3 = TwiceDifferentiableFunction(rosenbrock,
 
 for method in (:nelder_mead, :simulated_annealing)
     optimize(rosenbrock, [0.0,0,.0], method = method)
-    optimize(rosenbrock, float32([0.0,0.0]), method = method)
+    optimize(rosenbrock, [0.0f0,0.0f0], method = method)
 end
 
 for method in (:bfgs,
@@ -40,10 +40,10 @@ for method in (:bfgs,
                # :accelerated_gradient_descent,
                :l_bfgs)
     optimize(d2, [0.0,0,.0], method = method)
-    optimize(d2, float32([0.0,0.0]), method = method)
+    optimize(d2, [0.0f0,0.0f0], method = method)
 end
 
 for method in (:newton,)
     optimize(d3, [0.0,0.0], method = method)
-    optimize(d3, float32([0.0,0.0]), method = method)
+    optimize(d3, [0.0f0,0.0f0], method = method)
 end
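
Note: the constrained-optimization API introduced by this patch (ConstraintsBox, the constraints=/interior= keywords on cg and optimize, and the interior entry point) can be exercised as in the sketch below. This is an illustration only, not part of the patch: the objective, bounds, starting point, and expected result are chosen to mirror the SqrdistBox problem from src/problems/constrained.jl.

using Optim

# Quadratic "squared distance" objective with unconstrained minimizer (5, 3);
# the gradient writes into gr in place, matching the g!(x, gr) convention used
# in the patch's test problems.
f(x) = ((x[1] - 5)^2 + (x[2] - 3)^2) / 2
function g!(x, gr)
    gr[1] = x[1] - 5
    gr[2] = x[2] - 3
end
df = DifferentiableFunction(f, g!)

# Box constraints -2 <= x[i] <= 2; the unconstrained minimizer lies outside the
# box, so the constrained solution should land at the corner [2.0, 2.0].
constraints = ConstraintsBox([-2.0, -2.0], [2.0, 2.0])

# Conjugate gradient with the new constraints keyword; passing interior=true
# instead routes the solve through the interior-point path added in src/interior.jl.
res = Optim.cg(df, [0.0, 0.0], constraints = constraints)
println(res.minimum)   # expected to be approximately [2.0, 2.0]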