From 15d353b5c5af5146d2dd831461e0432b2832a4b3 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 12 Sep 2024 11:58:07 -0400 Subject: [PATCH 01/38] Add R2N, R2DH and update LM --- src/LM_alg.jl | 20 ++- src/R2DH.jl | 297 +++++++++++++++++++++++++++++++++ src/R2N.jl | 292 ++++++++++++++++++++++++++++++++ src/RegularizedOptimization.jl | 2 + 4 files changed, 604 insertions(+), 7 deletions(-) create mode 100644 src/R2DH.jl create mode 100644 src/R2N.jl diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 56bc5356..a23973a4 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -104,13 +104,15 @@ function LM( k = 0 Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) + R = eltype(xk) + sqrt_ξ1_νInv = one(R) Complex_hist = zeros(Int, maxIter) Grad_hist = zeros(Int, maxIter) Resid_hist = zeros(Int, maxIter) if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" #! format: on end @@ -176,14 +178,15 @@ function LM( prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + sqrt_ξ1_νInv = sqrt(ξ1 * νInv) if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt(ξ1) + ϵ_increment = ϵr * sqrt_ξ1_νInv ϵ += ϵ_increment # make stopping test absolute and relative ϵ_subsolver += ϵ_increment end - if sqrt(ξ1) < ϵ + if sqrt_ξ1_νInv < ϵ # the current xk is approximately first-order stationary optimal = true continue @@ -191,7 +194,8 @@ function LM( subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ν = ν - subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + #subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1., nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -221,7 +225,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat #! format: off end @@ -244,6 +248,7 @@ function LM( jtprod_residual!(nls, xk, Fk, ∇fk) σmax = opnorm(Jk) + Complex_hist[k] += 1 end @@ -252,6 +257,7 @@ function LM( σk = σk * γ end νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + tired = k ≥ maxIter || elapsed_time > maxTime end @@ -260,9 +266,9 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv #! format: on - @info "LM: terminating with √ξ1 = $(sqrt(ξ1))" + @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end end status = if optimal diff --git a/src/R2DH.jl b/src/R2DH.jl new file mode 100644 index 00000000..0e7157b0 --- /dev/null +++ b/src/R2DH.jl @@ -0,0 +1,297 @@ +export R2DH + +""" + R2DH(nlp, h, options) + R2DH(f, ∇f!, h, options, x0) + +A first-order quadratic regularization method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. + +About each iterate xₖ, a step sₖ is computed as a solution of + + min φ(s; xₖ) + ψ(s; xₖ) + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = true`) and φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = false`) is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation +and σₖ > 0 is the regularization parameter. + +### Arguments + +* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `options::ROSolverOptions`: a structure containing algorithmic parameters +* `x0::AbstractVector`: an initial guess (in the second calling form) + +### Keyword Arguments + +* `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) +* `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). +* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). +* `summation`: boolean used to choose between the two versions of R2DH (see above, default : `true`). + +The objective and gradient of `nlp` will be accessed. + +In the second form, instead of `nlp`, the user may pass in + +* `f` a function such that `f(x)` returns the value of f at x +* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function R2DH( + nlp::AbstractDiagonalQNModel{R, S}, + h, + options::ROSolverOptions{R}; + kwargs..., + ) where {R <: Real, S} + kwargs_dict = Dict(kwargs...) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + xk, k, outdict = R2DH( + x -> obj(nlp, x), + (g, x) -> grad!(nlp, x, g), + h, + hess_op(nlp, x0), + options, + x0; + kwargs..., + ) + ξ = outdict[:ξ] + stats = GenericExecutionStats(nlp) + set_status!(stats, outdict[:status]) + set_solution!(stats, xk) + set_objective!(stats, outdict[:fk] + outdict[:hk]) + set_residuals!(stats, zero(eltype(xk)), ξ) + set_iter!(stats, k) + set_time!(stats, outdict[:elapsed_time]) + set_solver_specific!(stats, :Fhist, outdict[:Fhist]) + set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) + set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) + return stats + end + +function R2DH( + f::F, + ∇f!::G, + h::H, + D::DQN, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + Mmonotone::Int = 5, + selected::AbstractVector{<:Integer} = 1:length(x0), + summation::Bool = true, + kwargs..., +) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator} + start_time = time() + elapsed_time = 0.0 + ϵ = options.ϵa + ϵr = options.ϵr + neg_tol = options.neg_tol + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + σmin = options.σmin + η1 = options.η1 + η2 = options.η2 + ν = options.ν + γ = options.γ + + local l_bound, u_bound + has_bnds = false + for (key, val) in kwargs + if key == :l_bound + l_bound = val + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + elseif key == :u_bound + u_bound = val + has_bnds = has_bnds || any(u_bound .!= R(Inf)) + end + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2DH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2DH: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + ψ = has_bnds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "" + #! format: off + end + + local ξ + k = 0 + σk = summation ? σmin : max(1 / ν, σmin) + + fk = f(xk) + ∇fk = similar(xk) + ∇f!(∇fk, xk) + ∇fk⁻ = copy(∇fk) + spectral_test = isa(D, SpectralGradient) + D.d .= summation ? D.d .+ σk : D.d .* σk + DNorm = norm(D.d, Inf) + + + ν = 1 / DNorm + mν∇fk = -ν * ∇fk + sqrt_ξ_νInv = one(R) + + optimal = false + tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + + D.d .= max.(D.d, eps(R)) + + + # model with diagonal hessian + φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2 + mk(d) = φ(d) + ψ(d) + + if spectral_test + prox!(s, ψ, mν∇fk, ν) + else + iprox!(s, ψ, ∇fk, D) + end + + # iprox!(s, ψ, ∇fk, D) + + Complex_hist[k] += 1 + xkn .= xk .+ s + fkn = f(xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + + fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk + Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() + Δmod = fhmax - (fk + mk(s)) + max(1, abs(hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) + + if ξ ≥ 0 && k == 1 + ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative + end + + if (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < ϵ) + # the current xk is approximately first-order stationary + optimal = true + continue + end + + if (ξ ≤ 0 || isnan(ξ)) + error("R2DH: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / Δmod + + σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat + #! format: on + end + + if η2 ≤ ρk < Inf + σk = max(σk / γ, σmin) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bnds && set_bounds!(ψ, l_bound - xk, u_bound - xk) + fk = fkn + hk = hkn + shift!(ψ, xk) + ∇f!(∇fk, xk) + push!(D, s, ∇fk - ∇fk⁻) # update QN operator + DNorm = norm(D.d, Inf) + ∇fk⁻ .= ∇fk + end + + if ρk < η1 || ρk == Inf + σk = σk * γ + end + + D.d .= summation ? D.d .+ σk : D.d .* σk + DNorm = norm(D.d, Inf) + ν = 1 / DNorm + + tired = maxIter > 0 && k ≥ maxIter + if !tired + @. mν∇fk = -ν * ∇fk + end + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8.1e %8.1e" k fk hk + elseif optimal + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" σk norm(xk) norm(s) + #! format: on + @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv))" + end + end + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + outdict = Dict( + :Fhist => Fobj_hist[1:k], + :Hhist => Hobj_hist[1:k], + :Chist => Complex_hist[1:k], + :NonSmooth => h, + :status => status, + :fk => fk, + :hk => hk, + :ξ => ξ, + :elapsed_time => elapsed_time, + ) + + return xk, k, outdict +end diff --git a/src/R2N.jl b/src/R2N.jl new file mode 100644 index 00000000..b90b66cf --- /dev/null +++ b/src/R2N.jl @@ -0,0 +1,292 @@ +export R2N + +""" +R2N(nlp, h, χ, options; kwargs...) + +A regularized quasi-Newton method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is +lower semi-continuous and proper. + +About each iterate xₖ, a step sₖ is computed as an approximate solution of + + min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ) + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s) and σₖ > 0 is the regularization parameter. +The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient +method or the quadratic regularization method. + +### Arguments + +* `nlp::AbstractNLPModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective, gradient and Hessian of `nlp` will be accessed. +The Hessian is accessed as an abstract operator and need not be the exact Hessian. + +### Keyword arguments + +* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) +* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) +* `subsolver`: the procedure used to compute a step (`PG` or `R2`) +* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) +* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function R2N( + f::AbstractNLPModel, + h::H, + options::ROSolverOptions{R}; + x0::AbstractVector = f.meta.x0, + subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), + subsolver = R2, + subsolver_options = ROSolverOptions(ϵa = options.ϵa), + selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), +) where {H, R} + start_time = time() + elapsed_time = 0.0 + # initialize passed options + ϵ = options.ϵa + ϵ_subsolver_init = subsolver_options.ϵa + ϵ_subsolver = copy(ϵ_subsolver_init) + ϵr = options.ϵr + Δk = options.Δk + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + η1 = options.η1 + η2 = options.η2 + γ = options.γ + θ = options.θ + σmin = options.σmin + α = options.α + β = options.β + + # store initial values of the subsolver_options fields that will be modified + ν_subsolver = subsolver_options.ν + ϵa_subsolver = subsolver_options.ϵa + + local l_bound, u_bound + if has_bounds(f) + l_bound = f.meta.lvar + u_bound = f.meta.uvar + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + #σk = max(1 / options.ν, σmin) #SVM + σk = σmin + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2N: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2N: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + ψ = has_bounds(f) ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Bₖ‖" "R2N" + #! format: on + end + + # main algorithm initialization + + local ξ1 + k = 0 + + fk = obj(f, xk) + ∇fk = grad(f, xk) + ∇fk⁻ = copy(∇fk) + + quasiNewtTest = isa(f, QuasiNewtonModel) + Bk = hess_op(f, xk) + + λmax = opnorm(Bk) + νInv = (1 + θ) *( σk + λmax) + sqrt_ξ1_νInv = one(R) + + optimal = false + tired = k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + + # model for first prox-gradient step and ξ1 + φ1(d) = ∇fk' * d + mk1(d) = φ1(d) + ψ(d) + + # model for subsequent prox-gradient steps and ξ + φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2 + + ∇φ!(g, d) = begin + mul!(g, Bk, d) + g .+= ∇fk + g .+= σk * d + g + end + + mk(d) = φ(d) + ψ(d) + + # take first proximal gradient step s1 and see if current xk is nearly stationary + # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). + + subsolver_options.ν = 1 / νInv + prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν) + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + # println("sqrt_ξ1_νInv: ", sqrt_ξ1_νInv) + # println("ξ1: ", ξ1) + # println("νInv: ", νInv) + + if ξ1 ≥ 0 && k == 1 + ϵ_increment = ϵr * sqrt_ξ1_νInv + ϵ += ϵ_increment # make stopping test absolute and relative + ϵ_subsolver += ϵ_increment + end + + if sqrt_ξ1_νInv < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + s1 = copy(s) + + # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-3 : max(ϵ_subsolver, min(1e-3, sqrt_ξ1_νInv)) # 1.0e-5 default + @debug "setting inner stopping tolerance to" subsolver_options.optTol + subsolver_args = subsolver == R2DH ? (SpectralGradient(1., f.meta.nvar),) : () + s, iter, _ = with_logger(subsolver_logger) do + subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) + end + + if norm(s) > β * norm(s1) + s .= s1 + end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa + # is not modified if there is an error + + subsolver_options.ν = ν_subsolver + subsolver_options.ϵa = ϵ_subsolver_init + Complex_hist[k] = iter + + xkn .= xk .+ s + fkn = obj(f, xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + mks = mk(s) #- σk * dot(s, s) / 2 + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mks + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("R2N: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / ξ + + R2N_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ1) ρk σk norm(xk) norm(s) λmax R2N_stat + #! format: off + end + + if η2 ≤ ρk < Inf + σk = max(σk/γ, σmin) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bounds(f) && set_bounds!(ψ, l_bound - xk, u_bound - xk) + + #update functions + fk = fkn + hk = hkn + + # update gradient & Hessian + shift!(ψ, xk) + ∇fk = grad(f, xk) + if quasiNewtTest + push!(f, s, ∇fk - ∇fk⁻) + end + Bk = hess_op(f, xk) + λmax = opnorm(Bk) + ∇fk⁻ .= ∇fk + end + + if ρk < η1 || ρk == Inf + σk = σk * γ + end + νInv = (1 + θ) *( σk + λmax) + + tired = k ≥ maxIter || elapsed_time > maxTime + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk + elseif optimal + #! format: off + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) λmax + #! format: on + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + end + end + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + + stats = GenericExecutionStats(f) + set_status!(stats, status) + set_solution!(stats, xk) + set_objective!(stats, fk + hk) + set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) + set_iter!(stats, k) + set_time!(stats, elapsed_time) + set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) + set_solver_specific!(stats, :Hhist, Hobj_hist[1:k]) + set_solver_specific!(stats, :NonSmooth, h) + set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) + return stats +end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 22c1c814..5983cab9 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -20,5 +20,7 @@ include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") +include("R2DH.jl") +include("R2N.jl") end # module RegularizedOptimization From 5a06b65be18e6337486e2ddaa4f2735c13030cec Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Mon, 16 Sep 2024 14:47:48 -0400 Subject: [PATCH 02/38] add theta to nu and remove summation --- src/LM_alg.jl | 4 ---- src/R2DH.jl | 46 ++++++++++++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index a23973a4..5568e67f 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -246,10 +246,7 @@ function LM( shift!(ψ, xk) Jk = jac_op_residual(nls, xk) jtprod_residual!(nls, xk, Fk, ∇fk) - σmax = opnorm(Jk) - - Complex_hist[k] += 1 end @@ -257,7 +254,6 @@ function LM( σk = σk * γ end νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ - tired = k ≥ maxIter || elapsed_time > maxTime end diff --git a/src/R2DH.jl b/src/R2DH.jl index 0e7157b0..b9235085 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -15,7 +15,7 @@ About each iterate xₖ, a step sₖ is computed as a solution of min φ(s; xₖ) + ψ(s; xₖ) -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = true`) and φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = false`) is a quadratic approximation of f about xₖ, +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s is a quadratic approximation of f about xₖ, ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation and σₖ > 0 is the regularization parameter. @@ -31,7 +31,6 @@ and σₖ > 0 is the regularization parameter. * `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) * `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). * `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). -* `summation`: boolean used to choose between the two versions of R2DH (see above, default : `true`). The objective and gradient of `nlp` will be accessed. @@ -64,16 +63,17 @@ function R2DH( x0; kwargs..., ) - ξ = outdict[:ξ] + sqrt_ξ_νInv = outdict[:sqrt_ξ_νInv] stats = GenericExecutionStats(nlp) set_status!(stats, outdict[:status]) set_solution!(stats, xk) set_objective!(stats, outdict[:fk] + outdict[:hk]) - set_residuals!(stats, zero(eltype(xk)), ξ) + set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) set_iter!(stats, k) set_time!(stats, outdict[:elapsed_time]) set_solver_specific!(stats, :Fhist, outdict[:Fhist]) set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :Time_hist, outdict[:Time_hist]) set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) return stats @@ -88,7 +88,6 @@ function R2DH( x0::AbstractVector{R}; Mmonotone::Int = 5, selected::AbstractVector{<:Integer} = 1:length(x0), - summation::Bool = true, kwargs..., ) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator} start_time = time() @@ -104,6 +103,7 @@ function R2DH( η2 = options.η2 ν = options.ν γ = options.γ + θ = options.θ local l_bound, u_bound has_bnds = false @@ -145,28 +145,28 @@ function R2DH( Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) + time_hist = zeros(maxIter) FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) Complex_hist = zeros(Int, maxIter) if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "" + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√(ξ/ν)" "ρ" "σ" "‖x‖" "‖s‖" "" #! format: off end local ξ k = 0 - σk = summation ? σmin : max(1 / ν, σmin) + σk = σmin fk = f(xk) ∇fk = similar(xk) ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) spectral_test = isa(D, SpectralGradient) - D.d .= summation ? D.d .+ σk : D.d .* σk + Dkσk = D.d .+ σk DNorm = norm(D.d, Inf) - - ν = 1 / DNorm + ν = 1 / ((DNorm + σk) * (1 + θ)) mν∇fk = -ν * ∇fk sqrt_ξ_νInv = one(R) @@ -178,22 +178,27 @@ function R2DH( elapsed_time = time() - start_time Fobj_hist[k] = fk Hobj_hist[k] = hk + time_hist[k] = elapsed_time Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) - D.d .= max.(D.d, eps(R)) - - # model with diagonal hessian - φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2 + φ(d) = ∇fk' * d + (d' * (Dkσk .* d)) / 2 mk(d) = φ(d) + ψ(d) if spectral_test prox!(s, ψ, mν∇fk, ν) else - iprox!(s, ψ, ∇fk, D) + iprox!(s, ψ, ∇fk, Dkσk) end - # iprox!(s, ψ, ∇fk, D) + if mk(s) > 1/eps(R) + σk = σk * γ + Dkσk .= D.d .+ σk + DNorm = norm(D.d, Inf) + ν = 1 / ((DNorm + σk) * (1 + θ)) + @. mν∇fk = -ν * ∇fk + continue + end Complex_hist[k] += 1 xkn .= xk .+ s @@ -225,7 +230,7 @@ function R2DH( σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") - if (verbose > 0) && (k % ptf == 0) + if (verbose > 0) && ((k % ptf == 0) || (k == 1)) #! format: off @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat #! format: on @@ -251,9 +256,9 @@ function R2DH( σk = σk * γ end - D.d .= summation ? D.d .+ σk : D.d .* σk + Dkσk .= D.d .+ σk DNorm = norm(D.d, Inf) - ν = 1 / DNorm + ν = 1 / ((DNorm + σk) * (1 + θ)) tired = maxIter > 0 && k ≥ maxIter if !tired @@ -284,12 +289,13 @@ function R2DH( outdict = Dict( :Fhist => Fobj_hist[1:k], :Hhist => Hobj_hist[1:k], + :Time_hist => time_hist[1:k], :Chist => Complex_hist[1:k], :NonSmooth => h, :status => status, :fk => fk, :hk => hk, - :ξ => ξ, + :sqrt_ξ_νInv => sqrt_ξ_νInv, :elapsed_time => elapsed_time, ) From 15fbe1e562881e654ac2e219fde9dd3e9b430b7e Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Tue, 17 Sep 2024 03:14:13 -0400 Subject: [PATCH 03/38] Change R2_alg so that it returns list of times --- src/R2DH.jl | 18 ++++++++++-------- src/R2_alg.jl | 9 +++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index b9235085..644a5170 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -143,11 +143,11 @@ function R2DH( s = zero(xk) ψ = has_bnds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) - Fobj_hist = zeros(maxIter) - Hobj_hist = zeros(maxIter) - time_hist = zeros(maxIter) + Fobj_hist = zeros(maxIter+1) + Hobj_hist = zeros(maxIter+1) + time_hist = zeros(maxIter+1) FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) - Complex_hist = zeros(Int, maxIter) + Complex_hist = zeros(Int, maxIter+1) if verbose > 0 #! format: off @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√(ξ/ν)" "ρ" "σ" "‖x‖" "‖s‖" "" @@ -181,6 +181,8 @@ function R2DH( time_hist[k] = elapsed_time Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + #Dkσk .= max.(Dkσk, eps(R)) + # model with diagonal hessian φ(d) = ∇fk' * d + (d' * (Dkσk .* d)) / 2 mk(d) = φ(d) + ψ(d) @@ -190,8 +192,9 @@ function R2DH( else iprox!(s, ψ, ∇fk, Dkσk) end + mks = mk(s) - if mk(s) > 1/eps(R) + if mks < -1e5 σk = σk * γ Dkσk .= D.d .+ σk DNorm = norm(D.d, Inf) @@ -208,8 +211,8 @@ function R2DH( fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() - Δmod = fhmax - (fk + mk(s)) + max(1, abs(hk)) * 10 * eps() - ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + Δmod = fhmax - (fk + mks) + max(1, abs(hk)) * 10 * eps() + ξ = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) if ξ ≥ 0 && k == 1 @@ -257,7 +260,6 @@ function R2DH( end Dkσk .= D.d .+ σk - DNorm = norm(D.d, Inf) ν = 1 / ((DNorm + σk) * (1 + θ)) tired = maxIter > 0 && k ≥ maxIter diff --git a/src/R2_alg.jl b/src/R2_alg.jl index da20a32f..79543300 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -21,6 +21,7 @@ mutable struct R2Solver{ Fobj_hist::Vector{R} Hobj_hist::Vector{R} Complex_hist::Vector{Int} + Time_hist::Vector{R} end function R2Solver( @@ -46,6 +47,7 @@ function R2Solver( end Fobj_hist = zeros(R, maxIter + 2) Hobj_hist = zeros(R, maxIter + 2) + Time_hist = zeros(R, maxIter + 2) Complex_hist = zeros(Int, maxIter + 2) return R2Solver( xk, @@ -62,6 +64,7 @@ function R2Solver( Fobj_hist, Hobj_hist, Complex_hist, + Time_hist, ) end @@ -87,6 +90,7 @@ function R2Solver(reg_nlp::AbstractRegularizedNLPModel{T, V}; max_iter::Int = 10 end Fobj_hist = zeros(T, max_iter + 2) Hobj_hist = zeros(T, max_iter + 2) + Time_hist = zeros(T, max_iter + 2) Complex_hist = zeros(Int, max_iter + 2) ψ = @@ -107,6 +111,7 @@ function R2Solver(reg_nlp::AbstractRegularizedNLPModel{T, V}; max_iter::Int = 10 Fobj_hist, Hobj_hist, Complex_hist, + Time_hist, ) end @@ -202,6 +207,7 @@ function R2( γ = options.γ; kwargs_dict..., ) + return stats end function R2( @@ -234,6 +240,7 @@ function R2( outdict = Dict( :Fhist => stats.solver_specific[:Fhist], :Hhist => stats.solver_specific[:Hhist], + :Time_hist => stats.solver_specific[:Time_hist], :Chist => stats.solver_specific[:SubsolverCounter], :NonSmooth => h, :status => stats.status, @@ -277,6 +284,7 @@ function R2( outdict = Dict( :Fhist => stats.solver_specific[:Fhist], :Hhist => stats.solver_specific[:Hhist], + :Time_hist => stats.solver_specific[:Time_hist], :Chist => stats.solver_specific[:SubsolverCounter], :NonSmooth => h, :status => stats.status, @@ -305,6 +313,7 @@ function R2(reg_nlp::AbstractRegularizedNLPModel; kwargs...) solve!(solver, reg_nlp, stats; callback = cb, max_iter = max_iter, kwargs...) set_solver_specific!(stats, :Fhist, solver.Fobj_hist[1:(stats.iter + 1)]) set_solver_specific!(stats, :Hhist, solver.Hobj_hist[1:(stats.iter + 1)]) + set_solver_specific!(stats, :Time_hist, solver.Time_hist[1:(stats.iter + 1)]) set_solver_specific!(stats, :SubsolverCounter, solver.Complex_hist[1:(stats.iter + 1)]) return stats end From 63e682a7e6a5548757a31ba43c6beaba4e82fdde Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Wed, 25 Sep 2024 17:02:09 -0400 Subject: [PATCH 04/38] add updated parameter of subsolvers and add sigam-k to input.jl structure --- src/R2DH.jl | 27 ++++++++++++++++----------- src/R2N.jl | 24 +++++++++++++----------- src/input_struct.jl | 4 ++++ 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 644a5170..1b4b53b4 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -61,6 +61,8 @@ function R2DH( hess_op(nlp, x0), options, x0; + l_bound = nlp.meta.lvar, + u_bound = nlp.meta.uvar, kwargs..., ) sqrt_ξ_νInv = outdict[:sqrt_ξ_νInv] @@ -99,6 +101,7 @@ function R2DH( maxIter = options.maxIter maxTime = options.maxTime σmin = options.σmin + σk = options.σk η1 = options.η1 η2 = options.η2 ν = options.ν @@ -148,6 +151,7 @@ function R2DH( time_hist = zeros(maxIter+1) FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) Complex_hist = zeros(Int, maxIter+1) + k_prox = 0 if verbose > 0 #! format: off @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√(ξ/ν)" "ρ" "σ" "‖x‖" "‖s‖" "" @@ -156,13 +160,13 @@ function R2DH( local ξ k = 0 - σk = σmin fk = f(xk) ∇fk = similar(xk) ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) spectral_test = isa(D, SpectralGradient) + # D.d .= D.d .+ σk Dkσk = D.d .+ σk DNorm = norm(D.d, Inf) @@ -174,15 +178,7 @@ function R2DH( tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime while !(optimal || tired) - k = k + 1 - elapsed_time = time() - start_time - Fobj_hist[k] = fk - Hobj_hist[k] = hk - time_hist[k] = elapsed_time - Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) - - #Dkσk .= max.(Dkσk, eps(R)) - + k_prox += 1 # model with diagonal hessian φ(d) = ∇fk' * d + (d' * (Dkσk .* d)) / 2 mk(d) = φ(d) + ψ(d) @@ -203,7 +199,15 @@ function R2DH( continue end - Complex_hist[k] += 1 + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + time_hist[k] = elapsed_time + Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + + Complex_hist[k] += k_prox + k_prox = 0 xkn .= xk .+ s fkn = f(xkn) hkn = h(xkn[selected]) @@ -260,6 +264,7 @@ function R2DH( end Dkσk .= D.d .+ σk + DNorm = norm(D.d, Inf) ν = 1 / ((DNorm + σk) * (1 + θ)) tired = maxIter > 0 && k ≥ maxIter diff --git a/src/R2N.jl b/src/R2N.jl index b90b66cf..fe8124e0 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -51,6 +51,7 @@ function R2N( subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), + Mmonotone::Int = 0, selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), ) where {H, R} start_time = time() @@ -71,6 +72,7 @@ function R2N( σmin = options.σmin α = options.α β = options.β + σk = options.σk # store initial values of the subsolver_options fields that will be modified ν_subsolver = subsolver_options.ν @@ -94,7 +96,6 @@ function R2N( # initialize parameters #σk = max(1 / options.ν, σmin) #SVM - σk = σmin xk = copy(x0) hk = h(xk[selected]) if hk == Inf @@ -112,6 +113,7 @@ function R2N( Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) Complex_hist = zeros(Int, maxIter) if verbose > 0 #! format: off @@ -143,6 +145,7 @@ function R2N( elapsed_time = time() - start_time Fobj_hist[k] = fk Hobj_hist[k] = hk + Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) # model for first prox-gradient step and ξ1 φ1(d) = ∇fk' * d @@ -168,9 +171,6 @@ function R2N( ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") sqrt_ξ1_νInv = sqrt(ξ1 * νInv) - # println("sqrt_ξ1_νInv: ", sqrt_ξ1_νInv) - # println("ξ1: ", ξ1) - # println("νInv: ", νInv) if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -186,9 +186,9 @@ function R2N( s1 = copy(s) # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) - subsolver_options.ϵa = k == 1 ? 1.0e-3 : max(ϵ_subsolver, min(1e-3, sqrt_ξ1_νInv)) # 1.0e-5 default + subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default @debug "setting inner stopping tolerance to" subsolver_options.optTol - subsolver_args = subsolver == R2DH ? (SpectralGradient(1., f.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end @@ -208,18 +208,21 @@ function R2N( hkn = h(xkn[selected]) hkn == -Inf && error("nonsmooth term is not proper") mks = mk(s) #- σk * dot(s, s) / 2 - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + + fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk + Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() + Δmod = fhmax - (fk + mks) + max(1, abs(hk)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() if (ξ ≤ 0 || isnan(ξ)) error("R2N: failed to compute a step: ξ = $ξ") end - ρk = Δobj / ξ + ρk = Δobj / Δmod R2N_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") - if (verbose > 0) && (k % ptf == 0) + if (verbose > 0) && ((k % ptf == 0) || (k == 1)) #! format: off @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ1) ρk σk norm(xk) norm(s) λmax R2N_stat #! format: off @@ -251,8 +254,7 @@ function R2N( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (1 + θ) *( σk + λmax) - + νInv = (1 + θ) * (σk + λmax) tired = k ≥ maxIter || elapsed_time > maxTime end diff --git a/src/input_struct.jl b/src/input_struct.jl index 2e9937ce..7242b90a 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -9,6 +9,7 @@ mutable struct ROSolverOptions{R} maxIter::Int # maximum amount of inner iterations maxTime::Float64 #maximum time allotted to the algorithm in s σmin::R # minimum σk allowed for LM/R2 method + σk::R # initial σk η1::R # step acceptance threshold η2::R # trust-region increase threshold α::R # νk Δ^{-1} parameter @@ -27,6 +28,7 @@ mutable struct ROSolverOptions{R} maxIter::Int = 10000, maxTime::Float64 = 3600.0, σmin::R = eps(R), + σk::R = eps(R)^(1 / 5), η1::R = √√eps(R), η2::R = R(0.9), α::R = 1 / eps(R), @@ -44,6 +46,7 @@ mutable struct ROSolverOptions{R} @assert maxIter ≥ 0 @assert maxTime ≥ 0 @assert σmin ≥ 0 + @assert σk ≥ 0 @assert 0 < η1 < η2 < 1 @assert α > 0 @assert ν > 0 @@ -59,6 +62,7 @@ mutable struct ROSolverOptions{R} maxIter, maxTime, σmin, + σk, η1, η2, α, From 17f24ea6c7fb32a3ed6f58e4c8bd34878a827b46 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Fri, 27 Sep 2024 17:34:26 -0400 Subject: [PATCH 05/38] change subsolver parameters of LM --- src/LM_alg.jl | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 5568e67f..b304b3b5 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -47,6 +47,7 @@ function LM( subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(nls.meta.nvar), + nonlinear::Bool = true, ) where {H} start_time = time() elapsed_time = 0.0 @@ -62,6 +63,7 @@ function LM( γ = options.γ θ = options.θ σmin = options.σmin + σk = options.σk # store initial values of the subsolver_options fields that will be modified ν_subsolver = subsolver_options.ν @@ -85,7 +87,6 @@ function LM( end # initialize parameters - σk = max(1 / options.ν, σmin) xk = copy(x0) hk = h(xk[selected]) if hk == Inf @@ -178,7 +179,7 @@ function LM( prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + sqrt_ξ1_νInv = ξ1 > 0 ? sqrt(ξ1 * νInv) : 10. if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -192,10 +193,10 @@ function LM( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) + # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default subsolver_options.ν = ν - #subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () - subsolver_args = subsolver == R2DH ? (SpectralGradient(1., nls.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -246,7 +247,12 @@ function LM( shift!(ψ, xk) Jk = jac_op_residual(nls, xk) jtprod_residual!(nls, xk, Fk, ∇fk) - σmax = opnorm(Jk) + + # update opnorm if not linear least squares + if nonlinear == true + σmax = opnorm(Jk) + end + Complex_hist[k] += 1 end @@ -254,6 +260,7 @@ function LM( σk = σk * γ end νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + tired = k ≥ maxIter || elapsed_time > maxTime end @@ -281,7 +288,7 @@ function LM( set_status!(stats, status) set_solution!(stats, xk) set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1) : ξ1) + set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt_ξ1_νInv : ξ1) set_iter!(stats, k) set_time!(stats, elapsed_time) set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) From 187aabd1d6ec222912f108cddec522f01175d676 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Fri, 27 Sep 2024 17:40:28 -0400 Subject: [PATCH 06/38] reset LM alg and change it in another PR --- src/LM_alg.jl | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index b304b3b5..56bc5356 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -47,7 +47,6 @@ function LM( subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(nls.meta.nvar), - nonlinear::Bool = true, ) where {H} start_time = time() elapsed_time = 0.0 @@ -63,7 +62,6 @@ function LM( γ = options.γ θ = options.θ σmin = options.σmin - σk = options.σk # store initial values of the subsolver_options fields that will be modified ν_subsolver = subsolver_options.ν @@ -87,6 +85,7 @@ function LM( end # initialize parameters + σk = max(1 / options.ν, σmin) xk = copy(x0) hk = h(xk[selected]) if hk == Inf @@ -105,15 +104,13 @@ function LM( k = 0 Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) - R = eltype(xk) - sqrt_ξ1_νInv = one(R) Complex_hist = zeros(Int, maxIter) Grad_hist = zeros(Int, maxIter) Resid_hist = zeros(Int, maxIter) if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" #! format: on end @@ -179,24 +176,22 @@ function LM( prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = ξ1 > 0 ? sqrt(ξ1 * νInv) : 10. if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt_ξ1_νInv + ϵ_increment = ϵr * sqrt(ξ1) ϵ += ϵ_increment # make stopping test absolute and relative ϵ_subsolver += ϵ_increment end - if sqrt_ξ1_νInv < ϵ + if sqrt(ξ1) < ϵ # the current xk is approximately first-order stationary optimal = true continue end - # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) - subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default + subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ν = ν - subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : () + subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -226,7 +221,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat #! format: off end @@ -248,10 +243,7 @@ function LM( Jk = jac_op_residual(nls, xk) jtprod_residual!(nls, xk, Fk, ∇fk) - # update opnorm if not linear least squares - if nonlinear == true - σmax = opnorm(Jk) - end + σmax = opnorm(Jk) Complex_hist[k] += 1 end @@ -260,7 +252,6 @@ function LM( σk = σk * γ end νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ - tired = k ≥ maxIter || elapsed_time > maxTime end @@ -269,9 +260,9 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv #! format: on - @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + @info "LM: terminating with √ξ1 = $(sqrt(ξ1))" end end status = if optimal @@ -288,7 +279,7 @@ function LM( set_status!(stats, status) set_solution!(stats, xk) set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt_ξ1_νInv : ξ1) + set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1) : ξ1) set_iter!(stats, k) set_time!(stats, elapsed_time) set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) From cdc9637d83076f21da256a583f202dea85176fb4 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Mon, 21 Oct 2024 13:16:09 -0400 Subject: [PATCH 07/38] Remove the changes in R2_alg.jl --- src/R2_alg.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/R2_alg.jl b/src/R2_alg.jl index 79543300..da20a32f 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -21,7 +21,6 @@ mutable struct R2Solver{ Fobj_hist::Vector{R} Hobj_hist::Vector{R} Complex_hist::Vector{Int} - Time_hist::Vector{R} end function R2Solver( @@ -47,7 +46,6 @@ function R2Solver( end Fobj_hist = zeros(R, maxIter + 2) Hobj_hist = zeros(R, maxIter + 2) - Time_hist = zeros(R, maxIter + 2) Complex_hist = zeros(Int, maxIter + 2) return R2Solver( xk, @@ -64,7 +62,6 @@ function R2Solver( Fobj_hist, Hobj_hist, Complex_hist, - Time_hist, ) end @@ -90,7 +87,6 @@ function R2Solver(reg_nlp::AbstractRegularizedNLPModel{T, V}; max_iter::Int = 10 end Fobj_hist = zeros(T, max_iter + 2) Hobj_hist = zeros(T, max_iter + 2) - Time_hist = zeros(T, max_iter + 2) Complex_hist = zeros(Int, max_iter + 2) ψ = @@ -111,7 +107,6 @@ function R2Solver(reg_nlp::AbstractRegularizedNLPModel{T, V}; max_iter::Int = 10 Fobj_hist, Hobj_hist, Complex_hist, - Time_hist, ) end @@ -207,7 +202,6 @@ function R2( γ = options.γ; kwargs_dict..., ) - return stats end function R2( @@ -240,7 +234,6 @@ function R2( outdict = Dict( :Fhist => stats.solver_specific[:Fhist], :Hhist => stats.solver_specific[:Hhist], - :Time_hist => stats.solver_specific[:Time_hist], :Chist => stats.solver_specific[:SubsolverCounter], :NonSmooth => h, :status => stats.status, @@ -284,7 +277,6 @@ function R2( outdict = Dict( :Fhist => stats.solver_specific[:Fhist], :Hhist => stats.solver_specific[:Hhist], - :Time_hist => stats.solver_specific[:Time_hist], :Chist => stats.solver_specific[:SubsolverCounter], :NonSmooth => h, :status => stats.status, @@ -313,7 +305,6 @@ function R2(reg_nlp::AbstractRegularizedNLPModel; kwargs...) solve!(solver, reg_nlp, stats; callback = cb, max_iter = max_iter, kwargs...) set_solver_specific!(stats, :Fhist, solver.Fobj_hist[1:(stats.iter + 1)]) set_solver_specific!(stats, :Hhist, solver.Hobj_hist[1:(stats.iter + 1)]) - set_solver_specific!(stats, :Time_hist, solver.Time_hist[1:(stats.iter + 1)]) set_solver_specific!(stats, :SubsolverCounter, solver.Complex_hist[1:(stats.iter + 1)]) return stats end From c54867616b11d2e43734ce4cb198270f4f4d8ddb Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:19:31 -0400 Subject: [PATCH 08/38] correct documentation Co-authored-by: Dominique --- src/R2N.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/R2N.jl b/src/R2N.jl index fe8124e0..f426a4d2 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -7,8 +7,7 @@ A regularized quasi-Newton method for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is -lower semi-continuous and proper. +where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous and proper. About each iterate xₖ, a step sₖ is computed as an approximate solution of From e86af66000d5b60067f697df0eb069a34eb7ebac Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:19:57 -0400 Subject: [PATCH 09/38] Update src/R2N.jl correct documentation Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index f426a4d2..bdc4da03 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -13,7 +13,7 @@ About each iterate xₖ, a step sₖ is computed as an approximate solution of min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ) -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, +where φ(s; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀBₖs is a quadratic approximation of f about xₖ, ψ(s; xₖ) = h(xₖ + s) and σₖ > 0 is the regularization parameter. The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient method or the quadratic regularization method. From a5cb28dee928f2f90ff2ec5d8469eafd5ceb6cd2 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:20:32 -0400 Subject: [PATCH 10/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index bdc4da03..c4e1f3c8 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -22,7 +22,7 @@ method or the quadratic regularization method. * `nlp::AbstractNLPModel`: a smooth optimization problem * `h`: a regularizer such as those defined in ProximalOperators -* `options::ROSolverOptions`: a structure containing algorithmic parameters +* `options::ROSolverOptions`: a structure containing algorithmic parameters. The objective, gradient and Hessian of `nlp` will be accessed. The Hessian is accessed as an abstract operator and need not be the exact Hessian. From 7d8a796931a5160e33bd88a7d68706818868749e Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:20:42 -0400 Subject: [PATCH 11/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index c4e1f3c8..88c1ac6b 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -31,7 +31,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) * `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) -* `subsolver`: the procedure used to compute a step (`PG` or `R2`) +* `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) * `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). From f4b650f94cc545a5b21135c06f92014fbe88ba6a Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:21:24 -0400 Subject: [PATCH 12/38] update documentation of R2N Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 88c1ac6b..f26c1d8d 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -33,7 +33,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) * `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) -* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). +* `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). ### Return values From 79c1bb712989b1f245bcc47bd1003fae995fff31 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:41:20 -0400 Subject: [PATCH 13/38] Update documentation of src/R2DH.jl Co-authored-by: Dominique --- src/R2DH.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 1b4b53b4..7e657cbe 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -15,7 +15,7 @@ About each iterate xₖ, a step sₖ is computed as a solution of min φ(s; xₖ) + ψ(s; xₖ) -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s is a quadratic approximation of f about xₖ, +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a quadratic approximation of f about xₖ, ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation and σₖ > 0 is the regularization parameter. From 10cb328e5e559bb82066d75218528778e0372ee6 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:42:00 -0400 Subject: [PATCH 14/38] Update src/R2N.jl remove dead code Co-authored-by: Dominique --- src/R2N.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index f26c1d8d..ae5dc2d9 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -94,7 +94,6 @@ function R2N( end # initialize parameters - #σk = max(1 / options.ν, σmin) #SVM xk = copy(x0) hk = h(xk[selected]) if hk == Inf From 6380b810d12d62aebcbb5cabc70b9d25fb100886 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:42:48 -0400 Subject: [PATCH 15/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index ae5dc2d9..3952c4b7 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -194,9 +194,9 @@ function R2N( if norm(s) > β * norm(s1) s .= s1 end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa # is not modified if there is an error - subsolver_options.ν = ν_subsolver subsolver_options.ϵa = ϵ_subsolver_init Complex_hist[k] = iter From 30e7532d9c8e51f422a601ccbef3ba620da2f2af Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:43:04 -0400 Subject: [PATCH 16/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 3952c4b7..f5ae412f 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -132,7 +132,7 @@ function R2N( Bk = hess_op(f, xk) λmax = opnorm(Bk) - νInv = (1 + θ) *( σk + λmax) + νInv = (1 + θ) * (σk + λmax) sqrt_ξ1_νInv = one(R) optimal = false From b58744c8720ba1d1d45e4e5ee96dd73ee575304e Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:44:05 -0400 Subject: [PATCH 17/38] Update src/R2N.jl precise when to debug Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index f5ae412f..cb09b784 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -185,7 +185,7 @@ function R2N( # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default - @debug "setting inner stopping tolerance to" subsolver_options.optTol + verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) From 0f643d098beb6671a46afae83d27ccca73c4fca1 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:45:14 -0400 Subject: [PATCH 18/38] Update src/R2N.jl correct $\Delta_{mod}$ Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index cb09b784..682d9114 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -209,7 +209,7 @@ function R2N( fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() - Δmod = fhmax - (fk + mks) + max(1, abs(hk)) * 10 * eps() + Δmod = fhmax - (fk + mks) + max(1, abs(fhmax)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() if (ξ ≤ 0 || isnan(ξ)) From 8496c6db34f9e44734e98342c26b3b546aef39c5 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 31 Oct 2024 15:15:06 -0400 Subject: [PATCH 19/38] Update documentation and remove dead code --- src/R2DH.jl | 12 +++++------- src/R2N.jl | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 7e657cbe..5346d2ac 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -4,20 +4,18 @@ export R2DH R2DH(nlp, h, options) R2DH(f, ∇f!, h, options, x0) -A first-order quadratic regularization method for the problem +A second-order quadratic regularization method for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is -lower semi-continuous, proper and prox-bounded. +where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous and proper. About each iterate xₖ, a step sₖ is computed as a solution of min φ(s; xₖ) + ψ(s; xₖ) where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a quadratic approximation of f about xₖ, -ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation -and σₖ > 0 is the regularization parameter. +ψ(s; xₖ) = h(xₖ + s), Dₖ is a diagonal Hessian approximation and σₖ > 0 is the regularization parameter. ### Arguments @@ -30,7 +28,8 @@ and σₖ > 0 is the regularization parameter. * `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) * `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). -* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). +* `D`: Diagonal quasi-Newton operator. +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotonicity variant (default: 5). The objective and gradient of `nlp` will be accessed. @@ -166,7 +165,6 @@ function R2DH( ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) spectral_test = isa(D, SpectralGradient) - # D.d .= D.d .+ σk Dkσk = D.d .+ σk DNorm = norm(D.d, Inf) diff --git a/src/R2N.jl b/src/R2N.jl index 682d9114..8ed00ac8 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -34,6 +34,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) * `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotonicity variant (default: 0). ### Return values @@ -183,7 +184,6 @@ function R2N( end s1 = copy(s) - # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () @@ -205,7 +205,7 @@ function R2N( fkn = obj(f, xkn) hkn = h(xkn[selected]) hkn == -Inf && error("nonsmooth term is not proper") - mks = mk(s) #- σk * dot(s, s) / 2 + mks = mk(s) fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() From 6c73721ad56d7e4afbbe2661f86338806e933cfc Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 31 Oct 2024 18:07:24 -0400 Subject: [PATCH 20/38] Update runtests.jl Add some unit tests for R2DH/R2N/R2N_R2DH --- test/runtests.jl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 39f3a9a0..010700bf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -134,5 +134,33 @@ for (h, h_name) ∈ ((NormL1(λ), "l1"),) end end +R2N_R2DH(args...; kwargs...) = R2N(args...; subsolver = R2DH, kwargs...) +for (mod, mod_name) ∈ ((SpectralGradientModel, "spg"), (DiagonalPSBModel, "psb"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) + for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) + for solver_sym ∈ (:R2DH, :R2N, :R2N_R2DH) + solver_sym == (:R2N,:R2N_R2DH) && mod_name == ("spg", "psb") && continue + solver_sym == :R2DH && mod_name != "spg" && continue + solver_sym == :R2N_R2DH && h_name == "l1" && continue # this test seems to fail because s seems to be equal to zeros within the subsolver + solver_name = string(solver_sym) + solver = eval(solver_sym) + @testset "bpdn-$(mod_name)-$(solver_name)-$(h_name)" begin + x0 = zeros(bpdn.meta.nvar) + out = solver(mod(bpdn), h, options, x0 = x0) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.solver_specific[:Fhist]) == typeof(out.solution) + @test typeof(out.solver_specific[:Hhist]) == typeof(out.solution) + @test typeof(out.solver_specific[:SubsolverCounter]) == Array{Int, 1} + @test typeof(out.dual_feas) == eltype(out.solution) + @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:Hhist]) + @test length(out.solver_specific[:Fhist]) == length(out.solver_specific[:SubsolverCounter]) + @test obj(bpdn, out.solution) == out.solver_specific[:Fhist][end] + @test h(out.solution) == out.solver_specific[:Hhist][end] + @test out.status == :first_order + end + end + end +end + include("test_bounds.jl") include("test_allocs.jl") From bed48f9440a298a68e96bf5cf279092710d64f9f Mon Sep 17 00:00:00 2001 From: Dominique Date: Fri, 1 Nov 2024 09:06:32 -0400 Subject: [PATCH 21/38] Update src/R2DH.jl --- src/R2DH.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 5346d2ac..ff4349f1 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -29,7 +29,7 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua * `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) * `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). * `D`: Diagonal quasi-Newton operator. -* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotonicity variant (default: 5). +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 5). The objective and gradient of `nlp` will be accessed. From 464698a444da9f6ff1168c2f7c2667410ce76422 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 1 Nov 2024 17:09:42 -0400 Subject: [PATCH 22/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 8ed00ac8..01294629 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -34,7 +34,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) * `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). -* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotonicity variant (default: 0). +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 0). ### Return values From d38532e7a0ca6e0af62cc2fc550538dd420f5a62 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Mon, 4 Nov 2024 00:42:38 -0500 Subject: [PATCH 23/38] update Mmonotone as in the paper --- src/R2DH.jl | 10 +++++----- src/R2N.jl | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index ff4349f1..73dc5b22 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -29,7 +29,7 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua * `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) * `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). * `D`: Diagonal quasi-Newton operator. -* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 5). +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 6). The objective and gradient of `nlp` will be accessed. @@ -87,7 +87,7 @@ function R2DH( D::DQN, options::ROSolverOptions{R}, x0::AbstractVector{R}; - Mmonotone::Int = 5, + Mmonotone::Int = 6, selected::AbstractVector{<:Integer} = 1:length(x0), kwargs..., ) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator} @@ -148,7 +148,7 @@ function R2DH( Fobj_hist = zeros(maxIter+1) Hobj_hist = zeros(maxIter+1) time_hist = zeros(maxIter+1) - FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) + FHobj_hist = fill!(Vector{R}(undef, - 1), R(-Inf)) Complex_hist = zeros(Int, maxIter+1) k_prox = 0 if verbose > 0 @@ -202,7 +202,7 @@ function R2DH( Fobj_hist[k] = fk Hobj_hist[k] = hk time_hist[k] = elapsed_time - Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + Mmonotone > 1 && (FHobj_hist[mod(k-1, Mmonotone - 1) + 1] = fk + hk) Complex_hist[k] += k_prox k_prox = 0 @@ -211,7 +211,7 @@ function R2DH( hkn = h(xkn[selected]) hkn == -Inf && error("nonsmooth term is not proper") - fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk + fhmax = Mmonotone > 1 ? maximum(FHobj_hist) : fk + hk Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() Δmod = fhmax - (fk + mks) + max(1, abs(hk)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() diff --git a/src/R2N.jl b/src/R2N.jl index 01294629..d2daab82 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -34,7 +34,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) * `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). -* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 0). +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 1). ### Return values @@ -51,7 +51,7 @@ function R2N( subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver = R2, subsolver_options = ROSolverOptions(ϵa = options.ϵa), - Mmonotone::Int = 0, + Mmonotone::Int = 1, selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), ) where {H, R} start_time = time() @@ -112,7 +112,7 @@ function R2N( Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) - FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone - 1), R(-Inf)) Complex_hist = zeros(Int, maxIter) if verbose > 0 #! format: off @@ -144,7 +144,7 @@ function R2N( elapsed_time = time() - start_time Fobj_hist[k] = fk Hobj_hist[k] = hk - Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + Mmonotone > 1 && (FHobj_hist[mod(k-1, Mmonotone - 1) + 1] = fk + hk) # model for first prox-gradient step and ξ1 φ1(d) = ∇fk' * d @@ -207,7 +207,7 @@ function R2N( hkn == -Inf && error("nonsmooth term is not proper") mks = mk(s) - fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk + fhmax = Mmonotone > 1 ? maximum(FHobj_hist) : fk + hk Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() Δmod = fhmax - (fk + mks) + max(1, abs(fhmax)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() From 6e6e02796d86379e5ae56bd954cb6f6c2d03d811 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:20:11 -0500 Subject: [PATCH 24/38] Update src/R2DH.jl Co-authored-by: Dominique --- src/R2DH.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 73dc5b22..f4225565 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -8,7 +8,7 @@ A second-order quadratic regularization method for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous and proper. +where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous, proper, and prox-bounded. About each iterate xₖ, a step sₖ is computed as a solution of From 0201b8629fa2e3bc5397c54844ff29fffa7a1a6c Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:20:41 -0500 Subject: [PATCH 25/38] Update src/R2DH.jl Co-authored-by: Dominique --- src/R2DH.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index f4225565..c2c1b2ea 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -27,7 +27,7 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua ### Keyword Arguments * `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) -* `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). +* `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:length(x0)`). * `D`: Diagonal quasi-Newton operator. * `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 6). From 7c3e27bceb805f77d6428921a3439839fb785bbd Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 14 Nov 2024 14:28:06 -0500 Subject: [PATCH 26/38] Update doc --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index d2daab82..49130279 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -7,7 +7,7 @@ A regularized quasi-Newton method for the problem min f(x) + h(x) -where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous and proper. +where f: ℝⁿ → ℝ is C¹ and h: ℝⁿ → ℝ is lower semi-continuous, proper and prox-bounded. About each iterate xₖ, a step sₖ is computed as an approximate solution of From 5ba37f8a170bc8696e08a894d03540008f083ef9 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 14 Nov 2024 15:35:17 -0500 Subject: [PATCH 27/38] Update R2DH.jl I will add (-Inf) condition in ShiftedProximalOperators as well --- src/R2DH.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index c2c1b2ea..3b9be133 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -148,7 +148,7 @@ function R2DH( Fobj_hist = zeros(maxIter+1) Hobj_hist = zeros(maxIter+1) time_hist = zeros(maxIter+1) - FHobj_hist = fill!(Vector{R}(undef, - 1), R(-Inf)) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone - 1), R(-Inf)) Complex_hist = zeros(Int, maxIter+1) k_prox = 0 if verbose > 0 @@ -188,7 +188,7 @@ function R2DH( end mks = mk(s) - if mks < -1e5 + if mks == -Inf σk = σk * γ Dkσk .= D.d .+ σk DNorm = norm(D.d, Inf) From bebb8ce71fd56397e2dc711b939608d2b1a53252 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:00:25 -0500 Subject: [PATCH 28/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 49130279..2079a120 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -33,8 +33,8 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia * `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) * `subsolver`: the procedure used to compute a step (`R2DH`, `R2` or `PG`) * `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) -* `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). * `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 1). +* `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:nlp.meta.nvar`). ### Return values From ff3d1727f97a6266498daa6a617d38d46033fc42 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:01:55 -0500 Subject: [PATCH 29/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 2079a120..855d3821 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -151,7 +151,7 @@ function R2N( mk1(d) = φ1(d) + ψ(d) # model for subsequent prox-gradient steps and ξ - φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2 + φ(d) = ∇fk' * d + dot(d, Bk * d) / 2 + σk * dot(d, d) / 2 ∇φ!(g, d) = begin mul!(g, Bk, d) From cebac6ce640ef60f1875df782f08d37318a15efc Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:02:27 -0500 Subject: [PATCH 30/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 855d3821..582bcf0c 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -184,7 +184,7 @@ function R2N( end s1 = copy(s) - subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default + subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do From 33ad276f61537688bbf2d3186e79d4d7cde1f2be Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:03:10 -0500 Subject: [PATCH 31/38] Update src/R2N.jl Co-authored-by: Dominique --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 582bcf0c..3d17959c 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -218,7 +218,7 @@ function R2N( ρk = Δobj / Δmod - R2N_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") + R2N_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") if (verbose > 0) && ((k % ptf == 0) || (k == 1)) #! format: off From 5fb02dfdee7717283aacbce7197c54fa8f41961c Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:04:14 -0500 Subject: [PATCH 32/38] Update src/R2DH.jl Co-authored-by: Dominique --- src/R2DH.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 3b9be133..de8e5fab 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -22,7 +22,6 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua * `nlp::AbstractDiagonalQNModel`: a smooth optimization problem * `h`: a regularizer such as those defined in ProximalOperators * `options::ROSolverOptions`: a structure containing algorithmic parameters -* `x0::AbstractVector`: an initial guess (in the second calling form) ### Keyword Arguments From 88409d3231cb9e1abbe3c8604494d61b43a185f6 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:05:04 -0500 Subject: [PATCH 33/38] Update src/R2DH.jl clean documentation Co-authored-by: Dominique --- src/R2DH.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index de8e5fab..4f77c921 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -32,11 +32,6 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua The objective and gradient of `nlp` will be accessed. -In the second form, instead of `nlp`, the user may pass in - -* `f` a function such that `f(x)` returns the value of f at x -* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` - ### Return values * `xk`: the final iterate From d9ca5e75090d971d520ade299d47da957e2210de Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:06:29 -0500 Subject: [PATCH 34/38] Update src/R2DH.jl remove redundant DNorm Co-authored-by: Dominique --- src/R2DH.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 4f77c921..2e0d6f6e 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -256,7 +256,6 @@ function R2DH( end Dkσk .= D.d .+ σk - DNorm = norm(D.d, Inf) ν = 1 / ((DNorm + σk) * (1 + θ)) tired = maxIter > 0 && k ≥ maxIter From 2126667f29c3e0f2469ccea5eb3a91d278775d62 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH <81633807+MohamedLaghdafHABIBOULLAH@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:49:23 -0500 Subject: [PATCH 35/38] Update test/runtests.jl Co-authored-by: Maxence Gollier <134112149+MaxenceGollier@users.noreply.github.com> --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 010700bf..b496c29e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -138,7 +138,7 @@ R2N_R2DH(args...; kwargs...) = R2N(args...; subsolver = R2DH, kwargs...) for (mod, mod_name) ∈ ((SpectralGradientModel, "spg"), (DiagonalPSBModel, "psb"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1")) for solver_sym ∈ (:R2DH, :R2N, :R2N_R2DH) - solver_sym == (:R2N,:R2N_R2DH) && mod_name == ("spg", "psb") && continue + solver_sym ∈ (:R2N,:R2N_R2DH) && mod_name ∈ ("spg", "psb") && continue solver_sym == :R2DH && mod_name != "spg" && continue solver_sym == :R2N_R2DH && h_name == "l1" && continue # this test seems to fail because s seems to be equal to zeros within the subsolver solver_name = string(solver_sym) From 14710f1136128fd1824519c51dc348fb0cc10149 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Fri, 10 Jan 2025 15:52:17 -0500 Subject: [PATCH 36/38] Update doc R2DH.jl Add a documentation for second calling form of R2DH --- src/R2DH.jl | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 2e0d6f6e..004c7e3f 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -2,7 +2,6 @@ export R2DH """ R2DH(nlp, h, options) - R2DH(f, ∇f!, h, options, x0) A second-order quadratic regularization method for the problem @@ -33,11 +32,7 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ(Dₖ + σₖI)s is a qua The objective and gradient of `nlp` will be accessed. ### Return values - -* `xk`: the final iterate -* `Fobj_hist`: an array with the history of values of the smooth objective -* `Hobj_hist`: an array with the history of values of the nonsmooth objective -* `Complex_hist`: an array with the history of number of inner iterations. +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. """ function R2DH( nlp::AbstractDiagonalQNModel{R, S}, @@ -74,6 +69,41 @@ function R2DH( return stats end +""" + R2DH(f, ∇f!, h, options, x0) + +A second calling form for `R2DH` where the objective and gradient are passed as arguments. + +### Arguments + +* `f::Function`: the objective function +* `∇f!::Function`: the gradient function +* `h`: a regularizer such as those defined in ProximalOperators +* `D`: Diagonal quasi-Newton operator. +* `options::ROSolverOptions`: a structure containing algorithmic parameters +* `x0::AbstractVector`: an initial guess + +### Keyword Arguments + +* `Mmonotone::Int`: number of previous values of the objective to consider for the non-monotone variant (default: 6). +* `selected::AbstractVector{<:Integer}`: subset of variables to which `h` is applied (default `1:length(x0)`). + +### Return values + +* `xk`: the final iterate +* `k`: the number of iterations +* `outdict`: a dictionary containing the following fields: + * `Fhist`: an array with the history of values of the smooth objective + * `Hhist`: an array with the history of values of the nonsmooth objective + * `Time_hist`: an array with the history of elapsed times + * `Chist`: an array with the history of number of inner iterations + * `NonSmooth`: the nonsmooth term + * `status`: the status of the solver either `:first_order`, `:max_iter`, `:max_time` or `:exception` + * `fk`: the value of the smooth objective at the final iterate + * `hk`: the value of the nonsmooth objective at the final iterate + * `sqrt_ξ_νInv`: the square root of the ratio of the nonsmooth term to the regularization parameter + * `elapsed_time`: the elapsed time +""" function R2DH( f::F, ∇f!::G, From 7366a48ed2e046276b3c1f8cae28ec3aae9f399d Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Tue, 14 Jan 2025 15:45:42 -0500 Subject: [PATCH 37/38] Remove the condition mk = -Inf --- src/R2DH.jl | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 004c7e3f..1b48bc07 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -174,7 +174,6 @@ function R2DH( time_hist = zeros(maxIter+1) FHobj_hist = fill!(Vector{R}(undef, Mmonotone - 1), R(-Inf)) Complex_hist = zeros(Int, maxIter+1) - k_prox = 0 if verbose > 0 #! format: off @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√(ξ/ν)" "ρ" "σ" "‖x‖" "‖s‖" "" @@ -200,7 +199,6 @@ function R2DH( tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime while !(optimal || tired) - k_prox += 1 # model with diagonal hessian φ(d) = ∇fk' * d + (d' * (Dkσk .* d)) / 2 mk(d) = φ(d) + ψ(d) @@ -212,15 +210,6 @@ function R2DH( end mks = mk(s) - if mks == -Inf - σk = σk * γ - Dkσk .= D.d .+ σk - DNorm = norm(D.d, Inf) - ν = 1 / ((DNorm + σk) * (1 + θ)) - @. mν∇fk = -ν * ∇fk - continue - end - k = k + 1 elapsed_time = time() - start_time Fobj_hist[k] = fk @@ -228,8 +217,7 @@ function R2DH( time_hist[k] = elapsed_time Mmonotone > 1 && (FHobj_hist[mod(k-1, Mmonotone - 1) + 1] = fk + hk) - Complex_hist[k] += k_prox - k_prox = 0 + Complex_hist[k] += 1 xkn .= xk .+ s fkn = f(xkn) hkn = h(xkn[selected]) From b8e36eaaeb02b0e615e9903dbec457fde19bf64d Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Tue, 14 Jan 2025 15:47:57 -0500 Subject: [PATCH 38/38] update sigma of the subsolver --- src/R2N.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/R2N.jl b/src/R2N.jl index 3d17959c..f914372a 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -186,6 +186,7 @@ function R2N( subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol + subsolver_options.σk = σk subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -199,6 +200,7 @@ function R2N( # is not modified if there is an error subsolver_options.ν = ν_subsolver subsolver_options.ϵa = ϵ_subsolver_init + subsolver_options.σk = σk Complex_hist[k] = iter xkn .= xk .+ s