Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update TRDH to use diagonal QN model #132

Merged
merged 2 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e"

[compat]
LinearOperators = "2.5.2"
LinearOperators = "2.7"
NLPModels = "0.19, 0.20"
NLPModelsModifiers = "0.6"
NLPModelsModifiers = "0.7"
ProximalOperators = "0.15"
RegularizedProblems = "0.1"
ShiftedProximalOperators = "0.2"
Expand Down
2 changes: 1 addition & 1 deletion examples/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RegularizedProblems = "ea076b23-609f-44d2-bb12-a4ae45328278"
ADNLPModels = "0.7"
DifferentialEquations = "7"
NLPModels = "0.19, 0.20"
NLPModelsModifiers = "0.6"
NLPModelsModifiers = "0.7"
PGFPlots = "^3.4"
ProximalOperators = "0.15"
RegularizedOptimization = "0.1"
Expand Down
13 changes: 4 additions & 9 deletions examples/demo-bpdn-constr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,10 @@ function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf")
ϵa = 1e-6,
ϵr = 1e-6,
verbose = 10,
spectral = false,
psb = true,
)

@info " using TR to solve with" h χ
reset!(f)
TR_out = TR(f, h, χ, options, x0 = f.meta.x0)
TR_out = TR(LSR1Model(f), h, χ, options, x0 = f.meta.x0)
@info "TR relative error" norm(TR_out.solution - sol) / norm(sol)
plot_bpdn(TR_out, sol, "constr-tr-r2-$(suffix)")

Expand All @@ -32,8 +29,7 @@ function demo_solver(f, nls, sol, h, χ, suffix = "l0-linf")
plot_bpdn(R2_out, sol, "constr-r2-$(suffix)")

@info " using TRDH to solve with" h χ
reset!(f)
TRDH_out = TRDH(f, h, χ, options, x0 = f.meta.x0)
TRDH_out = TRDH(DiagonalPSBModel(f), h, χ, options, x0 = f.meta.x0)
@info "TRDH relative error" norm(TRDH_out.solution - sol) / norm(sol)
# plot_bpdn(TRDH_out, sol, "constr-trdh-$(suffix)")

Expand All @@ -52,11 +48,10 @@ end

function demo_bpdn_constr(compound = 1)
model, nls_model, sol = bpdn_model(compound, bounds = true)
f = LSR1Model(model)
λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10
demo_solver(f, nls_model, sol, NormL0(λ), NormLinf(1.0))
demo_solver(model, nls_model, sol, NormL0(λ), NormLinf(1.0))
λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 3
demo_solver(f, nls_model, sol, NormL1(λ), NormLinf(1.0), "l1-linf")
demo_solver(model, nls_model, sol, NormL1(λ), NormLinf(1.0), "l1-linf")
end

demo_bpdn_constr()
16 changes: 6 additions & 10 deletions examples/demo-nnmf-constr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,21 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf")
ϵr = 1e-6,
verbose = 10,
maxIter = 500,
spectral = true,
)
@info " using TR to solve with" h χ
reset!(f)
TR_out = TR(f, h, χ, options, selected = selected)
TR_out = TR(LSR1Model(f), h, χ, options, selected = selected)
plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix")

@info " using R2 to solve with" h
reset!(f)
R2_out = R2(f, h, options, selected = selected)
plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix")

subsolver_options = ROSolverOptions(spectral = false, psb = true, ϵa = options.ϵa)
subsolver_options = ROSolverOptions(ϵa = options.ϵa)
@info " using TR with TRDH as subproblem to solve with" h χ
reset!(f)
TR2_out = TR(
f,
LSR1Model(f),
h,
χ,
options,
Expand All @@ -43,19 +41,17 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf")
plot_nnmf(TR2_out, Avec, m, n, k, "tr-trdh-$suffix")

@info " using TRDH to solve with" h χ
reset!(f)
TRDH_out = TRDH(f, h, χ, options, selected = selected)
TRDH_out = TRDH(DiagonalPSBModel(f), h, χ, options, selected = selected)
plot_nnmf(TRDH_out, Avec, m, n, k, "trdh-$suffix")
end

function demo_nnmf()
m, n, k = 100, 50, 5
model, nls_model, A, selected = nnmf_model(m, n, k)
f = LSR1Model(model)
λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 200
demo_solver(f, NormL0(λ), NormLinf(1.0), selected, A, m, n, k, "l0-linf")
demo_solver(model, NormL0(λ), NormLinf(1.0), selected, A, m, n, k, "l0-linf")
λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100000
demo_solver(f, NormL1(λ), NormLinf(1.0), selected, A, m, n, k, "l1-linf")
demo_solver(model, NormL1(λ), NormLinf(1.0), selected, A, m, n, k, "l1-linf")
end

demo_nnmf()
7 changes: 4 additions & 3 deletions src/LMTR_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s;

* `x0::AbstractVector`: an initial guess (default: `nls.meta.x0`)
* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver
* `subsolver`: the procedure used to compute a step (`PG` or `R2`)
* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`)
* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver.
* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`).
* `selected::AbstractVector{<:Integer}`: (default `1:nls.meta.nvar`).

### Return values

Expand Down Expand Up @@ -197,8 +197,9 @@ function LMTR(
set_radius!(ψ, ∆_effective)
subsolver_options.Δk = ∆_effective / 10
subsolver_options.ν = ν
subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
s, iter, _ = with_logger(subsolver_logger) do
subsolver(φ, ∇φ!, ψ, subsolver_options, s)
subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
end
# restore initial values of subsolver_options here so that it is not modified
# if there is an error
Expand Down
7 changes: 4 additions & 3 deletions src/LM_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ and σ > 0 is a regularization parameter.

* `x0::AbstractVector`: an initial guess (default: `nls.meta.x0`)
* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver
* `subsolver`: the procedure used to compute a step (`PG` or `R2`)
* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`)
* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver.
* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`).
* `selected::AbstractVector{<:Integer}`: (default `1:nls.meta.nvar`).

### Return values

Expand Down Expand Up @@ -191,9 +191,10 @@ function LM(

subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10))
subsolver_options.ν = ν
subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
@debug "setting inner stopping tolerance to" subsolver_options.optTol
s, iter, _ = with_logger(subsolver_logger) do
subsolver(φ, ∇φ!, ψ, subsolver_options, s)
subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
end
# restore initial subsolver_options here so that it is not modified if there is an error
subsolver_options.ν = ν_subsolver
Expand Down
47 changes: 20 additions & 27 deletions src/TRDH_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

### Arguments

* `nlp::AbstractNLPModel`: a smooth optimization problem
* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem
* `h`: a regularizer such as those defined in ProximalOperators
* `χ`: a norm used to define the trust region in the form of a regularizer
* `options::ROSolverOptions`: a structure containing algorithmic parameters
Expand All @@ -38,7 +38,6 @@

* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`)
* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`)
* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`).

### Return values

Expand All @@ -48,18 +47,19 @@
* `Complex_hist`: an array with the history of number of inner iterations.
"""
function TRDH(
nlp::AbstractNLPModel{R},
nlp::AbstractDiagonalQNModel{R, S},
h,
χ,
options::ROSolverOptions{R};
kwargs...,
) where {R <: Real}
) where {R <: Real, S}
kwargs_dict = Dict(kwargs...)
x0 = pop!(kwargs_dict, :x0, nlp.meta.x0)
xk, k, outdict = TRDH(
x -> obj(nlp, x),
(g, x) -> grad!(nlp, x, g),
h,
hess_op(nlp, x0),
options,
x0;
χ = χ,
Expand Down Expand Up @@ -97,13 +97,13 @@
f::F,
∇f!::G,
h::H,
D::DQN,
options::ROSolverOptions{R},
x0::AbstractVector{R};
χ::X = NormLinf(one(R)),
selected::AbstractVector{<:Integer} = 1:length(x0),
Bk = (one(R) / options.ν) * I,
kwargs...,
) where {R <: Real, F, G, H, X}
) where {R <: Real, F, G, H, DQN <: AbstractDiagonalQuasiNewtonOperator, X}
start_time = time()
elapsed_time = 0.0
ϵ = options.ϵa
Expand All @@ -118,9 +118,6 @@
γ = options.γ
α = options.α
β = options.β
spectral = options.spectral
psb = options.psb
hess_init_val = (Bk isa UniformScaling) ? Bk.λ : (one(R) / options.ν)
reduce_TR = options.reduce_TR

local l_bound, u_bound
Expand Down Expand Up @@ -161,8 +158,8 @@
s = zero(xk)
l_bound_k = similar(xk)
u_bound_k = similar(xk)
if h isa ShiftedProximableFunction # case TRDH is used as a subsolver
is_subsolver = true
is_subsolver = h isa ShiftedProximableFunction # case TRDH is used as a subsolver
if is_subsolver
ψ = shifted(h, xk)
@assert !has_bnds
l_bound = copy(ψ.l)
Expand All @@ -172,7 +169,6 @@
has_bnds = true
set_bounds!(ψ, l_bound_k, u_bound_k)
else
is_subsolver = false
if has_bnds
@. l_bound_k = max(-Δk, l_bound - xk)
@. u_bound_k = min(Δk, u_bound - xk)
Expand Down Expand Up @@ -203,10 +199,8 @@
∇fk = similar(xk)
∇f!(∇fk, xk)
∇fk⁻ = copy(∇fk)
Dk = spectral ? SpectralGradient(hess_init_val, length(xk)) :
((Bk isa UniformScaling) ? DiagonalQN(fill!(similar(xk), hess_init_val), psb) : DiagonalQN(diag(Bk), psb))
DkNorm = norm(Dk.d, Inf)
νInv = (DkNorm + one(R) / (α * Δk))
DNorm = norm(D.d, Inf)
νInv = DNorm + one(R) / (α * Δk)
ν = one(R) / νInv
mν∇fk = -ν .* ∇fk
sqrt_ξ_νInv = one(R)
Expand Down Expand Up @@ -252,12 +246,11 @@
set_radius!(ψ, Δ_effective)
end

# model with diagonal hessian
φ(d) = ∇fk' * d + (d' * (Dk.d .* d)) / 2
# model with diagonal hessian
φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2
mk(d) = φ(d) + ψ(d)

iprox!(s, ψ, ∇fk, Dk)
Complex_hist[k] += 1
iprox!(s, ψ, ∇fk, D)

sNorm = χ(s)
xkn .= xk .+ s
Expand Down Expand Up @@ -293,9 +286,9 @@
if (verbose > 0) && (k % ptf == 0)
#! format: off
if reduce_TR
@info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv sqrt(ξ) ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat
@info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv sqrt(ξ) ρk Δk χ(xk) sNorm norm(D.d) TR_stat
else
@info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk Δk χ(xk) sNorm norm(Dk.d) TR_stat
@info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk Δk χ(xk) sNorm norm(D.d) TR_stat

Check warning on line 291 in src/TRDH_alg.jl

View check run for this annotation

Codecov / codecov/patch

src/TRDH_alg.jl#L291

Added line #L291 was not covered by tests
end
#! format: on
end
Expand All @@ -314,8 +307,8 @@
hk = hkn
shift!(ψ, xk)
∇f!(∇fk, xk)
push!(Dk, s, ∇fk - ∇fk⁻) # update QN operator
DkNorm = norm(Dk.d, Inf)
push!(D, s, ∇fk - ∇fk⁻) # update QN operator
DNorm = norm(D.d, Inf)
∇fk⁻ .= ∇fk
end

Expand All @@ -325,7 +318,7 @@
has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk)
end

νInv = reduce_TR ? (DkNorm + one(R) / (α * Δk)) : (DkNorm + one(R) / α)
νInv = reduce_TR ? (DNorm + one(R) / (α * Δk)) : (DNorm + one(R) / α)
ν = one(R) / νInv
mν∇fk .= -ν .* ∇fk

Expand All @@ -338,12 +331,12 @@
elseif optimal
#! format: off
if reduce_TR
@info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv sqrt(ξ1) "" Δk χ(xk) χ(s) norm(Dk.d)
@info @sprintf "%6d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv sqrt(ξ1) "" Δk χ(xk) χ(s) norm(D.d)
#! format: on
@info "TRDH: terminating with √(ξ1/ν) = $(sqrt_ξ_νInv)"
else
#! format: off
@info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" Δk χ(xk) χ(s) norm(Dk.d)
@info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" Δk χ(xk) χ(s) norm(D.d)

Check warning on line 339 in src/TRDH_alg.jl

View check run for this annotation

Codecov / codecov/patch

src/TRDH_alg.jl#L339

Added line #L339 was not covered by tests
#! format: on
@info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)"
end
Expand Down
5 changes: 3 additions & 2 deletions src/TR_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ The Hessian is accessed as an abstract operator and need not be the exact Hessia

* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`)
* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger)
* `subsolver`: the procedure used to compute a step (`PG` or `R2`)
* `subsolver`: the procedure used to compute a step (`PG`, `R2` or `TRDH`)
* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options)
* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`).

Expand Down Expand Up @@ -186,8 +186,9 @@ function TR(
set_radius!(ψ, ∆_effective)
subsolver_options.Δk = ∆_effective / 10
subsolver_options.ν = ν
subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, f.meta.nvar),) : ()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this imply that we can only use TR-TRDH with SpectralGradient?
Also, I think the operator should be preallocated outside the while loop to facilitate the use of in-place subsolvers (once in-place methods are implemented for all the subsolvers in the same manner as R2!)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for the moment, it’s the only option (the other two don’t perform well anyways). I plan to make this an option when I implement the solver object, so I’m not making an effort to preallocate at this point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The others perform well on some problems, like SVM for example. I'll keep a branch on my fork then if want still want to be able to produce the results of indef-pg.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m planning to work on the solvers soon, so the option should be in place in the near future.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a note about that in #129.

s, iter, outdict = with_logger(subsolver_logger) do
subsolver(φ, ∇φ!, ψ, subsolver_options, s; Bk = Bk)
subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
end
# restore initial values of subsolver_options here so that it is not modified
# if there is an error
Expand Down
6 changes: 0 additions & 6 deletions src/input_struct.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ mutable struct ROSolverOptions{R}
γ::R # trust region buffer
θ::R # step length factor in relation to Hessian norm
β::R # TR size as factor of first PG step
spectral::Bool # for TRDH: use spectral gradient update if true, otherwise DiagonalQN
psb::Bool # for TRDH with DiagonalQN (spectral = false): use PSB update if true, otherwise Andrei update
reduce_TR::Bool

function ROSolverOptions{R}(;
Expand All @@ -36,8 +34,6 @@ mutable struct ROSolverOptions{R}
γ::R = R(3),
θ::R = eps(R)^(1 / 5),
β::R = 1 / eps(R),
spectral::Bool = false,
psb::Bool = false,
reduce_TR::Bool = true,
) where {R <: Real}
@assert ϵa ≥ 0
Expand Down Expand Up @@ -70,8 +66,6 @@ mutable struct ROSolverOptions{R}
γ,
θ,
β,
spectral,
psb,
reduce_TR,
)
end
Expand Down
4 changes: 2 additions & 2 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
y::AbstractVector,
ψ::ShiftedProximableFunction,
g::AbstractVector,
D::DiagonalQN,
D::AbstractDiagonalQuasiNewtonOperator,
) = iprox!(y, ψ, g, D.d)

ShiftedProximalOperators.iprox!(
Expand All @@ -18,5 +18,5 @@
D::SpectralGradient,
) = iprox!(y, ψ, g, fill!(similar(g), D.d[1]))

LinearAlgebra.diag(op::DiagonalQN) = copy(op.d)
LinearAlgebra.diag(op::AbstractDiagonalQuasiNewtonOperator) = copy(op.d)

Check warning on line 21 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L21

Added line #L21 was not covered by tests
LinearAlgebra.diag(op::SpectralGradient{T}) where {T} = zeros(T, op.nrow) .* op.d[1]
Loading
Loading