Skip to content

Commit

Permalink
Add Hessian support to GrayBox (#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
odow authored Sep 3, 2024
1 parent 0fccbe5 commit 304e62b
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 32 deletions.
28 changes: 20 additions & 8 deletions ext/MathOptAIFluxExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ function MathOptAI.add_predictor(
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
kwargs...,
)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
inner_predictor = MathOptAI.build_predictor(predictor; config, kwargs...)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
Expand All @@ -85,6 +85,7 @@ end
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
gray_box_hessian::Bool = false,
)
Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
Expand All @@ -110,6 +111,8 @@ Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
`Flux.relu => MathOptAI.QuadraticReLU()`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `Flux.withjacobian`.
* `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian
of the output using `Flux.hessian`.
## Example
Expand Down Expand Up @@ -141,12 +144,13 @@ function MathOptAI.build_predictor(
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
gray_box_hessian::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
return MathOptAI.GrayBox(predictor; hessian = gray_box_hessian)
end
inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
for layer in predictor.layers
Expand All @@ -155,15 +159,23 @@ function MathOptAI.build_predictor(
return inner_predictor
end

function MathOptAI.GrayBox(predictor::Flux.Chain)
function MathOptAI.GrayBox(predictor::Flux.Chain; hessian::Bool = false)
function output_size(x)
return only(Flux.outputsize(predictor, (length(x),)))
end
function with_jacobian(x)
ret = Flux.withjacobian(x -> predictor(Float32.(x)), collect(x))
return (value = ret.val, jacobian = only(ret.grad))
function callback(x)
x32 = collect(Float32.(x))
ret = Flux.withjacobian(predictor, x32)
if !hessian
return (value = ret.val, jacobian = only(ret.grad))
end
Hs = map(1:length(ret.val)) do i
return Flux.hessian(x -> predictor(x)[i], x32)
end
H = cat(Hs...; dims = 3)
return (value = ret.val, jacobian = only(ret.grad), hessian = H)
end
return MathOptAI.GrayBox(output_size, with_jacobian)
return MathOptAI.GrayBox(output_size, callback; has_hessian = hessian)
end

function _add_predictor(::MathOptAI.Pipeline, layer::Any, ::Dict)
Expand Down
33 changes: 23 additions & 10 deletions ext/MathOptAIPythonCallExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,18 @@ Add a trained neural network from Pytorch via PythonCall.jl to `model`.
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
* `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian
of the output using `torch.func.hessian`.
"""
function MathOptAI.add_predictor(
model::JuMP.AbstractModel,
predictor::MathOptAI.PytorchModel,
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
kwargs...,
)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
inner_predictor = MathOptAI.build_predictor(predictor; config, kwargs...)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
Expand All @@ -59,6 +61,7 @@ end
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
gray_box_hessian::Bool = false,
)
Convert a trained neural network from Pytorch via PythonCall.jl to a
Expand All @@ -80,17 +83,20 @@ Convert a trained neural network from Pytorch via PythonCall.jl to a
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
* `gray_box_hessian`: if `true`, the gray box additionally computes the Hessian
of the output using `torch.func.hessian`.
"""
function MathOptAI.build_predictor(
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
gray_box_hessian::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
return MathOptAI.GrayBox(predictor; hessian = gray_box_hessian)
end
torch = PythonCall.pyimport("torch")
nn = PythonCall.pyimport("torch.nn")
Expand Down Expand Up @@ -118,23 +124,30 @@ function _predictor(nn, layer, config)
return error("unsupported layer: $layer")
end

function MathOptAI.GrayBox(predictor::MathOptAI.PytorchModel)
function MathOptAI.GrayBox(
predictor::MathOptAI.PytorchModel;
hessian::Bool = false,
)
torch = PythonCall.pyimport("torch")
torch_model = torch.load(predictor.filename)
J = torch.func.jacrev(torch_model)
H = torch.func.hessian(torch_model)
# TODO(odow): I'm not sure if there is a better way to get the output
# dimension of a torch model object?
output_size(::Any) = PythonCall.pyconvert(Int, torch_model[-1].out_features)
function with_jacobian(x)
function callback(x)
py_x = torch.tensor(collect(x))
py_value = torch_model(py_x).detach().numpy()
value = PythonCall.pyconvert(Vector, py_value)
py_jacobian = J(py_x).detach().numpy()
return (;
value = PythonCall.pyconvert(Vector, py_value),
jacobian = PythonCall.pyconvert(Matrix, py_jacobian),
)
jacobian = PythonCall.pyconvert(Matrix, py_jacobian)
if !hessian
return (; value, jacobian)
end
hessians = PythonCall.pyconvert(Array, H(py_x).detach().numpy())
return (; value, jacobian, hessian = permutedims(hessians, (2, 3, 1)))
end
return MathOptAI.GrayBox(output_size, with_jacobian)
return MathOptAI.GrayBox(output_size, callback; has_hessian = hessian)
end

end # module
49 changes: 35 additions & 14 deletions src/predictors/GrayBox.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"""
GrayBox(
output_size::Function,
with_jacobian::Function,
callback::Function;
has_hessian::Bool = false,
) <: AbstractPredictor
An [`AbstractPredictor`](@ref) that represents the function ``f(x)`` as a
Expand All @@ -17,10 +18,13 @@ user-defined nonlinear operator.
* `output_size(x::Vector):Int`: given an input vector `x`, return the dimension
of the output vector
* `with_jacobian(x::Vector)::NamedTuple -> (;value, jacobian)`: given an input
vector `x`, return a `NamedTuple` that computes the primal value and Jacobian
of the output value with respect to the input. `jacobian[j, i]` is the
partial derivative of `value[j]` with respect to `x[i]`.
* `callback(x::Vector)::NamedTuple -> (;value, jacobian[, hessian])`: given an
input vector `x`, return a `NamedTuple` that computes the primal value and
Jacobian of the output value with respect to the input. `jacobian[j, i]` is
the partial derivative of `value[j]` with respect to `x[i]`.
* `has_hessian`: if `true`, the `callback` additionally contains a field
`hessian`, which is an `N × N × M` matrix, where `hessian[i, j, k]` is the
partial derivative of `value[k]` with respect to `x[i]` and `x[j]`.
## Example
Expand Down Expand Up @@ -55,7 +59,16 @@ julia> y = MathOptAI.add_predictor(model, MathOptAI.ReducedSpace(f), x)
"""
struct GrayBox{F<:Function,G<:Function} <: AbstractPredictor
output_size::F
with_jacobian::G
callback::G
has_hessian::Bool

function GrayBox(
output_size::F,
callback::G;
has_hessian::Bool = false,
) where {F<:Function,G<:Function}
return new{F,G}(output_size, callback, has_hessian)
end
end

function add_predictor(model::JuMP.AbstractModel, predictor::GrayBox, x::Vector)
Expand All @@ -73,7 +86,7 @@ function add_predictor(
last_x, cache = nothing, nothing
function update(x)
if x != last_x
cache = predictor.predictor.with_jacobian(x)
cache = predictor.predictor.callback(x)
last_x = x
end
return
Expand All @@ -87,14 +100,22 @@ function add_predictor(
g .= cache.jacobian[i, :]
return
end
function ∇²f(H::AbstractMatrix{Float64}, k::Int, x...)
update(x)
for j in 1:length(x), i in j:length(x)
H[i, j] = cache.hessian[i, j, k]
end
return
end
return map(1:predictor.predictor.output_size(x)) do i
op_i = JuMP.add_nonlinear_operator(
model,
length(x),
(x...) -> f(i, x...),
(g, x...) -> ∇f(g, i, x...);
name = Symbol("op_$(gensym())"),
)
callbacks = if predictor.predictor.has_hessian
∇²fi = (H, x...) -> ∇²f(H, i, x...)
((x...) -> f(i, x...), (g, x...) -> ∇f(g, i, x...), ∇²fi)
else
((x...) -> f(i, x...), (g, x...) -> ∇f(g, i, x...))
end
name = Symbol("op_$(gensym())")
op_i = JuMP.add_nonlinear_operator(model, length(x), callbacks...; name)
return op_i(x...)
end
end
43 changes: 43 additions & 0 deletions test/test_Flux.jl
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,27 @@ function test_gray_box_scalar_output()
return
end

function test_gray_box_scalar_output_hessian()
chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:2] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
gray_box_hessian = true,
reduced_space = true,
)
@objective(model, Max, only(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
return
end

function test_gray_box_vector_output()
chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2))
model = Model(Ipopt.Optimizer)
Expand All @@ -251,6 +272,28 @@ function test_gray_box_vector_output()
return
end

function test_gray_box_vector_output_hessian()
chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:3] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
gray_box_hessian = true,
reduced_space = true,
)
@test length(y) == 2
@objective(model, Max, sum(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
return
end

end # module

TestFluxExt.runtests()
Loading

0 comments on commit 304e62b

Please sign in to comment.