From a8aefa53637c76c9008363bc7ae6266f9634ff43 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Fri, 17 Jan 2025 00:52:57 +0000 Subject: [PATCH] build based on eec3ace --- dev/.documenter-siteinfo.json | 2 +- dev/api/index.html | 68 +- dev/developers/design_principles/index.html | 2 +- dev/index.html | 2 +- dev/manual/AbstractGPs/index.html | 2 +- dev/manual/DecisionTree/index.html | 2 +- dev/manual/Flux/index.html | 17 +- dev/manual/GLM/index.html | 6 +- dev/manual/Lux/index.html | 14 +- dev/manual/PyTorch/index.html | 2 +- dev/manual/predictors/index.html | 2 +- dev/objects.inv | Bin 1663 -> 1663 bytes dev/tutorials/decision_trees/index.html | 2 +- dev/tutorials/gaussian/00c40f9e.svg | 74 + dev/tutorials/gaussian/0b8fc589.svg | 78 - dev/tutorials/gaussian/0ef7442e.svg | 71 + dev/tutorials/gaussian/13aa5ed3.svg | 78 + dev/tutorials/gaussian/15d1ac5f.svg | 71 - dev/tutorials/gaussian/c6233b11.svg | 74 - .../gaussian/{9f473ebe.svg => e39fcdbd.svg} | 72 +- dev/tutorials/gaussian/index.html | 8 +- .../65060d29.svg => mnist/39f02ea8.svg} | 1469 ++++++++--------- .../mnist/{56267ab7.svg => 585c605e.svg} | 293 ++-- .../51e7e4a5.svg => mnist/ad9f9d48.svg} | 214 +-- .../mnist/{2880cb7b.svg => e2b9d637.svg} | 162 +- dev/tutorials/mnist/index.html | 72 +- .../mnist_lux/{63b193ec.svg => 05a744a6.svg} | 290 ++-- .../b0b1a6ac.svg => mnist_lux/4d4fd437.svg} | 1284 +++++++------- .../de9b510b.svg => mnist_lux/99779d36.svg} | 214 +-- .../mnist_lux/{ee10d13e.svg => 9e92f72c.svg} | 162 +- dev/tutorials/mnist_lux/index.html | 72 +- .../pytorch/{ea38d5e3.svg => fcabca83.svg} | 68 +- dev/tutorials/pytorch/index.html | 2 +- dev/tutorials/student_enrollment/index.html | 4 +- 34 files changed, 2481 insertions(+), 2472 deletions(-) create mode 100644 dev/tutorials/gaussian/00c40f9e.svg delete mode 100644 dev/tutorials/gaussian/0b8fc589.svg create mode 100644 dev/tutorials/gaussian/0ef7442e.svg create mode 100644 dev/tutorials/gaussian/13aa5ed3.svg delete mode 100644 dev/tutorials/gaussian/15d1ac5f.svg delete mode 100644 dev/tutorials/gaussian/c6233b11.svg rename dev/tutorials/gaussian/{9f473ebe.svg => e39fcdbd.svg} (86%) rename dev/tutorials/{mnist_lux/65060d29.svg => mnist/39f02ea8.svg} (68%) rename dev/tutorials/mnist/{56267ab7.svg => 585c605e.svg} (67%) rename dev/tutorials/{mnist_lux/51e7e4a5.svg => mnist/ad9f9d48.svg} (81%) rename dev/tutorials/mnist/{2880cb7b.svg => e2b9d637.svg} (88%) rename dev/tutorials/mnist_lux/{63b193ec.svg => 05a744a6.svg} (73%) rename dev/tutorials/{mnist/b0b1a6ac.svg => mnist_lux/4d4fd437.svg} (62%) rename dev/tutorials/{mnist/de9b510b.svg => mnist_lux/99779d36.svg} (87%) rename dev/tutorials/mnist_lux/{ee10d13e.svg => 9e92f72c.svg} (88%) rename dev/tutorials/pytorch/{ea38d5e3.svg => fcabca83.svg} (82%) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 77e7b00f..e2cc72b3 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2025-01-17T00:26:30","documenter_version":"1.8.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2025-01-17T00:52:51","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/dev/api/index.html b/dev/api/index.html index f13a127b..d7b2932e 100644 --- a/dev/api/index.html +++ b/dev/api/index.html @@ -1,5 +1,5 @@ -API Reference · MathOptAI.jl

API Reference

This page lists the public API of MathOptAI.

Info

This page is an unstructured list of the MathOptAI API. For a more structured overview, read the Manual or Tutorial parts of this documentation.

Load all of the public API into the current scope with:

using MathOptAI

Alternatively, load only the module with:

import MathOptAI

and then prefix all calls with MathOptAI. to create MathOptAI.<NAME>.

AbstractPredictor

add_predictor

MathOptAI.add_predictorFunction
add_predictor(
+API Reference · MathOptAI.jl

API Reference

This page lists the public API of MathOptAI.

Info

This page is an unstructured list of the MathOptAI API. For a more structured overview, read the Manual or Tutorial parts of this documentation.

Load all of the public API into the current scope with:

using MathOptAI

Alternatively, load only the module with:

import MathOptAI

and then prefix all calls with MathOptAI. to create MathOptAI.<NAME>.
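
For example, a minimal sketch of the qualified style (the Affine predictor and the two-variable model below are illustrative, not required):

julia> import JuMP, MathOptAI

julia> model = JuMP.Model();

julia> JuMP.@variable(model, x[1:2]);

julia> y, formulation = MathOptAI.add_predictor(model, MathOptAI.Affine([2.0 3.0], [4.0]), x);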

AbstractPredictor

add_predictor

MathOptAI.add_predictorFunction
add_predictor(
     model::JuMP.AbstractModel,
     predictor::AbstractPredictor,
     x::Vector,
@@ -23,7 +23,7 @@
 ├ variables [1]
 │ └ moai_Affine[1]
 └ constraints [1]
-  └ 2 x[1] + 3 x[2] - moai_Affine[1] = 0
source

build_predictor

Affine

build_predictor

Affine

MathOptAI.AffineType
Affine(
     A::Matrix{T},
     b::Vector{T} = zeros(T, size(A, 1)),
 ) where {T} <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = A x + b\]

Example

julia> using JuMP, MathOptAI
@@ -60,7 +60,7 @@
 julia> formulation
 ReducedSpace(Affine(A, b) [input: 2, output: 1])
 ├ variables [0]
-└ constraints [0]
source

BinaryDecisionTree

BinaryDecisionTree

MathOptAI.BinaryDecisionTreeType
BinaryDecisionTree{K,V}(
     feat_id::Int,
     feat_value::K,
     lhs::Union{V,BinaryDecisionTree{K,V}},
@@ -99,7 +99,7 @@
   ├ moai_BinaryDecisionTree_z[2] --> {x[1] ≤ 1}
   ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 0}
   ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 1}
-  └ moai_BinaryDecisionTree_z[1] - moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0
source

GrayBox

MathOptAI.GrayBoxType
GrayBox(
+  └ moai_BinaryDecisionTree_z[1] - moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0
source

GrayBox

MathOptAI.GrayBoxType
GrayBox(
     output_size::Function,
     callback::Function;
     has_hessian::Bool = false,
@@ -141,7 +141,7 @@
 julia> formulation
 ReducedSpace(GrayBox)
 ├ variables [0]
-└ constraints [0]
source

Pipeline

MathOptAI.PipelineType
Pipeline(layers::Vector{AbstractPredictor}) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = (l_1 \circ \ldots \circ l_N)(x)\]

where $l_1, \ldots, l_N$ are other AbstractPredictors.

Example

julia> using JuMP, MathOptAI
+└ constraints [0]
source

Pipeline

MathOptAI.PipelineType
Pipeline(layers::Vector{AbstractPredictor}) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = (l_1 \circ \ldots \circ l_N)(x)\]

where $l_1, \ldots, l_N$ are other AbstractPredictors.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -175,11 +175,11 @@
   ├ moai_ReLU[1] ≥ 0
   ├ moai_z[1] ≥ 0
   ├ moai_Affine[1] - moai_ReLU[1] + moai_z[1] = 0
-  └ moai_ReLU[1]*moai_z[1] = 0
source

PytorchModel

MathOptAI.PytorchModelType
PytorchModel(filename::String)

A wrapper struct for loading a PyTorch model.

The only supported file extension is .pt, where the .pt file has been created using torch.save(model, filename).

Warning

To use PytorchModel, your code must load the PythonCall package:

import PythonCall

Example

julia> using MathOptAI
+  └ moai_ReLU[1]*moai_z[1] = 0
source

PytorchModel

MathOptAI.PytorchModelType
PytorchModel(filename::String)

A wrapper struct for loading a PyTorch model.

The only supported file extension is .pt, where the .pt file has been created using torch.save(model, filename).

Warning

To use PytorchModel, your code must load the PythonCall package:

import PythonCall

Example

julia> using MathOptAI
 
 julia> using PythonCall  #  This line is important!
 
-julia> predictor = PytorchModel("model.pt");
source

Quantile

MathOptAI.QuantileType
Quantile{D}(distribution::D, quantiles::Vector{Float64}) where {D}

An AbstractPredictor that represents the quantiles of distribution.

Example

julia> using JuMP, Distributions, MathOptAI
+julia> predictor = PytorchModel("model.pt");
source

Quantile

MathOptAI.QuantileType
Quantile{D}(distribution::D, quantiles::Vector{Float64}) where {D}

An AbstractPredictor that represents the quantiles of distribution.

Example

julia> using JuMP, Distributions, MathOptAI
 
 julia> model = Model();
 
@@ -204,7 +204,7 @@
 │ └ moai_quantile[2]
 └ constraints [2]
   ├ moai_quantile[1] - op_quantile_0.1(x) = 0
-  └ moai_quantile[2] - op_quantile_0.9(x) = 0
source

ReducedSpace

MathOptAI.ReducedSpaceType
ReducedSpace(predictor::AbstractPredictor)

A wrapper type for other predictors that implement a reduced-space formulation.

Example

julia> using JuMP, MathOptAI
+  └ moai_quantile[2] - op_quantile_0.9(x) = 0
source

ReducedSpace

MathOptAI.ReducedSpaceType
ReducedSpace(predictor::AbstractPredictor)

A wrapper type for other predictors that implement a reduced-space formulation.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -217,7 +217,7 @@
 julia> y
 2-element Vector{NonlinearExpr}:
  max(0.0, x[1])
- max(0.0, x[2])
source

ReLU

MathOptAI.ReLUType
ReLU() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

as a non-smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
+ max(0.0, x[2])
source

ReLU

MathOptAI.ReLUType
ReLU() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

as a non-smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -257,7 +257,7 @@
 julia> formulation
 ReducedSpace(ReLU())
 ├ variables [0]
-└ constraints [0]
source

ReLUBigM

MathOptAI.ReLUBigMType
ReLUBigM(M::Float64) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

via the big-M MIP reformulation:

\[\begin{aligned} +└ constraints [0]

source

ReLUBigM

MathOptAI.ReLUBigMType
ReLUBigM(M::Float64) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

via the big-M MIP reformulation:

\[\begin{aligned} y \ge 0 \\ y \ge x \\ y \le M z \\ @@ -298,7 +298,7 @@ ├ moai_z[2] binary ├ -x[2] + moai_ReLU[2] ≥ 0 ├ moai_ReLU[2] - 2 moai_z[2] ≤ 0 - └ -x[2] + moai_ReLU[2] + 3 moai_z[2] ≤ 3

source

ReLUQuadratic

MathOptAI.ReLUQuadraticType
ReLUQuadratic() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned} + └ -x[2] + moai_ReLU[2] + 3 moai_z[2] ≤ 3

source

ReLUQuadratic

MathOptAI.ReLUQuadraticType
ReLUQuadratic() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned} x = y - z \\ y \cdot z = 0 \\ y, z \ge 0 @@ -337,7 +337,7 @@ ├ x[1] - moai_ReLU[1] + moai_z[1] = 0 ├ x[2] - moai_ReLU[2] + moai_z[2] = 0 ├ moai_ReLU[1]*moai_z[1] = 0 - └ moai_ReLU[2]*moai_z[2] = 0

source

ReLUSOS1

MathOptAI.ReLUSOS1Type
ReLUSOS1() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned} + └ moai_ReLU[2]*moai_z[2] = 0

source

ReLUSOS1

MathOptAI.ReLUSOS1Type
ReLUSOS1() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned} x = y - z \\ [y, z] \in SOS1 \\ y, z \ge 0 @@ -374,7 +374,7 @@ ├ x[1] - moai_ReLU[1] + moai_z[1] = 0 ├ x[2] - moai_ReLU[2] + moai_z[2] = 0 ├ [moai_ReLU[1], moai_z[1]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0]) - └ [moai_ReLU[2], moai_z[2]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])

source

Scale

MathOptAI.ScaleType
Scale(
+  └ [moai_ReLU[2], moai_z[2]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])
source

Scale

MathOptAI.ScaleType
Scale(
     scale::Vector{T},
     bias::Vector{T},
 ) where {T} <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = Diag(scale)x + bias\]

Example

julia> using JuMP, MathOptAI
@@ -417,7 +417,7 @@
 julia> formulation
 ReducedSpace(Scale(scale, bias))
 ├ variables [0]
-└ constraints [0]
source

Sigmoid

MathOptAI.SigmoidType
Sigmoid() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{1 + e^{-x}}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
+└ constraints [0]
source

Sigmoid

MathOptAI.SigmoidType
Sigmoid() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{1 + e^{-x}}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -457,7 +457,7 @@
 julia> formulation
 ReducedSpace(Sigmoid())
 ├ variables [0]
-└ constraints [0]
source

SoftMax

MathOptAI.SoftMaxType
SoftMax() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{e^{x}}{||e^{x}||_1}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
+└ constraints [0]
source

SoftMax

MathOptAI.SoftMaxType
SoftMax() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{e^{x}}{||e^{x}||_1}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -503,7 +503,7 @@
 │ └ moai_SoftMax_denom
 └ constraints [2]
   ├ moai_SoftMax_denom ≥ 0
-  └ moai_SoftMax_denom - (0.0 + exp(x[2]) + exp(x[1])) = 0
source

SoftPlus

MathOptAI.SoftPlusType
SoftPlus(; beta = 1.0) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{\beta} \log(1 + e^{\beta x})\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
+  └ moai_SoftMax_denom - (0.0 + exp(x[2]) + exp(x[1])) = 0
source

SoftPlus

MathOptAI.SoftPlusType
SoftPlus(; beta = 1.0) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{\beta} \log(1 + e^{\beta x})\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -543,7 +543,7 @@
 julia> formulation
 ReducedSpace(SoftPlus(2.0))
 ├ variables [0]
-└ constraints [0]
source

Tanh

MathOptAI.TanhType
Tanh() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \tanh(x)\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
+└ constraints [0]
source

Tanh

MathOptAI.TanhType
Tanh() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \tanh(x)\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -583,14 +583,14 @@
 julia> formulation
 ReducedSpace(Tanh())
 ├ variables [0]
-└ constraints [0]
source

AbstractFormulation

Formulation

AbstractFormulation

Formulation

MathOptAI.FormulationType
struct Formulation{P<:AbstractPredictor} <: AbstractFormulation
     predictor::P
     variables::Vector{Any}
     constraints::Vector{Any}
-end

Fields

  • predictor: the predictor object used to build the formulation
  • variables: a vector of new decision variables added to the model
  • constraints: a vector of new constraints added to the model

Check the docstring of the predictor for an explanation of the formulation and the order of the elements in .variables and .constraints.

source

PipelineFormulation

MathOptAI.PipelineFormulationType
struct PipelineFormulation{P<:AbstractPredictor} <: AbstractFormulation
+end

Fields

  • predictor: the predictor object used to build the formulation
  • variables: a vector of new decision variables added to the model
  • constraints: a vector of new constraints added to the model

Check the docstring of the predictor for an explanation of the formulation and the order of the elements in .variables and .constraints.

source
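
For example, a sketch of inspecting a formulation, reusing the y, formulation pair returned by any earlier add_predictor call (outputs suppressed with ; because they depend on the predictor):

julia> formulation.predictor;

julia> formulation.variables;

julia> formulation.constraints;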

PipelineFormulation

MathOptAI.PipelineFormulationType
struct PipelineFormulation{P<:AbstractPredictor} <: AbstractFormulation
     predictor::P
     layers::Vector{Any}
-end

Fields

  • predictor: the predictor object used to build the formulation
  • layers: the formulation associated with each of the layers in the pipeline
source

Extensions

MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
+end

Fields

  • predictor: the predictor object used to build the formulation
  • layers: the formulation associated with each of the layers in the pipeline
source

Extensions

MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::MathOptAI.Quantile{<:AbstractGPs.PosteriorGP},
     x::Vector,
@@ -618,7 +618,7 @@
  moai_quantile[2]
 
 julia> @objective(model, Max, y[2] - y[1])
-moai_quantile[2] - moai_quantile[1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Union{DecisionTree.Root,DecisionTree.DecisionTreeClassifier},
     x::Vector,
@@ -647,7 +647,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_BinaryDecisionTree_value
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::DecisionTree.Root)

Convert a binary decision tree from DecisionTree.jl to a BinaryDecisionTree.

Example

julia> using MathOptAI, DecisionTree
+ moai_BinaryDecisionTree_value
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::DecisionTree.Root)

Convert a binary decision tree from DecisionTree.jl to a BinaryDecisionTree.

Example

julia> using MathOptAI, DecisionTree
 
 julia> truth(x::Vector) = x[1] <= 0.5 ? -2 : (x[2] <= 0.3 ? 3 : 4)
 truth (generic function with 1 method)
@@ -665,7 +665,7 @@
 Depth:  2
 
 julia> predictor = MathOptAI.build_predictor(tree)
-BinaryDecisionTree{Float64,Int64} [leaves=3, depth=2]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Flux.Chain,
     x::Vector;
@@ -689,7 +689,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::Flux.Chain;
     config::Dict = Dict{Any,Any}(),
     gray_box::Bool = false,
@@ -714,7 +714,7 @@
 Pipeline with layers:
  * Affine(A, b) [input: 1, output: 16]
  * ReLUQuadratic()
- * Affine(A, b) [input: 16, output: 1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::GLM.GeneralizedLinearModel{
         GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
@@ -741,7 +741,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Sigmoid[1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::GLM.LinearModel,
     x::Vector;
@@ -760,7 +760,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::GLM.GeneralizedLinearModel{
         GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
     };
@@ -774,14 +774,14 @@
 julia> predictor = MathOptAI.build_predictor(model)
 Pipeline with layers:
  * Affine(A, b) [input: 2, output: 1]
- * Sigmoid()
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::GLM.LinearModel)

Convert a trained linear model from GLM.jl to an Affine layer.

Example

julia> using GLM, MathOptAI
+ * Sigmoid()
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::GLM.LinearModel)

Convert a trained linear model from GLM.jl to an Affine layer.

Example

julia> using GLM, MathOptAI
 
 julia> X, Y = rand(10, 2), rand(10);
 
 julia> model = GLM.lm(X, Y);
 
 julia> predictor = MathOptAI.build_predictor(model)
-Affine(A, b) [input: 2, output: 1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple},
     x::Vector;
@@ -815,7 +815,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple};
     config::Dict = Dict{Any,Any}(),
 )

Convert a trained neural network from Lux.jl to a Pipeline.

Supported layers

  • Lux.Dense
  • Lux.Scale

Supported activation functions

  • Lux.relu
  • Lux.sigmoid
  • Lux.softplus
  • Lux.softmax
  • Lux.tanh

Keyword arguments

  • config: a dictionary that maps supported Lux activation functions to AbstractPredictors that control how the activation functions are reformulated. For example, Lux.sigmoid => MathOptAI.Sigmoid() or Lux.relu => MathOptAI.ReLUQuadratic().

Example

julia> using Lux, MathOptAI, Random
@@ -847,7 +847,7 @@
 Pipeline with layers:
  * Affine(A, b) [input: 1, output: 16]
  * ReLUQuadratic()
- * Affine(A, b) [input: 16, output: 1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::MathOptAI.PytorchModel,
     x::Vector;
@@ -856,13 +856,13 @@
     gray_box::Bool = false,
     gray_box_hessian::Bool = false,
     gray_box_device::String = "cpu",
-)

Add a trained neural network from PyTorch via PythonCall.jl to model.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
+)

Add a trained neural network from PyTorch via PythonCall.jl to model.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
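
A hedged sketch of the config option ("model.pt" is a placeholder file, and the length of x must match the network's input size):

julia> using JuMP, MathOptAI, PythonCall

julia> model = Model();

julia> @variable(model, x[1:2]);

julia> predictor = MathOptAI.PytorchModel("model.pt");

julia> config = Dict(:ReLU => MathOptAI.ReLUBigM(100.0));

julia> y, formulation = MathOptAI.add_predictor(model, predictor, x; config = config);

Alternatively, pass gray_box = true instead of config to treat the whole network as a single user-defined nonlinear operator.
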
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::MathOptAI.PytorchModel;
     config::Dict = Dict{Any,Any}(),
     gray_box::Bool = false,
     gray_box_hessian::Bool = false,
     gray_box_device::String = "cpu",
-)

Convert a trained neural network from PyTorch via PythonCall.jl to a Pipeline.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
+)

Convert a trained neural network from PyTorch via PythonCall.jl to a Pipeline.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::StatsModels.TableRegressionModel,
     x::DataFrames.DataFrame;
@@ -891,4 +891,4 @@
  moai_Affine[1]
  moai_Affine[1]
  moai_Affine[1]
- moai_Affine[1]
source
+ moai_Affine[1]source diff --git a/dev/developers/design_principles/index.html b/dev/developers/design_principles/index.html index 1658d320..f5917cae 100644 --- a/dev/developers/design_principles/index.html +++ b/dev/developers/design_principles/index.html @@ -23,4 +23,4 @@ model_reduced_space = Model() @variable(model_reduced_space, x[1:2]) @variable(model_reduced_space, y[1:3]) -@constraint(model_reduced_space, y .== max.(0, layer.A * x + layer.b))

In general, the full-space formulations have more variables and constraints but simpler nonlinear expressions, whereas the reduced-space formulations have fewer variables and constraints but more complicated nonlinear expressions.

MathOptAI.jl implements the full-space formulation by default, but some layers support the reduced-space formulation with the ReducedSpace wrapper.

+@constraint(model_reduced_space, y .== max.(0, layer.A * x + layer.b))

In general, the full-space formulations have more variables and constraints but simpler nonlinear expressions, whereas the reduced-space formulations have fewer variables and constraints but more complicated nonlinear expressions.

MathOptAI.jl implements the full-space formulation by default, but some layers support the reduced-space formulation with the ReducedSpace wrapper.
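
For example, a sketch wrapping the ReLU predictor (any layer that supports a reduced-space formulation can be wrapped the same way):

using JuMP, MathOptAI
model = Model()
@variable(model, x[1:2])
predictor = MathOptAI.ReducedSpace(MathOptAI.ReLU())
y, formulation = MathOptAI.add_predictor(model, predictor, x)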

diff --git a/dev/index.html b/dev/index.html index dc7eb50d..8449ced6 100644 --- a/dev/index.html +++ b/dev/index.html @@ -27,4 +27,4 @@ moai_SoftMax[7] moai_SoftMax[8] moai_SoftMax[9] - moai_SoftMax[10]

Getting help

This package is under active development. For help, questions, comments, and suggestions, please open a GitHub issue.

Inspiration

This project is mainly inspired by two existing projects:

Other works, from which we took less inspiration, include:

The 2024 paper of López-Flores et al. is an excellent summary of the state of the field at the time that we started development of MathOptAI.

López-Flores, F.J., Ramírez-Márquez, C., Ponce-Ortega J.M. (2024). Process Systems Engineering Tools for Optimization of Trained Machine Learning Models: Comparative and Perspective. Industrial & Engineering Chemistry Research, 63(32), 13966-13979. DOI: 10.1021/acs.iecr.4c00632

+ moai_SoftMax[10]

Getting help

This package is under active development. For help, questions, comments, and suggestions, please open a GitHub issue.

Inspiration

This project is mainly inspired by two existing projects:

Other works, from which we took less inspiration, include:

The 2024 paper of López-Flores et al. is an excellent summary of the state of the field at the time that we started development of MathOptAI.

López-Flores, F.J., Ramírez-Márquez, C., Ponce-Ortega J.M. (2024). Process Systems Engineering Tools for Optimization of Trained Machine Learning Models: Comparative and Perspective. Industrial & Engineering Chemistry Research, 63(32), 13966-13979. DOI: 10.1021/acs.iecr.4c00632

diff --git a/dev/manual/AbstractGPs/index.html b/dev/manual/AbstractGPs/index.html index 1d8bd39b..e8f0accf 100644 --- a/dev/manual/AbstractGPs/index.html +++ b/dev/manual/AbstractGPs/index.html @@ -3,4 +3,4 @@ moai_quantile[1] moai_quantile[2]
julia> formulationQuantile(_, [0.1, 0.9]) ├ variables [0] -└ constraints [0]
julia> @objective(model, Max, y[2] - y[1])moai_quantile[2] - moai_quantile[1] +└ constraints [0]
julia> @objective(model, Max, y[2] - y[1])moai_quantile[2] - moai_quantile[1] diff --git a/dev/manual/DecisionTree/index.html b/dev/manual/DecisionTree/index.html index b9e6b37a..5b6bcef2 100644 --- a/dev/manual/DecisionTree/index.html +++ b/dev/manual/DecisionTree/index.html @@ -15,4 +15,4 @@ ├ moai_BinaryDecisionTree_z[2] --> {x[2] ≤ 0.41966499895337794} ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 0.4743457016210958} ├ moai_BinaryDecisionTree_z[3] --> {x[2] ≥ 0.41966499895337794} - └ 2 moai_BinaryDecisionTree_z[1] - 3 moai_BinaryDecisionTree_z[2] - 4 moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0 + └ 2 moai_BinaryDecisionTree_z[1] - 3 moai_BinaryDecisionTree_z[2] - 4 moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0 diff --git a/dev/manual/Flux/index.html b/dev/manual/Flux/index.html index 2a3a187d..975c1340 100644 --- a/dev/manual/Flux/index.html +++ b/dev/manual/Flux/index.html @@ -5,8 +5,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 0.7435214519500732 x[1] - moai_Affine[1] = 0 - └ -1.2505210638046265 x[1] - moai_Affine[2] = 0 + ├ 0.2802408039569855 x[1] - moai_Affine[1] = 0 + └ -0.65338134765625 x[1] - moai_Affine[2] = 0 MathOptAI.ReLU() ├ variables [2] │ ├ moai_ReLU[1] @@ -19,11 +19,10 @@ Affine(A, b) [input: 2, output: 1] ├ variables [1] │ └ moai_Affine[1] -└ constraints [2] - ├ moai_Affine[1] ≤ 0 - └ -1.1201879978179932 moai_ReLU[1] - 0.5682011842727661 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Flux, MathOptAI
julia> predictor = Flux.Chain(Flux.Dense(1 => 2, Flux.relu), Flux.Dense(2 => 1));
julia> model = Model();
julia> @variable(model, x[1:1]);
julia> y, formulation = +└ constraints [1] + └ 0.17266754806041718 moai_ReLU[1] - 0.7616938352584839 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Flux, MathOptAI
julia> predictor = Flux.Chain(Flux.Dense(1 => 2, Flux.relu), Flux.Dense(2 => 1));
julia> model = Model();
julia> @variable(model, x[1:1]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x; reduced_space = true);
julia> y1-element Vector{JuMP.NonlinearExpr}: - ((+(0.0) + (0.7965260148048401 * max(0.0, 1.2558213472366333 x[1]))) + (1.025068998336792 * max(0.0, -0.09778439253568649 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) + ((+(0.0) + (0.2866336405277252 * max(0.0, -1.4083470106124878 x[1]))) + (1.030832290649414 * max(0.0, 1.3135437965393066 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) ├ variables [0] └ constraints [0] ReducedSpace(MathOptAI.ReLU()) @@ -48,8 +47,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ -0.9636475443840027 x[1] - moai_Affine[1] = 0 - └ -0.224209263920784 x[1] - moai_Affine[2] = 0 + ├ 1.0782548189163208 x[1] - moai_Affine[1] = 0 + └ -0.2073817104101181 x[1] - moai_Affine[2] = 0 MathOptAI.ReLUSOS1() ├ variables [4] │ ├ moai_ReLU[1] @@ -67,4 +66,4 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ 1.399377703666687 moai_ReLU[1] - 0.2851664125919342 moai_ReLU[2] - moai_Affine[1] = 0
+ └ 1.2061480283737183 moai_ReLU[1] - 1.3855754137039185 moai_ReLU[2] - moai_Affine[1] = 0 diff --git a/dev/manual/GLM/index.html b/dev/manual/GLM/index.html index 22c6e6e2..334cf1fe 100644 --- a/dev/manual/GLM/index.html +++ b/dev/manual/GLM/index.html @@ -4,12 +4,12 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ 0.5943052098775468 x[1] + 0.3710399064355762 x[2] - moai_Affine[1] = 0

Logistic regression

The input x to add_predictor must be a vector with the same number of elements as columns in the training matrix. The return is a vector of JuMP variables with a single element.

julia> using GLM, JuMP, MathOptAI
julia> X, Y = rand(10, 2), rand(Bool, 10);
julia> predictor = GLM.glm(X, Y, GLM.Bernoulli());
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x);
julia> y1-element Vector{JuMP.VariableRef}: + └ 0.42363749669649714 x[1] + 0.39423425929036604 x[2] - moai_Affine[1] = 0

Logistic regression

The input x to add_predictor must be a vector with the same number of elements as columns in the training matrix. The return is a vector of JuMP variables with a single element.

julia> using GLM, JuMP, MathOptAI
julia> X, Y = rand(10, 2), rand(Bool, 10);
julia> predictor = GLM.glm(X, Y, GLM.Bernoulli());
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x);
julia> y1-element Vector{JuMP.VariableRef}: moai_Sigmoid[1]
julia> formulationAffine(A, b) [input: 2, output: 1] ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ -0.38816394476620386 x[1] + 0.13876572350123043 x[2] - moai_Affine[1] = 0 + └ 1.4441603762281439 x[1] - 3.247718515896524 x[2] - moai_Affine[1] = 0 MathOptAI.Sigmoid() ├ variables [1] │ └ moai_Sigmoid[1] @@ -25,4 +25,4 @@ moai_Affine[1] moai_Affine[1] moai_Affine[1] - moai_Affine[1]
+ moai_Affine[1] diff --git a/dev/manual/Lux/index.html b/dev/manual/Lux/index.html index 16e55a2f..5fc0a613 100644 --- a/dev/manual/Lux/index.html +++ b/dev/manual/Lux/index.html @@ -9,8 +9,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 0.0008783403318375349 x[1] - moai_Affine[1] = 0 - └ -0.2703956365585327 x[1] - moai_Affine[2] = 0 + ├ 0.6261571645736694 x[1] - moai_Affine[1] = 0 + └ -1.1599301099777222 x[1] - moai_Affine[2] = 0 MathOptAI.ReLU() ├ variables [2] │ ├ moai_ReLU[1] @@ -25,13 +25,13 @@ │ └ moai_Affine[1] └ constraints [2] ├ moai_Affine[1] ≥ 0 - └ 0.9404870271682739 moai_ReLU[1] + 0.8026401996612549 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Lux, MathOptAI, Random
julia> rng = Random.MersenneTwister();
julia> chain = Lux.Chain(Lux.Dense(1 => 2, Lux.relu), Lux.Dense(2 => 1))Chain( + └ 0.04167952388525009 moai_ReLU[1] + 0.053142793476581573 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Lux, MathOptAI, Random
julia> rng = Random.MersenneTwister();
julia> chain = Lux.Chain(Lux.Dense(1 => 2, Lux.relu), Lux.Dense(2 => 1))Chain( layer_1 = Dense(1 => 2, relu), # 4 parameters layer_2 = Dense(2 => 1), # 3 parameters ) # Total: 7 parameters, # plus 0 states.
julia> parameters, state = Lux.setup(rng, chain);
julia> predictor = (chain, parameters, state);
julia> model = Model();
julia> @variable(model, x[1:1]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x; reduced_space = true);
julia> y1-element Vector{JuMP.NonlinearExpr}: - ((+(0.0) + (-0.13756495714187622 * max(0.0, 0.21255970001220703 x[1]))) + (-0.9397847056388855 * max(0.0, 1.045299768447876 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) + ((+(0.0) + (0.21672280132770538 * max(0.0, 0.9032507538795471 x[1]))) + (0.4731472432613373 * max(0.0, -0.4107099175453186 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) ├ variables [0] └ constraints [0] ReducedSpace(MathOptAI.ReLU()) @@ -54,8 +54,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 0.45079728960990906 x[1] - moai_Affine[1] = 0 - └ 0.8464735746383667 x[1] - moai_Affine[2] = 0 + ├ 0.14432968199253082 x[1] - moai_Affine[1] = 0 + └ 0.4608447551727295 x[1] - moai_Affine[2] = 0 MathOptAI.ReLUSOS1() ├ variables [4] │ ├ moai_ReLU[1] @@ -73,4 +73,4 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ 0.4085981845855713 moai_ReLU[1] - 1.0271365642547607 moai_ReLU[2] - moai_Affine[1] = 0
+ └ 0.189326673746109 moai_ReLU[1] - 0.5331228971481323 moai_ReLU[2] - moai_Affine[1] = 0 diff --git a/dev/manual/PyTorch/index.html b/dev/manual/PyTorch/index.html index d16054a9..10684dc6 100644 --- a/dev/manual/PyTorch/index.html +++ b/dev/manual/PyTorch/index.html @@ -77,4 +77,4 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ -0.4644489884376526 moai_ReLU[1] + 0.5382549166679382 moai_ReLU[2] - moai_Affine[1] = 0.5698285102844238 + └ -0.4644489884376526 moai_ReLU[1] + 0.5382549166679382 moai_ReLU[2] - moai_Affine[1] = 0.5698285102844238 diff --git a/dev/manual/predictors/index.html b/dev/manual/predictors/index.html index 12b2f5c4..c6be2fd7 100644 --- a/dev/manual/predictors/index.html +++ b/dev/manual/predictors/index.html @@ -1,2 +1,2 @@ -Predictors · MathOptAI.jl

Predictors

The main entry point for embedding prediction models into JuMP is add_predictor.

All methods use the form y, formulation = MathOptAI.add_predictor(model, predictor, x) to add the relationship y = predictor(x) to model.

Supported predictors

The following predictors are supported. See their docstrings for details:

Predictor          | Relationship                                   | Dimensions
-------------------|------------------------------------------------|-------------------
Affine             | $f(x) = Ax + b$                                | $M \rightarrow N$
BinaryDecisionTree | A binary decision tree                         | $M \rightarrow 1$
GrayBox            | $f(x)$                                         | $M \rightarrow N$
Pipeline           | $f(x) = (l_1 \circ \ldots \circ l_N)(x)$       | $M \rightarrow N$
Quantile           | The quantiles of a distribution                | $M \rightarrow N$
ReLU               | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUBigM           | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUQuadratic      | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUSOS1           | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
Scale              | $f(x) = scale .* x .+ bias$                    | $M \rightarrow M$
Sigmoid            | $f(x) = \frac{1}{1 + e^{-x}}$                  | $M \rightarrow M$
SoftMax            | $f(x) = \frac{e^x}{\sum e^{x_i}}$              | $M \rightarrow M$
SoftPlus           | $f(x) = \frac{1}{\beta} \log(1 + e^{\beta x})$ | $M \rightarrow M$
Tanh               | $f(x) = \tanh.(x)$                             | $M \rightarrow M$

Note that some predictors, such as the ReLU ones, offer multiple formulations of the same mathematical relationship. The "right" choice is solver- and problem-dependent.

ReLU

There are a number of different mathematical formulations for the rectified linear unit (ReLU).

  • ReLU: requires the solver to support the max nonlinear operator.
  • ReLUBigM: requires the solver to support mixed-integer linear programs, and requires the user to have prior knowledge of a suitable value for the "big-M" parameter.
  • ReLUQuadratic: requires the solver to support quadratic equality constraints.
  • ReLUSOS1: requires the solver to support SOS1 constraints.

The correct choice for which ReLU formulation to use is problem- and solver-dependent.

+Predictors · MathOptAI.jl

Predictors

The main entry point for embedding prediction models into JuMP is add_predictor.

All methods use the form y, formulation = MathOptAI.add_predictor(model, predictor, x) to add the relationship y = predictor(x) to model.
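
As a minimal sketch (the Affine predictor is illustrative; any predictor from the table below works the same way):

julia> using JuMP, MathOptAI

julia> model = Model();

julia> @variable(model, x[1:2]);

julia> y, formulation = MathOptAI.add_predictor(model, MathOptAI.Affine([2.0 3.0], [4.0]), x);

julia> y
1-element Vector{VariableRef}:
 moai_Affine[1]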

Supported predictors

The following predictors are supported. See their docstrings for details:

Predictor          | Relationship                                   | Dimensions
-------------------|------------------------------------------------|-------------------
Affine             | $f(x) = Ax + b$                                | $M \rightarrow N$
BinaryDecisionTree | A binary decision tree                         | $M \rightarrow 1$
GrayBox            | $f(x)$                                         | $M \rightarrow N$
Pipeline           | $f(x) = (l_1 \circ \ldots \circ l_N)(x)$       | $M \rightarrow N$
Quantile           | The quantiles of a distribution                | $M \rightarrow N$
ReLU               | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUBigM           | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUQuadratic      | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
ReLUSOS1           | $f(x) = \max.(0, x)$                           | $M \rightarrow M$
Scale              | $f(x) = scale .* x .+ bias$                    | $M \rightarrow M$
Sigmoid            | $f(x) = \frac{1}{1 + e^{-x}}$                  | $M \rightarrow M$
SoftMax            | $f(x) = \frac{e^x}{\sum e^{x_i}}$              | $M \rightarrow M$
SoftPlus           | $f(x) = \frac{1}{\beta} \log(1 + e^{\beta x})$ | $M \rightarrow M$
Tanh               | $f(x) = \tanh.(x)$                             | $M \rightarrow M$

Note that some predictors, such as the ReLU ones, offer multiple formulations of the same mathematical relationship. The "right" choice is solver- and problem-dependent.

ReLU

There are a number of different mathematical formulations for the rectified linear unit (ReLU).

  • ReLU: requires the solver to support the max nonlinear operator.
  • ReLUBigM: requires the solver to support mixed-integer linear programs, and requires the user to have prior knowledge of a suitable value for the "big-M" parameter.
  • ReLUQuadratic: requires the solver to support quadratic equality constraints.
  • ReLUSOS1: requires the solver to support SOS1 constraints.

The correct choice for which ReLU formulation to use is problem- and solver-dependent.
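
For example, a sketch of opting into the big-M formulation (M = 100.0 is an assumed problem-specific bound; model and x are as in the sketch above):

julia> y, formulation = MathOptAI.add_predictor(model, MathOptAI.ReLUBigM(100.0), x);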

diff --git a/dev/objects.inv b/dev/objects.inv index 328024081d222e3cd1ef06f8b863f8bc3ca51fd9..1fe7455fbf5f60f7b9e26756cf41199e8b89d08e 100644 GIT binary patch delta 12 Tcmey*^Pgve3#0i)S1UFEB4`9z delta 12 Tcmey*^Pgve3!~XaS1UFEB4Y$t diff --git a/dev/tutorials/decision_trees/index.html b/dev/tutorials/decision_trees/index.html index 4bd5b13d..2a3a14d8 100644 --- a/dev/tutorials/decision_trees/index.html +++ b/dev/tutorials/decision_trees/index.html @@ -243,4 +243,4 @@ 5998 │ 5998 1165 2.81 x_merit[5998] moai_BinaryDecisionTree_valu ⋯ 5999 │ 5999 1400 3.43 x_merit[5999] moai_BinaryDecisionTree_valu 6000 │ 6000 1097 2.65 x_merit[6000] moai_BinaryDecisionTree_valu - 3 columns and 5985 rows omitted

Solution analysis

We expect that around 3,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)3530.0

We awarded merit scholarships to approximately 1 in 5 students:

julia> count(evaluate_df.merit_sol .> 1e-5)1278

The average merit scholarship was worth just under $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])938.6854460093895

This page was generated using Literate.jl.

+ 3 columns and 5985 rows omitted

Solution analysis

We expect that around 3,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)3530.0

We awarded merit scholarships to approximately 1 in 5 students:

julia> count(evaluate_df.merit_sol .> 1e-5)1278

The average merit scholarship was worth just under $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])938.6854460093895

This page was generated using Literate.jl.

diff --git a/dev/tutorials/gaussian/00c40f9e.svg b/dev/tutorials/gaussian/00c40f9e.svg new file mode 100644 index 00000000..7cd2d764 --- /dev/null +++ b/dev/tutorials/gaussian/00c40f9e.svg @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/0b8fc589.svg b/dev/tutorials/gaussian/0b8fc589.svg deleted file mode 100644 index 52aef7d8..00000000 --- a/dev/tutorials/gaussian/0b8fc589.svg +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/0ef7442e.svg b/dev/tutorials/gaussian/0ef7442e.svg new file mode 100644 index 00000000..d4591637 --- /dev/null +++ b/dev/tutorials/gaussian/0ef7442e.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/13aa5ed3.svg b/dev/tutorials/gaussian/13aa5ed3.svg new file mode 100644 index 00000000..a74476cb --- /dev/null +++ b/dev/tutorials/gaussian/13aa5ed3.svg @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/15d1ac5f.svg b/dev/tutorials/gaussian/15d1ac5f.svg deleted file mode 100644 index 01f54795..00000000 --- a/dev/tutorials/gaussian/15d1ac5f.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/c6233b11.svg b/dev/tutorials/gaussian/c6233b11.svg deleted file mode 100644 index 32c64e33..00000000 --- a/dev/tutorials/gaussian/c6233b11.svg +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/9f473ebe.svg b/dev/tutorials/gaussian/e39fcdbd.svg similarity index 86% rename from dev/tutorials/gaussian/9f473ebe.svg rename to dev/tutorials/gaussian/e39fcdbd.svg index ccfc4d68..50752ef4 100644 --- a/dev/tutorials/gaussian/9f473ebe.svg +++ b/dev/tutorials/gaussian/e39fcdbd.svg @@ -1,50 +1,50 @@ - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/index.html b/dev/tutorials/gaussian/index.html index 47dfdac2..c46b1081 100644 --- a/dev/tutorials/gaussian/index.html +++ b/dev/tutorials/gaussian/index.html @@ -6,13 +6,13 @@ import MathOptAI import Plots

Prediction model

Assume that we have some true underlying univariate function:

x_domain = 0:0.01:2π
 true_function(x) = sin(x)
-Plots.plot(x_domain, true_function.(x_domain); label = "truth")
Example block output

We don't know the function, but we have access to a limited set of noisy sample points:

N = 20
+Plots.plot(x_domain, true_function.(x_domain); label = "truth")
Example block output

We don't know the function, but we have access to a limited set of noisy sample points:

N = 20
 x_data = rand(x_domain, N)
 noisy_sampler(x) = true_function(x) + 0.25 * (2rand() - 1)
 y_data = noisy_sampler.(x_data)
-Plots.scatter!(x_data, y_data; label = "data")
Example block output

Using the data, we want to build a predictor y = predictor(x). One choice is a Gaussian Process:

fx = AbstractGPs.GP(AbstractGPs.Matern32Kernel())(x_data, 0.4)
+Plots.scatter!(x_data, y_data; label = "data")
Example block output

Using the data, we want to build a predictor y = predictor(x). One choice is a Gaussian Process:

fx = AbstractGPs.GP(AbstractGPs.Matern32Kernel())(x_data, 0.4)
 p_fx = AbstractGPs.posterior(fx, y_data)
-Plots.plot!(x_domain, p_fx; label = "GP", fillalpha = 0.1)
Example block output

Gaussian Processes fit a mean and variance function:

AbstractGPs.mean_and_var(p_fx, [π / 2])
([0.2959950125626034], [0.6659054323490708])

Decision model

Our goal for this JuMP model is to embed the Gaussian Process from AbstractGPs into the model and then solve for different fixed values of x to recreate the function that the Gaussian Process has learned to approximate.

First, create a JuMP model:

model = Model(Ipopt.Optimizer)
+Plots.plot!(x_domain, p_fx; label = "GP", fillalpha = 0.1)
Example block output

Gaussian Processes fit a mean and variance function:

AbstractGPs.mean_and_var(p_fx, [π / 2])
([0.9619745328577638], [0.1282325722355414])

Decision model

Our goal for this JuMP model is to embed the Gaussian Process from AbstractGPs into the model and then solve for different fixed values of x to recreate the function that the Gaussian Process has learned to approximate.

First, create a JuMP model:

model = Model(Ipopt.Optimizer)
 set_silent(model)
 @variable(model, x)

\[ x \]

Since a Gaussian Process is an infinite-dimensional object (its prediction is a distribution), we need some way of converting the Gaussian Process into a finite set of scalar values. For this, we use the Quantile predictor:

predictor = MathOptAI.Quantile(p_fx, [0.25, 0.75]);
 y, _ = MathOptAI.add_predictor(model, predictor, [x])
(JuMP.VariableRef[moai_quantile[1], moai_quantile[2]], Quantile(_, [0.25, 0.75])
@@ -28,4 +28,4 @@
         push!(Y, [NaN, NaN])
     end
 end
-Plots.plot!(X, reduce(hcat, Y)'; label = ["P25" "P75"], linewidth = 3)
Example block output

This page was generated using Literate.jl.

+Plots.plot!(X, reduce(hcat, Y)'; label = ["P25" "P75"], linewidth = 3)Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/mnist_lux/65060d29.svg b/dev/tutorials/mnist/39f02ea8.svg similarity index 68% rename from dev/tutorials/mnist_lux/65060d29.svg rename to dev/tutorials/mnist/39f02ea8.svg index 77b7914e..5edab8a2 100644 --- a/dev/tutorials/mnist_lux/65060d29.svg +++ b/dev/tutorials/mnist/39f02ea8.svg @@ -1,33 +1,33 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +cQ8AAABx4h4AAADixD0AAADEiXsAAACIE/cAAAAQJ+4BAAAgTtwDAABAnLgHAACAOHEPAAAAceIe +AAAA4sQ9AAAAxIl7AAAAiBP3AAAAECfuAQAAIE7cAwAAQJy4BwAAgDhxDwAAAHHiHgAAAOLEPQAA +AMSJewAAAIgT9wAAABAn7gEAACBO3AMAAECcuAcAAIA4cQ8AAABx4h4AAADixD0AAADEiXsAAACI +E/cAAAAQJ+4BAAAgTtwDAABAnLgHAACAOHEPAAAAceIeAAAA4sQ9APxv+3VAAgAAACDo/+t2BPpC +AIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5 +uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcA +AIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIA5 +uQcAAIA5uQcAAIA5uQcAAIA5uQcAAIC5AHPA462csbrcAAAAAElFTkSuQmCC +" transform="translate(151, 1290)"/> - + - - + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + diff --git a/dev/tutorials/mnist/56267ab7.svg b/dev/tutorials/mnist/585c605e.svg similarity index 67% rename from dev/tutorials/mnist/56267ab7.svg rename to dev/tutorials/mnist/585c605e.svg index af0e4c8c..8066a6ed 100644 --- a/dev/tutorials/mnist/56267ab7.svg +++ b/dev/tutorials/mnist/585c605e.svg @@ -1,33 +1,33 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + diff --git a/dev/tutorials/mnist_lux/51e7e4a5.svg b/dev/tutorials/mnist/ad9f9d48.svg similarity index 81% rename from dev/tutorials/mnist_lux/51e7e4a5.svg rename to dev/tutorials/mnist/ad9f9d48.svg index badf08b0..ef818ae9 100644 --- a/dev/tutorials/mnist_lux/51e7e4a5.svg +++ b/dev/tutorials/mnist/ad9f9d48.svg @@ -1,33 +1,33 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + +

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better-performing networks; try experimenting.)

predictor = Flux.Chain(
+Plots.plot([plot_image(train_data[i]) for i in 1:6]...; layout = (2, 3))
Example block output

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better-performing networks; try experimenting.)

predictor = Flux.Chain(
     Flux.Dense(28^2 => 32, Flux.sigmoid),
     Flux.Dense(32 => 10),
     Flux.softmax,
@@ -49,8 +49,8 @@
     println("Accuracy = $p %")
     return
 end
score_model (generic function with 1 method)

The accuracy of our model is only around 10% before training:

score_model(predictor, train_data)
-score_model(predictor, test_data)
Accuracy = 13.39 %
-Accuracy = 13.45 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Flux works; see the documentation at https://fluxml.ai for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
+score_model(predictor, test_data)
Accuracy = 11.24 %
+Accuracy = 11.35 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Flux works; see the documentation at https://fluxml.ai for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
     train_loader = data_loader(train_data; batchsize = 256, shuffle = true)
     optimizer_state = Flux.setup(Flux.Adam(3e-4), predictor)
     for epoch in 1:30
@@ -66,47 +66,47 @@
         print("Epoch $epoch: loss = $loss\t")
         score_model(predictor, test_data)
     end
-end
Epoch 1: loss = 1.797	Accuracy = 77.46 %
-Epoch 2: loss = 1.1648	Accuracy = 83.94 %
-Epoch 3: loss = 0.863	Accuracy = 86.37 %
-Epoch 4: loss = 0.6885	Accuracy = 87.93 %
-Epoch 5: loss = 0.5774	Accuracy = 89.04 %
-Epoch 6: loss = 0.5028	Accuracy = 89.79 %
-Epoch 7: loss = 0.4491	Accuracy = 90.43 %
-Epoch 8: loss = 0.4103	Accuracy = 90.73 %
-Epoch 9: loss = 0.38	Accuracy = 91.07 %
-Epoch 10: loss = 0.3564	Accuracy = 91.38 %
-Epoch 11: loss = 0.3373	Accuracy = 91.71 %
-Epoch 12: loss = 0.3206	Accuracy = 91.81 %
-Epoch 13: loss = 0.3067	Accuracy = 92.03 %
-Epoch 14: loss = 0.2947	Accuracy = 92.31 %
-Epoch 15: loss = 0.2842	Accuracy = 92.44 %
-Epoch 16: loss = 0.2744	Accuracy = 92.66 %
-Epoch 17: loss = 0.2652	Accuracy = 92.7 %
-Epoch 18: loss = 0.2576	Accuracy = 92.94 %
-Epoch 19: loss = 0.2503	Accuracy = 92.93 %
-Epoch 20: loss = 0.2435	Accuracy = 93.09 %
-Epoch 21: loss = 0.2372	Accuracy = 93.21 %
-Epoch 22: loss = 0.2319	Accuracy = 93.32 %
-Epoch 23: loss = 0.2261	Accuracy = 93.45 %
-Epoch 24: loss = 0.2208	Accuracy = 93.57 %
-Epoch 25: loss = 0.2158	Accuracy = 93.71 %
-Epoch 26: loss = 0.2116	Accuracy = 93.91 %
-Epoch 27: loss = 0.2071	Accuracy = 93.97 %
-Epoch 28: loss = 0.2029	Accuracy = 94.05 %
-Epoch 29: loss = 0.199	Accuracy = 94.12 %
-Epoch 30: loss = 0.1955	Accuracy = 94.28 %

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
+end
Epoch 1: loss = 1.8337	Accuracy = 76.32 %
+Epoch 2: loss = 1.1815	Accuracy = 84.09 %
+Epoch 3: loss = 0.8677	Accuracy = 86.97 %
+Epoch 4: loss = 0.6872	Accuracy = 88.42 %
+Epoch 5: loss = 0.5748	Accuracy = 89.37 %
+Epoch 6: loss = 0.4989	Accuracy = 90.12 %
+Epoch 7: loss = 0.4455	Accuracy = 90.73 %
+Epoch 8: loss = 0.4059	Accuracy = 91.12 %
+Epoch 9: loss = 0.3749	Accuracy = 91.38 %
+Epoch 10: loss = 0.3504	Accuracy = 91.7 %
+Epoch 11: loss = 0.3303	Accuracy = 91.98 %
+Epoch 12: loss = 0.3132	Accuracy = 92.17 %
+Epoch 13: loss = 0.299	Accuracy = 92.41 %
+Epoch 14: loss = 0.2863	Accuracy = 92.51 %
+Epoch 15: loss = 0.275	Accuracy = 92.76 %
+Epoch 16: loss = 0.2653	Accuracy = 92.77 %
+Epoch 17: loss = 0.2567	Accuracy = 93.0 %
+Epoch 18: loss = 0.2483	Accuracy = 93.12 %
+Epoch 19: loss = 0.2407	Accuracy = 93.29 %
+Epoch 20: loss = 0.2341	Accuracy = 93.38 %
+Epoch 21: loss = 0.228	Accuracy = 93.35 %
+Epoch 22: loss = 0.2223	Accuracy = 93.54 %
+Epoch 23: loss = 0.2167	Accuracy = 93.71 %
+Epoch 24: loss = 0.212	Accuracy = 93.8 %
+Epoch 25: loss = 0.2075	Accuracy = 93.94 %
+Epoch 26: loss = 0.2029	Accuracy = 94.0 %
+Epoch 27: loss = 0.1988	Accuracy = 94.08 %
+Epoch 28: loss = 0.1948	Accuracy = 94.18 %
+Epoch 29: loss = 0.1912	Accuracy = 94.24 %
+Epoch 30: loss = 0.1873	Accuracy = 94.37 %

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
     score, index = findmax(predictor(vec(x)))
     title = "Predicted: $(index - 1) ($(round(Int, 100 * score))%)"
     return plot_image(x; title)
 end
 
 plots = [plot_image(predictor, test_data[i].features) for i in 1:8]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
 losses = Flux.crossentropy(predictor(x), y; agg = identity)
 indices = sortperm(losses; dims = 2)[[1:4; end-3:end]]
 plots = [plot_image(predictor, test_data[i].features) for i in indices]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function which takes a test case and returns an adversarial image that maximizes the probability of the adversary's target label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function which takes a test case and returns an adversarial image that maximizes the probability of the adversary's target label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
     model = Model(Ipopt.Optimizer)
     set_silent(model)
     @variable(model, 0 <= x[1:28, 1:28] <= 1)
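     # Hedged sketch, not the tutorial's exact code: the hunk below elides the
     # rest of this function. MathOptAI.add_predictor is the real embedding
     # API; the constraint and objective forms here are illustrative assumptions.
     @constraint(model, -δ .<= x .- test_case.features .<= δ)  # stay δ-close to the test image
     y = MathOptAI.add_predictor(model, predictor, vec(x))  # 10 JuMP variables for the class scores
     @objective(model, Max, y[adversary_label + 1])  # labels are 0-9, hence + 1
     optimize!(model)
     return value.(x)
 end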
@@ -122,4 +122,4 @@
 Plots.plot(
     plot_image(predictor, test_data[3].features),
     plot_image(predictor, Float32.(x_adversary)),
-)
Example block output

This page was generated using Literate.jl.

+)
Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/mnist_lux/63b193ec.svg b/dev/tutorials/mnist_lux/05a744a6.svg
[SVG plot diff omitted: dev/tutorials/mnist_lux/63b193ec.svg renamed to 05a744a6.svg, similarity 73%; the image data contains no recoverable prose]
diff --git a/dev/tutorials/mnist/b0b1a6ac.svg b/dev/tutorials/mnist_lux/4d4fd437.svg
[SVG plot diff omitted: dev/tutorials/mnist/b0b1a6ac.svg renamed to dev/tutorials/mnist_lux/4d4fd437.svg, similarity 62%]
diff --git a/dev/tutorials/mnist_lux/index.html b/dev/tutorials/mnist_lux/index.html
-Plots.plot([plot_image(train_data[i]) for i in 1:6]...; layout = (2, 3))
Example block output

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better performing networks; try experimenting.)

chain = Lux.Chain(
+Plots.plot([plot_image(train_data[i]) for i in 1:6]...; layout = (2, 3))
Example block output

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better performing networks; try experimenting.)

chain = Lux.Chain(
     Lux.Dense(28^2 => 32, Lux.sigmoid),
     Lux.Dense(32 => 10),
     Lux.softmax,
@@ -54,8 +54,8 @@
     println("Accuracy = $p %")
     return
 end
score_model (generic function with 1 method)
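Between the two hunks above, the excerpt elides how the network is initialized. Here is a minimal sketch of the usual Lux setup, assuming a seeded rng and a convenience predictor closure (both are assumptions, not the tutorial's exact code):

import Random
rng = Random.MersenneTwister(1234)  # seed value is an assumption
parameters, state = Lux.setup(rng, chain)  # initialize weights and layer state
predictor = x -> first(chain(x, parameters, state))  # hypothetical closure returning only the output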

The accuracy of our model is only around 10% before training:

score_model(predictor, train_data)
-score_model(predictor, test_data)
Accuracy = 9.91 %
-Accuracy = 10.09 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Lux works; see the documentation at https://lux.csail.mit.edu for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
+score_model(predictor, test_data)
Accuracy = 9.04 %
+Accuracy = 8.92 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Lux works; see the documentation at https://lux.csail.mit.edu for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
     train_loader = data_loader(train_data; batchsize = 256, shuffle = true)
     optimizer_state = Optimisers.setup(Optimisers.Adam(0.0003f0), parameters)
     for epoch in 1:30
@@ -74,36 +74,36 @@
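         # Hedged sketch, not the tutorial's exact code: the hunk above elides
         # the minibatch update that runs here each epoch. Zygote.withgradient
         # and Optimisers.update! are real APIs; the loss wiring is an assumption.
         loss = 0.0
         for (x_batch, y_batch) in train_loader
             batch_loss, grads = Zygote.withgradient(parameters) do p
                 y_model, _ = chain(x_batch, p, state)
                 Lux.CrossEntropyLoss()(y_model, y_batch)
             end
             loss += batch_loss
             optimizer_state, parameters =
                 Optimisers.update!(optimizer_state, parameters, grads[1])
         end
         loss = round(loss / length(train_loader); digits = 4)  # average epoch loss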
         print("Epoch $epoch: loss = $loss\t")
         score_model(predictor, test_data)
     end
-end
Epoch 1: loss = 1.7929	Accuracy = 78.65 %
-Epoch 2: loss = 1.1442	Accuracy = 84.75 %
-Epoch 3: loss = 0.8427	Accuracy = 87.34 %
-Epoch 4: loss = 0.6691	Accuracy = 88.76 %
-Epoch 5: loss = 0.5599	Accuracy = 89.49 %
-Epoch 6: loss = 0.4867	Accuracy = 90.26 %
-Epoch 7: loss = 0.4349	Accuracy = 90.69 %
-Epoch 8: loss = 0.3962	Accuracy = 91.0 %
-Epoch 9: loss = 0.3666	Accuracy = 91.18 %
-Epoch 10: loss = 0.3432	Accuracy = 91.48 %
-Epoch 11: loss = 0.324	Accuracy = 91.83 %
-Epoch 12: loss = 0.3077	Accuracy = 92.15 %
-Epoch 13: loss = 0.2937	Accuracy = 92.33 %
-Epoch 14: loss = 0.282	Accuracy = 92.53 %
-Epoch 15: loss = 0.2715	Accuracy = 92.69 %
-Epoch 16: loss = 0.2619	Accuracy = 92.84 %
-Epoch 17: loss = 0.2536	Accuracy = 93.07 %
-Epoch 18: loss = 0.2457	Accuracy = 93.1 %
-Epoch 19: loss = 0.239	Accuracy = 93.29 %
-Epoch 20: loss = 0.2326	Accuracy = 93.41 %
-Epoch 21: loss = 0.2264	Accuracy = 93.52 %
-Epoch 22: loss = 0.2217	Accuracy = 93.64 %
-Epoch 23: loss = 0.2157	Accuracy = 93.82 %
-Epoch 24: loss = 0.2108	Accuracy = 93.96 %
-Epoch 25: loss = 0.2062	Accuracy = 94.06 %
-Epoch 26: loss = 0.202	Accuracy = 94.28 %
-Epoch 27: loss = 0.1981	Accuracy = 94.32 %
-Epoch 28: loss = 0.1939	Accuracy = 94.46 %
-Epoch 29: loss = 0.1901	Accuracy = 94.55 %
-Epoch 30: loss = 0.1866	Accuracy = 94.65 %

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
+end
Epoch 1: loss = 1.8288	Accuracy = 78.67 %
+Epoch 2: loss = 1.1614	Accuracy = 84.24 %
+Epoch 3: loss = 0.8526	Accuracy = 86.92 %
+Epoch 4: loss = 0.6773	Accuracy = 88.39 %
+Epoch 5: loss = 0.5677	Accuracy = 89.53 %
+Epoch 6: loss = 0.4942	Accuracy = 90.02 %
+Epoch 7: loss = 0.4423	Accuracy = 90.43 %
+Epoch 8: loss = 0.4045	Accuracy = 90.78 %
+Epoch 9: loss = 0.3751	Accuracy = 91.21 %
+Epoch 10: loss = 0.3522	Accuracy = 91.51 %
+Epoch 11: loss = 0.3334	Accuracy = 91.8 %
+Epoch 12: loss = 0.3174	Accuracy = 92.07 %
+Epoch 13: loss = 0.3042	Accuracy = 92.24 %
+Epoch 14: loss = 0.2918	Accuracy = 92.44 %
+Epoch 15: loss = 0.2813	Accuracy = 92.53 %
+Epoch 16: loss = 0.2721	Accuracy = 92.74 %
+Epoch 17: loss = 0.2639	Accuracy = 92.73 %
+Epoch 18: loss = 0.2562	Accuracy = 92.99 %
+Epoch 19: loss = 0.2488	Accuracy = 93.12 %
+Epoch 20: loss = 0.2421	Accuracy = 93.25 %
+Epoch 21: loss = 0.2356	Accuracy = 93.35 %
+Epoch 22: loss = 0.2303	Accuracy = 93.43 %
+Epoch 23: loss = 0.2249	Accuracy = 93.58 %
+Epoch 24: loss = 0.2196	Accuracy = 93.66 %
+Epoch 25: loss = 0.2146	Accuracy = 93.75 %
+Epoch 26: loss = 0.2105	Accuracy = 93.89 %
+Epoch 27: loss = 0.2056	Accuracy = 93.93 %
+Epoch 28: loss = 0.2014	Accuracy = 94.05 %
+Epoch 29: loss = 0.1978	Accuracy = 94.08 %
+Epoch 30: loss = 0.1938	Accuracy = 94.13 %

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
     y, _ = chain(vec(x), parameters, state)  # forward pass; the updated state is discarded
     score, index = findmax(y)  # highest class probability and its 1-based index
     title = "Predicted: $(index - 1) ($(round(Int, 100 * score))%)"
@@ -111,12 +111,12 @@
 end
 
 plots = [plot_image(predictor, test_data[i].features) for i in 1:8]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
 y_model, _ = chain(x, parameters, state)  # forward pass over the whole test set
 losses = Lux.CrossEntropyLoss(; agg = identity)(y_model, y)  # per-sample losses
 indices = sortperm(losses; dims = 2)[[1:4; end-3:end]]  # four smallest and four largest losses
 plots = [plot_image(predictor, test_data[i].features) for i in indices]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function that takes a test case and returns an adversarial image: a perturbed input that maximizes the probability the model assigns to adversary_label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function that takes a test case and returns an adversarial image: a perturbed input that maximizes the probability the model assigns to adversary_label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
     model = Model(Ipopt.Optimizer)
     set_silent(model)
     @variable(model, 0 <= x[1:28, 1:28] <= 1)
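     # Hedged sketch, not the tutorial's exact code: the hunk below elides the
     # rest of this function. Assumes MathOptAI's Lux extension accepts the
     # (chain, parameters, state) tuple; the constraint and objective forms
     # are illustrative assumptions.
     @constraint(model, -δ .<= x .- test_case.features .<= δ)  # stay δ-close to the test image
     y = MathOptAI.add_predictor(model, (chain, parameters, state), vec(x))
     @objective(model, Max, y[adversary_label + 1])  # labels are 0-9, hence + 1
     optimize!(model)
     return value.(x)
 end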
@@ -132,4 +132,4 @@
 Plots.plot(
     plot_image(predictor, test_data[3].features),
     plot_image(predictor, Float32.(x_adversary)),
-)
Example block output

This page was generated using Literate.jl.

+)
Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/pytorch/ea38d5e3.svg b/dev/tutorials/pytorch/fcabca83.svg
[SVG plot diff omitted: dev/tutorials/pytorch/ea38d5e3.svg renamed to fcabca83.svg, similarity 82%]
diff --git a/dev/tutorials/pytorch/index.html b/dev/tutorials/pytorch/index.html
index 38bb0c91..4a2b383a 100644
--- a/dev/tutorials/pytorch/index.html
+++ b/dev/tutorials/pytorch/index.html
@@ -41,4 +41,4 @@
     push!(Y, objective_value(model))
 end
 Plots.plot(x -> x^2 - 2x, X; label = "Truth", linestyle = :dot)
-Plots.plot!(X, Y; label = "Fitted")
Example block output

This page was generated using Literate.jl.

+Plots.plot!(X, Y; label = "Fitted")
Example block output
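The loop that builds X and Y is elided by the hunk above. Here is a minimal sketch of its likely shape, assuming MathOptAI.PytorchModel wraps the saved torch model; the filename and sample grid are illustrative assumptions, not the tutorial's exact code:

predictor = MathOptAI.PytorchModel("model.pt")  # filename is an assumption
X, Y = Float64[], Float64[]
for xi in range(-2, 3; length = 20)  # sample grid is an assumption
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, x)
    fix(x, xi)  # evaluate the embedded model at the fixed input
    y = MathOptAI.add_predictor(model, predictor, [x])
    @objective(model, Min, y[1])  # y is pinned once x is fixed; Min just recovers it
    optimize!(model)
    push!(X, xi)
    push!(Y, objective_value(model))
end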

This page was generated using Literate.jl.

diff --git a/dev/tutorials/student_enrollment/index.html b/dev/tutorials/student_enrollment/index.html
index 1df54a28..32af226d 100644
--- a/dev/tutorials/student_enrollment/index.html
+++ b/dev/tutorials/student_enrollment/index.html
@@ -138,7 +138,7 @@
 Dual objective value : -4.91918e+02

 * Work counters
-  Solve time (sec)   : 5.99427e+00
+  Solve time (sec)   : 6.67519e+00
   Barrier iterations : 142

Let's store the solution in evaluate_df for analysis:

julia> evaluate_df.merit_sol = value.(evaluate_df.merit);
julia> evaluate_df.enroll_sol = value.(evaluate_df.enroll);
julia> evaluate_df
6000×7 DataFrame
  Row │ StudentID  SAT    GPA      merit          enroll           merit_sol   ⋯
      │ Int64      Int64  Float64  GenericV…      GenericV…        Float64     ⋯
@@ -159,4 +159,4 @@
 5998 │      5998   1165     2.81  x_merit[5998]  moai_Sigmoid[1]  1.21872     ⋯
 5999 │      5999   1400     3.43  x_merit[5999]  moai_Sigmoid[1]  1.33971e-6
 6000 │      6000   1097     2.65  x_merit[6000]  moai_Sigmoid[1]  1.17865
-                                                  1 column and 5985 rows omitted

Solution analysis

We expect that just under 2,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)
2488.1951671444017

We awarded merit scholarships to roughly 1 in 5 students (1,152 of the 6,000):

julia> count(evaluate_df.merit_sol .> 1e-5)
1152

The average merit scholarship was worth just over $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])
1041.6622990671967

This page was generated using Literate.jl.

+ 1 column and 5985 rows omitted

Solution analysis

We expect that just under 2,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)
2488.1951671444017

We awarded merit scholarships to roughly 1 in 5 students (1,152 of the 6,000):

julia> count(evaluate_df.merit_sol .> 1e-5)
1152

The average merit scholarship was worth just over $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])
1041.6622990671967
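Since merit is measured in units of $1,000, the two numbers above imply a total scholarship budget of roughly $1.2 million (1,152 × $1,041.66). A hedged one-liner to check this directly (not in the original tutorial; output omitted):

julia> 1_000 * sum(evaluate_df.merit_sol)  # total scholarship spend in dollars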

This page was generated using Literate.jl.