jump-dev · odow · Mar 23, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -9,6 +9,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 Dualization = "191a621a-6537-11e9-281d-650236a99e60"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
@@ -46,6 +47,7 @@ Distributions = "0.25"
 Documenter = "=1.2.1"
 DocumenterCitations = "1"
 Dualization = "0.5"
+Enzyme = "0.11.19"
 GLPK = "=1.1.3"
 HTTP = "1.5.4"
 HiGHS = "=1.8.0"

diff --git a/docs/make.jl b/docs/make.jl
@@ -330,6 +330,7 @@ const _PAGES = [
             "tutorials/nonlinear/querying_hessians.md",
             "tutorials/nonlinear/complementarity.md",
             "tutorials/nonlinear/classifiers.md",
+            "tutorials/nonlinear/operator_ad.md",
         ],
         "Conic programs" => [
             "tutorials/conic/introduction.md",

diff --git a/docs/src/manual/nonlinear.md b/docs/src/manual/nonlinear.md
@@ -551,11 +551,16 @@ log(f(x))
 ### Gradients and Hessians
 
 By default, JuMP will use automatic differentiation to compute the gradient and
-Hessian of user-defined operators. If your function is not amenable to
-automatic differentiation, or you can compute analytic derivatives, you may pass
-additional arguments to [`@operator`](@ref) to compute the first- and
+Hessian of user-defined operators. If your function is not amenable to the
+default automatic differentiation, or you can compute analytic derivatives, you
+may pass additional arguments to [`@operator`](@ref) to compute the first- and
 second-derivatives.
 
+!!! tip
+    The tutorial [Automatic differentiation of user-defined operators](@ref)
+    has examples of how to use third-party Julia packages to compute automatic
+    derivatives.
+
 #### Univariate functions
 
 For univariate functions, a gradient function `∇f` returns a number that

diff --git a/docs/src/tutorials/nonlinear/operator_ad.jl b/docs/src/tutorials/nonlinear/operator_ad.jl
@@ -0,0 +1,314 @@
+# Copyright (c) 2024 Oscar Dowson and contributors                               #src
+#                                                                                #src
+# Permission is hereby granted, free of charge, to any person obtaining a copy   #src
+# of this software and associated documentation files (the "Software"), to deal  #src
+# in the Software without restriction, including without limitation the rights   #src
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #src
+# copies of the Software, and to permit persons to whom the Software is          #src
+# furnished to do so, subject to the following conditions:                       #src
+#                                                                                #src
+# The above copyright notice and this permission notice shall be included in all #src
+# copies or substantial portions of the Software.                                #src
+#                                                                                #src
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     #src
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #src
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #src
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         #src
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  #src
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  #src
+# SOFTWARE.                                                                      #src
+
+# # Automatic differentiation of user-defined operators
+
+# The purpose of this tutorial is to demonstrate how to apply automatic
+# differentiation to [User-defined operators](@ref jump_user_defined_operators).
+
+# !!! tip
+#     This tutorial is for advanced users. As an alternative, consider using
+#     [Function tracing](@ref) instead of creating an operator, and if an
+#     operator is necessary, consider using the default of `@operator(model, op_f, N, f)`
+#     instead of passing explicit [Gradients and Hessians](@ref).
+
+# This tutorial uses the following packages:
+
+using JuMP
+import Enzyme
+import ForwardDiff
+import Ipopt
+import Test
+
+# As a simple example, we consider the Rosenbrock function:
+
+f(x::T...) where {T} = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
+
+# Here's the value at a random point:
+
+x = rand(2)
+
+#-
+
+f(x...)
+
+# ## Analytic derivative
+
+# If expressions are simple enough, you can provide analytic functions for the
+# gradient and Hessian.
+
+# ### Gradient
+
+# The Rosenbrock function has the gradient vector:
+
+function analytic_∇f(g::AbstractVector, x...)
+    g[1] = 400 * x[1]^3 - 400 * x[1] * x[2] + 2 * x[1] - 2
+    g[2] = 200 * (x[2] - x[1]^2)
+    return
+end
+
+# Let's evaluate it at the same vector `x`:
+
+analytic_g = zeros(2)
+analytic_∇f(analytic_g, x...)
+analytic_g
+
+# ### Hessian
+
+# The Hessian matrix is:
+
+function analytic_∇²f(H::AbstractMatrix, x...)
+    H[1, 1] = 1200 * x[1]^2 - 400 * x[2] + 2
+    ## H[1, 2] = -400 * x[1] <-- not needed because Hessian is symmetric
+    H[2, 1] = -400 * x[1]
+    H[2, 2] = 200.0
+    return
+end
+
+# Note that because the Hessian is symmetric, JuMP requires that we fill in only
+# the lower triangle.
+
+analytic_H = zeros(2, 2)
+analytic_∇²f(analytic_H, x...)
+analytic_H
+
+# ### JuMP example
+
+# Putting our analytic functions together, we get:
+
+function analytic_rosenbrock()
+    model = Model(Ipopt.Optimizer)
+    set_silent(model)
+    @variable(model, x[1:2])
+    @operator(model, op_rosenbrock, 2, f, analytic_∇f, analytic_∇²f)
+    @objective(model, Min, op_rosenbrock(x[1], x[2]))
+    optimize!(model)
+    Test.@test is_solved_and_feasible(model)
+    return value.(x)
+end
+
+analytic_rosenbrock()
+
+# ## ForwardDiff
+
+# Instead of analytic functions, you can use [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl)
+# to compute derivatives.
+
+# ### Pros and cons
+
+# The main benefit of ForwardDiff is that it is simple, robust, and works with a
+# broad range of Julia syntax.
+
+# The main downside is that `f` must be a function that accepts arguments of
+# `x::Real...`. See [Common mistakes when writing a user-defined operator](@ref)
+# for more details.
+
+# ### Gradient
+
+# The gradient can be computed using `ForwardDiff.gradient!`. Note that
+# ForwardDiff expects a single `Vector{T}` argument, but we receive `x` as a
+# tuple, so we need `y -> f(y...)` and `collect(x)` to get things in the right
+# format.
+
+function fdiff_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
+    ForwardDiff.gradient!(g, y -> f(y...), collect(x))
+    return
+end
+
+# Let's check that we find the analytic solution:
+
+fdiff_g = zeros(2)
+fdiff_∇f(fdiff_g, x...)
+Test.@test ≈(analytic_g, fdiff_g)
+
+# ### Hessian
+
+# The Hessian is a bit more complicated, but code to implement it is:
+
+function fdiff_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
+    h = ForwardDiff.hessian(y -> f(y...), collect(x))
+    for i in 1:N, j in 1:i
+        H[i, j] = h[i, j]
+    end
+    return
+end
+
+# Let's check that we find the analytic solution:
+
+fdiff_H = zeros(2, 2)
+fdiff_∇²f(fdiff_H, x...)
+Test.@test ≈(analytic_H, fdiff_H)
+
+# ### JuMP example
+
+# The code for computing the gradient and Hessian using ForwardDiff can be
+# re-used for many operators. Thus, it is helpful to encapsulate it into the
+# function:
+
+"""
+    fdiff_derivatives(f::Function) -> Tuple{Function,Function}
+
+Return a tuple of functions that evalute the gradient and Hessian of `f` using
+ForwardDiff.jl.
+"""
+function fdiff_derivatives(f::Function)
+    function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
+        ForwardDiff.gradient!(g, y -> f(y...), collect(x))
+        return
+    end
+    function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
+        h = ForwardDiff.hessian(y -> f(y...), collect(x))
+        for i in 1:N, j in 1:i
+            H[i, j] = h[i, j]
+        end
+        return
+    end
+    return ∇f, ∇²f
+end
+
+# Here's an example using `fdiff_derivatives`:
+
+function fdiff_rosenbrock()
+    model = Model(Ipopt.Optimizer)
+    set_silent(model)
+    @variable(model, x[1:2])
+    @operator(model, op_rosenbrock, 2, f, fdiff_derivatives(f)...)
+    @objective(model, Min, op_rosenbrock(x[1], x[2]))
+    optimize!(model)
+    Test.@test is_solved_and_feasible(model)
+    return value.(x)
+end
+
+fdiff_rosenbrock()
+
+# ## Enzyme
+
+# Another library for automatic differentiation in Julia is [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl).
+
+# ### Pros and cons
+
+# The main benefit of Enzyme is that it can produce fast derivatives for
+# functions with many input arguments.
+
+# The main downsides are that it may throw unusual errors if your code uses an
+# unsupported feature of Julia and that it may have large compile times.
+
+# !!! warning
+#     The JuMP developers cannot help you debug error messages related to
+#     Enzyme. If the operator works, it works. If not, we suggest you try a
+#     different automatic differentiation library.
+
+# ### Gradient
+
+# The gradient can be computed using `ForwardDiff.autodiff` with the
+# `Enzyme.Reverse` mode. We need to wrap `x` in `Enzyme.Active` to indicate that
+# we want to compute the derivatives with respect to these arguments.
+
+function enzyme_∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
+    g .= Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active.(x)...)[1]
+    return
+end
+
+# Let's check that we find the analytic solution:
+
+enzyme_g = zeros(2)
+enzyme_∇f(enzyme_g, x...)
+Test.@test ≈(analytic_g, enzyme_g)
+
+# ### Hessian
+
+# We can compute the Hessian in Enzyme using forward-over-reverse automatic
+# differentiation.
+
+# The code to implement the Hessian in Enzyme is complicated, so we will not
+# explain it in detail; see the [Enzyme documentation](https://enzymead.github.io/Enzyme.jl/v0.11.19/generated/autodiff/#Vector-forward-over-reverse).
+
+function enzyme_∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
+    ## direction(i) returns a tuple with a `1` in the `i`'th entry and `0`
+    ## otherwise
+    direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N)
+    hess = Enzyme.autodiff(
+        ## For the outer autodiff, use Forward mode.
+        Enzyme.Forward,
+        ## As the inner function, compute the gradient using Reverse mode
+        (x...) -> Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1],
+        ## Compute multiple evaluations of Forward mode, each time using `x` but
+        ## initializing with a different direction.
+        Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))...,
+    )[1]
+    ## Unpack Enzyme's `hess` data structure into the the matrix `H` expected by
+    ## JuMP.
+    for j in 1:N, i in 1:j
+        H[j, i] = hess[j][i]
+    end
+    return
+end
+
+# Let's check that we find the analytic solution:
+
+enzyme_H = zeros(2, 2)
+enzyme_∇²f(enzyme_H, x...)
+Test.@test ≈(analytic_H, enzyme_H)
+
+# ### JuMP example
+
+# The code for computing the gradient and Hessian using Enzyme can be re-used
+# for many operators. Thus, it is helpful to encapsulate it into the function:
+
+"""
+    enzyme_derivatives(f::Function) -> Tuple{Function,Function}
+
+Return a tuple of functions that evalute the gradient and Hessian of `f` using
+Enzyme.jl.
+"""
+function enzyme_derivatives(f::Function)
+    function ∇f(g::AbstractVector{T}, x::Vararg{T,N}) where {T,N}
+        g .= Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active.(x)...)[1]
+        return
+    end
+    function ∇²f(H::AbstractMatrix{T}, x::Vararg{T,N}) where {T,N}
+        direction(i) = ntuple(j -> Enzyme.Active(T(i == j)), N)
+        hess = Enzyme.autodiff(
+            Enzyme.Forward,
+            (x...) -> Enzyme.autodiff_deferred(Enzyme.Reverse, f, x...)[1],
+            Enzyme.BatchDuplicated.(Enzyme.Active.(x), ntuple(direction, N))...,
+        )[1]
+        for j in 1:N, i in 1:j
+            H[j, i] = hess[j][i]
+        end
+        return
+    end
+    return ∇f, ∇²f
+end
+
+# Here's an example using `enzyme_derivatives`:
+
+function enzyme_rosenbrock()
+    model = Model(Ipopt.Optimizer)
+    set_silent(model)
+    @variable(model, x[1:2])
+    @operator(model, op_rosenbrock, 2, f, enzyme_derivatives(f)...)
+    @objective(model, Min, op_rosenbrock(x[1], x[2]))
+    optimize!(model)
+    Test.@test is_solved_and_feasible(model)
+    return value.(x)
+end
+
+enzyme_rosenbrock()
diff --git a/docs/styles/Vocab/JuMP-Vocab/accept.txt b/docs/styles/Vocab/JuMP-Vocab/accept.txt
@@ -128,6 +128,7 @@ Dualization
 EAGO
 [Ee]mbotech
 [Ee]cos
+Enzyme
 Geomstats
 Geoopt
 GLPK

diff --git a/src/nlp_expr.jl b/src/nlp_expr.jl
@@ -957,7 +957,7 @@ function add_nonlinear_operator(
     return NonlinearOperator(f, name)
 end
 
-function _catch_redefinition_constant_error(op::Symbol, f::Function)
+function _catch_redefinition_constant_error(op::Symbol, f::Function, args...)
     if op == Symbol(f)
         error("""
         Unable to add the nonlinear operator `:$op` with the same name as