Commit fe73975

Merge pull request #342 from SciML/Vaibhavdixit02-patch-1
Fix an incorrect `convert` call for `cons_hess_prototype` (it is a vector of matrices, so each matrix has to be converted element-wise), and make manually passed derivatives take three arguments so that the problem parameters `p` are passed through.
2 parents 60b6ffb + 0bc903b commit fe73975
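
In practice, the two changes mean that user-supplied functions are now written with the problem parameters `p` as an explicit argument, and that constraints write into a residual vector. A minimal sketch of the signature change (the function bodies here are illustrative, not taken from the commit):

```julia
# Old style: constraint returns a vector, manual derivatives do not see `p`
cons_old  = (x, p) -> [x[1]^2 + x[2]^2]
grad_old! = (G, x) -> (G .= 2 .* x)

# New style: the constraint writes in place and, like manually passed
# derivatives, takes the parameters `p` as an additional argument
cons_new! = (res, x, p) -> (res .= [x[1]^2 + x[2]^2]; res)
grad_new! = (G, x, p) -> (G .= 2 .* x; G)
```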

File tree: 11 files changed, +85 -56 lines changed

docs/src/optimization_packages/optim.md

Lines changed: 3 additions & 3 deletions
@@ -46,7 +46,7 @@ The following special keyword arguments which are not covered by the common `sol
 * `show_every`: Trace output is printed every `show_every`th iteration.


-For a more extensive documentation of all the algorithms and options please consult the
+For a more extensive documentation of all the algorithms and options please consult the
 [`Documentation`](https://julianlsolvers.github.io/Optim.jl/stable/#)

 ## Local Optimizer
@@ -73,7 +73,7 @@ The Rosenbrock function can optimized using the `Optim.IPNewton()` as follows:

 ```julia
 rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
-cons= (x,p) -> [x[1]^2 + x[2]^2]
+cons= (res,x,p) -> res .= [x[1]^2 + x[2]^2]
 x0 = zeros(2)
 p = [1.0,100.0]
 prob = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();cons= cons)
@@ -345,7 +345,7 @@ The Rosenbrock function can optimized using the `Optim.KrylovTrustRegion()` as f

 ```julia
 rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
-cons= (x,p) -> [x[1]^2 + x[2]^2]
+cons= (res,x,p) -> res .= [x[1]^2 + x[2]^2]
 x0 = zeros(2)
 p = [1.0,100.0]
 optprob = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();cons= cons)
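
For context, the updated `IPNewton` documentation example now reads roughly as follows once bounds on the constraint are added (the `lcons`/`ucons` values below are illustrative, not part of this diff):

```julia
using Optimization, OptimizationOptimJL

rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
# In-place, three-argument constraint: writes x₁² + x₂² into `res`
cons = (res, x, p) -> res .= [x[1]^2 + x[2]^2]

x0 = zeros(2)
p = [1.0, 100.0]
optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = cons)
prob = OptimizationProblem(optf, x0, p, lcons = [-5.0], ucons = [10.0])
sol = solve(prob, IPNewton())
```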

docs/src/tutorials/rosenbrock.md

Lines changed: 13 additions & 13 deletions
@@ -1,14 +1,14 @@
 # Solving the Rosenbrock Problem in >10 Ways
-
+
 This tutorial is a demonstration of many different solvers to demonstrate the
 flexibility of Optimization.jl. This is a gauntlet of many solvers to get a feel
 for common workflows of the package and give copy-pastable starting points.

 !!! note

 This example uses many different solvers of Optimization.jl. Each solver
-subpackage needs to be installed separate. For example, for the details on
-the installation and usage of OptimizationOptimJL.jl package, see the
+subpackage needs to be installed separate. For example, for the details on
+the installation and usage of OptimizationOptimJL.jl package, see the
 [Optim.jl page](@ref optim).

 ```@example rosenbrock
@@ -39,8 +39,8 @@ sol = solve(prob, NelderMead())

 # Now a gradient-based optimizer with forward-mode automatic differentiation

-optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
-prob = OptimizationProblem(optf, x0, _p)
+optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
+prob = OptimizationProblem(optf, x0, _p)
 sol = solve(prob, BFGS())

 # Now a second order optimizer using Hessians generated by forward-mode automatic differentiation
@@ -53,7 +53,7 @@ sol = solve(prob, Optim.KrylovTrustRegion())

 # Now derivative-based optimizers with various constraints

-cons = (x,p) -> [x[1]^2 + x[2]^2]
+cons = (res,x,p) -> res .= [x[1]^2 + x[2]^2]
 optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();cons= cons)
 #prob = OptimizationProblem(optf, x0, _p)
 #sol = solve(prob, IPNewton()) # No lcons or rcons, so constraints not satisfied
@@ -64,24 +64,24 @@ sol = solve(prob, IPNewton()) # Note that -Inf < x[1]^2 + x[2]^2 < Inf is always
 prob = OptimizationProblem(optf, x0, _p, lcons = [-5.0], ucons = [10.0])
 sol = solve(prob, IPNewton()) # Again, -5.0 < x[1]^2 + x[2]^2 < 10.0

-prob = OptimizationProblem(optf, x0, _p, lcons = [-Inf], ucons = [Inf],
+prob = OptimizationProblem(optf, x0, _p, lcons = [-Inf], ucons = [Inf],
 lb = [-500.0,-500.0], ub=[50.0,50.0])
 sol = solve(prob, IPNewton())

-prob = OptimizationProblem(optf, x0, _p, lcons = [0.5], ucons = [0.5],
-lb = [-500.0,-500.0], ub=[50.0,50.0])
+prob = OptimizationProblem(optf, x0, _p, lcons = [0.5], ucons = [0.5],
+lb = [-500.0,-500.0], ub=[50.0,50.0])
 sol = solve(prob, IPNewton()) # Notice now that x[1]^2 + x[2]^2 ≈ 0.5:
 # cons(sol.minimizer, _p) = 0.49999999999999994

-function con2_c(x,p)
-[x[1]^2 + x[2]^2, x[2]*sin(x[1])-x[1]]
+function con2_c(res,x,p)
+res .= [x[1]^2 + x[2]^2, x[2]*sin(x[1])-x[1]]
 end

 optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();cons= con2_c)
 prob = OptimizationProblem(optf, x0, _p, lcons = [-Inf,-Inf], ucons = [Inf,Inf])
 sol = solve(prob, IPNewton())

-cons_circ = (x,p) -> [x[1]^2 + x[2]^2]
+cons_circ = (x,p) -> res .= [x[1]^2 + x[2]^2]
 optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff();cons= cons_circ)
 prob = OptimizationProblem(optf, x0, _p, lcons = [-Inf], ucons = [0.25^2])
 sol = solve(prob, IPNewton()) # -Inf < cons_circ(sol.minimizer, _p) = 0.25^2
@@ -116,7 +116,7 @@ sol = solve(prob, Opt(:LD_LBFGS, 2))
 ## Evolutionary.jl Solvers

 using OptimizationEvolutionary
-sol = solve(prob, CMAES(μ =40 , λ = 100),abstol=1e-15) # -1.0 ≤ x[1], x[2] ≤ 0.8
+sol = solve(prob, CMAES(μ =40 , λ = 100),abstol=1e-15) # -1.0 ≤ x[1], x[2] ≤ 0.8

 ## BlackBoxOptim.jl Solvers
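
One hunk above appears inconsistent with the rest of the commit: the `+` line for `cons_circ` keeps the two-argument `(x,p)` signature while assigning into `res`, which is not defined in that scope. Written in the three-argument in-place form used everywhere else in this commit (a hedged sketch, not part of the diff; it reuses `rosenbrock`, `x0`, and `_p` from the tutorial), it would read:

```julia
# Circle constraint in the in-place, three-argument form used by the rest of the commit
cons_circ = (res, x, p) -> res .= [x[1]^2 + x[2]^2]

optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = cons_circ)
prob = OptimizationProblem(optf, x0, _p, lcons = [-Inf], ucons = [0.25^2])
sol = solve(prob, IPNewton())  # constraint value ends up at the upper bound 0.25^2
```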

lib/OptimizationOptimJL/test/runtests.jl

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ using Test
 sol = solve(prob, BFGS())
 @test 10 * sol.minimum < l1

-function g!(G, x)
+function g!(G, x, p = nothing)
 G[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
 G[2] = 200.0 * (x[2] - x[1]^2)
 end
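
With the extra `p` argument (defaulted to `nothing`), the manually written gradient can still be handed straight to `OptimizationFunction`. A hedged sketch of that usage, with an illustrative problem setup rather than the test file's own:

```julia
using Optimization, OptimizationOptimJL

rosenbrock(x, p = nothing) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2

# Manually supplied in-place gradient, now with the third `p` argument
function g!(G, x, p = nothing)
    G[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    G[2] = 200.0 * (x[2] - x[1]^2)
end

optf = OptimizationFunction(rosenbrock; grad = g!)  # no AD backend; gradient supplied by hand
prob = OptimizationProblem(optf, zeros(2))
sol = solve(prob, BFGS())
```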

src/function/finitediff.jl

Lines changed: 6 additions & 6 deletions
@@ -59,7 +59,7 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
 args...),
 θ, gradcache)
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
@@ -71,7 +71,7 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
 updatecache(hesscache,
 θ))
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -102,7 +102,7 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
 FiniteDiff.finite_difference_jacobian!(J, cons, θ, jaccache)
 end
 else
-cons_j = f.cons_j
+cons_j = (J, θ) -> f.cons_j(J, θ, p)
 end

 if cons !== nothing && f.cons_h === nothing
@@ -120,13 +120,13 @@
 end
 end
 else
-cons_h = f.cons_h
+cons_h = (res, θ) -> f.cons_h(res, θ, p)
 end

 return OptimizationFunction{true}(f, adtype; grad = grad, hess = hess, hv = hv,
 cons = cons, cons_j = cons_j, cons_h = cons_h,
 cons_jac_colorvec = cons_jac_colorvec,
-hess_prototype = nothing,
+hess_prototype = f.hess_prototype,
 cons_jac_prototype = f.cons_jac_prototype,
-cons_hess_prototype = nothing)
+cons_hess_prototype = f.cons_hess_prototype)
 end
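
The backend change repeated here and in the ForwardDiff, ModelingToolkit, ReverseDiff, Tracker, and Zygote wrappers below is a closure over `p`: a user-supplied derivative now has the signature `(G, θ, p)` (plus any trailing arguments), and `instantiate_function` rebinds `p` so internal callers can keep invoking it as `(G, θ)`. A standalone sketch of the pattern, with illustrative function names that are not part of the package:

```julia
# User-facing signature after this commit: in-place gradient that also receives `p`
user_grad!(G, θ, p) = (G .= 2 .* (θ .- p); G)

p = [1.0, 2.0]

# What the backends now build internally: a wrapper that closes over `p`
grad! = (G, θ, args...) -> user_grad!(G, θ, p, args...)

G = zeros(2)
grad!(G, [3.0, 3.0])   # callers still use the two-argument form; G == [4.0, 2.0]
```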

src/function/forwarddiff.jl

Lines changed: 5 additions & 5 deletions
@@ -51,15 +51,15 @@ function instantiate_function(f::OptimizationFunction{true}, x,
 grad = (res, θ, args...) -> ForwardDiff.gradient!(res, x -> _f(x, args...), θ,
 gradcfg, Val{false}())
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
 hesscfg = ForwardDiff.HessianConfig(_f, x, ForwardDiff.Chunk{chunksize}())
 hess = (res, θ, args...) -> ForwardDiff.hessian!(res, x -> _f(x, args...), θ,
 hesscfg, Val{false}())
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -85,7 +85,7 @@ function instantiate_function(f::OptimizationFunction{true}, x,
 ForwardDiff.jacobian!(J, cons_oop, θ, cjconfig)
 end
 else
-cons_j = f.cons_j
+cons_j = (J, θ) -> f.cons_j(J, θ, p)
 end

 if cons !== nothing && f.cons_h === nothing
@@ -99,12 +99,12 @@ function instantiate_function(f::OptimizationFunction{true}, x,
 end
 end
 else
-cons_h = f.cons_h
+cons_h = (res, θ) -> f.cons_h(res, θ, p)
 end

 return OptimizationFunction{true}(f.f, adtype; grad = grad, hess = hess, hv = hv,
 cons = cons, cons_j = cons_j, cons_h = cons_h,
-hess_prototype = nothing,
+hess_prototype = f.hess_prototype,
 cons_jac_prototype = f.cons_jac_prototype,
 cons_hess_prototype = f.cons_hess_prototype)
 end

src/function/function.jl

Lines changed: 2 additions & 1 deletion
@@ -55,7 +55,8 @@ function instantiate_function(f, x, ::AbstractADType, p, num_cons = 0)
 cons_jac_prototype = f.cons_jac_prototype === nothing ? nothing :
 convert.(eltype(x), f.cons_jac_prototype)
 cons_hess_prototype = f.cons_hess_prototype === nothing ? nothing :
-convert.(eltype(x), f.cons_hess_prototype)
+[convert.(eltype(x), f.cons_hess_prototype[i])
+ for i in 1:num_cons]
 expr = symbolify(f.expr)
 cons_expr = symbolify.(f.cons_expr)
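
This hunk is the `convert` fix from the commit title: `cons_hess_prototype` is a vector of Hessian prototype matrices (one per constraint), so broadcasting `convert` over the outer vector would attempt `convert(eltype(x), ::Matrix)` and throw; the element type has to be converted matrix by matrix. A standalone illustration (the values are made up):

```julia
x = zeros(Float32, 2)
cons_hess_prototype = [zeros(2, 2), zeros(2, 2)]   # one prototype matrix per constraint
num_cons = length(cons_hess_prototype)

# convert.(eltype(x), cons_hess_prototype) would try convert(Float32, ::Matrix{Float64}) and fail.
# Converting each matrix's entries instead preserves the vector-of-matrices structure:
converted = [convert.(eltype(x), cons_hess_prototype[i]) for i in 1:num_cons]
eltype(converted[1]) == Float32   # true
```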

src/function/mtk.jl

Lines changed: 4 additions & 4 deletions
@@ -15,15 +15,15 @@ function instantiate_function(f, x, adtype::AutoModelingToolkit, p, num_cons = 0
 grad_oop, grad_iip = ModelingToolkit.generate_gradient(sys, expression = Val{false})
 grad(J, u) = (grad_iip(J, u, p); J)
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
 hess_oop, hess_iip = ModelingToolkit.generate_hessian(sys, expression = Val{false},
 sparse = adtype.obj_sparse)
 hess(H, u) = (hess_iip(H, u, p); H)
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -69,7 +69,7 @@ function instantiate_function(f, x, adtype::AutoModelingToolkit, p, num_cons = 0
 jac_iip(J, θ, p)
 end
 else
-cons_j = f.cons_j
+cons_j = (J, θ) -> f.cons_j(J, θ, p)
 end

 if f.cons !== nothing && f.cons_h === nothing
@@ -82,7 +82,7 @@ function instantiate_function(f, x, adtype::AutoModelingToolkit, p, num_cons = 0
 cons_hess_iip(res, θ, p)
 end
 else
-cons_h = f.cons_h
+cons_h = (res, θ) -> f.cons_h(res, θ, p)
 end

 if adtype.obj_sparse

src/function/reversediff.jl

Lines changed: 3 additions & 3 deletions
@@ -53,7 +53,7 @@ function instantiate_function(f, x, adtype::AutoReverseDiff, p = SciMLBase.NullP
 grad = (res, θ, args...) -> ReverseDiff.gradient!(res, x -> _f(x, args...), θ,
 ReverseDiff.GradientConfig(θ))
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
@@ -70,7 +70,7 @@ function instantiate_function(f, x, adtype::AutoReverseDiff, p = SciMLBase.NullP
 end
 end
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -86,7 +86,7 @@ function instantiate_function(f, x, adtype::AutoReverseDiff, p = SciMLBase.NullP

 return OptimizationFunction{false}(f, adtype; grad = grad, hess = hess, hv = hv,
 cons = nothing, cons_j = nothing, cons_h = nothing,
-hess_prototype = nothing,
+hess_prototype = f.hess_prototype,
 cons_jac_prototype = nothing,
 cons_hess_prototype = nothing)
 end

src/function/tracker.jl

Lines changed: 3 additions & 3 deletions
@@ -36,13 +36,13 @@ function instantiate_function(f, x, adtype::AutoTracker, p, num_cons = 0)
 res .= Tracker.data(Tracker.gradient(x -> _f(x, args...),
 θ)[1])
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
 hess = (res, θ, args...) -> error("Hessian based methods not supported with Tracker backend, pass in the `hess` kwarg")
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -53,7 +53,7 @@ function instantiate_function(f, x, adtype::AutoTracker, p, num_cons = 0)

 return OptimizationFunction{false}(f, adtype; grad = grad, hess = hess, hv = hv,
 cons = nothing, cons_j = nothing, cons_h = nothing,
-hess_prototype = nothing,
+hess_prototype = f.hess_prototype,
 cons_jac_prototype = nothing,
 cons_hess_prototype = nothing)
 end

src/function/zygote.jl

Lines changed: 3 additions & 3 deletions
@@ -36,7 +36,7 @@ function instantiate_function(f, x, adtype::AutoZygote, p, num_cons = 0)
 θ)[1]) :
 res .= Zygote.gradient(x -> _f(x, args...), θ)[1]
 else
-grad = f.grad
+grad = (G, θ, args...) -> f.grad(G, θ, p, args...)
 end

 if f.hess === nothing
@@ -52,7 +52,7 @@ function instantiate_function(f, x, adtype::AutoZygote, p, num_cons = 0)
 end
 end
 else
-hess = f.hess
+hess = (H, θ, args...) -> f.hess(H, θ, p, args...)
 end

 if f.hv === nothing
@@ -68,7 +68,7 @@ function instantiate_function(f, x, adtype::AutoZygote, p, num_cons = 0)

 return OptimizationFunction{false}(f, adtype; grad = grad, hess = hess, hv = hv,
 cons = nothing, cons_j = nothing, cons_h = nothing,
-hess_prototype = nothing,
+hess_prototype = f.hess_prototype,
 cons_jac_prototype = nothing,
 cons_hess_prototype = nothing)
 end
