JuliaReinforcementLearning · HenriDeh · Jul 12, 2023 · Jul 4, 2023 · Jul 4, 2023 · Jul 11, 2023
diff --git a/src/ReinforcementLearningCore/src/utils/networks.jl b/src/ReinforcementLearningCore/src/utils/networks.jl
@@ -249,23 +249,42 @@ function (model::CovGaussianNetwork)(state::AbstractMatrix, action::AbstractMatr
     return dropdims(output, dims=2)
 end
 
+"""
+    cholesky_matrix_to_vector_index(i, j, da)
+
+Return the position in a `cholesky_vec` of the element at coordinates `(i, j)` (with
+`i >= j`) of the `da` x `da` lower triangular matrix that the vector encodes.
+
+The vector stores the lower triangle column by column, so it holds `da * (da + 1) ÷ 2`
+elements. Column `j` starts right after the `(2da - j + 2) * (j - 1) ÷ 2` elements of the
+previous columns, which simplifies to the expression used below.
+
+For example, if `da = 3` and `cholesky_vec = [1,2,3,4,5,6]`, the corresponding lower
+triangular matrix is
+```
+L = [1 0 0
+     2 4 0
+     3 5 6]
+```
+and `cholesky_matrix_to_vector_index(3, 2, 3) == 5`.
+"""
+cholesky_matrix_to_vector_index(i, j, da) = ((2da - j) * (j - 1)) ÷ 2 + i
+"""
+    softplusbeta(x, beta = 10f0)
+
+Return `beta * log(1 + exp(x / beta))`, a softer softplus meant to avoid vanishing values.
+
+With `beta = 1` this is the usual softplus. `log1p` is used instead of `log(... + 1)` so
+that strongly negative inputs keep a small positive value instead of rounding to zero.
+"""
+softplusbeta(x, beta = 10f0) = log1p(exp(x / beta)) * beta
+
+"""
+    cholesky_columns(cholesky_vec, j, batch_size, da)
+
+Return a slice (da x 1 x batch_size) containing the `j`th columns of the lower triangular
+cholesky decomposition of the covariance.
+
+The column is assembled from `j - 1` zeros (the entries above the diagonal), the diagonal
+element passed through `softplusbeta` and shifted by `1f-5`, and the elements stored
+between this diagonal element and the next one, taken unchanged from `cholesky_vec`.
+"""
+function cholesky_columns(cholesky_vec, j, batch_size, da)
+    diag_idx = cholesky_matrix_to_vector_index(j, j, da)
+    next_diag_idx = cholesky_matrix_to_vector_index(j + 1, j + 1, da)
+    tc_diag = softplusbeta.(cholesky_vec[diag_idx:diag_idx, :, :]) .+ 1f-5
+    tc_other = cholesky_vec[diag_idx+1:next_diag_idx-1, :, :] #elements between two diagonal elements
+    # The zero padding is a constant, so it is built outside of differentiation. The
+    # closure returns the array directly rather than assigning to the outer `zs`, which
+    # would make `zs` a captured, reassigned (boxed) variable.
+    zs = ignore_derivatives() do
+        fill!(similar(cholesky_vec, j - 1, 1, batch_size), zero(eltype(cholesky_vec)))
+    end
+    return [zs; tc_diag; tc_other]
+end
+
 """
 Transform a vector containing the non-zero elements of a lower triangular da x da matrix into that matrix.
 """
 function vec_to_tril(cholesky_vec, da)
-    batch_size = size(cholesky_vec, 3)
-    c2idx(i, j) = ((2da - j) * (j - 1)) ÷ 2 + i #return the position in cholesky_vec of the element of the triangular matrix at coordinates (i,j)
-    function f(j) #return a slice (da x 1 x batchsize) containing the jth columns of the lower triangular cholesky decomposition of the covariance
-        tc_diag = softplus.(cholesky_vec[c2idx(j, j):c2idx(j, j), :, :])
-        tc_other = cholesky_vec[c2idx(j, j)+1:c2idx(j + 1, j + 1)-1, :, :]
-        zs = ignore_derivatives() do
-            zs = similar(cholesky_vec, da - size(tc_other, 1) - 1, 1, batch_size)
-            zs .= zero(eltype(cholesky_vec))
-            return zs
-        end
-        [zs; tc_diag; tc_other]
-    end
-    return mapreduce(f, hcat, 1:da)
+    # The size of the third dimension of `cholesky_vec` is used as the batch size.
+    batch_size = size(cholesky_vec, 3)    
+    # Build column j with `cholesky_columns` for every j in 1:da and concatenate the
+    # columns horizontally (along the second dimension).
+    return mapreduce(j->cholesky_columns(cholesky_vec, j, batch_size, da), hcat, 1:da)
 end
 
 #####

diff --git a/src/ReinforcementLearningCore/test/utils/networks.jl b/src/ReinforcementLearningCore/test/utils/networks.jl
@@ -171,6 +171,30 @@ using Flux: params, gradient, unsqueeze
         end
     end
     @testset "CovGaussianNetwork" begin
+        @testset "utility functions" begin
+            # Packed lower triangle and the matrix it is expected to expand to.
+            packed = collect(1:6)
+            expected = [
+                RLCore.softplusbeta(1) 0 0
+                2 RLCore.softplusbeta(4) 0
+                3 5 RLCore.softplusbeta(6)
+            ]
+            @test RLCore.vec_to_tril(packed, 3) ≈ expected
+
+            # Matrix coordinates (row, col) -> position in the packed vector.
+            positions = [1 0 0; 2 4 0; 3 5 6]
+            for row in 1:3, col in 1:row
+                @test RLCore.cholesky_matrix_to_vector_index(row, col, 3) == positions[row, col]
+            end
+
+            # beta = 1 matches the plain softplus.
+            for v in -10:10
+                @test RLCore.softplusbeta(v, 1) ≈ softplus(v) ≈ log(exp(v) + 1)
+            end
+            # beta = 2 is expected to be >= softplus, beta = 0.5 to be <= softplus.
+            for v in -10:10
+                @test RLCore.softplusbeta(v, 2) ≈ log(exp(v / 2) + 1) * 2 >= softplus(v)
+            end
+            for v in -10:10
+                @test RLCore.softplusbeta(v, 0.5) ≈ log(exp(v / 0.5) + 1) * 0.5 <= softplus(v)
+            end
+
+            # Same checks on a batch made of identical copies.
+            nbatch = 5
+            batched_expected = stack([expected for _ in 1:nbatch], dims = 3)
+            batched_packed = stack([reshape(packed, :, 1) for _ in 1:nbatch], dims = 3)
+            @test RLCore.vec_to_tril(batched_packed, 3) ≈ batched_expected
+            for col in 1:3
+                @test RLCore.cholesky_columns(batched_packed, col, nbatch, 3) ≈ reshape(batched_expected[:, col, :], 3, 1, :)
+            end
+        end
         @testset "identity normalizer" begin
             pre = Dense(20,15)
             μ = Dense(15,10)

diff --git a/src/ReinforcementLearningZoo/src/algorithms/policy_gradient/mpo.jl b/src/ReinforcementLearningZoo/src/algorithms/policy_gradient/mpo.jl
@@ -224,7 +224,7 @@ end
 
 function solve_mpodual(Q::AbstractArray, ϵ)    
+    # Objective in the scalar η: η*ϵ plus η times the mean, over the slices reduced along
+    # dimension 2, of logsumexp(Q ./ η) shifted by log(size(Q, 2)).
     g(η) = η * ϵ + η * mean(logsumexp( Q ./η .- Float32(log(size(Q, 2))), dims = 2))
-    Optim.minimizer(optimize(g, eps(ϵ), 10f0))
+    # `optimize` is called with lower bound eps(ϵ) and an upper bound that follows the
+    # largest magnitude in Q (it was the fixed value 10f0 before this change); the
+    # minimizer it reports is returned.
+    Optim.minimizer(optimize(g, eps(ϵ), maximum(abs.(Q))))
 end
 
 #For CovGaussianNetwork