Skip to content

Commit 9ebbbfa

Browse files
committed
couple of improvements
1 parent 2c96936 commit 9ebbbfa

File tree

2 files changed

+3
-2
lines changed
  • src
    • ReinforcementLearningCore/src/utils
    • ReinforcementLearningZoo/src/algorithms/policy_gradient

2 files changed

+3
-2
lines changed

src/ReinforcementLearningCore/src/utils/networks.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,9 @@ Transform a vector containing the non-zero elements of a lower triangular da x d
255255
function vec_to_tril(cholesky_vec, da)
256256
batch_size = size(cholesky_vec, 3)
257257
c2idx(i, j) = ((2da - j) * (j - 1)) ÷ 2 + i #return the position in cholesky_vec of the element of the triangular matrix at coordinates (i,j)
258+
softplusbeta(x) = log(exp(0.1f0 * x) +1)*10f0 #softplus with β = 0.1, i.e. log(1 + exp(β*x)) / β: a softer softplus to avoid vanishing values
258259
function f(j) #return a slice (da x 1 x batchsize) containing the jth column of the lower triangular cholesky decomposition of the covariance
259-
tc_diag = softplus.(cholesky_vec[c2idx(j, j):c2idx(j, j), :, :])
260+
tc_diag = softplusbeta.(cholesky_vec[c2idx(j, j):c2idx(j, j), :, :]) .+ 1f-5
260261
tc_other = cholesky_vec[c2idx(j, j)+1:c2idx(j + 1, j + 1)-1, :, :]
261262
zs = ignore_derivatives() do
262263
zs = similar(cholesky_vec, da - size(tc_other, 1) - 1, 1, batch_size)

src/ReinforcementLearningZoo/src/algorithms/policy_gradient/mpo.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ end
224224

225225
function solve_mpodual(Q::AbstractArray, ϵ)
226226
g(η) = η * ϵ + η * mean(logsumexp( Q ./η .- Float32(log(size(Q, 2))), dims = 2))
227-
Optim.minimizer(optimize(g, eps(ϵ), 10f0))
227+
Optim.minimizer(optimize(g, eps(ϵ), maximum(abs.(Q))))
228228
end
229229

230230
#For CovGaussianNetwork

0 commit comments

Comments
 (0)