Skip to content

Commit 3182026

Browse files
authored
couple of improvements (#919)
1 parent b54a0b0 commit 3182026

File tree

3 files changed

+57
-14
lines changed

3 files changed

+57
-14
lines changed

src/ReinforcementLearningCore/src/utils/networks.jl

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -249,23 +249,42 @@ function (model::CovGaussianNetwork)(state::AbstractMatrix, action::AbstractMatr
249249
return dropdims(output, dims=2)
250250
end
251251

252+
"""
    cholesky_matrix_to_vector_index(i, j, da)

Return the position in a `cholesky_vec` (of length `da * (da + 1) ÷ 2`) of the element of
the `da x da` lower triangular matrix at coordinates `(i, j)`.

The packing is column-major over the lower triangle: column `j` contributes its diagonal
element followed by the `da - j` elements below it.

For example, if `cholesky_vec = [1,2,3,4,5,6]`, the corresponding lower triangular matrix is
```
L = [1 0 0
     2 4 0
     3 5 6]
```
and `cholesky_matrix_to_vector_index(3, 2, 3) == 5`.
"""
cholesky_matrix_to_vector_index(i, j, da) = ((2da - j) * (j - 1)) ÷ 2 + i
267+
"""
    softplusbeta(x, beta = 10f0)

A softer softplus, `beta * log(1 + exp(x / beta))`, used to keep Cholesky diagonal
entries positive without vanishing values.

Implemented via the numerically stable identity
`softplus(z) = max(z, 0) + log1p(exp(-abs(z)))`: the naive `log(exp(x/beta) + 1)`
overflows to `Inf` once `x / beta` exceeds ~88 in Float32, whereas this form stays
finite and accurate for all inputs.
"""
function softplusbeta(x, beta = 10f0)
    z = x / beta
    return (max(z, zero(z)) + log1p(exp(-abs(z)))) * beta
end
268+
269+
# Build the j-th column of the lower triangular Cholesky factor as a
# (da x 1 x batch_size) slice: zeros above the diagonal, a positivized diagonal
# entry, then the raw below-diagonal entries taken from `cholesky_vec`.
function cholesky_columns(cholesky_vec, j, batch_size, da)
    diag_idx = cholesky_matrix_to_vector_index(j, j, da)
    # Diagonal entries must be strictly positive for a valid Cholesky factor;
    # the 1f-5 floor guards against exact zeros.
    tc_diag = softplusbeta.(cholesky_vec[diag_idx:diag_idx, :, :]) .+ 1f-5
    # Elements strictly below the diagonal of column j sit between consecutive
    # diagonal positions in the packed vector.
    other_idxs = diag_idx+1:cholesky_matrix_to_vector_index(j + 1, j + 1, da)-1
    tc_other = cholesky_vec[other_idxs, :, :]
    # The j-1 zeros above the diagonal carry no gradient, hence ignore_derivatives.
    zs = ignore_derivatives() do
        zs = similar(cholesky_vec, da - size(tc_other, 1) - 1, 1, batch_size)
        zs .= zero(eltype(cholesky_vec))
        return zs
    end
    return [zs; tc_diag; tc_other]
end
281+
252282
"""
    vec_to_tril(cholesky_vec, da)

Transform a vector containing the non-zero elements of a lower triangular
`da x da` matrix into that matrix, column by column.
"""
function vec_to_tril(cholesky_vec, da)
    nbatch = size(cholesky_vec, 3)
    # Assemble the factor by horizontally concatenating its da columns.
    return mapreduce(hcat, 1:da) do j
        cholesky_columns(cholesky_vec, j, nbatch, da)
    end
end
270289

271290
#####

src/ReinforcementLearningCore/test/utils/networks.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,30 @@ using Flux: params, gradient, unsqueeze
171171
end
172172
end
173173
@testset "CovGaussianNetwork" begin
174+
@testset "utility functions" begin
    cholesky_vec = [1:6;]
    cholesky_mat = [RLCore.softplusbeta(1) 0 0; 2 RLCore.softplusbeta(4) 0; 3 5 RLCore.softplusbeta(6)]
    @test RLCore.vec_to_tril(cholesky_vec, 3) ≈ cholesky_mat
    # Expected packed-index layout of a 3x3 lower triangular matrix.
    inds_mat = [1 0 0; 2 4 0; 3 5 6]
    for i in 1:3, j in 1:i
        @test RLCore.cholesky_matrix_to_vector_index(i, j, 3) == inds_mat[i, j]
    end
    # beta == 1 recovers the standard softplus.
    for x in -10:10
        @test RLCore.softplusbeta(x, 1) ≈ softplus(x) ≈ log(exp(x) + 1)
    end
    # beta > 1 gives a softer (larger) curve ...
    for x in -10:10
        @test RLCore.softplusbeta(x, 2) ≈ log(exp(x / 2) + 1) * 2 >= softplus(x)
    end
    # ... and beta < 1 a sharper (smaller) one.
    for x in -10:10
        @test RLCore.softplusbeta(x, 0.5) ≈ log(exp(x / 0.5) + 1) * 0.5 <= softplus(x)
    end
    cholesky_mats = stack([cholesky_mat for _ in 1:5], dims = 3)
    cholesky_vecs = stack([reshape(cholesky_vec, :, 1) for _ in 1:5], dims = 3)
    @test RLCore.vec_to_tril(cholesky_vecs, 3) ≈ cholesky_mats
    for i in 1:3
        @test RLCore.cholesky_columns(cholesky_vecs, i, 5, 3) ≈ reshape(cholesky_mats[:, i, :], 3, 1, :)
    end
end
174198
@testset "identity normalizer" begin
175199
pre = Dense(20,15)
176200
μ = Dense(15,10)

src/ReinforcementLearningZoo/src/algorithms/policy_gradient/mpo.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ end
224224

225225
# Solve the MPO dual problem for the temperature η: minimize the dual objective
# g(η) = η·ϵ + η·mean(logsumexp(Q/η - log(K))) over η ∈ (eps(ϵ), max|Q|),
# where K is the number of sampled actions (second dimension of Q).
function solve_mpodual(Q::AbstractArray, ϵ)
    dual(η) = η * ϵ + η * mean(logsumexp(Q ./ η .- Float32(log(size(Q, 2))), dims = 2))
    return Optim.minimizer(optimize(dual, eps(ϵ), maximum(abs.(Q))))
end
229229

230230
#For CovGaussianNetwork

0 commit comments

Comments
 (0)