Rename the `log_prob` batch key to `action_log_prob`

HenriDeh · HenriDeh · commit e036f63a8c69 · 2023-09-12T11:28:09.000+02:00
diff --git a/src/ReinforcementLearningZoo/src/algorithms/bootstrapping/retrace.jl b/src/ReinforcementLearningZoo/src/algorithms/bootstrapping/retrace.jl
@@ -7,7 +7,7 @@ export retrace
 function retrace_operator(qnetwork, policy, batch, γ, λ)
     s = batch[:state] |> send_to_device(qnetwork)
     a = batch[:action] |> send_to_device(qnetwork)
-    behavior_log_probs = batch[:log_prob] |> send_to_device(qnetwork)
+    behavior_log_probs = batch[:action_log_prob] |> send_to_device(qnetwork)
     r = batch[:reward] |> send_to_device(qnetwork)
     t = last.(batch[:terminal]) |> send_to_device(qnetwork)
     ns = batch[:next_state] |> send_to_device(qnetwork)
diff --git a/src/ReinforcementLearningZoo/test/operators.jl b/src/ReinforcementLearningZoo/test/operators.jl
@@ -2,7 +2,7 @@ import ReinforcementLearningCore
 @testset "retrace" begin
     batch = (state= [[1 2 3], [10 11 12]], 
         action = [[1 2 3],[10 11 12]], 
-        log_prob = [log.([0.2,0.2,0.2]), log.([0.1,0.1,0.1])],
+        action_log_prob = [log.([0.2,0.2,0.2]), log.([0.1,0.1,0.1])],
         reward = [[1f0,2f0,3f0],[10f0,11f0,12f0]], 
         terminal= [[0,0,1], [0,0,0]], 
         next_state = [[2 3 4],[11 12 13]])