Merge pull request #40 from JuliaReinforcementLearning/episodecontroller

HenriDeh · web-flow · commit 323793d0dd9c · 2023-06-19T17:18:00.000+02:00
add Episode Controller
diff --git a/src/controllers.jl b/src/controllers.jl
@@ -1,4 +1,4 @@
-export InsertSampleRatioController, AsyncInsertSampleRatioController
+export InsertSampleRatioController, AsyncInsertSampleRatioController, EpisodeSampleRatioController
 
 """
     InsertSampleRatioController(;ratio=1., threshold=1)
@@ -15,18 +15,16 @@ Base.@kwdef mutable struct InsertSampleRatioController
     n_sampled::Int = 0
 end
 
-function on_insert!(c::InsertSampleRatioController, n::Int)
+function on_insert!(c::InsertSampleRatioController, n::Int, ::Any)
     if n > 0
         c.n_inserted += n
     end
 end
 
 function on_sample!(c::InsertSampleRatioController)
-    if c.n_inserted >= c.threshold
-        if c.n_sampled <= (c.n_inserted - c.threshold) * c.ratio
-            c.n_sampled += 1
-            return true
-        end
+    if c.n_inserted >= c.threshold && c.n_sampled <= (c.n_inserted - c.threshold) * c.ratio
+        c.n_sampled += 1
+        return true
     end
     return false
 end
@@ -59,3 +57,32 @@ function AsyncInsertSampleRatioController(
         Channel(ch_out_sz)
     )
 end
+
+"""
+    EpisodeSampleRatioController(;ratio=1., threshold=1)
+
+Used in [`Trajectory`](@ref). The `threshold` is the minimum number of
+episodes that must be completed before sampling is allowed. The `ratio` balances the number of
+episodes against the number of samplings. For example, a ratio of 1/10 will sample once every 10
+episodes in the trajectory. Currently only works for environments with terminal states.
+"""
+Base.@kwdef mutable struct EpisodeSampleRatioController
+    ratio::Float64 = 1.0
+    threshold::Int = 1
+    n_episodes::Int = 0
+    n_sampled::Int = 0
+end
+
+function on_insert!(c::EpisodeSampleRatioController, n::Int, x::NamedTuple)
+    if n > 0
+        c.n_episodes += sum(x.terminal) 
+    end
+end
+
+function on_sample!(c::EpisodeSampleRatioController)
+    if c.n_episodes >= c.threshold && c.n_sampled <= (c.n_episodes - c.threshold) * c.ratio 
+        c.n_sampled += 1
+        return true
+    end
+    return false
+end
diff --git a/src/trajectory.jl b/src/trajectory.jl
@@ -87,18 +87,18 @@ Base.setindex!(t::Trajectory{<:Any,<:Any,<:AsyncInsertSampleRatioController}, v,
 
 function Base.append!(t::Trajectory, x)
     append!(t.container, x)
-    on_insert!(t.controller, length(x))
+    on_insert!(t.controller, length(x), x)
 end
 
 # !!! by default we assume `x`  is a complete example which contains all the traces
 # When doing partial inserting, the result of undefined
 function Base.push!(t::Trajectory, x)
     push!(t.container, x)
-    on_insert!(t)
+    on_insert!(t, x)
 end
 
-on_insert!(t::Trajectory) = on_insert!(t, 1)
-on_insert!(t::Trajectory, n::Int) = on_insert!(t.controller, n)
+on_insert!(t::Trajectory, x) = on_insert!(t, 1, x)
+on_insert!(t::Trajectory, n::Int, x) = on_insert!(t.controller, n, x)
 
 #####
 # out
diff --git a/test/controllers.jl b/test/controllers.jl
@@ -0,0 +1,40 @@
+import ReinforcementLearningTrajectories: on_insert!, on_sample!
+@testset "controllers.jl" begin
+    @testset "EpisodeSampleRatioController" begin
+        #push
+        c = EpisodeSampleRatioController(ratio = 1/2, threshold = 5)
+        for st in 1:50
+            transition = (state = 1, action = 2, reward = 5., terminal = (st % 5 == 0))
+            on_insert!(c, 1, transition)
+            if st in 25:10:45 
+                @test on_sample!(c)
+                @test !on_sample!(c)
+            else
+                @test !on_sample!(c)
+            end
+        end
+        #append
+        c = EpisodeSampleRatioController(ratio = 1/2, threshold = 5)
+        for e in 1:20
+            transitions = (state = ones(5), action = ones(5), reward = ones(5), terminal = [false, false, false, false, iseven(e)])
+            on_insert!(c, length(first(transitions)), transitions)
+            if e in 10:4:20
+                @test on_sample!(c)
+                @test !on_sample!(c)
+            else
+                @test !on_sample!(c)
+            end
+        end
+        c = EpisodeSampleRatioController(ratio = 1/4, threshold = 5)
+        for e in 1:10
+            transitions = (state = ones(10), action = ones(10), reward = ones(10), terminal = [false, false, false, false, true, false, false, false, false, true])
+            on_insert!(c, length(first(transitions)), transitions)
+            if e in 3:2:10
+                @test on_sample!(c)
+                @test !on_sample!(c)
+            else
+                @test !on_sample!(c)
+            end
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -14,7 +14,7 @@ Adapt.adapt_storage(to::TestAdaptor, x) = CUDA.functional() ? CUDA.cu(x) : x
     include("traces.jl")
     include("common.jl")
     include("samplers.jl")
+    include("controllers.jl")
     include("trajectories.jl")
     include("normalization.jl")
-    include("samplers.jl")
 end