Merge pull request #51 from JuliaReinforcementLearning/episodesampler

jeremiahpslewis · web-flow · commit 0e76ebb39f64 · 2023-08-09T16:02:44.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReinforcementLearningTrajectories"
 uuid = "6486599b-a3cd-4e92-a99a-2cea90cc8c3c"
-version = "0.3.2"
+version = "0.3.3"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
diff --git a/README.md b/README.md
@@ -69,6 +69,7 @@ julia> for batch in t
 - `BatchSampler`
 - `MetaSampler`
 - `MultiBatchSampler`
+- `EpisodesSampler`
 
 **Controllers**
 
diff --git a/src/samplers.jl b/src/samplers.jl
@@ -1,4 +1,5 @@
 using Random
+export EpisodesSampler, Episode, BatchSampler, NStepBatchSampler, MetaSampler, MultiBatchSampler, DummySampler
 
 struct SampleGenerator{S,T}
     sampler::S
@@ -233,3 +234,42 @@ function StatsBase.sample(s::NStepBatchSampler{names}, e::EpisodesBuffer{<:Any,
         StatsBase.sample(s, t.traces, Val(names), inds)
     )
 end
+
+"""
+    EpisodesSampler()
+
+Sampler that returns every episode held by the trajectory, each wrapped in its own
+`Episode` container. Episodes that were truncated (e.g. because the buffer reached its
+capacity) are returned too; at most one episode can be truncated, and it is always the
+first one.
+"""
+struct EpisodesSampler{names} end
+
+# No `names` given: the parameter is `nothing`, which `sample` resolves to `keys(t)`.
+EpisodesSampler() = EpisodesSampler{nothing}()
+
+
+"""Wrapper around a `NamedTuple` `nt`; `keys`, `haskey` and `getindex` are forwarded to it."""
+struct Episode{names, N <: NamedTuple{names}}
+    nt::N
+end
+@forward Episode.nt Base.keys, Base.haskey, Base.getindex
+
+# Entry points: pick the trace names (`keys(t)` when the parameter is `nothing`), then delegate.
+StatsBase.sample(s::EpisodesSampler{nothing}, t::EpisodesBuffer) = StatsBase.sample(s, t, keys(t))
+StatsBase.sample(s::EpisodesSampler{names}, t::EpisodesBuffer) where {names} = StatsBase.sample(s, t, names)
+function StatsBase.sample(::EpisodesSampler, t::EpisodesBuffer, names)
+    # Walk the buffer once and record one index range per stored episode.
+    spans = UnitRange{Int}[]
+    cursor = 1
+    while cursor < length(t)
+        # Skip indices that are not flagged as sampleable.
+        t.sampleable_inds[cursor] == 1 || (cursor += 1; continue)
+        # Presumably: steps remaining in this episode, plus one for its final state.
+        episode_end = cursor + t.episodes_lengths[cursor] - t.step_numbers[cursor] + 1
+        push!(spans, cursor:episode_end)
+        cursor = episode_end + 1
+    end
+    # Copy the requested traces of each range into its own `Episode`.
+    return [Episode(NamedTuple{names}(map(k -> collect(t[Val(k)][r]), names))) for r in spans]
+end
diff --git a/test/samplers.jl b/test/samplers.jl