Skip to content

Normalizer Wrapper #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.1.0"
CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Term = "22787eb5-b846-44ae-b979-8e399b8463ab"

Expand Down
1 change: 1 addition & 0 deletions src/Trajectories.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ include("episodes.jl")
include("trajectory.jl")
include("rendering.jl")
include("common/common.jl")
include("normalization.jl")

end
141 changes: 141 additions & 0 deletions src/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import OnlineStats: OnlineStats, Group, Moments, fit!, OnlineStat, Weight, EqualWeight, mean, std
export state_normalizer, reward_normalizer, NormalizedTrajectory, Normalizer
using MacroTools

"""
Normalizer(::OnlineStat)

Wraps an OnlineStat to be used by a [`NormalizedTrajectory`](@ref).
"""
struct Normalizer{OS<:OnlineStat}
os::OS
end

# Forward the listed functions from a `Normalizer` to the estimator stored in its `os`
# field. The `normalize!` methods for the wrapped estimators are defined further down in
# this file.
MacroTools.@forward Normalizer.os OnlineStats.mean, OnlineStats.std, Base.iterate, normalize!, Base.length

# Generic fallback: iterate over `y` and fit its elements one at a time, letting dispatch
# select the appropriate `fit!` method for each element. Returns the normalizer.
function OnlineStats.fit!(n::Normalizer, y)
    foreach(obs -> fit!(n, obs), y)
    return n
end

# Array input: flatten it with `vec` and hand the result to the wrapped estimator.
# Returns the normalizer.
function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
    flat = vec(data)
    fit!(n.os, flat)
    return n
end

# Scalar input: pass it straight to the wrapped estimator, then return the normalizer.
OnlineStats.fit!(n::Normalizer, data::Number) = (fit!(n.os, data); n)

"""
reward_normalizer(;weights = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar rewards. By default, all rewards have equal weights.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) to use variants such as exponential weights to favor the most recent observations.
"""
reward_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))

"""
state_normalizer([state_size::Tuple{Int}]; weights = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar or array states.
For Array states, state_size is a tuple containing the dimension sizes of a state. E.g. `(10,)` for a 10-elements vector, or `(252,252)` for a square image.
For scalar states, do not provide a state_size information.
By default, all states have equal weights.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) to use variants such as exponential weights to favor the most recent observations.
"""
state_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))

state_normalizer(state_size; weight::Weight = EqualWeight()) = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(state_size)]))


"""
NormalizedTrajectory(trajectory, normalizer::Dict{Symbol, Normalizer})
NormalizedTrajectory(trajectory, normalizer::Pair{Symbol, Normalizer}...)

Wraps a `Trajectory` and a [`Normalizer`](@ref). When pushing new elements of `:trace` to trajectory, a `NormalizedTrajectory` will first update `normalizer[:trace]`, an online estimate of the mean and variance of :trace.
When sampling `:trace` from a normalized trajectory, it will first normalize the samples using `normalizer[:trace]`, if `:trace` is in the keys of `normalizer`, according to its current estimate.

Use a `Normalizer(Moments())` estimate for scalar traces, `Normalizer(Group([Moments() for el in trace]))` for Array estimates.
Predefined constructors are provide for scalar rewards (see [`reward_normalizer`](@ref)) and states (see [`state_normalizer`](@ref))

#Example
NormalizedTrajectory(
my_trajectory,
:state => state_normalizer((5,5)),
:reward => reward_normalizer(weight = OnlineStats.ExponentialWeight(0.9))
)

"""
struct NormalizedTrajectory{T, N}
trajectory::T
normalizer::Dict{Symbol, N}
end

# Convenience constructor: collect the `trace_name => normalizer` pairs into the `Dict`
# stored by the struct.
function NormalizedTrajectory(traj::Trajectory, pairs::Pair{<:Symbol, <:Normalizer}...)
    normalizers = Dict(pairs)
    return NormalizedTrajectory(traj, normalizers)
end

# Update the estimate of every pushed trace that has a normalizer, then forward the push
# to the wrapped trajectory and return its result.
function Base.push!(nt::NormalizedTrajectory; x...)
    for (name, val) in x
        haskey(nt.normalizer, name) && fit!(nt.normalizer[name], val)
    end
    return push!(nt.trajectory; x...)
end

# Update the estimate of every appended trace that has a normalizer, then forward the
# append to the wrapped trajectory and return its result.
# NOTE(review): each value is passed to `fit!` as a whole; confirm this does what is
# intended when a batch of array-valued states is appended.
function Base.append!(nt::NormalizedTrajectory; x...)
    for (name, val) in x
        haskey(nt.normalizer, name) && fit!(nt.normalizer[name], val)
    end
    return append!(nt.trajectory; x...)
end

"""
normalize!(os::Moments, x)

Given an Moments estimate of the elements of x, a vector of scalar traces,
normalizes x elementwise to zero mean, and unit variance.
"""

function normalize!(os::Moments, x::AbstractVector)
m, s = mean(os), std(os)
x .-= m
x ./= s
end


"""
normalize!(os::Group{<:AbstractVector{<:Moments}}, x)

Given an os::Group{<:Tuple{Moments}}, that is, a multivariate estimator of the moments of each element of x,
normalizes each element of x to zero mean, and unit variance. Treats the last dimension as a batch dimension if `ndims(x) >= 2`.
"""
function normalize!(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
m = [mean(stat) for stat in os]
s = [std(stat) for stat in os]
x .-= m
x ./= s
end

"""
    normalize!(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)

Normalize `x` in place, treating its last dimension as a batch dimension: every slice
along `ndims(x)` is flattened with `vec` and normalized with the vector method of
`normalize!`. Return `x`, like the other `normalize!` methods.
"""
function normalize!(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
    for slice in eachslice(x; dims = ndims(x))
        # `vec(slice)` is normalized in place; the test suite relies on this updating `x`.
        normalize!(os, vec(slice))
    end
    return x
end

# Take a sample from the wrapped trajectory and normalize, in place, every trace that has
# a normalizer. A `nothing` result from the wrapped trajectory is returned untouched.
function Base.take!(nt::NormalizedTrajectory)
    x = take!(nt.trajectory)
    isnothing(x) && return x
    for (name, normalizer) in nt.normalizer
        normalize!(normalizer, x[name])
    end
    return x
end
47 changes: 47 additions & 0 deletions test/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using Test
using Trajectories
import Trajectories.normalize!
import OnlineStats: fit!, mean, std

# The `@testset ... begin` opener must be active: the block is closed by the final `end`,
# which would otherwise be unmatched and make the file fail to parse.
@testset "normalization.jl" begin
    # scalar normalization
    rewards = [1.:10;]
    rn = reward_normalizer()
    fit!(rn, rewards)
    batch_reward = [6.,5.,10.]
    output = normalize!(rn, batch_reward)
    # normalize! works in place and returns the same (modified) vector
    @test batch_reward == output != [6.,5.,10.]

    # vector normalization
    states = reshape([1:50;], 5, 10)
    sn = state_normalizer((5,))
    fit!(sn, eachslice(states; dims = ndims(states)))
    @test [mean(stat) for stat in sn] == [mean((1:5:46) .+i) for i in 0:4]
    batch_states = reshape(repeat(5.:-1:1, 5), 5,5)
    normalize!(sn, batch_states)
    # every column held the same state, so each row must be constant after normalization
    @test all(length(unique(x)) == 1 for x in eachrow(batch_states))

    # array normalization
    states = reshape(1.:250, 5,5,10)
    sn = state_normalizer((5,5))
    fit!(sn, eachslice(states, dims = 3))
    batch_states = collect(states)
    normalize!(sn, batch_states)
    # the batch is the data the estimator was fitted on, so each element is centered
    @test all(m -> isapprox(m, 0; atol = 1e-8), mean(batch_states; dims = 3))

    # NormalizedTrajectory
    t = Trajectory(
        container=Traces(
            a=Float32[],
            b=Int[]
        ),
        sampler=BatchSampler(30000),
        controler=InsertSampleRatioControler(Inf, 0)
    )
    nt = NormalizedTrajectory(t, :a => reward_normalizer())
    append!(nt, a = [1,2,3], b = [1,2,3])
    push!(nt, a = 2, b = 2)
    @test mean(nt.normalizer[:a]) ≈ 2.
    @test std(nt.normalizer[:a]) ≈ std([1,2,2,3])
    a,b = take!(nt)
    @test eltype(a) == Float32
    @test mean(a) ≈ 0 atol = 0.01
    @test mean(b) ≈ 2 atol = 0.01 #b is not normalized
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ using Test
@testset "Trajectories.jl" begin
    # Run the test files one after the other, in this order.
    for file in ("traces.jl", "trajectories.jl", "normalization.jl")
        include(file)
    end
end