Normalizer Wrapper #12

Merged
merged 27 commits on May 31, 2022
Changes from 5 commits
2 changes: 1 addition & 1 deletion src/Trajectories.jl
@@ -5,8 +5,8 @@ include("controlers.jl")
include("traces.jl")
include("episodes.jl")
include("trajectory.jl")
include("normalization.jl")
include("rendering.jl")
include("common/common.jl")
include("normalization.jl")

end
147 changes: 82 additions & 65 deletions src/normalization.jl
@@ -1,6 +1,6 @@
import OnlineStats: OnlineStats, Group, Moments, fit!, OnlineStat, Weight, EqualWeight, mean, std
export state_normalizer, reward_normalizer, NormalizedTrajectory, Normalizer
using MacroTools
export scalar_normalizer, array_normalizer, NormalizedTrace, Normalizer
import MacroTools.@forward

"""
Normalizer(::OnlineStat)
@@ -11,17 +11,34 @@ struct Normalizer{OS<:OnlineStat}
os::OS
end

MacroTools.@forward Normalizer.os OnlineStats.mean, OnlineStats.std, Base.iterate, normalize!, Base.length
@forward Normalizer.os OnlineStats.mean, OnlineStats.std, Base.iterate, normalize, Base.length



#Treats last dim as batch dim
function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
for d in eachslice(data, dims = ndims(data))
fit!(n.os, vec(d))
end
n
end

function OnlineStats.fit!(n::Normalizer{<:Group}, y::AbstractVector)
fit!(n.os, y)
n
end

function OnlineStats.fit!(n::Normalizer, y)
for yi in y
fit!(n, yi)
fit!(n.os, vec(yi))
end
n
end

function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
fit!(n.os, vec(data))
function OnlineStats.fit!(n::Normalizer{<:Moments}, y::AbstractVector{<:Number})
for yi in y
fit!(n.os, yi)
end
n
end
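For orientation (not part of the diff): a minimal sketch of the batch-dimension convention these `fit!` methods implement, with illustrative sizes.

using OnlineStats: Group, Moments, fit!, mean
n = Normalizer(Group([Moments() for _ in 1:5]))  # one Moments estimator per element of a length-5 state
batch = rand(Float32, 5, 10)                     # 10 observations; the last dimension is the batch dimension
fit!(n, batch)                                   # each length-5 slice is flattened with vec and fed to the Group
[mean(stat) for stat in n]                       # per-element running means (iterate is forwarded to the wrapped Group)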

@@ -31,68 +48,64 @@ function OnlineStats.fit!(n::Normalizer, data::Number)
end

"""
reward_normalizer(;weights = OnlineStats.EqualWeight())
scalar_normalizer(; weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar rewards. By default, all rewards have equal weights.
Returns a preconfigured normalizer for scalar traces such as rewards. By default, all samples have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) to use variants such as exponential weights to favor the most recent observations.
"""
reward_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))
scalar_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))
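Illustrative usage (a sketch, assuming the package is loaded; the numbers are made up):

rn = scalar_normalizer()
fit!(rn, 1.0:10.0)               # update the running moments with ten reward samples
normalize(rn, [6.0, 5.0, 10.0])  # forwarded to the wrapped Moments; returns a zero-mean, unit-variance copy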

"""
state_normalizer([state_size::Tuple{Int}]; weights = OnlineStats.EqualWeight())
array_normalizer(size::NTuple{N,Int}; weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar or array states.
For Array states, state_size is a tuple containing the dimension sizes of a state. E.g. `(10,)` for a 10-elements vector, or `(252,252)` for a square image.
For scalar states, do not provide a state_size information.
By default, all states have equal weights.
Returns a preconfigured normalizer for array traces such as vector or matrix states.
`size` is a tuple containing the dimension sizes of a state, e.g. `(10,)` for a 10-element vector or `(252,252)` for a square image.
By default, all samples have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) to use variants such as exponential weights to favor the most recent observations.
"""
state_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))

state_normalizer(state_size; weight::Weight = EqualWeight()) = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(state_size)]))
array_normalizer(size::NTuple{N,Int}; weight::Weight = EqualWeight()) where N = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(size)]))
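Illustrative usage (a sketch, not part of the diff); as elsewhere in this file, the last dimension is treated as the batch dimension:

sn = array_normalizer((5,))        # one Moments estimator per element of a length-5 state
fit!(sn, reshape(1.0:50.0, 5, 10)) # a batch of 10 states
normalize(sn, rand(5, 3))          # returns a new 5x3 array with each column normalized element-wise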


"""
NormalizedTrajectory(trajectory, normalizer::Dict{Symbol, Normalizer})
NormalizedTrajectory(trajectory, normalizer::Pair{Symbol, Normalizer}...)
NormalizedTrace(trace::Trace, normalizer::Normalizer)

Wraps a `Trajectory` and a [`Normalizer`](@ref). When pushing new elements of `:trace` to trajectory, a `NormalizedTrajectory` will first update `normalizer[:trace]`, an online estimate of the mean and variance of :trace.
When sampling `:trace` from a normalized trajectory, it will first normalize the samples using `normalizer[:trace]`, if `:trace` is in the keys of `normalizer`, according to its current estimate.
Wraps a [`Trace`](@ref) and a [`Normalizer`](@ref). When pushing new elements to the trace, a `NormalizedTrace` will first update a running estimate of the moments of that trace.
When sampling a normalized trace, it will first normalize the samples to zero mean and unit variance according to its current estimate.

Use a `Normalizer(Moments())` estimate for scalar traces, `Normalizer(Group([Moments() for el in trace]))` for Array estimates.
Predefined constructors are provide for scalar rewards (see [`reward_normalizer`](@ref)) and states (see [`state_normalizer`](@ref))
Preconfigured normalizers are provided for scalar traces (see [`scalar_normalizer`](@ref)) and array traces (see [`array_normalizer`](@ref)).

#Example
NormalizedTrajectory(
my_trajectory,
:state => state_normalizer((5,5)),
:reward => reward_normalizer(weight = OnlineStats.ExponentialWeight(0.9))
t = Trajectory(
container=Traces(
a_scalar_trace = NormalizedTrace(Float32[], scalar_normalizer()),
a_non_normalized_trace=Bool[],
a_vector_trace = NormalizedTrace(Vector{Float32}[], array_normalizer((10,))),
a_matrix_trace = NormalizedTrace(Matrix{Float32}[], array_normalizer((252,252), weight = OnlineStats.ExponentialWeight(0.9f0)))
),
sampler=BatchSampler(3),
controler=InsertSampleRatioControler(0.25, 4)
)

"""
struct NormalizedTrajectory{T, N}
trajectory::T
normalizer::Dict{Symbol, N}
struct NormalizedTrace{T <: Trace, N <: Normalizer}
trace::T
normalizer::N
end

NormalizedTrajectory(traj::Trajectory, pairs::Pair{<:Symbol, <:Normalizer}...) = NormalizedTrajectory(traj, Dict(pairs))
NormalizedTrace(x, normalizer) = NormalizedTrace(convert(Trace, x), normalizer)

function Base.push!(nt::NormalizedTrajectory; x...)
for (key, value) in x
if key in keys(nt.normalizer)
fit!(nt.normalizer[key], value)
end
end
push!(nt.trajectory; x...)
@forward NormalizedTrace.trace Base.length, Base.lastindex, Base.firstindex, Base.getindex, Base.view, Base.pop!, Base.popfirst!, Base.empty!

Base.convert(::Type{Trace}, x::NormalizedTrace) = x #ignore conversion to Trace

function Base.push!(nt::NormalizedTrace, x)
fit!(nt.normalizer, x)
push!(nt.trace, x)
end

function Base.append!(nt::NormalizedTrajectory; x...)
for (key, value) in x
if key in keys(nt.normalizer)
fit!(nt.normalizer[key], value)
end
end
append!(nt.trajectory; x...)
function Base.append!(nt::NormalizedTrace, x)
fit!(nt.normalizer, x)
append!(nt.trace, x)
end

"""
@@ -101,41 +114,45 @@ end
Given a Moments estimate of the elements of x, a vector of scalar samples,
normalizes x elementwise to zero mean and unit variance.
"""

function normalize!(os::Moments, x::AbstractVector)
function normalize(os::Moments, x::AbstractVector)
m, s = mean(os), std(os)
x .-= m
x ./= s
return (x .- m) ./ s
end
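A quick sketch of the non-mutating behaviour (illustrative values):

os = Moments()
fit!(os, [1.0, 2.0, 3.0, 4.0])
normalize(os, [2.5, 5.0])   # == ([2.5, 5.0] .- mean(os)) ./ std(os); the input vector is not modified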


"""
normalize(os::Group{<:AbstractVector{<:Moments}}, x)

Given an os::Group{<:Tuple{Moments}}, that is, a multivariate estimator of the moments of each element of x,
normalizes each element of x to zero mean, and unit variance. Treats the last dimension as a batch dimension if `ndims(x) >= 2`.
"""
function normalize!(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
m = [mean(stat) for stat in os]
s = [std(stat) for stat in os]
x .-= m
x ./= s
return (x .- m) ./ s
end

function normalize!(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
for slice in eachslice(x, dims = ndims(x))
normalize!(os, vec(slice))
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
xn = similar(x)
for (i, slice) in enumerate(eachslice(x, dims = ndims(x)))
xn[ntuple(_ -> Colon(), ndims(x)-1)..., i] .= reshape(normalize(os, vec(slice)), size(x)[1:end-1]...)
end
return xn
end

function Base.take!(nt::NormalizedTrajectory)
x = take!(nt.trajectory)
if isnothing(x)
x
else
for key in keys(nt.normalizer)
normalize!(nt.normalizer[key], x[key])
end
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector{<:AbstractArray})
xn = similar(x)
for (i,el) in enumerate(x)
xn[i] = normalize(os, vec(el))
end
x
return xn
end

function fetch(nt::NormalizedTrace, inds)
batch = deepcopy(fetch(nt.trace, inds))
normalize(nt.normalizer.os, batch)
end

function sample(s, nt::NormalizedTrace)
batch = deepcopy(sample(s, nt.trace))
normalize(nt.normalizer.os, batch)
end
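To make the flow concrete, a sketch (not part of the diff; it assumes `Trace` supports `append!`, as the code above does):

nt = NormalizedTrace(Float32[], scalar_normalizer())
append!(nt, [1f0, 2f0, 3f0])   # fit! runs before the data is stored, so the moments track everything pushed
push!(nt, 2f0)
fetch(nt, [1, 4])              # normalized copy of elements 1 and 4; the raw values in nt.trace are untouched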
2 changes: 1 addition & 1 deletion src/rendering.jl
@@ -19,7 +19,7 @@ function inner_convert(::Type{Term.AbstractRenderable}, x; style="gray1", width=
end

Base.convert(T::Type{Term.AbstractRenderable}, t::Trace{<:AbstractArray}; kw...) = convert(T, Trace(collect(eachslice(t.x, dims=ndims(t.x)))); kw..., type=typeof(t), subtitle="size: $(size(t.x))")

Base.convert(T::Type{Term.AbstractRenderable}, t::NormalizedTrace; kw...) = convert(T, t.trace; kw..., type = typeof(t))
function Base.convert(
::Type{Term.AbstractRenderable},
t::Trace{<:AbstractVector};
4 changes: 3 additions & 1 deletion src/traces.jl
@@ -32,6 +32,8 @@ Base.pop!(t::Trace) = pop!(t.x)
Base.popfirst!(t::Trace) = popfirst!(t.x)
Base.empty!(t::Trace) = empty!(t.x)

fetch(t::Trace, inds) = t[inds]

##

function sample(s::BatchSampler, t::Trace)
@@ -83,6 +85,6 @@ Base.empty!(t::Traces) = map(empty!, t.traces)
function sample(s::BatchSampler, t::Traces)
inds = rand(s.rng, 1:length(t), s.batch_size)
map(t.traces) do x
x[inds]
fetch(x, inds)
end |> s.transformer
end
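For contrast, a sketch of the fallback path: a plain `Trace` goes through the same `fetch` call but gets no normalization, so `sample` does not need to know which traces are wrapped.

plain = Trace(Bool[false, true, true])
fetch(plain, [1, 3])   # == plain[[1, 3]]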
31 changes: 16 additions & 15 deletions test/normalization.jl
@@ -1,47 +1,48 @@
using Test
using Trajectories
import Trajectories.normalize!
import Trajectories.normalize
import OnlineStats: fit!, mean, std

#@testset "normalization.jl" begin
@testset "normalization.jl" begin
#scalar normalization
rewards = [1.:10;]
rn = reward_normalizer()
rn = scalar_normalizer()
fit!(rn, rewards)
batch_reward = [6.,5.,10.]
output = normalize(rn, batch_reward)
@test output != [6.,5.,10.]
#vector normalization
states = reshape([1:50;], 5, 10)
sn = state_normalizer((5,))
fit!(sn, eachslice(states; dims = ndims(states)))
sn = array_normalizer((5,))
fit!(sn, states)
@test [mean(stat) for stat in sn] == [mean((1:5:46) .+i) for i in 0:4]
batch_states = reshape(repeat(5.:-1:1, 5), 5,5)
batch_states = normalize(sn, batch_states)
@test all(length(unique(x)) == 1 for x in eachrow(batch_states))
#array normalization
states = reshape(1.:250, 5,5,10)
sn = state_normalizer((5,5))
sn = array_normalizer((5,5))
fit!(sn, eachslice(states, dims = 3))
batch_states = collect(states)
batch_states = normalize(sn, batch_states)

#NormalizedTrajectory
#NormalizedTrace
t = Trajectory(
container=Traces(
a=Float32[],
b=Int[]
a= NormalizedTrace(Float32[], scalar_normalizer()),
b=Int[],
c=NormalizedTrace(Vector{Float32}[], array_normalizer((10,))) #TODO check with ElasticArrays and Episodes
),
sampler=BatchSampler(30000),
controler=InsertSampleRatioControler(Inf, 0)
)
nt = NormalizedTrajectory(t, :a => reward_normalizer())
append!(nt, a = [1,2,3], b = [1,2,3])
push!(nt, a = 2, b = 2)
@test mean(nt.normalizer[:a]) ≈ 2.
@test std(nt.normalizer[:a]) ≈ std([1,2,2,3])
a,b = take!(nt)
append!(t, a = [1,2,3], b = [1,2,3], c = eachcol(reshape(1f0:30, 10,3)))
push!(t, a = 2, b = 2, c = fill(mean(1:30), 10))
@test mean(t.container[:a].trace.x) ≈ 2.
@test std(t.container[:a].trace.x) ≈ std([1,2,2,3])
a,b,c = take!(t)
@test eltype(a) == Float32
@test mean(a) ≈ 0 atol = 0.01
@test mean(b) ≈ 2 atol = 0.01 #b is not normalized
@test all(isapprox(0f0, atol = 0.01), vec(mean(reduce(hcat,c), dims = 2)))
end