This repository was archived by the owner on May 6, 2021. It is now read-only.

Add ContinuousMountainCar #8

Merged 2 commits on Jul 24, 2019
1 change: 1 addition & 0 deletions README.md
@@ -38,6 +38,7 @@ By default, only some basic environments are installed. If you want to use some

 - CartPoleEnv
 - MountainCarEnv
+- ContinuousMountainCarEnv
 - PendulumEnv
 - MDPEnv
 - POMDPEnv
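The README list above is the only documentation change. A minimal usage sketch of the new environment, assuming the package is `ReinforcementLearningEnvironments.jl` (the package name is not shown in this diff) and that the exported constructors behave as defined further down:

```julia
using ReinforcementLearningEnvironments

env = ContinuousMountainCarEnv()   # the environment added by this PR
obs = observe(env)                 # named tuple: (observation = ..., isdone = ...)
```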
2 changes: 1 addition & 1 deletion src/environments/atari.jl
@@ -9,7 +9,7 @@ struct AtariEnv{To,F} <: AbstractEnv
     actions::Array{Int32, 1}
     action_space::DiscreteSpace{Int}
     observation_space::To
-    noopmax::Int64
+    noopmax::Int
 end

 action_space(env::AtariEnv) = env.action_space
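The `Int64` → `Int` change here (and in the files below) is a portability fix: in Julia, `Int` is an alias for the platform's native word size, so struct fields follow the host instead of hard-coding a 64-bit layout. A quick check:

```julia
# `Int` aliases the native word size: Int64 on 64-bit Julia, Int32 on 32-bit.
@assert Int === (Sys.WORD_SIZE == 64 ? Int64 : Int32)
```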
4 changes: 2 additions & 2 deletions src/environments/classic_control/cart_pole.jl
@@ -14,7 +14,7 @@ struct CartPoleEnvParams{T}
     tau::T
     thetathreshold::T
     xthreshold::T
-    max_steps::Int64
+    max_steps::Int
 end

 mutable struct CartPoleEnv{T, R<:AbstractRNG} <: AbstractEnv
@@ -24,7 +24,7 @@ mutable struct CartPoleEnv{T, R<:AbstractRNG} <: AbstractEnv
     state::Array{T, 1}
     action::Int
     done::Bool
-    t::Int64
+    t::Int
     rng::R
 end

26 changes: 13 additions & 13 deletions src/environments/classic_control/mdp.jl
@@ -63,7 +63,7 @@ MDPEnv(model; rng=Random.GLOBAL_RNG) = MDPEnv(
 action_space(env::Union{MDPEnv, POMDPEnv}) = env.action_space
 observation_space(env::Union{MDPEnv, POMDPEnv}) = env.observation_space

-observationindex(env, o) = Int64(o) + 1
+observationindex(env, o) = Int(o) + 1

 function reset!(env::Union{POMDPEnv, MDPEnv})
     initialstate(env.model, env.rng)
@@ -89,13 +89,13 @@ end
 #####
 """
     mutable struct SimpleMDPEnv
-        ns::Int64
-        na::Int64
-        state::Int64
+        ns::Int
+        na::Int
+        state::Int
         trans_probs::Array{AbstractArray, 2}
        reward::R
-        initialstates::Array{Int64, 1}
-        isterminal::Array{Int64, 1}
+        initialstates::Array{Int, 1}
+        isterminal::Array{Int, 1}
         rng::S
 A Markov Decision Process with `ns` states, `na` actions, current `state`,
 `na`x`ns`-array of transition probabilities `trans_probs` which consists for
@@ -110,11 +110,11 @@ probabilities) `reward` of type `R` (see [`DeterministicStateActionReward`](@ref
 mutable struct SimpleMDPEnv{T,R,S<:AbstractRNG}
     observation_space::DiscreteSpace
     action_space::DiscreteSpace
-    state::Int64
+    state::Int
     trans_probs::Array{T, 2}
     reward::R
-    initialstates::Array{Int64, 1}
-    isterminal::Array{Int64, 1}
+    initialstates::Array{Int, 1}
+    isterminal::Array{Int, 1}
     rng::S
 end

@@ -186,10 +186,10 @@ expected_rewards(r::NormalStateActionReward, ::Any) = r.mean

 # run SimpleMDPEnv
 """
-    run!(mdp::SimpleMDPEnv, action::Int64)
+    run!(mdp::SimpleMDPEnv, action::Int)
 Transition to a new state given `action`. Returns the new state.
 """
-function run!(mdp::SimpleMDPEnv, action::Int64)
+function run!(mdp::SimpleMDPEnv, action::Int)
     if mdp.isterminal[mdp.state] == 1
         reset!(mdp)
     else
@@ -199,9 +199,9 @@ function run!(mdp::SimpleMDPEnv, action::Int64)
 end

 """
-    run!(mdp::SimpleMDPEnv, policy::Array{Int64, 1}) = run!(mdp, policy[mdp.state])
+    run!(mdp::SimpleMDPEnv, policy::Array{Int, 1}) = run!(mdp, policy[mdp.state])
 """
-run!(mdp::SimpleMDPEnv, policy::Array{Int64, 1}) = run!(mdp, policy[mdp.state])
+run!(mdp::SimpleMDPEnv, policy::Array{Int, 1}) = run!(mdp, policy[mdp.state])


 function interact!(env::SimpleMDPEnv, action)
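To illustrate the two `run!` methods above: the scalar method applies a single action, and the vector method treats the array as a state-indexed policy. A sketch, assuming `mdp` is an already-constructed `SimpleMDPEnv` with 3 states and 2 actions (its construction is not shown in this diff):

```julia
s = run!(mdp, 1)        # take action 1; resets first if the current state is terminal

policy = [1, 2, 2]      # one action per state
s = run!(mdp, policy)   # dispatches to run!(mdp, policy[mdp.state])
```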
63 changes: 41 additions & 22 deletions src/environments/classic_control/mountain_car.jl
@@ -1,62 +1,81 @@
 using Random
 using GR

-export MountainCarEnv
+export MountainCarEnv, ContinuousMountainCarEnv

 struct MountainCarEnvParams{T}
     min_pos::T
     max_pos::T
     max_speed::T
     goal_pos::T
-    max_steps::Int64
+    goal_velocity::T
+    power::T
+    gravity::T
+    max_steps::Int
 end
+function MountainCarEnvParams(; T = Float64, min_pos = -1.2, max_pos = .6,
+                              max_speed = .07, goal_pos = .5, max_steps = 200,
+                              goal_velocity = .0, power = .001, gravity = .0025)
+    MountainCarEnvParams{T}(min_pos, max_pos, max_speed, goal_pos,
+                            goal_velocity, power, gravity, max_steps)
+end

-mutable struct MountainCarEnv{T, R<:AbstractRNG} <: AbstractEnv
+mutable struct MountainCarEnv{A, T, R<:AbstractRNG} <: AbstractEnv
     params::MountainCarEnvParams{T}
-    action_space::DiscreteSpace
+    action_space::A
     observation_space::MultiContinuousSpace{(2,), 1}
     state::Array{T, 1}
-    action::Int64
+    action::Int
     done::Bool
-    t::Int64
+    t::Int
     rng::R
 end

-function MountainCarEnv(; T = Float64, min_pos = T(-1.2), max_pos = T(.6),
-                        max_speed = T(.07), goal_pos = T(.5), max_steps = 200)
-    env = MountainCarEnv(MountainCarEnvParams(min_pos, max_pos, max_speed, goal_pos, max_steps),
-                         DiscreteSpace(3),
-                         MultiContinuousSpace([min_pos, -max_speed], [max_pos, max_speed]),
-                         zeros(T, 2),
-                         1,
-                         false,
-                         0,
-                         Random.GLOBAL_RNG)
+function MountainCarEnv(; T = Float64, continuous = false,
+                        rng = Random.GLOBAL_RNG, kwargs...)
+    if continuous
+        params = MountainCarEnvParams(; goal_pos = .45, power = .0015, T = T, kwargs...)
+    else
+        params = MountainCarEnvParams(; kwargs...)
+    end
+    env = MountainCarEnv(params,
+                         continuous ? ContinuousSpace(-T(1.), T(1.)) : DiscreteSpace(3),
+                         MultiContinuousSpace([params.min_pos, -params.max_speed],
+                                              [params.max_pos, params.max_speed]),
+                         zeros(T, 2),
+                         1,
+                         false,
+                         0,
+                         rng)
     reset!(env)
     env
 end
+ContinuousMountainCarEnv(; kwargs...) = MountainCarEnv(; continuous = true, kwargs...)

 action_space(env::MountainCarEnv) = env.action_space
 observation_space(env::MountainCarEnv) = env.observation_space
 observe(env::MountainCarEnv) = (observation=env.state, isdone=env.done)

-function reset!(env::MountainCarEnv{T}) where T
+function reset!(env::MountainCarEnv{A, T}) where {A, T}
     env.state[1] = .2 * rand(env.rng, T) - .6
     env.state[2] = 0.
     env.done = false
     env.t = 0
     nothing
 end

-function interact!(env::MountainCarEnv, a)
+interact!(env::MountainCarEnv{<:ContinuousSpace}, a) = _interact!(env, min(max(a, -1), 1))
+interact!(env::MountainCarEnv{<:DiscreteSpace}, a) = _interact!(env, a - 2)
+function _interact!(env::MountainCarEnv, force)
     env.t += 1
     x, v = env.state
-    v += (a - 2)*0.001 + cos(3*x)*(-0.0025)
+    v += force * env.params.power + cos(3*x)*(-env.params.gravity)
     v = clamp(v, -env.params.max_speed, env.params.max_speed)
     x += v
     x = clamp(x, env.params.min_pos, env.params.max_pos)
     if x == env.params.min_pos && v < 0 v = 0 end
-    env.done = x >= env.params.goal_pos || env.t >= env.params.max_steps
+    env.done = x >= env.params.goal_pos && v >= env.params.goal_velocity ||
+               env.t >= env.params.max_steps
     env.state[1] = x
     env.state[2] = v
     (observation=env.state, reward=-1., isdone=env.done)
@@ -87,6 +106,6 @@ function render(env::MountainCarEnv)
     xs, ys = rotate(xs, ys, θ)
     xs, ys = translate(xs, ys, [x, height(x)])
     fillarea(xs, ys)
-    plotendofepisode(env.params.max_pos + .1, 0, d)
+    plotendofepisode(env.params.max_pos + .1, 0, d)
     updatews()
 end
 end
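With the new `A` type parameter, the action-space type selects the `interact!` method at dispatch time: discrete actions 1–3 are shifted to forces -1, 0, 1 by `a - 2`, while continuous actions are clamped to [-1, 1] and scaled by `params.power`. A usage sketch with the keyword defaults defined above:

```julia
env = MountainCarEnv()             # discrete: action_space = DiscreteSpace(3)
interact!(env, 3)                  # push right with force +1

cenv = ContinuousMountainCarEnv()  # same as MountainCarEnv(continuous = true)
step = interact!(cenv, 0.5)        # returns (observation = ..., reward = -1.0, isdone = ...)
```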
4 changes: 2 additions & 2 deletions src/environments/classic_control/pendulum.jl
@@ -9,7 +9,7 @@ struct PendulumEnvParams{T}
     m::T
     l::T
     dt::T
-    max_steps::Int64
+    max_steps::Int
 end

 mutable struct PendulumEnv{T, R<:AbstractRNG} <: AbstractEnv
@@ -18,7 +18,7 @@ mutable struct PendulumEnv{T, R<:AbstractRNG} <: AbstractEnv
     observation_space::MultiContinuousSpace{(3,), 1}
     state::Array{T, 1}
     done::Bool
-    t::Int64
+    t::Int
     rng::R
 end

4 changes: 2 additions & 2 deletions src/environments/hanabi.jl
@@ -91,8 +91,8 @@ mutable struct HanabiEnv <: AbstractEnv
     state::Base.RefValue{Hanabi.LibHanabi.PyHanabiState}
     moves::Vector{Base.RefValue{Hanabi.LibHanabi.PyHanabiMove}}
     observation_encoder::Base.RefValue{Hanabi.LibHanabi.PyHanabiObservationEncoder}
-    observation_space::MultiDiscreteSpace{Int64, 1}
-    action_space::DiscreteSpace{Int64}
+    observation_space::MultiDiscreteSpace{Int, 1}
+    action_space::DiscreteSpace{Int}
     reward::HanabiResult

     function HanabiEnv(;kw...)
1 change: 1 addition & 0 deletions test/environments.jl
@@ -53,6 +53,7 @@
     :(basic_ViZDoom_env()),
     :(CartPoleEnv()),
     :(MountainCarEnv()),
+    :(ContinuousMountainCarEnv()),
     :(PendulumEnv()),
     :(MDPEnv(LegacyGridWorld())),
     :(POMDPEnv(TigerPOMDP())),
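The new test entry runs `ContinuousMountainCarEnv()` through the same generic checks as the other environments. A minimal random-rollout sketch against the API in this diff (an illustration, not the project's actual test loop):

```julia
env = ContinuousMountainCarEnv()
reset!(env)
while true
    a = 2 * rand() - 1                       # random force in [-1, 1]
    obs, reward, isdone = interact!(env, a)  # named tuples iterate in field order
    isdone && break                          # done at the goal or after max_steps
end
```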