Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.

Commit 14b85ae

Browse files
committed
add continuous mountain car
1 parent 3e40a0a commit 14b85ae

File tree

3 files changed

+40
-19
lines changed

3 files changed

+40
-19
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ By default, only some basic environments are installed. If you want to use some
3838
3939
- CartPoleEnv
4040
- MountainCarEnv
41+
- ContinuousMountainCarEnv
4142
- PendulumEnv
4243
- MDPEnv
4344
- POMDPEnv

src/environments/classic_control/mountain_car.jl

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,28 @@
11
using Random
22
using GR
33

4-
export MountainCarEnv
4+
export MountainCarEnv, ContinuousMountainCarEnv
55

66
"""
    MountainCarEnvParams{T}

Physical constants and episode limits of the mountain car task.

# Fields
- `min_pos`, `max_pos`: bounds the car position is clamped to.
- `max_speed`: the velocity is clamped to `[-max_speed, max_speed]`.
- `goal_pos`, `goal_velocity`: position and velocity thresholds that end an episode.
- `power`: factor applied to the force chosen by the agent.
- `gravity`: factor applied to the `cos(3x)` slope term.
- `max_steps`: number of steps after which an episode is cut off.
"""
struct MountainCarEnvParams{T}
    min_pos::T
    max_pos::T
    max_speed::T
    goal_pos::T
    goal_velocity::T
    power::T
    gravity::T
    max_steps::Int
end

"""
    MountainCarEnvParams(; T = Float64, kwargs...)

Keyword constructor. Every field can be overridden by name; the real-valued
fields are converted to `T`.
"""
function MountainCarEnvParams(;
    T = Float64,
    min_pos = -1.2,
    max_pos = 0.6,
    max_speed = 0.07,
    goal_pos = 0.5,
    max_steps = 200,
    goal_velocity = 0.0,
    power = 0.001,
    gravity = 0.0025,
)
    return MountainCarEnvParams{T}(
        min_pos, max_pos, max_speed, goal_pos, goal_velocity, power, gravity, max_steps,
    )
end
1322

14-
mutable struct MountainCarEnv{T, R<:AbstractRNG} <: AbstractEnv
23+
mutable struct MountainCarEnv{A, T, R<:AbstractRNG} <: AbstractEnv
1524
params::MountainCarEnvParams{T}
16-
action_space::DiscreteSpace
25+
action_space::A
1726
observation_space::MultiContinuousSpace{(2,), 1}
1827
state::Array{T, 1}
1928
action::Int
@@ -22,41 +31,51 @@ mutable struct MountainCarEnv{T, R<:AbstractRNG} <: AbstractEnv
2231
rng::R
2332
end
2433

25-
function MountainCarEnv(; T = Float64, min_pos = T(-1.2), max_pos = T(.6),
26-
max_speed = T(.07), goal_pos = T(.5), max_steps = 200)
27-
env = MountainCarEnv(MountainCarEnvParams(min_pos, max_pos, max_speed, goal_pos, max_steps),
28-
DiscreteSpace(3),
29-
MultiContinuousSpace([min_pos, -max_speed], [max_pos, max_speed]),
30-
zeros(T, 2),
31-
1,
32-
false,
33-
0,
34-
Random.GLOBAL_RNG)
34+
"""
    MountainCarEnv(; T = Float64, continuous = false, rng = Random.GLOBAL_RNG, kwargs...)

Build a mountain car environment and reset it before returning it.

# Keywords
- `T`: element type used for the parameters, the state vector and the bounds of the
  continuous action space.
- `continuous`: when `true`, use a `ContinuousSpace(-1, 1)` action space and the
  defaults `goal_pos = 0.45`, `power = 0.0015`; otherwise use `DiscreteSpace(3)`.
- `rng`: random number generator stored in the environment.
- `kwargs...`: forwarded to `MountainCarEnvParams`.
"""
function MountainCarEnv(; T = Float64, continuous = false,
                        rng = Random.GLOBAL_RNG, kwargs...)
    # `T` is forwarded in both branches so that `params` and `state` share one element
    # type. `kwargs` is splatted last, so caller-supplied values take precedence over
    # the continuous defaults.
    params = if continuous
        MountainCarEnvParams(; goal_pos = .45, power = .0015, T = T, kwargs...)
    else
        MountainCarEnvParams(; T = T, kwargs...)
    end
    env = MountainCarEnv(params,
                         continuous ? ContinuousSpace(-T(1.), T(1.)) : DiscreteSpace(3),
                         MultiContinuousSpace([params.min_pos, -params.max_speed],
                                              [params.max_pos, params.max_speed]),
                         zeros(T, 2),
                         1,
                         false,
                         0,
                         rng)
    reset!(env)
    env
end
53+
"""
    ContinuousMountainCarEnv(; kwargs...)

Shorthand for `MountainCarEnv(; continuous = true, kwargs...)`.
"""
function ContinuousMountainCarEnv(; kwargs...)
    return MountainCarEnv(; continuous = true, kwargs...)
end
3854

3955
# Return the action space stored in the environment.
function action_space(env::MountainCarEnv)
    return env.action_space
end
4056
# Return the observation space stored in the environment.
function observation_space(env::MountainCarEnv)
    return env.observation_space
end
4157
# Return the current state vector (the array itself, not a copy) together with the
# episode-finished flag.
function observe(env::MountainCarEnv)
    return (observation = env.state, isdone = env.done)
end
4258

43-
function reset!(env::MountainCarEnv{T}) where T
59+
"""
    reset!(env::MountainCarEnv)

Start a new episode: clear the step counter and the done flag, set the velocity to
zero and draw a new start position as `0.2 * rand(env.rng, T) - 0.6`.
Returns `nothing`.
"""
function reset!(env::MountainCarEnv{A, T}) where {A, T}
    env.t = 0
    env.done = false
    env.state[2] = 0.
    env.state[1] = .2 * rand(env.rng, T) - .6
    return nothing
end
5066

51-
function interact!(env::MountainCarEnv, a)
67+
# Continuous control: clamp the requested force to the action range [-1, 1] before
# stepping the dynamics.
interact!(env::MountainCarEnv{<:ContinuousSpace}, a) = _interact!(env, clamp(a, -1, 1))
# Discrete control: shift the action index by 2 to obtain the force
# (presumably actions 1:3 map to forces -1, 0, 1 — confirm against `DiscreteSpace`).
interact!(env::MountainCarEnv{<:DiscreteSpace}, a) = _interact!(env, a - 2)
69+
function _interact!(env::MountainCarEnv, force)
5270
env.t += 1
5371
x, v = env.state
54-
v += (a - 2)*0.001 + cos(3*x)*(-0.0025)
72+
v += force * env.params.power + cos(3*x)*(-env.params.gravity)
5573
v = clamp(v, -env.params.max_speed, env.params.max_speed)
5674
x += v
5775
x = clamp(x, env.params.min_pos, env.params.max_pos)
5876
if x == env.params.min_pos && v < 0 v = 0 end
59-
env.done = x >= env.params.goal_pos || env.t >= env.params.max_steps
77+
env.done = x >= env.params.goal_pos && v >= env.params.goal_velocity ||
78+
env.t >= env.params.max_steps
6079
env.state[1] = x
6180
env.state[2] = v
6281
(observation=env.state, reward=-1., isdone=env.done)
@@ -87,6 +106,6 @@ function render(env::MountainCarEnv)
87106
xs, ys = rotate(xs, ys, θ)
88107
xs, ys = translate(xs, ys, [x, height(x)])
89108
fillarea(xs, ys)
90-
plotendofepisode(env.params.max_pos + .1, 0, d)
109+
plotendofepisode(env.params.max_pos + .1, 0, d)
91110
updatews()
92-
end
111+
end

test/environments.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
:(basic_ViZDoom_env()),
5454
:(CartPoleEnv()),
5555
:(MountainCarEnv()),
56+
:(ContinuousMountainCarEnv()),
5657
:(PendulumEnv()),
5758
:(MDPEnv(LegacyGridWorld())),
5859
:(POMDPEnv(TigerPOMDP())),

0 commit comments

Comments
 (0)