1
1
using Random
2
2
using GR
3
3
4
- export MountainCarEnv
4
+ export MountainCarEnv, ContinuousMountainCarEnv
5
5
6
6
"""
    MountainCarEnvParams{T}

Constants of the mountain-car task. Every physical quantity shares the numeric
type `T`; `max_steps` is always an `Int`.
"""
struct MountainCarEnvParams{T}
    min_pos::T          # lower bound the position is clamped to
    max_pos::T          # upper bound the position is clamped to
    max_speed::T        # velocity is clamped to [-max_speed, max_speed]
    goal_pos::T         # position that has to be reached to finish an episode
    goal_velocity::T    # velocity that has to be reached at the goal position
    power::T            # factor applied to the force chosen by the agent
    gravity::T          # factor applied to the `cos(3x)` slope term
    max_steps::Int      # step count after which an episode ends regardless
end
16
"""
    MountainCarEnvParams(; T = Float64, kwargs...)

Keyword constructor. Every value is converted to `T` by the parametric
constructor, except `max_steps`, which stays an `Int`.

# Keywords
- `T = Float64`: numeric type of the parameters.
- `min_pos = -1.2`, `max_pos = .6`, `max_speed = .07`
- `goal_pos = .5`, `goal_velocity = .0`
- `power = .001`, `gravity = .0025`
- `max_steps = 200`
"""
function MountainCarEnvParams(; T = Float64,
                              min_pos = -1.2, max_pos = .6, max_speed = .07,
                              goal_pos = .5, goal_velocity = .0,
                              power = .001, gravity = .0025,
                              max_steps = 200)
    return MountainCarEnvParams{T}(min_pos, max_pos, max_speed, goal_pos,
                                   goal_velocity, power, gravity, max_steps)
end
13
22
14
- mutable struct MountainCarEnv{T, R<: AbstractRNG } <: AbstractEnv
23
+ mutable struct MountainCarEnv{A, T, R<: AbstractRNG } <: AbstractEnv
15
24
params:: MountainCarEnvParams{T}
16
- action_space:: DiscreteSpace
25
+ action_space:: A
17
26
observation_space:: MultiContinuousSpace{(2,), 1}
18
27
state:: Array{T, 1}
19
28
action:: Int
@@ -22,41 +31,51 @@ mutable struct MountainCarEnv{T, R<:AbstractRNG} <: AbstractEnv
22
31
rng:: R
23
32
end
24
33
25
- function MountainCarEnv (; T = Float64, min_pos = T (- 1.2 ), max_pos = T (.6 ),
26
- max_speed = T (.07 ), goal_pos = T (.5 ), max_steps = 200 )
27
- env = MountainCarEnv (MountainCarEnvParams (min_pos, max_pos, max_speed, goal_pos, max_steps),
28
- DiscreteSpace (3 ),
29
- MultiContinuousSpace ([min_pos, - max_speed], [max_pos, max_speed]),
30
- zeros (T, 2 ),
31
- 1 ,
32
- false ,
33
- 0 ,
34
- Random. GLOBAL_RNG)
34
"""
    MountainCarEnv(; T = Float64, continuous = false, rng = Random.GLOBAL_RNG, kwargs...)

Build a mountain-car environment and reset it before returning it.

# Keywords
- `T`: element type of the parameters and of the state vector.
- `continuous`: when `true` the action space is `ContinuousSpace(-1, 1)` and the
  defaults `goal_pos = .45`, `power = .0015` are used; otherwise the action space
  is `DiscreteSpace(3)` and the `MountainCarEnvParams` defaults apply.
- `rng`: random number generator stored in the environment.
- `kwargs...`: forwarded to `MountainCarEnvParams`; they take precedence over the
  defaults listed above.
"""
function MountainCarEnv(; T = Float64, continuous = false,
                        rng = Random.GLOBAL_RNG, kwargs...)
    # `T` is forwarded in both branches: the params and the state vector share the
    # type parameter `T` of `MountainCarEnv`, so they have to agree.
    if continuous
        params = MountainCarEnvParams(; goal_pos = .45, power = .0015, T = T, kwargs...)
    else
        params = MountainCarEnvParams(; T = T, kwargs...)
    end
    env = MountainCarEnv(params,
                         continuous ? ContinuousSpace(-T(1.), T(1.)) : DiscreteSpace(3),
                         MultiContinuousSpace([params.min_pos, -params.max_speed],
                                              [params.max_pos, params.max_speed]),
                         zeros(T, 2),
                         1,
                         false,
                         0,
                         rng)
    reset!(env)
    env
end
53
"""
    ContinuousMountainCarEnv(; kwargs...)

Shorthand for `MountainCarEnv(; continuous = true, kwargs...)`.
"""
function ContinuousMountainCarEnv(; kwargs...)
    return MountainCarEnv(; continuous = true, kwargs...)
end
38
54
39
55
# Return the action space stored in the environment.
function action_space(env::MountainCarEnv)
    return env.action_space
end
40
56
# Return the observation space stored in the environment.
function observation_space(env::MountainCarEnv)
    return env.observation_space
end
41
57
"""
    observe(env::MountainCarEnv)

Return a named tuple holding the current state vector (`observation`) and the
termination flag (`isdone`). The state array is handed out as-is, not copied.
"""
function observe(env::MountainCarEnv)
    return (observation = env.state, isdone = env.done)
end
42
58
43
- function reset! (env:: MountainCarEnv{T} ) where T
59
"""
    reset!(env::MountainCarEnv)

Start a new episode: draw the position as `.2 * rand(env.rng, T) - .6`, set the
velocity to zero and clear both the termination flag and the step counter.
Returns `nothing`.
"""
function reset!(env::MountainCarEnv{A, T}) where {A, T}
    start_pos = .2 * rand(env.rng, T) - .6
    env.state[1] = start_pos
    env.state[2] = zero(T)
    env.t = 0
    env.done = false
    return nothing
end
50
66
51
- function interact! (env:: MountainCarEnv , a)
67
# Continuous actions: limit the requested force to [-1, 1] before stepping.
# `clamp` keeps both bounds explicit, so negative (leftward) and fractional
# forces pass through unchanged.
function interact!(env::MountainCarEnv{<:ContinuousSpace}, a)
    return _interact!(env, clamp(a, -1, 1))
end
68
# Discrete actions: the force is the action index minus 2
# (presumably actions 1:3 map to -1, 0, 1 — confirm against `DiscreteSpace`).
function interact!(env::MountainCarEnv{<:DiscreteSpace}, a)
    force = a - 2
    return _interact!(env, force)
end
69
+ function _interact! (env:: MountainCarEnv , force)
52
70
env. t += 1
53
71
x, v = env. state
54
- v += (a - 2 ) * 0.001 + cos (3 * x)* (- 0.0025 )
72
+ v += force * env . params . power + cos (3 * x)* (- env . params . gravity )
55
73
v = clamp (v, - env. params. max_speed, env. params. max_speed)
56
74
x += v
57
75
x = clamp (x, env. params. min_pos, env. params. max_pos)
58
76
if x == env. params. min_pos && v < 0 v = 0 end
59
- env. done = x >= env. params. goal_pos || env. t >= env. params. max_steps
77
+ env. done = x >= env. params. goal_pos && v >= env. params. goal_velocity ||
78
+ env. t >= env. params. max_steps
60
79
env. state[1 ] = x
61
80
env. state[2 ] = v
62
81
(observation= env. state, reward= - 1. , isdone= env. done)
@@ -87,6 +106,6 @@ function render(env::MountainCarEnv)
87
106
xs, ys = rotate (xs, ys, θ)
88
107
xs, ys = translate (xs, ys, [x, height (x)])
89
108
fillarea (xs, ys)
90
- plotendofepisode (env. params. max_pos + .1 , 0 , d)
109
+ plotendofepisode (env. params. max_pos + .1 , 0 , d)
91
110
updatews ()
92
- end
111
+ end
0 commit comments