@@ -6,17 +6,20 @@ In Bit Flipping Environment we have n bits. The actions are 1 to n where executi
6
6
Refer to the [Hindsight Experience Replay paper](https://arxiv.org/pdf/1707.01495.pdf) for the motivation behind the environment.
7
7
"""
8
8
9
mutable struct BitFlippingEnv <: AbstractEnv
    # Number of bits; the constructor draws `state` and `goal_state` with this length.
    N::Int
    # Random source used to draw the bit vectors.
    rng::AbstractRNG
    # Current bit vector.
    state::BitVector
    # Bit vector the episode is compared against for termination.
    goal_state::BitVector
    # Step limit: the episode is terminated once `t` reaches this value.
    max_steps::Int
    # Step counter; incremented on every action and zeroed by `reset!`.
    t::Int
end
15
17
16
"""
    BitFlippingEnv(; N = 8, T = N, rng = Random.GLOBAL_RNG)

Create a bit flipping environment with `N` bits and a limit of `T` steps per episode.

The start state and the goal state are each drawn with `bitrand(rng, N)`, and the step
counter starts at zero.
"""
function BitFlippingEnv(; N = 8, T = N, rng = Random.GLOBAL_RNG)
    # The start state is drawn before the goal so the RNG is consumed in a fixed order.
    start_bits = bitrand(rng, N)
    goal_bits = bitrand(rng, N)
    return BitFlippingEnv(N, rng, start_bits, goal_bits, T, 0)
end
21
24
22
25
"""
    Random.seed!(env::BitFlippingEnv, s)

Reseed the environment's random number generator by forwarding `s` to
`Random.seed!(env.rng, s)`.
"""
function Random.seed!(env::BitFlippingEnv, s)
    return Random.seed!(env.rng, s)
end
@@ -26,6 +29,7 @@ RLBase.action_space(env::BitFlippingEnv) = Base.OneTo(env.N)
26
29
# Legal actions are the bit indices `1:env.N`, returned as a `Base.OneTo` range.
function RLBase.legal_action_space(env::BitFlippingEnv)
    return Base.OneTo(env.N)
end
27
30
28
31
function (env:: BitFlippingEnv )(action:: Int )
32
+ env. t += 1
29
33
if 1 <= action <= env. N
30
34
env. state[action] = ! env. state[action]
31
35
nothing
@@ -39,9 +43,10 @@ RLBase.state(env::BitFlippingEnv, ::Observation) = env.state
39
43
# The `GoalState` view of the environment is the stored goal bit vector itself (no copy).
function RLBase.state(env::BitFlippingEnv, ::GoalState)
    return env.goal_state
end
40
44
# Observation space: `env.N` entries, each spanning the interval `false .. true`.
function RLBase.state_space(env::BitFlippingEnv, ::Observation)
    per_bit = false .. true
    return Space(fill(per_bit, env.N))
end
41
45
# Goal-state space: `env.N` entries, each spanning the interval `false .. true`.
function RLBase.state_space(env::BitFlippingEnv, ::GoalState)
    per_bit = false .. true
    return Space(fill(per_bit, env.N))
end
42
# An episode ends when the goal is reached or when the step budget is used up.
function RLBase.is_terminated(env::BitFlippingEnv)
    # Goal check comes first, matching the short-circuit order of `a || b`.
    env.state == env.goal_state && return true
    return env.t >= env.max_steps
end
43
47
44
48
"""
    RLBase.reset!(env::BitFlippingEnv)

Start a new episode: zero the step counter and redraw both `env.state` and
`env.goal_state` from `env.rng`.

The bit vectors are refilled in place, so no temporary arrays are allocated. As before,
the value of the last statement (`env.goal_state`) is what the call returns.
"""
function RLBase.reset!(env::BitFlippingEnv)
    env.t = 0
    # `bitrand(rng, n)` is `rand!(rng, BitArray(undef, n))`, so filling the existing
    # arrays consumes the RNG exactly as the allocate-then-copy form did.
    Random.rand!(env.rng, env.state)
    return Random.rand!(env.rng, env.goal_state)
end
0 commit comments