
Add Bit Flipping Environment [WIP] #116

Merged
merged 2 commits into from Jan 1, 2021
74 changes: 74 additions & 0 deletions src/environments/examples/BitFlippingEnv.jl
@@ -0,0 +1,74 @@
export BitFlippingEnv, GoalState

"""
In Bit Flipping Environment we have n bits. The actions are 1 to n where executing i-th action flips the i-th bit of the state. For every episode we sample uniformly and inital state as well as the target state.

Refer [Hindsight Experience Replay paper](https://arxiv.org/pdf/1707.01495.pdf) for the motivation behind the environment.
"""

struct GoalState{T} <: RLBase.AbstractStateStyle end
GoalState() = GoalState{Any}()

struct BitFlippingEnv <: AbstractEnv
N::Int
rng::AbstractRNG
state::BitArray{1}
goal_state::BitArray{1}
end

# Note: keyword arguments do not take part in dispatch, so defining a second
# zero-positional-argument constructor would silently overwrite this one;
# a single constructor with an `rng` keyword covers both cases.
function BitFlippingEnv(; N = 8, rng = Random.GLOBAL_RNG)
    state = bitrand(rng, N)
    goal_state = bitrand(rng, N)
    BitFlippingEnv(N, rng, state, goal_state)
end

Random.seed!(env::BitFlippingEnv, s) = Random.seed!(env.rng, s)

RLBase.action_space(env::BitFlippingEnv) = Base.OneTo(env.N)

RLBase.legal_action_space(env::BitFlippingEnv) = Base.OneTo(env.N)

function (env::BitFlippingEnv)(action::Int)
    if 1 <= action <= env.N
        env.state[action] = !env.state[action]
        nothing
    else
        @error "Invalid action"
    end
end

RLBase.state(env::BitFlippingEnv) = state(env, Observation{BitArray{1}}())
RLBase.state(env::BitFlippingEnv, ::Observation) = env.state
RLBase.state(env::BitFlippingEnv, ::GoalState) = env.goal_state
RLBase.state_space(env::BitFlippingEnv, ::Observation) = Space(fill(false..true, env.N))
RLBase.state_space(env::BitFlippingEnv, ::GoalState) = Space(fill(false..true, env.N))
RLBase.is_terminated(env::BitFlippingEnv) = env.state == env.goal_state

function RLBase.reset!(env::BitFlippingEnv)
    env.state .= bitrand(env.rng, env.N)
    env.goal_state .= bitrand(env.rng, env.N)
end

function RLBase.reward(env::BitFlippingEnv)
    if env.state == env.goal_state
        0.0

Review comment (Member): I think we should return -1 instead of 0.0 here, based on the description in the original paper: "For every episode we sample uniformly an initial state as well as a target state and the policy gets a reward of −1 as long as it is not in the target state."

    else
        -1.0
    end
end

RLBase.NumAgentStyle(::BitFlippingEnv) = SINGLE_AGENT
RLBase.DynamicStyle(::BitFlippingEnv) = SEQUENTIAL
RLBase.ActionStyle(::BitFlippingEnv) = MINIMAL_ACTION_SET
RLBase.InformationStyle(::BitFlippingEnv) = PERFECT_INFORMATION
RLBase.StateStyle(::BitFlippingEnv) = (Observation{BitArray{1}}(), GoalState{BitArray{1}}())
RLBase.RewardStyle(::BitFlippingEnv) = STEP_REWARD
RLBase.UtilityStyle(::BitFlippingEnv) = GENERAL_SUM
RLBase.ChanceStyle(::BitFlippingEnv) = STOCHASTIC
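
A minimal interaction sketch based on the definitions above (assuming, as in the tests below, that `state`, `reward`, and `is_terminated` are in scope from RLBase):

using Random

env = BitFlippingEnv(; N = 8, rng = MersenneTwister(123))
obs = state(env)                # current bits (Observation view)
goal = state(env, GoalState())  # target bits (GoalState view)
env(1)                          # flip the first bit
reward(env)                     # -1.0 until the state matches the goal
is_terminated(env)              # true once state == goal_state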
1 change: 1 addition & 0 deletions src/environments/examples/examples.jl
@@ -11,3 +11,4 @@ include("AcrobotEnv.jl")
include("CartPoleEnv.jl")
include("MountainCarEnv.jl")
include("PendulumEnv.jl")
include("BitFlippingEnv.jl")
8 changes: 8 additions & 0 deletions test/environments/examples/bit_flipping_env.jl
@@ -0,0 +1,8 @@
@testset "bit_flipping_env" begin
    rng = StableRNG(123)
    env = BitFlippingEnv(; N = 7, rng = rng)
    test_state = state(env, GoalState())  # the GoalState view exposes the sampled target bits
    RLBase.test_interfaces!(env)
    RLBase.test_runnable!(env)
end
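
The two state styles are what make this environment a natural fit for Hindsight Experience Replay: a replay buffer can relabel a stored transition's goal and recompute the sparse reward from the bit vectors alone. A short sketch of that relabeling step (the helper `her_reward` is hypothetical, not part of this PR):

# Hypothetical helper mirroring RLBase.reward(env) for an arbitrary goal.
her_reward(s::BitArray{1}, g::BitArray{1}) = s == g ? 0.0 : -1.0

s = state(env)
g = state(env, GoalState())
her_reward(s, g)        # same value as reward(env)
her_reward(s, copy(s))  # after relabeling the goal to the achieved state: 0.0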