
Automatic JuliaFormatter.jl run #108

Merged: 1 commit, merged on Dec 15, 2020
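
This PR was produced by the repository's formatting automation. For anyone reproducing such a run locally, here is a minimal sketch using JuliaFormatter's exported `format` function; the bot's exact options are not recorded in this PR, so the bare call below is an assumption, not its actual configuration.

```julia
# Minimal local reproduction sketch (assumed invocation; the bot's exact
# options are not part of this PR).
using JuliaFormatter

# `format` rewrites .jl files in place and returns `true` when every file
# was already formatted, `false` if it had to change anything.
if !format("src")
    @info "Files under src/ were reformatted; review and commit the diff."
end
```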
31 changes: 20 additions & 11 deletions src/base.jl
@@ -6,7 +6,8 @@ function env_traits()
[eval(x) for x in RLBase.ENV_API if endswith(String(x), "Style")]
end

Base.show(io::IO, t::MIME"text/plain", env::AbstractEnv) = show(io, MIME"text/markdown"(), env)
Base.show(io::IO, t::MIME"text/plain", env::AbstractEnv) =
show(io, MIME"text/markdown"(), env)

function Base.show(io::IO, t::MIME"text/markdown", env::AbstractEnv)
show(io, t, Markdown.parse("""
@@ -62,7 +63,11 @@ function test_interfaces(env)

rng = Random.MersenneTwister(666)

@info "testing $(nameof(env)), you need to manually check these traits to make sure they are implemented correctly!" NumAgentStyle(env) DynamicStyle(env) ActionStyle(env) InformationStyle(env) StateStyle(env) RewardStyle(env) UtilityStyle(env) ChanceStyle(env)
@info "testing $(nameof(env)), you need to manually check these traits to make sure they are implemented correctly!" NumAgentStyle(
env,
) DynamicStyle(env) ActionStyle(env) InformationStyle(env) StateStyle(env) RewardStyle(
env,
) UtilityStyle(env) ChanceStyle(env)

reset!(env)

@@ -99,7 +104,7 @@ function test_interfaces(env)

@testset "SingleAgent" begin
if NumAgentStyle(env) === SINGLE_AGENT
total_reward = 0.
total_reward = 0.0
while !is_terminated(env)
if StateStyle(env) isa Tuple
for ss in StateStyle(env)
@@ -111,7 +116,8 @@
if ActionStyle(env) === MINIMAL_ACTION_SET
action_space(env) == legal_action_space
elseif ActionStyle(env) === FULL_ACTION_SET
@test legal_action_space(env) == action_space(env)[legal_action_space_mask(env)]
@test legal_action_space(env) ==
action_space(env)[legal_action_space_mask(env)]
else
@error "TODO:"
end
@@ -133,7 +139,7 @@
@testset "MultiAgent" begin
if NumAgentStyle(env) isa MultiAgent
reset!(env)
rewards = [0. for p in players(env)]
rewards = [0.0 for p in players(env)]
while !is_terminated(env)
if InformationStyle(env) === PERFECT_INFORMATION
for p in players(env)
@@ -142,7 +148,7 @@ end
end
a = rand(rng, legal_action_space(env))
env(a)
for (i,p) in enumerate(players(env))
for (i, p) in enumerate(players(env))
@test state(env, p) ∈ state_space(env, p)
rewards[i] += reward(env, p)
end
@@ -158,7 +164,7 @@
# @test isempty(legal_action_space(env, p))
end
if RewardStyle(env) === TERMINAL_REWARD
for (p,r) in zip(players(env), rewards)
for (p, r) in zip(players(env), rewards)
@test r == reward(env, p)
end
end
@@ -207,10 +213,10 @@ function gen_traits_table(io, envs)
print(io, "<th> $(i) </th>")
end

for k in sort(collect(keys(trait_dict)), by=nameof)
for k in sort(collect(keys(trait_dict)), by = nameof)
vs = trait_dict[k]
print(io, "<tr> <th rowspan=\"$(length(vs))\"> $(nameof(k)) </th>")
for (i,v) in enumerate(vs)
for (i, v) in enumerate(vs)
if i != 1
print(io, "<tr> ")
end
@@ -239,7 +245,10 @@ function gen_traits_table(io, envs)

print(io, "<ol>")
for env in envs
println(io, "<li> <a href=\"https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/$(nameof(env)).jl\"> $(nameof(env)) </a></li>")
println(
io,
"<li> <a href=\"https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/$(nameof(env)).jl\"> $(nameof(env)) </a></li>",
)
end
print(io, "</ol>")
end
@@ -255,4 +264,4 @@ watch https://github.com/JuliaMath/IntervalSets.jl/issues/66
"""
function Base.in(x::AbstractArray, s::Array{<:Interval})
size(x) == size(s) && all(x .∈ s)
end
end
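
Aside, not part of the diff: the `Base.in` method at the bottom of this file tests a point against a box of intervals, coordinate by coordinate. A hedged usage sketch with hypothetical values, assuming ReinforcementLearningBase and IntervalSets are loaded:

```julia
# Usage sketch for the Base.in extension above (hypothetical values).
using IntervalSets  # provides the `..` interval constructor

x = [0.5, 2.0]              # a point
s = [0.0..1.0, 0.0..3.0]    # one Interval per coordinate
@assert x in s              # true: sizes match and each coordinate lies in its interval
```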
54 changes: 28 additions & 26 deletions src/examples/KuhnPokerEnv.jl
@@ -1,16 +1,16 @@
export KuhnPokerEnv

const KUHN_POKER_CARDS = (:J, :Q, :K)
const KUHN_POKER_CARD_COMBINATIONS = ((:J, :Q), (:J, :K), (:Q, :J), (:Q, :K), (:K, :J), (:K, :Q))
const KUHN_POKER_CARD_COMBINATIONS =
((:J, :Q), (:J, :K), (:Q, :J), (:Q, :K), (:K, :J), (:K, :Q))
const KUHN_POKER_ACTIONS = (:pass, :bet)
const KUHN_POKER_STATES = (
(),
map(tuple, KUHN_POKER_CARDS)...,
KUHN_POKER_CARD_COMBINATIONS...,
(
(cards..., actions...)
for cards in ((), map(tuple, KUHN_POKER_CARDS)...)
for actions in (
(cards..., actions...) for cards in ((), map(tuple, KUHN_POKER_CARDS)...) for
actions in (
(),
(:bet,),
(:bet, :bet),
@@ -21,7 +21,7 @@ const KUHN_POKER_STATES = (
(:pass, :bet, :pass),
(:pass, :bet, :bet),
)
)...
)...,
)

"""
@@ -34,28 +34,24 @@ const KUHN_POKER_REWARD_TABLE = Dict(
(:Q, :K, :bet, :bet) => -2,
(:K, :J, :bet, :bet) => 2,
(:K, :Q, :bet, :bet) => 2,

(:J, :Q, :bet, :pass) => 1,
(:J, :K, :bet, :pass) => 1,
(:Q, :J, :bet, :pass) => 1,
(:Q, :K, :bet, :pass) => 1,
(:K, :J, :bet, :pass) => 1,
(:K, :Q, :bet, :pass) => 1,

(:J, :Q, :pass, :pass) => -1,
(:J, :K, :pass, :pass) => -1,
(:Q, :J, :pass, :pass) => 1,
(:Q, :K, :pass, :pass) => -1,
(:K, :J, :pass, :pass) => 1,
(:K, :Q, :pass, :pass) => 1,

(:J, :Q, :pass, :bet, :pass) => -1,
(:J, :K, :pass, :bet, :pass) => -1,
(:Q, :J, :pass, :bet, :pass) => -1,
(:Q, :K, :pass, :bet, :pass) => -1,
(:K, :J, :pass, :bet, :pass) => -1,
(:K, :Q, :pass, :bet, :pass) => -1,

(:J, :Q, :pass, :bet, :bet) => -2,
(:J, :K, :pass, :bet, :bet) => -2,
(:Q, :J, :pass, :bet, :bet) => 2,
@@ -88,7 +84,9 @@ function reset!(env::KuhnPokerEnv)
empty!(env.actions)
end

is_terminated(env::KuhnPokerEnv) = length(env.actions) == 2 && (env.actions[1] == :bet || env.actions[2] == :pass) || length(env.actions) == 3
is_terminated(env::KuhnPokerEnv) =
length(env.actions) == 2 && (env.actions[1] == :bet || env.actions[2] == :pass) ||
length(env.actions) == 3
players(env::KuhnPokerEnv) = 1:2

function state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p::Int)
@@ -99,13 +97,16 @@ function state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p::In
end
end

state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, ::ChancePlayer) = Tuple(env.cards)
state_space(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p) = KUHN_POKER_STATES
state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, ::ChancePlayer) =
Tuple(env.cards)
state_space(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p) =
KUHN_POKER_STATES

action_space(env::KuhnPokerEnv, ::Int) = Base.OneTo(length(KUHN_POKER_ACTIONS))
action_space(env::KuhnPokerEnv, ::ChancePlayer) = Base.OneTo(length(KUHN_POKER_CARDS))

legal_action_space(env::KuhnPokerEnv, p::ChancePlayer) = [x for x in action_space(env,p) if KUHN_POKER_CARDS[x] ∉ env.cards]
legal_action_space(env::KuhnPokerEnv, p::ChancePlayer) =
[x for x in action_space(env, p) if KUHN_POKER_CARDS[x] ∉ env.cards]

function legal_action_space_mask(env::KuhnPokerEnv, p::ChancePlayer)
m = fill(true, 3)
@@ -115,9 +116,9 @@ end

function prob(env::KuhnPokerEnv, ::ChancePlayer)
if length(env.cards) == 0
fill(1/3, 3)
fill(1 / 3, 3)
elseif length(env.cards) == 1
p = fill(1/2, 3)
p = fill(1 / 2, 3)
p[env.cards[1]] = 0
else
@error "it's not chance player's turn!"
@@ -138,16 +139,17 @@ function reward(env::KuhnPokerEnv, p)
end
end

current_player(env::KuhnPokerEnv) = if length(env.cards) < 2
CHANCE_PLAYER
elseif length(env.actions) == 0
1
elseif length(env.actions) == 1
2
elseif length(env.actions) == 2
1
else
end
current_player(env::KuhnPokerEnv) =
if length(env.cards) < 2
CHANCE_PLAYER
elseif length(env.actions) == 0
1
elseif length(env.actions) == 1
2
elseif length(env.actions) == 2
1
else
end

NumAgentStyle(::KuhnPokerEnv) = MultiAgent(2)
DynamicStyle(::KuhnPokerEnv) = SEQUENTIAL
@@ -156,4 +158,4 @@ InformationStyle(::KuhnPokerEnv) = IMPERFECT_INFORMATION
StateStyle(::KuhnPokerEnv) = InformationSet{Tuple{Vararg{Symbol}}}()
RewardStyle(::KuhnPokerEnv) = TERMINAL_REWARD
UtilityStyle(::KuhnPokerEnv) = ZERO_SUM
ChanceStyle(::KuhnPokerEnv) = EXPLICIT_STOCHASTIC
ChanceStyle(::KuhnPokerEnv) = EXPLICIT_STOCHASTIC
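
To sanity-check the reformatted `is_terminated` rule above: a Kuhn poker hand ends after two actions unless the sequence is (:pass, :bet), which grants the first player one more decision. A small self-contained check, not part of the diff:

```julia
# Terminal action sequences in Kuhn poker, per the is_terminated rule above.
is_done(actions) =
    length(actions) == 2 && (actions[1] == :bet || actions[2] == :pass) ||
    length(actions) == 3

@assert is_done((:bet, :bet)) && is_done((:bet, :pass)) && is_done((:pass, :pass))
@assert !is_done((:pass, :bet))                 # player 1 still gets to respond
@assert is_done((:pass, :bet, :pass)) && is_done((:pass, :bet, :bet))
```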
12 changes: 6 additions & 6 deletions src/examples/MontyHallEnv.jl
@@ -1,13 +1,13 @@
export MontyHallEnv

const REWARD_OF_GOAT = 10.
const REWARD_OF_CAR = 1_000.
const REWARD_OF_GOAT = 10.0
const REWARD_OF_CAR = 1_000.0

mutable struct MontyHallEnv <: AbstractEnv
doors::Vector{Symbol}
rng::AbstractRNG
guest_action::Union{Nothing,Int}
host_action::Union{Nothing, Int}
host_action::Union{Nothing,Int}
reward::Union{Nothing,Float64}
end

@@ -24,7 +24,7 @@ Quoted from [wiki](https://en.wikipedia.org/wiki/Monty_Hall_problem):

Here we'll introduce the first environment which is of [`FULL_ACTION_SET`](@ref).
"""
function MontyHallEnv(;rng=Random.GLOBAL_RNG)
function MontyHallEnv(; rng = Random.GLOBAL_RNG)
doors = fill(:goat, 3)
doors[rand(rng, 1:3)] = :car
MontyHallEnv(doors, rng, nothing, nothing, nothing)
@@ -94,7 +94,7 @@ function (env::MontyHallEnv)(action)
end
end

reward(env::MontyHallEnv) = isnothing(env.reward) ? 0. : env.reward
reward(env::MontyHallEnv) = isnothing(env.reward) ? 0.0 : env.reward

is_terminated(env::MontyHallEnv) = !isnothing(env.reward)

@@ -113,4 +113,4 @@ InformationStyle(::MontyHallEnv) = IMPERFECT_INFORMATION # the distribution of
StateStyle(::MontyHallEnv) = Observation{Int}()
RewardStyle(::MontyHallEnv) = TERMINAL_REWARD
UtilityStyle(::MontyHallEnv) = GENERAL_SUM
ChanceStyle(::MontyHallEnv) = STOCHASTIC # the same action lead to different reward each time.
ChanceStyle(::MontyHallEnv) = STOCHASTIC # the same action lead to different reward each time.
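
The docstring above introduces MontyHallEnv as the first [`FULL_ACTION_SET`](@ref) environment: its legal actions can shrink to a strict subset of the full action space once the host opens a door. A hedged interaction sketch, assuming ReinforcementLearningBase is loaded:

```julia
env = MontyHallEnv()
env(rand(legal_action_space(env)))  # guest's first pick; the host then opens a goat door
# Invariant checked by test_interfaces for FULL_ACTION_SET environments:
@assert legal_action_space(env) == action_space(env)[legal_action_space_mask(env)]
```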
12 changes: 3 additions & 9 deletions src/examples/MultiArmBanditsEnv.jl
@@ -20,15 +20,9 @@ This is a **one-shot** game. The environment terminates immediately after taking
in an action. Here we use it to demonstrate how to write a customized
environment with only minimal interfaces defined.
"""
function MultiArmBanditsEnv(;true_reward=0., k = 10, rng=Random.GLOBAL_RNG)
function MultiArmBanditsEnv(; true_reward = 0.0, k = 10, rng = Random.GLOBAL_RNG)
true_values = true_reward .+ randn(rng, k)
MultiArmBanditsEnv(
true_reward,
true_values,
rng,
0.,
false
)
MultiArmBanditsEnv(true_reward, true_values, rng, 0.0, false)
end

"""
Expand Down Expand Up @@ -103,4 +97,4 @@ InformationStyle(::MultiArmBanditsEnv) = IMPERFECT_INFORMATION # the distributi
StateStyle(::MultiArmBanditsEnv) = Observation{Int}()
RewardStyle(::MultiArmBanditsEnv) = TERMINAL_REWARD
UtilityStyle(::MultiArmBanditsEnv) = GENERAL_SUM
ChanceStyle(::MultiArmBanditsEnv) = STOCHASTIC # the same action lead to different reward each time.
ChanceStyle(::MultiArmBanditsEnv) = STOCHASTIC # the same action lead to different reward each time.
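
As the docstring notes, MultiArmBanditsEnv is one-shot: a single pull ends the episode. A hedged sketch, assuming ReinforcementLearningBase is loaded:

```julia
env = MultiArmBanditsEnv()      # defaults from the diff: true_reward = 0.0, k = 10
env(rand(action_space(env)))    # pull one arm...
@assert is_terminated(env)      # ...and the episode ends immediately
reward(env)                     # noisy reward drawn around that arm's true value
```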
14 changes: 8 additions & 6 deletions src/examples/PigEnv.jl
@@ -17,7 +17,7 @@ See [wiki](https://en.wikipedia.org/wiki/Pig_(dice_game)) for explanation of thi

Here we use it to demonstrate how to write a game with more than 2 players.
"""
PigEnv(;n_players=2) = PigEnv{n_players}(zeros(Int, n_players), 1, false, 0)
PigEnv(; n_players = 2) = PigEnv{n_players}(zeros(Int, n_players), 1, false, 0)

function reset!(env::PigEnv)
fill!(env.scores, 0)
@@ -26,15 +26,17 @@ function reset!(env::PigEnv)
env.tmp_score = 0
end

current_player(env::PigEnv) = env.is_chance_player_active ? CHANCE_PLAYER : env.current_player
current_player(env::PigEnv) =
env.is_chance_player_active ? CHANCE_PLAYER : env.current_player
players(env::PigEnv) = 1:length(env.scores)
action_space(env::PigEnv, ::Int) = (:roll, :hold)
action_space(env::PigEnv, ::ChancePlayer) = Base.OneTo(PIG_N_SIDES)

prob(env::PigEnv, ::ChancePlayer) = fill(1/6, 6) # TODO: uniform distribution, more memory efficient
prob(env::PigEnv, ::ChancePlayer) = fill(1 / 6, 6) # TODO: uniform distribution, more memory efficient

state(env::PigEnv, ::Observation{Vector{Int}}, p) = env.scores
state_space(env::PigEnv, ::Observation, p) = [0..(PIG_TARGET_SCORE+PIG_N_SIDES-1) for _ in env.scores]
state_space(env::PigEnv, ::Observation, p) =
[0..(PIG_TARGET_SCORE + PIG_N_SIDES - 1) for _ in env.scores]

is_terminated(env::PigEnv) = any(s >= PIG_TARGET_SCORE for s in env.scores)

@@ -75,11 +77,11 @@ function (env::PigEnv)(action, ::ChancePlayer)
end
end

NumAgentStyle(::PigEnv{N}) where N = MultiAgent(N)
NumAgentStyle(::PigEnv{N}) where {N} = MultiAgent(N)
DynamicStyle(::PigEnv) = SEQUENTIAL
ActionStyle(::PigEnv) = MINIMAL_ACTION_SET
InformationStyle(::PigEnv) = PERFECT_INFORMATION
StateStyle(::PigEnv) = Observation{Vector{Int}}()
RewardStyle(::PigEnv) = TERMINAL_REWARD
UtilityStyle(::PigEnv) = CONSTANT_SUM
ChanceStyle(::PigEnv) = EXPLICIT_STOCHASTIC
ChanceStyle(::PigEnv) = EXPLICIT_STOCHASTIC
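
The TODO next to `prob(env::PigEnv, ::ChancePlayer)` asks for a more memory-efficient uniform distribution than `fill(1 / 6, 6)`. One possible direction, sketched with FillArrays as a suggestion rather than anything this PR does:

```julia
# Hedged sketch for the TODO above: FillArrays.Fill stores one value plus a
# length, so the uniform die distribution needs O(1) memory.
using FillArrays

die_probs = Fill(1 / 6, 6)    # behaves like a 6-element AbstractVector
@assert sum(die_probs) ≈ 1.0
```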
10 changes: 5 additions & 5 deletions src/examples/RandomWalk1D.jl
@@ -16,7 +16,7 @@ Compared to the [`MultiArmBanditsEnv`](@ref):
Base.@kwdef mutable struct RandomWalk1D <: AbstractEnv
rewards::Pair{Float64,Float64} = -1.0 => 1.0
N::Int = 7
start_pos::Int = (N+1) ÷ 2
start_pos::Int = (N + 1) ÷ 2
pos::Int = start_pos
end

@@ -28,9 +28,9 @@ action_space(::RandomWalk1D) = Base.OneTo(length(ACTIONS_OF_RANDOMWALK1D))

function (env::RandomWalk1D)(action::Symbol)
if action == :left
env.pos = max(env.pos-1, 1)
env.pos = max(env.pos - 1, 1)
elseif action == :right
env.pos = min(env.pos+1, env.N)
env.pos = min(env.pos + 1, env.N)
else
@error "invalid action: $action"
end
@@ -47,7 +47,7 @@ function reward(env::RandomWalk1D)
elseif env.pos == env.N
last(env.rewards)
else
0.
0.0
end
end

@@ -58,4 +58,4 @@ InformationStyle(::RandomWalk1D) = PERFECT_INFORMATION
StateStyle(::RandomWalk1D) = Observation{Int}()
RewardStyle(::RandomWalk1D) = TERMINAL_REWARD
UtilityStyle(::RandomWalk1D) = GENERAL_SUM
ChanceStyle(::RandomWalk1D) = DETERMINISTIC
ChanceStyle(::RandomWalk1D) = DETERMINISTIC
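
Finally, a hedged end-to-end rollout of RandomWalk1D using the common RLBase interface (assumes ReinforcementLearningBase is loaded; `is_terminated` is not shown in this file's hunks but is part of the standard AbstractEnv API):

```julia
env = RandomWalk1D()                 # N = 7, start at pos (N + 1) ÷ 2 = 4
while !is_terminated(env)
    env(rand([:left, :right]))       # the functor above accepts :left / :right
end
@assert reward(env) in (-1.0, 1.0)   # terminal reward from env.rewards
```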