Commit 80432e3
Format .jl files
1 parent cdc9da5 commit 80432e3

23 files changed: +233 −219 lines
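
The commit message does not name the tool used, but the pattern of edits below (`0.` rewritten as `0.0`, spaces added around operators and keyword defaults, long calls wrapped with trailing commas) matches the default style of JuliaFormatter.jl. A minimal sketch of how such a run might be reproduced, assuming that tool:

using JuliaFormatter  # assumption: the formatter is not named anywhere in this commit

format("src")  # rewrite every .jl file under src/ in place with the default style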

src/base.jl

Lines changed: 20 additions & 11 deletions
@@ -6,7 +6,8 @@ function env_traits()
     [eval(x) for x in RLBase.ENV_API if endswith(String(x), "Style")]
 end
 
-Base.show(io::IO, t::MIME"text/plain", env::AbstractEnv) = show(io, MIME"text/markdown"(), env)
+Base.show(io::IO, t::MIME"text/plain", env::AbstractEnv) =
+    show(io, MIME"text/markdown"(), env)
 
 function Base.show(io::IO, t::MIME"text/markdown", env::AbstractEnv)
     show(io, t, Markdown.parse("""
@@ -62,7 +63,11 @@ function test_interfaces(env)
 
     rng = Random.MersenneTwister(666)
 
-    @info "testing $(nameof(env)), you need to manually check these traits to make sure they are implemented correctly!" NumAgentStyle(env) DynamicStyle(env) ActionStyle(env) InformationStyle(env) StateStyle(env) RewardStyle(env) UtilityStyle(env) ChanceStyle(env)
+    @info "testing $(nameof(env)), you need to manually check these traits to make sure they are implemented correctly!" NumAgentStyle(
+        env,
+    ) DynamicStyle(env) ActionStyle(env) InformationStyle(env) StateStyle(env) RewardStyle(
+        env,
+    ) UtilityStyle(env) ChanceStyle(env)
 
     reset!(env)
@@ -99,7 +104,7 @@ function test_interfaces(env)
 
     @testset "SingleAgent" begin
         if NumAgentStyle(env) === SINGLE_AGENT
-            total_reward = 0.
+            total_reward = 0.0
             while !is_terminated(env)
                 if StateStyle(env) isa Tuple
                     for ss in StateStyle(env)
@@ -111,7 +116,8 @@ function test_interfaces(env)
                 if ActionStyle(env) === MINIMAL_ACTION_SET
                     action_space(env) == legal_action_space
                 elseif ActionStyle(env) === FULL_ACTION_SET
-                    @test legal_action_space(env) == action_space(env)[legal_action_space_mask(env)]
+                    @test legal_action_space(env) ==
+                          action_space(env)[legal_action_space_mask(env)]
                 else
                     @error "TODO:"
                 end
@@ -133,7 +139,7 @@ function test_interfaces(env)
     @testset "MultiAgent" begin
         if NumAgentStyle(env) isa MultiAgent
             reset!(env)
-            rewards = [0. for p in players(env)]
+            rewards = [0.0 for p in players(env)]
             while !is_terminated(env)
                 if InformationStyle(env) === PERFECT_INFORMATION
                     for p in players(env)
@@ -142,7 +148,7 @@ function test_interfaces(env)
                 end
                 a = rand(rng, legal_action_space(env))
                 env(a)
-                for (i,p) in enumerate(players(env))
+                for (i, p) in enumerate(players(env))
                     @test state(env, p) ∈ state_space(env, p)
                     rewards[i] += reward(env, p)
                 end
@@ -158,7 +164,7 @@ function test_interfaces(env)
                 # @test isempty(legal_action_space(env, p))
             end
             if RewardStyle(env) === TERMINAL_REWARD
-                for (p,r) in zip(players(env), rewards)
+                for (p, r) in zip(players(env), rewards)
                     @test r == reward(env, p)
                 end
             end
@@ -207,10 +213,10 @@ function gen_traits_table(io, envs)
         print(io, "<th> $(i) </th>")
     end
 
-    for k in sort(collect(keys(trait_dict)), by=nameof)
+    for k in sort(collect(keys(trait_dict)), by = nameof)
         vs = trait_dict[k]
         print(io, "<tr> <th rowspan=\"$(length(vs))\"> $(nameof(k)) </th>")
-        for (i,v) in enumerate(vs)
+        for (i, v) in enumerate(vs)
             if i != 1
                 print(io, "<tr> ")
             end
@@ -239,7 +245,10 @@ function gen_traits_table(io, envs)
 
     print(io, "<ol>")
     for env in envs
-        println(io, "<li> <a href=\"https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/$(nameof(env)).jl\"> $(nameof(env)) </a></li>")
+        println(
+            io,
+            "<li> <a href=\"https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/$(nameof(env)).jl\"> $(nameof(env)) </a></li>",
+        )
     end
     print(io, "</ol>")
 end
@@ -255,4 +264,4 @@ watch https://github.com/JuliaMath/IntervalSets.jl/issues/66
 """
 function Base.in(x::AbstractArray, s::Array{<:Interval})
     size(x) == size(s) && all(x .∈ s)
-end
\ No newline at end of file
+end
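
For context, the test_interfaces function edited above drives an environment with random legal actions and checks its declared traits. A hedged sketch of invoking it on one of the bundled example environments; whether it is exported or needs an RLBase prefix is an assumption here:

using ReinforcementLearningBase

env = RandomWalk1D()
test_interfaces(env)  # logs the traits, then runs the SingleAgent/MultiAgent rollouts above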

src/examples/KuhnPokerEnv.jl

Lines changed: 28 additions & 26 deletions
@@ -1,16 +1,16 @@
 export KuhnPokerEnv
 
 const KUHN_POKER_CARDS = (:J, :Q, :K)
-const KUHN_POKER_CARD_COMBINATIONS = ((:J, :Q), (:J, :K), (:Q, :J), (:Q, :K), (:K, :J), (:K, :Q))
+const KUHN_POKER_CARD_COMBINATIONS =
+    ((:J, :Q), (:J, :K), (:Q, :J), (:Q, :K), (:K, :J), (:K, :Q))
 const KUHN_POKER_ACTIONS = (:pass, :bet)
 const KUHN_POKER_STATES = (
     (),
     map(tuple, KUHN_POKER_CARDS)...,
     KUHN_POKER_CARD_COMBINATIONS...,
     (
-        (cards..., actions...)
-        for cards in ((), map(tuple, KUHN_POKER_CARDS)...)
-        for actions in (
+        (cards..., actions...) for cards in ((), map(tuple, KUHN_POKER_CARDS)...) for
+        actions in (
             (),
             (:bet,),
             (:bet, :bet),
@@ -21,7 +21,7 @@ const KUHN_POKER_STATES = (
             (:pass, :bet, :pass),
             (:pass, :bet, :bet),
         )
-    )...
+    )...,
 )
 
 """
@@ -34,28 +34,24 @@ const KUHN_POKER_REWARD_TABLE = Dict(
     (:Q, :K, :bet, :bet) => -2,
     (:K, :J, :bet, :bet) => 2,
     (:K, :Q, :bet, :bet) => 2,
-
     (:J, :Q, :bet, :pass) => 1,
     (:J, :K, :bet, :pass) => 1,
     (:Q, :J, :bet, :pass) => 1,
     (:Q, :K, :bet, :pass) => 1,
     (:K, :J, :bet, :pass) => 1,
     (:K, :Q, :bet, :pass) => 1,
-
     (:J, :Q, :pass, :pass) => -1,
     (:J, :K, :pass, :pass) => -1,
     (:Q, :J, :pass, :pass) => 1,
     (:Q, :K, :pass, :pass) => -1,
     (:K, :J, :pass, :pass) => 1,
     (:K, :Q, :pass, :pass) => 1,
-
    (:J, :Q, :pass, :bet, :pass) => -1,
     (:J, :K, :pass, :bet, :pass) => -1,
     (:Q, :J, :pass, :bet, :pass) => -1,
     (:Q, :K, :pass, :bet, :pass) => -1,
     (:K, :J, :pass, :bet, :pass) => -1,
     (:K, :Q, :pass, :bet, :pass) => -1,
-
     (:J, :Q, :pass, :bet, :bet) => -2,
     (:J, :K, :pass, :bet, :bet) => -2,
     (:Q, :J, :pass, :bet, :bet) => 2,
@@ -88,7 +84,9 @@ function reset!(env::KuhnPokerEnv)
     empty!(env.actions)
 end
 
-is_terminated(env::KuhnPokerEnv) = length(env.actions) == 2 && (env.actions[1] == :bet || env.actions[2] == :pass) || length(env.actions) == 3
+is_terminated(env::KuhnPokerEnv) =
+    length(env.actions) == 2 && (env.actions[1] == :bet || env.actions[2] == :pass) ||
+    length(env.actions) == 3
 players(env::KuhnPokerEnv) = 1:2
 
 function state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p::Int)
@@ -99,13 +97,16 @@ function state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p::In
     end
 end
 
-state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, ::ChancePlayer) = Tuple(env.cards)
-state_space(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p) = KUHN_POKER_STATES
+state(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, ::ChancePlayer) =
+    Tuple(env.cards)
+state_space(env::KuhnPokerEnv, ::InformationSet{Tuple{Vararg{Symbol}}}, p) =
+    KUHN_POKER_STATES
 
 action_space(env::KuhnPokerEnv, ::Int) = Base.OneTo(length(KUHN_POKER_ACTIONS))
 action_space(env::KuhnPokerEnv, ::ChancePlayer) = Base.OneTo(length(KUHN_POKER_CARDS))
 
-legal_action_space(env::KuhnPokerEnv, p::ChancePlayer) = [x for x in action_space(env,p) if KUHN_POKER_CARDS[x] ∉ env.cards]
+legal_action_space(env::KuhnPokerEnv, p::ChancePlayer) =
+    [x for x in action_space(env, p) if KUHN_POKER_CARDS[x] ∉ env.cards]
 
 function legal_action_space_mask(env::KuhnPokerEnv, p::ChancePlayer)
     m = fill(true, 3)
@@ -115,9 +116,9 @@ end
 
 function prob(env::KuhnPokerEnv, ::ChancePlayer)
     if length(env.cards) == 0
-        fill(1/3, 3)
+        fill(1 / 3, 3)
     elseif length(env.cards) == 1
-        p = fill(1/2, 3)
+        p = fill(1 / 2, 3)
         p[env.cards[1]] = 0
     else
         @error "it's not chance player's turn!"
@@ -138,16 +139,17 @@ function reward(env::KuhnPokerEnv, p)
     end
 end
 
-current_player(env::KuhnPokerEnv) = if length(env.cards) < 2
-    CHANCE_PLAYER
-elseif length(env.actions) == 0
-    1
-elseif length(env.actions) == 1
-    2
-elseif length(env.actions) == 2
-    1
-else
-end
+current_player(env::KuhnPokerEnv) =
+    if length(env.cards) < 2
+        CHANCE_PLAYER
+    elseif length(env.actions) == 0
+        1
+    elseif length(env.actions) == 1
+        2
+    elseif length(env.actions) == 2
+        1
+    else
+    end
 
 NumAgentStyle(::KuhnPokerEnv) = MultiAgent(2)
 DynamicStyle(::KuhnPokerEnv) = SEQUENTIAL
@@ -156,4 +158,4 @@ InformationStyle(::KuhnPokerEnv) = IMPERFECT_INFORMATION
 StateStyle(::KuhnPokerEnv) = InformationSet{Tuple{Vararg{Symbol}}}()
 RewardStyle(::KuhnPokerEnv) = TERMINAL_REWARD
 UtilityStyle(::KuhnPokerEnv) = ZERO_SUM
-ChanceStyle(::KuhnPokerEnv) = EXPLICIT_STOCHASTIC
\ No newline at end of file
+ChanceStyle(::KuhnPokerEnv) = EXPLICIT_STOCHASTIC
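
As a quick check of the rewritten current_player chain (the chance player acts until both cards are dealt, then players alternate 1, 2, 1), a hedged rollout sketch. The single-argument env(action) and legal_action_space(env) fallbacks that dispatch on current_player(env) are assumptions carried over from the MultiAgent loop in test_interfaces:

env = KuhnPokerEnv()
reset!(env)
while !is_terminated(env)
    env(rand(legal_action_space(env)))  # two card deals, then pass/bet rounds
end
[reward(env, p) for p in players(env)]  # ZERO_SUM: the two entries cancel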

src/examples/MontyHallEnv.jl

Lines changed: 6 additions & 6 deletions
@@ -1,13 +1,13 @@
 export MontyHallEnv
 
-const REWARD_OF_GOAT = 10.
-const REWARD_OF_CAR = 1_000.
+const REWARD_OF_GOAT = 10.0
+const REWARD_OF_CAR = 1_000.0
 
 mutable struct MontyHallEnv <: AbstractEnv
     doors::Vector{Symbol}
     rng::AbstractRNG
     guest_action::Union{Nothing,Int}
-    host_action::Union{Nothing, Int}
+    host_action::Union{Nothing,Int}
     reward::Union{Nothing,Float64}
 end
 
@@ -24,7 +24,7 @@ Quoted from [wiki](https://en.wikipedia.org/wiki/Monty_Hall_problem):
 
 Here we'll introduce the first environment which is of [`FULL_ACTION_SET`](@ref).
 """
-function MontyHallEnv(;rng=Random.GLOBAL_RNG)
+function MontyHallEnv(; rng = Random.GLOBAL_RNG)
     doors = fill(:goat, 3)
     doors[rand(rng, 1:3)] = :car
     MontyHallEnv(doors, rng, nothing, nothing, nothing)
@@ -94,7 +94,7 @@ function (env::MontyHallEnv)(action)
     end
 end
 
-reward(env::MontyHallEnv) = isnothing(env.reward) ? 0. : env.reward
+reward(env::MontyHallEnv) = isnothing(env.reward) ? 0.0 : env.reward
 
 is_terminated(env::MontyHallEnv) = !isnothing(env.reward)
 
@@ -113,4 +113,4 @@ InformationStyle(::MontyHallEnv) = IMPERFECT_INFORMATION # the distribution of
 StateStyle(::MontyHallEnv) = Observation{Int}()
 RewardStyle(::MontyHallEnv) = TERMINAL_REWARD
 UtilityStyle(::MontyHallEnv) = GENERAL_SUM
-ChanceStyle(::MontyHallEnv) = STOCHASTIC # the same action lead to different reward each time.
\ No newline at end of file
+ChanceStyle(::MontyHallEnv) = STOCHASTIC # the same action lead to different reward each time.
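
Since MontyHallEnv is the package's example of FULL_ACTION_SET, a hedged sketch of the legal-action bookkeeping; the guest-turn methods of legal_action_space and legal_action_space_mask are assumed from the trait, as they are not shown in this diff:

env = MontyHallEnv()
action_space(env)  # all three doors
env(1)  # guest picks door 1; the host then opens a goat door
legal_action_space(env)  # the opened door is no longer legal
legal_action_space_mask(env)  # Boolean mask aligned with action_space(env)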

src/examples/MultiArmBanditsEnv.jl

Lines changed: 3 additions & 9 deletions
@@ -20,15 +20,9 @@ This is a **one-shot** game. The environment terminates immediately after taking
 in an action. Here we use it to demonstrate how to write a customized
 environment with only minimal interfaces defined.
 """
-function MultiArmBanditsEnv(;true_reward=0., k = 10, rng=Random.GLOBAL_RNG)
+function MultiArmBanditsEnv(; true_reward = 0.0, k = 10, rng = Random.GLOBAL_RNG)
     true_values = true_reward .+ randn(rng, k)
-    MultiArmBanditsEnv(
-        true_reward,
-        true_values,
-        rng,
-        0.,
-        false
-    )
+    MultiArmBanditsEnv(true_reward, true_values, rng, 0.0, false)
 end
 
 """
@@ -103,4 +97,4 @@ InformationStyle(::MultiArmBanditsEnv) = IMPERFECT_INFORMATION # the distributi
 StateStyle(::MultiArmBanditsEnv) = Observation{Int}()
 RewardStyle(::MultiArmBanditsEnv) = TERMINAL_REWARD
 UtilityStyle(::MultiArmBanditsEnv) = GENERAL_SUM
-ChanceStyle(::MultiArmBanditsEnv) = STOCHASTIC # the same action lead to different reward each time.
\ No newline at end of file
+ChanceStyle(::MultiArmBanditsEnv) = STOCHASTIC # the same action lead to different reward each time.
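
A hedged sketch of the one-shot interaction described in the docstring above; action_space(env) returning the range over the k arms is an assumption, as it is not part of this hunk:

using Random

env = MultiArmBanditsEnv(; true_reward = 1.0, k = 5, rng = MersenneTwister(123))
env(rand(env.rng, action_space(env)))  # pull one arm
is_terminated(env)  # true: the episode ends after a single action
reward(env)  # presumably drawn around the chosen arm's true value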

src/examples/PigEnv.jl

Lines changed: 8 additions & 6 deletions
@@ -17,7 +17,7 @@ See [wiki](https://en.wikipedia.org/wiki/Pig_(dice_game)) for explanation of thi
 
 Here we use it to demonstrate how to write a game with more than 2 players.
 """
-PigEnv(;n_players=2) = PigEnv{n_players}(zeros(Int, n_players), 1, false, 0)
+PigEnv(; n_players = 2) = PigEnv{n_players}(zeros(Int, n_players), 1, false, 0)
 
 function reset!(env::PigEnv)
     fill!(env.scores, 0)
@@ -26,15 +26,17 @@ function reset!(env::PigEnv)
     env.tmp_score = 0
 end
 
-current_player(env::PigEnv) = env.is_chance_player_active ? CHANCE_PLAYER : env.current_player
+current_player(env::PigEnv) =
+    env.is_chance_player_active ? CHANCE_PLAYER : env.current_player
 players(env::PigEnv) = 1:length(env.scores)
 action_space(env::PigEnv, ::Int) = (:roll, :hold)
 action_space(env::PigEnv, ::ChancePlayer) = Base.OneTo(PIG_N_SIDES)
 
-prob(env::PigEnv, ::ChancePlayer) = fill(1/6, 6) # TODO: uniform distribution, more memory efficient
+prob(env::PigEnv, ::ChancePlayer) = fill(1 / 6, 6) # TODO: uniform distribution, more memory efficient
 
 state(env::PigEnv, ::Observation{Vector{Int}}, p) = env.scores
-state_space(env::PigEnv, ::Observation, p) = [0..(PIG_TARGET_SCORE+PIG_N_SIDES-1) for _ in env.scores]
+state_space(env::PigEnv, ::Observation, p) =
+    [0..(PIG_TARGET_SCORE + PIG_N_SIDES - 1) for _ in env.scores]
 
 is_terminated(env::PigEnv) = any(s >= PIG_TARGET_SCORE for s in env.scores)
 
@@ -75,11 +77,11 @@ function (env::PigEnv)(action, ::ChancePlayer)
     end
 end
 
-NumAgentStyle(::PigEnv{N}) where N = MultiAgent(N)
+NumAgentStyle(::PigEnv{N}) where {N} = MultiAgent(N)
 DynamicStyle(::PigEnv) = SEQUENTIAL
 ActionStyle(::PigEnv) = MINIMAL_ACTION_SET
 InformationStyle(::PigEnv) = PERFECT_INFORMATION
 StateStyle(::PigEnv) = Observation{Vector{Int}}()
 RewardStyle(::PigEnv) = TERMINAL_REWARD
 UtilityStyle(::PigEnv) = CONSTANT_SUM
-ChanceStyle(::PigEnv) = EXPLICIT_STOCHASTIC
\ No newline at end of file
+ChanceStyle(::PigEnv) = EXPLICIT_STOCHASTIC
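
A hedged sketch of a game with more than 2 players, which is what PigEnv is meant to demonstrate. The two-argument call env(action, player) for an Int player is an assumption; only the ChancePlayer method appears in this diff:

env = PigEnv(; n_players = 3)  # builds a PigEnv{3} via the keyword constructor above
reset!(env)
while !is_terminated(env)
    p = current_player(env)  # alternates between CHANCE_PLAYER and players 1:3
    a = rand(collect(legal_action_space(env, p)))  # collect: the action space may be a Tuple
    env(a, p)
end
env.scores  # any entry >= PIG_TARGET_SCORE ends the game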

src/examples/RandomWalk1D.jl

Lines changed: 5 additions & 5 deletions
@@ -16,7 +16,7 @@ Compared to the [`MultiArmBanditsEnv`](@ref):
 Base.@kwdef mutable struct RandomWalk1D <: AbstractEnv
     rewards::Pair{Float64,Float64} = -1.0 => 1.0
     N::Int = 7
-    start_pos::Int = (N+1) ÷ 2
+    start_pos::Int = (N + 1) ÷ 2
     pos::Int = start_pos
 end
 
@@ -28,9 +28,9 @@ action_space(::RandomWalk1D) = Base.OneTo(length(ACTIONS_OF_RANDOMWALK1D))
 
 function (env::RandomWalk1D)(action::Symbol)
     if action == :left
-        env.pos = max(env.pos-1, 1)
+        env.pos = max(env.pos - 1, 1)
     elseif action == :right
-        env.pos = min(env.pos+1, env.N)
+        env.pos = min(env.pos + 1, env.N)
     else
         @error "invalid action: $action"
     end
@@ -47,7 +47,7 @@ function reward(env::RandomWalk1D)
     elseif env.pos == env.N
         last(env.rewards)
     else
-        0.
+        0.0
     end
 end
 
@@ -58,4 +58,4 @@ InformationStyle(::RandomWalk1D) = PERFECT_INFORMATION
 StateStyle(::RandomWalk1D) = Observation{Int}()
 RewardStyle(::RandomWalk1D) = TERMINAL_REWARD
 UtilityStyle(::RandomWalk1D) = GENERAL_SUM
-ChanceStyle(::RandomWalk1D) = DETERMINISTIC
\ No newline at end of file
+ChanceStyle(::RandomWalk1D) = DETERMINISTIC
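
Base.@kwdef gives RandomWalk1D the keyword constructor used in this hedged sketch; the single-argument state(env) fallback is an assumption beyond what this diff shows:

env = RandomWalk1D(; N = 5)  # start_pos defaults to (N + 1) ÷ 2 = 3
env(:right)  # deterministic move to 4; :left and :right clamp at the walls
state(env)
reward(env)  # 0.0 until pos reaches 1 or N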
