This repository was archived by the owner on Aug 11, 2023. It is now read-only.

Commit e73163e

Simplify code structure (#107)
* simplify code structure
* sync
* rm old tests
* update README
* update README
* add examples
* fix CommonRLInterface
* fix test error
1 parent f8417aa commit e73163e


43 files changed: +1629 −1103 lines

Project.toml

Lines changed: 3 additions & 7 deletions
@@ -1,23 +1,19 @@
 name = "ReinforcementLearningBase"
 uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
 authors = ["Johanni Brea <[email protected]>", "Jun Tian <[email protected]>"]
-version = "0.8.5"
+version = "0.9.0"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
 CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
+IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 AbstractTrees = "0.3"
 CommonRLInterface = "0.2"
 MacroTools = "0.5"
 julia = "1.3"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
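
Project.toml bumps the release from 0.8.5 to 0.9.0, adds IntervalSets as a dependency, and moves Test out of the [extras]/[targets] test-only sections into [deps]. A minimal sketch of picking up the new release, assuming version 0.9.0 is registered in the General registry:

    # Minimal sketch, assuming ReinforcementLearningBase 0.9.0 is registered.
    using Pkg

    Pkg.add(PackageSpec(name = "ReinforcementLearningBase", version = "0.9.0"))

    using ReinforcementLearningBase  # also exports the shorthand alias `RLBase`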

README.md

Lines changed: 40 additions & 1 deletion
@@ -2,4 +2,43 @@
 
 [![Build Status](https://travis-ci.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl.svg?branch=master)](https://travis-ci.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl)
 
-ReinforcementLearningBase.jl holds the common types and utility functions to be shared by other components in ReinforcementLearning ecosystem.
+ReinforcementLearningBase.jl holds the common types and utility functions to be
+shared by other components in ReinforcementLearning ecosystem.
+
+
+## Examples
+
+<table>
+<th colspan="2">Traits</th><th> 1 </th><th> 2 </th><th> 3 </th><th> 4 </th><th> 5 </th><th> 6 </th><th> 7 </th><th> 8 </th><th> 9 </th><tr> <th rowspan="2"> ActionStyle </th><th> MinimalActionSet </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
+<tr> <th> FullActionSet </th><td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> </tr>
+<tr> <th rowspan="3"> ChanceStyle </th><th> Stochastic </th><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th> Deterministic </th><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> </tr>
+<tr> <th> ExplicitStochastic </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
+<tr> <th rowspan="2"> DefaultStateStyle </th><th> Observation </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
+<tr> <th> InformationSet </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
+<tr> <th rowspan="2"> DynamicStyle </th><th> Simultaneous </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th> Sequential </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
+<tr> <th rowspan="2"> InformationStyle </th><th> PerfectInformation </th><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
+<tr> <th> ImperfectInformation </th><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
+<tr> <th rowspan="2"> NumAgentStyle </th><th> MultiAgent </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
+<tr> <th> SingleAgent </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th rowspan="2"> RewardStyle </th><th> TerminalReward </th><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
+<tr> <th> StepReward </th><td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th rowspan="3"> StateStyle </th><th> Observation </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
+<tr> <th> InformationSet </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
+<tr> <th> InternalState </th><td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th rowspan="4"> UtilityStyle </th><th> GeneralSum </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
+<tr> <th> ZeroSum </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> ✔ </td></tr>
+<tr> <th> ConstantSum </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> </tr>
+<tr> <th> IdenticalUtility </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> </tr>
+</table>
+<ol><li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/MultiArmBanditsEnv.jl"> MultiArmBanditsEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/RandomWalk1D.jl"> RandomWalk1D </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TigerProblemEnv.jl"> TigerProblemEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/MontyHallEnv.jl"> MontyHallEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/RockPaperScissorsEnv.jl"> RockPaperScissorsEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TicTacToeEnv.jl"> TicTacToeEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TinyHanabiEnv.jl"> TinyHanabiEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/PigEnv.jl"> PigEnv </a></li>
+<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/KuhnPokerEnv.jl"> KuhnPokerEnv </a></li>
+</ol>

src/CommonRLInterface.jl

Lines changed: 16 additions & 16 deletions
@@ -21,9 +21,9 @@ end
 const CommonRLEnvs = Union{CommonRLEnv,CommonRLMarkovEnv,CommonRLZeroSumEnv}
 
 function Base.convert(::Type{CRL.AbstractEnv}, env::AbstractEnv)
-    if get_num_players(env) == 1
+    if NumAgentStyle(env) === SINGLE_AGENT
         convert(CRL.AbstractMarkovEnv, env)
-    elseif get_num_players(env) == 2 && UtilityStyle(env) === ZERO_SUM
+    elseif NumAgentStyle(env) isa MultiAgent{2} && UtilityStyle(env) === ZERO_SUM
         convert(CRL.AbstractZeroSumEnv, env)
     else
         CommonRLEnv(env)
@@ -34,25 +34,25 @@ Base.convert(::Type{CRL.AbstractMarkovEnv}, env::AbstractEnv) = CommonRLMarkovEnv(env)
 Base.convert(::Type{CRL.AbstractZeroSumEnv}, env::AbstractEnv) = CommonRLZeroSumEnv(env)
 
 CRL.@provide CRL.reset!(env::CommonRLEnvs) = reset!(env.env)
-CRL.@provide CRL.actions(env::CommonRLEnvs) = get_actions(env.env)
-CRL.@provide CRL.observe(env::CommonRLEnvs) = get_state(env.env)
-CRL.state(env::CommonRLEnvs) = get_state(env.env)
+CRL.@provide CRL.actions(env::CommonRLEnvs) = action_space(env.env)
+CRL.@provide CRL.observe(env::CommonRLEnvs) = state(env.env)
+CRL.state(env::CommonRLEnvs) = state(env.env)
 CRL.provided(::typeof(CRL.state), env::CommonRLEnvs) =
     InformationStyle(env.env) === PERFECT_INFORMATION
-CRL.@provide CRL.terminated(env::CommonRLEnvs) = get_terminal(env.env)
-CRL.@provide CRL.player(env::CommonRLEnvs) = get_current_player(env.env)
+CRL.@provide CRL.terminated(env::CommonRLEnvs) = is_terminated(env.env)
+CRL.@provide CRL.player(env::CommonRLEnvs) = current_player(env.env)
 CRL.@provide CRL.clone(env::CommonRLEnvs) = CommonRLEnv(copy(env.env))
 
 CRL.@provide function CRL.act!(env::CommonRLEnvs, a)
     env.env(a)
-    get_reward(env.env)
+    reward(env.env)
 end
 
-CRL.valid_actions(x::CommonRLEnvs) = get_legal_actions(x.env)
+CRL.valid_actions(x::CommonRLEnvs) = legal_action_space(x.env)
 CRL.provided(::typeof(CRL.valid_actions), env::CommonRLEnvs) =
     ActionStyle(env.env) === FullActionSet()
 
-CRL.valid_action_mask(x::CommonRLEnvs) = get_legal_actions_mask(x.env)
+CRL.valid_action_mask(x::CommonRLEnvs) = legal_action_space_mask(x.env)
 CRL.provided(::typeof(CRL.valid_action_mask), env::CommonRLEnvs) =
     ActionStyle(env.env) === FullActionSet()
 
@@ -68,12 +68,12 @@ end
 Base.convert(::Type{AbstractEnv}, env::CRL.AbstractEnv) = convert(RLBaseEnv, env)
 Base.convert(::Type{RLBaseEnv}, env::CRL.AbstractEnv) = RLBaseEnv(env, 0.0f0) # can not determine reward ahead. Assume `Float32`.
 
-get_state(env::RLBaseEnv) = CRL.observe(env.env)
-get_actions(env::RLBaseEnv) = CRL.actions(env.env)
-get_reward(env::RLBaseEnv) = env.r
-get_terminal(env::RLBaseEnv) = CRL.terminated(env.env)
-get_legal_actions(env::RLBaseEnv) = CRL.valid_actions(env.env)
-get_legal_actions_mask(env::RLBaseEnv) = CRL.valid_action_mask(env.env)
+state(env::RLBaseEnv) = CRL.observe(env.env)
+action_space(env::RLBaseEnv) = CRL.actions(env.env)
+reward(env::RLBaseEnv) = env.r
+is_terminated(env::RLBaseEnv) = CRL.terminated(env.env)
+legal_action_space(env::RLBaseEnv) = CRL.valid_actions(env.env)
+legal_action_space_mask(env::RLBaseEnv) = CRL.valid_action_mask(env.env)
 reset!(env::RLBaseEnv) = CRL.reset!(env.env)
 
 (env::RLBaseEnv)(a) = env.r = CRL.act!(env.env, a)
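
The substance of this file's change is the rename of the environment query API (get_state/get_actions/get_reward/get_terminal/get_legal_actions* become state/action_space/reward/is_terminated/legal_action_space*) and dispatching on NumAgentStyle instead of get_num_players. Below is a hedged sketch of wrapping an RLBase environment behind the CommonRLInterface API with the new names; `MyEnv()` is a placeholder for any AbstractEnv and is not defined in this commit:

    # Sketch only: `MyEnv()` stands in for any RLBase.AbstractEnv; which queries
    # are `provided` depends on the environment's traits.
    using ReinforcementLearningBase
    using CommonRLInterface
    const CRL = CommonRLInterface

    rl_env  = MyEnv()                            # placeholder environment
    crl_env = convert(CRL.AbstractEnv, rl_env)   # picks the Markov, zero-sum or generic wrapper

    CRL.reset!(crl_env)
    CRL.actions(crl_env)                         # backed by action_space(rl_env)
    CRL.observe(crl_env)                         # backed by state(rl_env)
    a = first(CRL.actions(crl_env))              # assumes the action set is iterable
    r = CRL.act!(crl_env, a)                     # steps rl_env and returns reward(rl_env)
    CRL.terminated(crl_env)                      # backed by is_terminated(rl_env)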

src/ReinforcementLearningBase.jl

Lines changed: 4 additions & 2 deletions
@@ -3,10 +3,12 @@ module ReinforcementLearningBase
 const RLBase = ReinforcementLearningBase
 export RLBase
 
+using Random
+
 include("inline_export.jl")
 include("interface.jl")
-include("implementations/implementations.jl")
-include("base.jl")
 include("CommonRLInterface.jl")
+include("base.jl")
+include("examples/examples.jl")
 
 end # module
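
The module now depends on Random, loads CommonRLInterface.jl before base.jl, drops the old implementations/ tree, and adds an examples/examples.jl entry point. That file is not shown in this excerpt; a plausible sketch of its contents, assuming it simply includes the environment files linked from the README:

    # Hypothetical sketch of src/examples/examples.jl; the actual file is not
    # shown above, and these paths are inferred from the README example links.
    include("MultiArmBanditsEnv.jl")
    include("RandomWalk1D.jl")
    include("TigerProblemEnv.jl")
    include("MontyHallEnv.jl")
    include("RockPaperScissorsEnv.jl")
    include("TicTacToeEnv.jl")
    include("TinyHanabiEnv.jl")
    include("PigEnv.jl")
    include("KuhnPokerEnv.jl")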
