Skip to content
This repository was archived by the owner on Aug 11, 2023. It is now read-only.

Commit b1abf25

Browse files
authored
Simplify code structure (#112)
* keep only interfaces here * minor improvement * remove unused test cases * update readme
1 parent 9e60c91 commit b1abf25

28 files changed

+33
-1399
lines changed

Project.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,11 @@ version = "0.9.0"
66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
88
CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
9-
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
10-
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
119
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
1210
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1311
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1412

1513
[compat]
1614
AbstractTrees = "0.3"
1715
CommonRLInterface = "0.2"
18-
IntervalSets = "0.5"
19-
MacroTools = "0.5"
2016
julia = "1.3"

README.md

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,43 +2,12 @@
22

33
[![Build Status](https://travis-ci.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl.svg?branch=master)](https://travis-ci.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl)
44

5-
ReinforcementLearningBase.jl holds the common types and utility functions to be
6-
shared by other components in the ReinforcementLearning ecosystem.
7-
8-
9-
## Examples
10-
11-
<table>
12-
<th colspan="2">Traits</th><th> 1 </th><th> 2 </th><th> 3 </th><th> 4 </th><th> 5 </th><th> 6 </th><th> 7 </th><th> 8 </th><th> 9 </th><tr> <th rowspan="2"> ActionStyle </th><th> MinimalActionSet </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
13-
<tr> <th> FullActionSet </th><td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> </tr>
14-
<tr> <th rowspan="3"> ChanceStyle </th><th> Stochastic </th><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
15-
<tr> <th> Deterministic </th><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> </tr>
16-
<tr> <th> ExplicitStochastic </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
17-
<tr> <th rowspan="2"> DefaultStateStyle </th><th> Observation </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
18-
<tr> <th> InformationSet </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
19-
<tr> <th rowspan="2"> DynamicStyle </th><th> Simultaneous </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> </tr>
20-
<tr> <th> Sequential </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
21-
<tr> <th rowspan="2"> InformationStyle </th><th> PerfectInformation </th><td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
22-
<tr> <th> ImperfectInformation </th><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
23-
<tr> <th rowspan="2"> NumAgentStyle </th><th> MultiAgent </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
24-
<tr> <th> SingleAgent </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
25-
<tr> <th rowspan="2"> RewardStyle </th><th> TerminalReward </th><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td></tr>
26-
<tr> <th> StepReward </th><td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
27-
<tr> <th rowspan="3"> StateStyle </th><th> Observation </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> ✔ </td><td> </td> </tr>
28-
<tr> <th> InformationSet </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> ✔ </td></tr>
29-
<tr> <th> InternalState </th><td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
30-
<tr> <th rowspan="4"> UtilityStyle </th><th> GeneralSum </th><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> </tr>
31-
<tr> <th> ZeroSum </th><td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> ✔ </td><td> </td> <td> </td> <td> ✔ </td></tr>
32-
<tr> <th> ConstantSum </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> </tr>
33-
<tr> <th> IdenticalUtility </th><td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> </td> <td> ✔ </td><td> </td> <td> </td> </tr>
34-
</table>
35-
<ol><li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/MultiArmBanditsEnv.jl"> MultiArmBanditsEnv </a></li>
36-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/RandomWalk1D.jl"> RandomWalk1D </a></li>
37-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TigerProblemEnv.jl"> TigerProblemEnv </a></li>
38-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/MontyHallEnv.jl"> MontyHallEnv </a></li>
39-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/RockPaperScissorsEnv.jl"> RockPaperScissorsEnv </a></li>
40-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TicTacToeEnv.jl"> TicTacToeEnv </a></li>
41-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/TinyHanabiEnv.jl"> TinyHanabiEnv </a></li>
42-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/PigEnv.jl"> PigEnv </a></li>
43-
<li> <a href="https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/KuhnPokerEnv.jl"> KuhnPokerEnv </a></li>
44-
</ol>
5+
This package defines two core concepts in reinforcement learning:
6+
7+
- `AbstractEnv`.
8+
- Check out
9+
[ReinforcementLearningEnvironments.jl](https://github.com/JuliaReinforcementLearning/ReinforcementLearningEnvironments.jl)
10+
for a wide variety of environments.
11+
- `AbstractPolicy`.
12+
[ReinforcementLearningCore.jl](https://github.com/JuliaReinforcementLearning/ReinforcementLearningCore.jl)
13+
is a good starting point for learning how to write customized policies.

src/ReinforcementLearningBase.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,5 @@ include("inline_export.jl")
99
include("interface.jl")
1010
include("CommonRLInterface.jl")
1111
include("base.jl")
12-
include("examples/examples.jl")
1312

1413
end # module

src/base.jl

Lines changed: 15 additions & 206 deletions
Original file line numberDiff line numberDiff line change
@@ -191,213 +191,22 @@ function test_interfaces!(env)
191191
reset!(env)
192192
end
193193

194-
#####
195-
# Generate README
196-
#####
197-
198-
gen_traits_table(envs) = gen_traits_table(stdout, envs)
199-
200-
function gen_traits_table(io, envs)
201-
trait_dict = Dict()
202-
for f in env_traits()
203-
for env in envs
204-
if !haskey(trait_dict, f)
205-
trait_dict[f] = Set()
206-
end
207-
t = f(env)
208-
if f == StateStyle
209-
if t isa Tuple
210-
for x in t
211-
push!(trait_dict[f], nameof(typeof(x)))
212-
end
213-
else
214-
push!(trait_dict[f], nameof(typeof(t)))
215-
end
216-
else
217-
push!(trait_dict[f], nameof(typeof(t)))
218-
end
219-
end
220-
end
221-
222-
println(io, "<table>")
223-
224-
print(io, "<th colspan=\"2\">Traits</th>")
225-
for i in 1:length(envs)
226-
print(io, "<th> $(i) </th>")
227-
end
228-
229-
for k in sort(collect(keys(trait_dict)), by = nameof)
230-
vs = trait_dict[k]
231-
print(io, "<tr> <th rowspan=\"$(length(vs))\"> $(nameof(k)) </th>")
232-
for (i, v) in enumerate(vs)
233-
if i != 1
234-
print(io, "<tr> ")
235-
end
236-
print(io, "<th> $(v) </th>")
237-
for env in envs
238-
if k == StateStyle && k(env) isa Tuple
239-
ss = k(env)
240-
if v in map(x -> nameof(typeof(x)), ss)
241-
print(io, "<td> ✔ </td>")
242-
else
243-
print(io, "<td> </td> ")
244-
end
245-
else
246-
if nameof(typeof(k(env))) == v
247-
print(io, "<td> ✔ </td>")
248-
else
249-
print(io, "<td> </td> ")
250-
end
251-
end
252-
end
253-
println(io, "</tr>")
254-
end
255-
end
256-
257-
println(io, "</table>")
258-
259-
print(io, "<ol>")
260-
for env in envs
261-
println(
262-
io,
263-
"<li> <a href=\"https://github.com/JuliaReinforcementLearning/ReinforcementLearningBase.jl/tree/master/src/examples/$(nameof(env)).jl\"> $(nameof(env)) </a></li>",
264-
)
265-
end
266-
print(io, "</ol>")
267-
end
268-
269-
#####
270-
# Utils
271-
#####
272-
273-
using IntervalSets
274-
275-
Random.rand(s::Union{Interval,Array{<:Interval}}) = rand(Random.GLOBAL_RNG, s)
276-
277-
function Random.rand(rng::AbstractRNG, s::Interval)
278-
rand(rng) * (s.right - s.left) + s.left
279-
end
280-
281-
#####
282-
# WorldSpace
283-
#####
284-
285-
export WorldSpace
286-
287-
"""
288-
In some cases, we may not be interested in the action/state space.
289-
One can return `WorldSpace()` to keep the interface consistent.
290-
"""
291-
struct WorldSpace{T} end
292-
293-
WorldSpace() = WorldSpace{Any}()
294-
295-
Base.in(x, ::WorldSpace{T}) where {T} = x isa T
296-
297-
#####
298-
# ZeroTo
299-
#####
300-
301-
export ZeroTo
302-
303-
"""
304-
Similar to `Base.OneTo`. Useful when wrapping third-party environments.
305-
"""
306-
struct ZeroTo{T<:Integer} <: AbstractUnitRange{T}
307-
stop::T
308-
ZeroTo{T}(n) where {T<:Integer} = new(max(zero(T) - one(T), n))
309-
end
310-
311-
ZeroTo(n::T) where {T<:Integer} = ZeroTo{T}(n)
312-
313-
Base.show(io::IO, r::ZeroTo) = print(io, "ZeroTo(", r.stop, ")")
314-
Base.length(r::ZeroTo{T}) where {T} = T(r.stop + one(r.stop))
315-
Base.first(r::ZeroTo{T}) where {T} = zero(r.stop)
316-
317-
function getindex(v::ZeroTo{T}, i::Integer) where {T}
318-
Base.@_inline_meta
319-
@boundscheck ((i >= 0) & (i <= v.stop)) || throw_boundserror(v, i)
320-
convert(T, i)
321-
end
322-
323-
#####
324-
# ActionProbPair
325-
#####
326-
327-
export ActionProbPair
328-
329-
"""
330-
Used in action space of chance player.
331-
"""
332-
struct ActionProbPair{A,P}
333-
action::A
334-
prob::P
335-
end
336-
337-
"""
338-
Directly copied from [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl/blob/0ea8e798c3d19609ed33b11311de5a2bd6ee9fd0/src/sampling.jl#L499-L510) to avoid depending on the whole package.
339-
Here we assume that the weights in `wv` sum to `1`.
340-
"""
341-
function weighted_sample(rng::AbstractRNG, wv)
342-
t = rand(rng)
343-
cw = zero(Base.first(wv))
344-
for (i, w) in enumerate(wv)
345-
cw += w
346-
if cw >= t
347-
return i
348-
end
349-
end
350-
end
351-
352-
Random.rand(rng::AbstractRNG, s::AbstractVector{<:ActionProbPair}) =
353-
s[weighted_sample(rng, (x.prob for x in s))]
354-
355-
(env::AbstractEnv)(a::ActionProbPair) = env(a.action)
356-
357-
#####
358-
# Space
359-
#####
360-
361-
export Space
362-
363-
"""
364-
A wrapper to treat each element as a sub-space which supports `Random.rand` and `Base.in`.
365-
"""
366-
struct Space{T}
367-
s::T
368-
end
369-
370-
Random.rand(s::Space) = rand(Random.GLOBAL_RNG, s)
371-
372-
Random.rand(rng::AbstractRNG, s::Space) =
373-
map(s.s) do x
374-
rand(rng, x)
375-
end
376-
377-
Random.rand(rng::AbstractRNG, s::Space{<:Dict}) = Dict(k => rand(rng, v) for (k, v) in s.s)
378-
379-
function Base.in(X, S::Space)
380-
if length(X) == length(S.s)
381-
for (x, s) in zip(X, S.s)
382-
if x ∉ s
383-
return false
384-
end
385-
end
386-
return true
387-
else
388-
return false
389-
end
390-
end
391-
392-
function Base.in(X::Dict, S::Space{<:Dict})
393-
if keys(X) == keys(S.s)
394-
for k in keys(X)
395-
if X[k] ∉ S.s[k]
396-
return false
194+
function test_runnable!(env, n = 1000;rng=Random.GLOBAL_RNG)
195+
@testset "random policy with $(nameof(env))" begin
196+
reset!(env)
197+
for _ in 1:n
198+
A = legal_action_space(env)
199+
a = rand(rng, A)
200+
@test a in A
201+
202+
S = state_space(env)
203+
s = state(env)
204+
@test s in S
205+
env(a)
206+
if is_terminated(env)
207+
reset!(env)
397208
end
398209
end
399-
return true
400-
else
401-
return false
210+
reset!(env)
402211
end
403212
end

0 commit comments

Comments
 (0)