minor rename

findmyway · findmyway · commit b9ed73fb202e · 2020-12-16T00:07:39.000+08:00
diff --git a/src/base.jl b/src/base.jl
@@ -9,42 +9,56 @@ end
 Base.show(io::IO, t::MIME"text/plain", env::AbstractEnv) = show(io, MIME"text/markdown"(), env)
 
 function Base.show(io::IO, t::MIME"text/markdown", env::AbstractEnv)
-    show(io, t, Markdown.parse("""
+    s = """
     # $(nameof(env))
 
     ## Traits
     | Trait Type | Value |
     |:---------- | ----- |
     $(join(["|$(string(f))|$(f(env))|" for f in env_traits()], "\n"))
 
-    ## Action Space
-    `$(action_space(env))`
+    ## Is Environment Terminated?
+    $(is_terminated(env) ? "Yes" : "No")
 
-    ## State Space
-    `$(state_space(env))`
+    """
 
-    """))
+    if get(io, :is_show_state_space, true)
+        s *= """
+        ## State Space
+        `$(state_space(env))`
 
-    if NumAgentStyle(env) !== SINGLE_AGENT
-        show(io, t, Markdown.parse("""
-            ## Players
-            $(join(["- `$p`" for p in players(env)], "\n"))
+        """
+    end
 
-            ## Current Player
-            `$(current_player(env))`
-            """))
+    if get(io, :is_show_action_space, true)
+        s *= """
+        ## Action Space
+        `$(action_space(env))`
+
+        """
     end
 
-    show(io, t, Markdown.parse("""
-        ## Is Environment Terminated?
-        $(is_terminated(env) ? "Yes" : "No")
+    if NumAgentStyle(env) !== SINGLE_AGENT
+        s *= """
+        ## Players
+        $(join(["- `$p`" for p in players(env)], "\n"))
 
+        ## Current Player
+        `$(current_player(env))`
+        """
+    end
+
+    if get(io, :is_show_state, true)
+        s *= """
         ## Current State
 
         ```
         $(state(env))
         ```
-        """))
+        """
+    end
+
+    show(io, t, Markdown.parse(s))
 end
 
 #####
@@ -57,48 +71,45 @@ using Test
 Call this function after writing your customized environment to make sure that
 all the necessary interfaces are implemented correctly and consistently.
 """
-function test_interfaces(env)
-    env = copy(env)  # make sure we don't touch the original environment
-
+function test_interfaces!(env)
     rng = Random.MersenneTwister(666)
 
     @info "testing $(nameof(env)), you need to manually check these traits to make sure they are implemented correctly!" NumAgentStyle(env) DynamicStyle(env) ActionStyle(env) InformationStyle(env) StateStyle(env) RewardStyle(env) UtilityStyle(env) ChanceStyle(env)
 
-    reset!(env)
-
     @testset "copy" begin
-        old_env = env
-        env = copy(env)
+        X = copy(env)
+        Y = copy(env)
+        reset!(X)
+        reset!(Y)
 
-        if ChanceStyle(env) ∉ (DETERMINISTIC, EXPLICIT_STOCHASTIC)
+        if ChanceStyle(Y) ∉ (DETERMINISTIC, EXPLICIT_STOCHASTIC)
             s = 888
-            Random.seed!(env, s)
-            Random.seed!(old_env, s)
+            Random.seed!(Y, s)
+            Random.seed!(X, s)
         end
 
-        @test env !== old_env
+        @test Y !== X
 
-        @test state(env) == state(old_env)
-        @test action_space(env) == action_space(old_env)
-        @test reward(env) == reward(old_env)
-        @test is_terminated(env) == is_terminated(old_env)
+        @test state(Y) == state(X)
+        @test action_space(Y) == action_space(X)
+        @test reward(Y) == reward(X)
+        @test is_terminated(Y) == is_terminated(X)
 
-        while !is_terminated(env)
-            A, A′ = legal_action_space(old_env), legal_action_space(env)
+        while !is_terminated(Y)
+            A, A′ = legal_action_space(X), legal_action_space(Y)
             @test A == A′
             a = rand(rng, A)
-            env(a)
-            old_env(a)
-            @test state(env) == state(old_env)
-            @test reward(env) == reward(old_env)
-            @test is_terminated(env) == is_terminated(old_env)
+            Y(a)
+            X(a)
+            @test state(Y) == state(X)
+            @test reward(Y) == reward(X)
+            @test is_terminated(Y) == is_terminated(X)
         end
     end
 
-    reset!(env)
-
     @testset "SingleAgent" begin
         if NumAgentStyle(env) === SINGLE_AGENT
+            reset!(env)
             total_reward = 0.
             while !is_terminated(env)
                 if StateStyle(env) isa Tuple
@@ -170,6 +181,8 @@ function test_interfaces(env)
             end
         end
     end
+
+    reset!(env)
 end
 
 #####
@@ -255,4 +268,81 @@ watch https://github.com/JuliaMath/IntervalSets.jl/issues/66
 """
 function Base.in(x::AbstractArray, s::Array{<:Interval})
     size(x) == size(s) && all(x .∈ s)
-end
+end
+
+Random.rand(s::Union{Interval, Array{<:Interval}}) = rand(Random.GLOBAL_RNG, s)
+
+function Random.rand(rng::AbstractRNG, s::Interval)
+    rand(rng) * (s.right - s.left) + s.left
+end
+
+function Random.rand(rng::AbstractRNG, s::Array{<:Interval})
+    map(x -> rand(rng, x), s)
+end
+
+export WorldSpace
+
+struct WorldSpace{T} end
+
+WorldSpace() = WorldSpace{Any}()
+
+Base.in(x, ::WorldSpace{T}) where T = x isa T
+
+#####
+# ZeroTo
+#####
+
+export ZeroTo
+
+"""
+Similar to `Base.OneTo`. Useful when wrapping third-party environments.
+"""
+struct ZeroTo{T<:Integer} <: AbstractUnitRange{T}
+    stop::T
+    ZeroTo{T}(n) where {T<:Integer} = new(max(zero(T)-one(T),n))
+end
+
+ZeroTo(n::T) where {T<:Integer} = ZeroTo{T}(n)
+
+Base.show(io::IO, r::ZeroTo) = print(io, "ZeroTo(", r.stop, ")")
+Base.length(r::ZeroTo{T}) where T = T(r.stop + one(r.stop))
+Base.first(r::ZeroTo{T}) where T = zero(r.stop)
+
+function getindex(v::ZeroTo{T}, i::Integer) where T
+    Base.@_inline_meta
+    @boundscheck ((i >= 0) & (i <= v.stop)) || throw_boundserror(v, i)
+    convert(T, i)
+end
+
+#####
+# ActionProbPair
+#####
+
+export ActionProbPair
+
+"""
+Used in the action space of a chance player.
+"""
+struct ActionProbPair{A,P}
+    action::A
+    prob::P
+end
+
+"""
+Directly copied from [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl/blob/0ea8e798c3d19609ed33b11311de5a2bd6ee9fd0/src/sampling.jl#L499-L510) to avoid depending on the whole package.
+Here we assume the weights in `wv` sum to `1`.
+"""
+function weighted_sample(rng::AbstractRNG, wv)
+    t = rand(rng)
+    cw = zero(Base.first(wv))
+    for (i, w) in enumerate(wv)
+        cw += w
+        if cw >= t
+            return i
+        end
+    end
+end
+
+Random.rand(rng::AbstractRNG, s::AbstractVector{<:ActionProbPair}) = s[weighted_sample(rng, (x.prob for x in s))]
+
+(env::AbstractEnv)(a::ActionProbPair) = env(a.action)
diff --git a/src/interface.jl b/src/interface.jl
@@ -168,13 +168,13 @@ abstract type AbstractInformationStyle <: AbstractEnvStyle end
 @api const IMPERFECT_INFORMATION = ImperfectInformation()
 
 """
-    InformationStyle(env) = PERFECT_INFORMATION
+    InformationStyle(env) = IMPERFECT_INFORMATION
 
 Distinguish environments between [`PERFECT_INFORMATION`](@ref) and
-[`IMPERFECT_INFORMATION`](@ref). [`PERFECT_INFORMATION`](@ref) is returned by default.
+[`IMPERFECT_INFORMATION`](@ref). [`IMPERFECT_INFORMATION`](@ref) is returned by default.
 """
 @env_api InformationStyle(env::T) where {T<:AbstractEnv} = InformationStyle(T)
-InformationStyle(::Type{<:AbstractEnv}) = PERFECT_INFORMATION
+InformationStyle(::Type{<:AbstractEnv}) = IMPERFECT_INFORMATION
 
 #####
 ### ChanceStyle
@@ -391,7 +391,12 @@ const SPECTOR = Spector()
 
 @api (env::AbstractEnv)(action, player = current_player(env))
 
-"Make an independent copy of `env`"
+"""
+Make an independent copy of `env`.
+
+!!! note
+    The rng (if `env` has one) is also copied!
+"""
 @api copy(env::AbstractEnv) = deepcopy(env)
 @api copyto!(dest::AbstractEnv, src::AbstractEnv)
 
diff --git a/test/examples/kuhn_poker.jl b/test/examples/kuhn_poker.jl
@@ -2,6 +2,6 @@
 
 env = KuhnPokerEnv()
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 end
diff --git a/test/examples/monty_hall_problem.jl b/test/examples/monty_hall_problem.jl
@@ -3,7 +3,7 @@
 rng = StableRNG(123)
 env = MontyHallEnv(;rng=rng)
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 n_win_car = 0
 N = 50_000
diff --git a/test/examples/multi_arm_bandits.jl b/test/examples/multi_arm_bandits.jl
@@ -4,7 +4,7 @@ rng = StableRNG(123)
 env = MultiArmBanditsEnv(;rng=rng)
 rewards = []
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 N = 50_000
 for _ in 1:N
diff --git a/test/examples/pig.jl b/test/examples/pig.jl
@@ -1,4 +1,4 @@
 @testset "PigEnv" begin
     env = PigEnv()
-    RLBase.test_interfaces(env)
+    RLBase.test_interfaces!(env)
 end
diff --git a/test/examples/random_walk_1d.jl b/test/examples/random_walk_1d.jl
@@ -3,7 +3,7 @@
 end_rewards = 3 => 5
 env = RandomWalk1D(;rewards=end_rewards)
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 rng = StableRNG(123)
 N = 50_000
diff --git a/test/examples/rock_paper_scissors.jl b/test/examples/rock_paper_scissors.jl
@@ -3,7 +3,7 @@
 rng = StableRNG(123)
 env = RockPaperScissorsEnv()
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 rewards = [[],[]]
 for _ in 1:50_000
diff --git a/test/examples/tic_tac_toe.jl b/test/examples/tic_tac_toe.jl
@@ -2,7 +2,7 @@
 
 env = TicTacToeEnv()
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 @test length(state_space(env, Observation{Int}())) == 5478
 
diff --git a/test/examples/tiger_problem_env.jl b/test/examples/tiger_problem_env.jl
@@ -4,7 +4,7 @@ rng = StableRNG(123)
 obs_prob = 0.85
 env = TigerProblemEnv(;rng=rng, obs_prob=obs_prob)
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 rewards = []
 for _ in 1:50_000
diff --git a/test/examples/tiny_hanabi.jl b/test/examples/tiny_hanabi.jl
@@ -2,6 +2,6 @@
 
 env = TinyHanabiEnv()
 
-RLBase.test_interfaces(env)
+RLBase.test_interfaces!(env)
 
 end