This repository was archived by the owner on May 6, 2021. It is now read-only.

ignore ViZDoom error for now #11

Merged · 4 commits · Aug 1, 2019
.gitignore: 7 changes (6 additions, 1 deletion)
@@ -5,4 +5,9 @@ deps/deps.jl
 
 Manifest.toml
 
-_vizdoom.ini
+_vizdoom.ini
+
+.vscode/*
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
src/environments/classic_control/mdp.jl: 27 changes (22 additions, 5 deletions)
@@ -72,7 +72,7 @@ end
 
 function interact!(env::MDPEnv, action)
     s = rand(env.rng, transition(env.model, env.state, env.actions[action]))
-    r = reward(env.model, env.state, env.actions[action])
+    r = POMDPs.reward(env.model, env.state, env.actions[action])
     env.state = s
     (observation = stateindex(env.model, s),
      reward = r,
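Why the qualification matters: this file defines its own `reward` methods (see the next hunk), and once a module binds a local `reward`, an unqualified call no longer reaches `POMDPs.reward`. A self-contained toy of the clash, where `ToyPOMDPs` is a stand-in for POMDPs.jl rather than the real package:

    # ToyPOMDPs stands in for POMDPs.jl; the real clash is with the local
    # `reward` methods defined later in mdp.jl.
    module ToyPOMDPs
    reward(model, s, a) = -1.0
    end

    # A local `reward` with a different signature, like the one in this file:
    reward(r::Dict, s′) = r[s′]

    reward(Dict(2 => 5.0), 2)         # 5.0 — resolves to the local method
    # reward("model", 1, 1)           # MethodError: no local 3-arg method
    ToyPOMDPs.reward("model", 1, 1)   # -1.0 — the qualified call reaches the intended function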
@@ -141,6 +141,7 @@ struct DeterministicNextStateReward
 end
 reward(::AbstractRNG, r::DeterministicNextStateReward, s, a, s′) = r.value[s′]
 expected_rewards(r::DeterministicNextStateReward, trans_probs) = expected_rewards(r.value, trans_probs)
+
 function expected_rewards(r::AbstractVector, trans_probs)
     result = zeros(size(trans_probs))
     for i in eachindex(trans_probs)
@@ -321,7 +322,11 @@ the same value.
 function set_terminal_states!(mdp, range)
     mdp.isterminal[range] .= 1
     for s in findall(x -> x == 1, mdp.isterminal)
-        mdp.reward[:, s] .= mean(mdp.reward[:, s])
+        if mdp.reward isa DeterministicStateActionReward
+            mdp.reward.value[:, s] .= mean(mdp.reward.value[:, s])
+        else
+            mdp.reward[:, s] .= mean(mdp.reward[:, s])
+        end
         for a in 1:length(mdp.action_space)
             empty_trans_prob!(mdp.trans_probs[a, s])
         end
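The `isa` branch above recurs in the helpers that follow; a dispatch-based alternative could fetch the mutable matrix in one place. A sketch only: `reward_matrix` is not part of this PR, and the struct is stubbed so the snippet runs standalone.

    using Statistics: mean

    # Stub of the package's reward wrapper, only so this example runs on its own.
    struct DeterministicStateActionReward
        value::Matrix{Float64}
    end

    # Hypothetical helper: return the raw reward matrix whatever the storage.
    reward_matrix(r::DeterministicStateActionReward) = r.value
    reward_matrix(r::AbstractArray) = r

    R = reward_matrix(DeterministicStateActionReward(randn(3, 4)))
    R[:, 2] .= mean(R[:, 2])   # the same in-place update as in set_terminal_states!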
@@ -334,7 +339,11 @@ Returns a random deterministic SimpleMDPEnv.
 """
 function deterministic_MDP(; ns = 10^4, na = 10)
     mdp = SimpleMDPEnv(ns, na, init = "deterministic")
-    mdp.reward = mdp.reward .* (mdp.reward .< -1.5)
+    if mdp.reward isa DeterministicStateActionReward
+        mdp.reward.value .*= mdp.reward.value .< -1.5
+    else
+        mdp.reward = mdp.reward .* (mdp.reward .< -1.5)
+    end
     mdp
 end
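A quick check of the masking idiom used here: `r .< -1.5` broadcasts to a Bool array, and multiplying by it zeroes every entry at or above the threshold, so only strongly negative rewards survive (the numbers below are illustrative):

    r = [-2.0 -1.0; -3.0 0.5]
    r .*= r .< -1.5
    r == [-2.0 0.0; -3.0 0.0]   # true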

@@ -353,7 +362,11 @@ Returns a tree_MDP with random rewards.
 """
 function deterministic_tree_MDP_with_rand_reward(; args...)
     mdp = deterministic_tree_MDP(; args...)
     nonterminals = findall(x -> x == 0, mdp.isterminal)
-    mdp.reward[:, nonterminals] = -rand(length(mdp.action_space), length(nonterminals))
+    if mdp.reward isa DeterministicStateActionReward
+        mdp.reward.value[:, nonterminals] = -rand(length(mdp.action_space), length(nonterminals))
+    else
+        mdp.reward[:, nonterminals] = -rand(length(mdp.action_space), length(nonterminals))
+    end
     mdp
 end

@@ -377,7 +390,11 @@ Returns a random deterministic absorbing SimpleMDPEnv
 """
 function absorbing_deterministic_tree_MDP(;ns = 10^3, na = 10)
     mdp = SimpleMDPEnv(ns, na, init = "deterministic")
-    mdp.reward .= mdp.reward .* (mdp.reward .< -.5)
+    if mdp.reward isa DeterministicStateActionReward
+        mdp.reward.value .*= mdp.reward.value .< -.5
+    else
+        mdp.reward .= mdp.reward .* (mdp.reward .< -.5)
+    end
     mdp.initialstates = 1:div(ns, 100)
     reset!(mdp)
     set_terminal_states!(mdp, ns - div(ns, 100) + 1:ns)
src/environments/gym.jl: 2 changes (1 addition, 1 deletion)
@@ -92,5 +92,5 @@ function list_gym_env_names(;
                              "gym.envs.toy_text",
                              "gym.envs.unittest"])
     gym = pyimport("gym")
-    [x.id for x in gym.envs.registry.all() if split(x._entry_point, ':')[1] in modules]
+    [x.id for x in gym.envs.registry.all() if split(x.entry_point, ':')[1] in modules]
 end
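The attribute rename tracks gym itself, which switched `EnvSpec` from the private `_entry_point` to a public `entry_point`. If both gym generations had to be supported at once, a compatibility shim along these lines could work; `entry_point_of` is a hypothetical helper, not part of this PR:

    using PyCall

    const _hasattr = pybuiltin("hasattr")   # Python's builtin hasattr

    # Hypothetical helper: tolerate both spellings of the attribute.
    entry_point_of(spec) =
        _hasattr(spec, "entry_point") ? spec.entry_point : spec._entry_point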
test/environments.jl: 2 changes (1 addition, 1 deletion)
@@ -50,7 +50,7 @@
 
 for env_exp in [
     :(HanabiEnv()),
-    :(basic_ViZDoom_env()),
+    # :(basic_ViZDoom_env()),  # commented out due to https://github.com/JuliaReinforcementLearning/ViZDoom.jl/issues/7
     :(CartPoleEnv()),
     :(MountainCarEnv()),
     :(ContinuousMountainCarEnv()),
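Rather than dropping the case outright, the test could be gated behind an opt-in switch and revived once the upstream issue is resolved. A sketch; the `RLENV_TEST_VIZDOOM` variable name is made up for illustration:

    env_exps = Any[:(CartPoleEnv()), :(MountainCarEnv())]

    # Opt in explicitly; the default is to skip the broken ViZDoom environment.
    if get(ENV, "RLENV_TEST_VIZDOOM", "false") == "true"
        push!(env_exps, :(basic_ViZDoom_env()))
    end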
test/runtests.jl: 2 changes (1 addition, 1 deletion)
@@ -2,7 +2,7 @@ using Test
 using ReinforcementLearningEnvironments
 using ArcadeLearningEnvironment
 using POMDPModels
-using ViZDoom
+# using ViZDoom
 using PyCall
 using Hanabi
 