Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.

Commit 6ee22d3

Browse files
committed
support string representation for TicTacToeEnv
1 parent 8141ddb commit 6ee22d3

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

src/environments/examples/TicTacToeEnv.jl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,26 @@ RLBase.state(env::TicTacToeEnv, ::Observation{Int}, p) =
8282
# Integer observation space: the indices 1 through the number of entries
# returned by `get_tic_tac_toe_state_info()`. `env` and `p` are not consulted.
function RLBase.state_space(env::TicTacToeEnv, ::Observation{Int}, p)
    return Base.OneTo(length(get_tic_tac_toe_state_info()))
end
8484

85+
# String observation space: a `WorldSpace{String}`. `env` and `p` are not consulted.
function RLBase.state_space(env::TicTacToeEnv, ::Observation{String}, p)
    return WorldSpace{String}()
end
86+
87+
"""
    RLBase.state(env::TicTacToeEnv, ::Observation{String}, p)

Return the board of `env` rendered as text: three lines of three characters,
each line terminated by a newline.

A cell `(i, j)` is drawn as `'.'` when `env.board[i, j, 1]` is set, as `'x'`
when `env.board[i, j, 2]` is set, and as `'o'` otherwise. The argument `p` is
not used.
"""
function RLBase.state(env::TicTacToeEnv, ::Observation{String}, p)
    io = IOBuffer()
    for row in 1:3
        for col in 1:3
            # Layer 1 is checked first; layer 2 is only read when layer 1 is unset.
            mark = env.board[row, col, 1] ? '.' : (env.board[row, col, 2] ? 'x' : 'o')
            print(io, mark)
        end
        print(io, '\n')
    end
    return String(take!(io))
end
104+
85105
# Look up `env` in the table returned by `get_tic_tac_toe_state_info()` and
# report the `is_terminated` field of the matching entry.
function RLBase.is_terminated(env::TicTacToeEnv)
    info = get_tic_tac_toe_state_info()[env]
    return info.is_terminated
end
86106

87107
function RLBase.reward(env::TicTacToeEnv, player)
@@ -150,7 +170,7 @@ RLBase.NumAgentStyle(::TicTacToeEnv) = MultiAgent(2)
150170
# Trait declarations for TicTacToeEnv. Each method takes only the environment
# type and returns a constant.
RLBase.DynamicStyle(::TicTacToeEnv) = SEQUENTIAL
151171
RLBase.ActionStyle(::TicTacToeEnv) = FULL_ACTION_SET
152172
RLBase.InformationStyle(::TicTacToeEnv) = PERFECT_INFORMATION
153-
RLBase.StateStyle(::TicTacToeEnv) = (Observation{Int}(), Observation{BitArray{3}}())
173+
# State representations offered by the environment; `Observation{String}()` is
# listed first, ahead of the `Int` and `BitArray{3}` observations.
# NOTE(review): if RLBase treats the first entry as the default state style,
# this ordering makes the string form the default — confirm against RLBase.
RLBase.StateStyle(::TicTacToeEnv) = (Observation{String}(), Observation{Int}(), Observation{BitArray{3}}())
154174
RLBase.RewardStyle(::TicTacToeEnv) = TERMINAL_REWARD
155175
RLBase.UtilityStyle(::TicTacToeEnv) = ZERO_SUM
156176
RLBase.ChanceStyle(::TicTacToeEnv) = DETERMINISTIC

0 commit comments

Comments
 (0)