21
21
# Union of the three wrapper types, used as the argument type of the `CRL` methods
# that are implemented identically for all of them.
const CommonRLEnvs = Union{CommonRLEnv,CommonRLMarkovEnv,CommonRLZeroSumEnv}
22
22
23
23
function Base. convert (:: Type{CRL.AbstractEnv} , env:: AbstractEnv )
24
- if get_num_players (env) == 1
24
+ if NumAgentStyle (env) === SINGLE_AGENT
25
25
convert (CRL. AbstractMarkovEnv, env)
26
- elseif get_num_players (env) == 2 && UtilityStyle (env) === ZERO_SUM
26
+ elseif NumAgentStyle (env) isa MultiAgent{ 2 } && UtilityStyle (env) === ZERO_SUM
27
27
convert (CRL. AbstractZeroSumEnv, env)
28
28
else
29
29
CommonRLEnv (env)
@@ -34,25 +34,25 @@ Base.convert(::Type{CRL.AbstractMarkovEnv}, env::AbstractEnv) = CommonRLMarkovEn
34
34
# Wrap `env` in a `CommonRLZeroSumEnv` when a `CRL.AbstractZeroSumEnv` is requested.
function Base.convert(::Type{CRL.AbstractZeroSumEnv}, env::AbstractEnv)
    return CommonRLZeroSumEnv(env)
end
35
35
36
36
# Forward `CRL.reset!` to `reset!` on the wrapped environment.
CRL.@provide function CRL.reset!(env::CommonRLEnvs)
    return reset!(env.env)
end
37
- CRL. @provide CRL. actions (env:: CommonRLEnvs ) = get_actions (env. env)
38
- CRL. @provide CRL. observe (env:: CommonRLEnvs ) = get_state (env. env)
39
- CRL. state (env:: CommonRLEnvs ) = get_state (env. env)
37
# `CRL.actions` is answered by `action_space` of the wrapped environment.
CRL.@provide function CRL.actions(env::CommonRLEnvs)
    return action_space(env.env)
end
38
# `CRL.observe` is answered by `state` of the wrapped environment.
CRL.@provide function CRL.observe(env::CommonRLEnvs)
    return state(env.env)
end
39
# `CRL.state` is answered by `state` of the wrapped environment. It is not declared
# with `CRL.@provide`; a separate `CRL.provided` method decides its availability.
function CRL.state(env::CommonRLEnvs)
    return state(env.env)
end
40
40
# `CRL.state` is reported as provided only when the wrapped environment's
# `InformationStyle` is `PERFECT_INFORMATION`.
function CRL.provided(::typeof(CRL.state), env::CommonRLEnvs)
    info_style = InformationStyle(env.env)
    return info_style === PERFECT_INFORMATION
end
42
- CRL. @provide CRL. terminated (env:: CommonRLEnvs ) = get_terminal (env. env)
43
- CRL. @provide CRL. player (env:: CommonRLEnvs ) = get_current_player (env. env)
42
# `CRL.terminated` is answered by `is_terminated` of the wrapped environment.
CRL.@provide function CRL.terminated(env::CommonRLEnvs)
    return is_terminated(env.env)
end
43
# `CRL.player` is answered by `current_player` of the wrapped environment.
CRL.@provide function CRL.player(env::CommonRLEnvs)
    return current_player(env.env)
end
44
44
# Clone a wrapper by copying the wrapped environment and re-wrapping the copy.
# One method per wrapper type, so the clone has the same wrapper type as the original;
# a single method that always built a `CommonRLEnv` would turn a cloned
# `CommonRLMarkovEnv` or `CommonRLZeroSumEnv` into a plain `CommonRLEnv`.
CRL.@provide CRL.clone(env::CommonRLEnv) = CommonRLEnv(copy(env.env))
CRL.@provide CRL.clone(env::CommonRLMarkovEnv) = CommonRLMarkovEnv(copy(env.env))
CRL.@provide CRL.clone(env::CommonRLZeroSumEnv) = CommonRLZeroSumEnv(copy(env.env))
45
45
46
46
# Apply action `a` by calling the wrapped environment with it, then return the
# wrapped environment's `reward`.
CRL.@provide function CRL.act!(env::CommonRLEnvs, a)
    inner = env.env
    inner(a)
    return reward(inner)
end
50
50
51
- CRL. valid_actions (x:: CommonRLEnvs ) = get_legal_actions (x. env)
51
# `CRL.valid_actions` is answered by `legal_action_space` of the wrapped environment.
function CRL.valid_actions(x::CommonRLEnvs)
    return legal_action_space(x.env)
end
52
52
# `CRL.valid_actions` is reported as provided only when the wrapped environment's
# `ActionStyle` is `FullActionSet()`.
function CRL.provided(::typeof(CRL.valid_actions), env::CommonRLEnvs)
    action_style = ActionStyle(env.env)
    return action_style === FullActionSet()
end
54
54
55
- CRL. valid_action_mask (x:: CommonRLEnvs ) = get_legal_actions_mask (x. env)
55
# `CRL.valid_action_mask` is answered by `legal_action_space_mask` of the wrapped
# environment.
function CRL.valid_action_mask(x::CommonRLEnvs)
    return legal_action_space_mask(x.env)
end
56
56
# `CRL.valid_action_mask` is reported as provided only when the wrapped environment's
# `ActionStyle` is `FullActionSet()`.
function CRL.provided(::typeof(CRL.valid_action_mask), env::CommonRLEnvs)
    action_style = ActionStyle(env.env)
    return action_style === FullActionSet()
end
58
58
68
68
# Converting a CommonRL environment to `AbstractEnv` delegates to the `RLBaseEnv`
# conversion.
function Base.convert(::Type{AbstractEnv}, env::CRL.AbstractEnv)
    return convert(RLBaseEnv, env)
end
69
69
# Wrap a CommonRL environment in an `RLBaseEnv`. The reward cannot be determined
# before acting, so it is initialised to `0.0f0`, i.e. assumed to be `Float32`.
function Base.convert(::Type{RLBaseEnv}, env::CRL.AbstractEnv)
    return RLBaseEnv(env, 0.0f0)
end
70
70
71
- get_state (env:: RLBaseEnv ) = CRL. observe (env. env)
72
- get_actions (env:: RLBaseEnv ) = CRL. actions (env. env)
73
- get_reward (env:: RLBaseEnv ) = env. r
74
- get_terminal (env:: RLBaseEnv ) = CRL. terminated (env. env)
75
- get_legal_actions (env:: RLBaseEnv ) = CRL. valid_actions (env. env)
76
- get_legal_actions_mask (env:: RLBaseEnv ) = CRL. valid_action_mask (env. env)
71
# The state of an `RLBaseEnv` is whatever `CRL.observe` returns for the wrapped
# environment.
function state(env::RLBaseEnv)
    return CRL.observe(env.env)
end
72
# The action space of an `RLBaseEnv` is `CRL.actions` of the wrapped environment.
function action_space(env::RLBaseEnv)
    return CRL.actions(env.env)
end
73
# The reward of an `RLBaseEnv` is the value currently stored in its `r` field.
function reward(env::RLBaseEnv)
    return env.r
end
74
# Termination of an `RLBaseEnv` is answered by `CRL.terminated` on the wrapped
# environment.
function is_terminated(env::RLBaseEnv)
    return CRL.terminated(env.env)
end
75
# The legal action space of an `RLBaseEnv` is `CRL.valid_actions` of the wrapped
# environment.
function legal_action_space(env::RLBaseEnv)
    return CRL.valid_actions(env.env)
end
76
# The legal action mask of an `RLBaseEnv` is `CRL.valid_action_mask` of the wrapped
# environment.
function legal_action_space_mask(env::RLBaseEnv)
    return CRL.valid_action_mask(env.env)
end
77
77
# Resetting an `RLBaseEnv` forwards to `CRL.reset!` on the wrapped environment.
function reset!(env::RLBaseEnv)
    return CRL.reset!(env.env)
end
78
78
79
79
# Calling an `RLBaseEnv` with action `a` applies it through `CRL.act!`, stores the
# result in the `r` field, and returns that result.
function (env::RLBaseEnv)(a)
    r = CRL.act!(env.env, a)
    env.r = r
    # Return the value produced by `act!` itself, as the chained assignment did.
    return r
end
0 commit comments