@@ -4,16 +4,16 @@ using Test
4
4
@testset " EpisodesBuffer" begin
5
5
@testset " with circular traces" begin
6
6
eb = EpisodesBuffer (
7
- CircularArraySARSATTraces (;
7
+ CircularArraySARTTraces (;
8
8
capacity= 10 )
9
9
)
10
10
# push a first episode l=5
11
- push! (eb, (state = 1 , action = 1 ))
11
+ push! (eb, (state = 1 ,))
12
12
@test eb. sampleable_inds[end ] == 0
13
13
@test eb. episodes_lengths[end ] == 0
14
14
@test eb. step_numbers[end ] == 1
15
15
for i = 1 : 5
16
- push! (eb, (state = i+ 1 , action = i+ 1 , reward = i, terminal = false ))
16
+ push! (eb, (state = i+ 1 , action = i, reward = i, terminal = false ))
17
17
@test eb. sampleable_inds[end ] == 0
18
18
@test eb. sampleable_inds[end - 1 ] == 1
19
19
@test eb. step_numbers[end ] == i + 1
@@ -22,7 +22,7 @@ using Test
22
22
@test eb. sampleable_inds == [1 ,1 ,1 ,1 ,1 ,0 ]
23
23
@test length (eb. traces) == 5
24
24
# start new episode of 6 periods.
25
- push! (eb, (state = 7 , action = 7 ))
25
+ push! (eb, (state = 7 ,))
26
26
@test eb. sampleable_inds[end ] == 0
27
27
@test eb. sampleable_inds[end - 1 ] == 0
28
28
@test eb. episodes_lengths[end ] == 0
@@ -32,7 +32,7 @@ using Test
32
32
ep2_len = 0
33
33
for (j,i) = enumerate (8 : 11 )
34
34
ep2_len += 1
35
- push! (eb, (state = i, action = i, reward = i- 1 , terminal = false ))
35
+ push! (eb, (state = i, action = i- 1 , reward = i- 1 , terminal = false ))
36
36
@test eb. sampleable_inds[end ] == 0
37
37
@test eb. sampleable_inds[end - 1 ] == 1
38
38
@test eb. step_numbers[end ] == j + 1
@@ -43,7 +43,7 @@ using Test
43
43
# three last steps replace oldest steps in the buffer.
44
44
for (i, s) = enumerate (12 : 13 )
45
45
ep2_len += 1
46
- push! (eb, (state = s, action = s, reward = s- 1 , terminal = false ))
46
+ push! (eb, (state = s, action = s- 1 , reward = s- 1 , terminal = false ))
47
47
@test eb. sampleable_inds[end ] == 0
48
48
@test eb. sampleable_inds[end - 1 ] == 1
49
49
@test eb. step_numbers[end ] == i + 1 + 4
@@ -59,18 +59,18 @@ using Test
59
59
end
60
60
b = eb[i]
61
61
@test b[:state ] == b[:action ] == b[:reward ] == s
62
- @test b[:next_state ] == b[ :next_action ] == s + 1
62
+ @test b[:next_state ] == s + 1
63
63
end
64
64
# episode 2
65
65
# start a third episode
66
- push! (eb, (state = 14 , action = 14 ))
66
+ push! (eb, (state = 14 , ))
67
67
@test eb. sampleable_inds[end ] == 0
68
68
@test eb. sampleable_inds[end - 1 ] == 0
69
69
@test eb. episodes_lengths[end ] == 0
70
70
@test eb. step_numbers[end ] == 1
71
71
# push until it reaches its own start
72
72
for (i,s) in enumerate (15 : 26 )
73
- push! (eb, (state = s, action = s, reward = s- 1 , terminal = false ))
73
+ push! (eb, (state = s, action = s- 1 , reward = s- 1 , terminal = false ))
74
74
end
75
75
@test eb. sampleable_inds == [fill (true , 10 ); [false ]]
76
76
@test eb. episodes_lengths == fill (length (15 : 26 ), 11 )
0 commit comments