@@ -68,8 +68,8 @@ struct Episode {
 
 /// Filtering out bad/short episodes before we feed them as neural net training data.
 func filteringBatch(
-    episodes: [Episode],
-    actionCount: Int
+    episodes: [Episode],
+    actionCount: Int
 ) -> (input: Tensor<Float>, target: Tensor<Float>, episodeCount: Int, meanReward: Float) {
     let rewards = episodes.map { $0.reward }
     let rewardBound = Float(np.percentile(rewards, percentile))!
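For context on the filtering step this hunk touches: the reward bound comes from numpy's percentile over the batch's episode rewards, and only episodes that clear it are kept as training data. A minimal standalone sketch of that idea (the `percentile = 70` constant and the filter itself live outside this hunk and are assumptions here):

import PythonKit

let np = Python.import("numpy")

// Toy reward bound; 70 mirrors this file's `percentile` constant (assumed).
let rewards: [Float] = [10, 30, 50, 70, 90]
let rewardBound = Float(np.percentile(rewards, 70))!
print(rewardBound)  // 66.0 with numpy's default linear interpolation

// Episodes scoring at or above the bound would survive the filter.
let kept = rewards.filter { $0 >= rewardBound }  // [70.0, 90.0]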
@@ -111,10 +111,10 @@ func filteringBatch(
 }
 
 func nextBatch(
-    env: PythonObject,
-    net: Net,
-    batchSize: Int,
-    actionCount: Int
+    env: PythonObject,
+    net: Net,
+    batchSize: Int,
+    actionCount: Int
 ) -> [Episode] {
     var observationNumpy = env.reset()
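The env and net parameters above come straight from Python interop. A minimal sketch of the round trip nextBatch depends on (the gym import and the CartPole-v0 id are assumptions, inferred from this example's directory and the [1, 4] reshape below):

import PythonKit
import TensorFlow

let gym = Python.import("gym")
let env = gym.make("CartPole-v0")

// env.reset() hands back a numpy array; S4TF's failable init bridges it.
let observationNumpy = env.reset()
let observation = Tensor<Double>(numpy: observationNumpy)!  // 4 values for CartPole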
@@ -127,8 +127,7 @@ func nextBatch(
 
     while true {
         let observationPython = Tensor<Double>(numpy: observationNumpy).unwrapped()
-        let actionProbabilities =
-            softmax(net(Tensor(observationPython).reshaped(to: [1, 4])))
+        let actionProbabilities = softmax(net(Tensor(observationPython).reshaped(to: [1, 4])))
         let actionProbabilitiesPython = actionProbabilities[0].makeNumpyArray()
         let len = Python.len(actionProbabilitiesPython)
         assert(actionCount == Int(Python.len(actionProbabilitiesPython)))
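The joined line above turns the net's raw outputs into action probabilities. A toy check of that softmax step:

import TensorFlow

// Two logits, as a [1, actionCount] batch, become a distribution over
// CartPole's two actions.
let logits = Tensor<Float>([[0.5, -0.5]])
let probabilities = softmax(logits)
print(probabilities)        // ≈ [[0.731, 0.269]]
print(probabilities.sum())  // ≈ 1.0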
@@ -138,8 +137,10 @@ func nextBatch(
         // print(nextObservation)
         // print(reward)
 
-        steps.append(Episode.Step(observation: Tensor<Float>(observationPython),
-                                  action: Int32(actionPython).unwrapped()))
+        steps.append(
+            Episode.Step(
+                observation: Tensor<Float>(observationPython),
+                action: Int32(actionPython).unwrapped()))
 
         episodeReward += Float(reward).unwrapped()
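Between the assert and the steps.append above, the omitted lines presumably sample actionPython from actionProbabilitiesPython and step the env; the `len` binding suggests numpy's weighted choice. A standalone sketch of that sampling (assumed; it is not shown in this diff):

import PythonKit

let np = Python.import("numpy")

// Weighted draw of an action index, the way the omitted code likely
// produces actionPython from the policy's probabilities.
let probabilities = np.array([0.7, 0.3])
let action = np.random.choice(2, p: probabilities)  // 0 about 70% of the time
print(action)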
@@ -162,7 +163,8 @@ let observationSize = Int(env.observation_space.shape[0]).unwrapped()
 let actionCount = Int(env.action_space.n).unwrapped()
 // print(actionCount)
 
-var net = Net(observationSize: Int(observationSize), hiddenSize: hiddenSize, actionCount: actionCount)
+var net = Net(
+    observationSize: Int(observationSize), hiddenSize: hiddenSize, actionCount: actionCount)
 // SGD optimizer reaches convergence with ~125 minibatches, while Adam uses ~25.
 // let optimizer = SGD<Net, Float>(learningRate: 0.1, momentum: 0.9)
 let optimizer = Adam(for: net, learningRate: 0.01)
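Net itself is defined earlier in the file; from the initializer arguments in this hunk it is presumably a small MLP mapping observations to action logits. A plausible sketch (the exact layer layout is an assumption):

import TensorFlow

struct Net: Layer {
    var l1, l2: Dense<Float>

    init(observationSize: Int, hiddenSize: Int, actionCount: Int) {
        l1 = Dense<Float>(inputSize: observationSize, outputSize: hiddenSize, activation: relu)
        l2 = Dense<Float>(inputSize: hiddenSize, outputSize: actionCount)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        return input.sequenced(through: l1, l2)
    }
}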
@@ -174,7 +176,7 @@ while true {
 
     let episodes = nextBatch(env: env, net: net, batchSize: batchSize, actionCount: actionCount)
     let (input, target, episodeCount, meanReward) = filteringBatch(
-        episodes: episodes, actionCount: actionCount)
+        episodes: episodes, actionCount: actionCount)
 
     let gradients = withLearningPhase(.training) {
         net.gradient { net -> Tensor<Float> in
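The closure body and the optimizer step are cut off by the hunk. A sketch of how the training step presumably continues, using the net, input, target, and optimizer bindings from the surrounding code (the loss and update calls are assumptions, in line with the S4TF APIs this file already uses):

let gradients = withLearningPhase(.training) {
    net.gradient { net -> Tensor<Float> in
        let logits = net(input)
        // Cross-entropy against the filtered batch's action targets.
        return softmaxCrossEntropy(logits: logits, probabilities: target)
    }
}
optimizer.update(&net, along: gradients)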