@@ -18,14 +18,14 @@ import TensorFlow
 let np = Python.import("numpy")
 let gym = Python.import("gym")
 
-/// Model parameters and hyper parameters.
+/// Model parameters and hyperparameters.
 let hiddenSize = 128
 let batchSize = 16
 /// Controls the amount of good/long episodes to retain for training.
 let percentile = 70
 
-// Force unwrapping with ! does not provide source location when unwrapping
-// nil, so we instead make a util function for debuggability.
+// Force unwrapping with `!` does not provide source location when unwrapping `nil`, so we instead
+// make a utility function for debuggability.
 fileprivate extension Optional {
     func unwrapped(file: StaticString = #file, line: UInt = #line) -> Wrapped {
         guard let unwrapped = self else {
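A side note on the helper above: only the opening `guard` of `unwrapped(file:line:)` appears in this hunk, so the following is a minimal sketch of how such a helper typically completes and reads at a call site. The `fatalError` message and the `count` example are assumptions, not part of this commit.

```swift
fileprivate extension Optional {
    /// Like `!`, but a failed unwrap reports the caller's file and line.
    func unwrapped(file: StaticString = #file, line: UInt = #line) -> Wrapped {
        guard let unwrapped = self else {
            // Assumed message; the real body is outside this hunk.
            fatalError("Value is nil", file: file, line: line)
        }
        return unwrapped
    }
}

// Hypothetical call site: a nil trap points at this line, not at the extension.
let count: Int? = Int("42")
let value = count.unwrapped()
```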
@@ -43,11 +43,8 @@ struct Net: Layer {
     var l1, l2: Dense<Float>
 
     init(observationSize: Int, hiddenSize: Int, actionCount: Int) {
-        self.l1 = Dense<Float>(
-            inputSize: observationSize, outputSize: hiddenSize, activation: relu)
-
-        self.l2 = Dense<Float>(
-            inputSize: hiddenSize, outputSize: actionCount, activation: { $0 })
+        l1 = Dense<Float>(inputSize: observationSize, outputSize: hiddenSize, activation: relu)
+        l2 = Dense<Float>(inputSize: hiddenSize, outputSize: actionCount)
     }
 
     @differentiable
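Worth noting on the second `+` line above: `Dense` defaults its `activation` parameter to `identity`, so dropping the explicit `{ $0 }` closure leaves the output layer unchanged. A small illustrative forward pass, assuming the CartPole sizes of 4 observations and 2 actions (these sizes are not taken from this commit):

```swift
// Hypothetical smoke test: build the net and check the logits' shape.
let demoNet = Net(observationSize: 4, hiddenSize: hiddenSize, actionCount: 2)
let dummyObservation = Tensor<Float>(zeros: [1, 4])
let logits = demoNet(dummyObservation)  // unnormalized action scores
print(logits.shape)                     // expected: [1, 2]
```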
@@ -69,15 +66,11 @@ struct Episode {
     let reward: Float
 }
 
-/// Filter out bad/short episodes before we feed them as neural net training
-/// data.
+/// Filtering out bad/short episodes before we feed them as neural net training data.
 func filteringBatch(
     episodes: [Episode],
     actionCount: Int
-) -> (input: Tensor<Float>,
-      target: Tensor<Float>,
-      episodeCount: Int,
-      meanReward: Float) {
+) -> (input: Tensor<Float>, target: Tensor<Float>, episodeCount: Int, meanReward: Float) {
     let rewards = episodes.map { $0.reward }
     let rewardBound = Float(np.percentile(rewards, percentile))!
     print("rewardBound = \(rewardBound)")
@@ -174,7 +167,7 @@ var net = Net(observationSize: Int(observationSize), hiddenSize: hiddenSize, act
 // let optimizer = SGD<Net, Float>(learningRate: 0.1, momentum: 0.9)
 let optimizer = Adam(for: net, learningRate: 0.01)
 var batchIndex = 0
-Context.local.learningPhase = .training
+
 while true {
     print("Processing mini batch \(batchIndex)")
     batchIndex += 1
@@ -183,11 +176,13 @@ while true {
     let (input, target, episodeCount, meanReward) = filteringBatch(
         episodes: episodes, actionCount: actionCount)
 
-    let gradients = gradient(at: net) { model -> Tensor<Float> in
-        let logits = model(input)
-        let loss = softmaxCrossEntropy(logits: logits, probabilities: target)
-        print("loss is \(loss)")
-        return loss
+    let gradients = withLearningPhase(.training) {
+        net.gradient { net -> Tensor<Float> in
+            let logits = net(input)
+            let loss = softmaxCrossEntropy(logits: logits, probabilities: target)
+            print("loss is \(loss)")
+            return loss
+        }
     }
     optimizer.update(&net.allDifferentiableVariables, along: gradients)
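The last hunk is the substance of the change: instead of flipping `Context.local.learningPhase` globally once (removed in the earlier hunk), the training phase is now scoped with `withLearningPhase(.training)` to just the gradient computation. A complementary, hedged sketch of the same scoping at evaluation time; `observationBatch` is an assumed `[1, observationSize]` tensor, and this code is not part of the commit:

```swift
// Hypothetical evaluation step: run the net in inference mode before choosing an action.
let actionProbabilities = withLearningPhase(.inference) {
    softmax(net(observationBatch))
}
print(actionProbabilities)  // a [1, actionCount] distribution over actions
```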