This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 3df2875

Indentation and formatting fixes in CartPole (#214)
1 parent db72e4d · commit 3df2875

File tree

1 file changed (+14 −12)

Gym/CartPole/main.swift

Lines changed: 14 additions & 12 deletions
@@ -68,8 +68,8 @@ struct Episode {
 
 /// Filtering out bad/short episodes before we feed them as neural net training data.
 func filteringBatch(
-    episodes: [Episode],
-    actionCount: Int
+  episodes: [Episode],
+  actionCount: Int
 ) -> (input: Tensor<Float>, target: Tensor<Float>, episodeCount: Int, meanReward: Float) {
   let rewards = episodes.map { $0.reward }
   let rewardBound = Float(np.percentile(rewards, percentile))!
@@ -111,10 +111,10 @@ func filteringBatch(
 }
 
 func nextBatch(
-    env: PythonObject,
-    net: Net,
-    batchSize: Int,
-    actionCount: Int
+  env: PythonObject,
+  net: Net,
+  batchSize: Int,
+  actionCount: Int
 ) -> [Episode] {
   var observationNumpy = env.reset()
 
@@ -127,8 +127,7 @@ func nextBatch(
 
   while true {
     let observationPython = Tensor<Double>(numpy: observationNumpy).unwrapped()
-    let actionProbabilities =
-        softmax(net(Tensor(observationPython).reshaped(to: [1, 4])))
+    let actionProbabilities = softmax(net(Tensor(observationPython).reshaped(to: [1, 4])))
     let actionProbabilitiesPython = actionProbabilities[0].makeNumpyArray()
     let len = Python.len(actionProbabilitiesPython)
     assert(actionCount == Int(Python.len(actionProbabilitiesPython)))
@@ -138,8 +137,10 @@ func nextBatch(
     // print(nextObservation)
     // print(reward)
 
-    steps.append(Episode.Step(observation: Tensor<Float>(observationPython),
-                              action: Int32(actionPython).unwrapped()))
+    steps.append(
+      Episode.Step(
+        observation: Tensor<Float>(observationPython),
+        action: Int32(actionPython).unwrapped()))
 
     episodeReward += Float(reward).unwrapped()
 
@@ -162,7 +163,8 @@ let observationSize = Int(env.observation_space.shape[0]).unwrapped()
 let actionCount = Int(env.action_space.n).unwrapped()
 // print(actionCount)
 
-var net = Net(observationSize: Int(observationSize), hiddenSize: hiddenSize, actionCount: actionCount)
+var net = Net(
+  observationSize: Int(observationSize), hiddenSize: hiddenSize, actionCount: actionCount)
 // SGD optimizer reaches convergence with ~125 mini batches, while Adam uses ~25.
 // let optimizer = SGD<Net, Float>(learningRate: 0.1, momentum: 0.9)
 let optimizer = Adam(for: net, learningRate: 0.01)
@@ -174,7 +176,7 @@ while true {
 
   let episodes = nextBatch(env: env, net: net, batchSize: batchSize, actionCount: actionCount)
   let (input, target, episodeCount, meanReward) = filteringBatch(
-      episodes: episodes, actionCount: actionCount)
+    episodes: episodes, actionCount: actionCount)
 
   let gradients = withLearningPhase(.training) {
     net.gradient { net -> Tensor<Float> in
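
For context, the expression reflowed above is this example's policy-sampling step: run the observation through the network, turn the logits into action probabilities with softmax, and sample an action via numpy. Below is a minimal, self-contained sketch of that pattern, not part of this commit: the Policy layer, its layer sizes, and the trailing prints are hypothetical stand-ins for the Net type and surrounding code in Gym/CartPole/main.swift.

import PythonKit
import TensorFlow

// Hypothetical stand-in for main.swift's Net: one hidden layer mapping
// CartPole's 4-dimensional observation to logits over its 2 actions.
struct Policy: Layer {
  var hidden = Dense<Float>(inputSize: 4, outputSize: 32, activation: relu)
  var logits = Dense<Float>(inputSize: 32, outputSize: 2)

  @differentiable
  func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
    return logits(hidden(input))
  }
}

let np = Python.import("numpy")
var policy = Policy()

// Stand-in observation; in main.swift this comes from env.reset() / env.step().
let observation = Tensor<Float>(randomNormal: [1, 4])

// The pattern from the reflowed line: logits -> probabilities.
let actionProbabilities = softmax(policy(observation))

// Sample an action from those probabilities via numpy, as nextBatch does.
let probabilities = actionProbabilities[0].makeNumpyArray()
let action = np.random.choice(2, p: probabilities)

// Per the comment in main.swift, Adam converges in ~25 mini-batches vs ~125 for SGD.
let optimizer = Adam(for: policy, learningRate: 0.01)
print(action)
print(type(of: optimizer))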

0 commit comments
