Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit b3a1f42

Browse files
dan-zheng authored and saeta committed
Change momentum and epsilon properties to scalars. (#220)
1 parent ae8eac0 commit b3a1f42

File tree

3 files changed

+4
-7
lines changed

3 files changed

+4
-7
lines changed

MiniGo/Models/GoModel.swift

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,7 @@ struct ConvBN: Layer {
5454
// TODO(jekbradbury): thread through bias and affine boolean arguments
5555
// (behavior is correct for inference but this should be changed for training)
5656
self.conv = Conv2D(filterShape: filterShape, strides: strides, padding: padding)
57-
self.norm = BatchNorm(
58-
featureCount: filterShape.3,
59-
momentum: Tensor<Float>(0.95),
60-
epsilon: Tensor<Float>(1e-5))
57+
self.norm = BatchNorm(featureCount: filterShape.3, momentum: 0.95, epsilon: 1e-5)
6158
}
6259

6360
@differentiable

Transformer/Model.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -227,10 +227,10 @@ struct EncoderLayer: Layer {
227227
size: size,
228228
headCount: headCount)
229229
selfAttentionDropout = Dropout(probability: dropProbability)
230-
selfAttentionNorm = LayerNorm(featureCount: size, axis: 2, epsilon: Tensor<Float>(1e-5))
230+
selfAttentionNorm = LayerNorm(featureCount: size, axis: 2, epsilon: 1e-5)
231231
feedForward = FeedForward(size: size, hidden: 4 * size, dropProbability: dropProbability)
232232
feedForwardDropout = Dropout(probability: dropProbability)
233-
feedForwardNorm = LayerNorm(featureCount: size, axis: 2, epsilon: Tensor<Float>(1e-5))
233+
feedForwardNorm = LayerNorm(featureCount: size, axis: 2, epsilon: 1e-5)
234234
}
235235

236236
@differentiable(wrt: (self, input))

Transformer/PythonCheckpointReader.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ extension LayerNorm: InitializableFromPythonCheckpoint {
7373
offset: readTensor(fromPath: path, name: scope + "/b", scalarType: Scalar.self),
7474
scale: readTensor(fromPath: path, name: scope + "/g", scalarType: Scalar.self),
7575
axis: -1,
76-
epsilon: Tensor(1e-5))
76+
epsilon: 1e-5)
7777
}
7878
}
7979

0 commit comments

Comments
 (0)