Skip to content
This repository was archived by the owner on Jul 1, 2023. It is now read-only.

Commit b7ba0d5

Browse files
dan-zheng authored and saeta committed
Change momentum and epsilon properties to scalars. (#525)
Change the following properties from `Tensor<Scalar>` to `Scalar`. - `BatchNorm.momentum` - `BatchNorm.epsilon` - `LayerNorm.epsilon` Semantically, these properties are always scalars. Note: this will be an API-breaking change in Swift for TensorFlow 0.6. Deprecating the other `BatchNorm` and `LayerNorm` initializers is tricky because it causes ambiguity problems.
1 parent 27c2f21 commit b7ba0d5

File tree

2 files changed

+12
-18
lines changed

2 files changed

+12
-18
lines changed

Sources/TensorFlow/Layers/Normalization.swift

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
2525
/// The feature dimension.
2626
@noDerivative public let axis: Int
2727
/// The momentum for the running mean and running variance.
28-
@noDerivative public let momentum: Tensor<Scalar>
28+
@noDerivative public let momentum: Scalar
2929
/// The offset value, also known as beta.
3030
public var offset: Tensor<Scalar>
3131
/// The scale value, also known as gamma.
3232
public var scale: Tensor<Scalar>
3333
/// The variance epsilon value.
34-
@noDerivative public let epsilon: Tensor<Scalar>
34+
@noDerivative public let epsilon: Scalar
3535
/// The running mean.
3636
@noDerivative public let runningMean: Parameter<Scalar>
3737
/// The running variance.
@@ -49,10 +49,10 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
4949
/// - runningVariance: The running variance.
5050
public init(
5151
axis: Int,
52-
momentum: Tensor<Scalar>,
52+
momentum: Scalar,
5353
offset: Tensor<Scalar>,
5454
scale: Tensor<Scalar>,
55-
epsilon: Tensor<Scalar>,
55+
epsilon: Scalar,
5656
runningMean: Tensor<Scalar>,
5757
runningVariance: Tensor<Scalar>
5858
) {
@@ -105,8 +105,8 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
105105
public init(
106106
featureCount: Int,
107107
axis: Int = -1,
108-
momentum: Tensor<Scalar> = Tensor(0.99),
109-
epsilon: Tensor<Scalar> = Tensor(0.001)
108+
momentum: Scalar = 0.99,
109+
epsilon: Scalar = 0.001
110110
) {
111111
self.init(
112112
axis: axis,
@@ -131,14 +131,14 @@ public struct LayerNorm<Scalar: TensorFlowFloatingPoint>: Layer {
131131
/// The axis.
132132
@noDerivative public let axis: Int
133133
/// The variance epsilon value.
134-
@noDerivative public let epsilon: Tensor<Scalar>
134+
@noDerivative public let epsilon: Scalar
135135

136136
/// Creates a layer normalization layer.
137137
public init(
138138
offset: Tensor<Scalar>,
139139
scale: Tensor<Scalar>,
140140
axis: Int,
141-
epsilon: Tensor<Scalar>
141+
epsilon: Scalar
142142
) {
143143
self.offset = offset
144144
self.scale = scale
@@ -155,7 +155,7 @@ public struct LayerNorm<Scalar: TensorFlowFloatingPoint>: Layer {
155155
public init(
156156
featureCount: Int,
157157
axis: Int,
158-
epsilon: Tensor<Scalar> = Tensor(0.001)
158+
epsilon: Scalar = 0.001
159159
) {
160160
self.init(
161161
offset: Tensor(zeros: [featureCount]),

Tests/TensorFlowTests/LayerTests.swift

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,15 +1222,12 @@ final class LayerTests: XCTestCase {
12221222
Context.local.learningPhase = .inference
12231223
// This tests for a specific failure that had impacted the MiniGo model.
12241224
let miniGoTensor = Tensor<Float>(randomUniform: [2, 19, 19, 256])
1225-
let miniGoBatchNorm = BatchNorm(
1226-
featureCount: 256,
1227-
momentum: Tensor<Float>(0.95),
1228-
epsilon: Tensor<Float>(1e-5))
1225+
let miniGoBatchNorm = BatchNorm<Float>(featureCount: 256, momentum: 0.95, epsilon: 1e-5)
12291226
let miniGoResult = miniGoBatchNorm(miniGoTensor)
12301227
XCTAssertEqual(miniGoTensor.shape, miniGoResult.shape)
12311228

12321229
let x = Tensor<Float>(rangeFrom: 0, to: 20, stride: 1).reshaped(to: [4,5])
1233-
let epsilon = Tensor<Float>(0.001)
1230+
let epsilon: Float = 0.001
12341231
let bnLayer = BatchNorm<Float>(featureCount: 5, axis: 1, epsilon: epsilon)
12351232
// Test inference before any training.
12361233
assertEqual(bnLayer.inferring(from: x), x / TensorFlow.sqrt(1 + epsilon), accuracy: 1e-5)
@@ -1307,10 +1304,7 @@ final class LayerTests: XCTestCase {
13071304
Context.local.learningPhase = .inference
13081305
// This tests for a specific failure that had impacted the Transformer model.
13091306
let transformerTensor = Tensor<Float>(randomUniform: [1, 1, 768])
1310-
let transformerLayerNorm = LayerNorm(
1311-
featureCount: 768,
1312-
axis: -1,
1313-
epsilon: Tensor<Float>(1e-5))
1307+
let transformerLayerNorm = LayerNorm<Float>(featureCount: 768, axis: -1, epsilon: 1e-5)
13141308
let transformerResult = transformerLayerNorm(transformerTensor)
13151309
XCTAssertEqual(transformerTensor.shape, transformerResult.shape)
13161310
}

0 commit comments

Comments
 (0)