[Layers] Update DepthwiseConv2D(filterShape:...) bias initialization (#441)

jon-tow · rxwei · commit 2bdb02fc9a58 · 2019-09-24T11:30:33.000-07:00
**Issue**: `DepthwiseConv2D(filterShape:strides:padding:activation:filterInitializer:...)`
 produces a shape mismatch error in the conv-bias sum of `callAsFunction()` when initialized under a 
`filterShape` with channel-multiplier greater than `1`.
 More specifically, this is caused by initializing the `bias` vector with dimension equal to said channel-multiplier. 

**Fix** :This PR adds the proper `bias` vector initialization by computing the dimensionality as:
`bias vector dimension = output channel count = (input channel count * channel multiplier)`
diff --git a/Sources/TensorFlow/Layers/Convolutional.swift b/Sources/TensorFlow/Layers/Convolutional.swift
@@ -70,8 +70,8 @@ public struct Conv1D<Scalar: TensorFlowFloatingPoint>: Layer {
     ///
     /// and padding size is determined by the padding scheme.
     ///
-    /// - Parameter input: The input to the layer [batch count, input width, input channel count].
-    /// - Returns: The output of shape [batch count, output width, output channel count].
+    /// - Parameter input: The input to the layer [batch size, input width, input channel count].
+    /// - Returns: The output of shape [batch size, output width, output channel count].
     ///
     /// - Note: Padding size equals zero when using `.valid`.
     @differentiable
@@ -186,7 +186,7 @@ public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
     /// and padding sizes are determined by the padding scheme.
     ///
     /// - Parameter input: The input to the layer of shape
-    ///   [batch count, input height, input width, input channel count].
+    ///   [batch size, input height, input width, input channel count].
     /// - Returns: The output of shape
     ///   [batch count, output height, output width, output channel count].
     ///
@@ -495,8 +495,10 @@ public struct DepthwiseConv2D<Scalar: TensorFlowFloatingPoint>: Layer {
 
     /// Returns the output obtained from applying the layer to the given input.
     ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
+    /// - Parameter input: The input to the layer of shape,
+    ///   [batch count, input height, input width, input channel count]
+    /// - Returns: The output of shape,
+    ///   [batch count, output height, output width, input channel count * channel multiplier]
     @differentiable
     public func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
         return activation(depthwiseConv2D(
@@ -512,7 +514,8 @@ public extension DepthwiseConv2D {
     /// element-wise activation function.
     ///
     /// - Parameters:
-    ///   - filterShape: The shape of the 4-D convolution kernel.
+    ///   - filterShape: The shape of the 4-D convolution kernel with form,
+    ///     [filter width, filter height, input channel count, channel multiplier].
     ///   - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
     ///   - padding: The padding algorithm for convolution.
     ///   - activation: The element-wise activation function.
@@ -530,7 +533,7 @@ public extension DepthwiseConv2D {
             filterShape.0, filterShape.1, filterShape.2, filterShape.3])
         self.init(
             filter: filterInitializer(filterTensorShape),
-            bias: biasInitializer([filterShape.3]),
+            bias: biasInitializer([filterShape.2 * filterShape.3]),
             activation: activation,
             strides: strides,
             padding: padding)
diff --git a/Tests/TensorFlowTests/LayerTests.swift b/Tests/TensorFlowTests/LayerTests.swift
@@ -209,6 +209,14 @@ final class LayerTests: XCTestCase {
                                      scalars: [9, 12, 23, 28, 25, 36, 55, 68, 41, 60, 87, 108,
                                                57, 84, 119, 148])
         XCTAssertEqual(output, expected)
+        
+        let channelMultiplier = 4
+        let multiplierLayer = DepthwiseConv2D<Float>(
+            filterShape: (2, 2, input.shape[3], channelMultiplier),
+            filterInitializer: glorotUniform(),
+            biasInitializer: zeros())
+        let multiplierOutput = multiplierLayer.inferring(from: input)
+        XCTAssertEqual(multiplierOutput.shape[3], input.shape[3] * channelMultiplier)
     }
 
     func testDepthwiseConv2DGradient() {