Add Avgpool3d and Maxpool3d Layers & Tests (#117)

Shashi456 · saeta · commit 99add8723aff · 2019-04-29T07:38:05.000-07:00
Adds AvgPool3D and MaxPool3D layers, and tests for both 2D and 3D pooling layers.
diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
@@ -828,11 +828,9 @@ public struct MaxPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
     ///   - poolSize: Vertical and horizontal factors by which to downscale.
     ///   - strides: The strides.
     ///   - padding: The padding.
-    public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
-        self.poolSize = (1, poolSize.0, poolSize.1, 1)
-        self.strides = (1, strides.0, strides.1, 1)
-        self.padding = padding
-    }
+    public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
+        // Delegate to the initializer that takes full 4-element tuples; the first and last
+        // entries are fixed at 1.
+        self.init(poolSize: (1, poolSize.0, poolSize.1, 1),
+                  strides: (1, strides.0, strides.1, 1),
+                  padding: padding)
+    }
 
     /// Returns the output obtained from applying the layer to the given input.
     ///
@@ -845,6 +843,58 @@ public struct MaxPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
     }
 }
 
+/// A max pooling layer for spatial or spatio-temporal data.
+@_fixed_layout
+public struct MaxPool3D<Scalar: TensorFlowFloatingPoint>: Layer {
+    /// The size of the sliding reduction window for pooling.
+    @noDerivative let poolSize: (Int, Int, Int, Int, Int)
+    /// The strides of the sliding window for each dimension of a 5-D input.
+    /// Strides in non-spatial dimensions must be `1`.
+    @noDerivative let strides: (Int, Int, Int, Int, Int)
+    /// The padding algorithm for pooling.
+    @noDerivative let padding: Padding
+
+    /// Creates a max pooling layer from full 5-element window and stride tuples.
+    ///
+    /// - Parameters:
+    ///   - poolSize: The size of the sliding reduction window, one entry per input dimension.
+    ///   - strides: The strides of the sliding window, one entry per input dimension.
+    ///   - padding: The padding algorithm for pooling.
+    public init(
+        poolSize: (Int, Int, Int, Int, Int),
+        strides: (Int, Int, Int, Int, Int),
+        padding: Padding
+    ) {
+        self.poolSize = poolSize
+        self.strides = strides
+        self.padding = padding
+    }
+
+    /// Creates a max pooling layer from 3-element window and stride tuples.
+    ///
+    /// - Parameters:
+    ///   - poolSize: The window size along each of the three pooled dimensions.
+    ///   - strides: The strides along each of the three pooled dimensions.
+    ///   - padding: The padding algorithm for pooling.
+    public init(
+        poolSize: (Int, Int, Int),
+        strides: (Int, Int, Int),
+        padding: Padding = .valid
+    ) {
+        // The first and last entries of the 5-element tuples are fixed at 1.
+        self.init(poolSize: (1, poolSize.0, poolSize.1, poolSize.2, 1),
+                  strides: (1, strides.0, strides.1, strides.2, 1),
+                  padding: padding)
+    }
+
+    /// Returns the output obtained from applying the layer to the given input.
+    ///
+    /// - Parameter input: The input to the layer.
+    /// - Returns: The output.
+    @differentiable
+    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
+        return input.maxPooled(kernelSize: poolSize, strides: strides, padding: padding)
+    }
+}
+
+public extension MaxPool3D {
+    /// Creates a max pooling layer with the specified pooling window size and stride. All
+    /// pooling sizes and strides are the same.
+    ///
+    /// - Parameters:
+    ///   - poolSize: The window size used for each of the three pooled dimensions.
+    ///   - stride: The stride used for each of the three pooled dimensions.
+    ///   - padding: The padding algorithm for pooling.
+    init(poolSize: Int, stride: Int, padding: Padding = .valid) {
+        self.init(poolSize: (poolSize, poolSize, poolSize),
+                  strides: (stride, stride, stride),
+                  padding: padding)
+    }
+}
+
 /// An average pooling layer for temporal data.
 @_fixed_layout
 public struct AvgPool1D<Scalar: TensorFlowFloatingPoint>: Layer {
@@ -894,7 +944,7 @@ public struct AvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
     /// The padding algorithm for pooling.
     @noDerivative let padding: Padding
 
-    /// Creates a average pooling layer.
+    /// Creates an average pooling layer.
     public init(
         poolSize: (Int, Int, Int, Int),
         strides: (Int, Int, Int, Int),
@@ -905,18 +955,58 @@ public struct AvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
         self.padding = padding
     }
 
-    /// Creates a average pooling layer.
+    /// Creates an average pooling layer.
     ///
     /// - Parameters:
     ///   - poolSize: Vertical and horizontal factors by which to downscale.
     ///   - strides: The strides.
     ///   - padding: The padding.
-    public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
-        self.poolSize = (1, poolSize.0, poolSize.1, 1)
-        self.strides = (1, strides.0, strides.1, 1)
+    public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
+        // Delegate to the initializer that takes full 4-element tuples; the first and last
+        // entries are fixed at 1.
+        self.init(poolSize: (1, poolSize.0, poolSize.1, 1),
+                  strides: (1, strides.0, strides.1, 1),
+                  padding: padding)
+    }
+
+    /// Applies average pooling to `input` using this layer's window size, strides and padding.
+    ///
+    /// - Parameter input: The tensor to pool.
+    /// - Returns: The pooled tensor.
+    @differentiable
+    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
+        let pooled = input.averagePooled(
+            kernelSize: poolSize,
+            strides: strides,
+            padding: padding)
+        return pooled
+    }
+}
+
+/// An average pooling layer for spatial or spatio-temporal data.
+@_fixed_layout
+public struct AvgPool3D<Scalar: TensorFlowFloatingPoint>: Layer {
+    /// The size of the sliding reduction window for pooling.
+    @noDerivative let poolSize: (Int, Int, Int, Int, Int)
+    /// The strides of the sliding window for each dimension of a 5-D input.
+    /// Strides in non-spatial dimensions must be `1`.
+    @noDerivative let strides: (Int, Int, Int, Int, Int)
+    /// The padding algorithm for pooling.
+    @noDerivative let padding: Padding
+
+    /// Creates an average pooling layer from full 5-element window and stride tuples.
+    ///
+    /// - Parameters:
+    ///   - poolSize: The size of the sliding reduction window for pooling.
+    ///   - strides: The strides of the sliding window for each dimension of a 5-D input.
+    ///   - padding: The padding algorithm for pooling.
+    public init(
+        poolSize: (Int, Int, Int, Int, Int),
+        strides: (Int, Int, Int, Int, Int),
+        padding: Padding
+    ) {
+        self.poolSize = poolSize
+        self.strides = strides
         self.padding = padding
     }
 
+    /// Creates an average pooling layer from 3-element window and stride tuples.
+    ///
+    /// - Parameters:
+    ///   - poolSize: The window size along each of the three pooled dimensions.
+    ///   - strides: The strides along each of the three pooled dimensions.
+    ///   - padding: The padding algorithm for pooling.
+    public init(
+        poolSize: (Int, Int, Int),
+        strides: (Int, Int, Int),
+        padding: Padding = .valid
+    ) {
+        // The first and last entries of the 5-element tuples are fixed at 1.
+        self.init(poolSize: (1, poolSize.0, poolSize.1, poolSize.2, 1),
+                  strides: (1, strides.0, strides.1, strides.2, 1),
+                  padding: padding)
+    }
+
     /// Returns the output obtained from applying the layer to the given input.
     ///
     /// - Parameter input: The input to the layer.
@@ -927,6 +1017,15 @@ public struct AvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
     }
 }
 
+public extension AvgPool3D {
+    /// Creates an average pooling layer that uses one window size and one stride for every
+    /// pooled dimension.
+    init(poolSize: Int, strides: Int, padding: Padding = .valid) {
+        let window = (poolSize, poolSize, poolSize)
+        let step = (strides, strides, strides)
+        self.init(poolSize: window, strides: step, padding: padding)
+    }
+}
 
 /// A global average pooling layer for temporal data.
 @_fixed_layout
diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift
@@ -34,6 +34,22 @@ final class LayerTests: XCTestCase {
         XCTAssertEqual(round(output), expected)
     }
 
+    func testMaxPool2D() {
+        // The pooling window covers all four input values (0...3), so the single output is
+        // their maximum.
+        let scalars = (0..<4).map(Float.init)
+        let input = Tensor(shape: [1, 2, 2, 1], scalars: scalars)
+        let layer = MaxPool2D<Float>(poolSize: (2, 2), strides: (1, 1), padding: .valid)
+        let expected = Tensor<Float>([[[[3]]]])
+        let output = layer.inferring(from: input)
+        XCTAssertEqual(round(output), expected)
+    }
+
+    func testMaxPool3D() {
+        // The pooling window covers all eight input values (0...7), so the single output is
+        // their maximum.
+        let scalars = (0..<8).map(Float.init)
+        let input = Tensor(shape: [1, 2, 2, 2, 1], scalars: scalars)
+        let layer = MaxPool3D<Float>(poolSize: (2, 2, 2), strides: (1, 1, 1), padding: .valid)
+        let expected = Tensor<Float>([[[[[7]]]]])
+        let output = layer.inferring(from: input)
+        XCTAssertEqual(round(output), expected)
+    }
+
     func testAvgPool1D() {
         let layer = AvgPool1D<Float>(poolSize: 3, stride: 1, padding: .valid)
         let input = Tensor<Float>([[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]]).expandingShape(at: 2)
@@ -42,6 +58,22 @@ final class LayerTests: XCTestCase {
         XCTAssertEqual(round(output), expected)
     }
 
+    func testAvgPool2D() {
+        // The pooling window covers all ten input values (0...9), so the single output is
+        // their mean.
+        let scalars = (0..<10).map(Float.init)
+        let input = Tensor(shape: [1, 2, 5, 1], scalars: scalars)
+        let layer = AvgPool2D<Float>(poolSize: (2, 5), strides: (1, 1), padding: .valid)
+        let expected = Tensor<Float>([[[[4.5]]]])
+        let output = layer.inferring(from: input)
+        XCTAssertEqual(output, expected)
+    }
+
+    func testAvgPool3D() {
+        // Shape [1, 2, 4, 5, 1] holds 40 scalars. The pooling window covers all of them
+        // (0...39), so the single output is their mean, 19.5.
+        let layer = AvgPool3D<Float>(poolSize: (2, 4, 5), strides: (1, 1, 1), padding: .valid)
+        let input = Tensor(shape: [1, 2, 4, 5, 1], scalars: (0..<40).map(Float.init))
+        let output = layer.inferring(from: input)
+        let expected = Tensor<Float>([[[[[19.5]]]]])
+        XCTAssertEqual(output, expected)
+    }
+
     func testGlobalAvgPool1D() {
         let layer = GlobalAvgPool1D<Float>()
         let input = Tensor(shape: [2, 5, 1], scalars: (0..<10).map(Float.init))
@@ -150,7 +182,11 @@ final class LayerTests: XCTestCase {
     static var allTests = [
         ("testConv1D", testConv1D),
         ("testMaxPool1D", testMaxPool1D),
+        ("testMaxPool2D", testMaxPool2D),
+        ("testMaxPool3D", testMaxPool3D),
         ("testAvgPool1D", testAvgPool1D),
+        ("testAvgPool2D", testAvgPool2D),
+        ("testAvgPool3D", testAvgPool3D),
         ("testGlobalAvgPool1D", testGlobalAvgPool1D),
         ("testGlobalAvgPool2D", testGlobalAvgPool2D),
         ("testGlobalAvgPool3D", testGlobalAvgPool3D),