
Commit d264701

Added support for 'Tensor.batchGathering(atIndices:)'. (#157)
* Enhanced the 'matmul' wrapper so that it matches the behavior of the Python one.
* Added support for the 'log1mexp' op and its VJP.
* Added a test.
* Update Sources/TensorFlow/Operators/Math.swift (Co-Authored-By: Richard Wei <[email protected]>)
* Removed the need for a general 'Tensor.withoutDerivative()' as Richard suggested.
* Addressed Richard's feedback.
* Addressed Richard's feedback.
* Added one more tests helper.
* Minor bug fix.
* Added a test for 'log1mexp'.
* Added support for 'softplus' and 'logSigmoid'.
* Minor tweak.
* Added support for 'isFinite', 'isInfinite', and 'isNaN'.
* Addressed Richard's feedback.
* Addressed Richard's feedback.
* Added support for 'gathering' and its VJP.
* Added a test for 'gathering'.
* Update Sources/TensorFlow/Operators/Basic.swift (Co-Authored-By: Richard Wei <[email protected]>)
* Removed a redundant helper.
* Update Tests/TensorFlowTests/OperatorTests/BasicTests.swift (Co-Authored-By: Richard Wei <[email protected]>)
* Added support for a 'Tensor.batchGathering(atIndices:)'.
* Fixed some of the tests.
* Made the tests pass.
* Attempt at making 'log1mexp' differentiable.
* Removed 'Tensor.nonZeroIndices()'.
* Renamed 'withoutDerivative' to 'noDerivative'.
* Added back 'Tensor.nonZeroIndices()'.
* Merged upstream changes.
* Enabled the 'logSigmoid' test.
* Minor edit.
* Style edit.
* Style edit.
1 parent: b32d289

3 files changed: +178 additions, −85 deletions

Sources/TensorFlow/Operators/Basic.swift

Lines changed: 43 additions & 0 deletions
````diff
@@ -387,6 +387,49 @@ public extension Tensor {
         return Raw.gatherV2(params: self, indices: indices, axis: Tensor<Int32>(Int32(axis)))
     }
 
+    /// Returns slices of this tensor at `indices`, while ignoring the first `batchDims` dimensions
+    /// that correspond to batch dimensions. The gather is performed along the first non-batch
+    /// dimension.
+    ///
+    /// Performs similar functionality to `gathering`, except that the resulting tensor shape is
+    /// now:
+    /// ```
+    /// self.shape[..<batchDims] +
+    /// indices.shape[batchDims...] +
+    /// self.shape[(batchDims + indices.rank + 1)...]
+    /// ```
+    ///
+    /// - Parameters:
+    ///   - indices: Contains the indices to gather.
+    ///   - batchDims: Number of leading batch dimensions to ignore.
+    ///
+    /// - Precondition: `batchDims` must be less than `indices.rank`.
+    ///
+    /// - Returns: The gathered tensor.
+    @inlinable
+    @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint)
+    func batchGathering(atIndices indices: Tensor<Int32>) -> Tensor {
+        var batchIndices = indices
+        var accumulated = Tensor<Int32>(ones: [])
+        accumulated *= Swift.withoutDerivative(at: shapeTensor) { $0[1] }
+        let dValue = Swift.withoutDerivative(at: shapeTensor) { $0[0] }
+        let dIndices = Tensor<Int32>(
+            rangeFrom: Tensor<Int32>(zeros: []),
+            to: dValue,
+            stride: Tensor<Int32>(ones: [])
+        ) * accumulated
+        let dShape = Tensor<Int32>(concatenating: [
+            dValue.rankLifted(),
+            Tensor<Int32>([Int32](repeating: 1, count: indices.rank - 1))])
+        batchIndices += dIndices.reshaped(toShape: dShape)
+        let flatIndices = batchIndices.flattened()
+        let outerShape = Swift.withoutDerivative(at: shapeTensor) { $0[2...] }
+        let innerShape = Swift.withoutDerivative(at: shapeTensor) { $0[..<2] }.product(squeezingAxes: [0])
+        let flatTensor = reshaped(toShape: innerShape.rankLifted().concatenated(with: outerShape))
+        let flatResult = flatTensor.gathering(atIndices: flatIndices)
+        return flatResult.reshaped(toShape: indices.shapeTensor.concatenated(with: outerShape))
+    }
+
     /// Returns a tensor by gathering the values after applying the provided boolean mask to the input.
     ///
     /// For example:
````
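For readers scanning the diff, here is a minimal usage sketch of the new method. The values and shapes are hypothetical, written against the swift-apis API at this commit:

```swift
import TensorFlow

// `x` has shape [2, 3, 4]: a batch dimension of size 2, then 3 rows of 4 features each.
let x = Tensor<Float>(rangeFrom: 0, to: 24, stride: 1).reshaped(to: [2, 3, 4])
// For each of the 2 batches, pick two rows; `indices` has shape [2, 2].
let indices = Tensor<Int32>([[1, 0], [0, 2]])
let gathered = x.batchGathering(atIndices: indices)
// The result is reshaped to indices.shape + x.shape[2...], i.e. [2, 2, 4]:
// gathered[0] == [x[0][1], x[0][0]] and gathered[1] == [x[1][0], x[1][2]].
print(gathered.shape) // [2, 2, 4]
```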

Sources/TensorFlow/Operators/Math.swift

Lines changed: 52 additions & 33 deletions
```diff
@@ -784,23 +784,6 @@ public func rsqrt<T: TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
     Raw.rsqrt(x)
 }
 
-/// Returns the cosine similarity between `x` and `y`.
-@differentiable
-public func cosineSimilarity<Scalar: TensorFlowFloatingPoint>(
-    _ x: Tensor<Scalar>, _ y: Tensor<Scalar>
-) -> Tensor<Scalar> {
-    (x * y).sum() / (sqrt(x.squared().sum()) * sqrt(y.squared().sum()))
-}
-
-/// Returns the cosine distance between `x` and `y`. Cosine distance is defined as
-/// `1 - cosineSimilarity(x, y)`.
-@differentiable
-public func cosineDistance<Scalar: TensorFlowFloatingPoint>(
-    _ x: Tensor<Scalar>, _ y: Tensor<Scalar>
-) -> Tensor<Scalar> {
-    1 - cosineSimilarity(x, y)
-}
-
 @inlinable
 internal func _vjpRsqrt<T: TensorFlowFloatingPoint>(
     _ x: Tensor<T>
```
```diff
@@ -925,6 +908,14 @@ internal func _vjpSigmoid<T: TensorFlowFloatingPoint>(
     (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) })
 }
 
+/// Returns the log-sigmoid of the specified tensor element-wise. Specifically,
+/// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`.
+@inlinable
+@differentiable
+public func logSigmoid<T: TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
+    -softplus(-x)
+}
+
 /// Returns the softplus of the specified tensor element-wise.
 /// Specifically, computes `log(exp(features) + 1)`.
 @inlinable
```
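The doc comment's stability claim is worth spelling out: for large-magnitude negative inputs, `exp(-x)` overflows `Float`, so the naive formula collapses to `-inf`, while `softplus` stays accurate. An illustrative sketch (assuming this toolchain; the values are for demonstration):

```swift
import TensorFlow

let x = Tensor<Float>([-100.0])
// Naive: exp(100) overflows Float to +inf, so log(1 / (1 + inf)) == log(0) == -inf.
let naive = log(1 / (1 + exp(-x)))
// Stable: logSigmoid(-100) == -softplus(100) ≈ -100, the mathematically correct value.
let stable = logSigmoid(x)
print(naive, stable) // roughly [-inf] versus [-100.0]
```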
```diff
@@ -1016,6 +1007,24 @@ func _vjpElu<T: TensorFlowFloatingPoint>(
     return (y, { v in Raw.eluGrad(gradients: v, outputs: y) })
 }
 
+/// Returns the Gaussian Error Linear Unit (GELU) activations of the specified tensor element-wise.
+///
+/// Specifically, `gelu` approximates `xP(X <= x)`, where `P(X <= x)` is the Standard Gaussian
+/// cumulative distribution, by computing: x * [0.5 * (1 + tanh[√(2/π) * (x + 0.044715 * x^3)])].
+///
+/// See [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415).
+@inlinable
+@differentiable
+public func gelu<T: TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
+    let ratio = Tensor<T>(0.7978845608) // An approximation of √(2/π).
+    // An approximation of the Gauss error function.
+    // NOTE: This is needed because the compiler otherwise gives an "unable to type-check this
+    // in reasonable time" error when the below expressions are written on a single line.
+    let approximateErf = tanh(ratio * (x + 0.044715 * pow(x, 3)))
+    let cdf = 0.5 * (1.0 + approximateErf)
+    return x * cdf
+}
+
 /// Returns a tensor by applying the leaky ReLU activation function
 /// to the specified tensor element-wise.
 /// Specifically, computes `max(x, x * alpha)`.
```
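A hand-worked spot check of the tanh approximation at `x = 1` (the arithmetic is mine, but the result matches the value asserted in `testGelu` below): √(2/π) · (1 + 0.044715) ≈ 0.7978845608 · 1.044715 ≈ 0.83356; tanh(0.83356) ≈ 0.68237; so gelu(1) ≈ 1 · 0.5 · (1 + 0.68237) ≈ 0.84119.

```swift
import TensorFlow

// Spot check: should print a value close to 0.84119199, as asserted in MathTests.
print(gelu(Tensor<Float>([1.0])))
```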
```diff
@@ -1053,22 +1062,15 @@ func _vjpRelu<T: TensorFlowFloatingPoint>(
     (relu(x), { v in Tensor(x .> 0) * v })
 }
 
-/// Returns the Gaussian Error Linear Unit (GELU) activations of the specified tensor element-wise.
-///
-/// Specifically, `gelu` approximates `xP(X <= x)`, where `P(X <= x)` is the Standard Gaussian
-/// cumulative distribution, by computing: x * [0.5 * (1 + tanh[√(2/π) * (x + 0.044715 * x^3)])].
-///
-/// See [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415).
-@inlinable
-@differentiable
-public func gelu<T: TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
-    let ratio = Tensor<T>(0.7978845608) // An approximation of √(2/π).
-    // An approximation of the Gauss error function.
-    // NOTE: This is needed because the compiler otherwise gives an "unable to type-check this
-    // in reasonable time" error when the below expressions are written on a single line.
-    let approximateErf = tanh(ratio * (x + 0.044715 * pow(x, 3)))
-    let cdf = 0.5 * (1.0 + approximateErf)
-    return x * cdf
+public extension Tensor where Scalar: TensorFlowFloatingPoint {
+    /// Returns a boolean tensor indicating which elements of `x` are finite.
+    @inlinable var isFinite: Tensor<Bool> { Raw.isFinite(self) }
+
+    /// Returns a boolean tensor indicating which elements of `x` are infinite.
+    @inlinable var isInfinite: Tensor<Bool> { Raw.isInf(self) }
+
+    /// Returns a boolean tensor indicating which elements of `x` are NaN-valued.
+    @inlinable var isNaN: Tensor<Bool> { Raw.isNan(self) }
 }
 
 //===------------------------------------------------------------------------------------------===//
```
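A quick illustration of how the three new predicates partition the special values (hypothetical input, mirroring the behavior the tests below assert):

```swift
import TensorFlow

let t = Tensor<Float>([1.0, Float.infinity, -Float.infinity, Float.nan])
print(t.isFinite)   // [true, false, false, false]
print(t.isInfinite) // [false, true, true, false]
print(t.isNaN)      // [false, false, false, true]
```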
```diff
@@ -1202,6 +1204,23 @@ internal func _vjpMinMaxHelper<T: TensorFlowFloatingPoint>(
         rhsGrad.sum(squeezingAxes: rhsAxes).reshaped(toShape: rhsShape))
 }
 
+/// Returns the cosine similarity between `x` and `y`.
+@differentiable
+public func cosineSimilarity<Scalar: TensorFlowFloatingPoint>(
+    _ x: Tensor<Scalar>, _ y: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    (x * y).sum() / (sqrt(x.squared().sum()) * sqrt(y.squared().sum()))
+}
+
+/// Returns the cosine distance between `x` and `y`. Cosine distance is defined as
+/// `1 - cosineSimilarity(x, y)`.
+@differentiable
+public func cosineDistance<Scalar: TensorFlowFloatingPoint>(
+    _ x: Tensor<Scalar>, _ y: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    1 - cosineSimilarity(x, y)
+}
+
 //===------------------------------------------------------------------------------------------===//
 // Selection Functions
 //===------------------------------------------------------------------------------------------===//
```
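Because cosine similarity is scale-invariant, any pair of parallel vectors yields similarity 1 and distance 0; `testCosineSimilarity` below relies on exactly this. A tiny worked example (illustrative values):

```swift
import TensorFlow

let x = Tensor<Float>([1, 2, 3])
let y = Tensor<Float>([2, 4, 6]) // y = 2x, so the two vectors are parallel.
// (x · y) / (‖x‖ · ‖y‖) = 28 / (√14 · √56) = 28 / 28 = 1.
print(cosineSimilarity(x, y)) // ≈ 1.0
print(cosineDistance(x, y))   // ≈ 0.0
```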

Tests/TensorFlowTests/OperatorTests/MathTests.swift

Lines changed: 83 additions & 52 deletions
```diff
@@ -60,34 +60,92 @@ final class MathOperatorTests: XCTestCase {
     }
 
     func testLog1p() {
-        let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
+        let x = Tensor<Float>([1, 2, 3, 4, 5])
         let y = log1p(x)
-        assertEqual(y, log(1 + x), accuracy: 0.0001)
+        let expectedY = Tensor<Float>([0.69315, 1.09861, 1.38629, 1.60944, 1.79176])
+        assertEqual(y, expectedY, accuracy: 0.0001)
     }
 
-    func testCosineSimilarity() {
-        let x = Tensor<Float>([1, 2, 3, 4, 5, 6, 7, 8])
-        let y = Tensor<Float>([0.5, 1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0])
-        let z = cosineSimilarity(x, y)
-        let output: Float = 1.0
-        XCTAssertEqual(z, Tensor(output))
-    }
-
-    // FIXME(https://bugs.swift.org/browse/TF-543): Disable failing test.
-    /*
     func testExpm1() {
-        let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
+        let x = Tensor<Float>([1, 2, 3, 4, 5])
         let y = expm1(x)
-        assertEqual(y, exp(x - 1), accuracy: 0.0001)
+        let expectedY = Tensor<Float>([1.71828, 6.38906, 19.08554, 53.59815, 147.41316])
+        assertEqual(y, expectedY, accuracy: 0.0001)
     }
-    */
 
     func testSign() {
         let x = Tensor<Float>([[1, 2, -3, 4, 5], [1, 2, 3, 4, -5]])
         let y = sign(x)
         XCTAssertEqual(y, Tensor<Float>([[1, 1, -1, 1, 1], [1, 1, 1, 1, -1]]))
     }
 
+    func testLogSigmoid() {
+        let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
+        let y = logSigmoid(x)
+        assertEqual(y, log(sigmoid(x)), accuracy: 0.0001)
+    }
+
+    func testSoftplus() {
+        let x = Tensor<Float>([1.0, 2.0, 3.0])
+        let y = softplus(x)
+        let expected = Tensor<Float>([1.3132616, 2.126928, 3.0485873])
+        XCTAssertEqual(y, expected)
+    }
+
+    func testSoftsign() {
+        let x = Tensor<Float>([1.0, 4.0, 3.0])
+        let y = softsign(x)
+        let expected = Tensor<Float>([0.5, 0.8, 0.75])
+        XCTAssertEqual(y, expected)
+    }
+
+    func testElu() {
+        let x = Tensor<Float>([-1.0, 2.0, 3.0])
+        let y = elu(x)
+        let expected = Tensor<Float>([-0.63212055, 2, 3])
+        XCTAssertEqual(y, expected)
+    }
+
+    func testGelu() {
+        let x = Tensor<Float>([2.0, 1.0, 7.0])
+        let y = gelu(x)
+        let expected = Tensor<Float>([1.95459769, 0.84119199, 7.0])
+        XCTAssertEqual(y, expected)
+    }
+
+    func testLeakyRelu() {
+        let x = Tensor<Float>([[-1.0, 2.0, 3.0]])
+        let y = leakyRelu(x, alpha: 0.4)
+        let expected = Tensor<Float>([-0.4, 2, 3])
+        XCTAssertEqual(y, expected)
+    }
+
+    func testIsFinite() {
+        let x = Tensor<Float>([1, 2, 3, 4, -Float.infinity])
+        let y = x.isFinite
+        XCTAssertEqual(y, Tensor([true, true, true, true, false]))
+    }
+
+    func testIsInfinite() {
+        let x = Tensor<Float>([1, 2, 3, 4, log(0.0)])
+        let y = x.isInfinite
+        XCTAssertEqual(y, Tensor([false, false, false, false, true]))
+    }
+
+    func testIsNaN() {
+        let x = Tensor<Float>([1, 2, 3, 4, log(-5.0)])
+        let y = x.isNaN
+        XCTAssertEqual(y, Tensor([false, false, false, false, true]))
+    }
+
+    func testCosineSimilarity() {
+        let x = Tensor<Float>([1, 2, 3, 4, 5, 6, 7, 8])
+        let y = Tensor<Float>([0.5, 1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0])
+        let z = cosineSimilarity(x, y)
+        let output: Float = 1.0
+        XCTAssertEqual(z, Tensor(output))
+    }
+
     func testReduction() {
         // 2 x 5
         let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
```
```diff
@@ -222,41 +280,6 @@ final class MathOperatorTests: XCTestCase {
         XCTAssertEqual(result.scalars, [12.5, 6.5])
     }
 
-    func testSoftplus() {
-        let x = Tensor<Float>([1.0, 2.0, 3.0])
-        let y = softplus(x)
-        let expected = Tensor<Float>([1.3132616, 2.126928, 3.0485873])
-        XCTAssertEqual(y, expected)
-    }
-
-    func testSoftsign() {
-        let x = Tensor<Float>([1.0, 4.0, 3.0])
-        let y = softsign(x)
-        let expected = Tensor<Float>([0.5, 0.8, 0.75])
-        XCTAssertEqual(y, expected)
-    }
-
-    func testElu() {
-        let x = Tensor<Float>([-1.0, 2.0, 3.0])
-        let y = elu(x)
-        let expected = Tensor<Float>([-0.63212055, 2, 3])
-        XCTAssertEqual(y, expected)
-    }
-
-    func testGelu() {
-        let x = Tensor<Float>([2.0, 1.0, 7.0])
-        let y = gelu(x)
-        let expected = Tensor<Float>([1.95459769, 0.84119199, 7.0])
-        XCTAssertEqual(y, expected)
-    }
-
-    func testLeakyRelu() {
-        let x = Tensor<Float>([[-1.0, 2.0, 3.0]])
-        let y = leakyRelu(x, alpha: 0.4)
-        let expected = Tensor<Float>([-0.4, 2, 3])
-        XCTAssertEqual(y, expected)
-    }
-
     func testXORInference() {
         func xor(_ x: Float, _ y: Float) -> Float {
             let x = Tensor<Float>([x, y]).reshaped(to: [1, 2])
```
```diff
@@ -318,18 +341,26 @@ final class MathOperatorTests: XCTestCase {
     }
 
     static var allTests = [
+        ("testElementaryFunctions", testElementaryFunctions),
         ("testLog1p", testLog1p),
-        // FIXME(https://bugs.swift.org/browse/TF-543): Disable failing test.
-        // ("testExpm1", testExpm1),
+        ("testExpm1", testExpm1),
         ("testSign", testSign),
+        ("testLogSigmoid", testLogSigmoid),
         ("testReduction", testReduction),
         ("testCosineSimilarity", testCosineSimilarity),
         ("testElu", testElu),
         ("testGelu", testGelu),
         ("testArgmax", testArgmax),
         ("testSoftplus", testSoftplus),
         ("testSoftsign", testSoftsign),
+        ("testElu", testElu),
         ("testLeakyRelu", testLeakyRelu),
+        ("testIsFinite", testIsFinite),
+        ("testIsInfinite", testIsInfinite),
+        ("testIsNaN", testIsNaN),
+        ("testCosineSimilarity", testCosineSimilarity),
+        ("testReduction", testReduction),
+        ("testArgmax", testArgmax),
         ("testCeilAndFloor", testCeilAndFloor),
         ("testSimpleMath", testSimpleMath),
         ("testStandardDeviation", testStandardDeviation),
```