This repository was archived by the owner on Jul 1, 2023. It is now read-only.

Commit b5088a7

eaplatanios authored and rxwei committed
Add 'logSumExp'. (#150)
1 parent 0862d9b commit b5088a7

2 files changed: +140 −3 lines changed

Sources/TensorFlow/Operators/Math.swift

Lines changed: 119 additions & 3 deletions
@@ -1874,10 +1874,9 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
         standardDeviation(squeezingAxes: axes)
     }
 
-    /// Returns the standard deviation of the elements along the specified axes. The reduced
-    /// dimensions are retained with value `1`. Does not apply Bessel's correction.
+    /// Returns the standard deviation of all elements in this tensor.
+    /// Does not apply Bessel's correction.
     ///
-    /// - Parameter axes: The dimensions to reduce.
     /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
     @inlinable
     @differentiable(wrt: self)
@@ -1920,6 +1919,123 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     func standardDeviation(alongAxes axes: Int...) -> Tensor {
         TensorFlow.sqrt(variance(alongAxes: axes))
     }
+
+    /// Returns `log(exp(self).sum(squeezingAxes: axes))`. The reduced dimensions are removed.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(squeezingAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(squeezingAxes axes: Tensor<Int32>) -> Tensor {
+        let rawMax = max(alongAxes: axes)
+        let offset = Swift.withoutDerivative(at: rawMax) { rawMax in
+            rawMax.replacing(
+                with: Tensor<Scalar>(zerosLike: rawMax),
+                where: rawMax.isFinite)
+        }
+        let result = TensorFlow.log(TensorFlow.exp(self - offset).sum(squeezingAxes: axes))
+        let resultShape = Swift.withoutDerivative(at: result.shapeTensor, in: identity)
+        return result + offset.reshaped(toShape: resultShape)
+    }
+
+    /// Returns `log(exp(self).sum(squeezingAxes: axes))`. The reduced dimensions are removed.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(squeezingAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(squeezingAxes axes: [Int]) -> Tensor {
+        // TODO(TF-433): Remove workaround for differentiating `map`.
+        let axes = Swift.withoutDerivative(at: axes) { $0.map(Int32.init) }
+        return logSumExp(squeezingAxes: Tensor<Int32>(axes))
+    }
+
+    /// Returns `log(exp(self).sum(squeezingAxes: axes))`. The reduced dimensions are removed.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(squeezingAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(squeezingAxes axes: Int...) -> Tensor {
+        return logSumExp(squeezingAxes: axes)
+    }
+
+    /// Returns `log(exp(self).sum())`. The result is a scalar.
+    ///
+    /// This function is more numerically stable than computing `log(exp(self).sum())` directly. It
+    /// avoids overflows caused by computing the `exp` of large inputs and underflows caused by
+    /// computing the `log` of small inputs.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp() -> Tensor {
+        return logSumExp(squeezingAxes: Array(0..<shape.rank))
+    }
+
+    /// Returns `log(exp(self).sum(alongAxes: axes))`. The reduced dimensions are retained with
+    /// value `1`.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(alongAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(alongAxes axes: Tensor<Int32>) -> Tensor {
+        let rawMax = max(alongAxes: axes)
+        let offset = Swift.withoutDerivative(at: rawMax) { rawMax in
+            rawMax.replacing(
+                with: Tensor<Scalar>(zerosLike: rawMax),
+                where: rawMax.isFinite)
+        }
+        let result = TensorFlow.log(TensorFlow.exp(self - offset).sum(alongAxes: axes))
+        return result + offset
+    }
+
+    /// Returns `log(exp(self).sum(alongAxes: axes))`. The reduced dimensions are retained with
+    /// value `1`.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(alongAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(alongAxes axes: [Int]) -> Tensor {
+        // TODO(TF-433): Remove workaround for differentiating `map`.
+        let axes = Swift.withoutDerivative(at: axes) { $0.map(Int32.init) }
+        return logSumExp(alongAxes: Tensor<Int32>(axes))
+    }
+
+    /// Returns `log(exp(self).sum(alongAxes: axes))`. The reduced dimensions are retained with
+    /// value `1`.
+    ///
+    /// This function is more numerically stable than computing
+    /// `log(exp(self).sum(alongAxes: axes))` directly. It avoids overflows caused by computing
+    /// the `exp` of large inputs and underflows caused by computing the `log` of small inputs.
+    ///
+    /// - Parameter axes: The dimensions to reduce.
+    /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+    @inlinable
+    @differentiable(wrt: self)
+    func logSumExp(alongAxes axes: Int...) -> Tensor {
+        return logSumExp(alongAxes: axes)
+    }
 }
 
 //===------------------------------------------------------------------------------------------===//
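For reference, the `logSumExp` implementations above all rely on the standard log-sum-exp shift: subtract the (finite) maximum before exponentiating, then add it back after the logarithm, so that `exp` never sees large positive arguments. A minimal standalone sketch of that identity over a plain Swift array (the free function `logSumExp(_:)` below is hypothetical and only illustrates the trick; it is not part of this commit):

import Foundation

/// log(Σ exp(xᵢ)) == m + log(Σ exp(xᵢ - m)), with m = max(xᵢ), falling back to m = 0
/// when the maximum is not finite (mirroring the `replacing(with:where:)` step above).
func logSumExp(_ values: [Double]) -> Double {
    let rawMax = values.max() ?? 0
    let offset = rawMax.isFinite ? rawMax : 0
    let shiftedSum = values.reduce(0) { $0 + exp($1 - offset) }
    return offset + log(shiftedSum)
}

print(log([1000.0, 1000.0, 1000.0].reduce(0) { $0 + exp($1) }))  // inf: exp(1000) overflows Double.
print(logSumExp([1000.0, 1000.0, 1000.0]))                       // ≈ 1001.0986, i.e. 1000 + log(3).

Subtracting the maximum makes every exponent at most zero, so the shifted sum cannot overflow; adding the offset back afterwards recovers the exact mathematical value.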

Tests/TensorFlowTests/OperatorTests/MathTests.swift

Lines changed: 21 additions & 0 deletions
@@ -205,6 +205,26 @@ final class MathOperatorTests: XCTestCase {
         XCTAssertEqual(scalarsArgmax.array, ShapedArray(shape: [], scalars: [5]))
     }
 
+    func testLogSumExp() {
+        let x = Tensor<Float>([
+            [0.45031791, 0.41123222, 0.53928467, 0.47167023, 0.15483777],
+            [0.49975705, 0.71807549, 0.30396056, 0.2690469 , 0.01404393],
+            [0.16950939, 0.41085612, 0.79503016, 0.11977817, 0.99728241],
+            [0.62510073, 0.17344792, 0.1540605 , 0.40758517, 0.93683817],
+            [0.15653343, 0.50502756, 0.99365925, 0.84617581, 0.17422509]])
+        let y0 = x.logSumExp()
+        let y1 = x.logSumExp(squeezingAxes: 1)
+        let y2 = x.logSumExp(alongAxes: 1)
+        let expectedY0 = Tensor<Float>(3.713885997817954)
+        let expectedY1 = Tensor<Float>(
+            [2.02318908, 1.99835067, 2.16853826, 2.1137799, 2.20261244])
+        let expectedY2 = Tensor<Float>(
+            [[2.02318908], [1.99835067], [2.16853826], [2.1137799], [2.20261244]])
+        assertEqual(y0, expectedY0, accuracy: 0.0001)
+        assertEqual(y1, expectedY1, accuracy: 0.0001)
+        assertEqual(y2, expectedY2, accuracy: 0.0001)
+    }
+
     func testCeilAndFloor() {
         let x = Tensor<Float>([-1.3, -0.4, 0.5, 1.6])
         let xFloor = floor(x)
@@ -357,6 +377,7 @@ final class MathOperatorTests: XCTestCase {
         ("testCosineSimilarity", testCosineSimilarity),
         ("testReduction", testReduction),
         ("testArgmax", testArgmax),
+        ("testLogSumExp", testLogSumExp),
         ("testCeilAndFloor", testCeilAndFloor),
         ("testSimpleMath", testSimpleMath),
         ("testStandardDeviation", testStandardDeviation),
