Commit ec50086

Add variance() and variance(squeezingAxes:). (#23811)
* Add `variance()` and `variance(squeezingAxes:)`.
* Reorganize reduction ops: `sum`, `product`, `mean`, `variance`.
1 parent: 0f9f819

File tree

4 files changed (+170, -68 lines)

stdlib/public/TensorFlow/Gradients.swift

Lines changed: 29 additions & 5 deletions
@@ -568,6 +568,11 @@ extension Tensor where Scalar : TensorFlowFloatingPoint {
 //===----------------------------------------------------------------------===//
 
 extension Tensor where Scalar : TensorFlowFloatingPoint {
+  @inlinable
+  func _vjpSum() -> (Tensor, (Tensor) -> Tensor) {
+    return (sum(), { [shape = shapeTensor] in $0.broadcast(toShape: shape) })
+  }
+
   @inlinable
   func _vjpMean() -> (Tensor, (Tensor) -> Tensor) {
     return (mean(), { [shape = shapeTensor, count = scalarCountTensor] in
@@ -576,8 +581,15 @@ extension Tensor where Scalar : TensorFlowFloatingPoint {
   }
 
   @inlinable
-  func _vjpSum() -> (Tensor, (Tensor) -> Tensor) {
-    return (sum(), { [shape = shapeTensor] in $0.broadcast(toShape: shape) })
+  func _vjpSum(alongAxes axes: [Int32]) -> (Tensor, (Tensor) -> Tensor) {
+    let value = sum(alongAxes: axes)
+    return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) })
+  }
+
+  @inlinable
+  func _vjpSum(squeezingAxes axes: [Int32]) -> (Tensor, (Tensor) -> Tensor) {
+    let value = sum(squeezingAxes: axes)
+    return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) })
   }
 
   @inlinable
@@ -590,9 +602,21 @@ extension Tensor where Scalar : TensorFlowFloatingPoint {
   }
 
   @inlinable
-  func _vjpSum(alongAxes axes: [Int32]) -> (Tensor, (Tensor) -> Tensor) {
-    let value = sum(alongAxes: axes)
-    return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) })
+  func _vjpMean(squeezingAxes axes: [Int32]) -> (Tensor, (Tensor) -> Tensor) {
+    let value = mean(squeezingAxes: axes)
+    return (value, { [shape = shapeTensor,
+                      count = axes.map { shape[$0] }.reduce(1, *)] in
+      $0.broadcast(toShape: shape) / Tensor(Scalar(count))
+    })
+  }
+
+  @inlinable
+  func _vjpMean(alongAxes axes: [Int32]) -> (Tensor, (Tensor) -> Tensor) {
+    let value = mean(alongAxes: axes)
+    return (value, { [shape = shapeTensor,
+                      count = axes.map { shape[$0] }.reduce(1, *)] in
+      $0.broadcast(toShape: shape) / Tensor(Scalar(count))
+    })
   }
 }
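
Note on the pullbacks: each `_vjpSum` pullback simply broadcasts the incoming gradient back to the input's shape, and each `_vjpMean` pullback additionally divides by the number of reduced elements. A quick sanity check in the style of the tests below (a sketch, assuming a Swift for TensorFlow toolchain of this vintage):

let input: Tensor<Float> = [[1, 2], [3, 4]]
let sumGrad = gradient { (a: Tensor<Float>) in a.sum() }
let meanGrad = gradient { (a: Tensor<Float>) in a.mean() }
let expectedSumGrad: Tensor<Float> = [[1, 1], [1, 1]]
let expectedMeanGrad: Tensor<Float> = [[0.25, 0.25], [0.25, 0.25]]
expectEqual(expectedSumGrad, sumGrad(input))    // broadcast of the upstream 1
expectEqual(expectedMeanGrad, meanGrad(input))  // also divided by scalarCount (4)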

stdlib/public/TensorFlow/Ops.swift

Lines changed: 98 additions & 55 deletions
@@ -1251,18 +1251,6 @@ public extension Tensor where Scalar : Numeric & Comparable {
 }
 
 public extension Tensor where Scalar : Numeric {
-  // NOTE: This overload is necessary, otherwise `mean()` would refer
-  // to the variadic method `mean(squeezingAxes:)` with zero indices.
-  @differentiable(
-    wrt: self, vjp: _vjpMean()
-    where Scalar : TensorFlowFloatingPoint
-  )
-  @inlinable @inline(__always)
-  func mean() -> Tensor {
-    let axes = Tensor<Int32>(rangeFrom: 0, to: rank, stride: 1)
-    return Raw.mean(self, reductionIndices: axes)
-  }
-
   // NOTE: This overload is necessary, otherwise `sum()` would refer
   // to the variadic method `sum(squeezingAxes:)` with zero indices.
   @inlinable @inline(__always)
@@ -1283,30 +1271,37 @@ public extension Tensor where Scalar : Numeric {
     return Raw.prod(self, reductionIndices: axes)
   }
 
-  /// Returns the arithmetic mean along the specified axes. The reduced
-  /// dimensions are removed.
-  /// - Parameter axes: The dimensions to reduce.
-  /// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
+  // NOTE: This overload is necessary, otherwise `mean()` would refer
+  // to the variadic method `mean(squeezingAxes:)` with zero indices.
+  @differentiable(
+    wrt: self, vjp: _vjpMean()
+    where Scalar : TensorFlowFloatingPoint
+  )
   @inlinable @inline(__always)
-  func mean(squeezingAxes axes: [Int32]) -> Tensor {
-    return Raw.mean(self, reductionIndices: Tensor<Int32>(axes),
-                    keepDims: false)
+  func mean() -> Tensor {
+    let axes = Tensor<Int32>(rangeFrom: 0, to: rank, stride: 1)
+    return Raw.mean(self, reductionIndices: axes)
   }
 
-  /// Returns the arithmetic mean along the specified axes. The reduced
-  /// dimensions are removed.
-  /// - Parameter axes: The dimensions to reduce.
-  /// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
+  // NOTE: This overload is necessary, otherwise `mean()` would refer
+  // to the variadic method `mean(squeezingAxes:)` with zero indices.
+  @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
   @inlinable @inline(__always)
-  func mean(squeezingAxes axes: Int32...) -> Tensor {
-    return mean(squeezingAxes: axes)
+  func variance() -> Tensor {
+    let mean = self.mean()
+    let squaredDiff = (self - mean).squared()
+    return squaredDiff.mean()
   }
 
   /// Returns the sum along the specified axes. The reduced dimensions are
   /// removed.
   /// - Parameter axes: The dimensions to reduce.
   /// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
   @inlinable @inline(__always)
+  @differentiable(
+    wrt: self, vjp: _vjpSum(squeezingAxes:)
+    where Scalar : TensorFlowFloatingPoint
+  )
   func sum(squeezingAxes axes: [Int32]) -> Tensor {
     return Raw.sum(self, reductionIndices: Tensor<Int32>(axes), keepDims: false)
   }
@@ -1340,36 +1335,48 @@ public extension Tensor where Scalar : Numeric {
   }
 
   /// Returns the arithmetic mean along the specified axes. The reduced
-  /// dimensions are retained with value 1.
+  /// dimensions are removed.
   /// - Parameter axes: The dimensions to reduce.
-  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  /// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
   @inlinable @inline(__always)
   @differentiable(
-    wrt: self, vjp: _vjpMean(alongAxes:)
+    wrt: self, vjp: _vjpMean(squeezingAxes:)
     where Scalar : TensorFlowFloatingPoint
   )
-  func mean(alongAxes axes: Tensor<Int32>) -> Tensor {
-    return Raw.mean(self, reductionIndices: axes, keepDims: true)
+  func mean(squeezingAxes axes: [Int32]) -> Tensor {
+    return Raw.mean(self, reductionIndices: Tensor<Int32>(axes),
+                    keepDims: false)
   }
 
   /// Returns the arithmetic mean along the specified axes. The reduced
-  /// dimensions are retained with value 1.
+  /// dimensions are removed.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
+  @inlinable @inline(__always)
+  func mean(squeezingAxes axes: Int32...) -> Tensor {
+    return mean(squeezingAxes: axes)
+  }
+
+  /// Returns the variance along the specified axes. The reduced dimensions are
+  /// retained with value 1. Does not apply Bessel's correction.
   /// - Parameter axes: The dimensions to reduce.
   /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
   @inlinable @inline(__always)
   @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
-  func mean(alongAxes axes: [Int32]) -> Tensor {
-    return mean(alongAxes: Tensor<Int32>(axes))
+  func variance(squeezingAxes axes: Int32...) -> Tensor {
+    return variance(squeezingAxes: axes)
   }
 
-  /// Returns the arithmetic mean along the specified axes. The reduced
-  /// dimensions are retained with value 1.
+  /// Returns the variance along the specified axes. The reduced dimensions are
+  /// removed. Does not apply Bessel's correction.
   /// - Parameter axes: The dimensions to reduce.
   /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
   @inlinable @inline(__always)
   @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
-  func mean(alongAxes axes: Int32...) -> Tensor {
-    return mean(alongAxes: axes)
+  func variance(squeezingAxes axes: [Int32]) -> Tensor {
+    let mean = self.mean(alongAxes: axes)
+    let squaredDiff = (self - mean).squared()
+    return squaredDiff.mean(squeezingAxes: axes)
   }
 
   /// Returns the sum along the specified axes. The reduced dimensions are
@@ -1395,6 +1402,60 @@ public extension Tensor where Scalar : Numeric {
     return sum(alongAxes: axes)
   }
 
+  /// Returns the product along the specified axes. The reduced dimensions are
+  /// retained with value 1.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  @inlinable @inline(__always)
+  func product(alongAxes axes: [Int32]) -> Tensor {
+    return Raw.prod(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
+  }
+
+  /// Returns the product along the specified axes. The reduced dimensions are
+  /// retained with value 1.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  @inlinable @inline(__always)
+  func product(alongAxes axes: Int32...) -> Tensor {
+    return product(alongAxes: axes)
+  }
+
+  /// Returns the arithmetic mean along the specified axes. The reduced
+  /// dimensions are retained with value 1.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  @inlinable @inline(__always)
+  @differentiable(
+    wrt: self, vjp: _vjpMean(alongAxes:)
+    where Scalar : TensorFlowFloatingPoint
+  )
+  func mean(alongAxes axes: Tensor<Int32>) -> Tensor {
+    return Raw.mean(self, reductionIndices: axes, keepDims: true)
+  }
+
+  /// Returns the arithmetic mean along the specified axes. The reduced
+  /// dimensions are retained with value 1.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  @inlinable @inline(__always)
+  @differentiable(
+    wrt: self, vjp: _vjpMean(alongAxes:)
+    where Scalar : TensorFlowFloatingPoint
+  )
+  func mean(alongAxes axes: [Int32]) -> Tensor {
+    return mean(alongAxes: Tensor<Int32>(axes))
+  }
+
+  /// Returns the arithmetic mean along the specified axes. The reduced
+  /// dimensions are retained with value 1.
+  /// - Parameter axes: The dimensions to reduce.
+  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+  @inlinable @inline(__always)
+  @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
+  func mean(alongAxes axes: Int32...) -> Tensor {
+    return mean(alongAxes: axes)
+  }
+
   /// Returns the variance along the specified axes. The reduced dimensions are
   /// retained with value 1. Does not apply Bessel's correction.
   /// - Parameter axes: The dimensions to reduce.
@@ -1426,24 +1487,6 @@ public extension Tensor where Scalar : Numeric {
   func variance(alongAxes axes: [Int32]) -> Tensor {
     return variance(alongAxes: Tensor<Int32>(axes))
   }
-
-  /// Returns the product along the specified axes. The reduced dimensions are
-  /// retained with value 1.
-  /// - Parameter axes: The dimensions to reduce.
-  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
-  @inlinable @inline(__always)
-  func product(alongAxes axes: [Int32]) -> Tensor {
-    return Raw.prod(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
-  }
-
-  /// Returns the product along the specified axes. The reduced dimensions are
-  /// retained with value 1.
-  /// - Parameter axes: The dimensions to reduce.
-  /// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
-  @inlinable @inline(__always)
-  func product(alongAxes axes: Int32...) -> Tensor {
-    return product(alongAxes: axes)
-  }
 }
 
 //===----------------------------------------------------------------------===//
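
Note: `variance` here is the population variance, mean((x - mean(x))^2); as the doc comments say, Bessel's correction is not applied (division by n rather than n - 1). In `variance(squeezingAxes:)`, the inner `mean(alongAxes:)` keeps the reduced dimensions so that `self - mean` broadcasts correctly, and only the final `mean(squeezingAxes:)` drops them. A usage sketch (assuming a Swift for TensorFlow toolchain of this vintage):

let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
// The mean is 3 and each row's squared deviations are [4, 1, 0, 1, 4],
// so the population variance over all 10 scalars is 20 / 10 = 2.
print(x.variance())                  // 2.0
// Reducing along axis 1 and squeezing it away yields one variance per row.
print(x.variance(squeezingAxes: 1))  // [2.0, 2.0]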

test/TensorFlowRuntime/tensor.swift

Lines changed: 31 additions & 8 deletions
@@ -178,14 +178,37 @@ TensorTests.testAllBackends("Reduction") {
 #if !TPU
   // 2 x 5
   let x = Tensor<Float>([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])
-  expectEqual(ShapedArray(shape: [5], scalars: [2, 4, 6, 8, 10]),
-              x.sum(squeezingAxes: 0).toHost(shape: []).array)
-  expectEqual(ShapedArray(shape: [1, 5], scalars: [2, 4, 6, 8, 10]),
-              x.sum(alongAxes: 0).toHost(shape: []).array)
-  expectEqual(ShapedArray(shape: [5], scalars: [1, 4, 9, 16, 25]),
-              x.product(squeezingAxes: 0).toHost(shape: []).array)
-  expectEqual(ShapedArray(shape: [1, 5], scalars: [1, 4, 9, 16, 25]),
-              x.product(alongAxes: 0).toHost(shape: []).array)
+  expectEqual(Tensor(30), x.sum().toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [2, 4, 6, 8, 10]),
+              x.sum(squeezingAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [1, 5], scalars: [2, 4, 6, 8, 10]),
+              x.sum(alongAxes: 0).toHost(shape: []))
+
+  expectEqual(Tensor(14400), x.product().toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [1, 4, 9, 16, 25]),
+              x.product(squeezingAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [1, 5], scalars: [1, 4, 9, 16, 25]),
+              x.product(alongAxes: 0).toHost(shape: []))
+
+  expectEqual(Tensor(3), x.mean().toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]),
+              x.mean(squeezingAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]),
+              x.mean(alongAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [2], scalars: [3, 3]),
+              x.mean(squeezingAxes: 1).toHost(shape: []))
+  expectEqual(Tensor(shape: [1, 2], scalars: [3, 3]),
+              x.mean(alongAxes: 1).toHost(shape: []))
+
+  expectEqual(Tensor(2), x.variance().toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]),
+              x.variance(squeezingAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]),
+              x.variance(alongAxes: 0).toHost(shape: []))
+  expectEqual(Tensor(shape: [2], scalars: [2, 2]),
+              x.variance(squeezingAxes: 1).toHost(shape: []))
+  expectEqual(Tensor(shape: [1, 2], scalars: [2, 2]),
+              x.variance(alongAxes: 1).toHost(shape: []))
 #endif // !TPU
 }
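
The expected scalar values can be re-derived by hand from `x` (plain Swift, independent of TensorFlow; a sketch, not part of the test file):

let scalars: [Float] = [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
let total = scalars.reduce(0, +)                    // 30
let product = scalars.reduce(1, *)                  // 120 * 120 = 14400
let mean = total / Float(scalars.count)             // 3
let variance = scalars
  .map { ($0 - mean) * ($0 - mean) }
  .reduce(0, +) / Float(scalars.count)              // 20 / 10 = 2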

test/TensorFlowRuntime/tensor_autodiff_runtime.swift

Lines changed: 12 additions & 0 deletions
@@ -121,6 +121,18 @@ TensorADTests.testAllBackends("mean") {
   expectEqual(expected, meanGradAlongAxes(input))
 }
 
+TensorADTests.testAllBackends("variance") {
+  let varianceGradScalar = gradient { (a: Tensor<Float>) in a.variance() }
+  // let varianceGradSqueezingAxes = gradient { (a: Tensor<Float>) in a.variance(squeezingAxes: 0, 1) }
+  let varianceGradAlongAxes = gradient { (a: Tensor<Float>) in a.variance(alongAxes: 0, 1) }
+
+  let input: Tensor<Float> = [[1, 2], [3, 4]]
+  let expected: Tensor<Float> = [[-0.75, -0.25], [0.25, 0.75]]
+  expectEqual(expected, varianceGradScalar(input))
+  // expectEqual(expected, varianceGradSqueezingAxes(input))
+  expectEqual(expected, varianceGradAlongAxes(input))
+}
+
 TensorADTests.testAllBackends("expandingShape") {
   let f1 = { (a: Tensor<Float>) in a.expandingShape(at: 0).squared() }
   let f2 = { (a: Tensor<Float>) in a.squared().expandingShape(at: 0) }
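
The expected gradient checks out analytically: for the population variance v = (1/n) * Σ (x_i - μ)^2, the μ terms cancel because Σ (x_i - μ) = 0, leaving ∂v/∂x_i = 2(x_i - μ)/n. With input [[1, 2], [3, 4]], μ = 2.5 and n = 4, so the gradient is 2(x - 2.5)/4 = [[-0.75, -0.25], [0.25, 0.75]]. Recomputed in plain Swift (a sketch, not part of the test file):

let xs: [Float] = [1, 2, 3, 4]
let n = Float(xs.count)
let mu = xs.reduce(0, +) / n              // 2.5
let grad = xs.map { 2 * ($0 - mu) / n }   // [-0.75, -0.25, 0.25, 0.75]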
