
[AutoDiff] Add adjoints for sum and mean. #21790

Merged · 3 commits · Jan 11, 2019
47 changes: 37 additions & 10 deletions stdlib/public/TensorFlow/Gradients.swift
@@ -48,32 +48,29 @@

public extension Differentiable {
@inlinable
-func gradient<R : Differentiable>(
+func gradient<R : Differentiable & FloatingPoint>(
in f: @autodiff (Self) -> Tensor<R>
-) -> CotangentVector
-where R : Differentiable & FloatingPoint {
+) -> CotangentVector {
return self.pullback(in: f)(Tensor<R>(1))
}

@inlinable
-func valueWithGradient<R : Differentiable>(
+func valueWithGradient<R : Differentiable & FloatingPoint>(
in f: @autodiff (Self) -> Tensor<R>
-) -> (value: Tensor<R>, gradient: CotangentVector)
-where R : Differentiable & FloatingPoint {
+) -> (value: Tensor<R>, gradient: CotangentVector) {
let (y, pb) = self.valueWithPullback(in: f)
return (y, pb(Tensor<R>(1)))
}

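These wrappers encode the standard reverse-mode identity: for a scalar-valued function, the gradient is the pullback applied to a seed of one, which is why every overload feeds `Tensor<R>(1)` into the pullback it obtains. A minimal plain-Swift sketch of the same idea (arrays stand in for tensors; all names are illustrative, not part of this API):

// Sketch: gradient as "pullback applied to seed 1", using plain arrays.
// For f(x) = sum of x_i^2, the pullback maps a seed s to [2 * s * x_i].
func valueWithPullbackSketch(
  _ x: [Float]
) -> (value: Float, pullback: (Float) -> [Float]) {
  let value = x.map { $0 * $0 }.reduce(0, +)
  return (value, { seed in x.map { 2 * seed * $0 } })
}

let (y, pb) = valueWithPullbackSketch([1, 2, 3])
print(y)      // 14.0
print(pb(1))  // the gradient: [2.0, 4.0, 6.0]
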
@inlinable
-func gradient<T : Differentiable, R : Differentiable>(
+func gradient<T : Differentiable, R : Differentiable & FloatingPoint>(
at x: T, in f: @autodiff (Self, T) -> Tensor<R>
-) -> (CotangentVector, T.CotangentVector)
-where R : Differentiable & FloatingPoint {
+) -> (CotangentVector, T.CotangentVector) {
return self.pullback(at: x, in: f)(Tensor<R>(1))
}

@inlinable
-func valueWithGradient<T : Differentiable, R : Differentiable>(
+func valueWithGradient<T : Differentiable, R>(
at x: T, in f: @autodiff (Self, T) -> Tensor<R>
) -> (value: Tensor<R>, gradient: (CotangentVector, T.CotangentVector))
where R : Differentiable & FloatingPoint {
@@ -419,6 +416,36 @@ extension Tensor where Scalar : Differentiable & FloatingPoint {
}
}

+//===----------------------------------------------------------------------===//
+// Reduction
+//===----------------------------------------------------------------------===//
+
+extension Tensor where Scalar : Differentiable & FloatingPoint {
+@inlinable
+func _adjointMean(_ seed: Tensor, _ originalValue: Tensor) -> Tensor {
+return seed.broadcast(like: self) / Tensor(scalarCountTensor)
+}
+
+@inlinable
+func _adjointSum(_ seed: Tensor, _ originalValue: Tensor) -> Tensor {
+return seed.broadcast(like: self)
+}
+
+@inlinable
+func _adjointMean(
+_ seed: Tensor, _ originalValue: Tensor, squeezingAxes axes: [Int32]
+) -> Tensor {
+return seed.broadcast(like: self) / Tensor(scalarCountTensor)
+}
+
+@inlinable
+func _adjointSum(
+_ seed: Tensor, _ originalValue: Tensor, squeezingAxes axes: [Int32]
+) -> Tensor {
+return seed.broadcast(like: self)
+}
+}

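For intuition, these four adjoints implement the rules d(sum(x))/dx_i = 1 and d(mean(x))/dx_i = 1/n: the pullback broadcasts the incoming seed back to the input's shape, and for `mean` additionally divides by the scalar count. A minimal sketch with plain Swift arrays (a flat [Float] stands in for a tensor; helper names are illustrative):

// Adjoint of sum broadcasts the seed; adjoint of mean also scales by 1/n.
func adjointSumSketch(seed: Float, inputCount: Int) -> [Float] {
  return Array(repeating: seed, count: inputCount)
}

func adjointMeanSketch(seed: Float, inputCount: Int) -> [Float] {
  return Array(repeating: seed / Float(inputCount), count: inputCount)
}

// For a 2x2 input (n = 4), a seed of 1 yields all ones for sum and all 0.25
// for mean -- exactly the expectations in the tests added below.
print(adjointSumSketch(seed: 1, inputCount: 4))   // [1.0, 1.0, 1.0, 1.0]
print(adjointMeanSketch(seed: 1, inputCount: 4))  // [0.25, 0.25, 0.25, 0.25]
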
//===----------------------------------------------------------------------===//
// Normalization
//===----------------------------------------------------------------------===//
159 changes: 148 additions & 11 deletions stdlib/public/TensorFlow/Ops.swift
@@ -604,10 +604,17 @@ public extension Tensor {
/// Returns a transposed tensor, with dimensions permuted in the specified
/// order.
@inlinable @inline(__always)
-func transposed(withPermutations permutations: Int32...) -> Tensor {
+func transposed(withPermutations permutations: [Int32]) -> Tensor {
return transposed(withPermutations: Tensor<Int32>(permutations))
}

+/// Returns a transposed tensor, with dimensions permuted in the specified
+/// order.
+@inlinable @inline(__always)
+func transposed(withPermutations permutations: Int32...) -> Tensor {
+return transposed(withPermutations: permutations)
+}

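This change sets the pattern repeated for every reduction below: the `[Int32]`-taking method becomes the canonical implementation (and lines up with the array-typed `axes` parameter of the hand-written adjoints), while a variadic overload forwards to it so existing call sites such as `transposed(withPermutations: 0, 2, 1)` keep compiling. A generic sketch of the idiom, with illustrative names:

// Array-plus-variadic overload idiom: the array overload does the work,
// the variadic overload forwards to it for call-site ergonomics.
struct AxesExample {
  func reduceCount(axes: [Int32]) -> Int {
    return axes.count
  }

  func reduceCount(axes: Int32...) -> Int {
    return reduceCount(axes: axes)  // variadic arguments arrive as an array
  }
}

let e = AxesExample()
print(e.reduceCount(axes: [0, 1]))  // 2
print(e.reduceCount(axes: 0, 1))    // 2 -- same result, nicer call site
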
/// Returns a transposed tensor, with dimensions permuted in reverse order.
@inlinable @inline(__always)
func transposed() -> Tensor {
@@ -1033,19 +1040,37 @@ public extension Tensor where Scalar : Numeric & Comparable {
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func max(squeezingAxes axes: Int32...) -> Tensor {
+func max(squeezingAxes axes: [Int32]) -> Tensor {
return Raw.max(self, reductionIndices: Tensor<Int32>(axes), keepDims: false)
}

+/// Returns the maximum values along the specified axes. The reduced
+/// dimensions are removed.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func max(squeezingAxes axes: Int32...) -> Tensor {
+return max(squeezingAxes: axes)
+}

/// Returns the minimum values along the specified axes. The reduced
/// dimensions are removed.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func min(squeezingAxes axes: Int32...) -> Tensor {
+func min(squeezingAxes axes: [Int32]) -> Tensor {
return Raw.min(self, reductionIndices: Tensor<Int32>(axes), keepDims: false)
}

+/// Returns the minimum values along the specified axes. The reduced
+/// dimensions are removed.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func min(squeezingAxes axes: Int32...) -> Tensor {
+return min(squeezingAxes: axes)
+}

/// Returns the indices of the maximum values along the specified axes. The
/// reduced dimensions are removed.
/// - Parameter axes: The dimensions to reduce.
Expand All @@ -1069,7 +1094,7 @@ public extension Tensor where Scalar : Numeric & Comparable {
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func min(alongAxes axes: Int32...) -> Tensor {
+func min(alongAxes axes: [Int32]) -> Tensor {
return Raw.min(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
}

Expand All @@ -1078,10 +1103,28 @@ public extension Tensor where Scalar : Numeric & Comparable {
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func max(alongAxes axes: Int32...) -> Tensor {
+func min(alongAxes axes: Int32...) -> Tensor {
+return min(alongAxes: axes)
}

+/// Returns the maximum along the specified axes. The reduced dimensions are
+/// retained with value 1.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func max(alongAxes axes: [Int32]) -> Tensor {
return Raw.max(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
}

+/// Returns the maximum along the specified axes. The reduced dimensions are
+/// retained with value 1.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func max(alongAxes axes: Int32...) -> Tensor {
+return max(alongAxes: axes)
+}

/// Returns the index of the maximum value of the flattened scalars.
@inlinable @inline(__always)
func argmax() -> Tensor<Int32> {
Expand All @@ -1098,6 +1141,10 @@ public extension Tensor where Scalar : Numeric & Comparable {
public extension Tensor where Scalar : Numeric {
// NOTE: This overload is necessary, otherwise `mean()` would refer
// to the variadic method `mean(squeezingAxes:)` with zero indices.
+@differentiable(
+wrt: (self), adjoint: _adjointMean(_:_:)
+where Scalar : Differentiable & FloatingPoint
+)
@inlinable @inline(__always)
func mean() -> Tensor {
let axes = Tensor<Int32>(rangeFrom: 0, to: rank, stride: 1)
Expand All @@ -1107,6 +1154,10 @@ public extension Tensor where Scalar : Numeric {
// NOTE: This overload is necessary, otherwise `sum()` would refer
// to the variadic method `sum(squeezingAxes:)` with zero indices.
@inlinable @inline(__always)
+@differentiable(
+wrt: (self), adjoint: _adjointSum(_:_:)
+where Scalar : Differentiable & FloatingPoint
+)
func sum() -> Tensor {
let axes = Tensor<Int32>(rangeFrom: 0, to: rank, stride: 1)
return Raw.sum(self, reductionIndices: axes)
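Two notes on what is happening here. First, as the NOTE comments say, the dedicated zero-argument `mean()`/`sum()` overloads exist because a bare call like `t.mean()` would otherwise resolve to the variadic `mean(squeezingAxes:)` with an empty axis list. Second, `@differentiable(wrt:adjoint:where:)` is the attribute syntax of this era: it registers a hand-written adjoint that the compiler uses as the pullback whenever the `where` constraints hold, which is how these methods on `Scalar : Numeric` pick up gradients only for floating-point scalars. A sketch of the registration pattern, mirroring the conventions of this diff (a hypothetical `doubled()` op, meaningful only on a Swift for TensorFlow toolchain of this vintage):

extension Tensor where Scalar : Differentiable & FloatingPoint {
  // The adjoint receives the seed and the original result, like _adjointSum.
  @inlinable
  func _adjointDoubled(_ seed: Tensor, _ originalValue: Tensor) -> Tensor {
    return seed + seed  // d(x + x)/dx = 2, so scale the incoming seed by two
  }
}

public extension Tensor where Scalar : Numeric {
  @differentiable(
    wrt: (self), adjoint: _adjointDoubled(_:_:)
    where Scalar : Differentiable & FloatingPoint
  )
  @inlinable @inline(__always)
  func doubled() -> Tensor {
    return self + self
  }
}
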
Expand All @@ -1125,56 +1176,142 @@ public extension Tensor where Scalar : Numeric {
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func mean(squeezingAxes axes: Int32...) -> Tensor {
+@differentiable(
+wrt: (self), adjoint: _adjointMean(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func mean(squeezingAxes axes: [Int32]) -> Tensor {
return Raw.mean(self, reductionIndices: Tensor<Int32>(axes),
keepDims: false)
}

+/// Returns the arithmetic mean along the specified axes. The reduced
+/// dimensions are removed.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+@differentiable(
+wrt: (self), adjoint: _adjointMean(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func mean(squeezingAxes axes: Int32...) -> Tensor {
+return mean(squeezingAxes: axes)
+}

/// Returns the sum along the specified axes. The reduced dimensions are
/// removed.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func sum(squeezingAxes axes: Int32...) -> Tensor {
+@differentiable(
+wrt: (self), adjoint: _adjointSum(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func sum(squeezingAxes axes: [Int32]) -> Tensor {
return Raw.sum(self, reductionIndices: Tensor<Int32>(axes), keepDims: false)
}

+/// Returns the sum along the specified axes. The reduced dimensions are
+/// removed.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+@differentiable(
+wrt: (self), adjoint: _adjointSum(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func sum(squeezingAxes axes: Int32...) -> Tensor {
+return sum(squeezingAxes: axes)
+}

/// Returns the product along the specified axes. The reduced dimensions are
/// removed.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank...rank`.
@inlinable @inline(__always)
-func product(squeezingAxes axes: Int32...) -> Tensor {
+func product(squeezingAxes axes: [Int32]) -> Tensor {
return Raw.prod(self, reductionIndices: Tensor<Int32>(axes),
keepDims: false)
}

+/// Returns the product along the specified axes. The reduced dimensions are
+/// removed.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func product(squeezingAxes axes: Int32...) -> Tensor {
+return product(squeezingAxes: axes)
+}

/// Returns the arithmetic mean along the specified axes. The reduced
/// dimensions are retained with value 1.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func mean(alongAxes axes: Int32...) -> Tensor {
+@differentiable(
+wrt: (self), adjoint: _adjointMean(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func mean(alongAxes axes: [Int32]) -> Tensor {
return Raw.mean(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
}

+/// Returns the arithmetic mean along the specified axes. The reduced
+/// dimensions are retained with value 1.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+@differentiable(
+wrt: (self), adjoint: _adjointMean(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func mean(alongAxes axes: Int32...) -> Tensor {
+return mean(alongAxes: axes)
+}

/// Returns the sum along the specified axes. The reduced dimensions are
/// retained with value 1.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func sum(alongAxes axes: Int32...) -> Tensor {
+@differentiable(
+wrt: (self), adjoint: _adjointSum(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func sum(alongAxes axes: [Int32]) -> Tensor {
return Raw.sum(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
}

+/// Returns the sum along the specified axes. The reduced dimensions are
+/// retained with value 1.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+@differentiable(
+wrt: (self), adjoint: _adjointSum(_:_:squeezingAxes:)
+where Scalar : Differentiable & FloatingPoint
+)
+func sum(alongAxes axes: Int32...) -> Tensor {
+return sum(alongAxes: axes)
+}

/// Returns the product along the specified axes. The reduced dimensions are
/// retained with value 1.
/// - Parameter axes: The dimensions to reduce.
/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
@inlinable @inline(__always)
-func product(alongAxes axes: Int32...) -> Tensor {
+func product(alongAxes axes: [Int32]) -> Tensor {
return Raw.prod(self, reductionIndices: Tensor<Int32>(axes), keepDims: true)
}

+/// Returns the product along the specified axes. The reduced dimensions are
+/// retained with value 1.
+/// - Parameter axes: The dimensions to reduce.
+/// - Precondition: Each value in `axes` must be in the range `-rank..<rank`.
+@inlinable @inline(__always)
+func product(alongAxes axes: Int32...) -> Tensor {
+return product(alongAxes: axes)
+}
}

//===----------------------------------------------------------------------===//
32 changes: 30 additions & 2 deletions test/TensorFlowRuntime/tensor_autodiff_runtime.swift
@@ -1,7 +1,8 @@
// RUN: %target-run-simple-swift %swift-tensorflow-test-run-extra-options
// RUN: %target-run-simple-no-vjp-swift %swift-tensorflow-test-run-extra-options
-// RUN: %target-run-dynamic-compilation-swift
//
+// Note: GPE testing is disabled because GPE does not interact well with
+// VJP-based AD. See SR-9638.
+//
// REQUIRES: executable_test
// REQUIRES: swift_test_mode_optimize
-//
@@ -62,6 +63,33 @@ TensorADTests.testAllBackends("negate") {
expectTrue([-1] == gradient(at: [10], in: f))
}

TensorADTests.testAllBackends("sum") {
let input = Tensor<Float>(randomNormal: [2, 2])
let sumPullbackScalar = pullback(at: input) { (a: Tensor<Float>) in a.sum() }
let sumPullbackSqueezingAxes = pullback(at: input) { (a: Tensor<Float>) in a.sum(squeezingAxes: 0, 1) }
let sumPullbackAlongAxes = pullback(at: input) { (a: Tensor<Float>) in a.sum(alongAxes: 0, 1) }

let expected = Tensor<Float>(ones: [2, 2])
expectTrue(sumPullbackScalar(Tensor(1)) == expected)
expectTrue(sumPullbackSqueezingAxes(Tensor(1)) == expected)
expectTrue(sumPullbackAlongAxes(Tensor(1)) == expected)
expectTrue(sumPullbackScalar(Tensor(3)) == expected * 3)
expectTrue(sumPullbackSqueezingAxes(Tensor(3)) == expected * 3)
expectTrue(sumPullbackAlongAxes(Tensor(3)) == expected * 3)
}

TensorADTests.testAllBackends("mean") {
let meanGradScalar = gradient { (a: Tensor<Float>) in a.mean() }
let meanGradSqueezingAxes = gradient { (a: Tensor<Float>) in a.mean(squeezingAxes: 0, 1) }
let meanGradAlongAxes = gradient { (a: Tensor<Float>) in a.mean(alongAxes: 0, 1) }

let input = Tensor<Float>(ones: [2, 2])
let expected = Tensor<Float>(shape: [2, 2], repeating: 0.25)
expectTrue(meanGradScalar(input) == expected)
expectTrue(meanGradSqueezingAxes(input) == expected)
expectTrue(meanGradAlongAxes(input) == expected)
}

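A quick sanity check on the expected values: for a 2x2 input, mean(x) = (x_0 + x_1 + x_2 + x_3) / 4, so every partial derivative is 1/4 = 0.25, and for sum every partial is 1 (scaled by whatever seed is passed). The same numbers fall out of a finite-difference probe in plain Swift, independent of the Tensor API:

// Finite-difference check of d(mean)/dx_i ~= 0.25 on a flattened 2x2 input.
func meanOf(_ x: [Float]) -> Float { return x.reduce(0, +) / Float(x.count) }

let sample: [Float] = [1, 2, 3, 4]
let eps: Float = 1e-2
for i in sample.indices {
  var bumped = sample
  bumped[i] += eps
  print("d(mean)/dx\(i) ~= \((meanOf(bumped) - meanOf(sample)) / eps)")  // ~= 0.25
}
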
TensorADTests.testAllBackends("SR-9345: OwnedCheckpoints") {
@differentiable(adjoint: adjointFoo)
func foo(_ x: Tensor<Float>) -> Tensor<Float> {