Commit c4bac79

bartchr808 authored and rxwei committed
[AutoDiff] TF-509: Make Tensor.broadcast(to:) differentiable (#24859)
* Add all VJP functions, need to write tests.
* PR feedback batch #1.
* Use closure call to remove VJPs.
* Start adding tests for (un)broadcast(toShape:).
1 parent ff2818a commit c4bac79
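
For orientation, here is a minimal usage sketch of what this change enables (assuming a Swift for TensorFlow toolchain with this commit applied, and that `sum()` is also differentiable there; the values are illustrative, not taken from the diff):

import TensorFlow

// Differentiate through a broadcast: the pullback sums the incoming
// gradient over the broadcast dimensions, so each source element gets
// one contribution per copy the broadcast made of it.
let x = Tensor<Float>([1, 2, 3])               // shape [3]
let grad = gradient(at: x) { x -> Tensor<Float> in
  x.broadcast(to: [2, 3]).sum()                // [3] -> [2, 3], then reduce
}
print(grad)                                    // [2.0, 2.0, 2.0]

Each element of `x` appears twice in the broadcast result, so its gradient under a sum is 2.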

File tree

3 files changed, +166 -2 lines changed

stdlib/public/TensorFlow/Gradients.swift

Lines changed: 24 additions & 0 deletions
@@ -635,3 +635,27 @@ func _vjpRelu<T : TensorFlowFloatingPoint>(
 ) -> (Tensor<T>, (Tensor<T>) -> Tensor<T>) {
   return (relu(x), { v in Tensor(x .> 0) * v })
 }
+
+//===----------------------------------------------------------------------===//
+// Broadcasting
+//===----------------------------------------------------------------------===//
+
+extension Tensor where Scalar : TensorFlowFloatingPoint {
+  @inlinable
+  func _vjpBroadcast(
+    toShape shape: Tensor<Int32>
+  ) -> (Tensor, (Tensor) -> Tensor) {
+    return (broadcast(toShape: shape), { [origShape = self.shapeTensor] v in
+      v.unbroadcast(toShape: origShape)
+    })
+  }
+
+  @inlinable
+  func _vjpUnbroadcast(
+    toShape shape: Tensor<Int32>
+  ) -> (Tensor, (Tensor) -> Tensor) {
+    return (unbroadcast(toShape: shape), { [origShape = self.shapeTensor] v in
+      v.broadcast(toShape: origShape)
+    })
+  }
+}
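
The two VJPs above are duals: broadcasting repeats values, so its pullback must sum the cotangent back down to the original shape, which is exactly `unbroadcast(toShape:)`; conversely, the pullback of `unbroadcast` re-broadcasts the cotangent. A quick sketch of the duality (illustrative values, same toolchain assumption as above):

import TensorFlow

let x = Tensor<Float>([1, 2, 3])                          // shape [3]
// Pullback of broadcast(toShape:) at x, per _vjpBroadcast above.
let pb = pullback(at: x) { $0.broadcast(toShape: Tensor<Int32>([2, 3])) }
let v = Tensor<Float>(ones: [2, 3])                       // cotangent of the output
print(pb(v))                                              // [2.0, 2.0, 2.0]: v summed down to x's shape

Note that the VJP closures capture `origShape = self.shapeTensor` rather than `self`, presumably so the backward pass keeps only the shape tensor alive instead of the whole broadcast input.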

stdlib/public/TensorFlow/Ops.swift

Lines changed: 11 additions & 2 deletions
@@ -1600,25 +1600,32 @@ public extension Tensor {
 
 public extension Tensor {
   @inlinable
+  @differentiable(wrt: self, vjp: _vjpBroadcast(toShape:)
+                  where Scalar : TensorFlowFloatingPoint)
   func broadcast(toShape shape: Tensor<Int32>) -> Tensor {
     return Raw.broadcastTo(self, shape: shape)
   }
 
   @inlinable
+  @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
   func broadcast(to shape: TensorShape) -> Tensor {
-    return broadcast(toShape: Tensor<Int32>(shape.dimensions.map(Int32.init)))
+    return broadcast(toShape: Tensor<Int32>({ shape.dimensions.map(Int32.init) }()))
   }
 
   /// Broadcast to the same shape as the specified `Tensor`.
   /// - Precondition: The specified shape must be compatible for broadcasting.
   @inlinable
+  @differentiable(wrt: self
+                  where Scalar : TensorFlowFloatingPoint)
   func broadcast<OtherScalar>(like other: Tensor<OtherScalar>) -> Tensor {
     return broadcast(toShape: other.shapeTensor)
   }
 }
 
 public extension Tensor where Scalar : Numeric {
   @inlinable
+  @differentiable(wrt: self, vjp: _vjpUnbroadcast(toShape:)
+                  where Scalar : TensorFlowFloatingPoint)
   func unbroadcast(toShape otherShape: Tensor<Int32>) -> Tensor {
     let rankDiff = (rankTensor - otherShape.scalarCountTensor).rankLifted()
     let ones: Tensor<Int32> = Raw.fill(dims: rankDiff, value: Tensor<Int32>(1))
@@ -1631,13 +1638,15 @@ public extension Tensor where Scalar : Numeric {
   }
 
   @inlinable
+  @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
   func unbroadcast<OtherScalar>(like other: Tensor<OtherScalar>) -> Tensor {
     return unbroadcast(toShape: other.shapeTensor)
   }
 
   @inlinable
+  @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint)
   func unbroadcast(to shape: TensorShape) -> Tensor {
-    return unbroadcast(toShape: Tensor<Int32>(shape.dimensions.map(Int32.init)))
+    return unbroadcast(toShape: Tensor<Int32>({ shape.dimensions.map(Int32.init) }()))
   }
 
   @inlinable
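Note the `{ shape.dimensions.map(Int32.init) }()` wrapper that replaces the direct call in `broadcast(to:)` and `unbroadcast(to:)`. Per the commit message bullet "Use closure call to remove VJPs", hoisting the array conversion into an immediately applied closure keeps that non-Tensor computation out of the differentiable expression, presumably so the differentiation transform does not demand derivatives for it. The pattern in isolation (a hypothetical, standalone sketch; `dims` is an invented name):

import TensorFlow

// The [Int] -> [Int32] work happens inside `{ ... }()`; only the resulting
// Tensor<Int32> participates in the surrounding expression.
let dims = [3, 3]
let shapeTensor = Tensor<Int32>({ dims.map(Int32.init) }())
print(shapeTensor)  // [3, 3]
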
test/TensorFlowRuntime/tensor_autodiff_runtime.swift

Lines changed: 131 additions & 0 deletions
@@ -262,4 +262,135 @@ TensorADTests.testAllBackends("Side effects") {
   expectEqual(Tensor(48), gradient(at: Tensor(4), in: bar))
 }
 
+TensorADTests.testAllBackends("broadcast(toShape:)") {
+  func foo(tensor: Tensor<Float>, shape: Tensor<Int32>) -> Tensor<Float> {
+    tensor.broadcast(toShape: shape)
+  }
+
+  var inputTensor: Tensor<Float>
+  var expected: Tensor<Float>
+  var pb: (Tensor<Float>) -> Tensor<Float>
+
+  // [3] -> [3,3]
+  pb = pullback(at: Tensor([99, 33, 55])) { x in
+    foo(tensor: x, shape: Tensor([3, 3]))
+  }
+
+  // Test 1: same shape as parameter of pullback
+  inputTensor = Tensor([
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3]]
+  )
+  expected = Tensor([3, 6, 9])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 2: different shape than parameter of pullback
+  inputTensor = Tensor([
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3]]
+  )
+  expected = Tensor([4, 8, 12])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 3: same shape as tensor we are differentiating at
+  inputTensor = Tensor([1, 2, 3])
+  expected = Tensor([1, 2, 3])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 4: extremely padded shape as tensor we are differentiating at
+  inputTensor = Tensor([[[[[[1, 2, 3]]]]]])
+  expected = Tensor([1, 2, 3])
+  expectEqual(expected, pb(inputTensor))
+
+  // [1,3] -> [3,3]
+  pb = pullback(at: Tensor([[99, 33, 55]])) { x in
+    foo(tensor: x, shape: Tensor([3, 3]))
+  }
+
+  // Test 5: same shape as parameter of pullback
+  inputTensor = Tensor([
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3]]
+  )
+  expected = Tensor([[3, 6, 9]])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 6: different shape than parameter of pullback
+  inputTensor = Tensor([
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3]]
+  )
+  expected = Tensor([[4, 8, 12]])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 7: same shape as tensor we are differentiating at
+  inputTensor = Tensor([[1, 2, 3]])
+  expected = Tensor([[1, 2, 3]])
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 8: extremely padded shape of tensor we are differentiating at
+  inputTensor = Tensor([[[[[[1, 2, 3]]]]]])
+  expected = Tensor([[1, 2, 3]])
+  expectEqual(expected, pb(inputTensor))
+}
+
+TensorADTests.testAllBackends("unbroadcast(toShape:)") {
+  func foo(tensor: Tensor<Float>, shape: Tensor<Int32>) -> Tensor<Float> {
+    tensor.unbroadcast(toShape: shape)
+  }
+
+  var inputTensor: Tensor<Float>
+  var expected: Tensor<Float>
+  var pb: (Tensor<Float>) -> Tensor<Float>
+
+  // [3,3] -> [1,3]
+  let atTensor: Tensor<Float> = Tensor([
+    [1, 2, 3],
+    [1, 2, 3],
+    [1, 2, 3]]
+  )
+  pb = pullback(at: atTensor) { x in
+    foo(tensor: x, shape: Tensor([1, 3]))
+  }
+
+  // Test 1: same shape as parameter of pullback
+  inputTensor = Tensor([[1, 2, 3]])
+  expected = atTensor
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 2: different shape than parameter of pullback
+  inputTensor = Tensor([2])
+  expected = Tensor([
+    [2, 2, 2],
+    [2, 2, 2],
+    [2, 2, 2]]
+  )
+  expectEqual(expected, pb(inputTensor))
+
+  // Test 3: same shape as tensor we are differentiating at
+  inputTensor = Tensor([
+    [8, 1, 3],
+    [8, 1, 3],
+    [8, 1, 3]]
+  )
+  expected = inputTensor
+  expectEqual(expected, pb(inputTensor))
+
+  // TODO
+  // Test 4: extremely padded shape as tensor we are differentiating at
+  // inputTensor = Tensor([
+  //   [[8, 1, 3]],
+  //   [[8, 1, 3]],
+  //   [[8, 1, 3]]]
+  // )
+  // expected = Tensor([1, 2, 3])
+  // expectEqual(expected, pb(inputTensor))
+}
+
 runAllTests()

0 commit comments