[AutoDiff] Destroy all pullback indirect results after adjoint accumulation. (#27711)

rxwei · web-flow · commit 0d17ddfa7c6f · 2019-10-15T18:02:36.000-07:00
When we differentiate a function (example below) with respect to a proper subset of its indirect parameters, and the function only has a derivative with respect to a proper superset of those indirect parameters, the pullback returns more indirect results than we need. However, the unneeded indirect results were not destroyed, which caused a memory lifetime verification failure. This patch fixes the bug by destroying all pullback indirect results instead of destroying only the ones needed for calculating the derivative.

```swift
@differentiable(wrt: x)
func foo<T: Differentiable>(_ x: T, _ y: T, apply: @differentiable (T, T) -> T) -> T {
  return apply(x, y)
}
```

This patch also uncomments a test in test/AutoDiff/superset_adjoint.swift that now passes, which resolves a FIXME.

Resolves [TF-914](https://bugs.swift.org/browse/TF-914).
diff --git a/lib/SILOptimizer/Mandatory/Differentiation.cpp b/lib/SILOptimizer/Mandatory/Differentiation.cpp
@@ -6955,7 +6955,6 @@ class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {
       auto tan = *allResultsIt++;
       if (tan->getType().isAddress()) {
         addToAdjointBuffer(bb, origArg, tan, loc);
-        builder.emitDestroyAddrAndFold(loc, tan);
       } else {
         if (origArg->getType().isAddress()) {
           auto *tmpBuf = builder.createAllocStack(loc, tan->getType());
@@ -6971,9 +6970,11 @@ class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {
         }
       }
     }
-    // Deallocate pullback indirect results.
-    for (auto *alloc : reversed(pullbackIndirectResults))
+    // Destroy and deallocate pullback indirect results.
+    for (auto *alloc : reversed(pullbackIndirectResults)) {
+      builder.emitDestroyAddrAndFold(loc, alloc);
       builder.createDeallocStack(loc, alloc);
+    }
   }
 
   /// Handle `struct` instruction.
diff --git a/test/AutoDiff/superset_adjoint.swift b/test/AutoDiff/superset_adjoint.swift
@@ -41,30 +41,34 @@ SupersetVJPTests.test("SubsetOfSubset") {
   expectEqual(0, gradient(at: 0, in: { x in foo(x, 0, 0) }))
 }
 
+SupersetVJPTests.test("ApplySubset") {
+  // TF-914
+  @differentiable(wrt: x)
+  func foo<T: Differentiable>(_ x: T, _ y: T, apply: @differentiable (T, T) -> T) -> T {
+    return apply(x, y)
+  }
+  expectEqual(1, gradient(at: Float(0)) { x in foo(x, 0) { $0 + $1 } })
+}
+
 // FIXME: The expression `(+) as @differentiable (Float, @nondiff Float) -> Float)`
 // forms a curry thunk of `Float.+` before conversion to @differentiable, and AD
 // doesn't know how to differentiate the curry thunk, so it produces a
 // "function is not differentiable" error.
-// FIXME: Propagate wrt indices correctly so that this actually takes the
-// gradient wrt only the first parameter, as intended.
 // SupersetVJPTests.test("CrossModule") {
-//   expectEqual(1, gradient(at: 1, 2, in: (+) as @differentiable (Float, @nondiff Float) -> Float))
+//   let grad = gradient(at: Float(1), Float(2), in: (+) as @differentiable (Float, @nondiff Float) -> Float)
+//   expectEqual(Float(1), grad)
 // }
 
-// FIXME: Unbreak this one.
-//
-// @differentiable(wrt: (.0, .1), vjp: dx_T)
-// func x_T<T : Differentiable>(_ x: Float, _ y: T) -> Float {
-//   if x > 1000 {
-//     return x
-//   }
-//   return x
-// }
-// func dx_T<T>(_ x: Float, _ y: T) -> (Float, (Float) -> (Float, T.TangentVector)) {
-//   return (x_T(x, y), { seed in (x, y) })
-// }
-// SupersetVJPTests.test("IndirectResults") {
-//   expectEqual(3, gradient(at: 2) { x in x_T(x, Float(3)) })
-// }
+SupersetVJPTests.test("IndirectResults") {
+  @differentiable(wrt: (x, y), vjp: dx_T)
+  func x_T<T : Differentiable>(_ x: Float, _ y: T) -> Float {
+    if x > 1000 { return x }
+    return x
+  }
+  func dx_T<T : Differentiable>(_ x: Float, _ y: T) -> (Float, (Float) -> (Float, T.TangentVector)) {
+    return (x_T(x, y), { v in (x * v, .zero) })
+  }
+  expectEqual(2, gradient(at: 2) { x in x_T(x, Float(3)) })
+}
 
 runAllTests()

Original file line number	Diff line number	Diff line change
`@@ -6955,7 +6955,6 @@ class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {`
`6955`	`6955`	`auto tan = *allResultsIt++;`
`6956`	`6956`	`if (tan->getType().isAddress()) {`
`6957`	`6957`	`addToAdjointBuffer(bb, origArg, tan, loc);`
`6958`		`- builder.emitDestroyAddrAndFold(loc, tan);`
`6959`	`6958`	`} else {`
`6960`	`6959`	`if (origArg->getType().isAddress()) {`
`6961`	`6960`	`auto *tmpBuf = builder.createAllocStack(loc, tan->getType());`
`@@ -6971,9 +6970,11 @@ class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {`
`6971`	`6970`	`}`
`6972`	`6971`	`}`
`6973`	`6972`	`}`
`6974`		`- // Deallocate pullback indirect results.`
`6975`		`- for (auto *alloc : reversed(pullbackIndirectResults))`
	`6973`	`+ // Destroy and deallocate pullback indirect results.`
	`6974`	`+ for (auto *alloc : reversed(pullbackIndirectResults)) {`
	`6975`	`+ builder.emitDestroyAddrAndFold(loc, alloc);`
`6976`	`6976`	`builder.createDeallocStack(loc, alloc);`
	`6977`	`+ }`
`6977`	`6978`	`}`
`6978`	`6979`
`6979`	`6980`	/// Handle `struct` instruction.