Commit aabbc18

[Swift] Remove custom derivative for LearningPhaseDependent.forward(_:). (#309)
Work around issues with `@differentiable` + `@derivative` attributes with different derivative generic signatures. Automatic differentiation can handle this enum `switch` now, so a custom derivative is no longer necessary. https://bugs.swift.org/browse/TF-1037 tracks this issue. Related discussion: swiftlang/swift#28621 (comment)
1 parent f2df87a commit aabbc18
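
Note: the change relies on the reverse-mode differentiation transform handling the enum `switch` in `forward(_:)` directly, so no hand-written pullback needs to be registered. A minimal sketch of the same pattern with hypothetical stand-in names (`Phase`, `forward`), assuming a toolchain where the `_Differentiation` module is available and the attribute is spelled `@differentiable(reverse)`:

```swift
import _Differentiation

enum Phase { case training, inference }

// Hypothetical stand-ins for forwardTraining/forwardInference.
@differentiable(reverse, wrt: x)
func forward(_ x: Float, phase: Phase) -> Float {
    // Ordinary control flow: only the branch actually taken is differentiated.
    switch phase {
    case .training:  return x * x
    case .inference: return 2 * x
    }
}

// No @derivative(of: forward) is registered anywhere; AD synthesizes the pullback.
let (value, pullback) = valueWithPullback(at: Float(3)) { x in forward(x, phase: .training) }
print(value, pullback(1))  // 9.0 6.0
```

Because the switch only selects which differentiable branch runs, the derivative of the taken branch is exactly the derivative of the whole function at that point, which is why the hand-written `gradForward` below is redundant.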

9 files changed: +1, -144 lines
swift/07_batchnorm.ipynb

Lines changed: 0 additions & 16 deletions
@@ -306,29 +306,13 @@
     "}\n",
     "\n",
     "extension LearningPhaseDependent {\n",
-    "    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer\n",
-    "    // protocol requirement, even though there is a `@differentiating(forward)` method below.\n",
-    "    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,\n",
-    "    // some require it. Investigate.\n",
     "    @differentiable\n",
     "    public func forward(_ input: Input) -> Output {\n",
     "        switch Context.local.learningPhase {\n",
     "        case .training: return forwardTraining(input)\n",
     "        case .inference: return forwardInference(input)\n",
     "        }\n",
     "    }\n",
-    "\n",
-    "    @differentiating(forward)\n",
-    "    func gradForward(_ input: Input) ->\n",
-    "        (value: Output, pullback: (Self.Output.TangentVector) ->\n",
-    "            (Self.TangentVector, Self.Input.TangentVector)) {\n",
-    "        switch Context.local.learningPhase {\n",
-    "        case .training:\n",
-    "            return valueWithPullback(at: input) { $0.forwardTraining ($1) }\n",
-    "        case .inference:\n",
-    "            return valueWithPullback(at: input) { $0.forwardInference($1) }\n",
-    "        }\n",
-    "    }\n",
     "}"
    ]
   },

swift/FastaiNotebook_07_batchnorm/Sources/FastaiNotebook_07_batchnorm/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_08_data_block/Sources/FastaiNotebook_08_data_block/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_08a_heterogeneous_dictionary/Sources/FastaiNotebook_08a_heterogeneous_dictionary/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_08c_data_block_generic/Sources/FastaiNotebook_08c_data_block_generic/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_09_optimizer/Sources/FastaiNotebook_09_optimizer/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_10_mixup_ls/Sources/FastaiNotebook_10_mixup_ls/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/FastaiNotebook_11_imagenette/Sources/FastaiNotebook_11_imagenette/07_batchnorm.swift

Lines changed: 0 additions & 16 deletions
@@ -24,29 +24,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
     @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(input)
         case .inference: return forwardInference(input)
         }
     }
-
-    @differentiating(forward)
-    func gradForward(_ input: Input) ->
-        (value: Output, pullback: (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining ($1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference($1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{

swift/Runnable11/Sources/Runnable11/07_batchnorm.swift

Lines changed: 1 addition & 16 deletions
@@ -22,28 +22,13 @@ public protocol LearningPhaseDependent: FALayer {
 }
 
 public extension LearningPhaseDependent {
-    // This `@differentiable` attribute is necessary, to tell the compiler that this satisfies the FALayer
-    // protocol requirement, even though there is a `@differentiating(forward)` method below.
-    // TODO: It seems nondeterministically necessary. Some subsequent notebooks import this successfully without it,
-    // some require it. Investigate.
-    @differentiable(vjp: gradForward)
+    @differentiable
     public func forward(_ input: Input) -> Output {
         switch Context.local.learningPhase {
         case .training: return forwardTraining(to: input)
         case .inference: return forwardInference(to: input)
         }
     }
-
-    func gradForward(_ input: Input) ->
-        (Output, (Self.Output.TangentVector) ->
-            (Self.TangentVector, Self.Input.TangentVector)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) { $0.forwardTraining(to: $1) }
-        case .inference:
-            return valueWithPullback(at: input) { $0.forwardInference(to: $1) }
-        }
-    }
 }
 
 public protocol Norm: Layer where Input == Tensor<Scalar>, Output == Tensor<Scalar>{
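
The deleted code uses two older derivative-registration spellings: `@differentiating(forward)` in the notebook-derived packages and `@differentiable(vjp: gradForward)` in Runnable11; both were later folded into `@derivative(of:)`. None of that is needed here, but if a custom pullback were ever reintroduced, a hedged sketch of the current spelling on a hypothetical `Scale` type (not part of this repository; assumes the `_Differentiation` module and the `@differentiable(reverse)` spelling) might look like:

```swift
import _Differentiation

struct Scale: Differentiable {
    var factor: Float

    @differentiable(reverse)
    func forward(_ x: Float) -> Float { factor * x }

    // Registers a custom reverse-mode derivative for forward(_:).
    // The pullback returns tangents with respect to (self, x),
    // mirroring the shape of the removed gradForward.
    @derivative(of: forward)
    func vjpForward(_ x: Float)
        -> (value: Float, pullback: (Float) -> (TangentVector, Float)) {
        return (forward(x), { v in (TangentVector(factor: x * v), self.factor * v) })
    }
}
```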
