
Commit 200ccd1

[WIP] Change Dense.bias type to Optional
1 parent 5d98153 · commit 200ccd1

File tree: 3 files changed, +211 −8 lines


Sources/TensorFlow/Layers/Dense.swift

Lines changed: 79 additions & 8 deletions
@@ -28,18 +28,25 @@ import _Differentiation
 public struct Dense<Scalar: TensorFlowFloatingPoint>: Layer {
   /// The weight matrix.
   public var weight: Tensor<Scalar>
-  /// The bias vector.
-  public var bias: Tensor<Scalar>
+  /// The optional bias vector.
+  public var optionalBias: Tensor<Scalar>?
   /// The element-wise activation function.
   @noDerivative public let activation: Activation
   /// Indicates whether this is a batched dense layer.
   @noDerivative internal let batched: Bool
-  /// Workaround optionals not being handled by AD
-  @noDerivative private let useBias: Bool
 
   /// The element-wise activation function type.
   public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
 
+  /// The bias vector.
+  ///
+  /// - Note: returns `Tensor.zero` if the underlying `optionalBias` does not exist.
+  //@differentiable
+  public var bias: Tensor<Scalar> {
+    get { optionalBias ?? .zero }
+    set { optionalBias = newValue }
+  }
+
   /// Creates an instance from the given weight, optional bias, and activation function.
   ///
   /// - Note: currently, `weight` is the only differentiability parameter. `bias` can be made a
@@ -55,10 +62,9 @@ public struct Dense<Scalar: TensorFlowFloatingPoint>: Layer {
     precondition(
       bias == nil || bias!.rank <= 2, "The rank of the 'bias' tensor must be less than 3.")
     self.weight = weight
-    self.bias = bias ?? .zero
+    self.optionalBias = bias
     self.activation = activation
     self.batched = weight.rank == 3
-    useBias = (bias != nil)
   }
 
   // TODO(TF-433): Remove custom derivative after `try_apply` differentiation is supported.
@@ -81,9 +87,15 @@ public struct Dense<Scalar: TensorFlowFloatingPoint>: Layer {
   public func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
     if batched {
       let hidden = matmul(input.expandingShape(at: 1), weight).squeezingShape(at: 1)
-      return activation(useBias ? hidden + bias : hidden)
+      if let bias = optionalBias {
+        return activation(hidden + bias)
+      }
+      return activation(hidden)
+    }
+    if let bias = optionalBias {
+      return activation(matmul(input, weight) + bias)
     }
-    return activation(useBias ? (matmul(input, weight) + bias) : matmul(input, weight))
+    return activation(matmul(input, weight))
   }
 }
 
@@ -106,9 +118,68 @@ extension Dense {
     weightInitializer: ParameterInitializer<Scalar> = glorotUniform(),
     biasInitializer: ParameterInitializer<Scalar> = zeros()
   ) {
+    print("Init OLD")
     self.init(
       weight: weightInitializer([inputSize, outputSize]),
       bias: useBias ? biasInitializer([outputSize]) : nil,
       activation: activation)
   }
+
+  /// Creates a `Dense` layer with the specified input size, output size, and element-wise
+  /// activation function. The weight matrix is created with shape `[inputSize, outputSize]` and
+  /// the bias vector is created with shape `[outputSize]`.
+  ///
+  /// - Parameters:
+  ///   - inputSize: The dimensionality of the input space.
+  ///   - outputSize: The dimensionality of the output space.
+  ///   - activation: The activation function to use. The default value is `identity(_:)`.
+  ///   - weightInitializer: Initializer to use for `weight`.
+  ///   - biasInitializer: Initializer to use for `bias`.
+  public init(
+    inputSize: Int,
+    outputSize: Int,
+    activation: @escaping Activation = identity,
+    weightInitializer: ParameterInitializer<Scalar> = glorotUniform(),
+    biasInitializer: ParameterInitializer<Scalar>? = nil
+  ) {
+    print("Init NEW")
+    self.init(
+      weight: weightInitializer([inputSize, outputSize]),
+      bias: biasInitializer?([outputSize]),
+      activation: activation)
+  }
 }
+
+extension Dense.TangentVector {
+  public init(
+    weight: Tensor<Scalar>,
+    bias: Tensor<Scalar>
+  ) {
+    self.init(weight: weight, optionalBias: .init(bias))
+  }
+
+  /// The bias vector.
+  ///
+  /// - Note: returns `Tensor.zero` if the underlying `optionalBias` does not exist.
+  //@differentiable
+  public var bias: Tensor<Scalar> {
+    get { optionalBias.value ?? .zero }
+    set { optionalBias.value = newValue }
+  }
+}
+
+/* extension Optional : KeyPathIterable {
+  public var allKeyPaths: [PartialKeyPath<Self>] {
+    if self != nil {
+      return [ \Optional.unsafelyUnwrapped ]
+    }
+    return []
+  }
+
+  public typealias AllKeyPaths = [PartialKeyPath<Self>]
+}
+
+extension Optional.TangentVector : KeyPathIterable
+{
+
+} */
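
For orientation, a minimal usage sketch of the changed API (hedged: the shapes and the `relu` activation here are illustrative; the entry point is the `init(weight:bias:activation:)` shown in the hunks above):

import TensorFlow

let weight = Tensor<Float>(randomNormal: [4, 2])

// Bias present: stored as `optionalBias` and added in the forward pass.
let biased = Dense<Float>(weight: weight, bias: Tensor(zeros: [2]), activation: relu)

// Bias absent: `optionalBias` is nil; the computed `bias` property yields
// `.zero` instead of trapping, and `callAsFunction` skips the add entirely.
let unbiased = Dense<Float>(weight: weight, bias: nil, activation: relu)

let x = Tensor<Float>(randomNormal: [1, 4])
print(biased(x))    // activation(matmul(x, weight) + bias)
print(unbiased(x))  // activation(matmul(x, weight))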

Sources/TensorFlow/StdlibExtensions.swift

Lines changed: 131 additions & 0 deletions
@@ -282,3 +282,134 @@ extension Collection {
   /// Returns the `n`th position in `self`.
   func index(atOffset n: Int) -> Index { index(startIndex, offsetBy: n) }
 }
+
+extension Optional: EuclideanDifferentiable
+where Wrapped: EuclideanDifferentiable {
+  public var differentiableVectorView: TangentVector { .init(self?.differentiableVectorView) }
+}
+
+extension Optional.TangentVector: ElementaryFunctions
+where Wrapped.TangentVector: ElementaryFunctions {
+  /// The square root of `x`.
+  ///
+  /// For real types, if `x` is negative the result is `.nan`. For complex
+  /// types there is a branch cut on the negative real axis.
+  public static func sqrt(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.sqrt)) }
+
+  /// The cosine of `x`, interpreted as an angle in radians.
+  public static func cos(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.cos)) }
+
+  /// The sine of `x`, interpreted as an angle in radians.
+  public static func sin(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.sin)) }
+
+  /// The tangent of `x`, interpreted as an angle in radians.
+  public static func tan(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.tan)) }
+
+  /// The inverse cosine of `x` in radians.
+  public static func acos(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.acos)) }
+
+  /// The inverse sine of `x` in radians.
+  public static func asin(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.asin)) }
+
+  /// The inverse tangent of `x` in radians.
+  public static func atan(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.atan)) }
+
+  /// The hyperbolic cosine of `x`.
+  public static func cosh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.cosh)) }
+
+  /// The hyperbolic sine of `x`.
+  public static func sinh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.sinh)) }
+
+  /// The hyperbolic tangent of `x`.
+  public static func tanh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.tanh)) }
+
+  /// The inverse hyperbolic cosine of `x`.
+  public static func acosh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.acosh)) }
+
+  /// The inverse hyperbolic sine of `x`.
+  public static func asinh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.asinh)) }
+
+  /// The inverse hyperbolic tangent of `x`.
+  public static func atanh(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.atanh)) }
+
+  /// The exponential function applied to `x`, or `e**x`.
+  public static func exp(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.exp)) }
+
+  /// Two raised to the power `x`.
+  public static func exp2(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.exp2)) }
+
+  /// Ten raised to the power `x`.
+  public static func exp10(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.exp10)) }
+
+  /// `exp(x) - 1` evaluated so as to preserve accuracy close to zero.
+  public static func expm1(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.expm1)) }
+
+  /// The natural logarithm of `x`.
+  public static func log(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.log)) }
+
+  /// The base-two logarithm of `x`.
+  public static func log2(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.log2)) }
+
+  /// The base-ten logarithm of `x`.
+  public static func log10(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.log10)) }
+
+  /// `log(1 + x)` evaluated so as to preserve accuracy close to zero.
+  public static func log1p(_ x: Self) -> Self { .init(x.value.map(Wrapped.TangentVector.log1p)) }
+
+  /// `exp(y log(x))` computed without loss of intermediate precision.
+  ///
+  /// For real types, if `x` is negative the result is NaN, even if `y` has
+  /// an integral value. For complex types, there is a branch cut on the
+  /// negative real axis.
+  public static func pow(_ x: Self, _ y: Self) -> Self {
+    .init(x.value.flatMap { x in y.value.map { y in Wrapped.TangentVector.pow(x, y) } })
+  }
+
+  /// `x` raised to the `n`th power.
+  ///
+  /// The product of `n` copies of `x`.
+  public static func pow(_ x: Self, _ n: Int) -> Self { .init(x.value.map { x in Wrapped.TangentVector.pow(x, n) }) }
+
+  /// The `n`th root of `x`.
+  ///
+  /// For real types, if `x` is negative and `n` is even, the result is NaN.
+  /// For complex types, there is a branch cut along the negative real axis.
+  public static func root(_ x: Self, _ n: Int) -> Self { .init(x.value.map { x in Wrapped.TangentVector.root(x, n) }) }
+}
+
+extension Optional.TangentVector: PointwiseMultiplicative
+where Wrapped.TangentVector: PointwiseMultiplicative {
+  public static var one: Self {
+    .init(Wrapped.TangentVector.one)
+  }
+
+  public var reciprocal: Self { .init(value.map { $0.reciprocal }) }
+
+  public static func .* (lhs: Self, rhs: Self) -> Self {
+    switch (lhs.value, rhs.value) {
+    case let (x?, y?): return Self(x .* y)
+    default: return Self(nil)
+    }
+  }
+
+  public static func .*= (lhs: inout Self, rhs: Self) {
+    lhs = lhs .* rhs
+  }
+}
+
+extension Optional.TangentVector: VectorProtocol
+where Wrapped.TangentVector: VectorProtocol {
+  public typealias VectorSpaceScalar = Wrapped.TangentVector.VectorSpaceScalar
+
+  public func adding(_ x: VectorSpaceScalar) -> Self { .init(value.map { $0.adding(x) }) }
+
+  public mutating func add(_ x: VectorSpaceScalar) { value?.add(x) }
+
+  public func subtracting(_ x: VectorSpaceScalar) -> Self { .init(value.map { $0.subtracting(x) }) }
+
+  public mutating func subtract(_ x: VectorSpaceScalar) { value?.subtract(x) }
+
+  public func scaled(by scale: VectorSpaceScalar) -> Self { .init(value.map { $0.scaled(by: scale) }) }
+
+  public mutating func scale(by scale: VectorSpaceScalar) {
+    value?.scale(by: scale)
+  }
+}
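
These conformances matter because generic optimizers constrain `Model.TangentVector` to protocols such as `VectorProtocol`, `PointwiseMultiplicative`, and `ElementaryFunctions`; once `Optional`'s tangent satisfies them, a tangent containing an optional bias flows through unchanged. A hedged sketch of the pattern (the helper below is illustrative, not library API):

import TensorFlow

// Mirrors the kind of update term an optimizer such as Adam computes:
// elementwise square, square root, and scaling must all be defined on the
// tangent type, including tangents with optional components.
func rmsTerm<T: VectorProtocol & PointwiseMultiplicative & ElementaryFunctions>(
  _ gradient: T, scale: T.VectorSpaceScalar
) -> T {
  // For Optional.TangentVector, each operation maps over the wrapped value
  // and is a no-op (nil) when the component is absent.
  T.sqrt(gradient .* gradient).scaled(by: scale)
}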

Tests/TensorFlowTests/TrivialModelTests.swift

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ final class TrivialModelTests: XCTestCase {
           return meanSquaredError(predicted: ŷ, expected: y)
         }
         optimizer.update(&classifier, along: 𝛁model)
+        dump(𝛁model)
       }
     }
     let ŷ = classifier.inferring(from: x)