Commit 1a7fb90: merging master
2 parents 05b40de + 20a0923
10 files changed
+824 -31 lines changed
Sources/TensorFlow/Initializers.swift
Lines changed: 3 additions & 4 deletions

```diff
@@ -449,7 +449,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint,
     }
 }

-fileprivate extension Tensor where Scalar: BinaryFloatingPoint {
+fileprivate extension Tensor where Scalar: TensorFlowFloatingPoint {
     private static func glorot(
         fromStandardUniform randomUniform: __shared Tensor<Scalar>,
         shape: __shared TensorShape
@@ -459,7 +459,7 @@ fileprivate extension Tensor where Scalar: BinaryFloatingPoint {
         let fanIn = shape[shape.count - 2] * receptiveField
         let fanOut = shape[shape.count - 1] * receptiveField
         let minusOneToOne = 2 * randomUniform - 1
-        return sqrt(Scalar(6) / Scalar(fanIn + fanOut)) * minusOneToOne
+        return Scalar.sqrt(Scalar(6) / Scalar(fanIn + fanOut)) * minusOneToOne
     }
 }

@@ -483,8 +483,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     }
 }

-public extension Tensor where Scalar: BinaryFloatingPoint,
-                               Scalar.RawSignificand: FixedWidthInteger {
+public extension Tensor where Scalar: TensorFlowFloatingPoint {
     /// Performs Glorot uniform initialization for the specified shape, creating a tensor by
     /// randomly sampling scalar values from a uniform distribution between `-limit` and `limit`,
     /// where limit is `sqrt(6 / (fanIn + fanOut))` and `fanIn`/`fanOut` represent the number of
```
Sources/TensorFlow/Layers/Recurrent.swift
Lines changed: 1 addition & 1 deletion

```diff
@@ -88,7 +88,7 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorPro
     // TODO(TF-507): Revert to `typealias State = Tensor<Scalar>` after
     // SR-10697 is fixed.
     public struct State: Equatable, Differentiable, VectorProtocol, KeyPathIterable {
-        public let value: Tensor<Scalar>
+        public var value: Tensor<Scalar>
         public init(_ value: Tensor<Scalar>) {
             self.value = value
         }
```
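Loosening `value` from `let` to `var` makes `State` mutable in place, which matters for loops that thread recurrent state forward. A sketch, assuming `SimpleRNNCell.State` keeps the single-argument initializer shown above:

```swift
import TensorFlow

var state = SimpleRNNCell<Float>.State(Tensor(zeros: [1, 4]))
// With `value` declared `var`, the state can be overwritten in place
// instead of rebuilding a fresh `State` every time step.
state.value = Tensor(ones: [1, 4])
```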

Sources/TensorFlow/Loss.swift
Lines changed: 74 additions & 6 deletions

```diff
@@ -12,11 +12,35 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-/// Computes the mean squared error between predictions and labels.
+/// Returns the L1 loss between predictions and expectations.
 ///
 /// - Parameters:
 ///   - predicted: Predicted outputs from a neural network.
-///   - labels: Expected values, i.e. targets, that correspond to the correct output.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
+@differentiable(wrt: predicted)
+public func l1Loss<Scalar: TensorFlowFloatingPoint>(
+    predicted: Tensor<Scalar>, expected: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    return abs(expected - predicted).sum()
+}
+
+/// Returns the L2 loss between predictions and expectations.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
+@differentiable(wrt: predicted)
+public func l2Loss<Scalar: TensorFlowFloatingPoint>(
+    predicted: Tensor<Scalar>, expected: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    return (expected - predicted).squared().sum()
+}
+
+/// Returns the mean squared error between predictions and expectations.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
 @differentiable(wrt: predicted)
 public func meanSquaredError<Scalar: TensorFlowFloatingPoint>(
     predicted: Tensor<Scalar>, expected: Tensor<Scalar>
@@ -41,7 +65,7 @@ public func meanSquaredLogarithmicError<Scalar: TensorFlowFloatingPoint>(
     return (logPredicted - logExpected).squared().mean()
 }

-/// Computes the mean absolute error between predictions and expectations.
+/// Returns the mean absolute error between predictions and expectations.
 ///
 /// - Parameters:
 ///   - predicted: Predicted outputs from a neural network.
@@ -53,6 +77,19 @@ public func meanAbsoluteError<Scalar: TensorFlowFloatingPoint>(
     return abs(expected - predicted).mean()
 }

+/// Returns the mean absolute percentage error between predictions and expectations.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
+@differentiable(wrt: predicted)
+public func meanAbsolutePercentageError<Scalar: TensorFlowFloatingPoint>(
+    predicted: Tensor<Scalar>, expected: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    let diff = abs((expected - predicted) / abs(expected))
+    return 100 * diff.mean()
+}
+
 /// Returns the hinge loss between predictions and expectations.
 ///
 /// - Parameters:
@@ -65,6 +102,24 @@ public func hingeLoss<Scalar: TensorFlowFloatingPoint>(
     return max(Tensor(1) - expected * predicted, Tensor(0)).mean()
 }

+/// Returns the cosine similarity between predictions and expectations.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
+@differentiable(wrt: (predicted, expected))
+public func cosineSimilarity<Scalar: TensorFlowFloatingPoint>(
+    predicted: Tensor<Scalar>, expected: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    return -(expected * predicted).sum() /
+        (sqrt(expected.squared().sum()) * sqrt(predicted.squared().sum()))
+}
+
+/// Returns the squared hinge loss between predictions and expectations.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
 @differentiable(wrt: predicted)
 public func squaredHingeLoss<Scalar: TensorFlowFloatingPoint>(
     predicted: Tensor<Scalar>, expected: Tensor<Scalar>
@@ -118,7 +173,20 @@ public func poissonLoss<Scalar: TensorFlowFloatingPoint>(
     return (predicted - expected * log(predicted)).mean()
 }

-/// Computes the softmax cross entropy (categorical cross entropy) between logits and labels.
+/// Returns the Kullback-Leibler divergence (KL divergence) between expectations and predictions.
+/// Given two distributions `p` and `q`, KL divergence computes `(p * log(p / q)).sum()`.
+///
+/// - Parameters:
+///   - predicted: Predicted outputs from a neural network.
+///   - expected: Expected values, i.e. targets, that correspond to the correct output.
+@differentiable(wrt: predicted)
+public func kullbackLeiblerDivergence<Scalar: TensorFlowFloatingPoint>(
+    predicted: Tensor<Scalar>, expected: Tensor<Scalar>
+) -> Tensor<Scalar> {
+    return (expected * log(expected / predicted)).sum()
+}
+
+/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
 ///
 /// - Parameters:
 ///   - logits: One-hot encoded outputs from a neural network.
@@ -139,7 +207,7 @@ func _vjpSoftmaxCrossEntropy<Scalar: TensorFlowFloatingPoint>(
     return (loss.mean(), { v in (v / batchSize) * grad })
 }

-/// Computes the softmax cross entropy (categorical cross entropy) between logits and labels.
+/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
 ///
 /// - Parameters:
 ///   - logits: Unscaled log probabilities from a neural network.
@@ -161,7 +229,7 @@ func _vjpSoftmaxCrossEntropy<Scalar: TensorFlowFloatingPoint>(
     return (loss.mean(), { v in v / batchSize * grad })
 }

-/// Computes the sigmoid cross entropy (binary cross entropy) between logits and labels.
+/// Returns the sigmoid cross entropy (binary cross entropy) between logits and labels.
 ///
 /// The reduction is reduced over all elements. If reduced over batch size is intended, please
 /// consider to scale the loss.
```
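A quick sketch of the newly added losses in use; the input values are invented for illustration:

```swift
import TensorFlow

let predicted = Tensor<Float>([0.9, 0.4, 0.8])
let expected = Tensor<Float>([1.0, 0.5, 1.0])

// Sum of absolute differences: 0.1 + 0.1 + 0.2 = 0.4.
let l1 = l1Loss(predicted: predicted, expected: expected)
// Sum of squared differences.
let l2 = l2Loss(predicted: predicted, expected: expected)
// Mean of |(e - p) / e|, scaled by 100; about 16.7 here.
let mape = meanAbsolutePercentageError(predicted: predicted, expected: expected)
// (e * log(e / p)).sum(); only meaningful when both inputs are distributions.
let kl = kullbackLeiblerDivergence(predicted: predicted, expected: expected)
```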

Sources/TensorFlow/Operators/Math.swift
Lines changed: 143 additions & 13 deletions

```diff
@@ -15,18 +15,148 @@
 infix operator .>: ComparisonPrecedence
 infix operator .==: ComparisonPrecedence

-// `pow` is defined in Darwin/Glibc on `Float` and `Double`, but there doesn't exist a generic
-// version for `FloatingPoint`.
-// This is a manual definition.
-@inlinable
-func pow<T: BinaryFloatingPoint>(_ x: T, _ y: T) -> T {
-    T(pow(Double(x), Double(y)))
-}
-
 // TODO:
 // - Consider explicit broadcasting for elementwise binary ops when
 //   scalarization and rank getter are implemented.

+//===------------------------------------------------------------------------------------------===//
+// Generic elementary functions
+//===------------------------------------------------------------------------------------------===//
+
+extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint {
+    /// The square root of `x`.
+    ///
+    /// For real types, if `x` is negative the result is `.nan`. For complex
+    /// types there is a branch cut on the negative real axis.
+    public static func sqrt(_ x: Self) -> Self {
+        TensorFlow.sqrt(x)
+    }
+
+    /// The cosine of `x`, interpreted as an angle in radians.
+    public static func cos(_ x: Self) -> Self {
+        TensorFlow.cos(x)
+    }
+
+    /// The sine of `x`, interpreted as an angle in radians.
+    public static func sin(_ x: Self) -> Self {
+        TensorFlow.sin(x)
+    }
+
+    /// The tangent of `x`, interpreted as an angle in radians.
+    public static func tan(_ x: Self) -> Self {
+        TensorFlow.tan(x)
+    }
+
+    /// The inverse cosine of `x` in radians.
+    public static func acos(_ x: Self) -> Self {
+        TensorFlow.acos(x)
+    }
+
+    /// The inverse sine of `x` in radians.
+    public static func asin(_ x: Self) -> Self {
+        TensorFlow.asin(x)
+    }
+
+    /// The inverse tangent of `x` in radians.
+    public static func atan(_ x: Self) -> Self {
+        TensorFlow.atan(x)
+    }
+
+    /// The hyperbolic cosine of `x`.
+    public static func cosh(_ x: Self) -> Self {
+        TensorFlow.cosh(x)
+    }
+
+    /// The hyperbolic sine of `x`.
+    public static func sinh(_ x: Self) -> Self {
+        TensorFlow.sinh(x)
+    }
+
+    /// The hyperbolic tangent of `x`.
+    public static func tanh(_ x: Self) -> Self {
+        TensorFlow.tanh(x)
+    }
+
+    /// The inverse hyperbolic cosine of `x`.
+    public static func acosh(_ x: Self) -> Self {
+        TensorFlow.acosh(x)
+    }
+
+    /// The inverse hyperbolic sine of `x`.
+    public static func asinh(_ x: Self) -> Self {
+        TensorFlow.asinh(x)
+    }
+
+    /// The inverse hyperbolic tangent of `x`.
+    public static func atanh(_ x: Self) -> Self {
+        TensorFlow.atanh(x)
+    }
+
+    /// The exponential function applied to `x`, or `e**x`.
+    public static func exp(_ x: Self) -> Self {
+        TensorFlow.exp(x)
+    }
+
+    /// Two raised to the power `x`.
+    public static func exp2(_ x: Self) -> Self {
+        TensorFlow.exp2(x)
+    }
+
+    /// Ten raised to the power `x`.
+    public static func exp10(_ x: Self) -> Self {
+        TensorFlow.exp10(x)
+    }
+
+    /// `exp(x) - 1` evaluated so as to preserve accuracy close to zero.
+    public static func expm1(_ x: Self) -> Self {
+        TensorFlow.expm1(x)
+    }
+
+    /// The natural logarithm of `x`.
+    public static func log(_ x: Self) -> Self {
+        TensorFlow.log(x)
+    }
+
+    /// The base-two logarithm of `x`.
+    public static func log2(_ x: Self) -> Self {
+        TensorFlow.log2(x)
+    }
+
+    /// The base-ten logarithm of `x`.
+    public static func log10(_ x: Self) -> Self {
+        TensorFlow.log10(x)
+    }
+
+    /// `log(1 + x)` evaluated so as to preserve accuracy close to zero.
+    public static func log1p(_ x: Self) -> Self {
+        TensorFlow.log1p(x)
+    }
+
+    /// `exp(y log(x))` computed without loss of intermediate precision.
+    ///
+    /// For real types, if `x` is negative the result is NaN, even if `y` has
+    /// an integral value. For complex types, there is a branch cut on the
+    /// negative real axis.
+    public static func pow(_ x: Self, _ y: Self) -> Self {
+        TensorFlow.pow(x, y)
+    }
+
+    /// `x` raised to the `n`th power.
+    ///
+    /// The product of `n` copies of `x`.
+    public static func pow(_ x: Self, _ n: Int) -> Self {
+        TensorFlow.pow(x, n)
+    }
+
+    /// The `n`th root of `x`.
+    ///
+    /// For real types, if `x` is negative and `n` is even, the result is NaN.
+    /// For complex types, there is a branch cut along the negative real axis.
+    public static func root(_ x: Self, _ n: Int) -> Self {
+        TensorFlow.root(x, n)
+    }
+}
+
 //===------------------------------------------------------------------------------------------===//
 // Vector Space
 //===------------------------------------------------------------------------------------------===//
@@ -876,7 +1006,7 @@ public func pow<T: TensorFlowFloatingPoint>(_ x: Tensor<T>, _ n: Int) -> Tensor<
 @inlinable
 // @differentiable
 public func root<T: TensorFlowFloatingPoint>(_ x: Tensor<T>, _ n: Int) -> Tensor<T> {
-    pow(x, Tensor(T(1) / T(n)))
+    sign(x) * pow(abs(x), Tensor(T(1) / T(n)))
 }

 /// Computes the element-wise maximum of two tensors.
@@ -1580,7 +1710,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     @inlinable
     @differentiable(wrt: self)
     func standardDeviation(squeezingAxes axes: Tensor<Int32>) -> Tensor {
-        sqrt(variance(squeezingAxes: axes))
+        TensorFlow.sqrt(variance(squeezingAxes: axes))
     }

     /// Returns the standard deviation of the elements along the specified axes. The reduced
@@ -1591,7 +1721,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     @inlinable
     @differentiable(wrt: self)
     func standardDeviation(squeezingAxes axes: [Int]) -> Tensor {
-        sqrt(variance(squeezingAxes: axes))
+        TensorFlow.sqrt(variance(squeezingAxes: axes))
     }

     /// Returns the standard deviation of the elements along the specified axes. The reduced
@@ -1625,7 +1755,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     @inlinable
     @differentiable(wrt: self)
     func standardDeviation(alongAxes axes: Tensor<Int32>) -> Tensor {
-        sqrt(variance(alongAxes: axes))
+        TensorFlow.sqrt(variance(alongAxes: axes))
     }

     /// Returns the standard deviation of the elements along the specified axes. The reduced
@@ -1649,7 +1779,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     @inlinable
     @differentiable(wrt: self)
     func standardDeviation(alongAxes axes: Int...) -> Tensor {
-        sqrt(variance(alongAxes: axes))
+        TensorFlow.sqrt(variance(alongAxes: axes))
    }
 }
```
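The `ElementaryFunctions` conformance is what enables `Scalar.sqrt` in Initializers.swift above, and it lets the same generic code run on scalars and tensors alike. A sketch, assuming the toolchain's standard-library conformance of `Float` to `ElementaryFunctions` (the `softplusLike` name is invented):

```swift
import TensorFlow

// Generic over any ElementaryFunctions conformer: Float, Double, or Tensor.
func softplusLike<T: ElementaryFunctions>(_ x: T) -> T {
    // log(1 + exp(x)), written with only `ElementaryFunctions` requirements.
    T.log1p(T.exp(x))
}

let scalar: Float = softplusLike(0.5)
let tensor: Tensor<Float> = softplusLike(Tensor([0.5, -1.0]))
```

Separately, the `sign(x) * pow(abs(x), 1/n)` rewrite of the free `root` function means odd roots of negative values now come out signed: the cube root of -8 evaluates to -2 rather than NaN.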

Sources/TensorFlow/Operators/NN.swift
Lines changed: 1 addition & 1 deletion

```diff
@@ -61,7 +61,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
         let norm = diff * inv

         let dNorm = v * scale
-        let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3)
+        let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * TensorFlow.pow(inv, -3)
         // Note: `dMean` is split into two lines to avoid the "compiler is unable to type-check
         // this expression in reasonable time" error.
         var dMean = (-dNorm * inv).sum(alongAxes: axis)
```
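This one-line change is a disambiguation forced by the new conformance: inside a `Tensor` extension, an unqualified `pow` can now resolve to the static `ElementaryFunctions` member instead of the free function, so the call is qualified with the module name. A hypothetical illustration of the pattern (the helper is not from the commit):

```swift
import TensorFlow

extension Tensor where Scalar: TensorFlowFloatingPoint {
    // Qualifying with `TensorFlow.` picks the free function unambiguously
    // now that `Tensor` also exposes a static `pow` member.
    func reciprocalCubed() -> Tensor {
        TensorFlow.pow(self, -3)
    }
}
```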

Sources/TensorFlow/Optimizer.swift
Lines changed: 1 addition & 1 deletion

```diff
@@ -291,7 +291,7 @@ public class RiemannSGD<Model: Layer, Scalar: FloatingPoint>: Optimizer

     public func update(_ model: inout Model.AllDifferentiableVariables,
                        along direction: Model.TangentVector) {
-        model = model.moved(along: learningRate * (.zero - direction))
+        model.move(along: learningRate * (.zero - direction))
     }
 }
```
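`move(along:)` is the mutating counterpart of the functional `moved(along:)`, so the model is now updated in place. A self-contained sketch of the same idea on a toy `Differentiable` type (names invented):

```swift
import TensorFlow

struct Point: Differentiable {
    var x: Float
    var y: Float
}

var p = Point(x: 1, y: 2)
let direction = Point.TangentVector(x: 0.5, y: -0.5)
// In-place update; the older style was `p = p.moved(along: direction)`.
p.move(along: direction)
```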

Tests/TensorFlowTests/Helpers.swift
Lines changed: 3 additions & 1 deletion

```diff
@@ -21,7 +21,9 @@ internal func assertEqual<T: TensorFlowFloatingPoint>(
 ) {
     for (x, y) in zip(x, y) {
         if x.isNaN || y.isNaN {
-            XCTAssertTrue(x.isNaN && y.isNaN, message, file: file, line: line)
+            XCTAssertTrue(x.isNaN && y.isNaN,
+                          "\(x) is not equal to \(y) - \(message)",
+                          file: file, line: line)
             continue
         }
         XCTAssertEqual(x, y, accuracy: accuracy, message, file: file, line: line)
```
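With the richer message, a NaN mismatch now reports the offending values instead of only the caller's message. Assuming the helper keeps the signature implied above, a failing call might read:

```swift
// Hypothetical test snippet: the assertion failure now includes the values,
// e.g. "nan is not equal to 1.0 - gradients should match".
assertEqual([Float.nan], [1.0], accuracy: 1e-6, "gradients should match")
```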
