This repository was archived by the owner on Jul 1, 2023. It is now read-only.

[WIP] Equations for losses #579

Merged · 23 commits · Feb 5, 2020
Changes from 9 commits
26 changes: 24 additions & 2 deletions Sources/TensorFlow/Loss.swift
@@ -13,6 +13,8 @@
// limitations under the License.

/// Returns the L1 loss between predictions and expectations.
/// Given `y_pred` and `y_true` vectors, the L1 loss is computed as follows:
/// `reduction(abs(y_pred - y_true))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -28,6 +30,8 @@ public func l1Loss<Scalar: TensorFlowFloatingPoint>(
}
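As a quick numeric check of the formula above, here is a minimal usage sketch. It assumes `import TensorFlow`, the `predicted:expected:` labels from the signature in this hunk, and a sum as the default reduction; all values are illustrative, and the later sketches reuse these tensors and assumptions.

import TensorFlow

// abs(y_pred - y_true) = [0.5, 0.5, 1.0, 0.0].
let yPred = Tensor<Float>([1.0, 2.0, 3.0, 4.0])
let yTrue = Tensor<Float>([0.5, 2.5, 2.0, 4.0])
// With the assumed sum reduction this yields 2.0.
let l1 = l1Loss(predicted: yPred, expected: yTrue)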

/// Returns the L2 loss between predictions and expectations.
/// Given `y_pred` and `y_true`, the L2 loss is computed as follows:
/// `reduction((y_pred - y_true)^2)`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -43,6 +47,7 @@ public func l2Loss<Scalar: TensorFlowFloatingPoint>(
}
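The same tensors run through the L2 formula (sum reduction assumed, as above):

// (y_pred - y_true)^2 = [0.25, 0.25, 1.0, 0.0], so a sum reduction yields 1.5.
let l2 = l2Loss(predicted: yPred, expected: yTrue)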

/// Returns the mean absolute error between predictions and expectations.
/// Applies the mean reduction to the L1 loss: `mean(abs(y_pred - y_true))`.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -56,6 +61,7 @@ public func meanAbsoluteError<Scalar: TensorFlowFloatingPoint>(
}
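Continuing the sketch, the mean reduction over the same absolute differences:

// mean([0.5, 0.5, 1.0, 0.0]) = 0.5.
let mae = meanAbsoluteError(predicted: yPred, expected: yTrue)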

/// Returns the mean squared error between predictions and expectations.
/// Applies the mean reduction to the L2 loss: `mean((y_pred - y_true)^2)`.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -69,6 +75,8 @@ public func meanSquaredError<Scalar: TensorFlowFloatingPoint>(
}
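And the mean over the squared differences:

// mean([0.25, 0.25, 1.0, 0.0]) = 0.375.
let mse = meanSquaredError(predicted: yPred, expected: yTrue)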

/// Returns the mean squared logarithmic error between predictions and expectations.
/// Given `y_pred` and `y_true`, the mean squared logarithmic error is computed as follows:
/// `mean((log(y_pred + 1) - log(y_true + 1))^2)`
///
/// - Note: Negative tensor entries will be clamped at `0` to avoid undefined
/// logarithmic behavior, as `log(_:)` is undefined for negative reals.
@@ -87,6 +95,8 @@ public func meanSquaredLogarithmicError<Scalar: TensorFlowFloatingPoint>(
}
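A sketch with values chosen so the logarithms come out exactly (e - 1 ≈ 1.71828; the clamp-and-add-one formulation above is assumed):

// log(y_pred + 1) = [1, 0] and log(y_true + 1) = [0, 1], so the mean of the
// squared differences is 1.0.
let mslePred = Tensor<Float>([1.71828, 0.0])
let msleTrue = Tensor<Float>([0.0, 1.71828])
let msle = meanSquaredLogarithmicError(predicted: mslePred, expected: msleTrue)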

/// Returns the mean absolute percentage error between predictions and expectations.
/// Given `y_pred` and `y_true`, the mean absolute percentage error is computed as follows:
/// `100 * mean(abs((y_true - y_pred) / abs(y_true)))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -100,6 +110,8 @@ public func meanAbsolutePercentageError<Scalar: TensorFlowFloatingPoint>(
}
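A worked instance of the percentage formula (illustrative values):

// abs((y_true - y_pred) / abs(y_true)) = [0.5, 0.25], so the loss is
// 100 * 0.375 = 37.5.
let mapePred = Tensor<Float>([1.0, 5.0])
let mapeTrue = Tensor<Float>([2.0, 4.0])
let mape = meanAbsolutePercentageError(predicted: mapePred, expected: mapeTrue)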

/// Returns the hinge loss between predictions and expectations.
/// Given `y_pred` and `y_true`, the hinge loss is computed as follows:
/// `reduction(max(0, 1 - y_pred * y_true))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -115,6 +127,8 @@ public func hingeLoss<Scalar: TensorFlowFloatingPoint>(
}
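A hinge sketch; note that the formula expects ground-truth values in {-1, 1}, and the default reduction is assumed to be a mean:

// max(0, 1 - y_pred * y_true) = [0.3, 0.7]; a mean reduction gives 0.5,
// a sum reduction 1.0.
let hingePred = Tensor<Float>([0.7, -0.3])
let hingeTrue = Tensor<Float>([1.0, -1.0])
let hinge = hingeLoss(predicted: hingePred, expected: hingeTrue)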

/// Returns the squared hinge loss between predictions and expectations.
/// Given `y_pred` and `y_true`, the squared hinge loss is computed as follows:
/// `reduction(max(0, 1 - y_pred * y_true)^2)`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -164,6 +178,8 @@ public func logCoshLoss<Scalar: TensorFlowFloatingPoint>(
}
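For the squared hinge doc above (its body and the `logCoshLoss` doc are collapsed between the two hunks; the `squaredHingeLoss` name is an assumption, not visible here):

// max(0, 1 - y_pred * y_true)^2 = [0.09, 0.49]; a mean reduction gives 0.29.
let sqHinge = squaredHingeLoss(predicted: hingePred, expected: hingeTrue)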

/// Returns the Poisson loss between predictions and expectations.
/// Given `y_pred` and `y_true`, the Poisson loss is computed as follows:
/// `reduction(y_pred - y_true * log(y_pred))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -179,7 +195,9 @@ public func poissonLoss<Scalar: TensorFlowFloatingPoint>(
}
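A Poisson sketch with the same caveats:

// y_pred - y_true * log(y_pred) = [1 - 2 * log(1), 2 - 1 * log(2)]
// ≈ [1.0, 1.307]; a mean reduction (assumed) gives ≈ 1.153.
let poissonPred = Tensor<Float>([1.0, 2.0])
let poissonTrue = Tensor<Float>([2.0, 1.0])
let poisson = poissonLoss(predicted: poissonPred, expected: poissonTrue)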

/// Returns the Kullback-Leibler divergence (KL divergence) between expectations and
/// predictions.
/// Given two distributions `y_true` and `y_pred`, the KL divergence is computed as follows:
/// `reduction(y_true * log(y_true / y_pred))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -195,7 +213,7 @@ public func kullbackLeiblerDivergence<Scalar: TensorFlowFloatingPoint>(
}
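A KL sketch over two small distributions (sum reduction assumed):

// y_true * log(y_true / y_pred) = [0.5 * log(2), 0.5 * log(2/3)]
// ≈ [0.347, -0.203]; summing gives ≈ 0.144.
let klPred = Tensor<Float>([0.25, 0.75])
let klTrue = Tensor<Float>([0.5, 0.5])
let kl = kullbackLeiblerDivergence(predicted: klPred, expected: klTrue)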

/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
///
/// - Parameters:
/// - logits: One-hot encoded outputs from a neural network.
/// - labels: Indices (zero-indexed) of the correct outputs.
@@ -228,6 +246,8 @@ func _vjpSoftmaxCrossEntropyHelper<Scalar: TensorFlowFloatingPoint>(
}
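A sketch of this index-label overload (`labels` holds zero-indexed class ids, one per batch row, per the doc above):

let ceLogits = Tensor<Float>([[2.0, 1.0, 0.1]])
let ceLabels = Tensor<Int32>([0])
// -log(softmax(logits)[0, 0]) = -log(exp(2) / (exp(2) + exp(1) + exp(0.1)))
// ≈ 0.417.
let ce = softmaxCrossEntropy(logits: ceLogits, labels: ceLabels)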

/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
/// Given logits and probabilities `p`, the softmax cross entropy is computed as follows:
/// `reduction(-p * log(softmax(logits)))`, where `softmax(x) = exp(x) / sum(exp(x))`.
///
/// - Parameters:
/// - logits: Unscaled log probabilities from a neural network.
@@ -262,6 +282,8 @@ func _vjpSoftmaxCrossEntropyHelper<Scalar: TensorFlowFloatingPoint>(
}
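The probability-label overload with a one-hot target picks out the same term, so it should evaluate to the same value (a sketch reusing `ceLogits` from above):

let ceProbs = Tensor<Float>([[1.0, 0.0, 0.0]])
// Also ≈ 0.417 for this one-hot target.
let ceSoft = softmaxCrossEntropy(logits: ceLogits, probabilities: ceProbs)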

/// Returns the sigmoid cross entropy (binary cross entropy) between logits and labels.
/// Given logits and probabilities `p`, the sigmoid cross entropy is computed as follows:
/// `reduction(-p * log(sigmoid(logits)) - (1 - p) * log(1 - sigmoid(logits)))`,
/// where `sigmoid(x) = 1 / (1 + exp(-x))`.
///
/// The reduction is applied over all elements. If a reduction over the batch size is
/// intended, please consider scaling the loss accordingly.
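Finally, a sigmoid cross entropy sketch (mean reduction assumed; `logits:labels:` labels per the docs above):

// Per element: -0 * log(sigmoid(0)) - 1 * log(1 - sigmoid(0)) = log(2) ≈ 0.693,
// and -1 * log(sigmoid(2)) ≈ 0.127; the mean is ≈ 0.410.
let bceLogits = Tensor<Float>([0.0, 2.0])
let bceLabels = Tensor<Float>([0.0, 1.0])
let bce = sigmoidCrossEntropy(logits: bceLogits, labels: bceLabels)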