This repository was archived by the owner on Jul 1, 2023. It is now read-only.

[WIP] Equations for losses #579

Merged
merged 23 commits into from Feb 5, 2020
Changes from all commits
68 changes: 45 additions & 23 deletions Sources/TensorFlow/Loss.swift
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

/// Returns the L1 loss between predictions and expectations.
/// Computes the L1 loss between `expected` and `predicted`.
/// `loss = reduction(abs(expected - predicted))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -27,7 +28,8 @@ public func l1Loss<Scalar: TensorFlowFloatingPoint>(
reduction(abs(expected - predicted))
}
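A minimal numeric check of this definition, passing the `_mean` reducer (used later in this file) explicitly rather than relying on the default:

```swift
import TensorFlow

let predicted: Tensor<Float> = [1, 2, 3]
let expected: Tensor<Float> = [0, 0, 0]
// abs(expected - predicted) = [1, 2, 3]; mean = 2
let loss = l1Loss(predicted: predicted, expected: expected, reduction: _mean)
print(loss) // 2.0
```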

/// Returns the L2 loss between predictions and expectations.
/// Computes the L2 loss between `expected` and `predicted`.
/// `loss = reduction(square(expected - predicted))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -42,7 +44,8 @@ public func l2Loss<Scalar: TensorFlowFloatingPoint>(
reduction((expected - predicted).squared())
}

/// Returns the mean absolute error between predictions and expectations.
/// Computes the mean of the absolute difference between labels and predictions.
/// `loss = mean(abs(expected - predicted))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -55,7 +58,8 @@ public func meanAbsoluteError<Scalar: TensorFlowFloatingPoint>(
l1Loss(predicted: predicted, expected: expected, reduction: _mean)
}

/// Returns the mean squared error between predictions and expectations.
/// Computes the mean of the squared errors between labels and predictions.
/// `loss = mean(square(expected - predicted))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -68,7 +72,8 @@ public func meanSquaredError<Scalar: TensorFlowFloatingPoint>(
l2Loss(predicted: predicted, expected: expected, reduction: _mean)
}

/// Returns the mean squared logarithmic error between predictions and expectations.
/// Computes the mean squared logarithmic error between `predicted` and `expected`.
/// `loss = mean(square(log(expected) - log(predicted)))`
///
/// - Note: Negative tensor entries will be clamped at `0` to avoid undefined
/// logarithmic behavior, as `log(_:)` is undefined for negative reals.
@@ -86,7 +91,8 @@ public func meanSquaredLogarithmicError<Scalar: TensorFlowFloatingPoint>(
return l2Loss(predicted: logPredicted, expected: logExpected, reduction: _mean)
}

/// Returns the mean absolute percentage error between predictions and expectations.
/// Computes the mean absolute percentage error between `predicted` and `expected`.
/// `loss = 100 * mean(abs((expected - predicted) / abs(expected)))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -99,7 +105,9 @@ public func meanAbsolutePercentageError<Scalar: TensorFlowFloatingPoint>(
100 * abs((expected - predicted) / abs(expected)).mean()
}
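Worked example of the percentage form:

```swift
import TensorFlow

let predicted: Tensor<Float> = [9, 20]
let expected: Tensor<Float> = [10, 25]
// abs((expected - predicted) / abs(expected)) = [0.1, 0.2]; mean = 0.15; ×100
print(meanAbsolutePercentageError(predicted: predicted, expected: expected)) // ≈ 15.0
```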

/// Returns the hinge loss between predictions and expectations.
/// Computes the hinge loss between `predicted` and `expected`.
/// `loss = reduction(max(0, 1 - predicted * expected))`
/// `expected` values should be -1 or 1.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -114,7 +122,9 @@ public func hingeLoss<Scalar: TensorFlowFloatingPoint>(
reduction(max(Tensor(0), Tensor(1) - expected * predicted))
}
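A small worked example with labels in {-1, 1}, passing `_mean` explicitly:

```swift
import TensorFlow

let predicted: Tensor<Float> = [0.7, -1.3]
let expected: Tensor<Float> = [1, -1]  // hinge labels are -1 or 1
// 1 - expected * predicted = [0.3, -0.3]; max(0, ·) = [0.3, 0]; mean = 0.15
let loss = hingeLoss(predicted: predicted, expected: expected, reduction: _mean)
print(loss) // 0.15
```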

/// Returns the squared hinge loss between predictions and expectations.
/// Computes the squared hinge loss between `predicted` and `expected`.
/// `loss = reduction(square(max(0, 1 - predicted * expected)))`
/// `expected` values should be -1 or 1.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -129,7 +139,10 @@ public func squaredHingeLoss<Scalar: TensorFlowFloatingPoint>(
reduction(hingeLoss(predicted: predicted, expected: expected).squared())
}

/// Returns the hinge loss between predictions and expectations.
/// Computes the categorical hinge loss between `predicted` and `expected`.
/// `loss = max(0, negative - positive + 1)`,
/// where `negative = max((1 - expected) * predicted)` and
/// `positive = sum(predicted * expected)`.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -146,8 +159,9 @@ public func categoricalHingeLoss<Scalar: TensorFlowFloatingPoint>(
return reduction(max(Tensor(0), negative - positive + Tensor(1)))
}

/// Returns the logarithm of the hyperbolic cosine of the error between predictions and
/// expectations.
/// Computes the logarithm of the hyperbolic cosine of the prediction error.
/// `logcosh = log((exp(x) + exp(-x))/2)`,
/// where `x` is the error `predicted - expected`.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -163,7 +177,9 @@ public func logCoshLoss<Scalar: TensorFlowFloatingPoint>(
return reduction(x + softplus(Tensor(-2) * x) - log(Tensor(2)))
}

/// Returns the Poisson loss between predictions and expectations.
/// Computes the Poisson loss between `predicted` and `expected`.
/// The Poisson loss is the mean of the elements of the `Tensor`
/// `predicted - expected * log(predicted)`.
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -178,8 +194,8 @@ public func poissonLoss<Scalar: TensorFlowFloatingPoint>(
reduction(predicted - expected * log(predicted))
}
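Worked example, again passing `_mean` explicitly:

```swift
import TensorFlow

let predicted: Tensor<Float> = [1, 2]
let expected: Tensor<Float> = [1, 1]
// predicted - expected * log(predicted) = [1, 2 - log(2)] ≈ [1, 1.3069]
let loss = poissonLoss(predicted: predicted, expected: expected, reduction: _mean)
print(loss) // ≈ 1.1534
```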

/// Returns the Kullback-Leibler divergence (KL divergence) between between expectations and
/// predictions. Given two distributions `p` and `q`, KL divergence computes `p * log(p / q)`.
/// Computes Kullback-Leibler divergence loss between `expected` and `predicted`.
/// `loss = reduction(expected * log(expected / predicted))`
///
/// - Parameters:
/// - predicted: Predicted outputs from a neural network.
@@ -194,7 +210,10 @@ public func kullbackLeiblerDivergence<Scalar: TensorFlowFloatingPoint>(
reduction(expected * log(expected / predicted))
}

/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
/// Computes the sparse softmax cross entropy (categorical cross entropy) between logits and labels.
/// Use this cross-entropy loss function when there are two or more label classes.
/// Labels are expected to be provided as integers. There should be `# classes`
/// floating-point values per feature for `logits` and a single floating-point value per feature for `expected`.
///
/// - Parameters:
/// - logits: One-hot encoded outputs from a neural network.
@@ -228,7 +247,10 @@ func _vjpSoftmaxCrossEntropyHelper<Scalar: TensorFlowFloatingPoint>(
return (loss, { $0.expandingShape(at: -1) * grad })
}

/// Returns the softmax cross entropy (categorical cross entropy) between logits and labels.
/// Computes the softmax cross entropy (categorical cross entropy) between logits and labels.
/// Use this cross-entropy loss function when there are two or more label classes.
/// Labels are expected to be provided in a one-hot representation.
/// There should be `# classes` floating-point values per feature.
///
/// - Parameters:
/// - logits: Unscaled log probabilities from a neural network.
@@ -263,10 +285,10 @@ func _vjpSoftmaxCrossEntropyHelper<Scalar: TensorFlowFloatingPoint>(
return (loss, { $0.expandingShape(at: -1) * grad })
}

/// Returns the sigmoid cross entropy (binary cross entropy) between logits and labels.
///
/// The reduction is reduced over all elements. If reduced over batch size is intended, please
/// consider to scale the loss.
/// Computes the sigmoid cross entropy (binary cross entropy) between logits and labels.
/// Use this cross-entropy loss when there are only two label classes (assumed to
/// be 0 and 1). For each example, there should be a single floating-point value
/// per prediction.
///
/// - Parameters:
/// - logits: The unscaled output of a neural network.
@@ -284,10 +306,10 @@ public func sigmoidCrossEntropy<Scalar: TensorFlowFloatingPoint>(
return reduction(maxLogitsWithZero - logits * labels + log1p(exp(-negAbsLogits)))
}

/// Returns the Huber loss between predictions and expectations.
/// Computes the Huber loss between `predicted` and `expected`.
///
/// For each value `x` in the difference `expected - predicted`, the loss is:
/// - `0.5 * x^2` if `abs(x) <= δ`.
/// For each value `x` in `error = expected - predicted`:
/// - `0.5 * x^2` if `|x| <= δ`.
/// - `0.5 * δ^2 + δ * (|x| - δ)` otherwise.
///
/// - Source: [Wikipedia article](https://en.wikipedia.org/wiki/Huber_loss).
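A worked example exercising both branches; the `delta:` parameter name and the explicit `_sum` reduction are assumptions, since the function body is collapsed here:

```swift
import TensorFlow

let predicted: Tensor<Float> = [0.5, 3]
let expected: Tensor<Float> = [0, 0]
// |x| = [0.5, 3] with δ = 1:
//   0.5 * 0.5^2 = 0.125              (quadratic branch)
//   0.5 * 1^2 + 1 * (3 - 1) = 2.5    (linear branch)
let loss = huberLoss(predicted: predicted, expected: expected, delta: 1, reduction: _sum)
print(loss) // 2.625
```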