Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 14e694d

Browse files
authored
Removing softmax from last layers of two models, improving loss display for LeNet example. (#232)
1 parent ab7fcf8 commit 14e694d

File tree

5 files changed

+8
-10
lines changed

5 files changed

+8
-10
lines changed

Benchmarks/Models/ImageClassificationInference.swift

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,6 @@ extension LeNet: ImageClassificationModel {}
2424

2525
class ImageClassificationInference<Model, ClassificationDataset>: Benchmark
2626
where Model: ImageClassificationModel, ClassificationDataset: ImageClassificationDataset {
27-
// TODO: (https://github.com/tensorflow/swift-models/issues/206) Datasets should have a common
28-
// interface to allow for them to be interchangeable in these benchmark cases.
2927
let dataset: ClassificationDataset
3028

3129
var model: Model

Benchmarks/Models/ImageClassificationTraining.swift

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,7 @@ where
2020
Model: ImageClassificationModel, Model.TangentVector.VectorSpaceScalar == Float,
2121
ClassificationDataset: ImageClassificationDataset
2222
{
23-
// TODO: (https://github.com/tensorflow/swift-models/issues/206) Datasets should have a common
24-
// interface to allow for them to be interchangeable in these benchmark cases.
2523
let dataset: ClassificationDataset
26-
2724
let epochs: Int
2825
let batchSize: Int
2926

Examples/LeNet-MNIST/main.swift

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ var classifier = Sequential {
2828
Flatten<Float>()
2929
Dense<Float>(inputSize: 400, outputSize: 120, activation: relu)
3030
Dense<Float>(inputSize: 120, outputSize: 84, activation: relu)
31-
Dense<Float>(inputSize: 84, outputSize: 10, activation: softmax)
31+
Dense<Float>(inputSize: 84, outputSize: 10)
3232
}
3333

3434
let optimizer = SGD(for: classifier, learningRate: 0.1)
@@ -39,6 +39,7 @@ struct Statistics {
3939
var correctGuessCount: Int = 0
4040
var totalGuessCount: Int = 0
4141
var totalLoss: Float = 0
42+
var batches: Int = 0
4243
}
4344

4445
let testBatches = dataset.testDataset.batched(batchSize)
@@ -62,6 +63,7 @@ for epoch in 1...epochCount {
6263
trainStats.totalGuessCount += batchSize
6364
let loss = softmaxCrossEntropy(logits: ŷ, labels: labels)
6465
trainStats.totalLoss += loss.scalarized()
66+
trainStats.batches += 1
6567
return loss
6668
}
6769
// Update the model's differentiable variables along the gradient vector.
@@ -78,17 +80,18 @@ for epoch in 1...epochCount {
7880
testStats.totalGuessCount += batchSize
7981
let loss = softmaxCrossEntropy(logits: ŷ, labels: labels)
8082
testStats.totalLoss += loss.scalarized()
83+
testStats.batches += 1
8184
}
8285

8386
let trainAccuracy = Float(trainStats.correctGuessCount) / Float(trainStats.totalGuessCount)
8487
let testAccuracy = Float(testStats.correctGuessCount) / Float(testStats.totalGuessCount)
8588
print(
8689
"""
8790
[Epoch \(epoch)] \
88-
Training Loss: \(trainStats.totalLoss), \
91+
Training Loss: \(trainStats.totalLoss / Float(trainStats.batches)), \
8992
Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \
9093
(\(trainAccuracy)), \
91-
Test Loss: \(testStats.totalLoss), \
94+
Test Loss: \(testStats.totalLoss / Float(testStats.batches)), \
9295
Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \
9396
(\(testAccuracy))
9497
""")

Models/ImageClassification/DenseNet121.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ public struct DenseNet121: Layer {
4242
public var dense: Dense<Float>
4343

4444
public init(classCount: Int) {
45-
dense = Dense(inputSize: 1024, outputSize: classCount, activation: softmax)
45+
dense = Dense(inputSize: 1024, outputSize: classCount)
4646
}
4747

4848
@differentiable

Models/ImageClassification/LeNet-5.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ public struct LeNet: Layer {
3030
public var flatten = Flatten<Float>()
3131
public var fc1 = Dense<Float>(inputSize: 400, outputSize: 120, activation: relu)
3232
public var fc2 = Dense<Float>(inputSize: 120, outputSize: 84, activation: relu)
33-
public var fc3 = Dense<Float>(inputSize: 84, outputSize: 10, activation: softmax)
33+
public var fc3 = Dense<Float>(inputSize: 84, outputSize: 10)
3434

3535
public init() {}
3636

0 commit comments

Comments
 (0)