Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Removing softmax as terminal activation function from two models #232

Merged
merged 1 commit into from
Dec 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Benchmarks/Models/ImageClassificationInference.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ extension LeNet: ImageClassificationModel {}

class ImageClassificationInference<Model, ClassificationDataset>: Benchmark
where Model: ImageClassificationModel, ClassificationDataset: ImageClassificationDataset {
// TODO: (https://github.com/tensorflow/swift-models/issues/206) Datasets should have a common
// interface to allow for them to be interchangeable in these benchmark cases.
let dataset: ClassificationDataset

var model: Model
Expand Down
3 changes: 0 additions & 3 deletions Benchmarks/Models/ImageClassificationTraining.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ where
Model: ImageClassificationModel, Model.TangentVector.VectorSpaceScalar == Float,
ClassificationDataset: ImageClassificationDataset
{
// TODO: (https://github.com/tensorflow/swift-models/issues/206) Datasets should have a common
// interface to allow for them to be interchangeable in these benchmark cases.
let dataset: ClassificationDataset

let epochs: Int
let batchSize: Int

Expand Down
9 changes: 6 additions & 3 deletions Examples/LeNet-MNIST/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ var classifier = Sequential {
Flatten<Float>()
Dense<Float>(inputSize: 400, outputSize: 120, activation: relu)
Dense<Float>(inputSize: 120, outputSize: 84, activation: relu)
Dense<Float>(inputSize: 84, outputSize: 10, activation: softmax)
Dense<Float>(inputSize: 84, outputSize: 10)
}

let optimizer = SGD(for: classifier, learningRate: 0.1)
Expand All @@ -39,6 +39,7 @@ struct Statistics {
var correctGuessCount: Int = 0
var totalGuessCount: Int = 0
var totalLoss: Float = 0
var batches: Int = 0
}

let testBatches = dataset.testDataset.batched(batchSize)
Expand All @@ -62,6 +63,7 @@ for epoch in 1...epochCount {
trainStats.totalGuessCount += batchSize
let loss = softmaxCrossEntropy(logits: ŷ, labels: labels)
trainStats.totalLoss += loss.scalarized()
trainStats.batches += 1
return loss
}
// Update the model's differentiable variables along the gradient vector.
Expand All @@ -78,17 +80,18 @@ for epoch in 1...epochCount {
testStats.totalGuessCount += batchSize
let loss = softmaxCrossEntropy(logits: ŷ, labels: labels)
testStats.totalLoss += loss.scalarized()
testStats.batches += 1
}

let trainAccuracy = Float(trainStats.correctGuessCount) / Float(trainStats.totalGuessCount)
let testAccuracy = Float(testStats.correctGuessCount) / Float(testStats.totalGuessCount)
print(
"""
[Epoch \(epoch)] \
Training Loss: \(trainStats.totalLoss), \
Training Loss: \(trainStats.totalLoss / Float(trainStats.batches)), \
Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \
(\(trainAccuracy)), \
Test Loss: \(testStats.totalLoss), \
Test Loss: \(testStats.totalLoss / Float(testStats.batches)), \
Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \
(\(testAccuracy))
""")
Expand Down
2 changes: 1 addition & 1 deletion Models/ImageClassification/DenseNet121.swift
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public struct DenseNet121: Layer {
public var dense: Dense<Float>

public init(classCount: Int) {
dense = Dense(inputSize: 1024, outputSize: classCount, activation: softmax)
dense = Dense(inputSize: 1024, outputSize: classCount)
}

@differentiable
Expand Down
2 changes: 1 addition & 1 deletion Models/ImageClassification/LeNet-5.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public struct LeNet: Layer {
public var flatten = Flatten<Float>()
public var fc1 = Dense<Float>(inputSize: 400, outputSize: 120, activation: relu)
public var fc2 = Dense<Float>(inputSize: 120, outputSize: 84, activation: relu)
public var fc3 = Dense<Float>(inputSize: 84, outputSize: 10, activation: softmax)
public var fc3 = Dense<Float>(inputSize: 84, outputSize: 10)

public init() {}

Expand Down