This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Load config variable from hparams.json file so that Transformer can… #154

Merged
merged 1 commit on May 9, 2019
72 changes: 38 additions & 34 deletions Transformer/PythonCheckpointReader.swift
@@ -14,32 +14,22 @@

import TensorFlow

struct Config {
struct Config : Codable {
Contributor: The space before : is unnecessary according to the Google Swift Style Guide.

let vocabSize: Int
let contextSize: Int
let embeddingSize: Int
let headCount: Int
let layerCount: Int
}

extension Config {
init(dictionary: [String: Int]) {
vocabSize = dictionary["n_vocab"]!
contextSize = dictionary["n_ctx"]!
embeddingSize = dictionary["n_embd"]!
headCount = dictionary["n_head"]!
layerCount = dictionary["n_layer"]!
enum CodingKeys: String, CodingKey {
case vocabSize = "n_vocab"
case contextSize = "n_ctx"
case embeddingSize = "n_embd"
case headCount = "n_head"
case layerCount = "n_layer"
}
}

let config = Config(dictionary: [
"n_vocab": 50257,
"n_ctx": 1024,
"n_embd": 768,
"n_head": 12,
"n_layer": 12
])

func readTensor<Scalar: TensorFlowScalar>(
fromPath path: String,
name: String,
@@ -55,18 +45,23 @@ func readTensor<Scalar: TensorFlowScalar>(
}

protocol InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String)
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String)
}

extension Dense: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
let kernel = readTensor(fromPath: path, name: scope + "/w", scalarType: Scalar.self)
self.init(
weight: kernel.squeezingShape(at: 0),
bias: readTensor(fromPath: path, name: scope + "/b", scalarType: Scalar.self),
activation: identity)
}
init(contentsOfPythonCheckpointFile path: String, scope: String, activation: String) {
init(
contentsOfPythonCheckpointFile path: String,
config: Config,
scope: String,
activation: String
) {
let kernel = readTensor(fromPath: path, name: scope + "/w", scalarType: Scalar.self)
self.init(
weight: kernel.squeezingShape(at: 0),
@@ -76,7 +71,7 @@ extension Dense: InitializableFromPythonCheckpoint {
}

extension LayerNorm: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
self.init(
offset: readTensor(fromPath: path, name: scope + "/b", scalarType: Scalar.self),
scale: readTensor(fromPath: path, name: scope + "/g", scalarType: Scalar.self),
@@ -86,57 +81,66 @@ extension LayerNorm: InitializableFromPythonCheckpoint {
}

extension MultiHeadAttention: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
attention = Attention(
size: config.embeddingSize / config.headCount,
causal: true,
dropProbability: 0.2)
wqkv = TimeDistributed(Dense<Float>(
contentsOfPythonCheckpointFile: path,
config: config,
scope: scope + "/c_attn"))
wo = TimeDistributed(Dense<Float>(
contentsOfPythonCheckpointFile: path,
config: config,
scope: scope + "/c_proj"))
headCount = 12
headCount = config.headCount
}
}

extension FeedForward: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
dense1 = TimeDistributed(Dense<Float>(
contentsOfPythonCheckpointFile: path,
scope: scope + "/c_fc", activation: "gelu"))
config: config,
scope: scope + "/c_fc",
activation: "gelu"))
dense2 = TimeDistributed(Dense<Float>(
contentsOfPythonCheckpointFile: path,
config: config,
scope: scope + "/c_proj"))
dropout = Dropout(probability: 0.2)
}
}

extension EncoderLayer: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
selfAttention = MultiHeadAttention(
contentsOfPythonCheckpointFile: path,
scope: scope + "/attn")
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/attn")
selfAttentionDropout = Dropout(probability: 0.2)
selfAttentionNorm = LayerNorm(contentsOfPythonCheckpointFile: path, scope: scope + "/ln_1")
feedForward = FeedForward(contentsOfPythonCheckpointFile: path, scope: scope + "/mlp")
selfAttentionNorm = LayerNorm(
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/ln_1")
feedForward = FeedForward(
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/mlp")
feedForwardDropout = Dropout(probability: 0.2)
feedForwardNorm = LayerNorm(contentsOfPythonCheckpointFile: path, scope: scope + "/ln_2")
feedForwardNorm = LayerNorm(
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/ln_2")
}
}

extension TransformerLM: InitializableFromPythonCheckpoint {
init(contentsOfPythonCheckpointFile path: String, scope: String) {
init(contentsOfPythonCheckpointFile path: String, config: Config, scope: String) {
embedding = Embedding(
weight: readTensor(fromPath: path, name: scope + "/wte", scalarType: Float.self))
positionalEmbeddings = readTensor(
fromPath: path,
name: scope + "/wpe",
scalarType: Float.self)
layers = (0..<config.layerCount).map { i in
EncoderLayer(contentsOfPythonCheckpointFile: path, scope: scope + "/h\(i)")
EncoderLayer(
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/h\(i)")
}
norm = LayerNorm(contentsOfPythonCheckpointFile: path, scope: scope + "/ln_f")
norm = LayerNorm(
contentsOfPythonCheckpointFile: path, config: config, scope: scope + "/ln_f")
}
}
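
Note on the Config change above: making Config conform to Codable and giving it a CodingKeys enum maps the Swift property names onto the short keys used in hparams.json, so the struct can be decoded directly with Foundation's JSONDecoder instead of being built from a hard-coded dictionary. A minimal, self-contained sketch (the inline JSON string is only a stand-in for models/117M/hparams.json, using the values from the removed dictionary):

import Foundation

struct Config: Codable {
    let vocabSize: Int
    let contextSize: Int
    let embeddingSize: Int
    let headCount: Int
    let layerCount: Int

    // Maps Swift property names to the keys that appear in hparams.json.
    enum CodingKeys: String, CodingKey {
        case vocabSize = "n_vocab"
        case contextSize = "n_ctx"
        case embeddingSize = "n_embd"
        case headCount = "n_head"
        case layerCount = "n_layer"
    }
}

// Stand-in for the contents of models/117M/hparams.json.
let json = """
{"n_vocab": 50257, "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_layer": 12}
"""
let config = try JSONDecoder().decode(Config.self, from: Data(json.utf8))
print(config.embeddingSize)  // 768
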
14 changes: 10 additions & 4 deletions Transformer/main.swift
@@ -14,13 +14,19 @@

import Python
import TensorFlow
import Foundation

let modelName = "117M"
let sys = Python.import("sys")
sys.path = sys.path + ["."]
let encoder = Python.import("encoder").get_encoder("117M")

let checkpoint = "models/117M/model.ckpt"
let model = TransformerLM(contentsOfPythonCheckpointFile: checkpoint, scope: "model")
let encoder = Python.import("encoder").get_encoder(modelName)

let checkpoint = "models/\(modelName)/model.ckpt"
let configFile = "models/\(modelName)/hparams.json"
let configData = try Data.init(contentsOf: URL(fileURLWithPath: configFile))
Contributor suggested change (use the initializer syntax instead of explicit .init):
let configData = try Data(contentsOf: URL(fileURLWithPath: configFile))

let config = try JSONDecoder().decode(Config.self, from: configData)
let model = TransformerLM(
contentsOfPythonCheckpointFile: checkpoint, config: config, scope: "model")

let start_token = Int32(encoder.encoder["<|endoftext|>"])!
var tokens = Tensor(shape: [1, 1], scalars: [start_token])
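
Because the hyperparameters now come from hparams.json instead of a hard-coded dictionary, switching to a different GPT-2 checkpoint should only require changing modelName, as long as the new directory follows the same layout. A hypothetical example (the "345M" directory is an assumption, not something this PR ships):

// Hypothetical: point at a larger checkpoint; assumes models/345M/ contains
// model.ckpt and hparams.json in the same layout as models/117M/.
let modelName = "345M"
let checkpoint = "models/\(modelName)/model.ckpt"
let configFile = "models/\(modelName)/hparams.json"
// Data(contentsOf:) and JSONDecoder().decode(_:from:) both throw, so a missing or
// malformed hparams.json fails at startup instead of silently using wrong hyperparameters.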