|
| 1 | +# https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras |
| 2 | + |
| 3 | +import json |
| 4 | +import os |
| 5 | +import tensorflow as tf |
| 6 | +import numpy as np |
| 7 | + |
| 8 | + |
def mnist_dataset(batch_size):
    """Build an endlessly repeating, shuffled, batched MNIST training dataset.

    Args:
        batch_size: Number of examples per emitted batch.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches drawn from the
        MNIST training split; the test split is discarded.
    """
    shuffle_buffer = 60000

    (raw_images, raw_labels), _ = tf.keras.datasets.mnist.load_data()

    # The raw images are uint8 with values in [0, 255]; dividing by a float32
    # scalar rescales them into the [0, 1] range as floating point values.
    images = raw_images / np.float32(255)
    labels = raw_labels.astype(np.int64)

    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.shuffle(shuffle_buffer)
    # Repeat without a count so the stream never ends on its own.
    dataset = dataset.repeat()
    return dataset.batch(batch_size)
| 22 | + |
| 23 | + |
def build_and_compile_cnn_model():
    """Create and compile a small convolutional classifier for 28x28 inputs.

    Returns:
        A compiled `tf.keras.Sequential` model whose final layer emits 10 raw
        logits (no softmax), trained with SGD and tracking accuracy.
    """
    keras = tf.keras
    layer_stack = [
        keras.layers.InputLayer(input_shape=(28, 28)),
        # Add a trailing channel axis so Conv2D receives (28, 28, 1) inputs.
        keras.layers.Reshape(target_shape=(28, 28, 1)),
        keras.layers.Conv2D(32, 3, activation="relu"),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dense(10),
    ]
    cnn = keras.Sequential(layer_stack)

    # The last Dense layer has no activation, so the loss is told to expect
    # logits rather than probabilities.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    sgd = keras.optimizers.SGD(learning_rate=0.001)
    cnn.compile(loss=loss_fn, optimizer=sgd, metrics=["accuracy"])
    return cnn
| 41 | + |
| 42 | + |
# Training driver: reads the cluster layout from TF_CONFIG, creates the
# multi-worker strategy, then builds the dataset and model and runs training.

# Batch size multiplied by the worker count below to get the global batch.
per_worker_batch_size = 64
# TF_CONFIG must be set to a JSON document; indexing os.environ raises
# KeyError when it is missing.
tf_config = json.loads(os.environ["TF_CONFIG"])
# Count only the entries listed under cluster -> worker.
num_workers = len(tf_config["cluster"]["worker"])

# Created before the dataset and the model.
# NOTE(review): the referenced tutorial expects TF_CONFIG to be set before
# this call -- confirm against the TensorFlow docs.
strategy = tf.distribute.MultiWorkerMirroredStrategy()

# The dataset is batched at the global size (per-worker size * worker count).
global_batch_size = per_worker_batch_size * num_workers
multi_worker_dataset = mnist_dataset(global_batch_size)

# The model is built and compiled inside the strategy's scope.
with strategy.scope():
    multi_worker_model = build_and_compile_cnn_model()

# mnist_dataset() repeats indefinitely, so steps_per_epoch bounds each epoch.
multi_worker_model.fit(multi_worker_dataset, epochs=3, steps_per_epoch=70)
0 commit comments