import logging
import mxnet as mx
- from mxnet import gluon, autograd
+ from mxnet import gluon, autograd, kv
from mxnet.gluon import nn
import numpy as np
import json

# ------------------------------------------------------------ #


- def train(channel_input_dirs, hyperparameters, **kwargs):
+ def train(channel_input_dirs, hyperparameters, hosts, **kwargs):
    # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to
    # the current container environment, but here we just use simple cpu context.
    ctx = mx.cpu()
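As an aside on the comment above (this snippet is not part of the commit): since SageMaker also hands train() arguments such as num_cpus and num_gpus, a context could be chosen from num_gpus instead of hard-coding CPU. A hypothetical helper, for illustration only:

    # Hypothetical helper, not in the commit: pick a compute context from the
    # num_gpus argument that the comment above says SageMaker passes in.
    def pick_context(num_gpus):
        return mx.gpu(0) if num_gpus > 0 else mx.cpu()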
@@ -41,8 +41,10 @@ def train(channel_input_dirs, hyperparameters, **kwargs):
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
+     store = kv.create('dist_sync' if len(hosts) > 1 else 'local')  # hosts is a list of host names
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
-                            {'learning_rate': learning_rate, 'momentum': momentum})
+                            {'learning_rate': learning_rate, 'momentum': momentum},
+                            kvstore=store)
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
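Putting the two changes together: kv.create builds a 'dist_sync' store when the job spans several hosts and a plain 'local' store otherwise, and passing that store to gluon.Trainer routes parameter updates through it. A minimal self-contained sketch of the pattern, assuming hosts is the list of host names SageMaker supplies (the network and hyperparameter values here are placeholders, not the commit's code):

    import mxnet as mx
    from mxnet import gluon, kv
    from mxnet.gluon import nn

    net = nn.Dense(10)   # stand-in network for illustration
    net.initialize(mx.init.Xavier(magnitude=2.24))

    hosts = ['algo-1']   # one entry => 'local'; several entries => 'dist_sync'
    store = kv.create('dist_sync' if len(hosts) > 1 else 'local')
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.1, 'momentum': 0.9},
                            kvstore=store)

With a 'dist_sync' store, parameter updates are synchronized across workers on each trainer.step(), which is why the store is created before the Trainer is constructed and passed in via the kvstore argument.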