
Commit 4196324

Author: Ignacio Quintero (committed)
Fix division for Python 3.
Also added a comment about ShardedByS3Key.
1 parent: 99bbaf6
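For context, the motivation for the `/` to `//` change: in Python 3, `/` is true division and always returns a float, so a shard size computed with it can no longer be used as a slice index. A minimal standalone sketch, using hypothetical data that is not part of this commit:

    hosts = ['algo-1', 'algo-2']            # hypothetical host list
    train_data = list(range(10))            # hypothetical training set

    shard_size = len(train_data) / len(hosts)    # Python 3: 5.0 (float)
    # train_data[0:shard_size]                   # would raise TypeError: slice indices must be integers

    shard_size = len(train_data) // len(hosts)   # 5 (int) on both Python 2 and Python 3
    print(train_data[0:shard_size])              # [0, 1, 2, 3, 4]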

3 files changed: +9 −5 lines

sagemaker-python-sdk/mxnet_gluon_mnist/mnist.py

Lines changed: 3 additions & 3 deletions
@@ -53,11 +53,11 @@ def train(current_host, channel_input_dirs, hyperparameters, hosts, num_gpus):
     metric = mx.metric.Accuracy()
     loss = gluon.loss.SoftmaxCrossEntropyLoss()
 
-    # shard the training data in case we are doing
-    # distributed training.
+    # shard the training data in case we are doing distributed training. As an alternative to splitting
+    # in memory, the data could be pre-split in S3 and ShardedByS3Key used for distributed training.
     if len(hosts) > 1:
         train_data = [x for x in train_data]
-        shard_size = len(train_data) / len(hosts)
+        shard_size = len(train_data) // len(hosts)
         for i, host in enumerate(hosts):
             if host == current_host:
                 start = shard_size * i
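For reference, a self-contained sketch of the in-memory sharding pattern this hunk touches; the function name and data below are illustrative, not taken from mnist.py:

    def select_shard(train_data, hosts, current_host):
        # Each host keeps one contiguous slice of the data; // keeps the slice bounds integral.
        shard_size = len(train_data) // len(hosts)
        for i, host in enumerate(hosts):
            if host == current_host:
                start = shard_size * i
                end = start + shard_size
                return train_data[start:end]

    # Example: the second of two hosts receives the second half of the data.
    print(select_shard(list(range(10)), ['algo-1', 'algo-2'], 'algo-2'))   # [5, 6, 7, 8, 9]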

sagemaker-python-sdk/mxnet_gluon_sentiment/sentiment.py

Lines changed: 3 additions & 1 deletion
@@ -46,7 +46,9 @@ def train(current_host, hosts, num_cpus, num_gpus, channel_input_dirs, model_dir
     train_sentences = [[vocab.get(token, 1) for token in line if len(line)>0] for line in train_sentences]
     val_sentences = [[vocab.get(token, 1) for token in line if len(line)>0] for line in val_sentences]
 
-    shard_size = len(train_sentences) / len(hosts)
+    # As an alternative to splitting in memory, the data could be pre-split in S3 and ShardedByS3Key
+    # used for parallel training.
+    shard_size = len(train_sentences) // len(hosts)
     for i, host in enumerate(hosts):
         if host == current_host:
             start = shard_size * i

sagemaker-python-sdk/mxnet_mnist/mnist.py

Lines changed: 3 additions & 1 deletion
@@ -39,7 +39,9 @@ def train(current_host, channel_input_dirs, hyperparameters, hosts, num_cpus, nu
     (train_labels, train_images) = load_data(os.path.join(channel_input_dirs['train']))
     (test_labels, test_images) = load_data(os.path.join(channel_input_dirs['test']))
 
-    shard_size = len(train_images) / len(hosts)
+    # As an alternative to splitting in memory, the data could be pre-split in S3 and ShardedByS3Key
+    # used for parallel training.
+    shard_size = len(train_images) // len(hosts)
     for i, host in enumerate(hosts):
         if host == current_host:
             start = shard_size * i
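The new comments point to ShardedByS3Key, the S3 data distribution setting in the SageMaker Python SDK, as an alternative to slicing in memory. A hedged sketch of how a training channel might request it; the s3_input call and estimator arguments follow the SDK v1 style, and the bucket, role, and framework version are placeholders, so treat the exact names as assumptions:

    import sagemaker
    from sagemaker.mxnet import MXNet

    # Each training host receives a distinct subset of the S3 keys instead of the full dataset.
    train_input = sagemaker.session.s3_input(
        's3://my-bucket/mnist/train',          # placeholder bucket/prefix
        distribution='ShardedByS3Key')

    estimator = MXNet(entry_point='mnist.py',
                      role='MySageMakerRole',  # placeholder IAM role
                      train_instance_count=2,  # two hosts, one shard each
                      train_instance_type='ml.m4.xlarge',
                      framework_version='1.1.0',
                      py_version='py3')
    estimator.fit({'train': train_input})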
