aws
diff --git a/‎CHANGELOG.rst
Lines changed: 10 additions & 0 deletions b/‎CHANGELOG.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎README.rst
Lines changed: 2 additions & 2 deletions b/‎README.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎setup.py
Lines changed: 1 addition & 1 deletion b/‎setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/amazon/common.py
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/amazon/common.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/chainer/README.rst
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/chainer/README.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/local/image.py
Lines changed: 4 additions & 19 deletions b/‎src/sagemaker/local/image.py
Lines changed: 4 additions & 19 deletions
diff --git a/‎src/sagemaker/mxnet/README.rst
Lines changed: 14 additions & 14 deletions b/‎src/sagemaker/mxnet/README.rst
Lines changed: 14 additions & 14 deletions
diff --git a/‎src/sagemaker/mxnet/defaults.py
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/mxnet/defaults.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/pytorch/README.rst
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/pytorch/README.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/tensorflow/README.rst
Lines changed: 18 additions & 1 deletion b/‎src/sagemaker/tensorflow/README.rst
Lines changed: 18 additions & 1 deletion
diff --git a/‎tests/conftest.py
Lines changed: 2 additions & 2 deletions b/‎tests/conftest.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/data/dummy_tensor
311 KB b/‎tests/data/dummy_tensor
311 KB
diff --git a/‎tests/integ/__init__.py
Lines changed: 2 additions & 0 deletions b/‎tests/integ/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎tests/integ/test_byo_estimator.py
Lines changed: 10 additions & 28 deletions b/‎tests/integ/test_byo_estimator.py
Lines changed: 10 additions & 28 deletions
@@ -2,6 +2,16 @@
 CHANGELOG
 =========
 
+1.9.1dev
+========
+
+* bug-fix: Estimators: Fix serialization of single records
+
+1.9.0
+=====
+
+* feature: Estimators: add support for MXNet 1.2.1
+
 1.8.0
 =====
 
 
@@ -51,7 +51,7 @@ You can install from source by cloning this repository and issuing a pip install
 
     git clone https://github.com/aws/sagemaker-python-sdk.git
     python setup.py sdist
-    pip install dist/sagemaker-1.8.0.tar.gz
+    pip install dist/sagemaker-1.9.0.tar.gz
 
 Supported Operating Systems
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -240,7 +240,7 @@ MXNet SageMaker Estimators
 
 With MXNet Estimators, you can train and host MXNet models on Amazon SageMaker.
 
-Supported versions of MXNet: ``1.1.0``, ``1.0.0``, ``0.12.1``.
+Supported versions of MXNet: ``1.2.1``, ``1.1.0``, ``1.0.0``, ``0.12.1``.
 
 More details at `MXNet SageMaker Estimators and Models`_.
 
 
@@ -23,7 +23,7 @@ def read(fname):
 
 
 setup(name="sagemaker",
-      version="1.8.0",
+      version="1.9.0",
       description="Open source library for training and deploying models on Amazon SageMaker.",
       packages=find_packages('src'),
       package_dir={'': 'src'},
 
@@ -29,7 +29,7 @@ def __init__(self, content_type='application/x-recordio-protobuf'):
 
     def __call__(self, array):
         if len(array.shape) == 1:
-            array.reshape(1, array.shape[0])
+            array = array.reshape(1, array.shape[0])
         assert len(array.shape) == 2, "Expecting a 1 or 2 dimensional array"
         buf = io.BytesIO()
         write_numpy_to_dense_tensor(buf, array)
 
@@ -158,7 +158,7 @@ The following are optional arguments. When you create a ``Chainer`` object, you
 -  ``train_volume_size`` Size in GB of the EBS volume to use for storing
    input data during training. Must be large enough to store training
    data if input_mode='File' is used (which is the default).
--  ``train_max_run`` Timeout in hours for training, after which Amazon
+-  ``train_max_run`` Timeout in seconds for training, after which Amazon
    SageMaker terminates the job regardless of its current status.
 -  ``input_mode`` The input mode that the algorithm supports. Valid
    modes: 'File' - Amazon SageMaker copies the training dataset from the
 
@@ -26,7 +26,6 @@
 import sys
 import tarfile
 import tempfile
-from fcntl import fcntl, F_GETFL, F_SETFL
 from six.moves.urllib.parse import urlparse
 from threading import Thread
 
@@ -105,7 +104,7 @@ def train(self, input_data_config, hyperparameters):
         compose_command = self._compose()
 
         _ecr_login_if_needed(self.sagemaker_session.boto_session, self.image)
-        process = subprocess.Popen(compose_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        process = subprocess.Popen(compose_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
         try:
             _stream_output(process)
@@ -555,34 +554,20 @@ def __init__(self, host_dir, container_dir=None, channel=None):
 def _stream_output(process):
     """Stream the output of a process to stdout
 
-    This function takes an existing process that will be polled for output. Both stdout and
-    stderr will be polled and both will be sent to sys.stdout.
+    This function takes an existing process that will be polled for output. Only stdout
+    will be polled and sent to sys.stdout.
 
     Args:
         process(subprocess.Popen): a process that has been started with
-            stdout=PIPE and stderr=PIPE
+            stdout=PIPE and stderr=STDOUT
 
     Returns (int): process exit code
     """
     exit_code = None
 
-    # Get the current flags for the  stderr file descriptor
-    # And add the NONBLOCK flag to allow us to read even if there is no data.
-    # Since usually stderr will be empty unless there is an error.
-    flags = fcntl(process.stderr, F_GETFL)  # get current process.stderr flags
-    fcntl(process.stderr, F_SETFL, flags | os.O_NONBLOCK)
-
     while exit_code is None:
         stdout = process.stdout.readline().decode("utf-8")
         sys.stdout.write(stdout)
-        try:
-            stderr = process.stderr.readline().decode("utf-8")
-            sys.stdout.write(stderr)
-        except IOError:
-            # If there is nothing to read on stderr we will get an IOError
-            # this is fine.
-            pass
-
         exit_code = process.poll()
 
     if exit_code != 0:
 
@@ -5,7 +5,7 @@ MXNet SageMaker Estimators and Models
 
 With MXNet Estimators, you can train and host MXNet models on Amazon SageMaker.
 
-Supported versions of MXNet: ``1.1.0``, ``1.0.0``, ``0.12.1``.
+Supported versions of MXNet: ``1.2.1``, ``1.1.0``, ``1.0.0``, ``0.12.1``.
 
 Training with MXNet
 ~~~~~~~~~~~~~~~~~~~
@@ -81,7 +81,7 @@ If you want to run your training script locally via the Python interpreter, look
 Using MXNet and numpy
 ^^^^^^^^^^^^^^^^^^^^^
 
-You can import both ``mxnet`` and ``numpy`` in your training script. When your script runs in SageMaker, it will run with access to MXNet version 1.0.0 and numpy version 1.13.3 by default. For more information on the environment your script runs in, please see `SageMaker MXNet Containers <#sagemaker-mxnet-containers>`__.
+You can import both ``mxnet`` and ``numpy`` in your training script. When your script runs in SageMaker, it will run with access to MXNet version 1.2.1 and numpy version 1.14.5 by default. For more information on the environment your script runs in, please see `SageMaker MXNet Containers <#sagemaker-mxnet-containers>`__.
 
 Running an MXNet training script in SageMaker
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -136,7 +136,7 @@ The following are optional arguments. When you create an ``MXNet`` object, you c
 -  ``train_volume_size`` Size in GB of the EBS volume to use for storing
    input data during training. Must be large enough to store training
    data if input_mode='File' is used (which is the default).
--  ``train_max_run`` Timeout in hours for training, after which Amazon
+-  ``train_max_run`` Timeout in seconds for training, after which Amazon
    SageMaker terminates the job regardless of its current status.
 -  ``input_mode`` The input mode that the algorithm supports. Valid
    modes: 'File' - Amazon SageMaker copies the training dataset from the
@@ -581,23 +581,23 @@ When training and deploying training scripts, SageMaker runs your Python script
 
 SageMaker runs MXNet Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by passing a ``py_version`` keyword arg to the MXNet Estimator constructor. Setting this to ``py2`` (the default) will cause your training script to be run on Python 2.7. Setting this to ``py3`` will cause your training script to be run on Python 3.5. This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy.
 
-Your MXNet training script will be run on version 1.1.0 by default. (See below for how to choose a different version, and currently supported versions.) The decision to use the GPU or CPU version of MXNet is made by the ``train_instance_type``, set on the MXNet constructor. If you choose a GPU instance type, your training job will be run on a GPU version of MXNet. If you choose a CPU instance type, your training job will be run on a CPU version of MXNet. Similarly, when you call deploy, specifying a GPU or CPU deploy_instance_type, will control which MXNet build your Endpoint runs.
+Your MXNet training script will be run on version 1.2.1 by default. (See below for how to choose a different version, and currently supported versions.) The decision to use the GPU or CPU version of MXNet is made by the ``train_instance_type``, set on the MXNet constructor. If you choose a GPU instance type, your training job will be run on a GPU version of MXNet. If you choose a CPU instance type, your training job will be run on a CPU version of MXNet. Similarly, when you call deploy, specifying a GPU or CPU deploy_instance_type will control which MXNet build your Endpoint runs.
 
 The Docker images have the following dependencies installed:
 
-+-------------------------+--------------+-------------+-------------+
-| Dependencies            | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 |
-+-------------------------+--------------+-------------+-------------+
-| Python                  |   2.7 or 3.5 |   2.7 or 3.5|   2.7 or 3.5|
-+-------------------------+--------------+-------------+-------------+
-| CUDA                    |          9.0 |         9.0 |         9.0 |
-+-------------------------+--------------+-------------+-------------+
-| numpy                   |       1.13.3 |      1.13.3 |      1.13.3 |
-+-------------------------+--------------+-------------+-------------+
++-------------------------+--------------+-------------+-------------+-------------+
+| Dependencies            | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 | MXNet 1.2.1 |
++-------------------------+--------------+-------------+-------------+-------------+
+| Python                  |   2.7 or 3.5 |   2.7 or 3.5|   2.7 or 3.5|   2.7 or 3.5|
++-------------------------+--------------+-------------+-------------+-------------+
+| CUDA                    |          9.0 |         9.0 |         9.0 |         9.0 |
++-------------------------+--------------+-------------+-------------+-------------+
+| numpy                   |       1.13.3 |      1.13.3 |      1.13.3 |      1.14.5 |
++-------------------------+--------------+-------------+-------------+-------------+
 
 The Docker images extend Ubuntu 16.04.
 
-You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.1``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.1.0.
+You can select a version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to specify only the major and minor version, e.g. ``1.2``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.2.1.
 Alternatively, you can build your own image by following the instructions in the SageMaker MXNet containers repository, and passing ``image_name`` to the MXNet Estimator constructor.
 
 You can visit the SageMaker MXNet containers repository here: https://github.com/aws/sagemaker-mxnet-containers/
@@ -12,4 +12,4 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
-MXNET_VERSION = '1.1'
+MXNET_VERSION = '1.2'
@@ -187,7 +187,7 @@ The following are optional arguments. When you create a ``PyTorch`` object, you
 -  ``train_volume_size`` Size in GB of the EBS volume to use for storing
    input data during training. Must be large enough to store training
    data if input_mode='File' is used (which is the default).
--  ``train_max_run`` Timeout in hours for training, after which Amazon
+-  ``train_max_run`` Timeout in seconds for training, after which Amazon
    SageMaker terminates the job regardless of its current status.
 -  ``input_mode`` The input mode that the algorithm supports. Valid
    modes: 'File' - Amazon SageMaker copies the training dataset from the
 
@@ -414,7 +414,7 @@ you can specify these as keyword arguments.
 -  ``train_volume_size (int)`` Size in GB of the EBS volume to use for storing
    input data during training. Must be large enough to the store training
    data.
--  ``train_max_run (int)`` Timeout in hours for training, after which Amazon
+-  ``train_max_run (int)`` Timeout in seconds for training, after which Amazon
    SageMaker terminates the job regardless of its current status.
 -  ``output_path (str)`` S3 location where you want the training result (model
    artifacts and optional output files) saved. If not specified, results
@@ -826,6 +826,23 @@ If your TFRecords are compressed, you can train on Gzipped TF Records by passing
 You can learn more about ``PipeModeDataset`` in the sagemaker-tensorflow-extensions repository: https://github.com/aws/sagemaker-tensorflow-extensions
 
 
+Training with MKL-DNN disabled
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+SageMaker TensorFlow CPU images use TensorFlow built with Intel® MKL-DNN optimization.
+
+In certain cases you might be able to get better performance by disabling this optimization
+(`for example when using small models <https://github.com/awslabs/amazon-sagemaker-examples/blob/d88d1c19861fb7733941969f5a68821d9da2982e/sagemaker-python-sdk/tensorflow_iris_dnn_classifier_using_estimators/iris_dnn_classifier.py#L7-L9>`_).
+
+You can disable MKL-DNN optimization for TensorFlow ``1.8.0`` by setting the following two environment variables:
+
+.. code:: python
+
+    import os
+
+    os.environ['TF_DISABLE_MKL'] = '1'
+    os.environ['TF_DISABLE_POOL_ALLOCATOR'] = '1'
+
 
 SageMaker TensorFlow Docker containers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -76,7 +76,7 @@ def tf_version(request):
     return request.param
 
 
-@pytest.fixture(scope='module', params=['0.12', '0.12.1', '1.0', '1.0.0', '1.1', '1.1.0'])
+@pytest.fixture(scope='module', params=['0.12', '0.12.1', '1.0', '1.0.0', '1.1', '1.1.0', '1.2', '1.2.1'])
 def mxnet_version(request):
     return request.param
 
@@ -96,7 +96,7 @@ def tf_full_version(request):
     return request.param
 
 
-@pytest.fixture(scope='module', params=['0.12.1', '1.0.0', '1.1.0'])
+@pytest.fixture(scope='module', params=['0.12.1', '1.0.0', '1.1.0', '1.2.1'])
 def mxnet_full_version(request):
     return request.param
 
 
@@ -16,6 +16,8 @@
 import os
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'data')
+TRAINING_DEFAULT_TIMEOUT_MINUTES = 20
+TUNING_DEFAULT_TIMEOUT_MINUTES = 20
 
 logging.getLogger('boto3').setLevel(logging.INFO)
 logging.getLogger('botocore').setLevel(logging.INFO)
@@ -13,22 +13,18 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 
-import boto3
-import numpy as np
 import pytest
 
 import sagemaker
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import write_numpy_to_dense_tensor
 from sagemaker.estimator import Estimator
 from sagemaker.utils import name_from_base
-from tests.integ import DATA_DIR
+from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 
@@ -57,27 +53,20 @@ def test_byo_estimator(sagemaker_session, region):
 
     """
     image_name = registry(region) + "/factorization-machines:1"
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
-    with timeout(minutes=15):
+    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
         pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
 
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
@@ -111,6 +100,7 @@ def test_byo_estimator(sagemaker_session, region):
 def test_async_byo_estimator(sagemaker_session, region):
     image_name = registry(region) + "/factorization-machines:1"
     endpoint_name = name_from_base('byo')
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
     training_job_name = ""
 
     with timeout(minutes=5):
@@ -120,19 +110,11 @@ def test_async_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,