
Commit 8a8d949

Merge branch 'master' into master
2 parents 191819d + 4ef5386

15 files changed: +164 -18 lines

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
@@ -1,5 +1,17 @@
 # Changelog
 
+## v1.50.18.post0 (2020-03-05)
+
+### Documentation Changes
+
+* correct Estimator code_location default S3 path
+
+## v1.50.18 (2020-03-04)
+
+### Bug Fixes and Other Changes
+
+* change default compile model max run to 15 mins
+
 ## v1.50.17.post0 (2020-03-03)
 
 ### Testing and Release Infrastructure

README.rst

Lines changed: 4 additions & 2 deletions
@@ -176,7 +176,7 @@ TensorFlow SageMaker Estimators
 
 By using TensorFlow SageMaker Estimators, you can train and host TensorFlow models on Amazon SageMaker.
 
-Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``, ``1.15.0``, ``2.0.0``.
+Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``, ``1.15.0``, ``1.15.2``, ``2.0.0``, ``2.0.1``, ``2.1.0``.
 
 Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``.
 

@@ -208,7 +208,9 @@ PyTorch SageMaker Estimators
 
 With PyTorch SageMaker Estimators, you can train and host PyTorch models on Amazon SageMaker.
 
-Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``.
+Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``, ``1.4.0``.
+
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
 
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-1.50.18.dev0
+1.50.19.dev0

doc/amazon_sagemaker_operators_for_kubernetes.rst

Lines changed: 5 additions & 5 deletions
@@ -349,7 +349,7 @@ that directory to your \ ``PATH``.
 
     export os="linux"
 
-    wget https://amazon-sagemaker-operator-for-k8s-us-east-1.s3.amazonaws.com/kubectl-smlogs-plugin/latest/${os}.amd64.tar.gz
+    wget https://amazon-sagemaker-operator-for-k8s-us-east-1.s3.amazonaws.com/kubectl-smlogs-plugin/v1/${os}.amd64.tar.gz
     tar xvzf ${os}.amd64.tar.gz
 
     # Move binaries to a directory in your homedir.

@@ -550,13 +550,13 @@ each region.
 +-------------+---------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------+
 | Region      | Controller Image                                                                            | Linux SMLogs                                                                                                           |
 +=============+=============================================================================================+========================================================================================================================+
-| us-east-1   | ``957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s:latest``   | https://amazon-sagemaker-operator-for-k8s-us-east-1.s3.amazonaws.com/kubectl-smlogs-plugin/latest/linux.amd64.tar.gz   |
+| us-east-1   | ``957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s:v1``       | https://amazon-sagemaker-operator-for-k8s-us-east-1.s3.amazonaws.com/kubectl-smlogs-plugin/v1/linux.amd64.tar.gz       |
 +-------------+---------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------+
-| us-east-2   | ``922499468684.dkr.ecr.us-east-2.amazonaws.com/amazon-sagemaker-operator-for-k8s:latest``   | https://amazon-sagemaker-operator-for-k8s-us-east-2.s3.amazonaws.com/kubectl-smlogs-plugin/latest/linux.amd64.tar.gz   |
+| us-east-2   | ``922499468684.dkr.ecr.us-east-2.amazonaws.com/amazon-sagemaker-operator-for-k8s:v1``       | https://amazon-sagemaker-operator-for-k8s-us-east-2.s3.amazonaws.com/kubectl-smlogs-plugin/v1/linux.amd64.tar.gz       |
 +-------------+---------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------+
-| us-west-2   | ``640106867763.dkr.ecr.us-west-2.amazonaws.com/amazon-sagemaker-operator-for-k8s:latest``   | https://amazon-sagemaker-operator-for-k8s-us-west-2.s3.amazonaws.com/kubectl-smlogs-plugin/latest/linux.amd64.tar.gz   |
+| us-west-2   | ``640106867763.dkr.ecr.us-west-2.amazonaws.com/amazon-sagemaker-operator-for-k8s:v1``       | https://amazon-sagemaker-operator-for-k8s-us-west-2.s3.amazonaws.com/kubectl-smlogs-plugin/v1/linux.amd64.tar.gz       |
 +-------------+---------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------+
-| eu-west-1   | ``613661167059.dkr.ecr.eu-west-1.amazonaws.com/amazon-sagemaker-operator-for-k8s:latest``   | https://amazon-sagemaker-operator-for-k8s-eu-west-1.s3.amazonaws.com/kubectl-smlogs-plugin/latest/linux.amd64.tar.gz   |
+| eu-west-1   | ``613661167059.dkr.ecr.eu-west-1.amazonaws.com/amazon-sagemaker-operator-for-k8s:v1``       | https://amazon-sagemaker-operator-for-k8s-eu-west-1.s3.amazonaws.com/kubectl-smlogs-plugin/v1/linux.amd64.tar.gz       |
 +-------------+---------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------+
 

doc/using_pytorch.rst

Lines changed: 15 additions & 0 deletions
@@ -6,6 +6,8 @@ With PyTorch Estimators and Models, you can train and host PyTorch models on Ama
 
 Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``.
 
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
+
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 
 You can visit the PyTorch repository at https://github.com/pytorch/pytorch.

@@ -250,6 +252,14 @@ You use the SageMaker PyTorch model server to host your PyTorch model when you c
 Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates.
 You can access the name of the Endpoint by the ``name`` property on the returned ``Predictor``.
 
+PyTorch on Amazon SageMaker supports `Elastic Inference <https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html>`_, which provides inference acceleration for a hosted endpoint at a fraction of the cost of a full GPU instance.
+To attach an Elastic Inference accelerator to your endpoint, pass the accelerator type to the ``accelerator_type`` argument of your ``deploy`` call.
+
+.. code:: python
+
+    predictor = pytorch_estimator.deploy(instance_type='ml.m4.xlarge',
+                                         initial_instance_count=1,
+                                         accelerator_type='ml.eia2.medium')
 
 The SageMaker PyTorch Model Server
 ==================================

@@ -291,6 +301,11 @@ It loads the model parameters from a ``model.pth`` file in the SageMaker model d
         model.load_state_dict(torch.load(f))
     return model
 
+However, if you are using PyTorch Elastic Inference, you do not have to provide a ``model_fn``, because the PyTorch serving
+container supplies a default one. Note that if you rely on the default ``model_fn``, you must save
+your parameter file as ``model.pt`` instead of ``model.pth``. For more information on the inference script, see
+`SageMaker PyTorch Default Inference Handler <https://github.com/aws/sagemaker-pytorch-serving-container/blob/master/src/sagemaker_pytorch_serving_container/default_inference_handler.py>`_.
+
 Serve a PyTorch Model
 ---------------------
 
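As context for the ``model.pt`` note above, here is a minimal, hedged sketch of producing a parameter file that the default Elastic Inference ``model_fn`` can load. The ResNet-18 trace and example input shape are illustrative assumptions, not part of this change; what matters is saving a TorchScript artifact named ``model.pt`` before packaging it into the model archive.

    # Hedged sketch: any scripted/traced module works; ResNet-18 is only an example.
    import torch
    import torchvision.models as models

    model = models.resnet18(pretrained=True).eval()
    example_input = torch.randn(1, 3, 224, 224)  # illustrative input shape

    traced_model = torch.jit.trace(model, example_input)
    traced_model.save("model.pt")  # the default EI model_fn expects this filename, not model.pth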

src/sagemaker/estimator.py

Lines changed: 1 addition & 1 deletion
@@ -1470,7 +1470,7 @@ def __init__(
                 uploaded (default: None) - don't include a trailing slash since
                 a string prepended with a "/" is appended to ``code_location``. The code
                 file uploaded to S3 is 'code_location/job-name/source/sourcedir.tar.gz'.
-                If not specified, the default ``code location`` is s3://default_bucket/job-name/.
+                If not specified, the default ``code location`` is s3://output_bucket/job-name/.
             image_name (str): An alternate image name to use instead of the
                 official Sagemaker image for the framework. This is useful to
                 run one of the Sagemaker supported frameworks with an image
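To make the corrected docstring concrete, a hedged example of how ``code_location`` shapes the upload path follows; the bucket, prefix, and script names are hypothetical, and omitting ``code_location`` falls back to the default described above.

    from sagemaker.pytorch import PyTorch

    estimator = PyTorch(
        entry_point="train.py",            # hypothetical training script
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="ml.m5.xlarge",
        framework_version="1.4.0",
        py_version="py3",
        # Source code is uploaded to
        # s3://my-bucket/my-prefix/<job-name>/source/sourcedir.tar.gz
        code_location="s3://my-bucket/my-prefix",
    )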

src/sagemaker/fw_utils.py

Lines changed: 19 additions & 3 deletions
@@ -53,7 +53,14 @@
 )
 
 VALID_PY_VERSIONS = ["py2", "py3"]
-VALID_EIA_FRAMEWORKS = ["tensorflow", "tensorflow-serving", "mxnet", "mxnet-serving"]
+VALID_EIA_FRAMEWORKS = [
+    "tensorflow",
+    "tensorflow-serving",
+    "mxnet",
+    "mxnet-serving",
+    "pytorch-serving",
+]
+PY2_RESTRICTED_EIA_FRAMEWORKS = ["pytorch-serving"]
 VALID_ACCOUNTS_BY_REGION = {"us-gov-west-1": "246785580436", "us-iso-east-1": "744548109606"}
 ASIMOV_VALID_ACCOUNTS_BY_REGION = {"us-iso-east-1": "886529160074"}
 OPT_IN_ACCOUNTS_BY_REGION = {"ap-east-1": "057415533634", "me-south-1": "724002660598"}

@@ -71,6 +78,7 @@
     "mxnet-serving-eia": "mxnet-inference-eia",
     "pytorch": "pytorch-training",
     "pytorch-serving": "pytorch-inference",
+    "pytorch-serving-eia": "pytorch-inference-eia",
 }
 
 MERGED_FRAMEWORKS_LOWEST_VERSIONS = {

@@ -82,6 +90,7 @@
     "mxnet-serving-eia": [1, 4, 1],
     "pytorch": [1, 2, 0],
     "pytorch-serving": [1, 2, 0],
+    "pytorch-serving-eia": [1, 3, 1],
 }
 
 DEBUGGER_UNSUPPORTED_REGIONS = ["us-gov-west-1", "us-iso-east-1"]

@@ -207,6 +216,7 @@ def create_image_uri(
 
     if _accelerator_type_valid_for_framework(
         framework=framework,
+        py_version=py_version,
         accelerator_type=accelerator_type,
         optimized_families=optimized_families,
     ):

@@ -259,21 +269,27 @@ def create_image_uri(
 
 
 def _accelerator_type_valid_for_framework(
-    framework, accelerator_type=None, optimized_families=None
+    framework, py_version, accelerator_type=None, optimized_families=None
 ):
     """
     Args:
         framework:
+        py_version:
         accelerator_type:
         optimized_families:
     """
     if accelerator_type is None:
         return False
 
+    if py_version == "py2" and framework in PY2_RESTRICTED_EIA_FRAMEWORKS:
+        raise ValueError(
+            "{} is not supported with Amazon Elastic Inference in Python 2.".format(framework)
+        )
+
     if framework not in VALID_EIA_FRAMEWORKS:
         raise ValueError(
             "{} is not supported with Amazon Elastic Inference. Currently only "
-            "Python-based TensorFlow and MXNet are supported.".format(framework)
+            "Python-based TensorFlow, MXNet, PyTorch are supported.".format(framework)
         )
 
     if optimized_families:

src/sagemaker/pytorch/README.rst

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@ With PyTorch Estimators and Models, you can train and host PyTorch models on Ama
 
 Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``, ``1.4.0``.
 
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
+
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 
 You can visit the PyTorch repository at https://github.com/pytorch/pytorch.

src/sagemaker/pytorch/model.py

Lines changed: 2 additions & 2 deletions
@@ -136,7 +136,7 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
                 For example, 'ml.p2.xlarge'.
             accelerator_type (str): The Elastic Inference accelerator type to
                 deploy to the instance for loading and making inferences to the
-                model. Currently unsupported with PyTorch.
+                model.
 
         Returns:
             dict[str, str]: A container definition object usable with the

@@ -169,7 +169,7 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
                 (cpu/gpu/family-specific optimized).
             accelerator_type (str): The Elastic Inference accelerator type to
                 deploy to the instance for loading and making inferences to the
-                model. Currently unsupported with PyTorch.
+                model.
 
         Returns:
             str: The appropriate image URI based on the given parameters.

src/sagemaker/tuner.py

Lines changed: 17 additions & 0 deletions
@@ -760,6 +760,23 @@ def best_estimator(self, best_training_job=None):
         be deployed to an Amazon SageMaker endpoint and return a ``sagemaker.RealTimePredictor``
         object.
 
+        Args:
+            best_training_job (dict): Dictionary containing "TrainingJobName" and
+                "TrainingJobDefinitionName".
+
+                Example:
+
+                .. code:: python
+
+                    {
+                        "TrainingJobName": "my_training_job_name",
+                        "TrainingJobDefinitionName": "my_training_job_definition_name"
+                    }
+
+        Returns:
+            sagemaker.estimator.EstimatorBase: The estimator that has the best training job
+                attached.
+
         Raises:
             Exception: If there is no best training job available for the hyperparameter tuning job.
         """
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+# This file is intentionally left blank to invoke default model_fn and predict_fn

129 KB binary file not shown.

tests/integ/test_pytorch_train.py

Lines changed: 30 additions & 0 deletions
@@ -27,6 +27,10 @@
 MNIST_DIR = os.path.join(DATA_DIR, "pytorch_mnist")
 MNIST_SCRIPT = os.path.join(MNIST_DIR, "mnist.py")
 
+EIA_DIR = os.path.join(DATA_DIR, "pytorch_eia")
+EIA_MODEL = os.path.join(EIA_DIR, "model_mnist.tar.gz")
+EIA_SCRIPT = os.path.join(EIA_DIR, "empty_inference_script.py")
+
 
 @pytest.fixture(scope="module", name="pytorch_training_job")
 def fixture_training_job(sagemaker_session, pytorch_full_version, cpu_instance_type):

@@ -115,6 +119,32 @@ def test_deploy_model(pytorch_training_job, sagemaker_session, cpu_instance_type
     assert output.shape == (batch_size, 10)
 
 
+@pytest.mark.skipif(PYTHON_VERSION == "py2", reason="PyTorch EIA does not support Python 2.")
+def test_deploy_model_with_accelerator(sagemaker_session, cpu_instance_type):
+    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
+    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
+    pytorch = PyTorchModel(
+        model_data,
+        "SageMakerRole",
+        framework_version="1.3.1",
+        entry_point=EIA_SCRIPT,
+        sagemaker_session=sagemaker_session,
+    )
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        predictor = pytorch.deploy(
+            initial_instance_count=1,
+            instance_type=cpu_instance_type,
+            accelerator_type="ml.eia2.medium",
+            endpoint_name=endpoint_name,
+        )
+
+        batch_size = 100
+        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
+        output = predictor.predict(data)
+
+        assert output.shape == (batch_size, 10)
+
+
 def _upload_training_data(pytorch):
     return pytorch.sagemaker_session.upload_data(
         path=os.path.join(MNIST_DIR, "training"),

tests/unit/test_fw_utils.py

Lines changed: 31 additions & 0 deletions
@@ -311,6 +311,37 @@ def test_mxnet_eia_images():
     )
 
 
+def test_pytorch_eia_images():
+    image_uri = fw_utils.create_image_uri(
+        "us-east-1",
+        "pytorch-serving",
+        "ml.c4.2xlarge",
+        "1.3.1",
+        "py3",
+        accelerator_type="ml.eia1.large",
+    )
+    assert (
+        image_uri
+        == "{}.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference-eia:1.3.1-cpu-py3".format(
+            fw_utils.ASIMOV_PROD_ACCOUNT
+        )
+    )
+
+
+def test_pytorch_eia_py2_error():
+    error_message = "pytorch-serving is not supported with Amazon Elastic Inference in Python 2."
+    with pytest.raises(ValueError) as error:
+        fw_utils.create_image_uri(
+            "us-east-1",
+            "pytorch-serving",
+            "ml.c4.2xlarge",
+            "1.3.1",
+            "py2",
+            accelerator_type="ml.eia1.large",
+        )
+    assert error_message in str(error)
+
+
 def test_create_image_uri_override_account():
     image_uri = fw_utils.create_image_uri(
         "us-west-1", MOCK_FRAMEWORK, "ml.p3.2xlarge", "1.0rc", "py3", account="fake"

tests/unit/test_pytorch.py

Lines changed: 12 additions & 4 deletions
@@ -345,11 +345,19 @@ def test_non_mms_model(repack_model, sagemaker_session):
 
 @patch("sagemaker.fw_utils.tar_and_upload_dir", MagicMock())
 def test_model_image_accelerator(sagemaker_session):
-    model = PyTorchModel(
-        MODEL_DATA, role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session
+    with pytest.raises(ValueError) as error:
+        model = PyTorchModel(
+            MODEL_DATA,
+            role=ROLE,
+            entry_point=SCRIPT_PATH,
+            sagemaker_session=sagemaker_session,
+            framework_version="1.3.1",
+            py_version="py2",
+        )
+        model.deploy(1, CPU, accelerator_type=ACCELERATOR_TYPE)
+    assert "pytorch-serving is not supported with Amazon Elastic Inference in Python 2." in str(
+        error
     )
-    with pytest.raises(ValueError):
-        model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
 
 
 def test_train_image_default(sagemaker_session):