change: add EI support for TFS framework #682

Merged (6 commits, Mar 26, 2019)
src/sagemaker/fw_utils.py (1 addition, 1 deletion)
@@ -41,7 +41,7 @@
'Please add framework_version={} to your constructor to avoid this error.'

VALID_PY_VERSIONS = ['py2', 'py3']
-VALID_EIA_FRAMEWORKS = ['tensorflow', 'mxnet']
+VALID_EIA_FRAMEWORKS = ['tensorflow', 'tensorflow-serving', 'mxnet']
VALID_ACCOUNTS_BY_REGION = {'us-gov-west-1': '246785580436',
'us-iso-east-1': '744548109606'}

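For context, VALID_EIA_FRAMEWORKS gates which frameworks can be paired with an Elastic Inference accelerator when create_image_uri builds an image URI. A minimal sketch of that flow, inferred from the unit-test expectations later in this PR rather than taken from the SDK's actual implementation:

# Sketch only: an approximation of how create_image_uri plausibly consults
# VALID_EIA_FRAMEWORKS. Names and structure here are assumptions, not the
# SDK's real code; the expected output matches test_create_image_uri_ei below.
VALID_EIA_FRAMEWORKS = ['tensorflow', 'tensorflow-serving', 'mxnet']

def create_image_uri_sketch(region, framework, instance_type, version,
                            account, accelerator_type=None):
    repo = 'sagemaker-{}'.format(framework)
    if accelerator_type is not None:
        if framework not in VALID_EIA_FRAMEWORKS:
            raise ValueError('{} is not supported with Elastic Inference.'.format(framework))
        repo += '-eia'  # EI images live in a separate '-eia' repository
    # EI accelerators attach to CPU instances, so the tag device is 'cpu'.
    device = 'gpu' if instance_type.startswith(('ml.p', 'ml.g')) else 'cpu'
    return '{}.dkr.ecr.{}.amazonaws.com/{}:{}-{}'.format(
        account, region, repo, version, device)

# Reproduces the URI expected by tests/unit/test_fw_utils.py:
# '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-tensorflow-serving-eia:1.1.0-cpu'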
src/sagemaker/tensorflow/deploying_tensorflow_serving.rst (10 additions, 2 deletions)
@@ -34,8 +34,6 @@ estimator object to create a SageMaker Endpoint:

The code block above deploys a SageMaker Endpoint with one instance of the type 'ml.c5.xlarge'.

-As of now, only the Python-based TensorFlow serving endpoints support Elastic Inference. For more information, see `Deploying to Python-based Endpoints <https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_python.rst#deploying-to-python-based-endpoints>`_.

What happens when deploy is called
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -66,6 +64,16 @@ If you already have existing model artifacts in S3, you can skip training and de

predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

+TensorFlow Serving endpoints on SageMaker support `Elastic Inference <https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html>`__, which provides inference acceleration for a hosted endpoint at a fraction of the cost of a full GPU instance. To attach an Elastic Inference accelerator to your endpoint, pass the accelerator type via the accelerator_type argument in your deploy call.

+.. code:: python
+
+  from sagemaker.tensorflow.serving import Model
+
+  model = Model(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole')
+
+  predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge',
+                           accelerator_type='ml.eia1.medium')

Making predictions against a SageMaker Endpoint
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

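Once deployed, the EI-backed endpoint is queried exactly like a non-accelerated TFS endpoint. A minimal usage sketch, assuming the predictor from the deploy snippet above:

# Assumes `predictor` from the deploy call above; the input/output shapes
# mirror the integration test added later in this PR.
input_data = {'instances': [1.0, 2.0, 5.0]}
result = predictor.predict(input_data)
print(result)  # e.g. {'predictions': [...]}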
tests/integ/test_tfs.py (26 additions)
@@ -46,6 +46,21 @@ def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
    yield predictor


+@pytest.fixture(scope='module')
+def tfs_predictor_with_accelerator(sagemaker_session, tf_full_version):
+    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
+    instance_type = 'ml.c4.large'
+    accelerator_type = 'ml.eia1.medium'
+    model_data = sagemaker_session.upload_data(path='tests/data/tensorflow-serving-test-model.tar.gz',
+                                               key_prefix='tensorflow-serving/models')
+    # Deploy a TFS model with an Elastic Inference accelerator attached; the
+    # endpoint is torn down automatically when the fixture goes out of scope.
+    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        model = Model(model_data=model_data, role='SageMakerRole',
+                      framework_version=tf_full_version,
+                      sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, instance_type, endpoint_name=endpoint_name,
+                                 accelerator_type=accelerator_type)
+        yield predictor


@pytest.mark.canary_quick
def test_predict(tfs_predictor, instance_type): # pylint: disable=W0613
    input_data = {'instances': [1.0, 2.0, 5.0]}
@@ -55,6 +70,17 @@ def test_predict(tfs_predictor, instance_type): # pylint: disable=W0613
    assert expected_result == result


+@pytest.mark.skipif(tests.integ.test_region() not in tests.integ.EI_SUPPORTED_REGIONS,
+                    reason='EI is not supported in region {}'.format(tests.integ.test_region()))
+@pytest.mark.canary_quick
+def test_predict_with_accelerator(tfs_predictor_with_accelerator):

Contributor review comment on the line above: this should be a part of the canary.

+    input_data = {'instances': [1.0, 2.0, 5.0]}
+    expected_result = {'predictions': [3.5, 4.0, 5.5]}
+
+    result = tfs_predictor_with_accelerator.predict(input_data)
+    assert expected_result == result


def test_predict_generic_json(tfs_predictor):
    input_data = [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]]
    expected_result = {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]}
tests/unit/test_fw_utils.py (6 additions)
@@ -86,6 +86,12 @@ def test_create_image_uri_gpu():
    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3'


+def test_create_image_uri_ei():
+    image_uri = fw_utils.create_image_uri(MOCK_REGION, 'tensorflow-serving', 'ml.c4.large', '1.1.0',
+                                          accelerator_type='ml.eia1.large', account='23')
+    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-tensorflow-serving-eia:1.1.0-cpu'


def test_create_image_uri_default_account():
    image_uri = fw_utils.create_image_uri(MOCK_REGION, MOCK_FRAMEWORK, 'ml.p3.2xlarge', '1.0rc', 'py3')
    assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3'
tests/unit/test_tfs.py (6 additions, 3 deletions)
@@ -25,7 +25,7 @@
CSV_CONTENT_TYPE = 'text/csv'
INSTANCE_COUNT = 1
INSTANCE_TYPE = 'ml.c4.4xlarge'
-ACCELERATOR_TYPE = 'ml.eia.medium'
+ACCELERATOR_TYPE = 'ml.eia1.medium'
ROLE = 'Dummy'
REGION = 'us-west-2'
PREDICT_INPUT = {'instances': [1.0, 2.0, 5.0]}
@@ -79,8 +79,11 @@ def test_tfs_model(sagemaker_session, tf_version):
def test_tfs_model_image_accelerator(sagemaker_session, tf_version):
    model = Model("s3://some/data.tar.gz", role=ROLE, framework_version=tf_version,
                  sagemaker_session=sagemaker_session)
-    with pytest.raises(ValueError):
-        model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
+    cdef = model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
+    assert cdef['Image'].endswith('sagemaker-tensorflow-serving-eia:{}-cpu'.format(tf_version))
+
+    predictor = model.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
+    assert isinstance(predictor, Predictor)


def test_tfs_model_with_log_level(sagemaker_session, tf_version):
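For reference, the container definition inspected by test_tfs_model_image_accelerator above is a plain dict shaped like the SageMaker CreateModel ContainerDefinition. The values below are illustrative placeholders; only the '-eia:<version>-cpu' image suffix is confirmed by the test's assertion:

# Illustrative shape only: keys follow the CreateModel ContainerDefinition,
# values are made up except for the asserted '-eia:<version>-cpu' suffix.
container_def = {
    'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/'
             'sagemaker-tensorflow-serving-eia:1.12-cpu',
    'Environment': {},
    'ModelDataUrl': 's3://some/data.tar.gz',
}
assert container_def['Image'].endswith('sagemaker-tensorflow-serving-eia:1.12-cpu')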