change: add EI support for TFS framework #682

Merged (6 commits, Mar 26, 2019)
src/sagemaker/fw_utils.py (1 addition, 1 deletion)
@@ -41,7 +41,7 @@
'Please add framework_version={} to your constructor to avoid this error.'

VALID_PY_VERSIONS = ['py2', 'py3']
-VALID_EIA_FRAMEWORKS = ['tensorflow', 'mxnet']
+VALID_EIA_FRAMEWORKS = ['tensorflow', 'tensorflow-serving', 'mxnet']
VALID_ACCOUNTS_BY_REGION = {'us-gov-west-1': '246785580436',
'us-iso-east-1': '744548109606'}

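For context, VALID_EIA_FRAMEWORKS gates which frameworks can be paired with an Elastic Inference accelerator when create_image_uri builds an image URI. A minimal sketch of that flow, inferred from the unit-test expectations later in this PR rather than taken from the SDK's actual implementation:

# Sketch only: an approximation of how create_image_uri plausibly consults
# VALID_EIA_FRAMEWORKS. Names and structure here are assumptions, not the
# SDK's real code; the expected output matches test_create_image_uri_ei below.
VALID_EIA_FRAMEWORKS = ['tensorflow', 'tensorflow-serving', 'mxnet']

def create_image_uri_sketch(region, framework, instance_type, version,
                            account, accelerator_type=None):
    repo = 'sagemaker-{}'.format(framework)
    if accelerator_type is not None:
        if framework not in VALID_EIA_FRAMEWORKS:
            raise ValueError('{} is not supported with Elastic Inference.'.format(framework))
        repo += '-eia'  # EI images live in a separate '-eia' repository
    # EI accelerators attach to CPU instances, so the tag device is 'cpu'.
    device = 'gpu' if instance_type.startswith(('ml.p', 'ml.g')) else 'cpu'
    return '{}.dkr.ecr.{}.amazonaws.com/{}:{}-{}'.format(
        account, region, repo, version, device)

# Reproduces the URI expected by tests/unit/test_fw_utils.py:
# '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-tensorflow-serving-eia:1.1.0-cpu'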
src/sagemaker/tensorflow/deploying_tensorflow_serving.rst (10 additions, 2 deletions)
@@ -34,8 +34,6 @@ estimator object to create a SageMaker Endpoint:

The code block above deploys a SageMaker Endpoint with one instance of the type 'ml.c5.xlarge'.

-As of now, only the Python-based TensorFlow serving endpoints support Elastic Inference. For more information, see `Deploying to Python-based Endpoints <https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_python.rst#deploying-to-python-based-endpoints>`_.

What happens when deploy is called
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -66,6 +64,16 @@ If you already have existing model artifacts in S3, you can skip training and de

predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

+TensorFlow Serving endpoints on SageMaker support `Elastic Inference <https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html>`__, which provides inference acceleration for a hosted endpoint at a fraction of the cost of a full GPU instance. To attach an Elastic Inference accelerator to your endpoint, pass the accelerator type via the accelerator_type argument in your deploy call.

+.. code:: python
+
+  from sagemaker.tensorflow.serving import Model
+
+  model = Model(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole')
+
+  predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge',
+                           accelerator_type='ml.eia1.medium')

Making predictions against a SageMaker Endpoint
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

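Once deployed, the EI-backed endpoint is queried exactly like a non-accelerated TFS endpoint. A minimal usage sketch, assuming the predictor from the deploy snippet above:

# Assumes `predictor` from the deploy call above; the input/output shapes
# mirror the integration test added later in this PR.
input_data = {'instances': [1.0, 2.0, 5.0]}
result = predictor.predict(input_data)
print(result)  # e.g. {'predictions': [...]}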
tests/integ/test_tfs.py (26 additions)
@@ -46,6 +46,21 @@ def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
    yield predictor


+@pytest.fixture(scope='module')
+def tfs_predictor_with_accelerator(sagemaker_session, tf_full_version):
+    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
+    instance_type = 'ml.c4.large'
+    accelerator_type = 'ml.eia1.medium'
+    model_data = sagemaker_session.upload_data(path='tests/data/tensorflow-serving-test-model.tar.gz',
+                                               key_prefix='tensorflow-serving/models')
+    # Deploy a TFS model with an Elastic Inference accelerator attached; the
+    # endpoint is torn down automatically when the fixture goes out of scope.
+    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        model = Model(model_data=model_data, role='SageMakerRole',
+                      framework_version=tf_full_version,
+                      sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, instance_type, endpoint_name=endpoint_name,
+                                 accelerator_type=accelerator_type)
+        yield predictor


@pytest.mark.canary_quick
def test_predict(tfs_predictor, instance_type): # pylint: disable=W0613
    input_data = {'instances': [1.0, 2.0, 5.0]}
@@ -55,6 +70,17 @@ def test_predict(tfs_predictor, instance_type): # pylint: disable=W0613
    assert expected_result == result


+@pytest.mark.skipif(tests.integ.test_region() not in tests.integ.EI_SUPPORTED_REGIONS,
+                    reason='EI is not supported in region {}'.format(tests.integ.test_region()))
+@pytest.mark.canary_quick
+def test_predict_with_accelerator(tfs_predictor_with_accelerator):

Contributor review comment on the line above: this should be a part of the canary.

+    input_data = {'instances': [1.0, 2.0, 5.0]}
+    expected_result = {'predictions': [3.5, 4.0, 5.5]}
+
+    result = tfs_predictor_with_accelerator.predict(input_data)
+    assert expected_result == result


def test_predict_generic_json(tfs_predictor):
    input_data = [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]]
    expected_result = {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]}
tests/unit/test_fw_utils.py (6 additions)
@@ -86,6 +86,12 @@ def test_create_image_uri_gpu():
    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3'


+def test_create_image_uri_ei():
+    image_uri = fw_utils.create_image_uri(MOCK_REGION, 'tensorflow-serving', 'ml.c4.large', '1.1.0',
+                                          accelerator_type='ml.eia1.large', account='23')
+    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-tensorflow-serving-eia:1.1.0-cpu'


def test_create_image_uri_default_account():
    image_uri = fw_utils.create_image_uri(MOCK_REGION, MOCK_FRAMEWORK, 'ml.p3.2xlarge', '1.0rc', 'py3')
    assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw:1.0rc-gpu-py3'
tests/unit/test_tfs.py (6 additions, 3 deletions)
@@ -25,7 +25,7 @@
CSV_CONTENT_TYPE = 'text/csv'
INSTANCE_COUNT = 1
INSTANCE_TYPE = 'ml.c4.4xlarge'
-ACCELERATOR_TYPE = 'ml.eia.medium'
+ACCELERATOR_TYPE = 'ml.eia1.medium'
ROLE = 'Dummy'
REGION = 'us-west-2'
PREDICT_INPUT = {'instances': [1.0, 2.0, 5.0]}
@@ -79,8 +79,11 @@ def test_tfs_model(sagemaker_session, tf_version):
def test_tfs_model_image_accelerator(sagemaker_session, tf_version):
    model = Model("s3://some/data.tar.gz", role=ROLE, framework_version=tf_version,
                  sagemaker_session=sagemaker_session)
-    with pytest.raises(ValueError):
-        model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
+    cdef = model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
+    assert cdef['Image'].endswith('sagemaker-tensorflow-serving-eia:{}-cpu'.format(tf_version))
+
+    predictor = model.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
+    assert isinstance(predictor, Predictor)


def test_tfs_model_with_log_level(sagemaker_session, tf_version):
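For reference, the container definition inspected by test_tfs_model_image_accelerator above is a plain dict shaped like the SageMaker CreateModel ContainerDefinition. The values below are illustrative placeholders; only the '-eia:<version>-cpu' image suffix is confirmed by the test's assertion:

# Illustrative shape only: keys follow the CreateModel ContainerDefinition,
# values are made up except for the asserted '-eia:<version>-cpu' suffix.
container_def = {
    'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/'
             'sagemaker-tensorflow-serving-eia:1.12-cpu',
    'Environment': {},
    'ModelDataUrl': 's3://some/data.tar.gz',
}
assert container_def['Image'].endswith('sagemaker-tensorflow-serving-eia:1.12-cpu')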