aws · chuyang-deng · Mar 4, 2019 · Feb 25, 2019 · Feb 25, 2019 · Feb 28, 2019
diff --git a/README.md b/README.md
@@ -50,13 +50,14 @@ The Docker images are built from the Dockerfiles in
 [docker/](https://github.com/aws/sagemaker-tensorflow-serving-container/tree/master/docker>).
 
 The Dockerfiles are grouped based on the version of TensorFlow Serving they support. Each supported
-processor type (e.g. "cpu", "gpu") has a different Dockerfile in each group.  
+processor type (e.g. "cpu", "gpu", "ei") has a different Dockerfile in each group.  
 
 To build an image, run the `./scripts/build.sh` script:
 
 ```bash
 ./scripts/build.sh --version 1.11 --arch cpu
 ./scripts/build.sh --version 1.11 --arch gpu
+./scripts/build.sh --version 1.11 --arch ei
 ```
 
 
@@ -67,6 +68,7 @@ in SageMaker, you need to publish it to an ECR repository in your account. The
 ```bash
 ./scripts/publish.sh --version 1.11 --arch cpu
 ./scripts/publish.sh --version 1.11 --arch gpu
+./scripts/publish.sh --version 1.11 --arch ei
 ```
 
 Note: this will publish to ECR in your default region. Use the `--region` argument to 
@@ -80,8 +82,8 @@ GPU images) will work for this, or you can use the provided `start.sh`
 and `stop.sh` scripts:
 
 ```bash
-./scripts/start.sh [--version x.xx] [--arch cpu|gpu|...]
-./scripts/stop.sh [--version x.xx] [--arch cpu|gpu|...]
+./scripts/start.sh [--version x.xx] [--arch cpu|gpu|ei|...]
+./scripts/stop.sh [--version x.xx] [--arch cpu|gpu|ei|...]
 ```
 
 When the container is running, you can send test requests to it using any HTTP client. Here's
@@ -106,6 +108,15 @@ checkers using `tox`:
 tox
 ```
 
+To test Elastic Inference with an accelerator, you will need an AWS account. Publish your built image to an ECR repository, then run the following command:
+
+    pytest test/sagemaker/test_elastic_inference.py --aws-id <aws_account> \
+                                                      --docker-base-name <ECR_repository_name> \
+                                                      --instance-type <instance_type> \
+                                                      --accelerator-type <accelerator_type> \
+                                                      --tag <image_tag>   
+
+
 ## Contributing
 
 Please read [CONTRIBUTING.md](https://github.com/aws/sagemaker-tensorflow-serving-container/blob/master/CONTRIBUTING.md) 

diff --git a/docker/Dockerfile.ei b/docker/Dockerfile.ei
@@ -0,0 +1,25 @@
+FROM ubuntu:16.04
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+ARG TFS_SHORT_VERSION
+
+# Elastic Inference build of tensorflow_model_server; it must be present in the build context.
+COPY AmazonEI_TensorFlow_Serving_v${TFS_SHORT_VERSION}_v1 /usr/bin/tensorflow_model_server
+
+# downloaded 1.12 version is not executable
+RUN chmod +x /usr/bin/tensorflow_model_server
+
+# nginx + njs
+# - ca-certificates lets curl verify the HTTPS download of the nginx signing key.
+# - The key is saved to a file (curl -f fails the step on HTTP errors) instead of being piped,
+#   so a failed download cannot be hidden by the pipeline's exit status.
+# - The apt package lists are removed in the same layer so they never reach the image.
+RUN \
+    apt-get update && \
+    apt-get -y install --no-install-recommends ca-certificates curl && \
+    curl -fsSL https://nginx.org/keys/nginx_signing.key -o /tmp/nginx_signing.key && \
+    apt-key add /tmp/nginx_signing.key && \
+    rm /tmp/nginx_signing.key && \
+    echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \
+    apt-get update && \
+    apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY ./ /
+# The build context also holds the server binary that was installed above; drop the copy left in /.
+RUN rm /AmazonEI_TensorFlow_Serving_v${TFS_SHORT_VERSION}_v1
+
+ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}"
+ENV PATH="$PATH:/sagemaker"
diff --git a/scripts/build.sh b/scripts/build.sh
@@ -8,6 +8,10 @@ source scripts/shared.sh
 
 parse_std_args "$@"
 
+# The EI image copies a prebuilt TensorFlow Serving binary, so download it before building.
+# "$arch" is quoted so the test stays well-formed even if the variable is empty.
+if [ "$arch" = 'ei' ]; then
+    get_tfs_executable
+fi
+
 echo "pulling previous image for layer cache... "
 $(aws ecr get-login --no-include-email --registry-id $aws_account) &>/dev/null || echo 'warning: ecr login failed'
 docker pull $aws_account.dkr.ecr.$aws_region.amazonaws.com/sagemaker-tensorflow-serving:$full_version-$arch &>/dev/null || echo 'warning: pull failed'

diff --git a/scripts/shared.sh b/scripts/shared.sh
@@ -4,7 +4,7 @@
 
 function error() {
     >&2 echo $1
-    >&2 echo "usage: $0 [--version <major-version>] [--arch (cpu*|gpu)] [--region <aws-region>]"
+    >&2 echo "usage: $0 [--version <major-version>] [--arch (cpu*|gpu|ei)] [--region <aws-region>]"
     exit 1
 }
 
@@ -28,6 +28,17 @@ function get_aws_account() {
     aws sts get-caller-identity --query 'Account' --output text
 }
 
+# Download the Elastic Inference TensorFlow Serving archive for ${version} from S3 and
+# move the extracted AmazonEI_TensorFlow_Serving_v${version}_v1* file(s) into container/.
+# Reads the global ${version}; returns non-zero when the archive cannot be found,
+# downloaded or extracted. The downloaded zip and the scratch directory are always removed.
+function get_tfs_executable() {
+    local s3_prefix="s3://amazonei-tensorflow/Tensorflow Serving/v${version}/Ubuntu/"
+    local zip_file
+    local status
+
+    # Use the first listed object that has a name; skips entries without a key column.
+    zip_file=$(aws s3 ls "${s3_prefix}" | awk 'NF >= 4 { print $4; exit }')
+    if [[ -z "${zip_file}" ]]; then
+        >&2 echo "no TensorFlow Serving archive found under ${s3_prefix}"
+        return 1
+    fi
+
+    aws s3 cp "${s3_prefix}${zip_file}" . || return 1
+
+    # Start from a clean scratch directory so leftovers from an earlier run cannot interfere.
+    rm -rf exec_dir
+    mkdir exec_dir
+
+    # Search only the extracted files; the quoted pattern keeps the shell from expanding the glob.
+    unzip "${zip_file}" -d exec_dir && \
+        find exec_dir -name "AmazonEI_TensorFlow_Serving_v${version}_v1*" -exec mv {} container/ \;
+    status=$?
+
+    rm -rf "${zip_file}" exec_dir
+    return ${status}
+}
+
 function parse_std_args() {
     # defaults
     arch='cpu'
@@ -63,7 +74,7 @@ function parse_std_args() {
     done
 
     [[ -z "${version// }" ]] && error 'missing version'
-    [[ "$arch" =~ ^(cpu|gpu)$ ]] || error "invalid arch: $arch"
+    [[ "$arch" =~ ^(cpu|gpu|ei)$ ]] || error "invalid arch: $arch"
     [[ -z "${aws_region// }" ]] && error 'missing aws region'
 
     full_version=$(get_full_version $version)

diff --git a/test/conftest.py b/test/conftest.py
@@ -10,4 +10,3 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-
diff --git a/test/sagemaker/conftest.py b/test/sagemaker/conftest.py
@@ -0,0 +1,96 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+import logging
+
+import boto3
+import pytest
+from sagemaker import Session
+from sagemaker.tensorflow import TensorFlow
+
+# Logger for this conftest module.
+logger = logging.getLogger(__name__)
+# Set the level of the named loggers below to INFO.
+# NOTE(review): this module imports boto3, so 'boto' may have been meant as 'boto3' - confirm.
+# NOTE(review): 'factory.py', 'auth.py' and 'connectionpool.py' look like file names rather
+# than dotted module names; verify that loggers with these names are actually in use.
+logging.getLogger('boto').setLevel(logging.INFO)
+logging.getLogger('botocore').setLevel(logging.INFO)
+logging.getLogger('factory.py').setLevel(logging.INFO)
+logging.getLogger('auth.py').setLevel(logging.INFO)
+logging.getLogger('connectionpool.py').setLevel(logging.INFO)
+
+
+def pytest_addoption(parser):
+    """Register the command-line options that the fixtures in this module read."""
+    option_specs = [
+        ('--aws-id', {}),
+        ('--docker-base-name', {'default': 'functional-tensorflow-serving'}),
+        ('--instance-type', {}),
+        ('--accelerator-type', {'default': None}),
+        ('--region', {'default': 'us-west-2'}),
+        ('--framework-version', {'default': TensorFlow.LATEST_VERSION}),
+        ('--processor', {'default': 'cpu', 'choices': ['gpu', 'cpu']}),
+        ('--tag', {}),
+    ]
+    # Options are added in the order listed above.
+    for flag, settings in option_specs:
+        parser.addoption(flag, **settings)
+
+
+@pytest.fixture(scope='session')
+def aws_id(request):
+    """Value of the ``--aws-id`` command-line option."""
+    option_value = request.config.getoption('--aws-id')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def docker_base_name(request):
+    """Value of the ``--docker-base-name`` command-line option."""
+    option_value = request.config.getoption('--docker-base-name')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def instance_type(request):
+    """Value of the ``--instance-type`` command-line option."""
+    option_value = request.config.getoption('--instance-type')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def accelerator_type(request):
+    """Value of the ``--accelerator-type`` command-line option (``None`` unless supplied)."""
+    option_value = request.config.getoption('--accelerator-type')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def region(request):
+    """Value of the ``--region`` command-line option."""
+    option_value = request.config.getoption('--region')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def framework_version(request):
+    """Value of the ``--framework-version`` command-line option."""
+    option_value = request.config.getoption('--framework-version')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def processor(request):
+    """Value of the ``--processor`` command-line option."""
+    option_value = request.config.getoption('--processor')
+    return option_value
+
+
+@pytest.fixture(scope='session')
+def tag(request, framework_version, processor):
+    """Image tag: the ``--tag`` option when given, otherwise one built from version and processor."""
+    explicit_tag = request.config.getoption('--tag')
+    if explicit_tag is not None:
+        return explicit_tag
+    # No tag was supplied, so derive one from the framework version and processor.
+    return '{}-{}-py2'.format(framework_version, processor)
+
+
+@pytest.fixture(scope='session')
+def docker_registry(aws_id, region):
+    """ECR registry host name built from the ``aws_id`` and ``region`` fixtures."""
+    registry_template = '{}.dkr.ecr.{}.amazonaws.com'
+    return registry_template.format(aws_id, region)
+
+
+@pytest.fixture(scope='module')
+def docker_image(docker_base_name, tag):
+    """Image reference in ``<base name>:<tag>`` form."""
+    image_template = '{}:{}'
+    return image_template.format(docker_base_name, tag)
+
+
+@pytest.fixture(scope='module')
+def docker_image_uri(docker_registry, docker_image):
+    """Full image URI in ``<registry>/<image>`` form."""
+    return '{}/{}'.format(docker_registry, docker_image)
+
diff --git a/test/sagemaker/test_elastic_inference.py b/test/sagemaker/test_elastic_inference.py
@@ -0,0 +1,114 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import io
+import json
+import logging
+import time
+
+import boto3
+import numpy as np
+
+import pytest
+
+# Regions in which the tests in this module are allowed to run; any other ``--region``
+# value makes the autouse fixture below skip the test.
+EI_SUPPORTED_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2', 'eu-west-1', 'ap-northeast-1', 'ap-northeast-2']
+
+# Logger for this test module.
+logger = logging.getLogger(__name__)
+# Set the level of the named loggers below: INFO for the first five, DEBUG for the last two.
+# NOTE(review): 'factory.py', 'auth.py', 'connectionpool.py' and 'session.py' look like file
+# names rather than dotted module names; verify that loggers with these names are in use.
+logging.getLogger('boto3').setLevel(logging.INFO)
+logging.getLogger('botocore').setLevel(logging.INFO)
+logging.getLogger('factory.py').setLevel(logging.INFO)
+logging.getLogger('auth.py').setLevel(logging.INFO)
+logging.getLogger('connectionpool.py').setLevel(logging.INFO)
+logging.getLogger('session.py').setLevel(logging.DEBUG)
+logging.getLogger('functional').setLevel(logging.DEBUG)
+
+
+@pytest.fixture(autouse=True)
+def skip_if_no_accelerator(accelerator_type):
+    """Skip the requesting test when no accelerator type was supplied."""
+    accelerator_missing = accelerator_type is None
+    if accelerator_missing:
+        pytest.skip('Skipping because accelerator type was not provided')
+
+
+@pytest.fixture(autouse=True)
+def skip_if_non_supported_ei_region(region):
+    """Skip the requesting test when ``region`` is not listed in EI_SUPPORTED_REGIONS."""
+    if region in EI_SUPPORTED_REGIONS:
+        return
+    pytest.skip('EI is not supported in {}'.format(region))
+
+
+@pytest.fixture
+def pretrained_model_data(region):
+    """S3 URI of the pretrained ResNet model archive in the sample-data bucket for ``region``."""
+    uri_template = 's3://sagemaker-sample-data-{}/tensorflow/model/resnet/resnet_50_v2_fp32_NCHW.tar.gz'
+    return uri_template.format(region)
+
+
+def _timestamp():
+    """Current local time formatted as ``YYYY-MM-DD-HH-MM-SS``."""
+    stamp_format = "%Y-%m-%d-%H-%M-%S"
+    return time.strftime(stamp_format)
+
+
+def _execution_role(session):
+    """ARN of the IAM role named ``SageMakerRole``, looked up through ``session``."""
+    iam = session.resource('iam')
+    role = iam.Role('SageMakerRole')
+    return role.arn
+
+
+def _production_variants(model_name, instance_type, accelerator_type):
+    """Build a one-element production variant list for an endpoint configuration."""
+    variant = {
+        'VariantName': 'AllTraffic',
+        'ModelName': model_name,
+        'InitialInstanceCount': 1,
+        'InstanceType': instance_type,
+        'AcceleratorType': accelerator_type
+    }
+    return [variant]
+
+
+@pytest.mark.skip_if_non_supported_ei_region
+@pytest.mark.skip_if_no_accelerator
+def test_deploy_elastic_inference_with_pretrained_model(pretrained_model_data,
+                                                        docker_image_uri,
+                                                        instance_type,
+                                                        accelerator_type,
+                                                        region):
+    """Deploy the pretrained model behind an endpoint with an accelerator and invoke it once.
+
+    Creates a SageMaker model, endpoint config and endpoint, waits for the endpoint to be
+    in service, sends one JSON prediction request and checks that predictions come back.
+    Every resource that was created is deleted again, whether the test passes or fails.
+    """
+    # One timestamp for all three names so they cannot differ across a second boundary.
+    suffix = _timestamp()
+    endpoint_name = 'test-tfs-ei-deploy-model-{}'.format(suffix)
+    endpoint_config_name = 'test-tfs-endpoint-config-{}'.format(suffix)
+    model_name = 'test-tfs-ei-model-{}'.format(suffix)
+
+    # Use the same region the image URI and model data location were built from.
+    session = boto3.Session(region_name=region)
+    client = session.client('sagemaker')
+    runtime_client = session.client('runtime.sagemaker')
+
+    # (delete function, kwargs) pairs, recorded only for resources that were actually created.
+    cleanup_calls = []
+    try:
+        client.create_model(ModelName=model_name,
+                            ExecutionRoleArn=_execution_role(session),
+                            PrimaryContainer={
+                                'Image': docker_image_uri,
+                                'ModelDataUrl': pretrained_model_data
+                            })
+        cleanup_calls.append((client.delete_model, {'ModelName': model_name}))
+
+        logger.info('deploying model to endpoint: {}'.format(endpoint_name))
+
+        client.create_endpoint_config(EndpointConfigName=endpoint_config_name,
+                                      ProductionVariants=_production_variants(model_name,
+                                                                              instance_type,
+                                                                              accelerator_type))
+        cleanup_calls.append((client.delete_endpoint_config,
+                              {'EndpointConfigName': endpoint_config_name}))
+
+        client.create_endpoint(EndpointName=endpoint_name,
+                               EndpointConfigName=endpoint_config_name)
+        cleanup_calls.append((client.delete_endpoint, {'EndpointName': endpoint_name}))
+
+        # A waiter failure propagates as-is, so the real cause is reported.
+        client.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)
+
+        input_data = {'instances': np.random.rand(1, 1, 3, 3).tolist()}
+
+        response = runtime_client.invoke_endpoint(EndpointName=endpoint_name,
+                                                  ContentType='application/json',
+                                                  Body=json.dumps(input_data))
+        result = json.loads(response['Body'].read().decode())
+        assert result['predictions'] is not None
+    finally:
+        # Best-effort cleanup in reverse creation order; a cleanup error is logged and
+        # must not hide the outcome of the test itself.
+        for delete, kwargs in reversed(cleanup_calls):
+            try:
+                delete(**kwargs)
+            except Exception:
+                logger.exception('failed to clean up test resource: %s', kwargs)
diff --git a/tox.ini b/tox.ini
@@ -43,7 +43,7 @@ require-code = True
 # Can be used to specify which tests to run, e.g.: tox -- -s
 basepython = python3
 commands =
-    python -m pytest {posargs}
+    python -m pytest {posargs} --ignore=test/sagemaker
 deps =
     pytest
     requests
Original file line number	Diff line number	Diff line change
Expand Up		@@ -10,4 +10,3 @@
		# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
		# ANY KIND, either express or implied. See the License for the specific
		# language governing permissions and limitations under the License.