Skip to content

fix: account for EI and version-based ECR repo naming in serving_image_uri() #1273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 25 additions & 22 deletions src/sagemaker/mxnet/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

import packaging.version

from sagemaker import fw_utils

import sagemaker
from sagemaker.fw_utils import (
create_image_uri,
Expand Down Expand Up @@ -143,29 +141,15 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
dict[str, str]: A container definition object usable with the
CreateModel API.
"""
is_mms_version = packaging.version.Version(
self.framework_version
) >= packaging.version.Version(self._LOWEST_MMS_VERSION)

deploy_image = self.image
if not deploy_image:
region_name = self.sagemaker_session.boto_session.region_name

framework_name = self.__framework_name__
if is_mms_version:
framework_name += "-serving"

deploy_image = create_image_uri(
region_name,
framework_name,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
deploy_image = self.serving_image_uri(
region_name, instance_type, accelerator_type=accelerator_type
)

deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
self._upload_code(deploy_key_prefix, is_mms_version)
self._upload_code(deploy_key_prefix, self._is_mms_version())
deploy_env = dict(self.env)
deploy_env.update(self._framework_env_vars())

Expand All @@ -175,22 +159,41 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
deploy_image, self.repacked_model_data or self.model_data, deploy_env
)

def serving_image_uri(self, region_name, instance_type):
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
"""Create a URI for the serving image.

Args:
region_name (str): AWS region where the image is uploaded.
instance_type (str): SageMaker instance type. Used to determine device type
(cpu/gpu/family-specific optimized).
accelerator_type (str): The Elastic Inference accelerator type to
deploy to the instance for loading and making inferences to the
model (default: None). For example, 'ml.eia1.medium'.

Returns:
str: The appropriate image URI based on the given parameters.

"""
return fw_utils.create_image_uri(
framework_name = self.__framework_name__
if self._is_mms_version():
framework_name = "{}-serving".format(framework_name)

return create_image_uri(
region_name,
"-".join([self.__framework_name__, "serving"]),
framework_name,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
)

def _is_mms_version(self):
    """Check whether this model's framework version maps to an inference
    image that uses the Multi-Model Server
    (https://github.com/awslabs/multi-model-server).

    Returns:
        bool: If the framework version corresponds to an image using MMS.
    """
    # Parse both version strings once, then compare semantically rather
    # than lexically (e.g. "1.10" >= "1.4").
    parse = packaging.version.Version
    return parse(self.framework_version) >= parse(self._LOWEST_MMS_VERSION)
49 changes: 27 additions & 22 deletions src/sagemaker/pytorch/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import logging
import packaging.version
from sagemaker import fw_utils

import sagemaker
from sagemaker.fw_utils import (
Expand Down Expand Up @@ -137,34 +136,21 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
For example, 'ml.p2.xlarge'.
accelerator_type (str): The Elastic Inference accelerator type to
deploy to the instance for loading and making inferences to the
model. For example, 'ml.eia1.medium'.
model. Currently unsupported with PyTorch.

Returns:
dict[str, str]: A container definition object usable with the
CreateModel API.
"""
lowest_mms_version = packaging.version.Version(self._LOWEST_MMS_VERSION)
framework_version = packaging.version.Version(self.framework_version)
is_mms_version = framework_version >= lowest_mms_version

deploy_image = self.image
if not deploy_image:
region_name = self.sagemaker_session.boto_session.region_name

framework_name = self.__framework_name__
if is_mms_version:
framework_name += "-serving"

deploy_image = create_image_uri(
region_name,
framework_name,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
deploy_image = self.serving_image_uri(
region_name, instance_type, accelerator_type=accelerator_type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Minor] Keyword/Named arguments make kittens happy.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

talked offline - I don't see a significant benefit of doing region_name=region_name over region_name

)

deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
self._upload_code(deploy_key_prefix, repack=is_mms_version)
self._upload_code(deploy_key_prefix, repack=self._is_mms_version())
deploy_env = dict(self.env)
deploy_env.update(self._framework_env_vars())

Expand All @@ -174,22 +160,41 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
deploy_image, self.repacked_model_data or self.model_data, deploy_env
)

def serving_image_uri(self, region_name, instance_type):
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
"""Create a URI for the serving image.

Args:
region_name (str): AWS region where the image is uploaded.
instance_type (str): SageMaker instance type. Used to determine device type
(cpu/gpu/family-specific optimized).
accelerator_type (str): The Elastic Inference accelerator type to
deploy to the instance for loading and making inferences to the
model. Currently unsupported with PyTorch.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above. What happens if this is passed in? Is the img not found?


Returns:
str: The appropriate image URI based on the given parameters.

"""
return fw_utils.create_image_uri(
framework_name = self.__framework_name__
if self._is_mms_version():
framework_name = "{}-serving".format(framework_name)

return create_image_uri(
region_name,
"-".join([self.__framework_name__, "serving"]),
framework_name,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
)

def _is_mms_version(self):
    """Whether the framework version corresponds to an inference image using
    the Multi-Model Server (https://github.com/awslabs/multi-model-server).

    Returns:
        bool: If the framework version corresponds to an image using MMS.
    """
    # Semantic-version comparison: the configured version is MMS-capable
    # when it is not older than the lowest MMS-serving release.
    current = packaging.version.Version(self.framework_version)
    return not current < packaging.version.Version(self._LOWEST_MMS_VERSION)
21 changes: 9 additions & 12 deletions src/sagemaker/tensorflow/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

import logging

from sagemaker import fw_utils

import sagemaker
from sagemaker.fw_utils import (
create_image_uri,
Expand Down Expand Up @@ -146,13 +144,8 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
deploy_image = self.image
if not deploy_image:
region_name = self.sagemaker_session.boto_region_name
deploy_image = create_image_uri(
region_name,
self.__framework_name__,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
deploy_image = self.serving_image_uri(
region_name, instance_type, accelerator_type=accelerator_type
)

deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
Expand All @@ -165,22 +158,26 @@ def prepare_container_def(self, instance_type, accelerator_type=None):

return sagemaker.container_def(deploy_image, self.model_data, deploy_env)

def serving_image_uri(self, region_name, instance_type):
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
"""Create a URI for the serving image.

Args:
region_name (str): AWS region where the image is uploaded.
instance_type (str): SageMaker instance type. Used to determine device type
(cpu/gpu/family-specific optimized).
accelerator_type (str): The Elastic Inference accelerator type to
deploy to the instance for loading and making inferences to the
model (default: None). For example, 'ml.eia1.medium'.

Returns:
str: The appropriate image URI based on the given parameters.

"""
return fw_utils.create_image_uri(
return create_image_uri(
region_name,
"-".join([self.__framework_name__, "serving"]),
self.__framework_name__,
instance_type,
self.framework_version,
self.py_version,
accelerator_type=accelerator_type,
)
9 changes: 7 additions & 2 deletions src/sagemaker/tensorflow/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,21 @@ def _get_image_uri(self, instance_type, accelerator_type=None):
accelerator_type=accelerator_type,
)

def serving_image_uri(self, region_name, instance_type): # pylint: disable=unused-argument
def serving_image_uri(
self, region_name, instance_type, accelerator_type=None
): # pylint: disable=unused-argument
"""Create a URI for the serving image.

Args:
region_name (str): AWS region where the image is uploaded.
instance_type (str): SageMaker instance type. Used to determine device type
(cpu/gpu/family-specific optimized).
accelerator_type (str): The Elastic Inference accelerator type to
deploy to the instance for loading and making inferences to the
model (default: None). For example, 'ml.eia1.medium'.

Returns:
str: The appropriate image URI based on the given parameters.

"""
return self._get_image_uri(instance_type=instance_type)
return self._get_image_uri(instance_type=instance_type, accelerator_type=accelerator_type)
26 changes: 13 additions & 13 deletions tests/unit/test_airflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ def test_model_config_from_framework_estimator(sagemaker_session):
entry_point="{{ entry_point }}",
source_dir="{{ source_dir }}",
py_version="py3",
framework_version="1.3.0",
framework_version="1.6.0",
role="{{ role }}",
train_instance_count=1,
train_instance_type="ml.m4.xlarge",
Expand All @@ -1051,9 +1051,9 @@ def test_model_config_from_framework_estimator(sagemaker_session):
task_type="training",
)
expected_config = {
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
"PrimaryContainer": {
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-cpu-py3",
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-cpu-py3",
"Environment": {
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
Expand Down Expand Up @@ -1184,7 +1184,7 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
entry_point="{{ entry_point }}",
source_dir="{{ source_dir }}",
py_version="py3",
framework_version="1.3.0",
framework_version="1.6.0",
role="{{ role }}",
train_instance_count=1,
train_instance_type="ml.m4.xlarge",
Expand All @@ -1209,9 +1209,9 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
)
expected_config = {
"Model": {
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
"PrimaryContainer": {
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-gpu-py3",
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-gpu-py3",
"Environment": {
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')"
Expand All @@ -1228,7 +1228,7 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
},
"Transform": {
"TransformJobName": "{{ base_job_name }}-%s" % TIME_STAMP,
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
"TransformInput": {
"DataSource": {
"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "{{ transform_data }}"}
Expand Down Expand Up @@ -1425,7 +1425,7 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
entry_point="{{ entry_point }}",
source_dir="{{ source_dir }}",
py_version="py3",
framework_version="1.3.0",
framework_version="1.6.0",
role="{{ role }}",
train_instance_count=1,
train_instance_type="ml.m4.xlarge",
Expand All @@ -1449,9 +1449,9 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
)
expected_config = {
"Model": {
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
"PrimaryContainer": {
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-cpu-py3",
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-cpu-py3",
"Environment": {
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
Expand All @@ -1466,20 +1466,20 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
"ExecutionRoleArn": "{{ role }}",
},
"EndpointConfig": {
"EndpointConfigName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"EndpointConfigName": "mxnet-inference-%s" % TIME_STAMP,
"ProductionVariants": [
{
"InstanceType": "ml.c4.large",
"InitialInstanceCount": "{{ instance_count}}",
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
"VariantName": "AllTraffic",
"InitialVariantWeight": 1,
}
],
},
"Endpoint": {
"EndpointName": "mxnet-endpoint",
"EndpointConfigName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
"EndpointConfigName": "mxnet-inference-%s" % TIME_STAMP,
},
}

Expand Down