Skip to content

feature: Add Neuronx Image uri - Transformers 4.28 - PyTorch 1.13 #3844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/sagemaker/huggingface/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,11 @@ def register(
)

def prepare_container_def(
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
self,
instance_type=None,
accelerator_type=None,
serverless_inference_config=None,
inference_tool=None,
):
"""A container definition with framework configuration set in model environment variables.

Expand All @@ -461,6 +465,8 @@ def prepare_container_def(
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
Specifies configuration related to serverless endpoint. Instance type is
not provided in serverless inference. So this is used to find image URIs.
inference_tool (str): the tool that will be used to aid in the inference.
Valid values: "neuron", "neuronx", None (default: None).

Returns:
dict[str, str]: A container definition object usable with the
Expand All @@ -479,6 +485,7 @@ def prepare_container_def(
instance_type,
accelerator_type=accelerator_type,
serverless_inference_config=serverless_inference_config,
inference_tool=inference_tool,
)

deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
Expand All @@ -500,6 +507,7 @@ def serving_image_uri(
instance_type=None,
accelerator_type=None,
serverless_inference_config=None,
inference_tool=None,
):
"""Create a URI for the serving image.

Expand All @@ -513,6 +521,8 @@ def serving_image_uri(
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
Specifies configuration related to serverless endpoint. Instance type is
not provided in serverless inference. So this is used to determine device type.
inference_tool (str): the tool that will be used to aid in the inference.
Valid values: "neuron", "neuronx", None (default: None).

Returns:
str: The appropriate image URI based on the given parameters.
Expand All @@ -534,4 +544,5 @@ def serving_image_uri(
image_scope="inference",
base_framework_version=base_framework_version,
serverless_inference_config=serverless_inference_config,
inference_tool=inference_tool,
)
98 changes: 98 additions & 0 deletions src/sagemaker/image_uri_config/huggingface-neuronx.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"training": {
"processors": ["trn"],
"version_aliases": {"4.28": "4.28.1"},
"versions": {
"4.28.1": {
"version_aliases": {"pytorch1.13": "pytorch1.13.0"},
"pytorch1.13.0": {
"py_versions": ["py38"],
"repository": "huggingface-pytorch-training-neuronx",
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-4": "457447274322",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"eu-south-2": "503227376785",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-east-1": "446045086412",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-isob-east-1": "094389454867",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"container_version": {"trn": "ubuntu20.04"},
"sdk_versions": ["sdk2.9.1"]
}
}
}
},
"inference": {
"processors": ["inf"],
"version_aliases": {"4.28": "4.28.1"},
"versions": {
"4.28.1": {
"version_aliases": {"pytorch1.13": "pytorch1.13.0"},
"pytorch1.13.0": {
"py_versions": ["py38"],
"repository": "huggingface-pytorch-inference-neuronx",
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-4": "457447274322",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"eu-south-2": "503227376785",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-east-1": "446045086412",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-isob-east-1": "094389454867",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"container_version": {"inf": "ubuntu20.04"},
"sdk_versions": ["sdk2.9.1"]
}
}
}
}
}
4 changes: 2 additions & 2 deletions src/sagemaker/image_uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def retrieve(
sdk_version (str): the version of python-sdk that will be used in the image retrieval.
(default: None).
inference_tool (str): the tool that will be used to aid in the inference.
Valid values: "neuron, None"
Valid values: "neuron", "neuronx", None
(default: None).
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
Specifies configuration related to serverless endpoint. Instance type is
Expand Down Expand Up @@ -158,7 +158,7 @@ def retrieve(
_framework = framework
if framework == HUGGING_FACE_FRAMEWORK or framework in TRAINIUM_ALLOWED_FRAMEWORKS:
inference_tool = _get_inference_tool(inference_tool, instance_type)
if inference_tool == "neuron":
if inference_tool in ["neuron", "neuronx"]:
_framework = f"{framework}-{inference_tool}"
final_image_scope = _get_final_image_scope(framework, instance_type, image_scope)
_validate_for_suppported_frameworks_and_instance_type(framework, instance_type)
Expand Down
30 changes: 30 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,16 +407,46 @@ def huggingface_neuron_latest_inference_pytorch_version():
return "1.9"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_inference_pytorch_version():
    """Return the PyTorch version string used for HuggingFace Neuronx inference tests."""
    return "1.13"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_training_pytorch_version():
    """Return the PyTorch version string for HuggingFace Neuronx training (per the fixture name)."""
    return "1.13"


@pytest.fixture(scope="module")
def huggingface_neuron_latest_inference_transformer_version():
    """Return the transformers version string for HuggingFace Neuron inference (per the fixture name)."""
    return "4.12"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_inference_transformer_version():
    """Return the transformers version string used for HuggingFace Neuronx inference tests."""
    return "4.28"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_training_transformer_version():
    """Return the transformers version string for HuggingFace Neuronx training (per the fixture name)."""
    return "4.28"


@pytest.fixture(scope="module")
def huggingface_neuron_latest_inference_py_version():
    """Return the Python version tag for HuggingFace Neuron inference (per the fixture name)."""
    return "py37"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_inference_py_version():
    """Return the Python version tag used for HuggingFace Neuronx inference tests."""
    return "py38"


@pytest.fixture(scope="module")
def huggingface_neuronx_latest_training_py_version():
    """Return the Python version tag for HuggingFace Neuronx training (per the fixture name)."""
    return "py38"


@pytest.fixture(scope="module")
def pytorch_neuron_version():
    """Return the PyTorch Neuron version string (per the fixture name)."""
    return "1.11"
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/sagemaker/huggingface/huggingface_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

# Default AWS region used by the image-URI helpers in this module.
REGION = "us-east-1"
# GPU instance type; presumably the default for the GPU image-URI helper.
GPU_INSTANCE_TYPE = "ml.p2.xlarge"
# Default instance type for get_full_neuronx_image_uri.
NEURONX_INSTANCE_TYPE = "ml.trn1.2xlarge"


def get_full_gpu_image_uri(
Expand All @@ -33,3 +34,21 @@ def get_full_gpu_image_uri(
base_framework_version=base_framework_version,
container_version="cu110-ubuntu18.04",
)


def get_full_neuronx_image_uri(
    version,
    base_framework_version,
    region=REGION,
    instance_type=NEURONX_INSTANCE_TYPE,
):
    """Retrieve the HuggingFace Neuronx training image URI via ``image_uris.retrieve``.

    Args:
        version: Passed to ``retrieve`` as ``version``.
        base_framework_version: Passed to ``retrieve`` as ``base_framework_version``.
        region: Region to look the image up in (default: ``REGION``).
        instance_type: Instance type for the lookup
            (default: ``NEURONX_INSTANCE_TYPE``).

    Returns:
        The value returned by ``image_uris.retrieve`` for these arguments.
    """
    lookup_kwargs = {
        "version": version,
        "instance_type": instance_type,
        "image_scope": "training",
        "base_framework_version": base_framework_version,
        # NOTE(review): this is a CUDA-style container tag, which looks copied
        # from the GPU helper; confirm whether retrieve() overrides it for
        # Neuronx images or whether it should be dropped.
        "container_version": "cu110-ubuntu18.04",
        "inference_tool": "neuronx",
    }
    return image_uris.retrieve("huggingface", region, **lookup_kwargs)
22 changes: 21 additions & 1 deletion tests/unit/sagemaker/huggingface/test_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,27 @@ def test_huggingface_neuron(
pytorch_version=huggingface_neuron_latest_inference_pytorch_version,
py_version=huggingface_neuron_latest_inference_py_version,
)
container = huggingface_model.prepare_container_def("ml.inf.xlarge")
container = huggingface_model.prepare_container_def("ml.inf1.xlarge", inference_tool="neuron")
assert container["Image"]


def test_huggingface_neuronx(
    sagemaker_session,
    huggingface_neuronx_latest_inference_pytorch_version,
    huggingface_neuronx_latest_inference_transformer_version,
    huggingface_neuronx_latest_inference_py_version,
):
    """Check that a Neuronx HuggingFaceModel container definition has a non-empty image."""
    model_data_uri = "s3://mybucket/train"
    model = HuggingFaceModel(
        model_data=model_data_uri,
        transformers_version=huggingface_neuronx_latest_inference_transformer_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        pytorch_version=huggingface_neuronx_latest_inference_pytorch_version,
        py_version=huggingface_neuronx_latest_inference_py_version,
    )

    # Request the container definition for an inf2 instance with the neuronx tool.
    container_def = model.prepare_container_def(
        "ml.inf2.xlarge",
        inference_tool="neuronx",
    )
    assert container_def["Image"]


Expand Down