Skip to content

Commit 57f3bb9

Browse files
feature: Add Neuronx Image uri - Transformers 4.28 - PyTorch 1.13 (#3844)
Co-authored-by: Clayton Parnell <[email protected]>
1 parent b538525 commit 57f3bb9

File tree

6 files changed

+182
-4
lines changed

6 files changed

+182
-4
lines changed

src/sagemaker/huggingface/model.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,11 @@ def register(
448448
)
449449

450450
def prepare_container_def(
451-
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
451+
self,
452+
instance_type=None,
453+
accelerator_type=None,
454+
serverless_inference_config=None,
455+
inference_tool=None,
452456
):
453457
"""A container definition with framework configuration set in model environment variables.
454458
@@ -461,6 +465,8 @@ def prepare_container_def(
461465
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
462466
Specifies configuration related to serverless endpoint. Instance type is
463467
not provided in serverless inference. So this is used to find image URIs.
468+
inference_tool (str): the tool that will be used to aid in the inference.
469+
Valid values: "neuron", "neuronx", None (default: None).
464470
465471
Returns:
466472
dict[str, str]: A container definition object usable with the
@@ -479,6 +485,7 @@ def prepare_container_def(
479485
instance_type,
480486
accelerator_type=accelerator_type,
481487
serverless_inference_config=serverless_inference_config,
488+
inference_tool=inference_tool,
482489
)
483490

484491
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -500,6 +507,7 @@ def serving_image_uri(
500507
instance_type=None,
501508
accelerator_type=None,
502509
serverless_inference_config=None,
510+
inference_tool=None,
503511
):
504512
"""Create a URI for the serving image.
505513
@@ -513,6 +521,8 @@ def serving_image_uri(
513521
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
514522
Specifies configuration related to serverless endpoint. Instance type is
515523
not provided in serverless inference. So this is used to determine device type.
524+
inference_tool (str): the tool that will be used to aid in the inference.
525+
Valid values: "neuron", "neuronx", None (default: None).
516526
517527
Returns:
518528
str: The appropriate image URI based on the given parameters.
@@ -534,4 +544,5 @@ def serving_image_uri(
534544
image_scope="inference",
535545
base_framework_version=base_framework_version,
536546
serverless_inference_config=serverless_inference_config,
547+
inference_tool=inference_tool,
537548
)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
{
2+
"training": {
3+
"processors": ["trn"],
4+
"version_aliases": {"4.28": "4.28.1"},
5+
"versions": {
6+
"4.28.1": {
7+
"version_aliases": {"pytorch1.13": "pytorch1.13.0"},
8+
"pytorch1.13.0": {
9+
"py_versions": ["py38"],
10+
"repository": "huggingface-pytorch-training-neuronx",
11+
"registries": {
12+
"af-south-1": "626614931356",
13+
"ap-east-1": "871362719292",
14+
"ap-northeast-1": "763104351884",
15+
"ap-northeast-2": "763104351884",
16+
"ap-northeast-3": "364406365360",
17+
"ap-south-1": "763104351884",
18+
"ap-south-2": "772153158452",
19+
"ap-southeast-1": "763104351884",
20+
"ap-southeast-2": "763104351884",
21+
"ap-southeast-4": "457447274322",
22+
"ca-central-1": "763104351884",
23+
"cn-north-1": "727897471807",
24+
"cn-northwest-1": "727897471807",
25+
"eu-central-1": "763104351884",
26+
"eu-central-2": "380420809688",
27+
"eu-north-1": "763104351884",
28+
"eu-west-1": "763104351884",
29+
"eu-west-2": "763104351884",
30+
"eu-west-3": "763104351884",
31+
"eu-south-1": "692866216735",
32+
"eu-south-2": "503227376785",
33+
"me-south-1": "217643126080",
34+
"sa-east-1": "763104351884",
35+
"us-east-1": "763104351884",
36+
"us-east-2": "763104351884",
37+
"us-gov-east-1": "446045086412",
38+
"us-gov-west-1": "442386744353",
39+
"us-iso-east-1": "886529160074",
40+
"us-isob-east-1": "094389454867",
41+
"us-west-1": "763104351884",
42+
"us-west-2": "763104351884"
43+
},
44+
"container_version": {"trn": "ubuntu20.04"},
45+
"sdk_versions": ["sdk2.9.1"]
46+
}
47+
}
48+
}
49+
},
50+
"inference": {
51+
"processors": ["inf"],
52+
"version_aliases": {"4.28": "4.28.1"},
53+
"versions": {
54+
"4.28.1": {
55+
"version_aliases": {"pytorch1.13": "pytorch1.13.0"},
56+
"pytorch1.13.0": {
57+
"py_versions": ["py38"],
58+
"repository": "huggingface-pytorch-inference-neuronx",
59+
"registries": {
60+
"af-south-1": "626614931356",
61+
"ap-east-1": "871362719292",
62+
"ap-northeast-1": "763104351884",
63+
"ap-northeast-2": "763104351884",
64+
"ap-northeast-3": "364406365360",
65+
"ap-south-1": "763104351884",
66+
"ap-south-2": "772153158452",
67+
"ap-southeast-1": "763104351884",
68+
"ap-southeast-2": "763104351884",
69+
"ap-southeast-4": "457447274322",
70+
"ca-central-1": "763104351884",
71+
"cn-north-1": "727897471807",
72+
"cn-northwest-1": "727897471807",
73+
"eu-central-1": "763104351884",
74+
"eu-central-2": "380420809688",
75+
"eu-north-1": "763104351884",
76+
"eu-west-1": "763104351884",
77+
"eu-west-2": "763104351884",
78+
"eu-west-3": "763104351884",
79+
"eu-south-1": "692866216735",
80+
"eu-south-2": "503227376785",
81+
"me-south-1": "217643126080",
82+
"sa-east-1": "763104351884",
83+
"us-east-1": "763104351884",
84+
"us-east-2": "763104351884",
85+
"us-gov-east-1": "446045086412",
86+
"us-gov-west-1": "442386744353",
87+
"us-iso-east-1": "886529160074",
88+
"us-isob-east-1": "094389454867",
89+
"us-west-1": "763104351884",
90+
"us-west-2": "763104351884"
91+
},
92+
"container_version": {"inf": "ubuntu20.04"},
93+
"sdk_versions": ["sdk2.9.1"]
94+
}
95+
}
96+
}
97+
}
98+
}

src/sagemaker/image_uris.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def retrieve(
104104
sdk_version (str): the version of python-sdk that will be used in the image retrieval.
105105
(default: None).
106106
inference_tool (str): the tool that will be used to aid in the inference.
107-
Valid values: "neuron, None"
107+
Valid values: "neuron", "neuronx", None
108108
(default: None).
109109
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
110110
Specifies configuration related to serverless endpoint. Instance type is
@@ -158,7 +158,7 @@ def retrieve(
158158
_framework = framework
159159
if framework == HUGGING_FACE_FRAMEWORK or framework in TRAINIUM_ALLOWED_FRAMEWORKS:
160160
inference_tool = _get_inference_tool(inference_tool, instance_type)
161-
if inference_tool == "neuron":
161+
if inference_tool in ["neuron", "neuronx"]:
162162
_framework = f"{framework}-{inference_tool}"
163163
final_image_scope = _get_final_image_scope(framework, instance_type, image_scope)
164164
_validate_for_suppported_frameworks_and_instance_type(framework, instance_type)

tests/conftest.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,16 +407,46 @@ def huggingface_neuron_latest_inference_pytorch_version():
407407
return "1.9"
408408

409409

410+
@pytest.fixture(scope="module")
411+
def huggingface_neuronx_latest_inference_pytorch_version():
412+
return "1.13"
413+
414+
415+
@pytest.fixture(scope="module")
416+
def huggingface_neuronx_latest_training_pytorch_version():
417+
return "1.13"
418+
419+
410420
@pytest.fixture(scope="module")
411421
def huggingface_neuron_latest_inference_transformer_version():
412422
return "4.12"
413423

414424

425+
@pytest.fixture(scope="module")
426+
def huggingface_neuronx_latest_inference_transformer_version():
427+
return "4.28"
428+
429+
430+
@pytest.fixture(scope="module")
431+
def huggingface_neuronx_latest_training_transformer_version():
432+
return "4.28"
433+
434+
415435
@pytest.fixture(scope="module")
416436
def huggingface_neuron_latest_inference_py_version():
417437
return "py37"
418438

419439

440+
@pytest.fixture(scope="module")
441+
def huggingface_neuronx_latest_inference_py_version():
442+
return "py38"
443+
444+
445+
@pytest.fixture(scope="module")
446+
def huggingface_neuronx_latest_training_py_version():
447+
return "py38"
448+
449+
420450
@pytest.fixture(scope="module")
421451
def pytorch_neuron_version():
422452
return "1.11"

tests/unit/sagemaker/huggingface/huggingface_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
REGION = "us-east-1"
1818
GPU_INSTANCE_TYPE = "ml.p2.xlarge"
19+
NEURONX_INSTANCE_TYPE = "ml.trn1.2xlarge"
1920

2021

2122
def get_full_gpu_image_uri(
@@ -33,3 +34,21 @@ def get_full_gpu_image_uri(
3334
base_framework_version=base_framework_version,
3435
container_version="cu110-ubuntu18.04",
3536
)
37+
38+
39+
def get_full_neuronx_image_uri(
    version,
    base_framework_version,
    region=REGION,
    instance_type=NEURONX_INSTANCE_TYPE,
):
    """Retrieve the Hugging Face NeuronX training image URI for unit tests.

    Thin wrapper around ``image_uris.retrieve`` that pins the framework to
    ``"huggingface"``, the image scope to ``"training"`` and the inference
    tool to ``"neuronx"``.

    Args:
        version (str): Version forwarded as ``version`` to
            ``image_uris.retrieve``.
        base_framework_version (str): Forwarded as ``base_framework_version``
            to ``image_uris.retrieve``.
        region (str): AWS region to resolve the image for
            (default: ``REGION``).
        instance_type (str): Instance type used for the lookup
            (default: ``NEURONX_INSTANCE_TYPE``).

    Returns:
        Whatever ``image_uris.retrieve`` returns for these arguments.
    """
    return image_uris.retrieve(
        "huggingface",
        region,
        version=version,
        instance_type=instance_type,
        image_scope="training",
        base_framework_version=base_framework_version,
        # The neuronx image config declares "ubuntu20.04" for the "trn"
        # processor; the previous "cu110-ubuntu18.04" was a CUDA value
        # carried over from the GPU helper and does not describe this image.
        container_version="ubuntu20.04",
        inference_tool="neuronx",
    )

tests/unit/sagemaker/huggingface/test_estimator.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,27 @@ def test_huggingface_neuron(
269269
pytorch_version=huggingface_neuron_latest_inference_pytorch_version,
270270
py_version=huggingface_neuron_latest_inference_py_version,
271271
)
272-
container = huggingface_model.prepare_container_def("ml.inf.xlarge")
272+
container = huggingface_model.prepare_container_def("ml.inf1.xlarge", inference_tool="neuron")
273+
assert container["Image"]
274+
275+
276+
def test_huggingface_neuronx(
277+
sagemaker_session,
278+
huggingface_neuronx_latest_inference_pytorch_version,
279+
huggingface_neuronx_latest_inference_transformer_version,
280+
huggingface_neuronx_latest_inference_py_version,
281+
):
282+
283+
inputs = "s3://mybucket/train"
284+
huggingface_model = HuggingFaceModel(
285+
model_data=inputs,
286+
transformers_version=huggingface_neuronx_latest_inference_transformer_version,
287+
role=ROLE,
288+
sagemaker_session=sagemaker_session,
289+
pytorch_version=huggingface_neuronx_latest_inference_pytorch_version,
290+
py_version=huggingface_neuronx_latest_inference_py_version,
291+
)
292+
container = huggingface_model.prepare_container_def("ml.inf2.xlarge", inference_tool="neuronx")
273293
assert container["Image"]
274294

275295

0 commit comments

Comments
 (0)