Skip to content

Commit aa94918

Browse files
authored
Merge branch 'master' into feat/jumpstartmodel-attach
2 parents 7b0b80e + 4e83cce commit aa94918

File tree

19 files changed

+977
-20
lines changed

19 files changed

+977
-20
lines changed

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
# Changelog
22

3+
## v2.221.0 (2024-05-20)
4+
5+
### Features
6+
7+
* onboard tei image config to pysdk
8+
9+
### Bug Fixes and Other Changes
10+
11+
* JS Model with non-TGI/non-DJL deployment failure
12+
* cover tei with image_uris.retrieve API
13+
* Add more debugging
14+
* model builder limited container support for endpoint mode.
15+
* Image URI should take precedence for HF models
16+
17+
## v2.220.0 (2024-05-15)
18+
19+
### Features
20+
21+
* AutoGluon 1.1.0 image_uris update
22+
* add new images for HF TGI release
23+
* Add telemetry support for mlflow models
24+
25+
### Bug Fixes and Other Changes
26+
27+
* add debug logs to workflow container dist creation
28+
* model builder race condition on sagemaker session
29+
* Add tensorflow_serving support for mlflow models and enable lineage tracking for mlflow models
30+
* update image_uri_configs 05-09-2024 07:17:41 PST
31+
* skip flaky tests pending investigation
32+
333
## v2.219.0 (2024-05-08)
434

535
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.219.1.dev0
1+
2.221.1.dev0

src/sagemaker/huggingface/llm_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ def get_huggingface_llm_image_uri(
6565
image_scope="inference",
6666
inference_tool="neuronx",
6767
)
68+
if backend == "huggingface-tei":
69+
return image_uris.retrieve(
70+
"huggingface-tei",
71+
region=region,
72+
version=version,
73+
image_scope="inference",
74+
)
6875
if backend == "lmi":
6976
version = version or "0.24.0"
7077
return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"inference": {
3+
"processors": [
4+
"gpu"
5+
],
6+
"version_aliases": {
7+
"1.2": "1.2.3"
8+
},
9+
"versions": {
10+
"1.2.3": {
11+
"py_versions": [
12+
"py310"
13+
],
14+
"registries": {
15+
"af-south-1": "510948584623",
16+
"ap-east-1": "651117190479",
17+
"ap-northeast-1": "354813040037",
18+
"ap-northeast-2": "366743142698",
19+
"ap-northeast-3": "867004704886",
20+
"ap-south-1": "720646828776",
21+
"ap-south-2": "628508329040",
22+
"ap-southeast-1": "121021644041",
23+
"ap-southeast-2": "783357654285",
24+
"ap-southeast-3": "951798379941",
25+
"ap-southeast-4": "106583098589",
26+
"ca-central-1": "341280168497",
27+
"ca-west-1": "190319476487",
28+
"cn-north-1": "450853457545",
29+
"cn-northwest-1": "451049120500",
30+
"eu-central-1": "492215442770",
31+
"eu-central-2": "680994064768",
32+
"eu-north-1": "662702820516",
33+
"eu-south-1": "978288397137",
34+
"eu-south-2": "104374241257",
35+
"eu-west-1": "141502667606",
36+
"eu-west-2": "764974769150",
37+
"eu-west-3": "659782779980",
38+
"il-central-1": "898809789911",
39+
"me-central-1": "272398656194",
40+
"me-south-1": "801668240914",
41+
"sa-east-1": "737474898029",
42+
"us-east-1": "683313688378",
43+
"us-east-2": "257758044811",
44+
"us-gov-east-1": "237065988967",
45+
"us-gov-west-1": "414596584902",
46+
"us-iso-east-1": "833128469047",
47+
"us-isob-east-1": "281123927165",
48+
"us-west-1": "746614075791",
49+
"us-west-2": "246618743249"
50+
},
51+
"tag_prefix": "2.0.1-tei1.2.3",
52+
"repository": "tei",
53+
"container_version": {
54+
"gpu": "cu122-ubuntu22.04"
55+
}
56+
}
57+
}
58+
}
59+
}

src/sagemaker/image_uris.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}"
3838
HUGGING_FACE_FRAMEWORK = "huggingface"
3939
HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm"
40+
HUGGING_FACE_TEI_FRAMEWORK = "huggingface-tei"
4041
HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx"
4142
XGBOOST_FRAMEWORK = "xgboost"
4243
SKLEARN_FRAMEWORK = "sklearn"
@@ -477,6 +478,7 @@ def _validate_version_and_set_if_needed(version, config, framework):
477478
if version is None and framework in [
478479
DATA_WRANGLER_FRAMEWORK,
479480
HUGGING_FACE_LLM_FRAMEWORK,
481+
HUGGING_FACE_TEI_FRAMEWORK,
480482
HUGGING_FACE_LLM_NEURONX_FRAMEWORK,
481483
STABILITYAI_FRAMEWORK,
482484
]:

src/sagemaker/serve/builder/jumpstart_builder.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from sagemaker import model_uris
2424
from sagemaker.serve.model_server.djl_serving.prepare import prepare_djl_js_resources
2525
from sagemaker.serve.model_server.djl_serving.utils import _get_admissible_tensor_parallel_degrees
26+
from sagemaker.serve.model_server.multi_model_server.prepare import prepare_mms_js_resources
2627
from sagemaker.serve.model_server.tgi.prepare import prepare_tgi_js_resources, _create_dir_structure
2728
from sagemaker.serve.mode.function_pointers import Mode
2829
from sagemaker.serve.utils.exceptions import (
@@ -35,6 +36,7 @@
3536
from sagemaker.serve.utils.predictors import (
3637
DjlLocalModePredictor,
3738
TgiLocalModePredictor,
39+
TransformersLocalModePredictor,
3840
)
3941
from sagemaker.serve.utils.local_hardware import (
4042
_get_nb_instance,
@@ -90,6 +92,7 @@ def __init__(self):
9092
self.existing_properties = None
9193
self.prepared_for_tgi = None
9294
self.prepared_for_djl = None
95+
self.prepared_for_mms = None
9396
self.schema_builder = None
9497
self.nb_instance_type = None
9598
self.ram_usage_model_load = None
@@ -137,7 +140,11 @@ def _js_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBase]:
137140

138141
if overwrite_mode == Mode.SAGEMAKER_ENDPOINT:
139142
self.mode = self.pysdk_model.mode = Mode.SAGEMAKER_ENDPOINT
140-
if not hasattr(self, "prepared_for_djl") or not hasattr(self, "prepared_for_tgi"):
143+
if (
144+
not hasattr(self, "prepared_for_djl")
145+
or not hasattr(self, "prepared_for_tgi")
146+
or not hasattr(self, "prepared_for_mms")
147+
):
141148
self.pysdk_model.model_data, env = self._prepare_for_mode()
142149
elif overwrite_mode == Mode.LOCAL_CONTAINER:
143150
self.mode = self.pysdk_model.mode = Mode.LOCAL_CONTAINER
@@ -160,6 +167,13 @@ def _js_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBase]:
160167
dependencies=self.dependencies,
161168
model_data=self.pysdk_model.model_data,
162169
)
170+
elif not hasattr(self, "prepared_for_mms"):
171+
self.js_model_config, self.prepared_for_mms = prepare_mms_js_resources(
172+
model_path=self.model_path,
173+
js_id=self.model,
174+
dependencies=self.dependencies,
175+
model_data=self.pysdk_model.model_data,
176+
)
163177

164178
self._prepare_for_mode()
165179
env = {}
@@ -179,6 +193,10 @@ def _js_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBase]:
179193
predictor = TgiLocalModePredictor(
180194
self.modes[str(Mode.LOCAL_CONTAINER)], serializer, deserializer
181195
)
196+
elif self.model_server == ModelServer.MMS:
197+
predictor = TransformersLocalModePredictor(
198+
self.modes[str(Mode.LOCAL_CONTAINER)], serializer, deserializer
199+
)
182200

183201
ram_usage_before = _get_ram_usage_mb()
184202
self.modes[str(Mode.LOCAL_CONTAINER)].create_server(
@@ -254,6 +272,24 @@ def _build_for_tgi_jumpstart(self):
254272

255273
self.pysdk_model.env.update(env)
256274

275+
def _build_for_mms_jumpstart(self):
276+
"""Placeholder docstring"""
277+
278+
env = {}
279+
if self.mode == Mode.LOCAL_CONTAINER:
280+
if not hasattr(self, "prepared_for_mms"):
281+
self.js_model_config, self.prepared_for_mms = prepare_mms_js_resources(
282+
model_path=self.model_path,
283+
js_id=self.model,
284+
dependencies=self.dependencies,
285+
model_data=self.pysdk_model.model_data,
286+
)
287+
self._prepare_for_mode()
288+
elif self.mode == Mode.SAGEMAKER_ENDPOINT and hasattr(self, "prepared_for_mms"):
289+
self.pysdk_model.model_data, env = self._prepare_for_mode()
290+
291+
self.pysdk_model.env.update(env)
292+
257293
def _tune_for_js(self, sharded_supported: bool, max_tuning_duration: int = 1800):
258294
"""Tune for Jumpstart Models in Local Mode.
259295
@@ -264,7 +300,7 @@ def _tune_for_js(self, sharded_supported: bool, max_tuning_duration: int = 1800)
264300
returns:
265301
Tuned Model.
266302
"""
267-
if self.mode != Mode.LOCAL_CONTAINER:
303+
if self.mode == Mode.SAGEMAKER_ENDPOINT:
268304
logger.warning(
269305
"Tuning is only a %s capability. Returning original model.", Mode.LOCAL_CONTAINER
270306
)
@@ -438,7 +474,6 @@ def _build_for_jumpstart(self):
438474
self.jumpstart = True
439475

440476
pysdk_model = self._create_pre_trained_js_model()
441-
442477
image_uri = pysdk_model.image_uri
443478

444479
logger.info("JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri)
@@ -451,7 +486,6 @@ def _build_for_jumpstart(self):
451486
if "djl-inference" in image_uri:
452487
logger.info("Building for DJL JumpStart Model ID...")
453488
self.model_server = ModelServer.DJL_SERVING
454-
455489
self.pysdk_model = pysdk_model
456490
self.image_uri = self.pysdk_model.image_uri
457491

@@ -461,16 +495,23 @@ def _build_for_jumpstart(self):
461495
elif "tgi-inference" in image_uri:
462496
logger.info("Building for TGI JumpStart Model ID...")
463497
self.model_server = ModelServer.TGI
464-
465498
self.pysdk_model = pysdk_model
466499
self.image_uri = self.pysdk_model.image_uri
467500

468501
self._build_for_tgi_jumpstart()
469502

470503
self.pysdk_model.tune = self.tune_for_tgi_jumpstart
471-
else:
504+
elif "huggingface-pytorch-inference:" in image_uri:
505+
logger.info("Building for MMS JumpStart Model ID...")
506+
self.model_server = ModelServer.MMS
507+
self.pysdk_model = pysdk_model
508+
self.image_uri = self.pysdk_model.image_uri
509+
510+
self._build_for_mms_jumpstart()
511+
elif self.mode != Mode.SAGEMAKER_ENDPOINT:
472512
raise ValueError(
473-
"JumpStart Model ID was not packaged with djl-inference or tgi-inference container."
513+
"JumpStart Model ID was not packaged "
514+
"with djl-inference, tgi-inference, or mms-inference container."
474515
)
475516

476517
return self.pysdk_model

src/sagemaker/serve/builder/model_builder.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from sagemaker.serve.detector.pickler import save_pkl, save_xgboost
3737
from sagemaker.serve.builder.serve_settings import _ServeSettings
3838
from sagemaker.serve.builder.djl_builder import DJL
39+
from sagemaker.serve.builder.tei_builder import TEI
3940
from sagemaker.serve.builder.tgi_builder import TGI
4041
from sagemaker.serve.builder.jumpstart_builder import JumpStart
4142
from sagemaker.serve.builder.transformers_builder import Transformers
@@ -95,9 +96,9 @@
9596
}
9697

9798

98-
# pylint: disable=attribute-defined-outside-init, disable=E1101, disable=R0901
99+
# pylint: disable=attribute-defined-outside-init, disable=E1101, disable=R0901, disable=R1705
99100
@dataclass
100-
class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing):
101+
class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing, TEI):
101102
"""Class that builds a deployable model.
102103
103104
Args:
@@ -753,7 +754,7 @@ def build( # pylint: disable=R0911
753754
model_task = self.model_metadata.get("HF_TASK")
754755
if self._is_jumpstart_model_id():
755756
return self._build_for_jumpstart()
756-
if self._is_djl(): # pylint: disable=R1705
757+
if self._is_djl():
757758
return self._build_for_djl()
758759
else:
759760
hf_model_md = get_huggingface_model_metadata(
@@ -764,8 +765,10 @@ def build( # pylint: disable=R0911
764765
model_task = hf_model_md.get("pipeline_tag")
765766
if self.schema_builder is None and model_task is not None:
766767
self._hf_schema_builder_init(model_task)
767-
if model_task == "text-generation": # pylint: disable=R1705
768+
if model_task == "text-generation":
768769
return self._build_for_tgi()
770+
if model_task == "sentence-similarity":
771+
return self._build_for_tei()
769772
elif self._can_fit_on_single_gpu():
770773
return self._build_for_transformers()
771774
elif (

0 commit comments

Comments
 (0)