Skip to content

Commit 58f8746

Browse files
Author: Roja Reddy Sareddy (committed)
feature: Enabled update_endpoint through model_builder
1 parent 249872d commit 58f8746

File tree

7 files changed

+320
-18
lines changed

7 files changed

+320
-18
lines changed

src/sagemaker/huggingface/model.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def deploy(
218218
container_startup_health_check_timeout=None,
219219
inference_recommendation_id=None,
220220
explainer_config=None,
221+
update_endpoint: Optional[bool] = False,
221222
**kwargs,
222223
):
223224
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -296,6 +297,9 @@ def deploy(
296297
would like to deploy the model and endpoint with recommended parameters.
297298
explainer_config (sagemaker.explainer.ExplainerConfig): Specifies online explainability
298299
configuration for use with Amazon SageMaker Clarify. (default: None)
300+
update_endpoint (Optional[bool]): Flag to update the model in an existing Amazon SageMaker endpoint.
301+
If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources
302+
corresponding to the previous EndpointConfig. Default: False
299303
Raises:
300304
ValueError: If arguments combination check failed in these circumstances:
301305
- If no role is specified or
@@ -335,6 +339,7 @@ def deploy(
335339
container_startup_health_check_timeout=container_startup_health_check_timeout,
336340
inference_recommendation_id=inference_recommendation_id,
337341
explainer_config=explainer_config,
342+
update_endpoint=update_endpoint,
338343
**kwargs,
339344
)
340345

src/sagemaker/model.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
from sagemaker.model_card.schema_constraints import ModelApprovalStatusEnum
5454
from sagemaker.session import Session
5555
from sagemaker.model_metrics import ModelMetrics
56-
from sagemaker.deprecations import removed_kwargs
5756
from sagemaker.drift_check_baselines import DriftCheckBaselines
5857
from sagemaker.explainer import ExplainerConfig
5958
from sagemaker.metadata_properties import MetadataProperties
@@ -1386,6 +1385,7 @@ def deploy(
13861385
routing_config: Optional[Dict[str, Any]] = None,
13871386
model_reference_arn: Optional[str] = None,
13881387
inference_ami_version: Optional[str] = None,
1388+
update_endpoint: Optional[bool] = False,
13891389
**kwargs,
13901390
):
13911391
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1497,6 +1497,10 @@ def deploy(
14971497
inference_ami_version (Optional [str]): Specifies an option from a collection of preconfigured
14981498
Amazon Machine Image (AMI) images. For a full list of options, see:
14991499
https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html
1500+
update_endpoint (Optional[bool]): Flag to update the model in an existing Amazon SageMaker endpoint.
1501+
If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources
1502+
corresponding to the previous EndpointConfig. Default: False
1503+
Note: Currently this is supported for single model endpoints
15001504
Raises:
15011505
ValueError: If arguments combination check failed in these circumstances:
15021506
- If no role is specified or
@@ -1512,8 +1516,6 @@ def deploy(
15121516
"""
15131517
self.accept_eula = accept_eula
15141518

1515-
removed_kwargs("update_endpoint", kwargs)
1516-
15171519
self._init_sagemaker_session_if_does_not_exist(instance_type)
15181520
# Depending on the instance type, a local session (or) a session is initialized.
15191521
self.role = resolve_value_from_config(
@@ -1628,6 +1630,8 @@ def deploy(
16281630

16291631
# Support multiple models on same endpoint
16301632
if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
1633+
if update_endpoint:
1634+
raise ValueError("Currently update_endpoint is supported for single model endpoints")
16311635
if endpoint_name:
16321636
self.endpoint_name = endpoint_name
16331637
else:
@@ -1783,17 +1787,38 @@ def deploy(
17831787
if is_explainer_enabled:
17841788
explainer_config_dict = explainer_config._to_request_dict()
17851789

1786-
self.sagemaker_session.endpoint_from_production_variants(
1787-
name=self.endpoint_name,
1788-
production_variants=[production_variant],
1789-
tags=tags,
1790-
kms_key=kms_key,
1791-
wait=wait,
1792-
data_capture_config_dict=data_capture_config_dict,
1793-
explainer_config_dict=explainer_config_dict,
1794-
async_inference_config_dict=async_inference_config_dict,
1795-
live_logging=endpoint_logging,
1796-
)
1790+
if update_endpoint:
1791+
endpoint_config_name = self.sagemaker_session.create_endpoint_config(
1792+
name=self.name,
1793+
model_name=self.name,
1794+
initial_instance_count=initial_instance_count,
1795+
instance_type=instance_type,
1796+
accelerator_type=accelerator_type,
1797+
tags=tags,
1798+
kms_key=kms_key,
1799+
data_capture_config_dict=data_capture_config_dict,
1800+
volume_size=volume_size,
1801+
model_data_download_timeout=model_data_download_timeout,
1802+
container_startup_health_check_timeout=container_startup_health_check_timeout,
1803+
explainer_config_dict=explainer_config_dict,
1804+
async_inference_config_dict=async_inference_config_dict,
1805+
serverless_inference_config=serverless_inference_config_dict,
1806+
routing_config=routing_config,
1807+
inference_ami_version=inference_ami_version,
1808+
)
1809+
self.sagemaker_session.update_endpoint(self.endpoint_name, endpoint_config_name)
1810+
else:
1811+
self.sagemaker_session.endpoint_from_production_variants(
1812+
name=self.endpoint_name,
1813+
production_variants=[production_variant],
1814+
tags=tags,
1815+
kms_key=kms_key,
1816+
wait=wait,
1817+
data_capture_config_dict=data_capture_config_dict,
1818+
explainer_config_dict=explainer_config_dict,
1819+
async_inference_config_dict=async_inference_config_dict,
1820+
live_logging=endpoint_logging,
1821+
)
17971822

17981823
if self.predictor_cls:
17991824
predictor = self.predictor_cls(self.endpoint_name, self.sagemaker_session)

src/sagemaker/serve/builder/model_builder.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,7 @@ def deploy(
16021602
ResourceRequirements,
16031603
]
16041604
] = None,
1605+
update_endpoint: Optional[bool] = False,
16051606
) -> Union[Predictor, Transformer]:
16061607
"""Deploys the built Model.
16071608
@@ -1615,24 +1616,32 @@ def deploy(
16151616
AsyncInferenceConfig, BatchTransformInferenceConfig, ResourceRequirements]]) :
16161617
Additional Config for different deployment types such as
16171618
serverless, async, batch and multi-model/container
1619+
update_endpoint (Optional[bool]): Flag to update the model in an existing Amazon SageMaker endpoint.
1620+
If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources
1621+
corresponding to the previous EndpointConfig. Default: False
1622+
Note: Currently this is supported for single model endpoints
16181623
Returns:
16191624
Transformer for Batch Deployments
16201625
Predictors for all others
16211626
"""
16221627
if not hasattr(self, "built_model"):
16231628
raise ValueError("Model Needs to be built before deploying")
1624-
endpoint_name = unique_name_from_base(endpoint_name)
1629+
if not update_endpoint:
1630+
endpoint_name = unique_name_from_base(endpoint_name)
1631+
16251632
if not inference_config: # Real-time Deployment
16261633
return self.built_model.deploy(
16271634
instance_type=self.instance_type,
16281635
initial_instance_count=initial_instance_count,
16291636
endpoint_name=endpoint_name,
1637+
update_endpoint=update_endpoint,
16301638
)
16311639

16321640
if isinstance(inference_config, ServerlessInferenceConfig):
16331641
return self.built_model.deploy(
16341642
serverless_inference_config=inference_config,
16351643
endpoint_name=endpoint_name,
1644+
update_endpoint=update_endpoint,
16361645
)
16371646

16381647
if isinstance(inference_config, AsyncInferenceConfig):
@@ -1641,6 +1650,7 @@ def deploy(
16411650
initial_instance_count=initial_instance_count,
16421651
async_inference_config=inference_config,
16431652
endpoint_name=endpoint_name,
1653+
update_endpoint=update_endpoint,
16441654
)
16451655

16461656
if isinstance(inference_config, BatchTransformInferenceConfig):
@@ -1652,6 +1662,8 @@ def deploy(
16521662
return transformer
16531663

16541664
if isinstance(inference_config, ResourceRequirements):
1665+
if update_endpoint:
1666+
raise ValueError("Currently update_endpoint is supported for single model endpoints")
16551667
# Multi Model and MultiContainer endpoints with Inference Component
16561668
return self.built_model.deploy(
16571669
instance_type=self.instance_type,
@@ -1660,6 +1672,7 @@ def deploy(
16601672
resources=inference_config,
16611673
initial_instance_count=initial_instance_count,
16621674
role=self.role_arn,
1675+
update_endpoint=update_endpoint,
16631676
)
16641677

16651678
raise ValueError("Deployment Options not supported")

src/sagemaker/session.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4488,6 +4488,10 @@ def create_endpoint_config(
44884488
model_data_download_timeout=None,
44894489
container_startup_health_check_timeout=None,
44904490
explainer_config_dict=None,
4491+
async_inference_config_dict=None,
4492+
serverless_inference_config_dict=None,
4493+
routing_config: Optional[Dict[str, Any]] = None,
4494+
inference_ami_version: Optional[str] = None,
44914495
):
44924496
"""Create an Amazon SageMaker endpoint configuration.
44934497
@@ -4525,6 +4529,27 @@ def create_endpoint_config(
45254529
-inference-algo-ping-requests
45264530
explainer_config_dict (dict): Specifies configuration to enable explainers.
45274531
Default: None.
4532+
async_inference_config_dict (dict): Specifies
4533+
configuration related to async endpoint. Use this configuration when trying
4534+
to create async endpoint and make async inference. If empty config object
4535+
passed through, will use default config to deploy async endpoint. Deploy a
4536+
real-time endpoint if it's None. (default: None).
4537+
serverless_inference_config_dict (dict):
4538+
Specifies configuration related to serverless endpoint. Use this configuration
4539+
when trying to create serverless endpoint and make serverless inference. If
4540+
empty object passed through, will use pre-defined values in
4541+
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
4542+
instance based endpoint if it's None. (default: None).
4543+
routing_config (Optional[Dict[str, Any]): Settings the control how the endpoint routes incoming
4544+
traffic to the instances that the endpoint hosts.
4545+
Currently, support dictionary key ``RoutingStrategy``.
4546+
4547+
.. code:: python
4548+
4549+
{
4550+
"RoutingStrategy": sagemaker.enums.RoutingStrategy.RANDOM
4551+
}
4552+
inference_ami_version (Optional [str]): Specifies an option from a collection of preconfigured
45284553
45294554
Example:
45304555
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -4544,9 +4569,12 @@ def create_endpoint_config(
45444569
instance_type,
45454570
initial_instance_count,
45464571
accelerator_type=accelerator_type,
4572+
serverless_inference_config=serverless_inference_config_dict,
45474573
volume_size=volume_size,
45484574
model_data_download_timeout=model_data_download_timeout,
45494575
container_startup_health_check_timeout=container_startup_health_check_timeout,
4576+
routing_config=routing_config,
4577+
inference_ami_version=inference_ami_version,
45504578
)
45514579
production_variants = [provided_production_variant]
45524580
# Currently we just inject CoreDumpConfig.KmsKeyId from the config for production variant.
@@ -4586,6 +4614,14 @@ def create_endpoint_config(
45864614
)
45874615
request["DataCaptureConfig"] = inferred_data_capture_config_dict
45884616

4617+
if async_inference_config_dict is not None:
4618+
inferred_async_inference_config_dict = update_nested_dictionary_with_values_from_config(
4619+
async_inference_config_dict,
4620+
ENDPOINT_CONFIG_ASYNC_INFERENCE_PATH,
4621+
sagemaker_session=self,
4622+
)
4623+
request["AsyncInferenceConfig"] = inferred_async_inference_config_dict
4624+
45894625
if explainer_config_dict is not None:
45904626
request["ExplainerConfig"] = explainer_config_dict
45914627

src/sagemaker/tensorflow/model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ def deploy(
358358
container_startup_health_check_timeout=None,
359359
inference_recommendation_id=None,
360360
explainer_config=None,
361+
update_endpoint: Optional[bool] = False,
361362
**kwargs,
362363
):
363364
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
@@ -383,6 +384,7 @@ def deploy(
383384
container_startup_health_check_timeout=container_startup_health_check_timeout,
384385
inference_recommendation_id=inference_recommendation_id,
385386
explainer_config=explainer_config,
387+
update_endpoint=update_endpoint,
386388
**kwargs,
387389
)
388390

0 commit comments

Comments (0)