Skip to content

feat: Add support for Deployment Recommendation ID in model.deploy(). No tagging support #3920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 9, 2023
135 changes: 109 additions & 26 deletions src/sagemaker/inference_recommender/inference_recommender_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,12 @@ def _update_params(
async_inference_config,
explainer_config,
)
return inference_recommendation or (instance_type, initial_instance_count)

return (
inference_recommendation
if inference_recommendation
else (instance_type, initial_instance_count)
)

def _update_params_for_right_size(
self,
Expand Down Expand Up @@ -365,12 +370,6 @@ def _update_params_for_recommendation_id(
return (instance_type, initial_instance_count)

# Validate non-compatible parameters with recommendation id
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"Please either do not specify instance_type and initial_instance_count"
"since they are in recommendation, or specify both of them if you want"
"to override the recommendation."
)
if accelerator_type is not None:
raise ValueError("accelerator_type is not compatible with inference_recommendation_id.")
if async_inference_config is not None:
Expand All @@ -386,30 +385,38 @@ def _update_params_for_recommendation_id(

# Validate recommendation id
if not re.match(r"[a-zA-Z0-9](-*[a-zA-Z0-9]){0,63}\/\w{8}$", inference_recommendation_id):
raise ValueError("Inference Recommendation id is not valid")
recommendation_job_name = inference_recommendation_id.split("/")[0]
raise ValueError("inference_recommendation_id is not valid")
job_or_model_name = inference_recommendation_id.split("/")[0]

sage_client = self.sagemaker_session.sagemaker_client
recommendation_res = sage_client.describe_inference_recommendations_job(
JobName=recommendation_job_name
# Get recommendation from right size job and model
(
right_size_recommendation,
model_recommendation,
right_size_job_res,
) = self._get_recommendation(
sage_client=sage_client,
job_or_model_name=job_or_model_name,
inference_recommendation_id=inference_recommendation_id,
)
input_config = recommendation_res["InputConfig"]

recommendation = next(
(
rec
for rec in recommendation_res["InferenceRecommendations"]
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
# Update params based on model recommendation
if model_recommendation:
if initial_instance_count is None:
raise ValueError("Must specify model recommendation id and instance count.")
self.env.update(model_recommendation["Environment"])
instance_type = model_recommendation["InstanceType"]
return (instance_type, initial_instance_count)

if not recommendation:
# Update params based on default inference recommendation
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"inference_recommendation_id does not exist in InferenceRecommendations list"
"instance_type and initial_instance_count are mutually exclusive with"
"recommendation id since they are in recommendation."
"Please specify both of them if you want to override the recommendation."
)

model_config = recommendation["ModelConfiguration"]
input_config = right_size_job_res["InputConfig"]
model_config = right_size_recommendation["ModelConfiguration"]
envs = (
model_config["EnvironmentParameters"]
if "EnvironmentParameters" in model_config
Expand Down Expand Up @@ -458,8 +465,10 @@ def _update_params_for_recommendation_id(
self.model_data = compilation_res["ModelArtifacts"]["S3ModelArtifacts"]
self.image_uri = compilation_res["InferenceImage"]

instance_type = recommendation["EndpointConfiguration"]["InstanceType"]
initial_instance_count = recommendation["EndpointConfiguration"]["InitialInstanceCount"]
instance_type = right_size_recommendation["EndpointConfiguration"]["InstanceType"]
initial_instance_count = right_size_recommendation["EndpointConfiguration"][
"InitialInstanceCount"
]

return (instance_type, initial_instance_count)

Expand Down Expand Up @@ -527,3 +536,77 @@ def _convert_to_stopping_conditions_json(
threshold.to_json for threshold in model_latency_thresholds
]
return stopping_conditions

def _get_recommendation(self, sage_client, job_or_model_name, inference_recommendation_id):
"""Get recommendation from right size job and model"""
right_size_recommendation, model_recommendation, right_size_job_res = None, None, None
right_size_recommendation, right_size_job_res = self._get_right_size_recommendation(
sage_client=sage_client,
job_or_model_name=job_or_model_name,
inference_recommendation_id=inference_recommendation_id,
)
if right_size_recommendation is None:
model_recommendation = self._get_model_recommendation(
sage_client=sage_client,
job_or_model_name=job_or_model_name,
inference_recommendation_id=inference_recommendation_id,
)
if model_recommendation is None:
raise ValueError("inference_recommendation_id is not valid")

return right_size_recommendation, model_recommendation, right_size_job_res

def _get_right_size_recommendation(
self,
sage_client,
job_or_model_name,
inference_recommendation_id,
):
"""Get recommendation from right size job"""
right_size_recommendation, right_size_job_res = None, None
try:
right_size_job_res = sage_client.describe_inference_recommendations_job(
JobName=job_or_model_name
)
if right_size_job_res:
right_size_recommendation = self._search_recommendation(
recommendation_list=right_size_job_res["InferenceRecommendations"],
inference_recommendation_id=inference_recommendation_id,
)
except sage_client.exceptions.ResourceNotFound:
pass

return right_size_recommendation, right_size_job_res

def _get_model_recommendation(
self,
sage_client,
job_or_model_name,
inference_recommendation_id,
):
"""Get recommendation from model"""
model_recommendation = None
try:
model_res = sage_client.describe_model(ModelName=job_or_model_name)
if model_res:
model_recommendation = self._search_recommendation(
recommendation_list=model_res["DeploymentRecommendation"][
"RealTimeInferenceRecommendations"
],
inference_recommendation_id=inference_recommendation_id,
)
except sage_client.exceptions.ResourceNotFound:
pass

return model_recommendation

def _search_recommendation(self, recommendation_list, inference_recommendation_id):
"""Search recommendation based on recommendation id"""
return next(
(
rec
for rec in recommendation_list
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
2 changes: 2 additions & 0 deletions src/sagemaker/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,8 @@ def deploy(
inference_recommendation_id (str): The recommendation id which specifies the
recommendation you picked from inference recommendation job results and
would like to deploy the model and endpoint with recommended parameters.
This can also be a recommendation id returned from ``DescribeModel`` contained in
a list of ``RealTimeInferenceRecommendations`` within ``DeploymentRecommendation``.
explainer_config (sagemaker.explainer.ExplainerConfig): Specifies online explainability
configuration for use with Amazon SageMaker Clarify. Default: None.
Raises:
Expand Down
77 changes: 77 additions & 0 deletions tests/integ/test_inference_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,30 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_
sagemaker_session.delete_model(ModelName=model.name)


@pytest.fixture(scope="module")
def created_base_model(sagemaker_session, cpu_instance_type):
    """Create (without deploying) a sklearn model so DescribeModel can surface
    a deployment recommendation for it."""
    uploaded_model_data = sagemaker_session.upload_data(path=IR_SKLEARN_MODEL)
    sklearn_image = image_uris.retrieve(
        framework="sklearn",
        region=sagemaker_session._region_name,
        version="1.0-1",
        image_scope="inference",
    )

    role_arn = (
        sagemaker_session.boto_session.client("iam")
        .get_role(RoleName="SageMakerRole")["Role"]["Arn"]
    )

    base_model = Model(
        model_data=uploaded_model_data,
        role=role_arn,
        entry_point=IR_SKLEARN_ENTRY_POINT,
        image_uri=sklearn_image,
        sagemaker_session=sagemaker_session,
    )
    base_model.create(instance_type=cpu_instance_type)

    return base_model


@pytest.mark.slow_test
def test_default_right_size_and_deploy_registered_model_sklearn(
default_right_sized_model, sagemaker_session
Expand Down Expand Up @@ -453,3 +477,56 @@ def test_deploy_inference_recommendation_id_with_registered_model_sklearn(
)
predictor.delete_model()
predictor.delete_endpoint()


@pytest.mark.slow_test
def test_deploy_deployment_recommendation_id_with_model(created_base_model, sagemaker_session):
    """Deploy using a model deployment recommendation id and smoke-test inference."""
    # Initialize up front: if polling or deploy() fails, the finally block
    # previously raised NameError on the unbound `predictor`, masking the
    # real failure.
    predictor = None
    with timeout(minutes=20):
        try:
            deployment_recommendation = poll_for_deployment_recommendation(
                created_base_model, sagemaker_session
            )

            assert deployment_recommendation is not None

            real_time_recommendations = deployment_recommendation.get(
                "RealTimeInferenceRecommendations"
            )
            recommendation_id = real_time_recommendations[0].get("RecommendationId")

            endpoint_name = unique_name_from_base("test-rec-id-deployment-default-sklearn")
            created_base_model.predictor_cls = SKLearnPredictor
            predictor = created_base_model.deploy(
                inference_recommendation_id=recommendation_id,
                initial_instance_count=1,
                endpoint_name=endpoint_name,
            )

            payload = pd.read_csv(IR_SKLEARN_DATA, header=None)

            inference = predictor.predict(payload)
            assert inference is not None
            assert 26 == len(inference)
        finally:
            # Only clean up resources that were actually created.
            if predictor is not None:
                predictor.delete_model()
                predictor.delete_endpoint()


def poll_for_deployment_recommendation(created_base_model, sagemaker_session):
    """Poll DescribeModel until its DeploymentRecommendation reaches COMPLETED.

    Returns:
        dict: The completed ``DeploymentRecommendation``.

    On any failure (including the 1-minute timeout) the model is deleted and
    the original exception is re-raised.
    """
    import time  # local import: the file's import block is outside this diff

    with timeout(minutes=1):
        try:
            while True:
                describe_model_response = sagemaker_session.sagemaker_client.describe_model(
                    ModelName=created_base_model.name
                )
                deployment_recommendation = describe_model_response.get("DeploymentRecommendation")

                if (
                    deployment_recommendation is not None
                    and "COMPLETED" == deployment_recommendation.get("RecommendationStatus")
                ):
                    return deployment_recommendation
                # Back off between polls instead of busy-looping against the API.
                time.sleep(5)
        except Exception:
            # Best-effort cleanup, then re-raise preserving the original
            # traceback (bare `raise` rather than `raise e`).
            created_base_model.delete_model()
            raise
45 changes: 45 additions & 0 deletions tests/unit/sagemaker/inference_recommender/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@

# Malformed id: lacks the "<name>/<8-char-suffix>" shape the SDK validates.
INVALID_RECOMMENDATION_ID = "ir-job6ab0ff22"
# Well-formed ids whose suffix is presumably absent from the fixture
# responses — used to exercise the not-found paths. TODO confirm against
# the unit tests that consume these.
NOT_EXISTED_RECOMMENDATION_ID = IR_JOB_NAME + "/ad3ec9ee"
NOT_EXISTED_MODEL_RECOMMENDATION_ID = IR_MODEL_NAME + "/ad3ec9ee"
RECOMMENDATION_ID = IR_JOB_NAME + "/5bcee92e"
# Id present in the fixture DeploymentRecommendation list below, with the
# environment variables that recommendation carries.
MODEL_RECOMMENDATION_ID = IR_MODEL_NAME + "/v0KObO5d"
MODEL_RECOMMENDATION_ENV = {"TS_DEFAULT_WORKERS_PER_MODEL": "4"}

IR_CONTAINER_CONFIG = {
"Domain": "MACHINE_LEARNING",
Expand Down Expand Up @@ -95,6 +98,21 @@
"Image": IR_IMAGE,
"ModelDataUrl": IR_MODEL_DATA,
},
"DeploymentRecommendation": {
"RecommendationStatus": "COMPLETED",
"RealTimeInferenceRecommendations": [
{
"RecommendationId": MODEL_RECOMMENDATION_ID,
"InstanceType": "ml.g4dn.2xlarge",
"Environment": MODEL_RECOMMENDATION_ENV,
},
{
"RecommendationId": "test-model-name/d248qVYU",
"InstanceType": "ml.c6i.large",
"Environment": {},
},
],
},
}

DESCRIBE_MODEL_PACKAGE_RESPONSE = {
Expand Down Expand Up @@ -134,3 +152,30 @@
"ModelArtifacts": {"S3ModelArtifacts": IR_COMPILATION_MODEL_DATA},
"InferenceImage": IR_COMPILATION_IMAGE,
}

# Expected container definitions asserted by the unit tests. Each mirrors
# what model.deploy() should construct for a given recommendation source.

# Container built from a right-size job recommendation (env from the job).
IR_CONTAINER_DEF = {
    "Image": IR_IMAGE,
    "Environment": IR_ENV,
    "ModelDataUrl": IR_MODEL_DATA,
}

# Container built from a model (DescribeModel) deployment recommendation:
# the environment comes from the recommendation itself.
DEPLOYMENT_RECOMMENDATION_CONTAINER_DEF = {
    "Image": IR_IMAGE,
    "Environment": MODEL_RECOMMENDATION_ENV,
    "ModelDataUrl": IR_MODEL_DATA,
}

# Container built from a compilation job result: compiled image/artifacts,
# no environment variables.
IR_COMPILATION_CONTAINER_DEF = {
    "Image": IR_COMPILATION_IMAGE,
    "Environment": {},
    "ModelDataUrl": IR_COMPILATION_MODEL_DATA,
}

# Container referencing a registered model package version.
IR_MODEL_PACKAGE_CONTAINER_DEF = {
    "ModelPackageName": IR_MODEL_PACKAGE_VERSION_ARN,
    "Environment": IR_ENV,
}

# Compiled model package container — no environment override.
IR_COMPILATION_MODEL_PACKAGE_CONTAINER_DEF = {
    "ModelPackageName": IR_MODEL_PACKAGE_VERSION_ARN,
}
Loading