-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: Add deployment support for deployment recommendations #3695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9c6bc33
1179dc0
bfed49f
0918810
e9af472
e39ec4b
f0e633e
81c052f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -351,12 +351,6 @@ def _update_params_for_recommendation_id( | |
return (instance_type, initial_instance_count) | ||
|
||
# Validate non-compatible parameters with recommendation id | ||
if bool(instance_type) != bool(initial_instance_count): | ||
raise ValueError( | ||
"Please either do not specify instance_type and initial_instance_count" | ||
"since they are in recommendation, or specify both of them if you want" | ||
"to override the recommendation." | ||
) | ||
if accelerator_type is not None: | ||
raise ValueError("accelerator_type is not compatible with inference_recommendation_id.") | ||
if async_inference_config is not None: | ||
|
@@ -371,29 +365,45 @@ def _update_params_for_recommendation_id( | |
# Validate recommendation id | ||
if not re.match(r"[a-zA-Z0-9](-*[a-zA-Z0-9]){0,63}\/\w{8}$", inference_recommendation_id): | ||
raise ValueError("Inference Recommendation id is not valid") | ||
recommendation_job_name = inference_recommendation_id.split("/")[0] | ||
job_or_model_name = inference_recommendation_id.split("/")[0] | ||
|
||
sage_client = self.sagemaker_session.sagemaker_client | ||
recommendation_res = sage_client.describe_inference_recommendations_job( | ||
JobName=recommendation_job_name | ||
|
||
# Describe inference recommendation job and model details | ||
recommendation_res, model_res = self._describe_recommendation_job_and_model( | ||
sage_client=sage_client, | ||
job_or_model_name=job_or_model_name, | ||
) | ||
input_config = recommendation_res["InputConfig"] | ||
|
||
recommendation = next( | ||
( | ||
rec | ||
for rec in recommendation_res["InferenceRecommendations"] | ||
if rec["RecommendationId"] == inference_recommendation_id | ||
), | ||
None, | ||
# Search the recommendation from above describe results | ||
( | ||
right_size_recommendation, | ||
model_recommendation, | ||
) = self._get_right_size_and_model_recommendation( | ||
recommendation_res=recommendation_res, | ||
model_res=model_res, | ||
inference_recommendation_id=inference_recommendation_id, | ||
) | ||
|
||
if not recommendation: | ||
# Update params based on model recommendation | ||
if model_recommendation: | ||
if initial_instance_count is None: | ||
raise ValueError( | ||
"Please specify initial_instance_count with model recommendation id" | ||
) | ||
self.env.update(model_recommendation["Environment"]) | ||
instance_type = model_recommendation["InstanceType"] | ||
return (instance_type, initial_instance_count) | ||
|
||
# Update params based on default inference recommendation | ||
if bool(instance_type) != bool(initial_instance_count): | ||
raise ValueError( | ||
"inference_recommendation_id does not exist in InferenceRecommendations list" | ||
"Please either do not specify instance_type and initial_instance_count" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. similar to this
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. modified in next rev |
||
"since they are in recommendation, or specify both of them if you want" | ||
"to override the recommendation." | ||
) | ||
|
||
model_config = recommendation["ModelConfiguration"] | ||
input_config = recommendation_res["InputConfig"] | ||
model_config = right_size_recommendation["ModelConfiguration"] | ||
envs = ( | ||
model_config["EnvironmentParameters"] | ||
if "EnvironmentParameters" in model_config | ||
|
@@ -442,8 +452,10 @@ def _update_params_for_recommendation_id( | |
self.model_data = compilation_res["ModelArtifacts"]["S3ModelArtifacts"] | ||
self.image_uri = compilation_res["InferenceImage"] | ||
|
||
instance_type = recommendation["EndpointConfiguration"]["InstanceType"] | ||
initial_instance_count = recommendation["EndpointConfiguration"]["InitialInstanceCount"] | ||
instance_type = right_size_recommendation["EndpointConfiguration"]["InstanceType"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if right_size_recommendation is also None? Won't this fail ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This case has been caught by here. |
||
initial_instance_count = right_size_recommendation["EndpointConfiguration"][ | ||
"InitialInstanceCount" | ||
] | ||
|
||
return (instance_type, initial_instance_count) | ||
|
||
|
@@ -513,3 +525,57 @@ def _convert_to_stopping_conditions_json( | |
threshold.to_json for threshold in model_latency_thresholds | ||
] | ||
return stopping_conditions | ||
|
||
def _get_right_size_and_model_recommendation( | ||
self, | ||
model_res=None, | ||
recommendation_res=None, | ||
inference_recommendation_id=None, | ||
): | ||
"""Get recommendation from right size job or model""" | ||
right_size_recommendation, model_recommendation = None, None | ||
if recommendation_res: | ||
right_size_recommendation = self._get_recommendation( | ||
recommendation_list=recommendation_res["InferenceRecommendations"], | ||
inference_recommendation_id=inference_recommendation_id, | ||
) | ||
if model_res: | ||
model_recommendation = self._get_recommendation( | ||
recommendation_list=model_res["DeploymentRecommendation"][ | ||
"RealTimeInferenceRecommendations" | ||
], | ||
inference_recommendation_id=inference_recommendation_id, | ||
) | ||
if right_size_recommendation is None and model_recommendation is None: | ||
raise ValueError("Inference Recommendation id is not valid") | ||
|
||
return right_size_recommendation, model_recommendation | ||
|
||
def _get_recommendation(self, recommendation_list, inference_recommendation_id): | ||
"""Get recommendation based on recommendation id""" | ||
return next( | ||
( | ||
rec | ||
for rec in recommendation_list | ||
if rec["RecommendationId"] == inference_recommendation_id | ||
), | ||
None, | ||
) | ||
|
||
def _describe_recommendation_job_and_model(self, sage_client, job_or_model_name): | ||
"""Describe inference recommendation job and model results""" | ||
recommendation_res, model_res = None, None | ||
try: | ||
recommendation_res = sage_client.describe_inference_recommendations_job( | ||
JobName=job_or_model_name | ||
) | ||
except sage_client.exceptions.ResourceNotFound: | ||
pass | ||
try: | ||
model_res = sage_client.describe_model(ModelName=job_or_model_name) | ||
jinpengqi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except sage_client.exceptions.ResourceNotFound: | ||
pass | ||
if recommendation_res is None and model_res is None: | ||
raise ValueError("Inference Recommendation id is not valid") | ||
|
||
return recommendation_res, model_res |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
prefer same style as messages in model.py
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks, modified in next rev