-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: Add deployment support for deployment recommendations #3695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9c6bc33
1179dc0
bfed49f
0918810
e9af472
e39ec4b
f0e633e
81c052f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -323,12 +323,6 @@ def _update_params_for_recommendation_id( | |
return (instance_type, initial_instance_count) | ||
|
||
# Validate non-compatible parameters with recommendation id | ||
if bool(instance_type) != bool(initial_instance_count): | ||
raise ValueError( | ||
"Please either do not specify instance_type and initial_instance_count" | ||
"since they are in recommendation, or specify both of them if you want" | ||
"to override the recommendation." | ||
) | ||
if accelerator_type is not None: | ||
raise ValueError("accelerator_type is not compatible with inference_recommendation_id.") | ||
if async_inference_config is not None: | ||
|
@@ -346,26 +340,66 @@ def _update_params_for_recommendation_id( | |
recommendation_job_name = inference_recommendation_id.split("/")[0] | ||
|
||
sage_client = self.sagemaker_session.sagemaker_client | ||
recommendation_res = sage_client.describe_inference_recommendations_job( | ||
JobName=recommendation_job_name | ||
) | ||
input_config = recommendation_res["InputConfig"] | ||
|
||
recommendation = next( | ||
( | ||
rec | ||
for rec in recommendation_res["InferenceRecommendations"] | ||
if rec["RecommendationId"] == inference_recommendation_id | ||
), | ||
None, | ||
) | ||
# Retrieve model or inference recommendation job details | ||
recommendation_res, model_res = None, None | ||
try: | ||
recommendation_res = sage_client.describe_inference_recommendations_job( | ||
JobName=recommendation_job_name | ||
) | ||
except sage_client.exceptions.ResourceNotFound: | ||
pass | ||
try: | ||
model_res = sage_client.describe_model(ModelName=recommendation_job_name) | ||
except sage_client.exceptions.ResourceNotFound: | ||
pass | ||
if recommendation_res is None and model_res is None: | ||
raise ValueError("Inference Recommendation id is not valid") | ||
|
||
if not recommendation: | ||
raise ValueError( | ||
"inference_recommendation_id does not exist in InferenceRecommendations list" | ||
# Search the recommendation from above describe result lists | ||
inference_recommendation, instant_recommendation = None, None | ||
if recommendation_res: | ||
inference_recommendation = next( | ||
( | ||
rec | ||
for rec in recommendation_res["InferenceRecommendations"] | ||
if rec["RecommendationId"] == inference_recommendation_id | ||
), | ||
None, | ||
) | ||
if model_res: | ||
instant_recommendation = next( | ||
( | ||
rec | ||
for rec in model_res["DeploymentRecommendation"][ | ||
"RealTimeInferenceRecommendations" | ||
] | ||
if rec["RecommendationId"] == inference_recommendation_id | ||
), | ||
None, | ||
) | ||
if inference_recommendation is None and instant_recommendation is None: | ||
raise ValueError("Inference Recommendation id does not exist") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. id is invalid There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack, will modify |
||
|
||
# Update params based on instant recommendation | ||
if instant_recommendation: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could we not call it "instant"? maybe use the names "right_size_recommendation" and "model_recommendation"? Because that indicates the source of the recommendation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense, will rename them |
||
if initial_instance_count is None: | ||
raise ValueError( | ||
"Please specify initial_instance_count with instant recommendation id" | ||
) | ||
self.env.update(instant_recommendation["Environment"]) | ||
instance_type = instant_recommendation["InstanceType"] | ||
return (instance_type, initial_instance_count) | ||
|
||
model_config = recommendation["ModelConfiguration"] | ||
# Update params based on default inference recommendation | ||
if bool(instance_type) != bool(initial_instance_count): | ||
raise ValueError( | ||
"Please either do not specify instance_type and initial_instance_count" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. similar to this
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. modified in next rev |
||
"since they are in recommendation, or specify both of them if you want" | ||
"to override the recommendation." | ||
) | ||
input_config = recommendation_res["InputConfig"] | ||
model_config = inference_recommendation["ModelConfiguration"] | ||
envs = ( | ||
model_config["EnvironmentParameters"] | ||
if "EnvironmentParameters" in model_config | ||
|
@@ -414,8 +448,10 @@ def _update_params_for_recommendation_id( | |
self.model_data = compilation_res["ModelArtifacts"]["S3ModelArtifacts"] | ||
self.image_uri = compilation_res["InferenceImage"] | ||
|
||
instance_type = recommendation["EndpointConfiguration"]["InstanceType"] | ||
initial_instance_count = recommendation["EndpointConfiguration"]["InitialInstanceCount"] | ||
instance_type = inference_recommendation["EndpointConfiguration"]["InstanceType"] | ||
initial_instance_count = inference_recommendation["EndpointConfiguration"][ | ||
"InitialInstanceCount" | ||
] | ||
|
||
return (instance_type, initial_instance_count) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we don't need to make this call if
describe_inference_recommendations_job
works, right? Maybe we can create three private methods to modularize the code since this method is getting big.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If JobName
and ModelName
are the same, e.g. thanos/abcd1234
and thanos/abcd5678,
then we need to find out whether it's an inference or a model recommendation. So going through the two lists (inference_recommendations and model_recommendations) to get the recommendation is needed. So here I am thinking we need to loop through two lists anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should only call describe_model if the recommendation ID is not in the right_size recommendations.
Otherwise it is a wasted API call