Skip to content

feat: Add deployment support for deployment recommendations #3695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
128 changes: 103 additions & 25 deletions src/sagemaker/inference_recommender/inference_recommender_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,6 @@ def _update_params_for_recommendation_id(
return (instance_type, initial_instance_count)

# Validate non-compatible parameters with recommendation id
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"Please either do not specify instance_type and initial_instance_count"
"since they are in recommendation, or specify both of them if you want"
"to override the recommendation."
)
if accelerator_type is not None:
raise ValueError("accelerator_type is not compatible with inference_recommendation_id.")
if async_inference_config is not None:
Expand All @@ -370,30 +364,38 @@ def _update_params_for_recommendation_id(

# Validate recommendation id
if not re.match(r"[a-zA-Z0-9](-*[a-zA-Z0-9]){0,63}\/\w{8}$", inference_recommendation_id):
raise ValueError("Inference Recommendation id is not valid")
recommendation_job_name = inference_recommendation_id.split("/")[0]
raise ValueError("inference_recommendation_id is not valid")
job_or_model_name = inference_recommendation_id.split("/")[0]

sage_client = self.sagemaker_session.sagemaker_client
recommendation_res = sage_client.describe_inference_recommendations_job(
JobName=recommendation_job_name
# Get recommendation from right size job and model
(
right_size_recommendation,
model_recommendation,
right_size_job_res,
) = self._get_recommendation(
sage_client=sage_client,
job_or_model_name=job_or_model_name,
inference_recommendation_id=inference_recommendation_id,
)
input_config = recommendation_res["InputConfig"]

recommendation = next(
(
rec
for rec in recommendation_res["InferenceRecommendations"]
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
# Update params based on model recommendation
if model_recommendation:
if initial_instance_count is None:
raise ValueError("Must specify model recommendation id and instance count.")
self.env.update(model_recommendation["Environment"])
instance_type = model_recommendation["InstanceType"]
return (instance_type, initial_instance_count)

if not recommendation:
# Update params based on default inference recommendation
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"inference_recommendation_id does not exist in InferenceRecommendations list"
"instance_type and initial_instance_count are mutually exclusive with"
"recommendation id since they are in recommendation."
"Please specify both of them if you want to override the recommendation."
)

model_config = recommendation["ModelConfiguration"]
input_config = right_size_job_res["InputConfig"]
model_config = right_size_recommendation["ModelConfiguration"]
envs = (
model_config["EnvironmentParameters"]
if "EnvironmentParameters" in model_config
Expand Down Expand Up @@ -442,8 +444,10 @@ def _update_params_for_recommendation_id(
self.model_data = compilation_res["ModelArtifacts"]["S3ModelArtifacts"]
self.image_uri = compilation_res["InferenceImage"]

instance_type = recommendation["EndpointConfiguration"]["InstanceType"]
initial_instance_count = recommendation["EndpointConfiguration"]["InitialInstanceCount"]
instance_type = right_size_recommendation["EndpointConfiguration"]["InstanceType"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if right_size_recommendation is also None? Won't this fail?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That case is caught here.
So _get_recommendation ensures that either right_size_recommendation or model_recommendation exists; otherwise it throws an exception for an invalid recommendation id

initial_instance_count = right_size_recommendation["EndpointConfiguration"][
"InitialInstanceCount"
]

return (instance_type, initial_instance_count)

Expand Down Expand Up @@ -511,3 +515,77 @@ def _convert_to_stopping_conditions_json(
threshold.to_json for threshold in model_latency_thresholds
]
return stopping_conditions

def _get_recommendation(self, sage_client, job_or_model_name, inference_recommendation_id):
    """Resolve a recommendation id against a right-size job first, then a model.

    Returns a ``(right_size_recommendation, model_recommendation, right_size_job_res)``
    triple. On success exactly one of the two recommendations is non-None; if the id
    matches neither source, a ``ValueError`` is raised.
    """
    # Prefer a right-size (Inference Recommender job) match.
    right_size_recommendation, right_size_job_res = self._get_right_size_recommendation(
        sage_client=sage_client,
        job_or_model_name=job_or_model_name,
        inference_recommendation_id=inference_recommendation_id,
    )
    if right_size_recommendation is not None:
        return right_size_recommendation, None, right_size_job_res

    # Fall back to a model deployment recommendation.
    model_recommendation = self._get_model_recommendation(
        sage_client=sage_client,
        job_or_model_name=job_or_model_name,
        inference_recommendation_id=inference_recommendation_id,
    )
    if model_recommendation is None:
        raise ValueError("inference_recommendation_id is not valid")

    return None, model_recommendation, right_size_job_res

def _get_right_size_recommendation(
self,
sage_client,
job_or_model_name,
inference_recommendation_id,
):
"""Get recommendation from right size job"""
right_size_recommendation, right_size_job_res = None, None
try:
right_size_job_res = sage_client.describe_inference_recommendations_job(
JobName=job_or_model_name
)
if right_size_job_res:
right_size_recommendation = self._search_recommendation(
recommendation_list=right_size_job_res["InferenceRecommendations"],
inference_recommendation_id=inference_recommendation_id,
)
except sage_client.exceptions.ResourceNotFound:
pass

return right_size_recommendation, right_size_job_res

def _get_model_recommendation(
self,
sage_client,
job_or_model_name,
inference_recommendation_id,
):
"""Get recommendation from model"""
model_recommendation = None
try:
model_res = sage_client.describe_model(ModelName=job_or_model_name)
if model_res:
model_recommendation = self._search_recommendation(
recommendation_list=model_res["DeploymentRecommendation"][
"RealTimeInferenceRecommendations"
],
inference_recommendation_id=inference_recommendation_id,
)
except sage_client.exceptions.ResourceNotFound:
pass

return model_recommendation

def _search_recommendation(self, recommendation_list, inference_recommendation_id):
"""Search recommendation based on recommendation id"""
return next(
(
rec
for rec in recommendation_list
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
18 changes: 18 additions & 0 deletions tests/unit/sagemaker/inference_recommender/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@

INVALID_RECOMMENDATION_ID = "ir-job6ab0ff22"
NOT_EXISTED_RECOMMENDATION_ID = IR_JOB_NAME + "/ad3ec9ee"
NOT_EXISTED_MODEL_RECOMMENDATION_ID = IR_MODEL_NAME + "/ad3ec9ee"
RECOMMENDATION_ID = IR_JOB_NAME + "/5bcee92e"
MODEL_RECOMMENDATION_ID = IR_MODEL_NAME + "/v0KObO5d"
MODEL_RECOMMENDATION_ENV = {"TS_DEFAULT_WORKERS_PER_MODEL": "4"}

IR_CONTAINER_CONFIG = {
"Domain": "MACHINE_LEARNING",
Expand Down Expand Up @@ -95,6 +98,21 @@
"Image": IR_IMAGE,
"ModelDataUrl": IR_MODEL_DATA,
},
"DeploymentRecommendation": {
"RecommendationStatus": "COMPLETED",
"RealTimeInferenceRecommendations": [
{
"RecommendationId": MODEL_RECOMMENDATION_ID,
"InstanceType": "ml.g4dn.2xlarge",
"Environment": MODEL_RECOMMENDATION_ENV,
},
{
"RecommendationId": "test-model-name/d248qVYU",
"InstanceType": "ml.c6i.large",
"Environment": {},
},
],
},
}

DESCRIBE_MODEL_PACKAGE_RESPONSE = {
Expand Down
73 changes: 58 additions & 15 deletions tests/unit/sagemaker/model/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
DESCRIBE_COMPILATION_JOB_RESPONSE,
DESCRIBE_MODEL_PACKAGE_RESPONSE,
DESCRIBE_MODEL_RESPONSE,
MODEL_RECOMMENDATION_ENV,
MODEL_RECOMMENDATION_ID,
INVALID_RECOMMENDATION_ID,
IR_COMPILATION_JOB_NAME,
IR_ENV,
Expand All @@ -34,6 +36,7 @@
IR_MODEL_PACKAGE_VERSION_ARN,
IR_COMPILATION_IMAGE,
IR_COMPILATION_MODEL_DATA,
NOT_EXISTED_MODEL_RECOMMENDATION_ID,
RECOMMENDATION_ID,
NOT_EXISTED_RECOMMENDATION_ID,
)
Expand Down Expand Up @@ -470,28 +473,33 @@ def test_deploy_wrong_async_inferenc_config(sagemaker_session):


def test_deploy_ir_with_incompatible_parameters(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
create_inference_recommendations_job_default_with_model_package_arn()
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

with pytest.raises(
ValueError,
match="Please either do not specify instance_type and initial_instance_count"
"since they are in recommendation, or specify both of them if you want"
"to override the recommendation.",
match="instance_type and initial_instance_count are mutually exclusive with"
"recommendation id since they are in recommendation."
"Please specify both of them if you want to override the recommendation.",
):
model.deploy(
instance_type=INSTANCE_TYPE,
inference_recommendation_id=INFERENCE_RECOMMENDATION_ID,
inference_recommendation_id=RECOMMENDATION_ID,
)

with pytest.raises(
ValueError,
match="Please either do not specify instance_type and initial_instance_count"
"since they are in recommendation, or specify both of them if you want"
"to override the recommendation.",
match="instance_type and initial_instance_count are mutually exclusive with"
"recommendation id since they are in recommendation."
"Please specify both of them if you want to override the recommendation.",
):
model.deploy(
initial_instance_count=INSTANCE_COUNT,
inference_recommendation_id=INFERENCE_RECOMMENDATION_ID,
inference_recommendation_id=RECOMMENDATION_ID,
)

with pytest.raises(
Expand Down Expand Up @@ -524,7 +532,7 @@ def test_deploy_ir_with_incompatible_parameters(sagemaker_session):
def test_deploy_with_wrong_recommendation_id(sagemaker_session):
model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

with pytest.raises(ValueError, match="Inference Recommendation id is not valid"):
with pytest.raises(ValueError, match="inference_recommendation_id is not valid"):
model.deploy(
inference_recommendation_id=INVALID_RECOMMENDATION_ID,
)
Expand All @@ -542,6 +550,7 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
mock_describe_model_package
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -554,11 +563,12 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
assert model.env == IR_ENV


def test_deploy_with_recommendation_id_with_model_name(sagemaker_session):
def mock_describe_model(ModelName):
if ModelName == IR_MODEL_NAME:
return DESCRIBE_MODEL_RESPONSE
def mock_describe_model(ModelName):
    """Stub for sagemaker_client.describe_model: respond only for IR_MODEL_NAME."""
    return DESCRIBE_MODEL_RESPONSE if ModelName == IR_MODEL_NAME else None


def test_deploy_with_recommendation_id_with_model_name(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
create_inference_recommendations_job_default_with_model_name()
)
Expand All @@ -582,6 +592,7 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn_and_compilation(sagema
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
mock_describe_model_package
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -604,6 +615,7 @@ def mock_describe_compilation_job(CompilationJobName):
sagemaker_session.sagemaker_client.describe_compilation_job.side_effect = (
mock_describe_compilation_job
)
sagemaker_session.sagemaker_client.describe_model.side_effect = mock_describe_model

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -615,25 +627,56 @@ def mock_describe_compilation_job(CompilationJobName):
assert model.image_uri == IR_COMPILATION_IMAGE


def test_deploy_with_not_existed_recommendation_id(sagemaker_session):
def test_deploy_with_invalid_inference_recommendation_id(sagemaker_session):
    """A job-style id that matches no job recommendation must be rejected."""
    sm_client = sagemaker_session.sagemaker_client
    sm_client.describe_inference_recommendations_job.return_value = (
        create_inference_recommendations_job_default_with_model_name_and_compilation()
    )
    sm_client.describe_compilation_job.return_value = DESCRIBE_COMPILATION_JOB_RESPONSE
    sm_client.describe_model.return_value = None

    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

    with pytest.raises(ValueError, match="inference_recommendation_id is not valid"):
        model.deploy(inference_recommendation_id=NOT_EXISTED_RECOMMENDATION_ID)


def test_deploy_with_invalid_model_recommendation_id(sagemaker_session):
    """A model-style id that matches no model recommendation must be rejected."""
    sm_client = sagemaker_session.sagemaker_client
    sm_client.describe_inference_recommendations_job.return_value = None
    sm_client.describe_model.side_effect = mock_describe_model

    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

    with pytest.raises(ValueError, match="inference_recommendation_id is not valid"):
        model.deploy(inference_recommendation_id=NOT_EXISTED_MODEL_RECOMMENDATION_ID)


def test_deploy_with_valid_model_recommendation_id(sagemaker_session):
    """A valid model recommendation id applies its instance type and environment."""
    sm_client = sagemaker_session.sagemaker_client
    sm_client.describe_inference_recommendations_job.return_value = None
    sm_client.describe_model.side_effect = mock_describe_model

    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
    model.deploy(
        inference_recommendation_id=MODEL_RECOMMENDATION_ID,
        initial_instance_count=INSTANCE_COUNT,
    )

    # Model artifacts/image are untouched; only the recommended env is applied.
    assert model.model_data == MODEL_DATA
    assert model.image_uri == MODEL_IMAGE
    assert model.env == MODEL_RECOMMENDATION_ENV


@patch("sagemaker.model.Model._create_sagemaker_model", Mock())
@patch("sagemaker.predictor.Predictor._get_endpoint_config_name", Mock())
@patch("sagemaker.predictor.Predictor._get_model_names", Mock())
Expand Down