Skip to content

feat: Add deployment support for deployment recommendations #3695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -323,12 +323,6 @@ def _update_params_for_recommendation_id(
return (instance_type, initial_instance_count)

# Validate non-compatible parameters with recommendation id
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"Please either do not specify instance_type and initial_instance_count"
"since they are in recommendation, or specify both of them if you want"
"to override the recommendation."
)
if accelerator_type is not None:
raise ValueError("accelerator_type is not compatible with inference_recommendation_id.")
if async_inference_config is not None:
Expand All @@ -346,26 +340,66 @@ def _update_params_for_recommendation_id(
recommendation_job_name = inference_recommendation_id.split("/")[0]

sage_client = self.sagemaker_session.sagemaker_client
recommendation_res = sage_client.describe_inference_recommendations_job(
JobName=recommendation_job_name
)
input_config = recommendation_res["InputConfig"]

recommendation = next(
(
rec
for rec in recommendation_res["InferenceRecommendations"]
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
# Retrieve model or inference recommendation job details
recommendation_res, model_res = None, None
try:
recommendation_res = sage_client.describe_inference_recommendations_job(
JobName=recommendation_job_name
)
except sage_client.exceptions.ResourceNotFound:
pass
try:
model_res = sage_client.describe_model(ModelName=recommendation_job_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't need to make this call if describe_inference_recommendations_job works right?

May be we can create three private methods to modularize the code since this method is getting big.

def _get_model_recommendation_id(self):
   # get recommendation id from model

def _get_right_size_recommendation_id(self):
  # get recommendation id from right size job

def _get_recommendation_id(self):
  try:
    _get_right_size_recommendation_id(self)
  except:
    _get_model_recommendation_id(self)

def _update_params_for_recommendation_id(...):
  try:
    recommendation_id = _get_recommendation_id(self)
  except:
    raise ValueError("Inference Recommendation id is not valid")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. as we discussed before, what if customer's JobName and ModelName are the same, eg thanos/abcd1234 and thanos/abcd5678, then we need to find out it's inference or model recommendation. So go through two lists(inference_recommendations and model_recommendations) to get the recommendation is needed. So here I am thinking we need to loop through two lists anyway
  2. good point, will create separate methods

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should only call describemodel if the recommendation id is not in the right_size recommendations.

Otherwise it is a wasted api call

except sage_client.exceptions.ResourceNotFound:
pass
if recommendation_res is None and model_res is None:
raise ValueError("Inference Recommendation id is not valid")

if not recommendation:
raise ValueError(
"inference_recommendation_id does not exist in InferenceRecommendations list"
# Search the recommendation from above describe result lists
inference_recommendation, instant_recommendation = None, None
if recommendation_res:
inference_recommendation = next(
(
rec
for rec in recommendation_res["InferenceRecommendations"]
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
if model_res:
instant_recommendation = next(
(
rec
for rec in model_res["DeploymentRecommendation"][
"RealTimeInferenceRecommendations"
]
if rec["RecommendationId"] == inference_recommendation_id
),
None,
)
if inference_recommendation is None and instant_recommendation is None:
raise ValueError("Inference Recommendation id does not exist")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

id is invalid

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, will modify


# Update params beased on instant recommendation
if instant_recommendation:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we not call it "instant"?

maybe use the names "right_size_recommendation" and "model_recommendation"?

Because that indicates the source of the recommendation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense, will rename them

if initial_instance_count is None:
raise ValueError(
"Please specify initial_instance_count with instant recommendation id"
)
self.env.update(instant_recommendation["Environment"])
instance_type = instant_recommendation["InstanceType"]
return (instance_type, initial_instance_count)

model_config = recommendation["ModelConfiguration"]
# Update params based on default inference recommendation
if bool(instance_type) != bool(initial_instance_count):
raise ValueError(
"Please either do not specify instance_type and initial_instance_count"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar to this

instance type and intial instance count are mutually exclusive with recommendation id

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modified in next rev

"since they are in recommendation, or specify both of them if you want"
"to override the recommendation."
)
input_config = recommendation_res["InputConfig"]
model_config = inference_recommendation["ModelConfiguration"]
envs = (
model_config["EnvironmentParameters"]
if "EnvironmentParameters" in model_config
Expand Down Expand Up @@ -414,8 +448,10 @@ def _update_params_for_recommendation_id(
self.model_data = compilation_res["ModelArtifacts"]["S3ModelArtifacts"]
self.image_uri = compilation_res["InferenceImage"]

instance_type = recommendation["EndpointConfiguration"]["InstanceType"]
initial_instance_count = recommendation["EndpointConfiguration"]["InitialInstanceCount"]
instance_type = inference_recommendation["EndpointConfiguration"]["InstanceType"]
initial_instance_count = inference_recommendation["EndpointConfiguration"][
"InitialInstanceCount"
]

return (instance_type, initial_instance_count)

Expand Down
18 changes: 18 additions & 0 deletions tests/unit/sagemaker/inference_recommender/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@

INVALID_RECOMMENDATION_ID = "ir-job6ab0ff22"
NOT_EXISTED_RECOMMENDATION_ID = IR_JOB_NAME + "/ad3ec9ee"
NOT_EXISTED_INSTANT_RECOMMENDATION_ID = IR_MODEL_NAME + "/ad3ec9ee"
RECOMMENDATION_ID = IR_JOB_NAME + "/5bcee92e"
INSTANT_RECOMMENDATION_ID = IR_MODEL_NAME + "/v0KObO5d"
INSTANT_RECOMMENDATION_ENV = {"TS_DEFAULT_WORKERS_PER_MODEL": "4"}

IR_CONTAINER_CONFIG = {
"Domain": "MACHINE_LEARNING",
Expand Down Expand Up @@ -95,6 +98,21 @@
"Image": IR_IMAGE,
"ModelDataUrl": IR_MODEL_DATA,
},
"DeploymentRecommendation": {
"RecommendationStatus": "COMPLETED",
"RealTimeInferenceRecommendations": [
{
"RecommendationId": INSTANT_RECOMMENDATION_ID,
"InstanceType": "ml.g4dn.2xlarge",
"Environment": INSTANT_RECOMMENDATION_ENV,
},
{
"RecommendationId": "test-model-name/d248qVYU",
"InstanceType": "ml.c6i.large",
"Environment": {},
},
],
},
}

DESCRIBE_MODEL_PACKAGE_RESPONSE = {
Expand Down
57 changes: 50 additions & 7 deletions tests/unit/sagemaker/model/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
DESCRIBE_COMPILATION_JOB_RESPONSE,
DESCRIBE_MODEL_PACKAGE_RESPONSE,
DESCRIBE_MODEL_RESPONSE,
INSTANT_RECOMMENDATION_ENV,
INSTANT_RECOMMENDATION_ID,
INVALID_RECOMMENDATION_ID,
IR_COMPILATION_JOB_NAME,
IR_ENV,
Expand All @@ -34,6 +36,7 @@
IR_MODEL_PACKAGE_VERSION_ARN,
IR_COMPILATION_IMAGE,
IR_COMPILATION_MODEL_DATA,
NOT_EXISTED_INSTANT_RECOMMENDATION_ID,
RECOMMENDATION_ID,
NOT_EXISTED_RECOMMENDATION_ID,
)
Expand Down Expand Up @@ -470,6 +473,11 @@ def test_deploy_wrong_async_inferenc_config(sagemaker_session):


def test_deploy_ir_with_incompatible_parameters(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
create_inference_recommendations_job_default_with_model_package_arn()
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

with pytest.raises(
Expand All @@ -480,7 +488,7 @@ def test_deploy_ir_with_incompatible_parameters(sagemaker_session):
):
model.deploy(
instance_type=INSTANCE_TYPE,
inference_recommendation_id=INFERENCE_RECOMMENDATION_ID,
inference_recommendation_id=RECOMMENDATION_ID,
)

with pytest.raises(
Expand All @@ -491,7 +499,7 @@ def test_deploy_ir_with_incompatible_parameters(sagemaker_session):
):
model.deploy(
initial_instance_count=INSTANCE_COUNT,
inference_recommendation_id=INFERENCE_RECOMMENDATION_ID,
inference_recommendation_id=RECOMMENDATION_ID,
)

with pytest.raises(
Expand Down Expand Up @@ -542,6 +550,7 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
mock_describe_model_package
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -554,11 +563,12 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
assert model.env == IR_ENV


def test_deploy_with_recommendation_id_with_model_name(sagemaker_session):
def mock_describe_model(ModelName):
if ModelName == IR_MODEL_NAME:
return DESCRIBE_MODEL_RESPONSE
def mock_describe_model(ModelName):
if ModelName == IR_MODEL_NAME:
return DESCRIBE_MODEL_RESPONSE


def test_deploy_with_recommendation_id_with_model_name(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
create_inference_recommendations_job_default_with_model_name()
)
Expand All @@ -582,6 +592,7 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn_and_compilation(sagema
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
mock_describe_model_package
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -604,6 +615,7 @@ def mock_describe_compilation_job(CompilationJobName):
sagemaker_session.sagemaker_client.describe_compilation_job.side_effect = (
mock_describe_compilation_job
)
sagemaker_session.sagemaker_client.describe_model.side_effect = mock_describe_model

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

Expand All @@ -622,18 +634,49 @@ def test_deploy_with_not_existed_recommendation_id(sagemaker_session):
sagemaker_session.sagemaker_client.describe_compilation_job.return_value = (
DESCRIBE_COMPILATION_JOB_RESPONSE
)
sagemaker_session.sagemaker_client.describe_model.return_value = None

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

with pytest.raises(
ValueError,
match="inference_recommendation_id does not exist in InferenceRecommendations list",
match="Inference Recommendation id does not exist",
):
model.deploy(
inference_recommendation_id=NOT_EXISTED_RECOMMENDATION_ID,
)


def test_deploy_with_invalid_instant_recommendation_id(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = None
sagemaker_session.sagemaker_client.describe_model.side_effect = mock_describe_model

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

with pytest.raises(
ValueError,
match="Inference Recommendation id does not exist",
):
model.deploy(
inference_recommendation_id=NOT_EXISTED_INSTANT_RECOMMENDATION_ID,
)


def test_deploy_with_valid_instant_recommendation_id(sagemaker_session):
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = None
sagemaker_session.sagemaker_client.describe_model.side_effect = mock_describe_model

model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
model.deploy(
inference_recommendation_id=INSTANT_RECOMMENDATION_ID,
initial_instance_count=INSTANCE_COUNT,
)

assert model.model_data == MODEL_DATA
assert model.image_uri == MODEL_IMAGE
assert model.env == INSTANT_RECOMMENDATION_ENV


@patch("sagemaker.model.Model._create_sagemaker_model", Mock())
@patch("sagemaker.predictor.Predictor._get_endpoint_config_name", Mock())
@patch("sagemaker.predictor.Predictor._get_model_names", Mock())
Expand Down