Skip to content

Commit cdd4f05

Browse files
Jinpeng Qi (jinpengqi)
authored and committed
feature: inference recommendation id deployment integ test (aws#806)
* Add integ test and refactor recommendation id deploy
* Revert IR id hardcode change
* Fix framework parse

Co-authored-by: Jinpeng Qi <[email protected]>
1 parent 8372c5e commit cdd4f05

File tree

4 files changed

+83
-51
lines changed

4 files changed

+83
-51
lines changed

src/sagemaker/inference_recommender/inference_recommender_mixin.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,15 @@ def right_size(
100100
'OMP_NUM_THREADS': CategoricalParameter(['1', '2', '3', '4'])
101101
}]
102102
103-
phases (list[Phase]): Specifies the criteria for increasing load
104-
during endpoint load tests. (default: None).
105-
traffic_type (str): Specifies the traffic type that matches the phases. (default: None).
106-
max_invocations (str): defines invocation limit for endpoint load tests (default: None).
107-
model_latency_thresholds (list[ModelLatencyThreshold]): defines the response latency
108-
thresholds for endpoint load tests (default: None).
109-
max_tests (int): restricts how many endpoints are allowed to be
103+
phases (list[Phase]): Shape of the traffic pattern to use in the load test
104+
(default: None).
105+
traffic_type (str): Specifies the traffic pattern type. Currently only supports
106+
one type 'PHASES' (default: None).
107+
max_invocations (str): defines the minimum invocations per minute for the endpoint
108+
to support (default: None).
109+
model_latency_thresholds (list[ModelLatencyThreshold]): defines the maximum response
110+
latency for endpoints to support (default: None).
111+
max_tests (int): restricts how many endpoints in total are allowed to be
110112
spun up for this job (default: None).
111113
max_parallel_tests (int): restricts how many concurrent endpoints
112114
this job is allowed to spin up (default: None).
@@ -121,7 +123,7 @@ def right_size(
121123
raise ValueError("right_size() is currently only supported with a registered model")
122124

123125
if not framework and self._framework():
124-
framework = INFERENCE_RECOMMENDER_FRAMEWORK_MAPPING.get(self._framework, framework)
126+
framework = INFERENCE_RECOMMENDER_FRAMEWORK_MAPPING.get(self._framework(), framework)
125127

126128
framework_version = self._get_framework_version()
127129

@@ -177,15 +179,16 @@ def right_size(
177179

178180
def _update_params(
179181
self,
180-
instance_type,
181-
initial_instance_count,
182-
accelerator_type,
183-
async_inference_config,
184-
serverless_inference_config,
185-
inference_recommendation_id,
186-
inference_recommender_job_results,
182+
**kwargs,
187183
):
188184
"""Check and update params based on inference recommendation id or right size case"""
185+
instance_type = kwargs["instance_type"]
186+
initial_instance_count = kwargs["initial_instance_count"]
187+
accelerator_type = kwargs["accelerator_type"]
188+
async_inference_config = kwargs["async_inference_config"]
189+
serverless_inference_config = kwargs["serverless_inference_config"]
190+
inference_recommendation_id = kwargs["inference_recommendation_id"]
191+
inference_recommender_job_results = kwargs["inference_recommender_job_results"]
189192
if inference_recommendation_id is not None:
190193
inference_recommendation = self._update_params_for_recommendation_id(
191194
instance_type=instance_type,
@@ -394,7 +397,6 @@ def _update_params_for_recommendation_id(
394397
self.model_data = model_res["PrimaryContainer"]["ModelDataUrl"]
395398
self.image_uri = model_res["PrimaryContainer"]["Image"]
396399
else:
397-
# Update params with compilation recommendation results
398400
if "InferenceSpecificationName" in model_config:
399401
modelpkg_res = sage_client.describe_model_package(
400402
ModelPackageName=input_config["ModelPackageVersionArn"]

src/sagemaker/model.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,15 @@ def deploy(
11291129
"""
11301130
removed_kwargs("update_endpoint", kwargs)
11311131

1132+
self._init_sagemaker_session_if_does_not_exist(instance_type)
1133+
1134+
tags = add_jumpstart_tags(
1135+
tags=tags, inference_model_uri=self.model_data, inference_script_uri=self.source_dir
1136+
)
1137+
1138+
if self.role is None:
1139+
raise ValueError("Role can not be null for deploying a model")
1140+
11321141
if (
11331142
inference_recommendation_id is not None
11341143
or self.inference_recommender_job_results is not None
@@ -1143,15 +1152,6 @@ def deploy(
11431152
inference_recommender_job_results=self.inference_recommender_job_results,
11441153
)
11451154

1146-
self._init_sagemaker_session_if_does_not_exist(instance_type)
1147-
1148-
tags = add_jumpstart_tags(
1149-
tags=tags, inference_model_uri=self.model_data, inference_script_uri=self.source_dir
1150-
)
1151-
1152-
if self.role is None:
1153-
raise ValueError("Role can not be null for deploying a model")
1154-
11551155
is_async = async_inference_config is not None
11561156
if is_async and not isinstance(async_inference_config, AsyncInferenceConfig):
11571157
raise ValueError("async_inference_config needs to be a AsyncInferenceConfig object")

tests/integ/test_inference_recommender.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type):
4343
with timeout(minutes=45):
4444
try:
4545
model_package_group_name = unique_name_from_base("test-ir-right-size-model-pkg-sklearn")
46+
ir_job_name = unique_name_from_base("test-ir-right-size-job-name")
4647
model_data = sagemaker_session.upload_data(path=IR_SKLEARN_MODEL)
4748
payload_data = sagemaker_session.upload_data(path=IR_SKLEARN_PAYLOAD)
4849

@@ -66,13 +67,15 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type):
6667

6768
return (
6869
sklearn_model_package.right_size(
70+
job_name=ir_job_name,
6971
sample_payload_url=payload_data,
7072
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
7173
supported_instance_types=[cpu_instance_type],
7274
framework=IR_SKLEARN_FRAMEWORK,
7375
log_level="Quiet",
7476
),
7577
model_package_group_name,
78+
ir_job_name,
7679
)
7780
except Exception:
7881
sagemaker_session.sagemaker_client.delete_model_package(
@@ -157,7 +160,7 @@ def test_default_right_size_and_deploy_registered_model_sklearn(
157160
):
158161
endpoint_name = unique_name_from_base("test-ir-right-size-default-sklearn")
159162

160-
right_size_model_package, model_package_group_name = default_right_sized_model
163+
right_size_model_package, model_package_group_name, ir_job_name = default_right_sized_model
161164
with timeout(minutes=45):
162165
try:
163166
right_size_model_package.predictor_cls = SKLearnPredictor
@@ -169,12 +172,6 @@ def test_default_right_size_and_deploy_registered_model_sklearn(
169172
assert inference is not None
170173
assert 26 == len(inference)
171174
finally:
172-
sagemaker_session.sagemaker_client.delete_model_package(
173-
ModelPackageName=right_size_model_package.model_package_arn
174-
)
175-
sagemaker_session.sagemaker_client.delete_model_package_group(
176-
ModelPackageGroupName=model_package_group_name
177-
)
178175
predictor.delete_model()
179176
predictor.delete_endpoint()
180177

@@ -209,3 +206,35 @@ def test_advanced_right_size_and_deploy_registered_model_sklearn(
209206

210207
# TODO when we've added support for inference_recommendation_id
211208
# then add tests to test Framework models
209+
@pytest.mark.slow_test
210+
def test_deploy_inference_recommendation_id_with_registered_model_sklearn(
211+
default_right_sized_model, sagemaker_session
212+
):
213+
right_size_model_package, model_package_group_name, ir_job_name = default_right_sized_model
214+
endpoint_name = unique_name_from_base("test-rec-id-deployment-default-sklearn")
215+
rec_res = sagemaker_session.sagemaker_client.describe_inference_recommendations_job(
216+
JobName=ir_job_name
217+
)
218+
rec_id = rec_res["InferenceRecommendations"][0]["RecommendationId"]
219+
220+
with timeout(minutes=45):
221+
try:
222+
right_size_model_package.predictor_cls = SKLearnPredictor
223+
predictor = right_size_model_package.deploy(
224+
inference_recommendation_id=rec_id, endpoint_name=endpoint_name
225+
)
226+
227+
payload = pd.read_csv(IR_SKLEARN_DATA, header=None)
228+
229+
inference = predictor.predict(payload)
230+
assert inference is not None
231+
assert 26 == len(inference)
232+
finally:
233+
sagemaker_session.sagemaker_client.delete_model_package(
234+
ModelPackageName=right_size_model_package.model_package_arn
235+
)
236+
sagemaker_session.sagemaker_client.delete_model_package_group(
237+
ModelPackageGroupName=model_package_group_name
238+
)
239+
predictor.delete_model()
240+
predictor.delete_endpoint()

tests/unit/sagemaker/model/test_deploy.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -530,12 +530,17 @@ def test_deploy_with_wrong_recommendation_id(sagemaker_session):
530530
)
531531

532532

533+
def mock_describe_model_package(ModelPackageName):
534+
if ModelPackageName == IR_MODEL_PACKAGE_VERSION_ARN:
535+
return DESCRIBE_MODEL_PACKAGE_RESPONSE
536+
537+
533538
def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
534539
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
535540
create_inference_recommendations_job_default_with_model_package_arn()
536541
)
537-
sagemaker_session.sagemaker_client.describe_model_package.return_value = (
538-
DESCRIBE_MODEL_PACKAGE_RESPONSE
542+
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
543+
mock_describe_model_package
539544
)
540545

541546
model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
@@ -544,29 +549,27 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn(sagemaker_session):
544549
inference_recommendation_id=RECOMMENDATION_ID,
545550
)
546551

547-
sagemaker_session.sagemaker_client.describe_model_package.assert_called_once_with(
548-
ModelPackageName=IR_MODEL_PACKAGE_VERSION_ARN
549-
)
550552
assert model.model_data == IR_MODEL_DATA
551553
assert model.image_uri == IR_IMAGE
552554
assert model.env == IR_ENV
553555

554556

555557
def test_deploy_with_recommendation_id_with_model_name(sagemaker_session):
558+
def mock_describe_model(ModelName):
559+
if ModelName == IR_MODEL_NAME:
560+
return DESCRIBE_MODEL_RESPONSE
561+
556562
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
557563
create_inference_recommendations_job_default_with_model_name()
558564
)
559-
sagemaker_session.sagemaker_client.describe_model.return_value = DESCRIBE_MODEL_RESPONSE
565+
sagemaker_session.sagemaker_client.describe_model.side_effect = mock_describe_model
560566

561567
model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
562568

563569
model.deploy(
564570
inference_recommendation_id=RECOMMENDATION_ID,
565571
)
566572

567-
sagemaker_session.sagemaker_client.describe_model.assert_called_once_with(
568-
ModelName=IR_MODEL_NAME
569-
)
570573
assert model.model_data == IR_MODEL_DATA
571574
assert model.image_uri == IR_IMAGE
572575
assert model.env == IR_ENV
@@ -576,8 +579,8 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn_and_compilation(sagema
576579
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
577580
create_inference_recommendations_job_default_with_model_package_arn_and_compilation()
578581
)
579-
sagemaker_session.sagemaker_client.describe_model_package.return_value = (
580-
DESCRIBE_MODEL_PACKAGE_RESPONSE
582+
sagemaker_session.sagemaker_client.describe_model_package.side_effect = (
583+
mock_describe_model_package
581584
)
582585

583586
model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
@@ -586,19 +589,20 @@ def test_deploy_with_recommendation_id_with_model_pkg_arn_and_compilation(sagema
586589
inference_recommendation_id=RECOMMENDATION_ID,
587590
)
588591

589-
sagemaker_session.sagemaker_client.describe_model_package.assert_called_once_with(
590-
ModelPackageName=IR_MODEL_PACKAGE_VERSION_ARN
591-
)
592592
assert model.model_data == IR_COMPILATION_MODEL_DATA
593593
assert model.image_uri == IR_COMPILATION_IMAGE
594594

595595

596596
def test_deploy_with_recommendation_id_with_model_name_and_compilation(sagemaker_session):
597+
def mock_describe_compilation_job(CompilationJobName):
598+
if CompilationJobName == IR_COMPILATION_JOB_NAME:
599+
return DESCRIBE_COMPILATION_JOB_RESPONSE
600+
597601
sagemaker_session.sagemaker_client.describe_inference_recommendations_job.return_value = (
598602
create_inference_recommendations_job_default_with_model_name_and_compilation()
599603
)
600-
sagemaker_session.sagemaker_client.describe_compilation_job.return_value = (
601-
DESCRIBE_COMPILATION_JOB_RESPONSE
604+
sagemaker_session.sagemaker_client.describe_compilation_job.side_effect = (
605+
mock_describe_compilation_job
602606
)
603607

604608
model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)
@@ -607,9 +611,6 @@ def test_deploy_with_recommendation_id_with_model_name_and_compilation(sagemaker
607611
inference_recommendation_id=RECOMMENDATION_ID,
608612
)
609613

610-
sagemaker_session.sagemaker_client.describe_compilation_job.assert_called_once_with(
611-
CompilationJobName=IR_COMPILATION_JOB_NAME
612-
)
613614
assert model.model_data == IR_COMPILATION_MODEL_DATA
614615
assert model.image_uri == IR_COMPILATION_IMAGE
615616

0 commit comments

Comments
 (0)