Skip to content

Commit 3393e02

Browse files
Zhenshan-Jin (Zhenshan Jin)
authored and committed
change: add integration tests for Model Card (aws#722)
Co-authored-by: Zhenshan Jin <[email protected]>
1 parent 3045819 commit 3393e02

File tree

3 files changed

+258
-0
lines changed

3 files changed

+258
-0
lines changed

tests/integ/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
TUNING_DEFAULT_TIMEOUT_MINUTES = 40
2323
TRANSFORM_DEFAULT_TIMEOUT_MINUTES = 40
2424
AUTO_ML_DEFAULT_TIMEMOUT_MINUTES = 60
25+
# Timeout, in minutes, passed as ``minutes=`` to the timeout helpers by the model card integration tests.
MODEL_CARD_DEFAULT_TIMEOUT_MINUTES = 10
2526

2627
# these regions have some p2 and p3 instances, but not enough for continuous testing
2728
HOSTING_NO_P2_REGIONS = [

tests/integ/test_model_card.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
from __future__ import absolute_import
14+
15+
import os
16+
import io
17+
import numpy as np
18+
import pytest
19+
from six.moves.urllib.parse import urlparse
20+
import sagemaker
21+
from sagemaker.session import Session
22+
import sagemaker.amazon.common as smac
23+
from sagemaker.image_uris import retrieve
24+
from sagemaker.utils import unique_name_from_base
25+
from sagemaker.model_card import (
26+
ModelCard,
27+
ModelOverview,
28+
TrainingDetails,
29+
ModelCardStatusEnum,
30+
)
31+
from tests.integ import (
32+
MODEL_CARD_DEFAULT_TIMEOUT_MINUTES,
33+
)
34+
from tests.integ.timeout import timeout, timeout_and_delete_model_by_name
35+
36+
37+
# Role name handed to the Estimator and to ``create_model`` in the fixture below.
ROLE = "SageMakerRole"
38+
39+
40+
@pytest.fixture(scope="module", name="binary_classifier")
def binary_classifier_fixture(
    sagemaker_session: Session,
    cpu_instance_type: str,
):
    """Manage the model required for the model card integration test.

    Uploads a small training set to the session's default bucket, trains a
    linear-learner binary classifier on it, registers the result as a
    SageMaker model, and deletes that model again during teardown.

    Args:
        sagemaker_session (Session): A SageMaker Session
            object, used for SageMaker interactions.
        cpu_instance_type (str): Instance type used for the training job.

    Yields:
        tuple: ``(model_name, training_job_name)`` for the created model and
        the training job that produced it.
    """
    model_name = unique_name_from_base("integ-test-binary-classifier-endpoint")
    with timeout_and_delete_model_by_name(
        model_name=model_name,
        sagemaker_session=sagemaker_session,
        minutes=MODEL_CARD_DEFAULT_TIMEOUT_MINUTES,
    ):
        # upload data
        raw_data = (
            (0.5, 0),
            (0.75, 0),
            (1.0, 0),
            (1.25, 0),
            (1.50, 0),
            (1.75, 0),
            (2.0, 0),
            (2.25, 1),
            (2.5, 0),
            (2.75, 1),
            (3.0, 0),
            (3.25, 1),
            (3.5, 0),
            (4.0, 1),
            (4.25, 1),
            (4.5, 1),
            (4.75, 1),
            (5.0, 1),
            (5.5, 1),
        )
        training_data = np.array(raw_data).astype("float32")
        labels = training_data[:, 1]

        bucket = sagemaker_session.default_bucket()
        prefix = "integ-test-data/model-card/binary-classifier"

        buf = io.BytesIO()
        smac.write_numpy_to_dense_tensor(buf, training_data, labels)
        buf.seek(0)

        # S3 keys always use "/" as separator; os.path.join would produce a
        # different key on platforms whose path separator is not "/", which
        # would no longer match the s3_train_data URI built below.
        train_key = f"{prefix}/train"
        sagemaker_session.boto_session.resource(
            "s3", region_name=sagemaker_session.boto_region_name
        ).Bucket(bucket).Object(train_key).upload_fileobj(buf)

        # train model
        s3_train_data = f"s3://{bucket}/{prefix}/train"
        output_location = f"s3://{bucket}/{prefix}/output"
        container = retrieve("linear-learner", sagemaker_session.boto_session.region_name)
        estimator = sagemaker.estimator.Estimator(
            container,
            role=ROLE,
            instance_count=1,
            instance_type=cpu_instance_type,
            output_path=output_location,
            sagemaker_session=sagemaker_session,
        )
        estimator.set_hyperparameters(
            feature_dim=2, mini_batch_size=10, predictor_type="binary_classifier"
        )
        estimator.fit({"train": s3_train_data})

        model = estimator.create_model(name=model_name)
        container_def = model.prepare_container_def()
        sagemaker_session.create_model(model_name, ROLE, container_def)

        # Yield to run the integration tests
        yield model_name, estimator.latest_training_job.name

        # Cleanup resources
        # NOTE(review): timeout_and_delete_model_by_name also calls
        # delete_model when the block exits - confirm the second attempt is
        # intended.
        sagemaker_session.delete_model(model_name)

    # Validate resource cleanup. The message is checked through ``match``
    # because statements placed after the raising call inside a
    # ``pytest.raises`` block are never executed.
    with pytest.raises(Exception, match="Could not find model"):
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
126+
127+
128+
def test_model_card_create_read_update_and_delete(
    sagemaker_session,
    binary_classifier,
):
    """Exercise the full model card lifecycle against a trained model.

    Creates a draft card from the fixture's model and training job, updates
    it, reloads it, exports it to PDF, and finally deletes the card and the
    exported PDF, verifying that both are gone afterwards.

    Args:
        sagemaker_session: SageMaker session used for all service calls.
        binary_classifier: ``(model_name, training_job_name)`` pair yielded by
            the ``binary_classifier`` fixture.
    """
    model_name, training_job_name = binary_classifier

    with timeout(minutes=MODEL_CARD_DEFAULT_TIMEOUT_MINUTES):
        model_card_name = unique_name_from_base("model-card")

        model_overview = ModelOverview.from_model_name(
            model_name=model_name,
            sagemaker_session=sagemaker_session,
        )
        assert model_overview.model_id

        # The training details must be the same whether they are resolved from
        # the training job name or from the model overview.
        training_details1 = TrainingDetails.from_training_job_name(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        assert training_details1.training_job_details.training_arn
        training_details2 = TrainingDetails.from_model_overview(
            model_overview=model_overview,
            sagemaker_session=sagemaker_session,
        )
        assert (
            training_details1.training_job_details.training_arn
            == training_details2.training_job_details.training_arn
        )
        assert (
            training_details1.training_job_details.training_environment.container_image[0]
            == training_details2.training_job_details.training_environment.container_image[0]
        )
        assert len(training_details1.training_job_details.training_metrics) == len(
            training_details2.training_job_details.training_metrics
        )

        card = ModelCard(
            name=model_card_name,
            status=ModelCardStatusEnum.DRAFT,
            model_overview=model_overview,
            training_details=training_details1,
            sagemaker_session=sagemaker_session,
        )
        card.create()
        assert card.arn

        new_model_description = "the model card is updated."
        card.model_overview.model_description = new_model_description
        card.update()
        assert len(card.get_version_history()) == 2

        card_copy = ModelCard.load(
            name=model_card_name,
            sagemaker_session=sagemaker_session,
        )
        assert card_copy.arn == card.arn
        assert card_copy.model_overview.model_description == new_model_description

        # export job
        bucket = sagemaker_session.default_bucket()
        prefix = "integ-test-data/model-card"
        s3_output_path = f"s3://{bucket}/{prefix}/export"
        pdf_s3_url = card.export_pdf(
            export_job_name=f"export-{model_card_name}", s3_output_path=s3_output_path
        )
        parsed_url = urlparse(pdf_s3_url)
        pdf_bucket = parsed_url.netloc
        pdf_key = parsed_url.path.lstrip("/")
        region = sagemaker_session.boto_region_name
        s3 = sagemaker_session.boto_session.client("s3", region_name=region)
        assert s3.list_objects_v2(Bucket=pdf_bucket, Prefix=pdf_key)["KeyCount"] == 1

        # list export jobs
        assert len(card.list_export_jobs()["ModelCardExportJobSummaries"]) == 1

        # clean resources
        s3.delete_object(Bucket=pdf_bucket, Key=pdf_key)
        card.delete()

    # Validate resource cleanup. Each lookup gets its own ``pytest.raises``
    # block: once a call raises, the rest of the block is skipped, so sharing
    # one block would leave the second lookup and both message checks
    # unexecuted.
    with pytest.raises(Exception, match="does not exist"):
        sagemaker_session.sagemaker_client.describe_model_card(ModelCardName=model_card_name)

    with pytest.raises(Exception, match="The specified key does not exist"):
        s3.get_object(Bucket=pdf_bucket, Key=pdf_key)

tests/integ/timeout.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,50 @@ def timeout_and_delete_model_with_transformer(
123123
sleep(sleep_between_cleanup_attempts)
124124

125125

126+
@contextmanager
def timeout_and_delete_model_by_name(
    model_name,
    sagemaker_session,
    seconds=0,
    minutes=45,
    hours=0,
    sleep_between_cleanup_attempts=10,
    exponential_sleep=False,
):
    """Run the managed block under a time limit, then delete the named model.

    The time limit is the sum of ``seconds``, ``minutes`` and ``hours``.
    Whether or not the block raised, the model is deleted on exit, with up to
    three attempts separated by a sleep.

    Args:
        model_name: Name of the model to delete when the block exits.
        sagemaker_session: Session whose ``delete_model`` performs the deletion.
        seconds: Seconds contributing to the time limit.
        minutes: Minutes contributing to the time limit.
        hours: Hours contributing to the time limit.
        sleep_between_cleanup_attempts: Base number of seconds to sleep after
            a failed deletion attempt.
        exponential_sleep: When true, the sleep is the base value multiplied
            by the attempt number (1, 2, 3) instead of the base value itself.

    Yields:
        list: A single-element list holding the ``stopit.ThreadingTimeout``
        object guarding the block.
    """
    total_seconds = seconds + 60 * minutes + 3600 * hours
    max_attempts = 3

    with stopit.ThreadingTimeout(total_seconds, swallow_exc=False) as timer:
        body_succeeded = False
        try:
            yield [timer]
            body_succeeded = True
        finally:
            for attempt_number in range(1, max_attempts + 1):
                try:
                    sagemaker_session.delete_model(model_name)
                    LOGGER.info("deleted model {}".format(model_name))

                    _show_logs(model_name, "Models", sagemaker_session)
                    # Logs are only removed when the managed block finished
                    # without raising.
                    if body_succeeded:
                        _cleanup_logs(model_name, "Models", sagemaker_session)
                    break
                except ClientError as ce:
                    if ce.response["Error"]["Code"] == "ValidationException":
                        # avoids the inner exception to be overwritten
                        pass
                    # Wait before the next deletion attempt.
                    pause = (
                        sleep_between_cleanup_attempts * attempt_number
                        if exponential_sleep
                        else sleep_between_cleanup_attempts
                    )
                    sleep(pause)
168+
169+
126170
def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name):
127171
"""Deletes schedules associated with a given endpoint. Per latest validation, ensures the
128172
schedule is stopped and no executions are running, before deleting (otherwise latest

0 commit comments

Comments
 (0)