Skip to content

Commit fa8c9c1

Browse files
knakad and laurenyu
authored and committed
change: modify schedule cleanup to abide by latest validations (#1154)
This commit also stops monitoring schedules as part of the tests as soon as the schedules no longer need to provide jobs and prevents ModelMonitoring tests from running in me-south-1.
1 parent a3a927b commit fa8c9c1

File tree

3 files changed

+134
-7
lines changed

3 files changed

+134
-7
lines changed

tests/integ/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
NO_LDA_REGIONS = ["eu-west-3", "eu-north-1", "sa-east-1", "ap-east-1", "me-south-1"]
7878
NO_MARKET_PLACE_REGIONS = ["eu-west-3", "eu-north-1", "sa-east-1", "ap-east-1", "me-south-1"]
7979
NO_AUTO_ML_REGIONS = ["sa-east-1", "me-south-1", "ap-east-1", "eu-west-3"]
80+
NO_MODEL_MONITORING_REGIONS = ["me-south-1"]
8081

8182
EFS_TEST_ENABLED_REGION = []
8283

tests/integ/test_model_monitor.py

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,10 @@ def updated_output_kms_key(sagemaker_session):
266266
)
267267

268268

269+
@pytest.mark.skipif(
270+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
271+
reason="ModelMonitoring is not yet supported in this region.",
272+
)
269273
def test_default_monitor_suggest_baseline_and_create_monitoring_schedule_with_customizations(
270274
sagemaker_session, output_kms_key, volume_kms_key, predictor
271275
):
@@ -464,6 +468,10 @@ def test_default_monitor_suggest_baseline_and_create_monitoring_schedule_with_cu
464468
assert len(summary["MonitoringScheduleSummaries"]) > 0
465469

466470

471+
@pytest.mark.skipif(
472+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
473+
reason="ModelMonitoring is not yet supported in this region.",
474+
)
467475
def test_default_monitor_suggest_baseline_and_create_monitoring_schedule_without_customizations(
468476
sagemaker_session, predictor
469477
):
@@ -637,6 +645,10 @@ def test_default_monitor_suggest_baseline_and_create_monitoring_schedule_without
637645
assert len(summary["MonitoringScheduleSummaries"]) > 0
638646

639647

648+
@pytest.mark.skipif(
649+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
650+
reason="ModelMonitoring is not yet supported in this region.",
651+
)
640652
def test_default_monitor_create_stop_and_start_monitoring_schedule_with_customizations(
641653
sagemaker_session, output_kms_key, volume_kms_key, predictor
642654
):
@@ -792,7 +804,15 @@ def test_default_monitor_create_stop_and_start_monitoring_schedule_with_customiz
792804
started_schedule_description = my_default_monitor.describe_schedule()
793805
assert started_schedule_description["MonitoringScheduleStatus"] == "Scheduled"
794806

807+
my_default_monitor.stop_monitoring_schedule()
808+
809+
_wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
795810

811+
812+
@pytest.mark.skipif(
813+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
814+
reason="ModelMonitoring is not yet supported in this region.",
815+
)
796816
def test_default_monitor_create_and_update_schedule_config_with_customizations(
797817
sagemaker_session,
798818
predictor,
@@ -1061,9 +1081,20 @@ def test_default_monitor_create_and_update_schedule_config_with_customizations(
10611081
]["EnableNetworkIsolation"]
10621082
== UPDATED_NETWORK_CONFIG.enable_network_isolation
10631083
)
1084+
1085+
_wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
1086+
1087+
my_default_monitor.stop_monitoring_schedule()
1088+
1089+
_wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
1090+
10641091
assert len(predictor.list_monitors()) > 0
10651092

10661093

1094+
@pytest.mark.skipif(
1095+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1096+
reason="ModelMonitoring is not yet supported in this region.",
1097+
)
10671098
def test_default_monitor_create_and_update_schedule_config_without_customizations(
10681099
sagemaker_session, predictor
10691100
):
@@ -1277,7 +1308,17 @@ def test_default_monitor_create_and_update_schedule_config_without_customization
12771308
is None
12781309
)
12791310

1311+
_wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
1312+
1313+
my_default_monitor.stop_monitoring_schedule()
1314+
1315+
_wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
1316+
12801317

1318+
@pytest.mark.skipif(
1319+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1320+
reason="ModelMonitoring is not yet supported in this region.",
1321+
)
12811322
def test_default_monitor_attach_followed_by_baseline_and_update_monitoring_schedule(
12821323
sagemaker_session,
12831324
default_monitoring_schedule_name,
@@ -1421,11 +1462,20 @@ def test_default_monitor_attach_followed_by_baseline_and_update_monitoring_sched
14211462
== UPDATED_NETWORK_CONFIG.enable_network_isolation
14221463
)
14231464

1465+
_wait_for_schedule_changes_to_apply(monitor=my_attached_monitor)
1466+
1467+
my_attached_monitor.stop_monitoring_schedule()
1468+
1469+
_wait_for_schedule_changes_to_apply(monitor=my_attached_monitor)
1470+
14241471

1472+
@pytest.mark.skipif(
1473+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1474+
reason="ModelMonitoring is not yet supported in this region.",
1475+
)
14251476
def test_default_monitor_monitoring_execution_interactions(
14261477
sagemaker_session, default_monitoring_schedule_name
14271478
):
1428-
14291479
my_attached_monitor = DefaultModelMonitor.attach(
14301480
monitor_schedule_name=default_monitoring_schedule_name, sagemaker_session=sagemaker_session
14311481
)
@@ -1462,6 +1512,10 @@ def test_default_monitor_monitoring_execution_interactions(
14621512
assert constraint_violations.body_dict["violations"][0]["feature_name"] == "store_and_fwd_flag"
14631513

14641514

1515+
@pytest.mark.skipif(
1516+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1517+
reason="ModelMonitoring is not yet supported in this region.",
1518+
)
14651519
def test_byoc_monitor_suggest_baseline_and_create_monitoring_schedule_with_customizations(
14661520
sagemaker_session, output_kms_key, volume_kms_key, predictor
14671521
):
@@ -1666,10 +1720,20 @@ def test_byoc_monitor_suggest_baseline_and_create_monitoring_schedule_with_custo
16661720
== NETWORK_CONFIG.enable_network_isolation
16671721
)
16681722

1723+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
1724+
1725+
my_byoc_monitor.stop_monitoring_schedule()
1726+
1727+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
1728+
16691729
summary = sagemaker_session.list_monitoring_schedules()
16701730
assert len(summary["MonitoringScheduleSummaries"]) > 0
16711731

16721732

1733+
@pytest.mark.skipif(
1734+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1735+
reason="ModelMonitoring is not yet supported in this region.",
1736+
)
16731737
def test_byoc_monitor_suggest_baseline_and_create_monitoring_schedule_without_customizations(
16741738
sagemaker_session, predictor
16751739
):
@@ -1850,10 +1914,20 @@ def test_byoc_monitor_suggest_baseline_and_create_monitoring_schedule_without_cu
18501914
is None
18511915
)
18521916

1917+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
1918+
1919+
my_byoc_monitor.stop_monitoring_schedule()
1920+
1921+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
1922+
18531923
summary = sagemaker_session.list_monitoring_schedules()
18541924
assert len(summary["MonitoringScheduleSummaries"]) > 0
18551925

18561926

1927+
@pytest.mark.skipif(
1928+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
1929+
reason="ModelMonitoring is not yet supported in this region.",
1930+
)
18571931
def test_byoc_monitor_create_and_update_schedule_config_with_customizations(
18581932
sagemaker_session,
18591933
predictor,
@@ -2123,9 +2197,20 @@ def test_byoc_monitor_create_and_update_schedule_config_with_customizations(
21232197
]["EnableNetworkIsolation"]
21242198
== UPDATED_NETWORK_CONFIG.enable_network_isolation
21252199
)
2200+
2201+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
2202+
2203+
my_byoc_monitor.stop_monitoring_schedule()
2204+
2205+
_wait_for_schedule_changes_to_apply(monitor=my_byoc_monitor)
2206+
21262207
assert len(predictor.list_monitors()) > 0
21272208

21282209

2210+
@pytest.mark.skipif(
2211+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
2212+
reason="ModelMonitoring is not yet supported in this region.",
2213+
)
21292214
def test_byoc_monitor_attach_followed_by_baseline_and_update_monitoring_schedule(
21302215
sagemaker_session,
21312216
predictor,
@@ -2337,7 +2422,17 @@ def test_byoc_monitor_attach_followed_by_baseline_and_update_monitoring_schedule
23372422
== UPDATED_NETWORK_CONFIG.enable_network_isolation
23382423
)
23392424

2425+
_wait_for_schedule_changes_to_apply(monitor=my_attached_monitor)
2426+
2427+
my_attached_monitor.stop_monitoring_schedule()
2428+
2429+
_wait_for_schedule_changes_to_apply(monitor=my_attached_monitor)
2430+
23402431

2432+
@pytest.mark.skipif(
2433+
tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
2434+
reason="ModelMonitoring is not yet supported in this region.",
2435+
)
23412436
def test_byoc_monitor_monitoring_execution_interactions(
23422437
sagemaker_session, byoc_monitoring_schedule_name
23432438
):

tests/integ/timeout.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
from botocore.exceptions import ClientError
2121
import stopit
2222

23+
from sagemaker import RealTimePredictor
24+
from tests.integ.retry import retries
2325

2426
LOGGER = logging.getLogger("timeout")
2527

@@ -115,12 +117,41 @@ def timeout_and_delete_model_with_transformer(
115117

116118

117119
def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name):
    """Delete every monitoring schedule associated with the given endpoint.

    Server-side validation rejects deleting a schedule that is still active
    or that has running executions, so each schedule is stopped first, its
    executions are asked to stop, and deletion is attempted only after every
    execution has reached a terminal status.

    Cleanup is best-effort: a failure on one monitor is logged (with its
    traceback) and the remaining monitors are still processed.

    Args:
        sagemaker_session (sagemaker.session.Session): The SageMaker Session
            object used for all SageMaker interactions.
        endpoint_name (str): The name of the endpoint whose schedules are
            deleted.

    """
    # ProcessingJobStatus values after which an execution is no longer running.
    # Waiting only for "Stopped" would never succeed for an execution that
    # finished (or failed) on its own before the stop request took effect.
    terminal_statuses = ("Stopped", "Completed", "Failed")

    predictor = RealTimePredictor(endpoint=endpoint_name, sagemaker_session=sagemaker_session)
    monitors = predictor.list_monitors()
    for monitor in monitors:
        try:
            # Let any pending schedule update settle before changing its state.
            monitor._wait_for_schedule_changes_to_apply()
            # Stop the schedule to prevent new executions from triggering.
            monitor.stop_monitoring_schedule()
            executions = monitor.list_executions()
            for execution in executions:
                execution.stop()
            # Wait for all executions to reach a terminal status.
            # Schedules can't be deleted with running executions.
            for execution in executions:
                # NOTE(review): `retries` presumably raises once its budget is
                # exhausted, which lands in the handler below - confirm.
                for _ in retries(60, "Waiting for executions to stop", seconds_to_sleep=5):
                    status = execution.describe()["ProcessingJobStatus"]
                    if status in terminal_statuses:
                        break
            # Delete the schedule.
            monitor.delete_monitoring_schedule()
        except Exception:  # Deliberately broad: test cleanup must not abort.
            # Lazy %-formatting plus exc_info keeps the exception details in
            # the log; passing the exception as a stray positional argument
            # makes the logging module fail to format the record.
            LOGGER.warning(
                "Failed to delete monitor %s", monitor.monitoring_schedule_name, exc_info=True
            )
124155

125156

126157
def _show_logs(resource_name, resource_type, sagemaker_session):

0 commit comments

Comments
 (0)