Skip to content

Commit 2e07656

Browse files
Exponentially sleep during endpoint cleanup
1 parent 98f2a02 commit 2e07656

File tree

3 files changed

+47
-4
lines changed

3 files changed

+47
-4
lines changed

tests/integ/test_model_monitor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ def predictor(sagemaker_session, tensorflow_inference_latest_version):
9292
key_prefix="tensorflow-serving/models",
9393
)
9494
with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
95-
endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2
95+
endpoint_name=endpoint_name,
96+
sagemaker_session=sagemaker_session,
97+
hours=2,
98+
sleep_between_cleanup_attempts=20,
99+
exponential_sleep=True,
96100
):
97101
model = TensorFlowModel(
98102
model_data=model_data,

tests/integ/timeout.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def timeout_and_delete_endpoint_by_name(
5454
minutes=45,
5555
hours=0,
5656
sleep_between_cleanup_attempts=10,
57+
exponential_sleep=False,
5758
):
5859
limit = seconds + 60 * minutes + 3600 * hours
5960

@@ -83,7 +84,11 @@ def timeout_and_delete_endpoint_by_name(
8384
# avoids the inner exception to be overwritten
8485
pass
8586
# trying to delete the resource again in 10 seconds
86-
sleep(sleep_between_cleanup_attempts)
87+
if exponential_sleep:
88+
_sleep_between_cleanup_attempts = (sleep_between_cleanup_attempts * (3 - attempts))
89+
else:
90+
_sleep_between_cleanup_attempts = sleep_between_cleanup_attempts
91+
sleep(_sleep_between_cleanup_attempts)
8792

8893

8994
@contextmanager
@@ -150,7 +155,7 @@ def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name)
150155
monitor.delete_monitoring_schedule()
151156
except Exception as e:
152157
LOGGER.warning(
153-
"Failed to delete monitor {}".format(monitor.monitoring_schedule_name), e
158+
"Failed to delete monitor {},\nError: {}".format(monitor.monitoring_schedule_name, e)
154159
)
155160

156161

tests/unit/test_timeout.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import time
2020

2121
import pytest
22-
from mock import Mock, patch
22+
from mock import Mock, patch, call
2323
import stopit
2424

2525
from botocore.exceptions import ClientError
@@ -44,6 +44,7 @@
4444
LONG_DURATION_TO_EXCEED_TIMEOUT = 0.002
4545
LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED = 10
4646
DURATION_TO_SLEEP_TO_ALLOW_BACKGROUND_THREAD_TO_COMPLETE = 0.2
47+
DURATION_TO_SLEEP = 0.01
4748

4849

4950
@pytest.fixture()
@@ -174,6 +175,39 @@ def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failur
174175
assert session.delete_endpoint.call_count == 3
175176

176177

178+
@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True)
179+
@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True)
180+
@patch(
181+
"tests.integ.timeout._delete_schedules_associated_with_endpoint",
182+
return_value=None,
183+
autospec=True,
184+
)
185+
@patch('tests.integ.timeout.sleep', return_value=None)
186+
def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failure_with_exp_sleep(
187+
mock_sleep, _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session
188+
):
189+
session.delete_endpoint = Mock(
190+
side_effect=ClientError(
191+
error_response={"Error": {"Code": 403, "Message": "ValidationException"}},
192+
operation_name="Unit Test",
193+
)
194+
)
195+
196+
with timeout_and_delete_endpoint_by_name(
197+
endpoint_name=ENDPOINT_NAME,
198+
sagemaker_session=session,
199+
hours=0,
200+
minutes=0,
201+
seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED,
202+
sleep_between_cleanup_attempts=DURATION_TO_SLEEP,
203+
exponential_sleep=True,
204+
):
205+
pass
206+
assert session.delete_endpoint.call_count == 3
207+
assert mock_sleep.call_count == 3
208+
assert mock_sleep.mock_calls == [call(0.01), call(0.02), call(0.03)]
209+
210+
177211
@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True)
178212
@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True)
179213
@patch(

0 commit comments

Comments
 (0)