|
13 | 13 | from __future__ import absolute_import
|
14 | 14 |
|
15 | 15 | import os
|
| 16 | +import time |
16 | 17 |
|
17 | 18 | import pytest
|
18 | 19 |
|
| 20 | +from botocore.exceptions import ClientError |
19 | 21 | from sagemaker import image_uris
|
20 | 22 | from sagemaker.model import Model
|
21 | 23 | from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor
|
|
40 | 42 | IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1"
|
41 | 43 |
|
42 | 44 |
|
def retry_and_back_off(right_size_fn, *args, **kwargs):
    """Invoke ``right_size_fn`` and retry with back-off when the service throttles.

    Args:
        right_size_fn: Preferably a callable (e.g. the bound ``right_size``
            method) that is invoked as ``right_size_fn(*args, **kwargs)``.
            For backward compatibility a non-callable value -- such as the
            already-evaluated result of ``model.right_size(...)`` -- is
            returned unchanged, because there is nothing left to retry.
        *args: Positional arguments forwarded to ``right_size_fn``.
        **kwargs: Keyword arguments forwarded to ``right_size_fn``.

    Returns:
        Whatever ``right_size_fn`` returns, or ``right_size_fn`` itself when
        it is not callable.

    Raises:
        ClientError: Immediately when the error code is anything other than
            ``ThrottlingException``, or when throttling persists after the
            final attempt.
    """
    if not callable(right_size_fn):
        # Callers that wrote retry_and_back_off(model.right_size(...)) have
        # already executed the call; pass the result through untouched.
        # NOTE(review): assumes the object returned by right_size() is not
        # itself callable -- confirm against the sagemaker Model classes.
        return right_size_fn

    tot_retries = 3
    for attempt in range(1, tot_retries + 1):
        try:
            return right_size_fn(*args, **kwargs)
        except ClientError as e:
            throttled = e.response["Error"]["Code"] == "ThrottlingException"
            # Never swallow unrelated errors, and surface the throttling error
            # once the attempts are used up instead of returning None.
            if not throttled or attempt == tot_retries:
                raise
            # Linear back-off matching the original schedule: 10s, then 15s.
            time.sleep(5 * (attempt + 1))
| 55 | + |
| 56 | + |
43 | 57 | @pytest.fixture(scope="module")
|
44 | 58 | def default_right_sized_model(sagemaker_session, cpu_instance_type):
|
45 | 59 | with timeout(minutes=45):
|
@@ -68,13 +82,15 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type):
|
68 | 82 | )
|
69 | 83 |
|
70 | 84 | return (
|
71 |
| - sklearn_model_package.right_size( |
72 |
| - job_name=ir_job_name, |
73 |
| - sample_payload_url=payload_data, |
74 |
| - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
75 |
| - supported_instance_types=[cpu_instance_type], |
76 |
| - framework=IR_SKLEARN_FRAMEWORK, |
77 |
| - log_level="Quiet", |
| 85 | + retry_and_back_off( |
| 86 | + sklearn_model_package.right_size( |
| 87 | + job_name=ir_job_name, |
| 88 | + sample_payload_url=payload_data, |
| 89 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 90 | + supported_instance_types=[cpu_instance_type], |
| 91 | + framework=IR_SKLEARN_FRAMEWORK, |
| 92 | + log_level="Quiet", |
| 93 | + ) |
78 | 94 | ),
|
79 | 95 | model_package_group_name,
|
80 | 96 | ir_job_name,
|
@@ -133,17 +149,19 @@ def advanced_right_sized_model(sagemaker_session, cpu_instance_type):
|
133 | 149 | ]
|
134 | 150 |
|
135 | 151 | return (
|
136 |
| - sklearn_model_package.right_size( |
137 |
| - sample_payload_url=payload_data, |
138 |
| - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
139 |
| - framework=IR_SKLEARN_FRAMEWORK, |
140 |
| - job_duration_in_seconds=3600, |
141 |
| - hyperparameter_ranges=hyperparameter_ranges, |
142 |
| - phases=phases, |
143 |
| - model_latency_thresholds=model_latency_thresholds, |
144 |
| - max_invocations=100, |
145 |
| - max_tests=5, |
146 |
| - max_parallel_tests=5, |
| 152 | + retry_and_back_off( |
| 153 | + sklearn_model_package.right_size( |
| 154 | + sample_payload_url=payload_data, |
| 155 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 156 | + framework=IR_SKLEARN_FRAMEWORK, |
| 157 | + job_duration_in_seconds=3600, |
| 158 | + hyperparameter_ranges=hyperparameter_ranges, |
| 159 | + phases=phases, |
| 160 | + model_latency_thresholds=model_latency_thresholds, |
| 161 | + max_invocations=100, |
| 162 | + max_tests=5, |
| 163 | + max_parallel_tests=5, |
| 164 | + ) |
147 | 165 | ),
|
148 | 166 | model_package_group_name,
|
149 | 167 | )
|
@@ -175,13 +193,15 @@ def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type)
|
175 | 193 | )
|
176 | 194 |
|
177 | 195 | return (
|
178 |
| - sklearn_model.right_size( |
179 |
| - job_name=ir_job_name, |
180 |
| - sample_payload_url=payload_data, |
181 |
| - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
182 |
| - supported_instance_types=[cpu_instance_type], |
183 |
| - framework=IR_SKLEARN_FRAMEWORK, |
184 |
| - log_level="Quiet", |
| 196 | + retry_and_back_off( |
| 197 | + sklearn_model.right_size( |
| 198 | + job_name=ir_job_name, |
| 199 | + sample_payload_url=payload_data, |
| 200 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 201 | + supported_instance_types=[cpu_instance_type], |
| 202 | + framework=IR_SKLEARN_FRAMEWORK, |
| 203 | + log_level="Quiet", |
| 204 | + ) |
185 | 205 | ),
|
186 | 206 | ir_job_name,
|
187 | 207 | )
|
@@ -224,18 +244,20 @@ def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type
|
224 | 244 | ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100)
|
225 | 245 | ]
|
226 | 246 |
|
227 |
| - return sklearn_model.right_size( |
228 |
| - sample_payload_url=payload_data, |
229 |
| - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
230 |
| - framework=IR_SKLEARN_FRAMEWORK, |
231 |
| - job_duration_in_seconds=3600, |
232 |
| - hyperparameter_ranges=hyperparameter_ranges, |
233 |
| - phases=phases, |
234 |
| - model_latency_thresholds=model_latency_thresholds, |
235 |
| - max_invocations=100, |
236 |
| - max_tests=5, |
237 |
| - max_parallel_tests=5, |
238 |
| - log_level="Quiet", |
| 247 | + return retry_and_back_off( |
| 248 | + sklearn_model.right_size( |
| 249 | + sample_payload_url=payload_data, |
| 250 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 251 | + framework=IR_SKLEARN_FRAMEWORK, |
| 252 | + job_duration_in_seconds=3600, |
| 253 | + hyperparameter_ranges=hyperparameter_ranges, |
| 254 | + phases=phases, |
| 255 | + model_latency_thresholds=model_latency_thresholds, |
| 256 | + max_invocations=100, |
| 257 | + max_tests=5, |
| 258 | + max_parallel_tests=5, |
| 259 | + log_level="Quiet", |
| 260 | + ) |
239 | 261 | )
|
240 | 262 |
|
241 | 263 | except Exception:
|
@@ -265,13 +287,15 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_
|
265 | 287 | )
|
266 | 288 |
|
267 | 289 | return (
|
268 |
| - model.right_size( |
269 |
| - job_name=ir_job_name, |
270 |
| - sample_payload_url=payload_data, |
271 |
| - supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
272 |
| - supported_instance_types=[cpu_instance_type], |
273 |
| - framework=IR_SKLEARN_FRAMEWORK, |
274 |
| - log_level="Quiet", |
| 290 | + retry_and_back_off( |
| 291 | + model.right_size( |
| 292 | + job_name=ir_job_name, |
| 293 | + sample_payload_url=payload_data, |
| 294 | + supported_content_types=IR_SKLEARN_CONTENT_TYPE, |
| 295 | + supported_instance_types=[cpu_instance_type], |
| 296 | + framework=IR_SKLEARN_FRAMEWORK, |
| 297 | + log_level="Quiet", |
| 298 | + ) |
275 | 299 | ),
|
276 | 300 | ir_job_name,
|
277 | 301 | )
|
|
0 commit comments