
feat: Add tagging support for create ir job #3901


Merged
merged 2 commits on Jun 6, 2023
7 changes: 7 additions & 0 deletions src/sagemaker/session.py
@@ -5313,6 +5313,7 @@ def _create_inference_recommendations_job_request(
framework: str,
sample_payload_url: str,
supported_content_types: List[str],
tags: List[Dict[str, str]],
model_name: str = None,
model_package_version_arn: str = None,
job_duration_in_seconds: int = None,
@@ -5348,6 +5349,8 @@ def _create_inference_recommendations_job_request(
benchmarked by Amazon SageMaker Inference Recommender that matches your model.
supported_instance_types (List[str]): A list of the instance types that are used
to generate inferences in real-time.
tags (List[Dict[str, str]]): Tags used to identify where the Inference Recommendations
call was made from.
endpoint_configurations (List[Dict[str, any]]): Specifies the endpoint configurations
to use for a job. Will be used for `Advanced` jobs.
traffic_pattern (Dict[str, any]): Specifies the traffic pattern for the job.
@@ -5386,6 +5389,7 @@ def _create_inference_recommendations_job_request(
"InputConfig": {
"ContainerConfig": containerConfig,
},
"Tags": tags,
}

request.get("InputConfig").update(
@@ -5477,6 +5481,8 @@ def create_inference_recommendations_job(
job_name = "SMPYTHONSDK-" + str(unique_tail)
job_description = "#python-sdk-create"

tags = [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}]

create_inference_recommendations_job_request = (
self._create_inference_recommendations_job_request(
role=role,
Expand All @@ -5496,6 +5502,7 @@ def create_inference_recommendations_job(
traffic_pattern=traffic_pattern,
stopping_conditions=stopping_conditions,
resource_limit=resource_limit,
tags=tags,
)
)

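With this change, every Inference Recommender job created through right_size() / create_inference_recommendations_job carries the ClientType tag shown above. A minimal verification sketch, not part of this PR (the job name is a placeholder, and it assumes DescribeInferenceRecommendationsJob returns the job ARN under "JobArn"):

import boto3

sm = boto3.client("sagemaker")
# Placeholder job name; names generated by the SDK look like "SMPYTHONSDK-<timestamp>".
job_arn = sm.describe_inference_recommendations_job(JobName="SMPYTHONSDK-1234567890")["JobArn"]
tags = sm.list_tags(ResourceArn=job_arn)["Tags"]
assert {"Key": "ClientType", "Value": "PythonSDK-RightSize"} in tags
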
112 changes: 68 additions & 44 deletions tests/integ/test_inference_recommender.py
@@ -13,9 +13,11 @@
from __future__ import absolute_import

import os
import time

import pytest

from botocore.exceptions import ClientError
from sagemaker import image_uris
from sagemaker.model import Model
from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor
@@ -40,6 +42,18 @@
IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1"


def retry_and_back_off(right_size_fn):
    # Call the given zero-argument callable and retry with linear back-off
    # when the Inference Recommender API throttles the request.
    tot_retries = 3
    retries = 1
    while True:
        try:
            return right_size_fn()
        except ClientError as e:
            if retries > tot_retries or e.response["Error"]["Code"] != "ThrottlingException":
                raise
            time.sleep(5 * retries)
            retries += 1
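
A minimal usage sketch for the helper above (the callable here is a stand-in; the fixtures below wrap their calls as lambda: <model>.right_size(...) so each retry re-issues the API request):

# Stand-in callable; retries are attempted only for botocore ClientError
# responses whose error code is "ThrottlingException".
result = retry_and_back_off(lambda: {"JobName": "placeholder-job"})
assert result == {"JobName": "placeholder-job"}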


@pytest.fixture(scope="module")
def default_right_sized_model(sagemaker_session, cpu_instance_type):
with timeout(minutes=45):
@@ -68,13 +82,15 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type):
)

return (
sklearn_model_package.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: sklearn_model_package.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
model_package_group_name,
ir_job_name,
@@ -133,17 +149,19 @@ def advanced_right_sized_model(sagemaker_session, cpu_instance_type):
]

return (
sklearn_model_package.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
retry_and_back_off(
lambda: sklearn_model_package.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
)
),
model_package_group_name,
)
@@ -175,13 +193,15 @@ def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type)
)

return (
sklearn_model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: sklearn_model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
ir_job_name,
)
@@ -224,18 +244,20 @@ def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type
ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100)
]

return sklearn_model.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
log_level="Quiet",
return retry_and_back_off(
lambda: sklearn_model.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
log_level="Quiet",
)
)

except Exception:
@@ -265,13 +287,15 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_
)

return (
model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
ir_job_name,
)
2 changes: 2 additions & 0 deletions tests/unit/test_session.py
@@ -4704,6 +4704,7 @@ def create_inference_recommendations_job_default_happy_response():
"ModelPackageVersionArn": IR_MODEL_PACKAGE_VERSION_ARN,
},
"JobDescription": "#python-sdk-create",
"Tags": [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}],
}


@@ -4728,6 +4729,7 @@ def create_inference_recommendations_job_default_model_name_happy_response():
"ModelName": IR_MODEL_NAME,
},
"JobDescription": "#python-sdk-create",
"Tags": [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}],
}
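
A hedged sketch of how the expected request above can be asserted, not part of this diff (the sagemaker_session fixture, the placeholder ARN/URL values, and the exact keyword names of create_inference_recommendations_job are assumptions):

from unittest.mock import MagicMock

def test_client_type_tag_reaches_the_sagemaker_client(sagemaker_session):
    sagemaker_session.sagemaker_client.create_inference_recommendations_job = MagicMock()
    sagemaker_session.create_inference_recommendations_job(
        role="arn:aws:iam::123456789012:role/ServiceRole",  # placeholder role ARN
        sample_payload_url="s3://my-bucket/payload.tar.gz",  # placeholder payload URL
        supported_content_types=["text/csv"],
        model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
        framework="SAGEMAKER-SCIKIT-LEARN",
    )
    _, kwargs = sagemaker_session.sagemaker_client.create_inference_recommendations_job.call_args
    assert kwargs["Tags"] == [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}]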

