Commit 7c524dd

Author: Raymond Liu (committed)

decouple right_size() from model registry
1 parent e913520 commit 7c524dd

File tree

3 files changed: +180 -32 lines changed

src/sagemaker/inference_recommender/inference_recommender_mixin.py

Lines changed: 20 additions & 5 deletions
@@ -15,6 +15,7 @@
 
 import logging
 import re
+import uuid
 
 from typing import List, Dict, Optional
 import sagemaker
@@ -38,7 +39,7 @@ class Phase:
     """
 
     def __init__(self, duration_in_seconds: int, initial_number_of_users: int, spawn_rate: int):
-        """Initialze a `Phase`"""
+        """Initialize a `Phase`"""
         self.to_json = {
             "DurationInSeconds": duration_in_seconds,
             "InitialNumberOfUsers": initial_number_of_users,
@@ -53,7 +54,7 @@ class ModelLatencyThreshold:
     """
 
     def __init__(self, percentile: str, value_in_milliseconds: int):
-        """Initialze a `ModelLatencyThreshold`"""
+        """Initialize a `ModelLatencyThreshold`"""
         self.to_json = {"Percentile": percentile, "ValueInMilliseconds": value_in_milliseconds}
 
 
@@ -119,8 +120,6 @@ def right_size(
             sagemaker.model.Model: A SageMaker ``Model`` object. See
                 :func:`~sagemaker.model.Model` for full details.
         """
-        if not isinstance(self, sagemaker.model.ModelPackage):
-            raise ValueError("right_size() is currently only supported with a registered model")
 
         if not framework and self._framework():
             framework = INFERENCE_RECOMMENDER_FRAMEWORK_MAPPING.get(self._framework(), framework)
@@ -149,12 +148,26 @@ def right_size(
 
         self._init_sagemaker_session_if_does_not_exist()
 
+        model_name = None
+        if isinstance(self, sagemaker.model.FrameworkModel):
+
+            unique_tail = uuid.uuid4()
+            model_name = "SMPYTHONSDK-" + str(unique_tail)
+
+            self.sagemaker_session.create_model(
+                name=model_name,
+                role=self.role,
+                container_defs=None,
+                primary_container=self.prepare_container_def(),
+            )
+
         ret_name = self.sagemaker_session.create_inference_recommendations_job(
             role=self.role,
             job_name=job_name,
             job_type=job_type,
             job_duration_in_seconds=job_duration_in_seconds,
-            model_package_version_arn=self.model_package_arn,
+            model_name=model_name,
+            model_package_version_arn=getattr(self, "model_package_arn", None),
             framework=framework,
             framework_version=framework_version,
             sample_payload_url=sample_payload_url,
@@ -175,6 +188,8 @@ def right_size(
             "InferenceRecommendations"
         )
 
+        if model_name is not None:
+            self.sagemaker_session.delete_model(model_name)
         return self
 
     def _update_params(
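
The mixin change above removes the ModelPackage-only guard: when right_size() is called on a FrameworkModel, it now creates a temporary SageMaker Model named "SMPYTHONSDK-<uuid>", passes that name to the Inference Recommender job instead of a model package ARN, and deletes the temporary model once the job results are read back. A minimal usage sketch of what this enables, assuming a scikit-learn model; the artifact location, role ARN, and S3 URIs below are hypothetical placeholders, and the right_size() arguments mirror the ones exercised in the new unit tests:

    from sagemaker.sklearn.model import SKLearnModel

    # Hypothetical unregistered framework model; no Model Registry / ModelPackage involved.
    sklearn_model = SKLearnModel(
        model_data="s3://my-bucket/model.tar.gz",                 # placeholder artifact
        role="arn:aws:iam::111122223333:role/SageMakerRole",      # placeholder role ARN
        entry_point="inference.py",
        framework_version="1.0-1",
    )

    # right_size() now provisions a temporary model behind the scenes, runs the
    # Inference Recommender job against it, and deletes the temporary model afterwards.
    sklearn_model.right_size(
        sample_payload_url="s3://my-bucket/payload.tar.gz",       # placeholder payload
        supported_content_types=["text/csv"],
        supported_instance_types=["ml.c5.xlarge"],
        framework="SAGEMAKER-SCIKIT-LEARN",
    )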

src/sagemaker/session.py

Lines changed: 28 additions & 9 deletions
@@ -4820,6 +4820,7 @@ def _create_inference_recommendations_job_request(
         framework: str,
         sample_payload_url: str,
         supported_content_types: List[str],
+        model_name: str = None,
         model_package_version_arn: str = None,
         job_duration_in_seconds: int = None,
         job_type: str = "Default",
@@ -4843,6 +4844,7 @@ def _create_inference_recommendations_job_request(
             framework (str): The machine learning framework of the Image URI.
             sample_payload_url (str): The S3 path where the sample payload is stored.
             supported_content_types (List[str]): The supported MIME types for the input data.
+            model_name (str): Name of the Amazon SageMaker ``Model`` to be used.
             model_package_version_arn (str): The Amazon Resource Name (ARN) of a
                 versioned model package.
             job_duration_in_seconds (int): The maximum job duration that a job
@@ -4884,15 +4886,26 @@ def _create_inference_recommendations_job_request(
         if supported_instance_types:
             containerConfig["SupportedInstanceTypes"] = supported_instance_types
 
-        request = {
-            "JobName": job_name,
-            "JobType": job_type,
-            "RoleArn": role,
-            "InputConfig": {
-                "ContainerConfig": containerConfig,
-                "ModelPackageVersionArn": model_package_version_arn,
-            },
-        }
+        if model_package_version_arn:
+            request = {
+                "JobName": job_name,
+                "JobType": job_type,
+                "RoleArn": role,
+                "InputConfig": {
+                    "ContainerConfig": containerConfig,
+                    "ModelPackageVersionArn": model_package_version_arn,
+                },
+            }
+        else:
+            request = {
+                "JobName": job_name,
+                "JobType": job_type,
+                "RoleArn": role,
+                "InputConfig": {
+                    "ContainerConfig": containerConfig,
+                    "ModelName": model_name,
+                },
+            }
 
         if job_description:
             request["JobDescription"] = job_description
@@ -4918,6 +4931,7 @@ def create_inference_recommendations_job(
         supported_content_types: List[str],
         job_name: str = None,
         job_type: str = "Default",
+        model_name: str = None,
         model_package_version_arn: str = None,
         job_duration_in_seconds: int = None,
         nearest_model_name: str = None,
@@ -4938,6 +4952,7 @@ def create_inference_recommendations_job(
                 You must grant sufficient permissions to this role.
             sample_payload_url (str): The S3 path where the sample payload is stored.
             supported_content_types (List[str]): The supported MIME types for the input data.
+            model_name (str): Name of the Amazon SageMaker ``Model`` to be used.
             model_package_version_arn (str): The Amazon Resource Name (ARN) of a
                 versioned model package.
             job_name (str): The name of the job being run.
@@ -4964,6 +4979,9 @@ def create_inference_recommendations_job(
             str: The name of the job created. In the form of `SMPYTHONSDK-<timestamp>`
         """
 
+        if model_name is None and model_package_version_arn is None:
+            raise ValueError("Either model_name or model_package_version_arn should be provided.")
+
         if not job_name:
             unique_tail = uuid.uuid4()
             job_name = "SMPYTHONSDK-" + str(unique_tail)
@@ -4972,6 +4990,7 @@ def create_inference_recommendations_job(
         create_inference_recommendations_job_request = (
            self._create_inference_recommendations_job_request(
                 role=role,
+                model_name=model_name,
                 model_package_version_arn=model_package_version_arn,
                 job_name=job_name,
                 job_type=job_type,
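
The session-level change mirrors the mixin: _create_inference_recommendations_job_request() now builds the InputConfig around ModelPackageVersionArn when a versioned package ARN is given and around ModelName otherwise, and create_inference_recommendations_job() rejects calls that supply neither. A rough sketch of a direct call with a plain model name, assuming an existing sagemaker.session.Session instance named sess; the role ARN, S3 URI, and model name are illustrative placeholders:

    # Hypothetical direct call against the Session method extended in this commit.
    job_name = sess.create_inference_recommendations_job(
        role="arn:aws:iam::111122223333:role/SageMakerRole",   # placeholder role ARN
        sample_payload_url="s3://my-bucket/payload.tar.gz",    # placeholder payload
        supported_content_types=["text/csv"],
        model_name="my-existing-sagemaker-model",              # unregistered Model, by name
        framework="SAGEMAKER-SCIKIT-LEARN",
    )

    # Supplying neither model_name nor model_package_version_arn now fails fast with:
    #   ValueError: Either model_name or model_package_version_arn should be provided.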

tests/unit/sagemaker/inference_recommender/test_inference_recommender_mixin.py

Lines changed: 132 additions & 18 deletions
@@ -175,6 +175,134 @@ def default_right_sized_model(model_package):
    )
 
 
+def test_right_size_default_with_model_name_successful(sagemaker_session, model):
+    inference_recommender_model = model.right_size(
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        supported_instance_types=[IR_SAMPLE_INSTANCE_TYPE],
+        job_name=IR_JOB_NAME,
+        framework=IR_SAMPLE_FRAMEWORK,
+    )
+
+    # assert that the create api has been called with default parameters with model name
+    assert sagemaker_session.create_inference_recommendations_job.called_with(
+        role=IR_ROLE_ARN,
+        job_name=IR_JOB_NAME,
+        job_type="Default",
+        job_duration_in_seconds=None,
+        model_name=ANY,
+        model_package_version_arn=None,
+        framework=IR_SAMPLE_FRAMEWORK,
+        framework_version=None,
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        supported_instance_types=[IR_SAMPLE_INSTANCE_TYPE],
+        endpoint_configurations=None,
+        traffic_pattern=None,
+        stopping_conditions=None,
+        resource_limit=None,
+    )
+
+    assert sagemaker_session.wait_for_inference_recommendations_job.called_with(IR_JOB_NAME)
+
+    # confirm that the IR instance attributes have been set
+    assert (
+        inference_recommender_model.inference_recommender_job_results
+        == IR_SAMPLE_INFERENCE_RESPONSE
+    )
+    assert (
+        inference_recommender_model.inference_recommendations
+        == IR_SAMPLE_INFERENCE_RESPONSE["InferenceRecommendations"]
+    )
+
+    # confirm that the returned object of right_size is itself
+    assert inference_recommender_model == model
+
+def test_right_size_advanced_list_instances_model_name_successful(sagemaker_session, model):
+    inference_recommender_model = model.right_size(
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        framework="SAGEMAKER-SCIKIT-LEARN",
+        job_duration_in_seconds=7200,
+        hyperparameter_ranges=IR_SAMPLE_LIST_OF_INSTANCES_HYPERPARAMETER_RANGES,
+        phases=IR_SAMPLE_PHASES,
+        traffic_type="PHASES",
+        max_invocations=100,
+        model_latency_thresholds=IR_SAMPLE_MODEL_LATENCY_THRESHOLDS,
+        max_tests=5,
+        max_parallel_tests=5,
+    )
+
+    # assert that the create api has been called with advanced parameters
+    assert sagemaker_session.create_inference_recommendations_job.called_with(
+        role=IR_ROLE_ARN,
+        job_name=IR_JOB_NAME,
+        job_type="Advanced",
+        job_duration_in_seconds=7200,
+        model_name=ANY,
+        model_package_version_arn=None,
+        framework=IR_SAMPLE_FRAMEWORK,
+        framework_version=None,
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        supported_instance_types=[IR_SAMPLE_INSTANCE_TYPE],
+        endpoint_configurations=IR_SAMPLE_ENDPOINT_CONFIG,
+        traffic_pattern=IR_SAMPLE_TRAFFIC_PATTERN,
+        stopping_conditions=IR_SAMPLE_STOPPING_CONDITIONS,
+        resource_limit=IR_SAMPLE_RESOURCE_LIMIT,
+    )
+
+    assert sagemaker_session.wait_for_inference_recommendations_job.called_with(IR_JOB_NAME)
+
+    # confirm that the IR instance attributes have been set
+    assert (
+        inference_recommender_model.inference_recommender_job_results
+        == IR_SAMPLE_INFERENCE_RESPONSE
+    )
+    assert (
+        inference_recommender_model.inference_recommendations
+        == IR_SAMPLE_INFERENCE_RESPONSE["InferenceRecommendations"]
+    )
+
+    # confirm that the returned object of right_size is itself
+    assert inference_recommender_model == model
+
+def test_right_size_advanced_single_instances_model_name_successful(sagemaker_session, model):
+    model.right_size(
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        framework="SAGEMAKER-SCIKIT-LEARN",
+        job_duration_in_seconds=7200,
+        hyperparameter_ranges=IR_SAMPLE_SINGLE_INSTANCES_HYPERPARAMETER_RANGES,
+        phases=IR_SAMPLE_PHASES,
+        traffic_type="PHASES",
+        max_invocations=100,
+        model_latency_thresholds=IR_SAMPLE_MODEL_LATENCY_THRESHOLDS,
+        max_tests=5,
+        max_parallel_tests=5,
+    )
+
+    # assert that the create api has been called with advanced parameters
+    assert sagemaker_session.create_inference_recommendations_job.called_with(
+        role=IR_ROLE_ARN,
+        job_name=IR_JOB_NAME,
+        job_type="Advanced",
+        job_duration_in_seconds=7200,
+        model_name=ANY,
+        model_package_version_arn=None,
+        framework=IR_SAMPLE_FRAMEWORK,
+        framework_version=None,
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        supported_instance_types=[IR_SAMPLE_INSTANCE_TYPE],
+        endpoint_configurations=IR_SAMPLE_ENDPOINT_CONFIG,
+        traffic_pattern=IR_SAMPLE_TRAFFIC_PATTERN,
+        stopping_conditions=IR_SAMPLE_STOPPING_CONDITIONS,
+        resource_limit=IR_SAMPLE_RESOURCE_LIMIT,
+    )
+
+
+
 def test_right_size_default_with_model_package_successful(sagemaker_session, model_package):
     inference_recommender_model_pkg = model_package.right_size(
         sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
@@ -190,6 +318,7 @@ def test_right_size_default_with_model_package_successful(sagemaker_session, model_package):
         job_name=IR_JOB_NAME,
         job_type="Default",
         job_duration_in_seconds=None,
+        model_name=None,
         model_package_version_arn=model_package.model_package_arn,
         framework=IR_SAMPLE_FRAMEWORK,
         framework_version=None,
@@ -202,7 +331,7 @@ def test_right_size_default_with_model_package_successful(sagemaker_session, model_package):
         resource_limit=None,
     )
 
-    assert sagemaker_session.wait_for_inference_recomendations_job.called_with(IR_JOB_NAME)
+    assert sagemaker_session.wait_for_inference_recommendations_job.called_with(IR_JOB_NAME)
 
     # confirm that the IR instance attributes have been set
     assert (
@@ -216,7 +345,7 @@ def test_right_size_default_with_model_package_successful(sagemaker_session, model_package):
 
     # confirm that the returned object of right_size is itself
    assert inference_recommender_model_pkg == model_package
-
+
 
 def test_right_size_advanced_list_instances_model_package_successful(
     sagemaker_session, model_package
@@ -253,7 +382,7 @@ def test_right_size_advanced_list_instances_model_package_successful(
         resource_limit=IR_SAMPLE_RESOURCE_LIMIT,
     )
 
-    assert sagemaker_session.wait_for_inference_recomendations_job.called_with(IR_JOB_NAME)
+    assert sagemaker_session.wait_for_inference_recommendations_job.called_with(IR_JOB_NAME)
 
     # confirm that the IR instance attributes have been set
     assert (
@@ -359,21 +488,6 @@ def test_right_size_invalid_hyperparameter_ranges(sagemaker_session, model_package):
        )
 
 
-# TODO -> removed once model registry is decoupled
-def test_right_size_missing_model_package_arn(sagemaker_session, model):
-    with pytest.raises(
-        ValueError,
-        match="right_size\\(\\) is currently only supported with a registered model",
-    ):
-        model.right_size(
-            sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
-            supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
-            supported_instance_types=[IR_SAMPLE_INSTANCE_TYPE],
-            job_name=IR_JOB_NAME,
-            framework=IR_SAMPLE_FRAMEWORK,
-        )
-
-
 # TODO check our framework mapping when we add in inference_recommendation_id support
 
 
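Because the temporary model name ends in a random UUID, the new tests match it with mock.ANY rather than an exact string. A self-contained illustration of that matching pattern, using a toy MagicMock instead of the project fixtures:

    from unittest.mock import ANY, MagicMock

    session = MagicMock()
    session.create_inference_recommendations_job(
        model_name="SMPYTHONSDK-1234abcd", role="test-role"
    )

    # ANY matches whichever UUID-suffixed name was generated at call time.
    session.create_inference_recommendations_job.assert_called_with(
        model_name=ANY, role="test-role"
    )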