fix: advanced inference recommendation jobs parameters check

Jinpeng Qi · Jinpeng Qi · commit 42fd779a2211 · 2023-02-08T18:31:00.000+08:00
diff --git a/src/sagemaker/inference_recommender/__init__.py b/src/sagemaker/inference_recommender/__init__.py
@@ -12,3 +12,29 @@
 # language governing permissions and limitations under the License.
 """Classes for using Inference Recommender with Amazon SageMaker."""
 from __future__ import absolute_import
+
+
+class Phase:
+    """Used to store phases of a traffic pattern to perform endpoint load testing.
+
+    Required for an Advanced Inference Recommendations Job
+    """
+
+    def __init__(self, duration_in_seconds: int, initial_number_of_users: int, spawn_rate: int):
+        """Initialize a `Phase`"""
+        self.to_json = {
+            "DurationInSeconds": duration_in_seconds,
+            "InitialNumberOfUsers": initial_number_of_users,
+            "SpawnRate": spawn_rate,
+        }
+
+
+class ModelLatencyThreshold:
+    """Used to store inference request/response latency to perform endpoint load testing.
+
+    Required for an Advanced Inference Recommendations Job
+    """
+
+    def __init__(self, percentile: str, value_in_milliseconds: int):
+        """Initialize a `ModelLatencyThreshold`"""
+        self.to_json = {"Percentile": percentile, "ValueInMilliseconds": value_in_milliseconds}
diff --git a/src/sagemaker/inference_recommender/inference_recommender_mixin.py b/src/sagemaker/inference_recommender/inference_recommender_mixin.py
@@ -18,6 +18,7 @@
 
 from typing import List, Dict, Optional
 import sagemaker
+from sagemaker.inference_recommender import ModelLatencyThreshold, Phase
 from sagemaker.parameter import CategoricalParameter
 
 INFERENCE_RECOMMENDER_FRAMEWORK_MAPPING = {
@@ -31,32 +32,6 @@
 LOGGER = logging.getLogger("sagemaker")
 
 
-class Phase:
-    """Used to store phases of a traffic pattern to perform endpoint load testing.
-
-    Required for an Advanced Inference Recommendations Job
-    """
-
-    def __init__(self, duration_in_seconds: int, initial_number_of_users: int, spawn_rate: int):
-        """Initialze a `Phase`"""
-        self.to_json = {
-            "DurationInSeconds": duration_in_seconds,
-            "InitialNumberOfUsers": initial_number_of_users,
-            "SpawnRate": spawn_rate,
-        }
-
-
-class ModelLatencyThreshold:
-    """Used to store inference request/response latency to perform endpoint load testing.
-
-    Required for an Advanced Inference Recommendations Job
-    """
-
-    def __init__(self, percentile: str, value_in_milliseconds: int):
-        """Initialze a `ModelLatencyThreshold`"""
-        self.to_json = {"Percentile": percentile, "ValueInMilliseconds": value_in_milliseconds}
-
-
 class InferenceRecommenderMixin:
     """A mixin class for SageMaker ``Inference Recommender`` that will be extended by ``Model``"""
 
@@ -464,6 +439,14 @@ def _convert_to_resource_limit_json(self, max_tests: int, max_parallel_tests: in
         """Bundle right_size() parameters into a resource limit for Advanced job"""
         if not max_tests and not max_parallel_tests:
             return None
+        if max_tests and not max_parallel_tests:
+            return {
+                "MaxNumberOfTests": max_tests,
+            }
+        if not max_tests and max_parallel_tests:
+            return {
+                "MaxParallelOfTests": max_parallel_tests,
+            }
         return {
             "MaxNumberOfTests": max_tests,
             "MaxParallelOfTests": max_parallel_tests,
@@ -475,6 +458,16 @@ def _convert_to_stopping_conditions_json(
         """Bundle right_size() parameters into stopping conditions for Advanced job"""
         if not max_invocations and not model_latency_thresholds:
             return None
+        if max_invocations and not model_latency_thresholds:
+            return {
+                "MaxInvocations": max_invocations,
+            }
+        if not max_invocations and model_latency_thresholds:
+            return {
+                "ModelLatencyThresholds": [
+                    threshold.to_json for threshold in model_latency_thresholds
+                ],
+            }
         return {
             "MaxInvocations": max_invocations,
             "ModelLatencyThresholds": [threshold.to_json for threshold in model_latency_thresholds],
diff --git a/tests/integ/test_inference_recommender.py b/tests/integ/test_inference_recommender.py
@@ -21,7 +21,7 @@
 from tests.integ import DATA_DIR
 from tests.integ.timeout import timeout
 import pandas as pd
-from sagemaker.inference_recommender.inference_recommender_mixin import Phase, ModelLatencyThreshold
+from sagemaker.inference_recommender import ModelLatencyThreshold, Phase
 from sagemaker.parameter import CategoricalParameter
 import logging
 
diff --git a/tests/unit/sagemaker/inference_recommender/test_inference_recommender_mixin.py b/tests/unit/sagemaker/inference_recommender/test_inference_recommender_mixin.py
@@ -4,10 +4,7 @@
 
 from sagemaker.model import Model, ModelPackage
 from sagemaker.parameter import CategoricalParameter
-from sagemaker.inference_recommender.inference_recommender_mixin import (
-    Phase,
-    ModelLatencyThreshold,
-)
+from sagemaker.inference_recommender import ModelLatencyThreshold, Phase
 from sagemaker.async_inference import AsyncInferenceConfig
 from sagemaker.serverless import ServerlessInferenceConfig