fix: auto ml integ tests and add flaky test markers (#4136)

benieric · web-flow · commit d8ad44301364 · 2023-09-26T09:50:50.000-07:00
diff --git a/tests/integ/auto_ml_utils.py b/tests/integ/auto_ml_utils.py
@@ -25,9 +25,7 @@
 TARGET_ATTRIBUTE_NAME = "virginica"
 
 
-def create_auto_ml_job_if_not_exist(sagemaker_session):
-    auto_ml_job_name = "python-sdk-integ-test-base-job"
-
+def create_auto_ml_job_if_not_exist(sagemaker_session, auto_ml_job_name):
     try:
         sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
     except Exception as e:  # noqa: F841
diff --git a/tests/integ/sagemaker/feature_store/feature_processor/test_feature_processor.py b/tests/integ/sagemaker/feature_store/feature_processor/test_feature_processor.py
@@ -223,6 +223,7 @@ def transform(raw_s3_data_as_df):
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=5, reruns_delay=2)
 def test_feature_processor_transform_offline_only_store_ingestion(
     sagemaker_session,
 ):
diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
@@ -22,6 +22,7 @@
 from sagemaker.utils import unique_name_from_base
 from tests.integ import AUTO_ML_DEFAULT_TIMEMOUT_MINUTES, DATA_DIR, auto_ml_utils
 from tests.integ.timeout import timeout
+from tests.conftest import CUSTOM_S3_OBJECT_KEY_PREFIX
 
 ROLE = "SageMakerRole"
 PREFIX = "sagemaker/beta-automl-xgboost"
@@ -38,8 +39,6 @@
 BASE_JOB_NAME = "auto-ml"
 MODE = "ENSEMBLING"
 
-# use a succeeded AutoML job to test describe and list candidates method, otherwise tests will run too long
-AUTO_ML_JOB_NAME = "python-sdk-integ-test-base-job"
 DEFAULT_MODEL_NAME = "python-sdk-automl"
 
 
@@ -49,6 +48,14 @@
 }
 
 
+# Use a succeeded AutoML job for the describe/list-candidates tests; otherwise they run too long.
+# The job name is generated once per module; the job is created if it doesn't exist and reused.
+@pytest.fixture(scope="module")
+def test_session_job_name():
+    """Return a unique AutoML job name, generated once per module for the tests that request it."""
+    return unique_name_from_base("test-session-job", max_length=32)
+
+
 @pytest.mark.slow_test
 @pytest.mark.skipif(
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
@@ -63,7 +70,7 @@ def test_auto_ml_fit(sagemaker_session):
         max_candidates=1,
     )
 
-    job_name = unique_name_from_base("auto-ml", max_length=32)
+    job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
     inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
     with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
         auto_ml.fit(inputs, job_name=job_name)
@@ -82,7 +89,7 @@ def test_auto_ml_fit_local_input(sagemaker_session):
     )
 
     inputs = TRAINING_DATA
-    job_name = unique_name_from_base("auto-ml", max_length=32)
+    job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
     with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
         auto_ml.fit(inputs, job_name=job_name)
 
@@ -99,7 +106,7 @@ def test_auto_ml_input_object_fit(sagemaker_session):
         max_candidates=1,
         generate_candidate_definitions_only=True,
     )
-    job_name = unique_name_from_base("auto-ml", max_length=32)
+    job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
     s3_input = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
     inputs = AutoMLInput(inputs=s3_input, target_attribute_name=TARGET_ATTRIBUTE_NAME)
     with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
@@ -118,7 +125,7 @@ def test_auto_ml_input_object_list_fit(sagemaker_session):
         max_candidates=1,
         mode=MODE,
     )
-    job_name = unique_name_from_base("auto-ml", max_length=32)
+    job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
     s3_input_training = sagemaker_session.upload_data(
         path=TRAINING_DATA, key_prefix=PREFIX + "/input"
     )
@@ -178,7 +185,7 @@ def test_auto_ml_invalid_target_attribute(sagemaker_session):
     auto_ml = AutoML(
         role=ROLE, target_attribute_name="y", sagemaker_session=sagemaker_session, max_candidates=1
     )
-    job_name = unique_name_from_base("auto-ml", max_length=32)
+    job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
     inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
     with pytest.raises(
         ClientError,
@@ -192,14 +199,14 @@ def test_auto_ml_invalid_target_attribute(sagemaker_session):
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
 )
-def test_auto_ml_describe_auto_ml_job(sagemaker_session):
+def test_auto_ml_describe_auto_ml_job(sagemaker_session, test_session_job_name):
     expected_default_input_config = [
         {
             "DataSource": {
                 "S3DataSource": {
                     "S3DataType": "S3Prefix",
-                    "S3Uri": "s3://{}/{}/input/iris_training.csv".format(
-                        sagemaker_session.default_bucket(), PREFIX
+                    "S3Uri": "s3://{}/{}/{}/input/iris_training.csv".format(
+                        sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX, PREFIX
                     ),
                 }
             },
@@ -209,16 +216,18 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session):
         }
     ]
     expected_default_output_config = {
-        "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
+        "S3OutputPath": "s3://{}/{}/".format(
+            sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX
+        )
     }
 
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
 
-    desc = auto_ml.describe_auto_ml_job(job_name=AUTO_ML_JOB_NAME)
-    assert desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
+    desc = auto_ml.describe_auto_ml_job(job_name=test_session_job_name)
+    assert desc["AutoMLJobName"] == test_session_job_name
     assert desc["AutoMLJobStatus"] == "Completed"
     assert isinstance(desc["BestCandidate"], dict)
     assert desc["InputDataConfig"] == expected_default_input_config
@@ -230,14 +239,14 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session):
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
 )
-def test_auto_ml_attach(sagemaker_session):
+def test_auto_ml_attach(sagemaker_session, test_session_job_name):
     expected_default_input_config = [
         {
             "DataSource": {
                 "S3DataSource": {
                     "S3DataType": "S3Prefix",
-                    "S3Uri": "s3://{}/{}/input/iris_training.csv".format(
-                        sagemaker_session.default_bucket(), PREFIX
+                    "S3Uri": "s3://{}/{}/{}/input/iris_training.csv".format(
+                        sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX, PREFIX
                     ),
                 }
             },
@@ -247,16 +256,18 @@ def test_auto_ml_attach(sagemaker_session):
         }
     ]
     expected_default_output_config = {
-        "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
+        "S3OutputPath": "s3://{}/{}/".format(
+            sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX
+        )
     }
 
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     attached_automl_job = AutoML.attach(
-        auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session
+        auto_ml_job_name=test_session_job_name, sagemaker_session=sagemaker_session
     )
     attached_desc = attached_automl_job.describe_auto_ml_job()
-    assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
+    assert attached_desc["AutoMLJobName"] == test_session_job_name
     assert attached_desc["AutoMLJobStatus"] == "Completed"
     assert isinstance(attached_desc["BestCandidate"], dict)
     assert attached_desc["InputDataConfig"] == expected_default_input_config
@@ -268,28 +279,28 @@ def test_auto_ml_attach(sagemaker_session):
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
 )
-def test_list_candidates(sagemaker_session):
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+def test_list_candidates(sagemaker_session, test_session_job_name):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
 
-    candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
+    candidates = auto_ml.list_candidates(job_name=test_session_job_name)
     assert len(candidates) == 3
 
 
 @pytest.mark.skipif(
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
 )
-def test_best_candidate(sagemaker_session):
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+def test_best_candidate(sagemaker_session, test_session_job_name):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
-    best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
+    best_candidate = auto_ml.best_candidate(job_name=test_session_job_name)
     assert len(best_candidate["InferenceContainers"]) == 3
     assert len(best_candidate["CandidateSteps"]) == 4
     assert best_candidate["CandidateStatus"] == "Completed"
@@ -300,13 +311,13 @@ def test_best_candidate(sagemaker_session):
     reason="AutoML is not supported in the region yet.",
 )
 @pytest.mark.release
-def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+def test_deploy_best_candidate(sagemaker_session, cpu_instance_type, test_session_job_name):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
-    best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
+    best_candidate = auto_ml.best_candidate(job_name=test_session_job_name)
     endpoint_name = unique_name_from_base("sagemaker-auto-ml-best-candidate-test")
 
     with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
@@ -331,14 +342,16 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
 @pytest.mark.skip(
     reason="",
 )
-def test_candidate_estimator_default_rerun_and_deploy(sagemaker_session, cpu_instance_type):
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+def test_candidate_estimator_default_rerun_and_deploy(
+    sagemaker_session, cpu_instance_type, test_session_job_name
+):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
 
-    candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
+    candidates = auto_ml.list_candidates(job_name=test_session_job_name)
     candidate = candidates[1]
 
     candidate_estimator = CandidateEstimator(candidate, sagemaker_session)
@@ -364,13 +377,13 @@ def test_candidate_estimator_default_rerun_and_deploy(sagemaker_session, cpu_ins
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
 )
-def test_candidate_estimator_get_steps(sagemaker_session):
-    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+def test_candidate_estimator_get_steps(sagemaker_session, test_session_job_name):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
 
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
-    candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
+    candidates = auto_ml.list_candidates(job_name=test_session_job_name)
     candidate = candidates[1]
 
     candidate_estimator = CandidateEstimator(candidate, sagemaker_session)
diff --git a/tests/integ/test_inference_recommender.py b/tests/integ/test_inference_recommender.py
@@ -206,7 +206,7 @@ def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type)
                 ir_job_name,
             )
         except Exception:
-            sagemaker_session.delete_model(ModelName=sklearn_model.name)
+            sagemaker_session.delete_model(model_name=sklearn_model.name)
 
 
 @pytest.fixture(scope="module")
@@ -261,7 +261,7 @@ def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type
             )
 
         except Exception:
-            sagemaker_session.delete_model(ModelName=sklearn_model.name)
+            sagemaker_session.delete_model(model_name=sklearn_model.name)
 
 
 @pytest.fixture(scope="module")
@@ -300,7 +300,7 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_
                 ir_job_name,
             )
         except Exception:
-            sagemaker_session.delete_model(ModelName=model.name)
+            sagemaker_session.delete_model(model_name=model.name)
 
 
 @pytest.fixture(scope="module")
@@ -328,6 +328,7 @@ def created_base_model(sagemaker_session, cpu_instance_type):
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_default_right_size_and_deploy_registered_model_sklearn(
     default_right_sized_model, sagemaker_session
 ):
@@ -350,6 +351,7 @@ def test_default_right_size_and_deploy_registered_model_sklearn(
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_default_right_size_and_deploy_unregistered_model_sklearn(
     default_right_sized_unregistered_model, sagemaker_session
 ):
@@ -372,6 +374,7 @@ def test_default_right_size_and_deploy_unregistered_model_sklearn(
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_default_right_size_and_deploy_unregistered_base_model(
     default_right_sized_unregistered_base_model, sagemaker_session
 ):
@@ -394,6 +397,7 @@ def test_default_right_size_and_deploy_unregistered_base_model(
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_advanced_right_size_and_deploy_unregistered_model_sklearn(
     advanced_right_sized_unregistered_model, sagemaker_session
 ):
@@ -416,6 +420,7 @@ def test_advanced_right_size_and_deploy_unregistered_model_sklearn(
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_advanced_right_size_and_deploy_registered_model_sklearn(
     advanced_right_sized_model, sagemaker_session
 ):
@@ -446,6 +451,7 @@ def test_advanced_right_size_and_deploy_registered_model_sklearn(
 # TODO when we've added support for inference_recommendation_id
 # then add tests to test Framework models
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_deploy_inference_recommendation_id_with_registered_model_sklearn(
     default_right_sized_model, sagemaker_session
 ):
@@ -480,6 +486,7 @@ def test_deploy_inference_recommendation_id_with_registered_model_sklearn(
 
 
 @pytest.mark.slow_test
+@pytest.mark.flaky(reruns=3, reruns_delay=2)
 def test_deploy_deployment_recommendation_id_with_model(created_base_model, sagemaker_session):
     with timeout(minutes=20):
         try:
diff --git a/tox.ini b/tox.ini
@@ -56,6 +56,8 @@ markers =
     canary_quick
     cron
     local_mode
+    slow_test
+    release
     timeout: mark a test as a timeout.
 
 [testenv]