fix: Remove workarounds and apply fixes to Clarify and MM integ tests (#541)

xgchena · ChoiByungWook · commit 1f26aee9e462 · 2020-12-08T14:46:06.000-08:00
diff --git a/tests/integ/test_clarify.py b/tests/integ/test_clarify.py
@@ -20,6 +20,7 @@
 import os
 import pandas as pd
 import pytest
+import statistics
 import tempfile
 
 from sagemaker import s3
@@ -32,7 +33,7 @@
     SHAPConfig,
 )
 
-from sagemaker.amazon.linear_learner import LinearLearner
+from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerPredictor
 from sagemaker import utils
 from tests import integ
 from tests.integ import timeout
@@ -80,11 +81,13 @@ def model_name(sagemaker_session, cpu_instance_type, training_set):
             cpu_instance_type,
             predictor_type="binary_classifier",
             sagemaker_session=sagemaker_session,
+            disable_profiler=True,
         )
         ll.binary_classifier_model_selection_criteria = "accuracy"
         ll.early_stopping_tolerance = 0.0001
         ll.early_stopping_patience = 3
         ll.num_models = 1
+        ll.epochs = 1
         ll.num_calibration_samples = 1
 
         features, label = training_set
@@ -106,8 +109,6 @@ def clarify_processor(sagemaker_session, cpu_instance_type):
         instance_type=cpu_instance_type,
         sagemaker_session=sagemaker_session,
     )
-    # TODO: remove once container ready.
-    processor.image_uri = "678264136642.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xai-analyzer:1.0"
     return processor
 
 
@@ -146,8 +147,15 @@ def model_config(model_name):
 
 
 @pytest.fixture(scope="module")
-def model_predicted_label_config():
-    return ModelPredictedLabelConfig(label="predicted_label")
+def model_predicted_label_config(sagemaker_session, model_name, training_set):
+    predictor = LinearLearnerPredictor(
+        model_name,
+        sagemaker_session=sagemaker_session,
+    )
+    result = predictor.predict(training_set[0].astype(np.float32))
+    predictions = [float(record.label["score"].float32_tensor.values[0]) for record in result]
+    probability_threshold = statistics.median(predictions)
+    return ModelPredictedLabelConfig(label="score", probability_threshold=probability_threshold)
 
 
 @pytest.fixture(scope="module")
@@ -166,11 +174,7 @@ def shap_config():
     )
 
 
-@pytest.mark.skipif(
-    integ.test_region() != "us-west-2",
-    reason="Image is not yet available in certain regions.",
-)
-def test_pre_training_bias(clarify_processor, data_config, data_bias_config):
+def test_pre_training_bias(clarify_processor, data_config, data_bias_config, sagemaker_session):
     with timeout.timeout(minutes=CLARIFY_DEFAULT_TIMEOUT_MINUTES):
         clarify_processor.run_pre_training_bias(
             data_config,
@@ -179,7 +183,8 @@ def test_pre_training_bias(clarify_processor, data_config, data_bias_config):
             wait=True,
         )
         analysis_result_json = s3.S3Downloader.read_file(
-            data_config.s3_output_path + "/analysis.json"
+            data_config.s3_output_path + "/analysis.json",
+            sagemaker_session,
         )
         analysis_result = json.loads(analysis_result_json)
         assert (
@@ -192,12 +197,13 @@ def test_pre_training_bias(clarify_processor, data_config, data_bias_config):
         )
 
 
-@pytest.mark.skipif(
-    integ.test_region() != "us-west-2",
-    reason="Image is not yet available in certain regions.",
-)
 def test_post_training_bias(
-    clarify_processor, data_config, data_bias_config, model_config, model_predicted_label_config
+    clarify_processor,
+    data_config,
+    data_bias_config,
+    model_config,
+    model_predicted_label_config,
+    sagemaker_session,
 ):
     with timeout.timeout(minutes=CLARIFY_DEFAULT_TIMEOUT_MINUTES):
         clarify_processor.run_post_training_bias(
@@ -209,7 +215,8 @@ def test_post_training_bias(
             wait=True,
         )
         analysis_result_json = s3.S3Downloader.read_file(
-            data_config.s3_output_path + "/analysis.json"
+            data_config.s3_output_path + "/analysis.json",
+            sagemaker_session,
         )
         analysis_result = json.loads(analysis_result_json)
         assert (
@@ -222,11 +229,7 @@ def test_post_training_bias(
         )
 
 
-@pytest.mark.skipif(
-    integ.test_region() != "us-west-2",
-    reason="Image is not yet available in certain regions.",
-)
-def test_shap(clarify_processor, data_config, model_config, shap_config):
+def test_shap(clarify_processor, data_config, model_config, shap_config, sagemaker_session):
     with timeout.timeout(minutes=CLARIFY_DEFAULT_TIMEOUT_MINUTES):
         clarify_processor.run_explainability(
             data_config,
@@ -237,7 +240,8 @@ def test_shap(clarify_processor, data_config, model_config, shap_config):
             wait=True,
         )
         analysis_result_json = s3.S3Downloader.read_file(
-            data_config.s3_output_path + "/analysis.json"
+            data_config.s3_output_path + "/analysis.json",
+            sagemaker_session,
         )
         analysis_result = json.loads(analysis_result_json)
         assert (
diff --git a/tests/integ/test_clarify_model_monitor.py b/tests/integ/test_clarify_model_monitor.py
@@ -66,24 +66,15 @@
 
 CRON = "cron(*/5 * * * ? *)"
 UPDATED_CRON = CronExpressionGenerator.daily()
-MAX_RUNTIME_IN_SECONDS = 45 * 60
-UPDATED_MAX_RUNTIME_IN_SECONDS = 60 * 60
+MAX_RUNTIME_IN_SECONDS = 30 * 60
+UPDATED_MAX_RUNTIME_IN_SECONDS = 25 * 60
 ROLE = "SageMakerRole"
 INSTANCE_COUNT = 1
 INSTANCE_TYPE = "ml.c5.xlarge"
 VOLUME_SIZE_IN_GB = 100
 START_TIME_OFFSET = "-PT1H"
 END_TIME_OFFSET = "-PT0H"
 TEST_TAGS = [{"Key": "integration", "Value": "test"}]
-TEST_ENV = {"CLOUDWATCH_METRICS_DIRECTORY": "/tmp"}
-
-# TODO: Use the same skipit mark as in test_model_monitor.py
-TEST_REGSION = "us-west-2"
-# TODO: Remove test image override once once 1p-registration went through.
-# NOTE: The test account only has the image in us-west-2 and us-east-2
-TEST_IMAGE_URI = "678264136642.dkr.ecr.{}.amazonaws.com/sagemaker-xai-analyzer:1.0".format(
-    TEST_REGSION
-)
 
 
 @pytest.yield_fixture(scope="module")
@@ -203,10 +194,8 @@ def bias_monitor(sagemaker_session):
         volume_size_in_gb=VOLUME_SIZE_IN_GB,
         max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
         sagemaker_session=sagemaker_session,
-        env=TEST_ENV,
         tags=TEST_TAGS,
     )
-    monitor.image_uri = TEST_IMAGE_URI
     return monitor
 
 
@@ -245,8 +234,8 @@ def scheduled_bias_monitor(
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_bias_monitor(sagemaker_session, scheduled_bias_monitor, endpoint_name, ground_truth_input):
     monitor = scheduled_bias_monitor
@@ -297,8 +286,8 @@ def test_bias_monitor(sagemaker_session, scheduled_bias_monitor, endpoint_name,
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_run_bias_monitor(
     scheduled_bias_monitor, sagemaker_session, endpoint_name, ground_truth_input, upload_actual_data
@@ -316,8 +305,8 @@ def test_run_bias_monitor(
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_run_bias_monitor_baseline(
     sagemaker_session,
@@ -336,10 +325,8 @@ def test_run_bias_monitor_baseline(
         volume_size_in_gb=VOLUME_SIZE_IN_GB,
         max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
         sagemaker_session=sagemaker_session,
-        env=TEST_ENV,
         tags=TEST_TAGS,
     )
-    monitor.image_uri = TEST_IMAGE_URI
 
     baselining_job_name = utils.unique_name_from_base("bias-baselining-job")
     print("Creating baselining job: {}".format(baselining_job_name))
@@ -396,10 +383,8 @@ def explainability_monitor(sagemaker_session):
         volume_size_in_gb=VOLUME_SIZE_IN_GB,
         max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
         sagemaker_session=sagemaker_session,
-        env=TEST_ENV,
         tags=TEST_TAGS,
     )
-    monitor.image_uri = TEST_IMAGE_URI
     return monitor
 
 
@@ -429,8 +414,8 @@ def scheduled_explainability_monitor(
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_explainability_monitor(sagemaker_session, scheduled_explainability_monitor, endpoint_name):
     monitor = scheduled_explainability_monitor
@@ -479,8 +464,8 @@ def test_explainability_monitor(sagemaker_session, scheduled_explainability_moni
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_run_explainability_monitor(
     scheduled_explainability_monitor,
@@ -501,8 +486,8 @@ def test_run_explainability_monitor(
 
 
 @pytest.mark.skipif(
-    tests.integ.test_region() != TEST_REGSION,
-    reason="Image is not yet available in certain regions.",
+    tests.integ.test_region() in tests.integ.NO_MODEL_MONITORING_REGIONS,
+    reason="ModelMonitoring is not yet supported in this region.",
 )
 def test_run_explainability_monitor_baseline(
     sagemaker_session, shap_config, data_config, model_config, endpoint_name, upload_actual_data
@@ -514,10 +499,8 @@ def test_run_explainability_monitor_baseline(
         volume_size_in_gb=VOLUME_SIZE_IN_GB,
         max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
         sagemaker_session=sagemaker_session,
-        env=TEST_ENV,
         tags=TEST_TAGS,
     )
-    monitor.image_uri = TEST_IMAGE_URI
 
     baselining_job_name = utils.unique_name_from_base("explainability-baselining-job")
     print("Creating baselining job: {}".format(baselining_job_name))
diff --git a/tests/integ/test_model_quality_monitor.py b/tests/integ/test_model_quality_monitor.py
@@ -51,8 +51,8 @@
 
 CRON = "cron(*/5 * * * ? *)"
 UPDATED_CRON = CronExpressionGenerator.daily()
-MAX_RUNTIME_IN_SECONDS = 45 * 60
-UPDATED_MAX_RUNTIME_IN_SECONDS = 60 * 60
+MAX_RUNTIME_IN_SECONDS = 30 * 60
+UPDATED_MAX_RUNTIME_IN_SECONDS = 25 * 60
 ROLE = "SageMakerRole"
 INSTANCE_COUNT = 1
 INSTANCE_TYPE = "ml.c5.xlarge"