11
11
# ANY KIND, either express or implied. See the License for the specific
12
12
# language governing permissions and limitations under the License.
13
13
from __future__ import absolute_import
14
+ from __future__ import absolute_import
14
15
15
16
import numpy as np
16
17
import os
22
23
from sagemaker .tensorflow import TensorFlow
23
24
from six .moves .urllib .parse import urlparse
24
25
from sagemaker .utils import unique_name_from_base
25
- import tests .integ as integ
26
- from tests .integ import kms_utils
27
- import tests .integ .timeout as timeout
26
+
27
+ import tests .integ
28
28
29
29
ROLE = 'SageMakerRole'
30
30
35
35
TAGS = [{'Key' : 'some-key' , 'Value' : 'some-value' }]
36
36
37
37
38
# Session-scoped instance-type parametrization: the CPU type always runs;
# the GPU (p2) type is skipped in regions that do not offer it.
@pytest.fixture(
    scope='session',
    params=[
        'ml.c5.xlarge',
        pytest.param(
            'ml.p2.xlarge',
            marks=pytest.mark.skipif(
                tests.integ.test_region() in tests.integ.HOSTING_NO_P2_REGIONS,
                reason='no ml.p2 instances in this region'))])
def instance_type(request):
    """Return the instance type for the current parametrized run."""
    return request.param
41
46
42
47
43
- @pytest .mark .skipif (integ .test_region () in integ .HOSTING_NO_P2_REGIONS ,
44
- reason = 'no ml.p2 instances in these regions' )
45
- @pytest .mark .skipif (integ .PYTHON_VERSION != 'py3' , reason = "Script Mode tests are only configured to run with Python 3" )
48
+ @pytest .mark .skipif (tests .integ .PYTHON_VERSION != 'py3' ,
49
+ reason = "Script Mode tests are only configured to run with Python 3" )
46
50
def test_mnist (sagemaker_session , instance_type ):
47
51
estimator = TensorFlow (entry_point = SCRIPT ,
48
52
role = 'SageMakerRole' ,
@@ -51,26 +55,26 @@ def test_mnist(sagemaker_session, instance_type):
51
55
sagemaker_session = sagemaker_session ,
52
56
py_version = 'py3' ,
53
57
framework_version = TensorFlow .LATEST_VERSION ,
54
- metric_definitions = [{'Name' : 'train:global_steps' , 'Regex' : r'global_step\/sec:\s(.*)' }])
58
+ metric_definitions = [
59
+ {'Name' : 'train:global_steps' , 'Regex' : r'global_step\/sec:\s(.*)' }])
55
60
inputs = estimator .sagemaker_session .upload_data (
56
61
path = os .path .join (RESOURCE_PATH , 'data' ),
57
62
key_prefix = 'scriptmode/mnist' )
58
63
59
- with timeout .timeout (minutes = integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
64
+ with tests . integ . timeout .timeout (minutes = tests . integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
60
65
estimator .fit (inputs = inputs , job_name = unique_name_from_base ('test-tf-sm-mnist' ))
61
66
_assert_s3_files_exist (estimator .model_dir ,
62
67
['graph.pbtxt' , 'model.ckpt-0.index' , 'model.ckpt-0.meta' ])
63
68
df = estimator .training_job_analytics .dataframe ()
64
- print (df )
65
69
assert df .size > 0
66
70
67
71
68
72
def test_server_side_encryption (sagemaker_session ):
69
-
70
73
boto_session = sagemaker_session .boto_session
71
- with kms_utils .bucket_with_encryption (boto_session , ROLE ) as (bucket_with_kms , kms_key ):
72
-
73
- output_path = os .path .join (bucket_with_kms , 'test-server-side-encryption' , time .strftime ('%y%m%d-%H%M' ))
74
+ with tests .integ .kms_utils .bucket_with_encryption (boto_session , ROLE ) as (
75
+ bucket_with_kms , kms_key ):
76
+ output_path = os .path .join (bucket_with_kms , 'test-server-side-encryption' ,
77
+ time .strftime ('%y%m%d-%H%M' ))
74
78
75
79
estimator = TensorFlow (entry_point = SCRIPT ,
76
80
role = ROLE ,
@@ -88,28 +92,29 @@ def test_server_side_encryption(sagemaker_session):
88
92
path = os .path .join (RESOURCE_PATH , 'data' ),
89
93
key_prefix = 'scriptmode/mnist' )
90
94
91
- with timeout .timeout (minutes = integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
92
- estimator .fit (inputs = inputs , job_name = unique_name_from_base ('test-server-side-encryption' ))
95
+ with tests .integ .timeout .timeout (minutes = tests .integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
96
+ estimator .fit (inputs = inputs ,
97
+ job_name = unique_name_from_base ('test-server-side-encryption' ))
93
98
94
99
95
100
@pytest .mark .canary_quick
96
- @pytest .mark .skipif (integ .PYTHON_VERSION != 'py3' , reason = "Script Mode tests are only configured to run with Python 3" )
101
+ @pytest .mark .skipif (tests .integ .PYTHON_VERSION != 'py3' ,
102
+ reason = "Script Mode tests are only configured to run with Python 3" )
97
103
def test_mnist_distributed (sagemaker_session , instance_type ):
98
104
estimator = TensorFlow (entry_point = SCRIPT ,
99
105
role = ROLE ,
100
106
train_instance_count = 2 ,
101
- # TODO: change train_instance_type to instance_type once the test is passing consistently
102
- train_instance_type = 'ml.c5.xlarge' ,
107
+ train_instance_type = instance_type ,
103
108
sagemaker_session = sagemaker_session ,
104
- py_version = integ .PYTHON_VERSION ,
109
+ py_version = tests . integ .PYTHON_VERSION ,
105
110
script_mode = True ,
106
111
framework_version = TensorFlow .LATEST_VERSION ,
107
112
distributions = PARAMETER_SERVER_DISTRIBUTION )
108
113
inputs = estimator .sagemaker_session .upload_data (
109
114
path = os .path .join (RESOURCE_PATH , 'data' ),
110
115
key_prefix = 'scriptmode/distributed_mnist' )
111
116
112
- with timeout .timeout (minutes = integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
117
+ with tests . integ . timeout .timeout (minutes = tests . integ .TRAINING_DEFAULT_TIMEOUT_MINUTES ):
113
118
estimator .fit (inputs = inputs , job_name = unique_name_from_base ('test-tf-sm-distributed' ))
114
119
_assert_s3_files_exist (estimator .model_dir ,
115
120
['graph.pbtxt' , 'model.ckpt-0.index' , 'model.ckpt-0.meta' ])
@@ -131,22 +136,26 @@ def test_mnist_async(sagemaker_session):
131
136
training_job_name = estimator .latest_training_job .name
132
137
time .sleep (20 )
133
138
endpoint_name = training_job_name
134
- _assert_training_job_tags_match (sagemaker_session .sagemaker_client , estimator .latest_training_job .name , TAGS )
135
- with timeout .timeout_and_delete_endpoint_by_name (endpoint_name , sagemaker_session ):
136
- estimator = TensorFlow .attach (training_job_name = training_job_name , sagemaker_session = sagemaker_session )
139
+ _assert_training_job_tags_match (sagemaker_session .sagemaker_client ,
140
+ estimator .latest_training_job .name , TAGS )
141
+ with tests .integ .timeout .timeout_and_delete_endpoint_by_name (endpoint_name , sagemaker_session ):
142
+ estimator = TensorFlow .attach (training_job_name = training_job_name ,
143
+ sagemaker_session = sagemaker_session )
137
144
predictor = estimator .deploy (initial_instance_count = 1 , instance_type = 'ml.c4.xlarge' ,
138
145
endpoint_name = endpoint_name )
139
146
140
147
result = predictor .predict (np .zeros (784 ))
141
148
print ('predict result: {}' .format (result ))
142
149
_assert_endpoint_tags_match (sagemaker_session .sagemaker_client , predictor .endpoint , TAGS )
143
- _assert_model_tags_match (sagemaker_session .sagemaker_client , estimator .latest_training_job .name , TAGS )
150
+ _assert_model_tags_match (sagemaker_session .sagemaker_client ,
151
+ estimator .latest_training_job .name , TAGS )
144
152
145
153
146
154
def _assert_s3_files_exist (s3_url , files ):
147
155
parsed_url = urlparse (s3_url )
148
156
s3 = boto3 .client ('s3' )
149
- contents = s3 .list_objects_v2 (Bucket = parsed_url .netloc , Prefix = parsed_url .path .lstrip ('/' ))["Contents" ]
157
+ contents = s3 .list_objects_v2 (Bucket = parsed_url .netloc , Prefix = parsed_url .path .lstrip ('/' ))[
158
+ "Contents" ]
150
159
for f in files :
151
160
found = [x ['Key' ] for x in contents if x ['Key' ].endswith (f )]
152
161
if not found :
@@ -169,5 +178,6 @@ def _assert_endpoint_tags_match(sagemaker_client, endpoint_name, tags):
169
178
170
179
171
180
def _assert_training_job_tags_match(sagemaker_client, training_job_name, tags):
    """Check that the tags attached to the named training job match *tags*.

    Looks up the job's ARN via ``describe_training_job`` and delegates the
    actual tag comparison to ``_assert_tags_match``.
    """
    job_description = sagemaker_client.describe_training_job(
        TrainingJobName=training_job_name)
    job_arn = job_description['TrainingJobArn']
    _assert_tags_match(sagemaker_client, job_arn, tags)
0 commit comments