Skip to content

Commit 30deecd

Browse files
committed
skip p2/p3 tests in eu-central-1
1 parent a9e724c commit 30deecd

File tree

2 files changed

+41
-30
lines changed

2 files changed

+41
-30
lines changed

tests/integ/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,11 @@
2424
TRANSFORM_DEFAULT_TIMEOUT_MINUTES = 20
2525
PYTHON_VERSION = 'py' + str(sys.version_info.major)
2626

27-
# 'eu-central-1' has some p2, but not enough for continuous testing
28-
HOSTING_NO_P2_REGIONS = ['ca-central-1', 'eu-west-2', 'us-west-1', 'eu-central-1']
27+
# these regions have some p2 and p3 instances, but not enough for continuous testing
28+
HOSTING_NO_P2_REGIONS = ['ca-central-1', 'eu-central-1', 'eu-west-2', 'us-west-1']
2929
HOSTING_NO_P3_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', 'ca-central-1',
30-
'eu-west-2', 'us-west-1']
30+
'eu-central-1', 'eu-west-2', 'us-west-1']
31+
3132
# EI is currently only supported in the following regions
3233
# regions were derived from https://aws.amazon.com/machine-learning/elastic-inference/pricing/
3334
EI_SUPPORTED_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2', 'eu-west-1', 'ap-northeast-1', 'ap-northeast-2']

tests/integ/test_tf_script_mode.py

Lines changed: 37 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
1313
from __future__ import absolute_import
14+
from __future__ import absolute_import
1415

1516
import numpy as np
1617
import os
@@ -22,9 +23,8 @@
2223
from sagemaker.tensorflow import TensorFlow
2324
from six.moves.urllib.parse import urlparse
2425
from sagemaker.utils import unique_name_from_base
25-
import tests.integ as integ
26-
from tests.integ import kms_utils
27-
import tests.integ.timeout as timeout
26+
27+
import tests.integ
2828

2929
ROLE = 'SageMakerRole'
3030

@@ -35,14 +35,18 @@
3535
TAGS = [{'Key': 'some-key', 'Value': 'some-value'}]
3636

3737

38-
@pytest.fixture(scope='session', params=['ml.c5.xlarge', 'ml.p2.xlarge'])
38+
@pytest.fixture(scope='session', params=[
39+
'ml.c5.xlarge',
40+
pytest.param('ml.p2.xlarge',
41+
marks=pytest.mark.skipif(
42+
tests.integ.test_region() in tests.integ.HOSTING_NO_P2_REGIONS,
43+
reason='no ml.p2 instances in this region'))])
3944
def instance_type(request):
4045
return request.param
4146

4247

43-
@pytest.mark.skipif(integ.test_region() in integ.HOSTING_NO_P2_REGIONS,
44-
reason='no ml.p2 instances in these regions')
45-
@pytest.mark.skipif(integ.PYTHON_VERSION != 'py3', reason="Script Mode tests are only configured to run with Python 3")
48+
@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py3',
49+
reason="Script Mode tests are only configured to run with Python 3")
4650
def test_mnist(sagemaker_session, instance_type):
4751
estimator = TensorFlow(entry_point=SCRIPT,
4852
role='SageMakerRole',
@@ -51,26 +55,26 @@ def test_mnist(sagemaker_session, instance_type):
5155
sagemaker_session=sagemaker_session,
5256
py_version='py3',
5357
framework_version=TensorFlow.LATEST_VERSION,
54-
metric_definitions=[{'Name': 'train:global_steps', 'Regex': r'global_step\/sec:\s(.*)'}])
58+
metric_definitions=[
59+
{'Name': 'train:global_steps', 'Regex': r'global_step\/sec:\s(.*)'}])
5560
inputs = estimator.sagemaker_session.upload_data(
5661
path=os.path.join(RESOURCE_PATH, 'data'),
5762
key_prefix='scriptmode/mnist')
5863

59-
with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
64+
with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
6065
estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-tf-sm-mnist'))
6166
_assert_s3_files_exist(estimator.model_dir,
6267
['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta'])
6368
df = estimator.training_job_analytics.dataframe()
64-
print(df)
6569
assert df.size > 0
6670

6771

6872
def test_server_side_encryption(sagemaker_session):
69-
7073
boto_session = sagemaker_session.boto_session
71-
with kms_utils.bucket_with_encryption(boto_session, ROLE) as (bucket_with_kms, kms_key):
72-
73-
output_path = os.path.join(bucket_with_kms, 'test-server-side-encryption', time.strftime('%y%m%d-%H%M'))
74+
with tests.integ.kms_utils.bucket_with_encryption(boto_session, ROLE) as (
75+
bucket_with_kms, kms_key):
76+
output_path = os.path.join(bucket_with_kms, 'test-server-side-encryption',
77+
time.strftime('%y%m%d-%H%M'))
7478

7579
estimator = TensorFlow(entry_point=SCRIPT,
7680
role=ROLE,
@@ -88,28 +92,29 @@ def test_server_side_encryption(sagemaker_session):
8892
path=os.path.join(RESOURCE_PATH, 'data'),
8993
key_prefix='scriptmode/mnist')
9094

91-
with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
92-
estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-server-side-encryption'))
95+
with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
96+
estimator.fit(inputs=inputs,
97+
job_name=unique_name_from_base('test-server-side-encryption'))
9398

9499

95100
@pytest.mark.canary_quick
96-
@pytest.mark.skipif(integ.PYTHON_VERSION != 'py3', reason="Script Mode tests are only configured to run with Python 3")
101+
@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py3',
102+
reason="Script Mode tests are only configured to run with Python 3")
97103
def test_mnist_distributed(sagemaker_session, instance_type):
98104
estimator = TensorFlow(entry_point=SCRIPT,
99105
role=ROLE,
100106
train_instance_count=2,
101-
# TODO: change train_instance_type to instance_type once the test is passing consistently
102-
train_instance_type='ml.c5.xlarge',
107+
train_instance_type=instance_type,
103108
sagemaker_session=sagemaker_session,
104-
py_version=integ.PYTHON_VERSION,
109+
py_version=tests.integ.PYTHON_VERSION,
105110
script_mode=True,
106111
framework_version=TensorFlow.LATEST_VERSION,
107112
distributions=PARAMETER_SERVER_DISTRIBUTION)
108113
inputs = estimator.sagemaker_session.upload_data(
109114
path=os.path.join(RESOURCE_PATH, 'data'),
110115
key_prefix='scriptmode/distributed_mnist')
111116

112-
with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
117+
with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
113118
estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-tf-sm-distributed'))
114119
_assert_s3_files_exist(estimator.model_dir,
115120
['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta'])
@@ -131,22 +136,26 @@ def test_mnist_async(sagemaker_session):
131136
training_job_name = estimator.latest_training_job.name
132137
time.sleep(20)
133138
endpoint_name = training_job_name
134-
_assert_training_job_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
135-
with timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
136-
estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
139+
_assert_training_job_tags_match(sagemaker_session.sagemaker_client,
140+
estimator.latest_training_job.name, TAGS)
141+
with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
142+
estimator = TensorFlow.attach(training_job_name=training_job_name,
143+
sagemaker_session=sagemaker_session)
137144
predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge',
138145
endpoint_name=endpoint_name)
139146

140147
result = predictor.predict(np.zeros(784))
141148
print('predict result: {}'.format(result))
142149
_assert_endpoint_tags_match(sagemaker_session.sagemaker_client, predictor.endpoint, TAGS)
143-
_assert_model_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
150+
_assert_model_tags_match(sagemaker_session.sagemaker_client,
151+
estimator.latest_training_job.name, TAGS)
144152

145153

146154
def _assert_s3_files_exist(s3_url, files):
147155
parsed_url = urlparse(s3_url)
148156
s3 = boto3.client('s3')
149-
contents = s3.list_objects_v2(Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip('/'))["Contents"]
157+
contents = s3.list_objects_v2(Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip('/'))[
158+
"Contents"]
150159
for f in files:
151160
found = [x['Key'] for x in contents if x['Key'].endswith(f)]
152161
if not found:
@@ -169,5 +178,6 @@ def _assert_endpoint_tags_match(sagemaker_client, endpoint_name, tags):
169178

170179

171180
def _assert_training_job_tags_match(sagemaker_client, training_job_name, tags):
172-
training_job_description = sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
181+
training_job_description = sagemaker_client.describe_training_job(
182+
TrainingJobName=training_job_name)
173183
_assert_tags_match(sagemaker_client, training_job_description['TrainingJobArn'], tags)

0 commit comments

Comments
 (0)