Skip to content

Commit f339949

Browse files
authored
Use the test argument framework_version in all tests (#158)
* Use the test argument framework_version in all tests * Make flake8 happy
1 parent ec07c35 commit f339949

File tree

5 files changed

+37
-21
lines changed

5 files changed

+37
-21
lines changed

test/integration/local/test_horovod.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ def test_distributed_training_horovod_basic(instances,
3232
processes,
3333
sagemaker_local_session,
3434
docker_image,
35-
tmpdir):
35+
tmpdir,
36+
framework_version):
3637
output_path = 'file://%s' % tmpdir
3738
estimator = TensorFlow(
3839
entry_point=os.path.join(RESOURCE_PATH, 'hvdbasic', 'train_hvd_basic.py'),
@@ -42,6 +43,7 @@ def test_distributed_training_horovod_basic(instances,
4243
train_instance_count=instances,
4344
image_name=docker_image,
4445
output_path=output_path,
46+
framework_version=framework_version,
4547
hyperparameters={'sagemaker_mpi_enabled': True,
4648
'sagemaker_network_interface_name': 'eth0',
4749
'sagemaker_mpi_num_of_processes_per_host': processes})

test/integration/local/test_keras.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727

2828
@pytest.mark.skip_gpu
29-
def test_keras_training(sagemaker_local_session, docker_image, tmpdir):
29+
def test_keras_training(sagemaker_local_session, docker_image, tmpdir, framework_version):
3030
entry_point = os.path.join(RESOURCE_PATH, 'keras_inception.py')
3131
output_path = 'file://{}'.format(tmpdir)
3232

@@ -39,14 +39,14 @@ def test_keras_training(sagemaker_local_session, docker_image, tmpdir):
3939
sagemaker_session=sagemaker_local_session,
4040
model_dir='/opt/ml/model',
4141
output_path=output_path,
42-
framework_version='1.11.0',
42+
framework_version=framework_version,
4343
py_version='py3')
4444

4545
estimator.fit()
4646

4747
model = serving.Model(model_data=output_path,
4848
role='SageMakerRole',
49-
framework_version='1.11.0',
49+
framework_version=framework_version,
5050
sagemaker_session=sagemaker_local_session)
5151

5252
predictor = model.deploy(initial_instance_count=1, instance_type='local')

test/integration/local/test_training.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,52 +39,62 @@ def test_py_versions(docker_image, processor, py_full_version):
3939

4040

4141
@pytest.mark.skip_gpu
42-
def test_mnist_cpu(sagemaker_local_session, docker_image, tmpdir):
42+
def test_mnist_cpu(sagemaker_local_session, docker_image, tmpdir, framework_version):
4343
output_path = 'file://{}'.format(tmpdir)
4444
run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist.py'),
4545
instance_type='local',
4646
instance_count=1,
4747
sagemaker_local_session=sagemaker_local_session,
4848
docker_image=docker_image,
49+
framework_version=framework_version,
4950
output_path=output_path,
5051
training_data_path='file://{}'.format(
5152
os.path.join(RESOURCE_PATH, 'mnist', 'data')))
5253
_assert_files_exist_in_tar(output_path, ['my_model.h5'])
5354

5455

5556
@pytest.mark.skip_cpu
56-
def test_gpu(sagemaker_local_session, docker_image):
57+
def test_gpu(sagemaker_local_session, docker_image, framework_version):
5758
run_tf_training(script=os.path.join(RESOURCE_PATH, 'gpu_device_placement.py'),
5859
instance_type='local_gpu',
5960
instance_count=1,
6061
sagemaker_local_session=sagemaker_local_session,
6162
docker_image=docker_image,
63+
framework_version=framework_version,
6264
training_data_path='file://{}'.format(
6365
os.path.join(RESOURCE_PATH, 'mnist', 'data')))
6466

6567

6668
@pytest.mark.skip_gpu
67-
def test_distributed_training_cpu_no_ps(sagemaker_local_session, docker_image, tmpdir):
69+
def test_distributed_training_cpu_no_ps(sagemaker_local_session,
70+
docker_image,
71+
tmpdir,
72+
framework_version):
6873
output_path = 'file://{}'.format(tmpdir)
6974
run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist_estimator.py'),
7075
instance_type='local',
7176
instance_count=2,
7277
sagemaker_local_session=sagemaker_local_session,
7378
docker_image=docker_image,
79+
framework_version=framework_version,
7480
output_path=output_path,
7581
training_data_path='file://{}'.format(
7682
os.path.join(RESOURCE_PATH, 'mnist', 'data-distributed')))
7783
_assert_files_exist_in_tar(output_path, TF_CHECKPOINT_FILES)
7884

7985

8086
@pytest.mark.skip_gpu
81-
def test_distributed_training_cpu_ps(sagemaker_local_session, docker_image, tmpdir):
87+
def test_distributed_training_cpu_ps(sagemaker_local_session,
88+
docker_image,
89+
tmpdir,
90+
framework_version):
8291
output_path = 'file://{}'.format(tmpdir)
8392
run_tf_training(script=os.path.join(RESOURCE_PATH, 'mnist', 'mnist_estimator.py'),
8493
instance_type='local',
8594
instance_count=2,
8695
sagemaker_local_session=sagemaker_local_session,
8796
docker_image=docker_image,
97+
framework_version=framework_version,
8898
output_path=output_path,
8999
hyperparameters={'sagemaker_parameter_server_enabled': True},
90100
training_data_path='file://{}'.format(
@@ -96,7 +106,10 @@ def run_tf_training(script,
96106
instance_type,
97107
instance_count,
98108
sagemaker_local_session,
99-
docker_image, training_data_path, output_path=None,
109+
docker_image,
110+
framework_version,
111+
training_data_path,
112+
output_path=None,
100113
hyperparameters=None):
101114

102115
hyperparameters = hyperparameters or {}
@@ -111,7 +124,7 @@ def run_tf_training(script,
111124
output_path=output_path,
112125
hyperparameters=hyperparameters,
113126
base_job_name='test-tf',
114-
framework_version='1.11.0',
127+
framework_version=framework_version,
115128
py_version='py3')
116129

117130
estimator.fit(training_data_path)

test/integration/sagemaker/test_horovod.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ def test_distributed_training_horovod(sagemaker_session,
2424
sagemaker_local_session,
2525
instance_type,
2626
ecr_image,
27-
tmpdir):
27+
tmpdir,
28+
framework_version):
2829

2930
mpi_options = '-verbose -x orte_base_help_aggregate=0'
3031
estimator = TensorFlow(
@@ -33,7 +34,7 @@ def test_distributed_training_horovod(sagemaker_session,
3334
train_instance_type=instance_type,
3435
train_instance_count=2,
3536
image_name=ecr_image,
36-
framework_version='1.12',
37+
framework_version=framework_version,
3738
py_version='py3',
3839
script_mode=True,
3940
hyperparameters={'sagemaker_mpi_enabled': True,

test/integration/sagemaker/test_mnist.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from sagemaker_tensorflow_container.training import SAGEMAKER_PARAMETER_SERVER_ENABLED
2222

2323

24-
def test_mnist(sagemaker_session, ecr_image, instance_type):
24+
def test_mnist(sagemaker_session, ecr_image, instance_type, framework_version):
2525
resource_path = os.path.join(os.path.dirname(__file__), '../..', 'resources')
2626
script = os.path.join(resource_path, 'mnist', 'mnist.py')
2727
estimator = TensorFlow(entry_point=script,
@@ -30,7 +30,7 @@ def test_mnist(sagemaker_session, ecr_image, instance_type):
3030
train_instance_count=1,
3131
sagemaker_session=sagemaker_session,
3232
image_name=ecr_image,
33-
framework_version='1.12.0',
33+
framework_version=framework_version,
3434
py_version='py3',
3535
base_job_name='test-sagemaker-mnist')
3636
inputs = estimator.sagemaker_session.upload_data(
@@ -40,7 +40,7 @@ def test_mnist(sagemaker_session, ecr_image, instance_type):
4040
_assert_s3_file_exists(estimator.model_data)
4141

4242

43-
def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type):
43+
def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type, framework_version):
4444
resource_path = os.path.join(os.path.dirname(__file__), '../..', 'resources')
4545
script = os.path.join(resource_path, 'mnist', 'mnist.py')
4646
estimator = TensorFlow(entry_point=script,
@@ -49,7 +49,7 @@ def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type):
4949
train_instance_type=instance_type,
5050
sagemaker_session=sagemaker_session,
5151
image_name=ecr_image,
52-
framework_version='1.12.0',
52+
framework_version=framework_version,
5353
py_version='py3',
5454
base_job_name='test-tf-sm-distributed-mnist')
5555
inputs = estimator.sagemaker_session.upload_data(
@@ -59,7 +59,7 @@ def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type):
5959
_assert_s3_file_exists(estimator.model_data)
6060

6161

62-
def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type):
62+
def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type, framework_version):
6363
resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
6464
script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')
6565
estimator = TensorFlow(entry_point=script,
@@ -69,7 +69,7 @@ def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type):
6969
train_instance_type=instance_type,
7070
sagemaker_session=sagemaker_session,
7171
image_name=ecr_image,
72-
framework_version='1.12.0',
72+
framework_version=framework_version,
7373
py_version='py3',
7474
base_job_name='test-tf-sm-distributed-mnist')
7575
inputs = estimator.sagemaker_session.upload_data(
@@ -80,7 +80,7 @@ def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type):
8080
_assert_s3_file_exists(estimator.model_data)
8181

8282

83-
def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region):
83+
def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region, framework_version):
8484
resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
8585
script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')
8686
estimator = TensorFlow(entry_point=script,
@@ -91,7 +91,7 @@ def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region):
9191
# Disable throttling for checkpoint and model saving
9292
'throttle-secs': 0,
9393
# Without the patch training jobs would fail around 100th to
94-
# 150th steps
94+
# 150th step
9595
'max-steps': 200,
9696
# Large batch size would result in a larger checkpoint file
9797
'batch-size': 2048,
@@ -103,7 +103,7 @@ def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region):
103103
train_instance_type=instance_type,
104104
sagemaker_session=sagemaker_session,
105105
image_name=ecr_image,
106-
framework_version='1.12.0',
106+
framework_version=framework_version,
107107
py_version='py3',
108108
base_job_name='test-tf-sm-s3-mnist')
109109
estimator.fit('s3://sagemaker-sample-data-{}/tensorflow/mnist'.format(region))

0 commit comments

Comments
 (0)