
Commit c286f01

icywang86rui authored and yangaws committed
Tune test_s3_plugin test (#178)
1 parent c276dac commit c286f01

File tree

1 file changed (+3, -6 lines)

test/integration/sagemaker/test_mnist.py

Lines changed: 3 additions & 6 deletions
@@ -15,7 +15,6 @@
 import os
 
 import boto3
-import pytest
 from sagemaker.tensorflow import TensorFlow
 from six.moves.urllib.parse import urlparse
 
@@ -81,23 +80,21 @@ def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type, framework_version):
     _assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
 
 
-# TODO: Enable this test when new binary fixing the s3 plugin released
-@pytest.mark.skip(reason='Skip the test until new binary released')
 def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region, framework_version):
     resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
     script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')
     estimator = TensorFlow(entry_point=script,
                            role='SageMakerRole',
                            hyperparameters={
-                               # Saving a checkpoint after every step to hammer the S3 plugin
-                               'save-checkpoint-steps': 1,
+                               # Saving a checkpoint after every 5 steps to hammer the S3 plugin
+                               'save-checkpoint-steps': 10,
                                # Disable throttling for checkpoint and model saving
                                'throttle-secs': 0,
                                # Without the patch training jobs would fail around 100th to
                                # 150th step
                                'max-steps': 200,
                                # Large batch size would result in a larger checkpoint file
-                               'batch-size': 2048,
+                               'batch-size': 1024,
                                # This makes the training job exporting model during training.
                                # Stale model garbage collection will also be performed.
                                'export-model-during-training': True
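For context, the sketch below shows how hyperparameters like the ones tuned in this diff are typically passed to a SageMaker TensorFlow estimator and run. It is not part of this commit: the instance type, ECR image URI, framework version, and training-data S3 prefix are placeholders (in the real test these come from pytest fixtures), and the keyword arguments follow the SageMaker Python SDK v1 naming (train_instance_type, image_name) in use when this test suite was written.

# Minimal sketch, not the repository's test code: launching a training job with
# the tuned hyperparameters from the diff above. Placeholder values are marked;
# the real test receives them from pytest fixtures (ecr_image, instance_type,
# framework_version, region, sagemaker_session).
import os

from sagemaker.tensorflow import TensorFlow

resource_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
script = os.path.join(resource_path, 'mnist', 'mnist_estimator.py')

estimator = TensorFlow(entry_point=script,
                       role='SageMakerRole',
                       hyperparameters={
                           'save-checkpoint-steps': 10,          # checkpoint often to stress the S3 plugin
                           'throttle-secs': 0,                   # do not throttle checkpoint/model saves
                           'max-steps': 200,                     # failures used to appear around step 100-150
                           'batch-size': 1024,                   # larger batches produce larger checkpoints
                           'export-model-during-training': True  # export (and garbage-collect stale) models mid-training
                       },
                       # SDK v1-style arguments; all values below are placeholders.
                       train_instance_count=1,
                       train_instance_type='ml.c5.xlarge',
                       image_name='<ecr-image-uri>',
                       framework_version='1.15.0',
                       py_version='py3',
                       script_mode=True)

# Kick off the job against a hypothetical S3 prefix that holds the MNIST data.
estimator.fit('s3://<bucket>/mnist-data')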
