Skip to content

Commit 938bf30

Browse files
Dan Choi authored and ChoiByungWook committed
change: bump boto3 and smdebug_rulesconfig versions for reinvent and enable data parallel integ tests
1 parent 993e716 commit 938bf30

File tree

4 files changed

20 additions and 14 deletions

4 files changed

20 additions and 14 deletions

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ def read_version():
3434
# Declare minimal set for installation
3535
required_packages = [
3636
"attrs",
37-
"boto3>=1.16.27",
37+
"boto3>=1.16.32",
3838
"google-pasta",
3939
"numpy>=1.9.0",
4040
"protobuf>=3.1",
4141
"protobuf3-to-dict>=0.1.5",
42-
"smdebug_rulesconfig",
42+
"smdebug_rulesconfig>=1.0.0",
4343
"importlib-metadata>=1.4.0",
4444
"packaging>=20.0",
4545
]

src/sagemaker/image_uris.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,13 @@ def retrieve(
9191
if _should_auto_select_container_version(instance_type, distribution):
9292
container_versions = {
9393
"tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
94+
"tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
9495
"tensorflow-1.15-gpu-py37": "cu110-ubuntu18.04-v8",
96+
"tensorflow-1.15.4-gpu-py37": "cu110-ubuntu18.04",
9597
"mxnet-1.8-gpu-py37": "cu110-ubuntu16.04-v1",
98+
"mxnet-1.8.0-gpu-py37": "cu110-ubuntu16.04",
9699
"pytorch-1.6-gpu-py36": "cu110-ubuntu18.04-v3",
100+
"pytorch-1.6.0-gpu-py36": "cu110-ubuntu18.04",
97101
}
98102
key = "-".join([framework, tag])
99103
if key in container_versions:

tests/integ/test_smdataparallel_pt.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import os
1616

17-
import pytest
1817
import sagemaker.utils
1918
import tests.integ as integ
2019

@@ -27,19 +26,21 @@
2726
)
2827

2928

30-
@pytest.mark.skip(
31-
reason="SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
32-
)
33-
def test_smdataparallel_pt_mnist(sagemaker_session):
29+
def test_smdataparallel_pt_mnist(
30+
sagemaker_session,
31+
pytorch_training_latest_version,
32+
pytorch_training_latest_py_version,
33+
):
3434
job_name = sagemaker.utils.unique_name_from_base("pt-sm-distributed-dataparallel")
3535
estimator = PyTorch(
3636
entry_point="mnist_pt.py",
3737
role="SageMakerRole",
38-
image_uri="redacted",
3938
source_dir=smdataparallel_dir,
4039
instance_count=2,
4140
instance_type="ml.p3.16xlarge",
4241
sagemaker_session=sagemaker_session,
42+
framework_version=pytorch_training_latest_version,
43+
py_version=pytorch_training_latest_py_version,
4344
distribution={"smdistributed": {"dataparallel": {"enabled": True}}},
4445
)
4546

tests/integ/test_smdataparallel_tf.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import os
1616

17-
import pytest
1817
import sagemaker.utils
1918
import tests.integ as integ
2019

@@ -26,19 +25,21 @@
2625
)
2726

2827

29-
@pytest.mark.skip(
30-
reason="SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
31-
)
32-
def test_smdataparallel_tf_mnist(sagemaker_session):
28+
def test_smdataparallel_tf_mnist(
29+
sagemaker_session,
30+
tensorflow_training_latest_version,
31+
tensorflow_training_latest_py_version,
32+
):
3333
job_name = sagemaker.utils.unique_name_from_base("tf-sm-distributed-dataparallel")
3434
estimator = TensorFlow(
3535
entry_point="mnist_tf.py",
3636
role="SageMakerRole",
37-
image_uri="redacted",
3837
source_dir=smdataparallel_dir,
3938
instance_count=2,
4039
instance_type="ml.p3.16xlarge",
4140
sagemaker_session=sagemaker_session,
41+
framework_version=tensorflow_training_latest_version,
42+
py_version=tensorflow_training_latest_py_version,
4243
distribution={"smdistributed": {"dataparallel": {"enabled": True}}},
4344
)
4445

0 commit comments

Comments
 (0)