4 files changed: +20 -14 lines changed
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,12 @@ def read_version():
34
34
# Declare minimal set for installation
35
35
required_packages = [
36
36
"attrs" ,
37
- "boto3>=1.16.27 " ,
37
+ "boto3>=1.16.32 " ,
38
38
"google-pasta" ,
39
39
"numpy>=1.9.0" ,
40
40
"protobuf>=3.1" ,
41
41
"protobuf3-to-dict>=0.1.5" ,
42
- "smdebug_rulesconfig" ,
42
+ "smdebug_rulesconfig>=1.0.0 " ,
43
43
"importlib-metadata>=1.4.0" ,
44
44
"packaging>=20.0" ,
45
45
]
Original file line number | Diff line number | Diff line change
@@ -91,9 +91,13 @@ def retrieve(
91
91
if _should_auto_select_container_version (instance_type , distribution ):
92
92
container_versions = {
93
93
"tensorflow-2.3-gpu-py37" : "cu110-ubuntu18.04-v3" ,
94
+ "tensorflow-2.3.1-gpu-py37" : "cu110-ubuntu18.04" ,
94
95
"tensorflow-1.15-gpu-py37" : "cu110-ubuntu18.04-v8" ,
96
+ "tensorflow-1.15.4-gpu-py37" : "cu110-ubuntu18.04" ,
95
97
"mxnet-1.8-gpu-py37" : "cu110-ubuntu16.04-v1" ,
98
+ "mxnet-1.8.0-gpu-py37" : "cu110-ubuntu16.04" ,
96
99
"pytorch-1.6-gpu-py36" : "cu110-ubuntu18.04-v3" ,
100
+ "pytorch-1.6.0-gpu-py36" : "cu110-ubuntu18.04" ,
97
101
}
98
102
key = "-" .join ([framework , tag ])
99
103
if key in container_versions :
Original file line number | Diff line number | Diff line change
14
14
15
15
import os
16
16
17
- import pytest
18
17
import sagemaker .utils
19
18
import tests .integ as integ
20
19
27
26
)
28
27
29
28
30
- @pytest .mark .skip (
31
- reason = "SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
32
- )
33
- def test_smdataparallel_pt_mnist (sagemaker_session ):
29
+ def test_smdataparallel_pt_mnist (
30
+ sagemaker_session ,
31
+ pytorch_training_latest_version ,
32
+ pytorch_training_latest_py_version ,
33
+ ):
34
34
job_name = sagemaker .utils .unique_name_from_base ("pt-sm-distributed-dataparallel" )
35
35
estimator = PyTorch (
36
36
entry_point = "mnist_pt.py" ,
37
37
role = "SageMakerRole" ,
38
- image_uri = "redacted" ,
39
38
source_dir = smdataparallel_dir ,
40
39
instance_count = 2 ,
41
40
instance_type = "ml.p3.16xlarge" ,
42
41
sagemaker_session = sagemaker_session ,
42
+ framework_version = pytorch_training_latest_version ,
43
+ py_version = pytorch_training_latest_py_version ,
43
44
distribution = {"smdistributed" : {"dataparallel" : {"enabled" : True }}},
44
45
)
45
46
Original file line number | Diff line number | Diff line change
14
14
15
15
import os
16
16
17
- import pytest
18
17
import sagemaker .utils
19
18
import tests .integ as integ
20
19
26
25
)
27
26
28
27
29
- @pytest .mark .skip (
30
- reason = "SMDistributedDataParallel-enabled DLC isn't publicly released hence not accessible for this test"
31
- )
32
- def test_smdataparallel_tf_mnist (sagemaker_session ):
28
+ def test_smdataparallel_tf_mnist (
29
+ sagemaker_session ,
30
+ tensorflow_training_latest_version ,
31
+ tensorflow_training_latest_py_version ,
32
+ ):
33
33
job_name = sagemaker .utils .unique_name_from_base ("tf-sm-distributed-dataparallel" )
34
34
estimator = TensorFlow (
35
35
entry_point = "mnist_tf.py" ,
36
36
role = "SageMakerRole" ,
37
- image_uri = "redacted" ,
38
37
source_dir = smdataparallel_dir ,
39
38
instance_count = 2 ,
40
39
instance_type = "ml.p3.16xlarge" ,
41
40
sagemaker_session = sagemaker_session ,
41
+ framework_version = tensorflow_training_latest_version ,
42
+ py_version = tensorflow_training_latest_py_version ,
42
43
distribution = {"smdistributed" : {"dataparallel" : {"enabled" : True }}},
43
44
)
44
45
You can’t perform that action at this time.
0 commit comments