Skip to content

Commit d53b957

Browse files
authored
feature: add tensorflow training 1.15.2 py37 support (#1458)
* feature: tensorflow training 1.15.2 py37 support
1 parent a66c48d commit d53b957

File tree

5 files changed

+86
-50
lines changed

5 files changed

+86
-50
lines changed

doc/using_tf.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ To train a TensorFlow model by using the SageMaker Python SDK:
4242
Prepare a Script Mode Training Script
4343
======================================
4444

45-
Your TensorFlow training script must be a Python 2.7- or 3.6-compatible source file.
45+
Your TensorFlow training script must be a Python 2.7-, 3.6- or 3.7-compatible source file.
4646

4747
The training script is very similar to a training script you might run outside of SageMaker, but you can access useful properties about the training environment through various environment variables, including the following:
4848

@@ -143,6 +143,11 @@ To use Script Mode, set at least one of these args
143143
- ``py_version='py3'``
144144
- ``script_mode=True``
145145

146+
To use Python 3.7, specify both of these args:
147+
148+
- ``py_version='py37'``
149+
- ``framework_version='1.15.2'``
150+
146151
When using Script Mode, your training script needs to accept the following args:
147152

148153
- ``model_dir``

src/sagemaker/fw_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
"{} framework does not support version {}. Please use one of the following: {}."
6363
)
6464

65-
VALID_PY_VERSIONS = ["py2", "py3"]
65+
VALID_PY_VERSIONS = ["py2", "py3", "py37"]
6666
VALID_EIA_FRAMEWORKS = [
6767
"tensorflow",
6868
"tensorflow-serving",
@@ -71,6 +71,7 @@
7171
"pytorch-serving",
7272
]
7373
PY2_RESTRICTED_EIA_FRAMEWORKS = ["pytorch-serving"]
74+
PY37_SUPPORTED_FRAMEWORKS = ["tensorflow-scriptmode"]
7475
VALID_ACCOUNTS_BY_REGION = {
7576
"us-gov-west-1": "246785580436",
7677
"us-iso-east-1": "744548109606",
@@ -103,7 +104,7 @@
103104
}
104105

105106
MERGED_FRAMEWORKS_LOWEST_VERSIONS = {
106-
"tensorflow-scriptmode": {"py3": [1, 13, 1], "py2": [1, 14, 0]},
107+
"tensorflow-scriptmode": {"py3": [1, 13, 1], "py2": [1, 14, 0], "py37": [1, 15, 2]},
107108
"tensorflow-serving": [1, 13, 0],
108109
"tensorflow-serving-eia": [1, 14, 0],
109110
"mxnet": {"py3": [1, 4, 1], "py2": [1, 6, 0]},
@@ -257,6 +258,9 @@ def create_image_uri(
257258
if py_version and py_version not in VALID_PY_VERSIONS:
258259
raise ValueError("invalid py_version argument: {}".format(py_version))
259260

261+
if py_version == "py37" and framework not in PY37_SUPPORTED_FRAMEWORKS:
262+
raise ValueError("{} does not support Python 3.7 at this time.".format(framework))
263+
260264
if _accelerator_type_valid_for_framework(
261265
framework=framework,
262266
py_version=py_version,

src/sagemaker/tensorflow/README.rst

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -31,45 +31,45 @@ SageMaker TensorFlow Docker containers
3131

3232
The latest containers include the following Python packages:
3333

34-
+--------------------------------+---------------+---------------+
35-
| Dependencies | TF 1.15.2 | TF 2.1 |
36-
+--------------------------------+---------------+---------------+
37-
| awscli | 1.18.1 | 1.18.3 |
38-
+--------------------------------+---------------+---------------+
39-
| boto3 | 1.12.1 | 1.12.3 |
40-
+--------------------------------+---------------+---------------+
41-
| botocore | 1.15.1 | 1.15.3 |
42-
+--------------------------------+---------------+---------------+
43-
| h5py | 2.10.0 | 2.10.0 |
44-
+--------------------------------+---------------+---------------+
45-
| horovod | 0.18.2 | 0.18.2 |
46-
+--------------------------------+---------------+---------------+
47-
| keras | 2.3.1 | 2.3.1 |
48-
+--------------------------------+---------------+---------------+
49-
| mpi4py | 3.0.2 | 3.0.3 |
50-
+--------------------------------+---------------+---------------+
51-
| numpy | 1.18.1 | 1.18.1 |
52-
+--------------------------------+---------------+---------------+
53-
| pandas | 0.24.2 | 1.0.1 |
54-
+--------------------------------+---------------+---------------+
55-
| pip | 20.0.2 | 20.0.2 |
56-
+--------------------------------+---------------+---------------+
57-
| Pillow | 6.2.1 | 7.0.0 |
58-
+--------------------------------+---------------+---------------+
59-
| Python | 2.7 or 3.6 | 2.7 or 3.6 |
60-
+--------------------------------+---------------+---------------+
61-
| requests | 2.22.0 | 2.22.0 |
62-
+--------------------------------+---------------+---------------+
63-
| sagemaker-containers | 2.7.0 | 2.8.0 |
64-
+--------------------------------+---------------+---------------+
65-
| sagemaker-tensorflow-container | 1.15.0.1.1.0 | 2.0.0.1.1.0 |
66-
+--------------------------------+---------------+---------------+
67-
| scipy | 1.2.2 | 1.4.1 |
68-
+--------------------------------+---------------+---------------+
69-
| tensorflow | 1.15.2 | 2.1.0 |
70-
+--------------------------------+---------------+---------------+
34+
+--------------------------------+--------------------+---------------+
35+
| Dependencies | TF 1.15.2 | TF 2.1 |
36+
+--------------------------------+--------------------+---------------+
37+
| awscli | 1.18.1 | 1.18.3 |
38+
+--------------------------------+--------------------+---------------+
39+
| boto3 | 1.12.1 | 1.12.3 |
40+
+--------------------------------+--------------------+---------------+
41+
| botocore | 1.15.1 | 1.15.3 |
42+
+--------------------------------+--------------------+---------------+
43+
| h5py | 2.10.0 | 2.10.0 |
44+
+--------------------------------+--------------------+---------------+
45+
| horovod | 0.18.2 | 0.18.2 |
46+
+--------------------------------+--------------------+---------------+
47+
| keras | 2.3.1 | 2.3.1 |
48+
+--------------------------------+--------------------+---------------+
49+
| mpi4py | 3.0.2 | 3.0.3 |
50+
+--------------------------------+--------------------+---------------+
51+
| numpy | 1.18.1 | 1.18.1 |
52+
+--------------------------------+--------------------+---------------+
53+
| pandas | 0.24.2 | 1.0.1 |
54+
+--------------------------------+--------------------+---------------+
55+
| pip | 20.0.2 | 20.0.2 |
56+
+--------------------------------+--------------------+---------------+
57+
| Pillow | 6.2.1 | 7.0.0 |
58+
+--------------------------------+--------------------+---------------+
59+
| Python | 2.7, 3.6 or 3.7 | 2.7 or 3.6 |
60+
+--------------------------------+--------------------+---------------+
61+
| requests | 2.22.0 | 2.22.0 |
62+
+--------------------------------+--------------------+---------------+
63+
| sagemaker-containers | 2.7.0 | 2.8.0 |
64+
+--------------------------------+--------------------+---------------+
65+
| sagemaker-tensorflow-container | 1.15.0.1.1.0 | 2.0.0.1.1.0 |
66+
+--------------------------------+--------------------+---------------+
67+
| scipy | 1.2.2 | 1.4.1 |
68+
+--------------------------------+--------------------+---------------+
69+
| tensorflow | 1.15.2 | 2.1.0 |
70+
+--------------------------------+--------------------+---------------+
7171

72-
Script Mode TensorFlow Docker images support both Python 2.7 and Python 3.6. The Docker images extend Ubuntu 16.04.
72+
Script Mode TensorFlow Docker images support Python 2.7 and Python 3.6. Python 3.7 is also supported for TensorFlow version 1.15.2. The Docker images extend Ubuntu 16.04.
7373

7474
You can select the version of TensorFlow by passing a ``framework_version`` keyword arg to the TensorFlow Estimator constructor. Currently supported versions are listed in the table above. You can also set ``framework_version`` to specify only the major and minor version, e.g. ``'1.6'``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.6.0.
7575
Alternatively, you can build your own image by following the instructions in the SageMaker TensorFlow containers

tests/integ/test_tf_script_mode.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@
3939
TAGS = [{"Key": "some-key", "Value": "some-value"}]
4040

4141

42-
def test_mnist_with_checkpoint_config(sagemaker_session, instance_type, tf_full_version):
42+
@pytest.fixture(scope="module")
43+
def py_version(tf_full_version):
44+
return (
45+
"py37" if tf_full_version == TensorFlow._LATEST_1X_VERSION else tests.integ.PYTHON_VERSION
46+
)
47+
48+
49+
def test_mnist_with_checkpoint_config(
50+
sagemaker_session, instance_type, tf_full_version, py_version
51+
):
4352
checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
4453
sagemaker_session.default_bucket(), sagemaker_timestamp()
4554
)
@@ -52,7 +61,7 @@ def test_mnist_with_checkpoint_config(sagemaker_session, instance_type, tf_full_
5261
sagemaker_session=sagemaker_session,
5362
script_mode=True,
5463
framework_version=tf_full_version,
55-
py_version=tests.integ.PYTHON_VERSION,
64+
py_version=py_version,
5665
metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}],
5766
checkpoint_s3_uri=checkpoint_s3_uri,
5867
checkpoint_local_path=checkpoint_local_path,
@@ -82,7 +91,7 @@ def test_mnist_with_checkpoint_config(sagemaker_session, instance_type, tf_full_
8291
assert actual_training_checkpoint_config == expected_training_checkpoint_config
8392

8493

85-
def test_server_side_encryption(sagemaker_session, tf_full_version):
94+
def test_server_side_encryption(sagemaker_session, tf_full_version, py_version):
8695
with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
8796
output_path = os.path.join(
8897
bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
@@ -97,7 +106,7 @@ def test_server_side_encryption(sagemaker_session, tf_full_version):
97106
sagemaker_session=sagemaker_session,
98107
script_mode=True,
99108
framework_version=tf_full_version,
100-
py_version=tests.integ.PYTHON_VERSION,
109+
py_version=py_version,
101110
code_location=output_path,
102111
output_path=output_path,
103112
model_dir="/opt/ml/model",
@@ -124,14 +133,14 @@ def test_server_side_encryption(sagemaker_session, tf_full_version):
124133

125134

126135
@pytest.mark.canary_quick
127-
def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version):
136+
def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, py_version):
128137
estimator = TensorFlow(
129138
entry_point=SCRIPT,
130139
role=ROLE,
131140
train_instance_count=2,
132141
train_instance_type=instance_type,
133142
sagemaker_session=sagemaker_session,
134-
py_version=tests.integ.PYTHON_VERSION,
143+
py_version=py_version,
135144
script_mode=True,
136145
framework_version=tf_full_version,
137146
distributions=PARAMETER_SERVER_DISTRIBUTION,
@@ -149,7 +158,7 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version):
149158
)
150159

151160

152-
def test_mnist_async(sagemaker_session, cpu_instance_type):
161+
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, py_version):
153162
estimator = TensorFlow(
154163
entry_point=SCRIPT,
155164
role=ROLE,
@@ -191,14 +200,14 @@ def test_mnist_async(sagemaker_session, cpu_instance_type):
191200
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)
192201

193202

194-
def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_full_version):
203+
def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_full_version, py_version):
195204
estimator = TensorFlow(
196205
entry_point="training.py",
197206
source_dir=TFS_RESOURCE_PATH,
198207
role=ROLE,
199208
train_instance_count=1,
200209
train_instance_type=instance_type,
201-
py_version=tests.integ.PYTHON_VERSION,
210+
py_version=py_version,
202211
sagemaker_session=sagemaker_session,
203212
script_mode=True,
204213
framework_version=tf_full_version,

tests/unit/test_fw_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,24 @@ def test_create_image_uri_cn_northwest_1():
297297
}
298298

299299

300+
def test_create_image_uri_py37_invalid_framework():
301+
error_message = "{} does not support Python 3.7 at this time.".format(MOCK_FRAMEWORK)
302+
303+
with pytest.raises(ValueError) as error:
304+
fw_utils.create_image_uri(REGION, MOCK_FRAMEWORK, "ml.m4.xlarge", "1.4.0", "py37")
305+
assert error_message in str(error)
306+
307+
308+
def test_create_image_uri_py37():
309+
image_uri = fw_utils.create_image_uri(
310+
REGION, "tensorflow-scriptmode", "ml.m4.xlarge", "1.15.2", "py37"
311+
)
312+
assert (
313+
image_uri
314+
== "763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:1.15.2-cpu-py37"
315+
)
316+
317+
300318
def test_tf_eia_images():
301319
image_uri = fw_utils.create_image_uri(
302320
"us-west-2",

0 commit comments

Comments
 (0)