Skip to content

Commit ef75afb

Browse files
authored
Merge branch 'master' into requests
2 parents ebad9f3 + 9cf38de commit ef75afb

25 files changed

+910
-302
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ CHANGELOG
66
==========
77

88
* bug-fix: add version bound for requests module to avoid version conflicts between docker-compose and docker-py
9+
* bug-fix: Remove unnecessary dependency tensorflow
10+
* doc-fix: Change ``distribution`` to ``distributions``
11+
* bug-fix: Increase docker-compose http timeout and health check timeout to 120.
12+
* feature: Local Mode: Add support for intermediate output to a local directory.
913

1014
1.16.1.post1
1115
============
@@ -31,6 +35,7 @@ CHANGELOG
3135
* feature: Add support for SageMaker Inference Pipelines
3236
* feature: Add support for SparkML serving container
3337

38+
3439
1.15.2
3540
======
3641

src/sagemaker/fw_utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,11 @@ def framework_name_from_image(image_name):
219219
else:
220220
# extract framework, python version and image tag
221221
# We must support both the legacy and current image name format.
222-
name_pattern = \
223-
re.compile('^sagemaker(?:-rl)?-(tensorflow|mxnet|chainer|pytorch|scikit-learn):(.*)-(.*?)-(py2|py3)$')
224-
legacy_name_pattern = re.compile('^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$')
222+
name_pattern = re.compile(
223+
r'^sagemaker(?:-rl)?-(tensorflow|mxnet|chainer|pytorch|scikit-learn):(.*)-(.*?)-(py2|py3)$')
224+
legacy_name_pattern = re.compile(
225+
r'^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$')
226+
225227
name_match = name_pattern.match(sagemaker_match.group(8))
226228
legacy_match = legacy_name_pattern.match(sagemaker_match.group(8))
227229

src/sagemaker/local/entities.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
logger.setLevel(logging.WARNING)
3030

3131
_UNUSED_ARN = 'local:arn-does-not-matter'
32-
HEALTH_CHECK_TIMEOUT_LIMIT = 30
32+
HEALTH_CHECK_TIMEOUT_LIMIT = 120
3333

3434

3535
class _LocalTrainingJob(object):
@@ -405,7 +405,7 @@ def _wait_for_serving_container(serving_port):
405405

406406
endpoint_url = 'http://localhost:%s/ping' % serving_port
407407
while True:
408-
i += 1
408+
i += 5
409409
if i >= HEALTH_CHECK_TIMEOUT_LIMIT:
410410
raise RuntimeError('Giving up, endpoint didn\'t launch correctly')
411411

@@ -416,7 +416,7 @@ def _wait_for_serving_container(serving_port):
416416
else:
417417
return
418418

419-
time.sleep(1)
419+
time.sleep(5)
420420

421421

422422
def _perform_request(endpoint_url, pool_manager=None):

src/sagemaker/local/image.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939

4040
CONTAINER_PREFIX = 'algo'
4141
DOCKER_COMPOSE_FILENAME = 'docker-compose.yaml'
42+
DOCKER_COMPOSE_HTTP_TIMEOUT_ENV = 'COMPOSE_HTTP_TIMEOUT'
43+
DOCKER_COMPOSE_HTTP_TIMEOUT = '120'
44+
4245

4346
# Environment variables to be set during training
4447
REGION_ENV_NAME = 'AWS_REGION'
@@ -101,7 +104,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name
101104
os.mkdir(shared_dir)
102105

103106
data_dir = self._create_tmp_folder()
104-
volumes = self._prepare_training_volumes(data_dir, input_data_config, hyperparameters)
107+
volumes = self._prepare_training_volumes(data_dir, input_data_config, output_data_config,
108+
hyperparameters)
105109

106110
# Create the configuration files for each container that we will create
107111
# Each container will map the additional local volumes (if any).
@@ -278,7 +282,8 @@ def write_config_files(self, host, hyperparameters, input_data_config):
278282
_write_json_file(os.path.join(config_path, 'resourceconfig.json'), resource_config)
279283
_write_json_file(os.path.join(config_path, 'inputdataconfig.json'), json_input_data_config)
280284

281-
def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters):
285+
def _prepare_training_volumes(self, data_dir, input_data_config, output_data_config,
286+
hyperparameters):
282287
shared_dir = os.path.join(self.container_root, 'shared')
283288
model_dir = os.path.join(self.container_root, 'model')
284289
volumes = []
@@ -306,6 +311,14 @@ def _prepare_training_volumes(self, data_dir, input_data_config, hyperparameters
306311
# Also mount a directory that all the containers can access.
307312
volumes.append(_Volume(shared_dir, '/opt/ml/shared'))
308313

314+
parsed_uri = urlparse(output_data_config['S3OutputPath'])
315+
if parsed_uri.scheme == 'file' \
316+
and sagemaker.rl.estimator.SAGEMAKER_OUTPUT_LOCATION in hyperparameters:
317+
intermediate_dir = os.path.join(parsed_uri.path, 'output', 'intermediate')
318+
if not os.path.exists(intermediate_dir):
319+
os.makedirs(intermediate_dir)
320+
volumes.append(_Volume(intermediate_dir, '/opt/ml/output/intermediate'))
321+
309322
return volumes
310323

311324
def _prepare_serving_volumes(self, model_location):
@@ -359,6 +372,9 @@ def _generate_compose_file(self, command, additional_volumes=None, additional_en
359372
additional_env_var_list = ['{}={}'.format(k, v) for k, v in additional_env_vars.items()]
360373
environment.extend(additional_env_var_list)
361374

375+
if os.environ.get(DOCKER_COMPOSE_HTTP_TIMEOUT_ENV) is None:
376+
os.environ[DOCKER_COMPOSE_HTTP_TIMEOUT_ENV] = DOCKER_COMPOSE_HTTP_TIMEOUT
377+
362378
if command == 'train':
363379
optml_dirs = {'output', 'output/data', 'input'}
364380

src/sagemaker/mxnet/README.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,15 +209,15 @@ If you were previously relying on the default save method, you can now import on
209209
210210
save(args.model_dir, model)
211211
212-
Lastly, if you were relying on the container launching a parameter server for use with distributed training, you must now set ``distribution`` to the following dictionary when creating an MXNet estimator:
212+
Lastly, if you were relying on the container launching a parameter server for use with distributed training, you must now set ``distributions`` to the following dictionary when creating an MXNet estimator:
213213

214214
.. code:: python
215215
216216
from sagemaker.mxnet import MXNet
217217
218218
estimator = MXNet('path-to-distributed-training-script.py',
219219
...,
220-
distribution={'parameter_server': {'enabled': True}})
220+
distributions={'parameter_server': {'enabled': True}})
221221
222222
223223
Using third-party libraries
@@ -323,7 +323,7 @@ The following are optional arguments. When you create an ``MXNet`` object, you c
323323
framework_version and py_version. Refer to: `SageMaker MXNet Docker Containers
324324
<#sagemaker-mxnet-docker-containers>`_ for details on what the Official images support
325325
and where to find the source code to build your custom image.
326-
- ``distribution`` For versions 1.3 and above only.
326+
- ``distributions`` For versions 1.3 and above only.
327327
Specifies information for how to run distributed training.
328328
To launch a parameter server during training, set this argument to:
329329

src/sagemaker/mxnet/estimator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
6767
Examples:
6868
123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
6969
custom-image:latest.
70-
distribution (dict): A dictionary with information on how to run distributed training
70+
distributions (dict): A dictionary with information on how to run distributed training
7171
(default: None).
7272
**kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
7373
"""

src/sagemaker/tensorflow/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,5 @@
1919
# classes for tensorflow serving. Currently tensorflow_serving_api can only be pip-installed for python 2.
2020
sys.path.append(os.path.dirname(__file__))
2121

22-
from distutils.version import LooseVersion # noqa: E402, F401 pylint: disable=no-name-in-module
23-
import tensorflow # noqa: E402, F401
24-
25-
if LooseVersion(tensorflow.__version__) < LooseVersion("1.3.0"): # pylint: disable=no-member
26-
message = 'Tensorflow version must be >= 1.3.0. Current version: {}'.format(
27-
tensorflow.__version__) # pylint: disable=no-member
28-
raise AssertionError(message)
29-
3022
from sagemaker.tensorflow.estimator import TensorFlow # noqa: E402, F401
3123
from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor # noqa: E402, F401

src/sagemaker/tensorflow/estimator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=N
199199
custom-image:latest.
200200
script_mode (bool): If set to True, the estimator will use the Script Mode containers (default: False).
201201
This will be ignored if py_version is set to 'py3'.
202-
distribution (dict): A dictionary with information on how to run distributed training
202+
distributions (dict): A dictionary with information on how to run distributed training
203203
(default: None). Currently we only support distributed training with parameter servers. To enable it
204204
use the following setup:
205205
{

src/sagemaker/tensorflow/serving.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from __future__ import absolute_import
1414

1515
import logging
16-
1716
import sagemaker
1817
from sagemaker.content_types import CONTENT_TYPE_JSON
1918
from sagemaker.fw_utils import create_image_uri
@@ -144,7 +143,6 @@ def _get_image_uri(self, instance_type):
144143
if self.image:
145144
return self.image
146145

147-
# reuse standard image uri function, then strip unwanted python component
148146
region_name = self.sagemaker_session.boto_region_name
149147
return create_image_uri(region_name, Model.FRAMEWORK_NAME, instance_type,
150148
self._framework_version)

src/sagemaker/utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import errno
1616
import os
17+
import random
1718
import re
1819
import sys
1920
import tarfile
@@ -64,6 +65,14 @@ def name_from_base(base, max_length=63, short=False):
6465
return '{}-{}'.format(trimmed_base, timestamp)
6566

6667

68+
def unique_name_from_base(base, max_length=63):
69+
unique = '%04x' % random.randrange(16**4) # 4-digit hex
70+
ts = str(int(time.time()))
71+
available_length = max_length - 2 - len(ts) - len(unique)
72+
trimmed = base[:available_length]
73+
return '{}-{}-{}'.format(trimmed, ts, unique)
74+
75+
6776
def airflow_name_from_base(base, max_length=63, short=False):
6877
"""Append airflow execution_date macro (https://airflow.apache.org/code.html?#macros)
6978
to the provided string. The macro will be evaluated in Airflow operator runtime.

tests/conftest.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ def mxnet_version(request):
9595
return request.param
9696

9797

98+
@pytest.fixture(scope='module', params=['1.3', '1.3.0'])
99+
def ei_mxnet_version(request):
100+
return request.param
101+
102+
98103
@pytest.fixture(scope='module', params=['0.4', '0.4.0'])
99104
def pytorch_version(request):
100105
return request.param
@@ -112,6 +117,11 @@ def tf_version(request):
112117
return request.param
113118

114119

120+
@pytest.fixture(scope='module', params=['1.11', '1.11.0'])
121+
def ei_tf_version(request):
122+
return request.param
123+
124+
115125
@pytest.fixture(scope='module', params=['0.10.1', '0.10.1', '0.11', '0.11.0'])
116126
def rl_coach_tf_version(request):
117127
return request.param
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
setosa,5.1,3.5,1.4,0.2
2+
setosa,4.9,3,1.4,0.2
3+
setosa,4.7,3.2,1.3,0.2
4+
setosa,4.6,3.1,1.5,0.2
5+
setosa,5,3.6,1.4,0.2
6+
setosa,5.4,3.9,1.7,0.4
7+
setosa,4.6,3.4,1.4,0.3
8+
setosa,5,3.4,1.5,0.2
9+
setosa,4.4,2.9,1.4,0.2
10+
setosa,4.9,3.1,1.5,0.1
11+
setosa,5.4,3.7,1.5,0.2
12+
setosa,4.8,3.4,1.6,0.2
13+
setosa,4.8,3,1.4,0.1
14+
setosa,4.3,3,1.1,0.1
15+
setosa,5.8,4,1.2,0.2
16+
setosa,5.7,4.4,1.5,0.4
17+
setosa,5.4,3.9,1.3,0.4
18+
setosa,5.1,3.5,1.4,0.3
19+
setosa,5.7,3.8,1.7,0.3
20+
setosa,5.1,3.8,1.5,0.3
21+
setosa,5.4,3.4,1.7,0.2
22+
setosa,5.1,3.7,1.5,0.4
23+
setosa,4.6,3.6,1,0.2
24+
setosa,5.1,3.3,1.7,0.5
25+
setosa,4.8,3.4,1.9,0.2
26+
setosa,5,3,1.6,0.2
27+
setosa,5,3.4,1.6,0.4
28+
setosa,5.2,3.5,1.5,0.2
29+
setosa,5.2,3.4,1.4,0.2
30+
setosa,4.7,3.2,1.6,0.2
31+
setosa,4.8,3.1,1.6,0.2
32+
setosa,5.4,3.4,1.5,0.4
33+
setosa,5.2,4.1,1.5,0.1
34+
setosa,5.5,4.2,1.4,0.2
35+
setosa,4.9,3.1,1.5,0.2
36+
setosa,5,3.2,1.2,0.2
37+
setosa,5.5,3.5,1.3,0.2
38+
setosa,4.9,3.6,1.4,0.1
39+
setosa,4.4,3,1.3,0.2
40+
setosa,5.1,3.4,1.5,0.2
41+
setosa,5,3.5,1.3,0.3
42+
setosa,4.5,2.3,1.3,0.3
43+
setosa,4.4,3.2,1.3,0.2
44+
setosa,5,3.5,1.6,0.6
45+
setosa,5.1,3.8,1.9,0.4
46+
setosa,4.8,3,1.4,0.3
47+
setosa,5.1,3.8,1.6,0.2
48+
setosa,4.6,3.2,1.4,0.2
49+
setosa,5.3,3.7,1.5,0.2
50+
setosa,5,3.3,1.4,0.2
51+
versicolor,7,3.2,4.7,1.4
52+
versicolor,6.4,3.2,4.5,1.5
53+
versicolor,6.9,3.1,4.9,1.5
54+
versicolor,5.5,2.3,4,1.3
55+
versicolor,6.5,2.8,4.6,1.5
56+
versicolor,5.7,2.8,4.5,1.3
57+
versicolor,6.3,3.3,4.7,1.6
58+
versicolor,4.9,2.4,3.3,1
59+
versicolor,6.6,2.9,4.6,1.3
60+
versicolor,5.2,2.7,3.9,1.4
61+
versicolor,5,2,3.5,1
62+
versicolor,5.9,3,4.2,1.5
63+
versicolor,6,2.2,4,1
64+
versicolor,6.1,2.9,4.7,1.4
65+
versicolor,5.6,2.9,3.6,1.3
66+
versicolor,6.7,3.1,4.4,1.4
67+
versicolor,5.6,3,4.5,1.5
68+
versicolor,5.8,2.7,4.1,1
69+
versicolor,6.2,2.2,4.5,1.5
70+
versicolor,5.6,2.5,3.9,1.1
71+
versicolor,5.9,3.2,4.8,1.8
72+
versicolor,6.1,2.8,4,1.3
73+
versicolor,6.3,2.5,4.9,1.5
74+
versicolor,6.1,2.8,4.7,1.2
75+
versicolor,6.4,2.9,4.3,1.3
76+
versicolor,6.6,3,4.4,1.4
77+
versicolor,6.8,2.8,4.8,1.4
78+
versicolor,6.7,3,5,1.7
79+
versicolor,6,2.9,4.5,1.5
80+
versicolor,5.7,2.6,3.5,1
81+
versicolor,5.5,2.4,3.8,1.1
82+
versicolor,5.5,2.4,3.7,1
83+
versicolor,5.8,2.7,3.9,1.2
84+
versicolor,6,2.7,5.1,1.6
85+
versicolor,5.4,3,4.5,1.5
86+
versicolor,6,3.4,4.5,1.6
87+
versicolor,6.7,3.1,4.7,1.5
88+
versicolor,6.3,2.3,4.4,1.3
89+
versicolor,5.6,3,4.1,1.3
90+
versicolor,5.5,2.5,4,1.3
91+
versicolor,5.5,2.6,4.4,1.2
92+
versicolor,6.1,3,4.6,1.4
93+
versicolor,5.8,2.6,4,1.2
94+
versicolor,5,2.3,3.3,1
95+
versicolor,5.6,2.7,4.2,1.3
96+
versicolor,5.7,3,4.2,1.2
97+
versicolor,5.7,2.9,4.2,1.3
98+
versicolor,6.2,2.9,4.3,1.3
99+
versicolor,5.1,2.5,3,1.1
100+
versicolor,5.7,2.8,4.1,1.3
101+
virginica,6.3,3.3,6,2.5
102+
virginica,5.8,2.7,5.1,1.9
103+
virginica,7.1,3,5.9,2.1
104+
virginica,6.3,2.9,5.6,1.8
105+
virginica,6.5,3,5.8,2.2
106+
virginica,7.6,3,6.6,2.1
107+
virginica,4.9,2.5,4.5,1.7
108+
virginica,7.3,2.9,6.3,1.8
109+
virginica,6.7,2.5,5.8,1.8
110+
virginica,7.2,3.6,6.1,2.5
111+
virginica,6.5,3.2,5.1,2
112+
virginica,6.4,2.7,5.3,1.9
113+
virginica,6.8,3,5.5,2.1
114+
virginica,5.7,2.5,5,2
115+
virginica,5.8,2.8,5.1,2.4
116+
virginica,6.4,3.2,5.3,2.3
117+
virginica,6.5,3,5.5,1.8
118+
virginica,7.7,3.8,6.7,2.2
119+
virginica,7.7,2.6,6.9,2.3
120+
virginica,6,2.2,5,1.5
121+
virginica,6.9,3.2,5.7,2.3
122+
virginica,5.6,2.8,4.9,2
123+
virginica,7.7,2.8,6.7,2
124+
virginica,6.3,2.7,4.9,1.8
125+
virginica,6.7,3.3,5.7,2.1
126+
virginica,7.2,3.2,6,1.8
127+
virginica,6.2,2.8,4.8,1.8
128+
virginica,6.1,3,4.9,1.8
129+
virginica,6.4,2.8,5.6,2.1
130+
virginica,7.2,3,5.8,1.6
131+
virginica,7.4,2.8,6.1,1.9
132+
virginica,7.9,3.8,6.4,2
133+
virginica,6.4,2.8,5.6,2.2
134+
virginica,6.3,2.8,5.1,1.5
135+
virginica,6.1,2.6,5.6,1.4
136+
virginica,7.7,3,6.1,2.3
137+
virginica,6.3,3.4,5.6,2.4
138+
virginica,6.4,3.1,5.5,1.8
139+
virginica,6,3,4.8,1.8
140+
virginica,6.9,3.1,5.4,2.1
141+
virginica,6.7,3.1,5.6,2.4
142+
virginica,6.9,3.1,5.1,2.3
143+
virginica,5.8,2.7,5.1,1.9
144+
virginica,6.8,3.2,5.9,2.3
145+
virginica,6.7,3.3,5.7,2.5
146+
virginica,6.7,3,5.2,2.3
147+
virginica,6.3,2.5,5,1.9
148+
virginica,6.5,3,5.2,2
149+
virginica,6.2,3.4,5.4,2.3
150+
virginica,5.9,3,5.1,1.8

0 commit comments

Comments
 (0)