
Commit 80fa52e

Deprecate enable_cloudwatch_metrics from Frameworks (#292)
There is a warning message for now, but it will be removed later.
1 parent 4613f24 commit 80fa52e
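
In practice the flag becomes a no-op: a Framework estimator constructed with ``enable_cloudwatch_metrics=True`` now emits a ``DeprecationWarning`` and resets the attribute to ``False``, since SageMaker training jobs emit CloudWatch metrics on their own. A minimal sketch of the user-visible behavior (the entry point, role, and instance settings below are placeholders, not values from this commit):

import warnings
from sagemaker.mxnet import MXNet

# DeprecationWarning is hidden by default outside of test runners; surface it here.
warnings.simplefilter('always', DeprecationWarning)

# Passing the deprecated flag still works, but only triggers a warning.
estimator = MXNet(entry_point='train.py',              # hypothetical script
                  role='SageMakerRole',                 # hypothetical IAM role
                  train_instance_count=1,
                  train_instance_type='ml.m4.xlarge',
                  enable_cloudwatch_metrics=True)       # deprecated, ignored

assert estimator.enable_cloudwatch_metrics is False     # always forced off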

File tree

7 files changed (+19, -37 lines)


CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ CHANGELOG
 ========
 
 * bug-fix: Estimators: Fix serialization of single records
+* bug-fix: deprecate enable_cloudwatch_metrics from Framework Estimators.
 
 1.9.0
 =====

src/sagemaker/estimator.py

Lines changed: 7 additions & 3 deletions
@@ -15,6 +15,7 @@
 import json
 import logging
 import os
+import warnings
 from abc import ABCMeta
 from abc import abstractmethod
 from six import with_metaclass
@@ -550,8 +551,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called
                 to convert them before training.
-            enable_cloudwatch_metrics (bool): Whether training and hosting containers will
-                generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False).
+            enable_cloudwatch_metrics (bool): [DEPRECATED] Now there are cloudwatch metrics emitted by all SageMaker
+                training jobs. This will be ignored for now and removed in a further release.
             container_log_level (int): Log level to use within the container (default: logging.INFO).
                 Valid values are defined in the Python logging module.
             code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
@@ -564,7 +565,10 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
         super(Framework, self).__init__(**kwargs)
         self.source_dir = source_dir
         self.entry_point = entry_point
-        self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
+        if enable_cloudwatch_metrics:
+            warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.',
+                          DeprecationWarning)
+        self.enable_cloudwatch_metrics = False
         self.container_log_level = container_log_level
         self._hyperparameters = hyperparameters or {}
         self.code_location = code_location
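
The new behavior can also be exercised from a unit test. The sketch below is not part of this commit; it assumes the fixtures and constants defined in tests/unit/test_chainer.py (``sagemaker_session``, ``SCRIPT_PATH``, ``ROLE``, ``INSTANCE_COUNT``, ``INSTANCE_TYPE``) and uses pytest's ``warns`` context manager:

import pytest
from sagemaker.chainer import Chainer

def test_enable_cloudwatch_metrics_is_deprecated(sagemaker_session):
    # Passing the deprecated flag should raise a DeprecationWarning...
    with pytest.warns(DeprecationWarning):
        chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE,
                          sagemaker_session=sagemaker_session,
                          train_instance_count=INSTANCE_COUNT,
                          train_instance_type=INSTANCE_TYPE,
                          enable_cloudwatch_metrics=True)
    # ...and the attribute is always forced back to False.
    assert chainer.enable_cloudwatch_metrics is False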

src/sagemaker/mxnet/README.rst

Lines changed: 0 additions & 3 deletions
@@ -543,9 +543,6 @@ The MXNetModel constructor takes the following arguments:
   directory with any other training source code dependencies including
   tne entry point file. Structure within this directory will be
   preserved when training on SageMaker.
-- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training
-  and hosting containers will generate Cloudwatch metrics under the
-  AWS/SageMakerContainer namespace.
 - ``container_log_level (int):`` Log level to use within the container.
   Valid values are defined in the Python logging module.
 - ``code_location (str):`` Optional. Name of the S3 bucket where your

tests/unit/test_chainer.py

Lines changed: 3 additions & 10 deletions
@@ -66,15 +66,14 @@ def _get_full_gpu_image_uri(version):
 
 
 def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VERSION, train_instance_type=None,
-                       enable_cloudwatch_metrics=False, base_job_name=None, use_mpi=None, num_processes=None,
+                       base_job_name=None, use_mpi=None, num_processes=None,
                        process_slots_per_host=None, additional_mpi_options=None, **kwargs):
     return Chainer(entry_point=SCRIPT_PATH,
                    framework_version=framework_version,
                    role=ROLE,
                    sagemaker_session=sagemaker_session,
                    train_instance_count=INSTANCE_COUNT,
                    train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
-                   enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                    base_job_name=base_job_name,
                    use_mpi=use_mpi,
                    num_processes=num_processes,
@@ -152,7 +151,6 @@ def _create_train_job_with_additional_hyperparameters(version):
         },
         'hyperparameters': {
             'sagemaker_program': json.dumps('dummy_script.py'),
-            'sagemaker_enable_cloudwatch_metrics': 'false',
             'sagemaker_container_log_level': str(logging.INFO),
             'sagemaker_job_name': json.dumps(JOB_NAME),
             'sagemaker_submit_directory':
@@ -225,12 +223,10 @@ def test_attach_with_additional_hyperparameters(sagemaker_session, chainer_versi
 def test_create_model(sagemaker_session, chainer_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       framework_version=chainer_version, container_log_level=container_log_level,
-                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     chainer.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -244,7 +240,6 @@ def test_create_model(sagemaker_session, chainer_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -269,13 +264,11 @@ def test_create_model_with_optional_params(sagemaker_session):
 def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'ubuntu:latest'
     chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       image_name=custom_image, container_log_level=container_log_level,
-                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)
 
     chainer.fit(inputs='s3://mybucket/train', job_name='new_name')
     model = chainer.create_model()

tests/unit/test_mxnet.py

Lines changed: 2 additions & 6 deletions
@@ -101,11 +101,10 @@ def _create_train_job(version):
 def test_create_model(sagemaker_session, mxnet_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                framework_version=mxnet_version, container_log_level=container_log_level,
-               base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+               base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     mx.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -119,7 +118,6 @@ def test_create_model(sagemaker_session, mxnet_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -144,12 +142,11 @@ def test_create_model_with_optional_params(sagemaker_session):
 def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'mxnet:2.0'
     mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                image_name=custom_image, container_log_level=container_log_level,
-               base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+               base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     mx.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -162,7 +159,6 @@ def test_create_model_with_custom_image(sagemaker_session):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 @patch('time.strftime', return_value=TIMESTAMP)

tests/unit/test_pytorch.py

Lines changed: 3 additions & 8 deletions
@@ -64,15 +64,14 @@ def _get_full_gpu_image_uri(version, py_version=PYTHON_VERSION):
 
 
 def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VERSION, train_instance_type=None,
-                       enable_cloudwatch_metrics=False, base_job_name=None, **kwargs):
+                       base_job_name=None, **kwargs):
     return PyTorch(entry_point=SCRIPT_PATH,
                    framework_version=framework_version,
                    py_version=PYTHON_VERSION,
                    role=ROLE,
                    sagemaker_session=sagemaker_session,
                    train_instance_count=INSTANCE_COUNT,
                    train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
-                   enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                    base_job_name=base_job_name,
                    **kwargs)
 
@@ -119,11 +118,10 @@ def _create_train_job(version):
 def test_create_model(sagemaker_session, pytorch_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       framework_version=pytorch_version, container_log_level=container_log_level,
-                      base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -137,7 +135,6 @@ def test_create_model(sagemaker_session, pytorch_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -162,12 +159,11 @@ def test_create_model_with_optional_params(sagemaker_session):
 def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     image = 'pytorch:9000'
     pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       container_log_level=container_log_level, image_name=image,
-                      base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -180,7 +176,6 @@ def test_create_model_with_custom_image(sagemaker_session):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 @patch('time.strftime', return_value=TIMESTAMP)

tests/unit/test_tf_estimator.py

Lines changed: 3 additions & 7 deletions
@@ -107,7 +107,7 @@ def _create_train_job(tf_version):
 
 
 def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None,
-              checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None,
+              checkpoint_path=None, base_job_name=None,
               training_steps=None, evaluation_steps=None, **kwargs):
     return TensorFlow(entry_point=SCRIPT_PATH,
                       training_steps=training_steps,
@@ -118,7 +118,6 @@ def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_in
                       train_instance_count=INSTANCE_COUNT,
                       train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
                       checkpoint_path=checkpoint_path,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                       base_job_name=base_job_name,
                       **kwargs)
 
@@ -183,12 +182,11 @@ def test_tf_nonexistent_requirements_path(sagemaker_session):
 def test_create_model(sagemaker_session, tf_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                     training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
                     train_instance_type=INSTANCE_TYPE, framework_version=tf_version,
                     container_log_level=container_log_level, base_job_name='job',
-                    source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                    source_dir=source_dir)
 
     job_name = 'doing something'
     tf.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -202,7 +200,6 @@ def test_create_model(sagemaker_session, tf_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -228,13 +225,12 @@ def test_create_model_with_optional_params(sagemaker_session):
 def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'tensorflow:1.0'
     tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                     training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
                     train_instance_type=INSTANCE_TYPE, image_name=custom_image,
                     container_log_level=container_log_level, base_job_name='job',
-                    source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                    source_dir=source_dir)
 
     job_name = 'doing something'
     tf.fit(inputs='s3://mybucket/train', job_name=job_name)
