Deprecate enable_cloudwatch_metrics from Frameworks #292

Merged (10 commits, Aug 17, 2018)
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -6,6 +6,7 @@ CHANGELOG
========

* bug-fix: Estimators: Fix serialization of single records
* bug-fix: Deprecate enable_cloudwatch_metrics from Framework Estimators

1.9.0
=====
10 changes: 7 additions & 3 deletions src/sagemaker/estimator.py
@@ -15,6 +15,7 @@
import json
import logging
import os
import warnings
from abc import ABCMeta
from abc import abstractmethod
from six import with_metaclass
@@ -550,8 +551,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
For convenience, this accepts other types for keys and values, but ``str()`` will be called
to convert them before training.
enable_cloudwatch_metrics (bool): Whether training and hosting containers will
generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False).
enable_cloudwatch_metrics (bool): [DEPRECATED] CloudWatch metrics are now emitted by all SageMaker
training jobs. This argument is ignored and will be removed in a future release.
container_log_level (int): Log level to use within the container (default: logging.INFO).
Valid values are defined in the Python logging module.
code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
@@ -564,7 +565,10 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
super(Framework, self).__init__(**kwargs)
self.source_dir = source_dir
self.entry_point = entry_point
self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
if enable_cloudwatch_metrics:
    warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.',
                  DeprecationWarning)
self.enable_cloudwatch_metrics = False
self.container_log_level = container_log_level
self._hyperparameters = hyperparameters or {}
self.code_location = code_location
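The net effect of this hunk is that the flag no longer controls anything: passing it raises a DeprecationWarning and the attribute is unconditionally set to False. Below is a minimal, self-contained sketch of the same pattern (illustrative only, not part of this PR; note that Python silences DeprecationWarning by default, so callers will not see it unless a warnings filter is enabled):

import warnings

def _resolve_cloudwatch_flag(enable_cloudwatch_metrics=False):
    # Mirrors the deprecation pattern above: warn when the caller passes the
    # flag, then ignore its value unconditionally.
    if enable_cloudwatch_metrics:
        warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.',
                      DeprecationWarning)
    return False

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')  # surface DeprecationWarning, which is hidden by default
    assert _resolve_cloudwatch_flag(enable_cloudwatch_metrics=True) is False

assert any(issubclass(w.category, DeprecationWarning) for w in caught)
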
3 changes: 0 additions & 3 deletions src/sagemaker/mxnet/README.rst
@@ -543,9 +543,6 @@ The MXNetModel constructor takes the following arguments:
directory with any other training source code dependencies including
the entry point file. Structure within this directory will be
preserved when training on SageMaker.
- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training
and hosting containers will generate Cloudwatch metrics under the
AWS/SageMakerContainer namespace.
- ``container_log_level (int):`` Log level to use within the container.
Valid values are defined in the Python logging module.
- ``code_location (str):`` Optional. Name of the S3 bucket where your
13 changes: 3 additions & 10 deletions tests/unit/test_chainer.py
@@ -66,15 +66,14 @@ def _get_full_gpu_image_uri(version):


def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VERSION, train_instance_type=None,
enable_cloudwatch_metrics=False, base_job_name=None, use_mpi=None, num_processes=None,
base_job_name=None, use_mpi=None, num_processes=None,
process_slots_per_host=None, additional_mpi_options=None, **kwargs):
return Chainer(entry_point=SCRIPT_PATH,
framework_version=framework_version,
role=ROLE,
sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
use_mpi=use_mpi,
num_processes=num_processes,
@@ -152,7 +151,6 @@ def _create_train_job_with_additional_hyperparameters(version):
},
'hyperparameters': {
'sagemaker_program': json.dumps('dummy_script.py'),
'sagemaker_enable_cloudwatch_metrics': 'false',
'sagemaker_container_log_level': str(logging.INFO),
'sagemaker_job_name': json.dumps(JOB_NAME),
'sagemaker_submit_directory':
@@ -225,12 +223,10 @@ def test_attach_with_additional_hyperparameters(sagemaker_session, chainer_versi
def test_create_model(sagemaker_session, chainer_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=chainer_version, container_log_level=container_log_level,
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
enable_cloudwatch_metrics=enable_cloudwatch_metrics)
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
chainer.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -244,7 +240,6 @@ def test_create_model(sagemaker_session, chainer_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -269,13 +264,11 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'ubuntu:latest'
chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
image_name=custom_image, container_log_level=container_log_level,
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
enable_cloudwatch_metrics=enable_cloudwatch_metrics)
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)

chainer.fit(inputs='s3://mybucket/train', job_name='new_name')
model = chainer.create_model()
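The test edits above only drop the deprecated argument from the fixtures; the PR itself does not add a test asserting the new warning. A hypothetical test for that behavior could look like the following (a pytest.warns sketch reusing the constants defined in test_chainer.py and assuming pytest is imported there; the test name and body are my own, not part of the PR):

def test_enable_cloudwatch_metrics_warns_and_is_ignored(sagemaker_session):
    # Hypothetical test: the deprecated flag should emit a DeprecationWarning
    # and the attribute should be forced to False regardless of the input.
    with pytest.warns(DeprecationWarning):
        chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                          train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                          enable_cloudwatch_metrics=True)
    assert chainer.enable_cloudwatch_metrics is False
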
8 changes: 2 additions & 6 deletions tests/unit/test_mxnet.py
@@ -101,11 +101,10 @@ def _create_train_job(version):
def test_create_model(sagemaker_session, mxnet_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=mxnet_version, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
mx.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -119,7 +118,6 @@ def test_create_model(sagemaker_session, mxnet_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -144,12 +142,11 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'mxnet:2.0'
mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
image_name=custom_image, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
mx.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -162,7 +159,6 @@ def test_create_model_with_custom_image(sagemaker_session):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


@patch('time.strftime', return_value=TIMESTAMP)
11 changes: 3 additions & 8 deletions tests/unit/test_pytorch.py
@@ -64,15 +64,14 @@ def _get_full_gpu_image_uri(version, py_version=PYTHON_VERSION):


def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VERSION, train_instance_type=None,
enable_cloudwatch_metrics=False, base_job_name=None, **kwargs):
base_job_name=None, **kwargs):
return PyTorch(entry_point=SCRIPT_PATH,
framework_version=framework_version,
py_version=PYTHON_VERSION,
role=ROLE,
sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
**kwargs)

@@ -119,11 +118,10 @@ def _create_train_job(version):
def test_create_model(sagemaker_session, pytorch_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=pytorch_version, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
pytorch.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -137,7 +135,6 @@ def test_create_model(sagemaker_session, pytorch_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -162,12 +159,11 @@ def test_create_model_with_custom_image(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
image = 'pytorch:9000'
pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
container_log_level=container_log_level, image_name=image,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
pytorch.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -180,7 +176,6 @@ def test_create_model_with_custom_image(sagemaker_session):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


@patch('time.strftime', return_value=TIMESTAMP)
10 changes: 3 additions & 7 deletions tests/unit/test_tf_estimator.py
@@ -107,7 +107,7 @@ def _create_train_job(tf_version):


def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None,
checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None,
checkpoint_path=None, base_job_name=None,
training_steps=None, evaluation_steps=None, **kwargs):
return TensorFlow(entry_point=SCRIPT_PATH,
training_steps=training_steps,
@@ -118,7 +118,6 @@ def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_in
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
checkpoint_path=checkpoint_path,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
**kwargs)

@@ -183,12 +182,11 @@ def test_tf_nonexistent_requirements_path(sagemaker_session):
def test_create_model(sagemaker_session, tf_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE, framework_version=tf_version,
container_log_level=container_log_level, base_job_name='job',
source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
source_dir=source_dir)

job_name = 'doing something'
tf.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -202,7 +200,6 @@ def test_create_model(sagemaker_session, tf_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -228,13 +225,12 @@ def test_create_model_with_custom_image(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'tensorflow:1.0'
tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE, image_name=custom_image,
container_log_level=container_log_level, base_job_name='job',
source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
source_dir=source_dir)

job_name = 'doing something'
tf.fit(inputs='s3://mybucket/train', job_name=job_name)