Skip to content

Commit cec723b

Browse files
laurenyu authored and Piali Das committed
Add warning if framework_version is not set (aws#431)
In the future, a breaking change will be released that causes framework_version to be required when creating a Framework estimator. This change adds a warning when an estimator is created without framework_version defined.
1 parent 9021f29 commit cec723b

File tree

11 files changed

+117
-62
lines changed

11 files changed

+117
-62
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ CHANGELOG
99
* feature: Add timestamp to secondary status in training job output
1010
* bug-fix: Local Mode: Set correct default values for additional_volumes and additional_env_vars
1111
* enhancement: Local Mode: support nvidia-docker2 natively
12+
* warning: Frameworks: add warning for upcoming breaking change that makes framework_version required
1213

1314
1.11.2
1415
======

README.rst

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,10 @@ Here is an end to end example of how to use a SageMaker Estimator:
153153
154154
# Configure an MXNet Estimator (no training happens yet)
155155
mxnet_estimator = MXNet('train.py',
156-
role="SageMakerRole",
156+
role='SageMakerRole',
157157
train_instance_type='ml.p2.xlarge',
158-
train_instance_count = 1)
158+
train_instance_count=1,
159+
framework_version='1.2.1')
159160
160161
# Starts a SageMaker training job and waits until completion.
161162
mxnet_estimator.fit('s3://my_bucket/my_training_data/')
@@ -183,9 +184,10 @@ We can take the example in `Using Estimators <#using-estimators>`__ , and use e
183184
184185
# Configure an MXNet Estimator (no training happens yet)
185186
mxnet_estimator = MXNet('train.py',
186-
role="SageMakerRole",
187+
role='SageMakerRole',
187188
train_instance_type='local',
188-
train_instance_count=1)
189+
train_instance_count=1,
190+
framework_version='1.2.1')
189191
190192
# In Local Mode, fit will pull the MXNet container Docker image and run it locally
191193
mxnet_estimator.fit('s3://my_bucket/my_training_data/')
@@ -239,7 +241,8 @@ Here is an end-to-end example:
239241
240242
mxnet_estimator = MXNet('train.py',
241243
train_instance_type='local',
242-
train_instance_count=1)
244+
train_instance_count=1,
245+
framework_version='1.2.1')
243246
244247
mxnet_estimator.fit('file:///tmp/my_training_data')
245248
transformer = mxnet_estimator.transformer(1, 'local', assemble_with='Line', max_payload=1)
@@ -504,10 +507,11 @@ To train a model using your own VPC, set the optional parameters ``subnets`` and
504507
505508
# Configure an MXNet Estimator with subnets and security groups from your VPC
506509
mxnet_vpc_estimator = MXNet('train.py',
507-
train_instance_type='ml.p2.xlarge',
508-
train_instance_count = 1,
509-
subnets=['subnet-1', 'subnet-2'],
510-
security_group_ids=['sg-1'])
510+
train_instance_type='ml.p2.xlarge',
511+
train_instance_count=1,
512+
framework_version='1.2.1',
513+
subnets=['subnet-1', 'subnet-2'],
514+
security_group_ids=['sg-1'])
511515
512516
# SageMaker Training Job will set VpcConfig and container instances will run in your VPC
513517
mxnet_vpc_estimator.fit('s3://my_bucket/my_training_data/')

src/sagemaker/chainer/README.rst

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ Suppose that you already have an Chainer training script called
2828
.. code:: python
2929
3030
from sagemaker.chainer import Chainer
31-
chainer_estimator = Chainer(entry_point="chainer-train.py",
32-
role="SageMakerRole",
33-
train_instance_type="ml.p3.2xlarge",
34-
train_instance_count=1)
35-
chainer_estimator.fit("s3://bucket/path/to/training/data")
31+
chainer_estimator = Chainer(entry_point='chainer-train.py',
32+
role='SageMakerRole',
33+
train_instance_type='ml.p3.2xlarge',
34+
train_instance_count=1,
35+
framework_version='4.1.0')
36+
chainer_estimator.fit('s3://bucket/path/to/training/data')
3637
3738
Where the S3 URL is a path to your training data, within Amazon S3. The constructor keyword arguments define how
3839
SageMaker runs your training script and are discussed in detail in a later section.
@@ -107,12 +108,13 @@ directories ('train' and 'test').
107108

108109
.. code:: python
109110
110-
chainer_estimator = Chainer("chainer-train.py",
111-
train_instance_type="ml.p3.2xlarge",
112-
train_instance_count=1,
113-
hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate':0.1})
111+
chainer_estimator = Chainer('chainer-train.py',
112+
train_instance_type='ml.p3.2xlarge',
113+
train_instance_count=1,
114+
framework_version='4.1.0',
115+
hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
114116
chainer_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
115-
'test': 's3://my-data-bucket/path/to/my/test/data'})
117+
'test': 's3://my-data-bucket/path/to/my/test/data'})
116118
117119
118120
Chainer Estimators
@@ -280,13 +282,14 @@ operation.
280282
.. code:: python
281283
282284
# Train my estimator
283-
chainer_estimator = Chainer(entry_point="train_and_deploy.py",
284-
train_instance_type="ml.p3.2xlarge",
285-
train_instance_count=1)
286-
chainer_estimator.fit("s3://my_bucket/my_training_data/")
285+
chainer_estimator = Chainer(entry_point='train_and_deploy.py',
286+
train_instance_type='ml.p3.2xlarge',
287+
train_instance_count=1,
288+
framework_version='4.1.0')
289+
chainer_estimator.fit('s3://my_bucket/my_training_data/')
287290
288291
# Deploy my estimator to a SageMaker Endpoint and get a Predictor
289-
predictor = chainer_estimator.deploy(instance_type="ml.m4.xlarge",
292+
predictor = chainer_estimator.deploy(instance_type='ml.m4.xlarge',
290293
initial_instance_count=1)
291294
292295
# `data` is a NumPy array or a Python list.

src/sagemaker/chainer/estimator.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,17 @@
1212
# language governing permissions and limitations under the License.
1313
from __future__ import absolute_import
1414

15+
import logging
16+
1517
from sagemaker.estimator import Framework
16-
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag
18+
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag, empty_framework_version_warning
1719
from sagemaker.chainer.defaults import CHAINER_VERSION
1820
from sagemaker.chainer.model import ChainerModel
1921
from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
2022

23+
logging.basicConfig()
24+
logger = logging.getLogger('sagemaker')
25+
2126

2227
class Chainer(Framework):
2328
"""Handle end-to-end training and deployment of custom Chainer code."""
@@ -32,7 +37,7 @@ class Chainer(Framework):
3237

3338
def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_per_host=None,
3439
additional_mpi_options=None, source_dir=None, hyperparameters=None, py_version='py3',
35-
framework_version=CHAINER_VERSION, image_name=None, **kwargs):
40+
framework_version=None, image_name=None, **kwargs):
3641
"""
3742
This ``Estimator`` executes a Chainer script in a managed Chainer execution environment, within a SageMaker
3843
Training Job. The managed Chainer environment is an Amazon-built Docker container that executes functions
@@ -79,12 +84,15 @@ def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_
7984
super(Chainer, self).__init__(entry_point, source_dir, hyperparameters,
8085
image_name=image_name, **kwargs)
8186
self.py_version = py_version
82-
self.framework_version = framework_version
8387
self.use_mpi = use_mpi
8488
self.num_processes = num_processes
8589
self.process_slots_per_host = process_slots_per_host
8690
self.additional_mpi_options = additional_mpi_options
8791

92+
if framework_version is None:
93+
logger.warning(empty_framework_version_warning(CHAINER_VERSION))
94+
self.framework_version = framework_version or CHAINER_VERSION
95+
8896
def hyperparameters(self):
8997
"""Return hyperparameters used by your custom Chainer code during training."""
9098
hyperparameters = super(Chainer, self).hyperparameters()

src/sagemaker/fw_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,18 @@
2323

2424
"""This module contains utility functions shared across ``Framework`` components."""
2525

26-
2726
UploadedCode = namedtuple('UserCode', ['s3_prefix', 'script_name'])
2827
"""sagemaker.fw_utils.UserCode: An object containing the S3 prefix and script name.
2928
3029
This is for the source code used for the entry point with an ``Estimator``. It can be
3130
instantiated with positional or keyword arguments.
3231
"""
3332

33+
EMPTY_FRAMEWORK_VERSION_WARNING = 'In an upcoming version of the SageMaker Python SDK, ' \
34+
'framework_version will be required to create an estimator. ' \
35+
'Please add framework_version={} to your constructor to avoid ' \
36+
'an error in the future.'
37+
3438

3539
def create_image_uri(region, framework, instance_type, framework_version, py_version, account='520713654638',
3640
optimized_families=[]):
@@ -223,3 +227,7 @@ def model_code_key_prefix(code_location_key_prefix, model_name, image):
223227
str: the key prefix to be used in uploading code
224228
"""
225229
return '/'.join(filter(None, [code_location_key_prefix, model_name or name_from_image(image)]))
230+
231+
232+
def empty_framework_version_warning(default_version):
233+
return EMPTY_FRAMEWORK_VERSION_WARNING.format(default_version)

src/sagemaker/mxnet/README.rst

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ Suppose that you already have an MXNet training script called
1717
.. code:: python
1818
1919
from sagemaker.mxnet import MXNet
20-
mxnet_estimator = MXNet("mxnet-train.py",
21-
role="SageMakerRole",
22-
train_instance_type="ml.p3.2xlarge",
23-
train_instance_count=1)
24-
mxnet_estimator.fit("s3://bucket/path/to/training/data")
20+
mxnet_estimator = MXNet('mxnet-train.py',
21+
role='SageMakerRole',
22+
train_instance_type='ml.p3.2xlarge',
23+
train_instance_count=1,
24+
framework_version='1.2.1')
25+
mxnet_estimator.fit('s3://bucket/path/to/training/data')
2526
2627
Where the S3 URL is a path to your training data within Amazon S3. The constructor keyword arguments define how SageMaker runs your training script and are discussed in detail in a later section.
2728

@@ -97,10 +98,11 @@ You run MXNet training scripts on SageMaker by creating ``MXNet`` Estimators. Sa
9798

9899
.. code:: python
99100
100-
mxnet_estimator = MXNet("train.py",
101-
train_instance_type="ml.p2.xlarge",
102-
train_instance_count=1)
103-
mxnet_estimator.fit("s3://my_bucket/my_training_data/")
101+
mxnet_estimator = MXNet('train.py',
102+
train_instance_type='ml.p2.xlarge',
103+
train_instance_count=1,
104+
framework_version='1.2.1')
105+
mxnet_estimator.fit('s3://my_bucket/my_training_data/')
104106
105107
MXNet Estimators
106108
^^^^^^^^^^^^^^^^
@@ -302,10 +304,11 @@ After calling ``fit``, you can call ``deploy`` on an ``MXNet`` Estimator to crea
302304
.. code:: python
303305
304306
# Train my estimator
305-
mxnet_estimator = MXNet("train.py",
306-
train_instance_type="ml.p2.xlarge",
307-
train_instance_count=1)
308-
mxnet_estimator.fit("s3://my_bucket/my_training_data/")
307+
mxnet_estimator = MXNet('train.py',
308+
train_instance_type='ml.p2.xlarge',
309+
train_instance_count=1,
310+
framework_version='1.2.1')
311+
mxnet_estimator.fit('s3://my_bucket/my_training_data/')
309312
310313
# Deploy my estimator to a SageMaker Endpoint and get a Predictor
311314
predictor = mxnet_estimator.deploy(instance_type='ml.m4.xlarge',

src/sagemaker/mxnet/estimator.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,25 @@
1212
# language governing permissions and limitations under the License.
1313
from __future__ import absolute_import
1414

15+
import logging
16+
1517
from sagemaker.estimator import Framework
16-
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag
18+
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag, empty_framework_version_warning
1719
from sagemaker.mxnet.defaults import MXNET_VERSION
1820
from sagemaker.mxnet.model import MXNetModel
1921
from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
2022

23+
logging.basicConfig()
24+
logger = logging.getLogger('sagemaker')
25+
2126

2227
class MXNet(Framework):
2328
"""Handle end-to-end training and deployment of custom MXNet code."""
2429

2530
__framework_name__ = "mxnet"
2631

2732
def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version='py2',
28-
framework_version=MXNET_VERSION, image_name=None, **kwargs):
33+
framework_version=None, image_name=None, **kwargs):
2934
"""
3035
This ``Estimator`` executes an MXNet script in a managed MXNet execution environment, within a SageMaker
3136
Training Job. The managed MXNet environment is an Amazon-built Docker container that executes functions
@@ -64,7 +69,10 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
6469
super(MXNet, self).__init__(entry_point, source_dir, hyperparameters,
6570
image_name=image_name, **kwargs)
6671
self.py_version = py_version
67-
self.framework_version = framework_version
72+
73+
if framework_version is None:
74+
logger.warning(empty_framework_version_warning(MXNET_VERSION))
75+
self.framework_version = framework_version or MXNET_VERSION
6876

6977
def create_model(self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT):
7078
"""Create a SageMaker ``MXNetModel`` object that can be deployed to an ``Endpoint``.

src/sagemaker/pytorch/README.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ You can then setup a ``PyTorch`` Estimator with keyword arguments to point to th
4848
pytorch_estimator = PyTorch(entry_point='pytorch-train.py',
4949
role='SageMakerRole',
5050
train_instance_type='ml.p3.2xlarge',
51-
train_instance_count=1)
51+
train_instance_count=1,
52+
framework_version='0.4.0')
5253
5354
After that, you simply tell the estimator to start a training job and provide an S3 URL
5455
that is the path to your training data within Amazon S3:
@@ -136,9 +137,10 @@ directories ('train' and 'test').
136137
pytorch_estimator = PyTorch('pytorch-train.py',
137138
train_instance_type='ml.p3.2xlarge',
138139
train_instance_count=1,
139-
hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate':0.1})
140+
framework_version='0.4.0',
141+
hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
140142
pytorch_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
141-
'test': 's3://my-data-bucket/path/to/my/test/data'})
143+
'test': 's3://my-data-bucket/path/to/my/test/data'})
142144
143145
144146
PyTorch Estimators
@@ -318,7 +320,8 @@ operation.
318320
# Train my estimator
319321
pytorch_estimator = PyTorch(entry_point='train_and_deploy.py',
320322
train_instance_type='ml.p3.2xlarge',
321-
train_instance_count=1)
323+
train_instance_count=1,
324+
framework_version='0.4.0')
322325
pytorch_estimator.fit('s3://my_bucket/my_training_data/')
323326
324327
# Deploy my estimator to a SageMaker Endpoint and get a Predictor

src/sagemaker/pytorch/estimator.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,26 @@
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
1313
from __future__ import absolute_import
14+
15+
import logging
16+
1417
from sagemaker.estimator import Framework
15-
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag
18+
from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag, empty_framework_version_warning
1619
from sagemaker.pytorch.defaults import PYTORCH_VERSION, PYTHON_VERSION
1720
from sagemaker.pytorch.model import PyTorchModel
1821
from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
1922

23+
logging.basicConfig()
24+
logger = logging.getLogger('sagemaker')
25+
2026

2127
class PyTorch(Framework):
2228
"""Handle end-to-end training and deployment of custom PyTorch code."""
2329

2430
__framework_name__ = "pytorch"
2531

2632
def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version=PYTHON_VERSION,
27-
framework_version=PYTORCH_VERSION, image_name=None, **kwargs):
33+
framework_version=None, image_name=None, **kwargs):
2834
"""
2935
This ``Estimator`` executes a PyTorch script in a managed PyTorch execution environment, within a SageMaker
3036
Training Job. The managed PyTorch environment is an Amazon-built Docker container that executes functions
@@ -62,7 +68,10 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
6268
"""
6369
super(PyTorch, self).__init__(entry_point, source_dir, hyperparameters, image_name=image_name, **kwargs)
6470
self.py_version = py_version
65-
self.framework_version = framework_version
71+
72+
if framework_version is None:
73+
logger.warning(empty_framework_version_warning(PYTORCH_VERSION))
74+
self.framework_version = framework_version or PYTORCH_VERSION
6675

6776
def create_model(self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT):
6877
"""Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

0 commit comments

Comments
 (0)