Skip to content

Commit 9ae28d7

Browse files
committed
2 parents 92858df + 9127d60 commit 9ae28d7

26 files changed

+289
-132
lines changed

CHANGELOG.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
11
# Changelog
22

3+
## v1.55.3 (2020-04-08)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* remove .strip() from batch transform
8+
* allow model with network isolation when creating a Transformer from an Estimator
9+
* add enable_network_isolation to EstimatorBase
10+
11+
## v1.55.2 (2020-04-07)
12+
13+
### Bug Fixes and Other Changes
14+
15+
* use .format instead of os.path.join for Processing S3 paths.
16+
17+
### Testing and Release Infrastructure
18+
19+
* use m5.xlarge instances for "ap-northeast-1" region integ tests.
20+
21+
## v1.55.1 (2020-04-06)
22+
23+
### Bug Fixes and Other Changes
24+
25+
* correct local mode behavior for CN regions
26+
27+
## v1.55.0.post0 (2020-04-06)
28+
29+
### Documentation Changes
30+
31+
* fix documentation to provide working example.
32+
* add documentation for XGBoost
33+
* Correct comment in SKLearn Estimator about default Python version
34+
* document Inferentia supported version
35+
* Merge Amazon SageMaker Operators for Kubernetes and Kubernetes Jobs pages
36+
37+
### Testing and Release Infrastructure
38+
39+
* turn on warnings as errors for docs builds
40+
341
## v1.55.0 (2020-03-31)
442

543
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.55.1.dev0
1+
1.55.4.dev0

doc/using_tf.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,19 +267,19 @@ Training with ``MPI`` is configured by specifying following fields in ``distribu
267267
command executed by SageMaker to launch distributed horovod training.
268268

269269

270-
In the below example we create an estimator to launch Horovod distributed training with 2 processes on one host:
270+
In the example below, we create an estimator to launch Horovod distributed training with 4 processes on one host:
271271

272272
.. code:: python
273273
274274
from sagemaker.tensorflow import TensorFlow
275275
276276
tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole',
277-
train_instance_count=1, train_instance_type='ml.p2.xlarge',
278-
framework_version='1.12', py_version='py3',
277+
train_instance_count=1, train_instance_type='ml.p3.8xlarge',
278+
framework_version='2.1.0', py_version='py3',
279279
distributions={
280280
'mpi': {
281281
'enabled': True,
282-
'processes_per_host': 2,
282+
'processes_per_host': 4,
283283
'custom_mpi_options': '--NCCL_DEBUG INFO'
284284
}
285285
})

src/sagemaker/amazon/amazon_estimator.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,17 @@ def __init__(
8282
:class:`~sagemaker.estimator.EstimatorBase`.
8383
"""
8484
super(AmazonAlgorithmEstimatorBase, self).__init__(
85-
role, train_instance_count, train_instance_type, **kwargs
85+
role,
86+
train_instance_count,
87+
train_instance_type,
88+
enable_network_isolation=enable_network_isolation,
89+
**kwargs
8690
)
8791

8892
data_location = data_location or "s3://{}/sagemaker-record-sets/".format(
8993
self.sagemaker_session.default_bucket()
9094
)
9195
self._data_location = data_location
92-
self._enable_network_isolation = enable_network_isolation
9396

9497
def train_image(self):
9598
"""Placeholder docstring"""
@@ -101,14 +104,6 @@ def hyperparameters(self):
101104
"""Placeholder docstring"""
102105
return hp.serialize_all(self)
103106

104-
def enable_network_isolation(self):
105-
"""If this Estimator can use network isolation when running.
106-
107-
Returns:
108-
bool: Whether this Estimator can use network isolation or not.
109-
"""
110-
return self._enable_network_isolation
111-
112107
@property
113108
def data_location(self):
114109
"""Placeholder docstring"""

src/sagemaker/chainer/estimator.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,13 +206,15 @@ def create_model(
206206
if "image" not in kwargs:
207207
kwargs["image"] = self.image_name
208208

209+
if "name" not in kwargs:
210+
kwargs["name"] = self._current_job_name
211+
209212
return ChainerModel(
210213
self.model_data,
211214
role or self.role,
212215
entry_point or self.entry_point,
213216
source_dir=(source_dir or self._model_source_dir()),
214217
enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
215-
name=self._current_job_name,
216218
container_log_level=self.container_log_level,
217219
code_location=self.code_location,
218220
py_version=self.py_version,

src/sagemaker/estimator.py

Lines changed: 61 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def __init__(
9898
debugger_hook_config=None,
9999
tensorboard_output_config=None,
100100
enable_sagemaker_metrics=None,
101+
enable_network_isolation=False,
101102
):
102103
"""Initialize an ``EstimatorBase`` instance.
103104
@@ -199,6 +200,11 @@ def __init__(
199200
Series. For more information see:
200201
https://docs.aws.amazon.com/sagemaker/latest/dg/API_AlgorithmSpecification.html#SageMaker-Type-AlgorithmSpecification-EnableSageMakerMetricsTimeSeries
201202
(default: ``None``).
203+
enable_network_isolation (bool): Specifies whether container will
204+
run in network isolation mode (default: ``False``). Network
205+
isolation mode restricts the container access to outside networks
206+
(such as the Internet). The container does not make any inbound or
207+
outbound network calls. Also known as Internet-free mode.
202208
"""
203209
self.role = role
204210
self.train_instance_count = train_instance_count
@@ -260,6 +266,7 @@ def __init__(
260266
self.collection_configs = None
261267

262268
self.enable_sagemaker_metrics = enable_sagemaker_metrics
269+
self._enable_network_isolation = enable_network_isolation
263270

264271
@abstractmethod
265272
def train_image(self):
@@ -290,7 +297,7 @@ def enable_network_isolation(self):
290297
Returns:
291298
bool: Whether this Estimator needs network isolation or not.
292299
"""
293-
return False
300+
return self._enable_network_isolation
294301

295302
def prepare_workflow_for_training(self, job_name=None):
296303
"""Calls _prepare_for_training. Used when setting up a workflow.
@@ -818,6 +825,8 @@ def transformer(
818825
role=None,
819826
volume_kms_key=None,
820827
vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT,
828+
enable_network_isolation=None,
829+
model_name=None,
821830
):
822831
"""Return a ``Transformer`` that uses a SageMaker Model based on the
823832
training job. It reuses the SageMaker Session and base job name used by
@@ -856,8 +865,20 @@ def transformer(
856865
vpc_config_override (dict[str, list[str]]): Optional override for the
857866
VpcConfig set on the model.
858867
Default: use subnets and security groups from this Estimator.
868+
859869
* 'Subnets' (list[str]): List of subnet ids.
860870
* 'SecurityGroupIds' (list[str]): List of security group ids.
871+
872+
enable_network_isolation (bool): Specifies whether container will
873+
run in network isolation mode. Network isolation mode restricts
874+
the container access to outside networks (such as the internet).
875+
The container does not make any inbound or outbound network
876+
calls. If True, a channel named "code" will be created for any
877+
user entry script for inference. Also known as Internet-free mode.
878+
If not specified, this setting is taken from the estimator's
879+
current configuration.
880+
model_name (str): Name to use for creating an Amazon SageMaker
881+
model. If not specified, the name of the training job is used.
861882
"""
862883
tags = tags or self.tags
863884

@@ -866,11 +887,16 @@ def transformer(
866887
"No finished training job found associated with this estimator. Please make sure "
867888
"this estimator is only used for building workflow config"
868889
)
869-
model_name = self._current_job_name
890+
model_name = model_name or self._current_job_name
870891
else:
871-
model_name = self.latest_training_job.name
892+
model_name = model_name or self.latest_training_job.name
893+
if enable_network_isolation is None:
894+
enable_network_isolation = self.enable_network_isolation()
895+
872896
model = self.create_model(
873-
vpc_config_override=vpc_config_override, model_kms_key=self.output_kms_key
897+
vpc_config_override=vpc_config_override,
898+
model_kms_key=self.output_kms_key,
899+
enable_network_isolation=enable_network_isolation,
874900
)
875901

876902
# not all create_model() implementations have the same kwargs
@@ -1219,21 +1245,17 @@ def __init__(
12191245
checkpoints will be provided under `/opt/ml/checkpoints/`.
12201246
(default: ``None``).
12211247
enable_network_isolation (bool): Specifies whether container will
1222-
run in network isolation mode. Network isolation mode restricts
1223-
the container access to outside networks (such as the Internet).
1224-
The container does not make any inbound or outbound network
1225-
calls. If ``True``, a channel named "code" will be created for any
1226-
user entry script for training. The user entry script, files in
1227-
source_dir (if specified), and dependencies will be uploaded in
1228-
a tar to S3. Also known as internet-free mode (default: ``False``).
1248+
run in network isolation mode (default: ``False``). Network
1249+
isolation mode restricts the container access to outside networks
1250+
(such as the Internet). The container does not make any inbound or
1251+
outbound network calls. Also known as Internet-free mode.
12291252
enable_sagemaker_metrics (bool): enable SageMaker Metrics Time
12301253
Series. For more information see:
12311254
https://docs.aws.amazon.com/sagemaker/latest/dg/API_AlgorithmSpecification.html#SageMaker-Type-AlgorithmSpecification-EnableSageMakerMetricsTimeSeries
12321255
(default: ``None``).
12331256
"""
12341257
self.image_name = image_name
12351258
self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {}
1236-
self._enable_network_isolation = enable_network_isolation
12371259
super(Estimator, self).__init__(
12381260
role,
12391261
train_instance_count,
@@ -1261,16 +1283,9 @@ def __init__(
12611283
debugger_hook_config=debugger_hook_config,
12621284
tensorboard_output_config=tensorboard_output_config,
12631285
enable_sagemaker_metrics=enable_sagemaker_metrics,
1286+
enable_network_isolation=enable_network_isolation,
12641287
)
12651288

1266-
def enable_network_isolation(self):
1267-
"""If this Estimator can use network isolation when running.
1268-
1269-
Returns:
1270-
bool: Whether this Estimator can use network isolation or not.
1271-
"""
1272-
return self._enable_network_isolation
1273-
12741289
def train_image(self):
12751290
"""Returns the docker image to use for training.
12761291
@@ -1358,14 +1373,16 @@ def predict_wrapper(endpoint, session):
13581373

13591374
role = role or self.role
13601375

1376+
if "enable_network_isolation" not in kwargs:
1377+
kwargs["enable_network_isolation"] = self.enable_network_isolation()
1378+
13611379
return Model(
13621380
self.model_data,
13631381
image or self.train_image(),
13641382
role,
13651383
vpc_config=self.get_vpc_config(vpc_config_override),
13661384
sagemaker_session=self.sagemaker_session,
13671385
predictor_cls=predictor_cls,
1368-
enable_network_isolation=self.enable_network_isolation(),
13691386
**kwargs
13701387
)
13711388

@@ -1498,15 +1515,15 @@ def __init__(
14981515
>>> |------ train.py
14991516
>>> |------ common
15001517
>>> |------ virtual-env
1518+
15011519
enable_network_isolation (bool): Specifies whether container will
15021520
run in network isolation mode. Network isolation mode restricts
15031521
the container access to outside networks (such as the internet).
15041522
The container does not make any inbound or outbound network
15051523
calls. If True, a channel named "code" will be created for any
15061524
user entry script for training. The user entry script, files in
15071525
source_dir (if specified), and dependencies will be uploaded in
1508-
a tar to S3. Also known as internet-free mode (default: `False`
1509-
).
1526+
a tar to S3. Also known as internet-free mode (default: `False`).
15101527
git_config (dict[str, str]): Git configurations used for cloning
15111528
files, including ``repo``, ``branch``, ``commit``,
15121529
``2FA_enabled``, ``username``, ``password`` and ``token``. The
@@ -1579,7 +1596,7 @@ def __init__(
15791596
You can find additional parameters for initializing this class at
15801597
:class:`~sagemaker.estimator.EstimatorBase`.
15811598
"""
1582-
super(Framework, self).__init__(**kwargs)
1599+
super(Framework, self).__init__(enable_network_isolation=enable_network_isolation, **kwargs)
15831600
if entry_point.startswith("s3://"):
15841601
raise ValueError(
15851602
"Invalid entry point script: {}. Must be a path to a local file.".format(
@@ -1599,7 +1616,6 @@ def __init__(
15991616
self.container_log_level = container_log_level
16001617
self.code_location = code_location
16011618
self.image_name = image_name
1602-
self._enable_network_isolation = enable_network_isolation
16031619

16041620
self.uploaded_code = None
16051621

@@ -1608,14 +1624,6 @@ def __init__(
16081624
self.checkpoint_local_path = checkpoint_local_path
16091625
self.enable_sagemaker_metrics = enable_sagemaker_metrics
16101626

1611-
def enable_network_isolation(self):
1612-
"""Return True if this Estimator can use network isolation to run.
1613-
1614-
Returns:
1615-
bool: Whether this Estimator can use network isolation or not.
1616-
"""
1617-
return self._enable_network_isolation
1618-
16191627
def _prepare_for_training(self, job_name=None):
16201628
"""Set hyperparameters needed for training. This method will also
16211629
validate ``source_dir``.
@@ -1891,6 +1899,8 @@ def transformer(
18911899
volume_kms_key=None,
18921900
entry_point=None,
18931901
vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT,
1902+
enable_network_isolation=None,
1903+
model_name=None,
18941904
):
18951905
"""Return a ``Transformer`` that uses a SageMaker Model based on the
18961906
training job. It reuses the SageMaker Session and base job name used by
@@ -1935,9 +1945,21 @@ def transformer(
19351945
vpc_config_override (dict[str, list[str]]): Optional override for
19361946
the VpcConfig set on the model.
19371947
Default: use subnets and security groups from this Estimator.
1948+
19381949
* 'Subnets' (list[str]): List of subnet ids.
19391950
* 'SecurityGroupIds' (list[str]): List of security group ids.
19401951
1952+
enable_network_isolation (bool): Specifies whether container will
1953+
run in network isolation mode. Network isolation mode restricts
1954+
the container access to outside networks (such as the internet).
1955+
The container does not make any inbound or outbound network
1956+
calls. If True, a channel named "code" will be created for any
1957+
user entry script for inference. Also known as Internet-free mode.
1958+
If not specified, this setting is taken from the estimator's
1959+
current configuration.
1960+
model_name (str): Name to use for creating an Amazon SageMaker
1961+
model. If not specified, the name of the training job is used.
1962+
19411963
Returns:
19421964
sagemaker.transformer.Transformer: a ``Transformer`` object that can be used to start a
19431965
SageMaker Batch Transform job.
@@ -1946,12 +1968,17 @@ def transformer(
19461968
tags = tags or self.tags
19471969

19481970
if self.latest_training_job is not None:
1971+
if enable_network_isolation is None:
1972+
enable_network_isolation = self.enable_network_isolation()
1973+
19491974
model = self.create_model(
19501975
role=role,
19511976
model_server_workers=model_server_workers,
19521977
entry_point=entry_point,
19531978
vpc_config_override=vpc_config_override,
19541979
model_kms_key=self.output_kms_key,
1980+
enable_network_isolation=enable_network_isolation,
1981+
name=model_name,
19551982
)
19561983
model._create_sagemaker_model(instance_type, tags=tags)
19571984

@@ -1964,7 +1991,7 @@ def transformer(
19641991
"No finished training job found associated with this estimator. Please make sure "
19651992
"this estimator is only used for building workflow config"
19661993
)
1967-
model_name = self._current_job_name
1994+
model_name = model_name or self._current_job_name
19681995
transform_env = env or {}
19691996

19701997
return Transformer(

src/sagemaker/local/entities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ def _perform_batch_inference(self, input_data, output_data, **kwargs):
356356
)
357357

358358
response_body = response["Body"]
359-
data = response_body.read().strip()
359+
data = response_body.read()
360360
response_body.close()
361361
f.write(data)
362362
if "AssembleWith" in output_data and output_data["AssembleWith"] == "Line":

0 commit comments

Comments
 (0)