Skip to content

Commit f8f3dc3

Browse files
Merge branch 'master' into stopiteration
2 parents 69085d2 + 0cd7aa1 commit f8f3dc3

File tree

14 files changed

+221
-25
lines changed

14 files changed

+221
-25
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,17 @@
22
CHANGELOG
33
=========
44

5-
1.17.1.dev
5+
1.17.2.dev
66
==========
77

8-
* enhancement: Workflow: Specify tasks from which training/tuning operator to transform/deploy in related operators
98
* bug-fix: Handle StopIteration in CloudWatch Logs retrieval
109

10+
1.17.1
11+
======
12+
13+
* enhancement: Workflow: Specify tasks from which training/tuning operator to transform/deploy in related operators
14+
* feature: Supporting inter-container traffic encryption flag
15+
1116
1.17.0
1217
======
1318

README.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,26 @@ To train a model using your own VPC, set the optional parameters ``subnets`` and
748748
# SageMaker Training Job will set VpcConfig and container instances will run in your VPC
749749
mxnet_vpc_estimator.fit('s3://my_bucket/my_training_data/')
750750
751+
To train a model with the inter-container traffic encrypted, set the optional parameters ``subnets`` and ``security_group_ids`` and
752+
the flag ``encrypt_inter_container_traffic`` to ``True`` on an Estimator (Note: This flag can be used only if you specify that the training
753+
job runs in a VPC):
754+
755+
.. code:: python
756+
757+
from sagemaker.mxnet import MXNet
758+
759+
# Configure an MXNet Estimator with subnets and security groups from your VPC
760+
mxnet_vpc_estimator = MXNet('train.py',
761+
train_instance_type='ml.p2.xlarge',
762+
train_instance_count=1,
763+
framework_version='1.2.1',
764+
subnets=['subnet-1', 'subnet-2'],
765+
security_group_ids=['sg-1'],
766+
encrypt_inter_container_traffic=True)
767+
768+
# The SageMaker training job sets the VpcConfig, and training container instances run in your VPC with traffic between the containers encrypted
769+
mxnet_vpc_estimator.fit('s3://my_bucket/my_training_data/')
770+
751771
When you create a ``Predictor`` from the ``Estimator`` using ``deploy()``, the same VPC configurations will be set on the SageMaker Model:
752772
753773
.. code:: python

doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __getattr__(cls, name):
3232
'numpy', 'scipy', 'scipy.sparse']
3333
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
3434

35-
version = '1.17.0'
35+
version = '1.17.1'
3636
project = u'sagemaker'
3737

3838
# Add any Sphinx extension module names here, as strings. They can be extensions

src/sagemaker/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@
3939
from sagemaker.session import s3_input # noqa: F401
4040
from sagemaker.session import get_execution_role # noqa: F401
4141

42-
__version__ = '1.17.0'
42+
__version__ = '1.17.1'

src/sagemaker/algorithm.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(
5050
model_uri=None,
5151
model_channel_name='model',
5252
metric_definitions=None,
53+
encrypt_inter_container_traffic=False
5354
):
5455
"""Initialize an ``AlgorithmEstimator`` instance.
5556
@@ -75,7 +76,7 @@ def __init__(
7576
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe.
7677
7778
This argument can be overridden on a per-channel basis using ``sagemaker.session.s3_input.input_mode``.
78-
output_path (str): S3 location for saving the trainig result (model artifacts and output files).
79+
output_path (str): S3 location for saving the training result (model artifacts and output files).
7980
If not specified, results are stored to a default bucket. If the bucket with the specific name
8081
does not exist, the estimator creates the bucket during the
8182
:meth:`~sagemaker.estimator.EstimatorBase.fit` method execution.
@@ -100,6 +101,8 @@ def __init__(
100101
metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
101102
training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
102103
the regular expression used to extract the metric from the logs.
104+
encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted
105+
for the training job (default: ``False``).
103106
"""
104107
self.algorithm_arn = algorithm_arn
105108
super(AlgorithmEstimator, self).__init__(
@@ -120,6 +123,7 @@ def __init__(
120123
model_uri=model_uri,
121124
model_channel_name=model_channel_name,
122125
metric_definitions=metric_definitions,
126+
encrypt_inter_container_traffic=encrypt_inter_container_traffic
123127
)
124128

125129
self.algorithm_spec = self.sagemaker_session.sagemaker_client.describe_algorithm(

src/sagemaker/estimator.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def __init__(self, role, train_instance_count, train_instance_type,
5252
train_volume_size=30, train_volume_kms_key=None, train_max_run=24 * 60 * 60, input_mode='File',
5353
output_path=None, output_kms_key=None, base_job_name=None, sagemaker_session=None, tags=None,
5454
subnets=None, security_group_ids=None, model_uri=None, model_channel_name='model',
55-
metric_definitions=None):
55+
metric_definitions=None, encrypt_inter_container_traffic=False):
5656
"""Initialize an ``EstimatorBase`` instance.
5757
5858
Args:
@@ -103,6 +103,8 @@ def __init__(self, role, train_instance_count, train_instance_type,
103103
training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
104104
the regular expression used to extract the metric from the logs. This should be defined only
105105
for jobs that don't use an Amazon algorithm.
106+
encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted
107+
for the training job (default: ``False``).
106108
"""
107109
self.role = role
108110
self.train_instance_count = train_instance_count
@@ -138,6 +140,8 @@ def __init__(self, role, train_instance_count, train_instance_type,
138140
self.subnets = subnets
139141
self.security_group_ids = security_group_ids
140142

143+
self.encrypt_inter_container_traffic = encrypt_inter_container_traffic
144+
141145
@abstractmethod
142146
def train_image(self):
143147
"""Return the Docker image to use for training.
@@ -429,6 +433,10 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na
429433
if 'MetricDefinitons' in job_details['AlgorithmSpecification']:
430434
init_params['metric_definitions'] = job_details['AlgorithmSpecification']['MetricsDefinition']
431435

436+
if 'EnableInterContainerTrafficEncryption' in job_details:
437+
init_params['encrypt_inter_container_traffic'] = \
438+
job_details['EnableInterContainerTrafficEncryption']
439+
432440
subnets, security_group_ids = vpc_utils.from_dict(job_details.get(vpc_utils.VPC_CONFIG_KEY))
433441
if subnets:
434442
init_params['subnets'] = subnets
@@ -555,6 +563,9 @@ def start_new(cls, estimator, inputs):
555563
if estimator.enable_network_isolation():
556564
train_args['enable_network_isolation'] = True
557565

566+
if estimator.encrypt_inter_container_traffic:
567+
train_args['encrypt_inter_container_traffic'] = True
568+
558569
if isinstance(estimator, sagemaker.algorithm.AlgorithmEstimator):
559570
train_args['algorithm_arn'] = estimator.algorithm_arn
560571
else:
@@ -585,7 +596,8 @@ def __init__(self, image_name, role, train_instance_count, train_instance_type,
585596
train_volume_size=30, train_volume_kms_key=None, train_max_run=24 * 60 * 60,
586597
input_mode='File', output_path=None, output_kms_key=None, base_job_name=None,
587598
sagemaker_session=None, hyperparameters=None, tags=None, subnets=None, security_group_ids=None,
588-
model_uri=None, model_channel_name='model', metric_definitions=None):
599+
model_uri=None, model_channel_name='model', metric_definitions=None,
600+
encrypt_inter_container_traffic=False):
589601
"""Initialize an ``Estimator`` instance.
590602
591603
Args:
@@ -640,14 +652,17 @@ def __init__(self, image_name, role, train_instance_count, train_instance_type,
640652
training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
641653
the regular expression used to extract the metric from the logs. This should be defined only
642654
for jobs that don't use an Amazon algorithm.
655+
encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted
656+
for the training job (default: ``False``).
643657
"""
644658
self.image_name = image_name
645659
self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {}
646660
super(Estimator, self).__init__(role, train_instance_count, train_instance_type,
647661
train_volume_size, train_volume_kms_key, train_max_run, input_mode,
648662
output_path, output_kms_key, base_job_name, sagemaker_session,
649663
tags, subnets, security_group_ids, model_uri=model_uri,
650-
model_channel_name=model_channel_name, metric_definitions=metric_definitions)
664+
model_channel_name=model_channel_name, metric_definitions=metric_definitions,
665+
encrypt_inter_container_traffic=encrypt_inter_container_traffic)
651666

652667
def train_image(self):
653668
"""
@@ -743,7 +758,7 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
743758
entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
744759
as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
745760
source_dir (str): Path (absolute or relative) to a directory with any other training
746-
source code dependencies aside from tne entry point file (default: None). Structure within this
761+
source code dependencies aside from the entry point file (default: None). Structure within this
747762
directory is preserved when training on Amazon SageMaker.
748763
dependencies (list[str]): A list of paths to directories (absolute or relative) with
749764
any additional libraries that will be exported to the container (default: []).

src/sagemaker/session.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ def default_bucket(self):
215215

216216
def train(self, input_mode, input_config, role, job_name, output_config, # noqa: C901
217217
resource_config, vpc_config, hyperparameters, stop_condition, tags, metric_definitions,
218-
enable_network_isolation=False, image=None, algorithm_arn=None):
218+
enable_network_isolation=False, image=None, algorithm_arn=None,
219+
encrypt_inter_container_traffic=False):
219220
"""Create an Amazon SageMaker training job.
220221
221222
Args:
@@ -261,6 +262,8 @@ def train(self, input_mode, input_config, role, job_name, output_config, # noqa
261262
network isolation or not.
262263
image (str): Docker image containing training code.
263264
algorithm_arn (str): Algorithm Arn from Marketplace.
265+
encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is
266+
encrypted for the training job (default: ``False``).
264267
265268
Returns:
266269
str: ARN of the training job, if it is created.
@@ -308,6 +311,10 @@ def train(self, input_mode, input_config, role, job_name, output_config, # noqa
308311
if enable_network_isolation:
309312
train_request['EnableNetworkIsolation'] = enable_network_isolation
310313

314+
if encrypt_inter_container_traffic:
315+
train_request['EnableInterContainerTrafficEncryption'] = \
316+
encrypt_inter_container_traffic
317+
311318
LOGGER.info('Creating training-job with name: {}'.format(job_name))
312319
LOGGER.debug('train request: {}'.format(json.dumps(train_request, indent=4)))
313320
self.sagemaker_client.create_training_job(**train_request)
@@ -351,7 +358,7 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
351358
static_hyperparameters, input_mode, metric_definitions,
352359
role, input_config, output_config, resource_config, stop_condition, tags,
353360
warm_start_config, enable_network_isolation=False, image=None, algorithm_arn=None,
354-
early_stopping_type='Off'):
361+
early_stopping_type='Off', encrypt_inter_container_traffic=False):
355362
"""Create an Amazon SageMaker hyperparameter tuning job
356363
357364
Args:
@@ -400,6 +407,9 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
400407
early_stopping_type (str): Specifies whether early stopping is enabled for the job.
401408
Can be either 'Auto' or 'Off'. If set to 'Off', early stopping will not be attempted.
402409
If set to 'Auto', early stopping of some training jobs may happen, but is not guaranteed to.
410+
encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers
411+
is encrypted for the training jobs started for this hyperparameter tuning job. Set to ``False``
412+
by default.
403413
"""
404414
tune_request = {
405415
'HyperParameterTuningJobName': job_name,
@@ -450,6 +460,9 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
450460
if enable_network_isolation:
451461
tune_request['TrainingJobDefinition']['EnableNetworkIsolation'] = True
452462

463+
if encrypt_inter_container_traffic:
464+
tune_request['TrainingJobDefinition']['EnableInterContainerTrafficEncryption'] = True
465+
453466
LOGGER.info('Creating hyperparameter tuning job with name: {}'.format(job_name))
454467
LOGGER.debug('tune request: {}'.format(json.dumps(tune_request, indent=4)))
455468
self.sagemaker_client.create_hyper_parameter_tuning_job(**tune_request)

src/sagemaker/tuner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,8 @@ def start_new(cls, tuner, inputs):
641641
tuner_args['image'] = tuner.estimator.train_image()
642642

643643
tuner_args['enable_network_isolation'] = tuner.estimator.enable_network_isolation()
644+
tuner_args['encrypt_inter_container_traffic'] = \
645+
tuner.estimator.encrypt_inter_container_traffic
644646

645647
tuner.estimator.sagemaker_session.tune(**tuner_args)
646648

tests/integ/test_tf.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from sagemaker.utils import sagemaker_timestamp
2323
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES, PYTHON_VERSION
2424
from tests.integ.timeout import timeout_and_delete_endpoint_by_name, timeout
25-
from tests.integ.vpc_test_utils import get_or_create_vpc_resources
25+
from tests.integ.vpc_test_utils import get_or_create_vpc_resources, setup_security_group_for_encryption
2626

2727
DATA_PATH = os.path.join(DATA_DIR, 'iris', 'data')
2828

@@ -149,6 +149,8 @@ def test_tf_vpc_multi(sagemaker_session, tf_full_version):
149149
subnet_ids, security_group_id = get_or_create_vpc_resources(ec2_client,
150150
sagemaker_session.boto_session.region_name)
151151

152+
setup_security_group_for_encryption(ec2_client, security_group_id)
153+
152154
estimator = TensorFlow(entry_point=script_path,
153155
role='SageMakerRole',
154156
framework_version=tf_full_version,
@@ -160,7 +162,8 @@ def test_tf_vpc_multi(sagemaker_session, tf_full_version):
160162
sagemaker_session=sagemaker_session,
161163
base_job_name='test-vpc-tf',
162164
subnets=subnet_ids,
163-
security_group_ids=[security_group_id])
165+
security_group_ids=[security_group_id],
166+
encrypt_inter_container_traffic=True)
164167

165168
with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
166169
estimator.fit(train_input)
@@ -170,6 +173,7 @@ def test_tf_vpc_multi(sagemaker_session, tf_full_version):
170173
TrainingJobName=estimator.latest_training_job.name)
171174
assert set(subnet_ids) == set(job_desc['VpcConfig']['Subnets'])
172175
assert [security_group_id] == job_desc['VpcConfig']['SecurityGroupIds']
176+
assert job_desc['EnableInterContainerTrafficEncryption'] is True
173177

174178
endpoint_name = estimator.latest_training_job.name
175179
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):

tests/integ/vpc_test_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,17 @@ def get_or_create_vpc_resources(ec2_client, region, name=VPC_NAME):
8989
else:
9090
print('creating new vpc: {}'.format(name))
9191
return _create_vpc_with_name(ec2_client, region, name)
92+
93+
94+
def setup_security_group_for_encryption(ec2_client, security_group_id):
95+
sg_desc = ec2_client.describe_security_groups(GroupIds=[security_group_id])
96+
ingress_perms = sg_desc['SecurityGroups'][0]['IpPermissions']
97+
if len(ingress_perms) == 1:
98+
ec2_client.\
99+
authorize_security_group_ingress(GroupId=security_group_id,
100+
IpPermissions=[{'IpProtocol': '50',
101+
'UserIdGroupPairs': [{'GroupId': security_group_id}]},
102+
{'IpProtocol': 'udp',
103+
'FromPort': 500,
104+
'ToPort': 500,
105+
'UserIdGroupPairs': [{'GroupId': security_group_id}]}])

tests/unit/test_algorithm.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,3 +894,22 @@ def test_algorithm_enable_network_isolation_with_product_id(sagemaker_session):
894894

895895
network_isolation = estimator.enable_network_isolation()
896896
assert network_isolation is True
897+
898+
899+
def test_algorithm_encrypt_inter_container_traffic(sagemaker_session):
900+
response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
901+
response['encrypt_inter_container_traffic'] = True
902+
sagemaker_session.sagemaker_client.describe_algorithm = Mock(
903+
return_value=response)
904+
905+
estimator = AlgorithmEstimator(
906+
algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
907+
role='SageMakerRole',
908+
train_instance_type='ml.m4.xlarge',
909+
train_instance_count=1,
910+
sagemaker_session=sagemaker_session,
911+
encrypt_inter_container_traffic=True
912+
)
913+
914+
encrypt_inter_container_traffic = estimator.encrypt_inter_container_traffic
915+
assert encrypt_inter_container_traffic is True

0 commit comments

Comments
 (0)