Commit 319d135

Merge branch 'master' into iss106487
2 parents e8e3d46 + a739945

File tree: 12 files changed, +258 −29 lines

CHANGELOG.md

Lines changed: 25 additions & 0 deletions

@@ -1,5 +1,30 @@
 # Changelog

+## v2.24.1 (2021-01-28)
+
+### Bug Fixes and Other Changes
+
+ * fix collect-tests tox env
+ * create profiler specific unsupported regions
+ * Update smd_model_parallel_pytorch.rst
+
+## v2.24.0 (2021-01-22)
+
+### Features
+
+ * add support for Std:Join for pipelines
+ * Map image name to image uri
+ * friendly names for short URIs
+
+### Bug Fixes and Other Changes
+
+ * increase allowed time for search to get updated
+ * refactor distribution config construction
+
+### Documentation Changes
+
+ * Add SMP 1.2.0 API docs
+
 ## v2.23.6 (2021-01-20)

 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-2.23.7.dev0
+2.24.2.dev0

doc/api/training/smp_versions/v1.2.0/smd_model_parallel_pytorch.rst

Lines changed: 3 additions & 3 deletions (the removed and added lines render identically, so the changes are evidently whitespace-only)

@@ -140,16 +140,16 @@ This API document assumes you use the following import statements in your training script
     computation. \ ``bucket_cap_mb``\ controls the bucket size in MegaBytes
     (MB).

-   - ``trace_memory_usage`` (default: False): When set to True, the library attempts
+   - ``trace_memory_usage`` (default: False): When set to True, the library attempts
     to measure memory usage per module during tracing. If this is disabled,
     memory usage will be estimated through the sizes of tensors returned from
     the module.

-   - ``broadcast_buffers`` (default: True): Flag to be used with ``ddp=True``.
+   - ``broadcast_buffers`` (default: True): Flag to be used with ``ddp=True``.
     This parameter is forwarded to the underlying ``DistributedDataParallel`` wrapper.
     Please see: `broadcast_buffer <https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel>`__.

-   - ``gradient_as_bucket_view (PyTorch 1.7 only)`` (default: False): To be
+   - ``gradient_as_bucket_view (PyTorch 1.7 only)`` (default: False): To be
     used with ``ddp=True``. This parameter is forwarded to the underlying
     ``DistributedDataParallel`` wrapper. Please see `gradient_as_bucket_view <https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel>`__.

src/sagemaker/estimator.py

Lines changed: 3 additions & 2 deletions

@@ -49,6 +49,7 @@
     UploadedCode,
     validate_source_dir,
     _region_supports_debugger,
+    _region_supports_profiler,
     get_mp_parameters,
 )
 from sagemaker.inputs import TrainingInput

@@ -494,7 +495,7 @@ def _prepare_profiler_for_training(self):
         """Set necessary values and do basic validations in profiler config and profiler rules.

         When user explicitly set rules to an empty list, default profiler rule won't be enabled.
-        Default profiler rule will be enabled when either:
+        Default profiler rule will be enabled in supported regions when either:
         1. user doesn't specify any rules, i.e., rules=None; or
         2. user only specify debugger rules, i.e., rules=[Rule.sagemaker(...)]
         """

@@ -503,7 +504,7 @@ def _prepare_profiler_for_training(self):
                 raise RuntimeError("profiler_config cannot be set when disable_profiler is True.")
             if self.profiler_rules:
                 raise RuntimeError("ProfilerRule cannot be set when disable_profiler is True.")
-        elif _region_supports_debugger(self.sagemaker_session.boto_region_name):
+        elif _region_supports_profiler(self.sagemaker_session.boto_region_name):
             if self.profiler_config is None:
                 self.profiler_config = ProfilerConfig(s3_output_path=self.output_path)
             if self.rules is None or (self.rules and not self.profiler_rules):
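
What this means for callers: an estimator created with rules=None (or with only Debugger rules) still gets the default profiler rule, but now only when the session's region passes _region_supports_profiler(); rules=[] keeps it disabled as before. A minimal sketch under those assumptions (the estimator class, script name, and role ARN are illustrative, not from this commit):

    from sagemaker.debugger import Rule, rule_configs
    from sagemaker.sklearn import SKLearn

    # Only Debugger rules are given, so _prepare_profiler_for_training() will
    # also attach the default profiler rule -- unless the session's region is
    # in PROFILER_UNSUPPORTED_REGIONS, in which case profiler setup is skipped.
    estimator = SKLearn(
        entry_point="train.py",  # hypothetical training script
        role="arn:aws:iam::123456789012:role/ExampleRole",  # hypothetical role
        instance_type="ml.m5.xlarge",
        instance_count=1,
        framework_version="0.20.0",
        py_version="py3",
        rules=[Rule.sagemaker(rule_configs.loss_not_decreasing())],
    )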

src/sagemaker/fw_utils.py

Lines changed: 15 additions & 0 deletions

@@ -49,6 +49,8 @@
 )

 DEBUGGER_UNSUPPORTED_REGIONS = ("us-iso-east-1",)
+PROFILER_UNSUPPORTED_REGIONS = ("us-iso-east-1", "cn-north-1", "cn-northwest-1")
+
 SINGLE_GPU_INSTANCE_TYPES = ("ml.p2.xlarge", "ml.p3.2xlarge")
 SM_DATAPARALLEL_SUPPORTED_INSTANCE_TYPES = (
     "ml.p3.16xlarge",

@@ -550,6 +552,19 @@ def _region_supports_debugger(region_name):
     return region_name.lower() not in DEBUGGER_UNSUPPORTED_REGIONS


+def _region_supports_profiler(region_name):
+    """Returns bool indicating whether region supports Amazon SageMaker Debugger profiling feature.
+
+    Args:
+        region_name (str): Name of the region to check against.
+
+    Returns:
+        bool: Whether or not the region supports Amazon SageMaker Debugger profiling feature.
+
+    """
+    return region_name.lower() not in PROFILER_UNSUPPORTED_REGIONS
+
+
 def validate_version_or_image_args(framework_version, py_version, image_uri):
     """Checks if version or image arguments are specified.
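
A quick sketch of the new helper's behavior, following directly from the tuple and the case-insensitive membership check above:

    from sagemaker.fw_utils import _region_supports_profiler

    # The region name is lowercased before the lookup, so mixed-case input
    # is handled the same as lowercase.
    _region_supports_profiler("us-west-2")      # True: not in the unsupported tuple
    _region_supports_profiler("cn-north-1")     # False
    _region_supports_profiler("US-ISO-EAST-1")  # False: lowercased, then matched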

src/sagemaker/processing.py

Lines changed: 5 additions & 0 deletions

@@ -31,6 +31,7 @@
 from sagemaker.session import Session
 from sagemaker.network import NetworkConfig  # noqa: F401 # pylint: disable=unused-import
 from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.entities import Expression
 from sagemaker.dataset_definition.inputs import S3Input, DatasetDefinition
 from sagemaker.apiutils._base_types import ApiObject

@@ -338,6 +339,10 @@ def _normalize_outputs(self, outputs=None):
             # Generate a name for the ProcessingOutput if it doesn't have one.
             if output.output_name is None:
                 output.output_name = "output-{}".format(count)
+            # if the output's destination is a workflow expression, do no normalization
+            if isinstance(output.destination, Expression):
+                normalized_outputs.append(output)
+                continue
             # If the output's destination is not an s3_uri, create one.
             parse_result = urlparse(output.destination)
             if parse_result.scheme != "s3":
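
The early continue matters because a workflow expression is a structured object rather than a string: it is only resolved to an S3 URI by the Pipelines service at execution time, so feeding it to urlparse() would be meaningless. A minimal sketch of the case this guards (bucket and prefix values are hypothetical):

    from sagemaker.processing import ProcessingOutput
    from sagemaker.workflow.functions import Join

    # destination is an Expression, so _normalize_outputs() now appends the
    # output unchanged instead of trying to parse and rewrite the URI.
    output = ProcessingOutput(
        output_name="test_data",
        source="/opt/ml/processing/test",
        destination=Join(on="/", values=["s3:/", "my-bucket", "my-prefix"]),
    )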

src/sagemaker/workflow/functions.py

Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""The step definitions for workflow."""
+from __future__ import absolute_import
+
+from typing import List
+
+import attr
+
+from sagemaker.workflow.entities import Expression
+
+
+@attr.s
+class Join(Expression):
+    """Join together properties.
+
+    Attributes:
+        values (List[Union[PrimitiveType, Parameter]]): The primitive types
+            and parameters to join.
+        on (str): The string to join the values on (defaults to "").
+    """
+
+    on: str = attr.ib(factory=str)
+    values: List = attr.ib(factory=list)
+
+    @property
+    def expr(self):
+        """The expression dict for a `Join` function."""
+        return {
+            "Std:Join": {
+                "On": self.on,
+                "Values": [
+                    value.expr if hasattr(value, "expr") else value for value in self.values
+                ],
+            },
+        }
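
A short usage sketch of what the expression serializes to (the joined values here are hypothetical):

    from sagemaker.workflow.functions import Join

    # Values that expose an `expr` attribute (parameters, execution variables)
    # are serialized through it; plain primitives pass through unchanged.
    dest = Join(on="/", values=["s3:/", "my-bucket", "my-prefix"])
    print(dest.expr)
    # {'Std:Join': {'On': '/', 'Values': ['s3:/', 'my-bucket', 'my-prefix']}}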

tests/integ/test_experiments_analytics.py

Lines changed: 22 additions & 0 deletions

@@ -38,6 +38,17 @@ def experiment(sagemaker_session):

         time.sleep(15)  # wait for search to get updated

+        # allow search time thrice
+        for _ in range(3):
+            analytics = ExperimentAnalytics(
+                experiment_name=experiment_name, sagemaker_session=sagemaker_session
+            )
+
+            if len(analytics.dataframe().columns) > 0:
+                break
+
+            time.sleep(15)
+
         yield experiment_name
     finally:
         _delete_resources(sm, experiment_name, trials)

@@ -79,6 +90,17 @@ def experiment_with_artifacts(sagemaker_session):

         time.sleep(15)  # wait for search to get updated

+        # allow search time thrice
+        for _ in range(3):
+            analytics = ExperimentAnalytics(
+                experiment_name=experiment_name, sagemaker_session=sagemaker_session
+            )
+
+            if len(analytics.dataframe().columns) > 0:
+                break
+
+            time.sleep(15)
+
         yield experiment_name
     finally:
         _delete_resources(sm, experiment_name, trials)
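
This is the "increase allowed time for search to get updated" fix from the changelog: instead of assuming a single 15-second sleep is enough for Search to index the new experiment, each fixture now polls up to three more times at 15-second intervals, breaking out as soon as the analytics dataframe comes back non-empty.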

tests/integ/test_workflow.py

Lines changed: 41 additions & 21 deletions

@@ -38,6 +38,8 @@
 from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
 from sagemaker.workflow.condition_step import ConditionStep
 from sagemaker.dataset_definition.inputs import DatasetDefinition, AthenaDatasetDefinition
+from sagemaker.workflow.execution_variables import ExecutionVariables
+from sagemaker.workflow.functions import Join
 from sagemaker.workflow.parameters import (
     ParameterInteger,
     ParameterString,

@@ -72,16 +74,9 @@ def role(sagemaker_session):
     return get_execution_role(sagemaker_session)


-# TODO-reinvent-2020: remove use of specific region and this session
 @pytest.fixture(scope="module")
-def region():
-    return "us-east-2"
-
-
-# TODO-reinvent-2020: remove use of specific region and this session
-@pytest.fixture(scope="module")
-def workflow_session(region):
-    boto_session = boto3.Session(region_name=region)
+def workflow_session(region_name):
+    boto_session = boto3.Session(region_name=region_name)

     sagemaker_client_config = dict()
     sagemaker_client_config.setdefault("config", Config(retries=dict(max_attempts=2)))

@@ -134,6 +129,7 @@ def test_three_step_definition(
     framework_version = "0.20.0"
     instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    output_prefix = ParameterString(name="OutputPrefix", default_value="output")

     input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

@@ -154,7 +150,20 @@
         ],
         outputs=[
             ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"),
-            ProcessingOutput(output_name="test_data", source="/opt/ml/processing/test"),
+            ProcessingOutput(
+                output_name="test_data",
+                source="/opt/ml/processing/test",
+                destination=Join(
+                    on="/",
+                    values=[
+                        "s3:/",
+                        sagemaker_session.default_bucket(),
+                        "test-sklearn",
+                        output_prefix,
+                        ExecutionVariables.PIPELINE_EXECUTION_ID,
+                    ],
+                ),
+            ),
         ],
         code=os.path.join(script_dir, "preprocessing.py"),
     )

@@ -194,7 +203,7 @@
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_type, instance_count],
+        parameters=[instance_type, instance_count, output_prefix],
         steps=[step_process, step_train, step_model],
         sagemaker_session=workflow_session,
     )

@@ -208,6 +217,7 @@
         {"Name": "InstanceType", "Type": "String", "DefaultValue": "ml.m5.xlarge"}.items()
     ),
     tuple({"Name": "InstanceCount", "Type": "Integer", "DefaultValue": 1}.items()),
+    tuple({"Name": "OutputPrefix", "Type": "String", "DefaultValue": "output"}.items()),
 ]
 )

@@ -251,17 +261,28 @@
     assert model_args["PrimaryContainer"]["ModelDataUrl"] == {
         "Get": "Steps.my-train.ModelArtifacts.S3ModelArtifacts"
     }
+    try:
+        response = pipeline.create(role)
+        create_arn = response["PipelineArn"]
+        assert re.match(
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
+            create_arn,
+        )
+    finally:
+        try:
+            pipeline.delete()
+        except Exception:
+            pass


-# TODO-reinvent-2020: Modify use of the workflow client
 def test_one_step_sklearn_processing_pipeline(
     sagemaker_session,
     workflow_session,
     role,
     sklearn_latest_version,
     cpu_instance_type,
     pipeline_name,
-    region,
+    region_name,
     athena_dataset_definition,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=2)

@@ -305,21 +326,21 @@
         response = pipeline.create(role)
         create_arn = response["PipelineArn"]
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}",
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
             create_arn,
         )

         pipeline.parameters = [ParameterInteger(name="InstanceCount", default_value=1)]
         response = pipeline.update(role)
         update_arn = response["PipelineArn"]
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}",
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
             update_arn,
         )

         execution = pipeline.start(parameters={})
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}/execution/",
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
             execution.arn,
         )

@@ -340,14 +361,13 @@
             pass


-# TODO-reinvent-2020: Modify use of the workflow client
 def test_conditional_pytorch_training_model_registration(
     sagemaker_session,
     workflow_session,
     role,
     cpu_instance_type,
     pipeline_name,
-    region,
+    region_name,
 ):
     base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
     entry_point = os.path.join(base_dir, "mnist.py")

@@ -420,18 +440,18 @@
         response = pipeline.create(role)
         create_arn = response["PipelineArn"]
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}", create_arn
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn
         )

         execution = pipeline.start(parameters={})
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}/execution/",
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
             execution.arn,
         )

         execution = pipeline.start(parameters={"GoodEnoughInput": 0})
         assert re.match(
-            fr"arn:aws:sagemaker:{region}:\d{{12}}:pipeline/{pipeline_name}/execution/",
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
             execution.arn,
         )
     finally:
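
Worth noting in the Join used for the processing output destination: joining on "/" with the literal "s3:/" as the first value yields the s3:// scheme, so at execution time the destination resolves to something like s3://<default-bucket>/test-sklearn/<OutputPrefix>/<execution-id>, giving each pipeline execution its own output prefix via ExecutionVariables.PIPELINE_EXECUTION_ID.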
