Skip to content

Commit 5aa0335

Browse files
authored
Merge pull request aws#37 from verdimrc/pr-framework-processor-round-02
Merge framework-processor-round-02 to framework-processor branch
2 parents 6721bfe + d78850e commit 5aa0335

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1619
-309
lines changed

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,30 @@
11
# Changelog
22

3+
## v2.42.1 (2021-05-27)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* default value removed if zero for integer param
8+
9+
## v2.42.0 (2021-05-24)
10+
11+
### Features
12+
13+
* support for custom pipeline execution name
14+
* Add data ingestion only data-wrangler flow recipe generation helper function
15+
16+
### Bug Fixes and Other Changes
17+
18+
* add kms key for processing job code upload
19+
* remove failing notebooks from notebook pr test
20+
* fix in and not in condition bug
21+
* Update overview.rst
22+
23+
### Documentation Changes
24+
25+
* Update "Ask a question" contact link
26+
* Update smdp docs with sparse_as_dense support
27+
328
## v2.41.0 (2021-05-17)
429

530
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.41.1.dev0
1+
2.42.2.dev0

src/sagemaker/_studio.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def _append_project_tags(tags=None, working_dir=None):
4646
return tags
4747

4848
all_tags = tags or []
49+
additional_tags = [tag for tag in additional_tags if tag not in all_tags]
4950
all_tags.extend(additional_tags)
5051

5152
return all_tags

src/sagemaker/huggingface/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
from __future__ import absolute_import
1515

1616
from sagemaker.huggingface.estimator import HuggingFace # noqa: F401
17+
from sagemaker.huggingface.processing import HuggingFaceProcessor # noqa:F401
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""This module contains code related to HuggingFace Processors which are used for Processing jobs.
14+
15+
These jobs let customers perform data pre-processing, post-processing, feature engineering,
16+
data validation, and model evaluation and interpretation on SageMaker.
17+
"""
18+
from __future__ import absolute_import
19+
20+
from sagemaker.processing import FrameworkProcessor
21+
from sagemaker.huggingface.estimator import HuggingFace
22+
23+
24+
class HuggingFaceProcessor(FrameworkProcessor):
25+
"""Handles Amazon SageMaker processing tasks for jobs using HuggingFace containers."""
26+
27+
estimator_cls = HuggingFace
28+
29+
def __init__(
30+
self,
31+
role,
32+
instance_count,
33+
instance_type,
34+
transformers_version=None,
35+
tensorflow_version=None,
36+
pytorch_version=None,
37+
py_version="py36",
38+
image_uri=None,
39+
command=None,
40+
volume_size_in_gb=30,
41+
volume_kms_key=None,
42+
output_kms_key=None,
43+
code_location=None,
44+
max_runtime_in_seconds=None,
45+
base_job_name=None,
46+
sagemaker_session=None,
47+
env=None,
48+
tags=None,
49+
network_config=None,
50+
):
51+
"""This processor executes a Python script in a HuggingFace execution environment.
52+
53+
Unless ``image_uri`` is specified, the environment is an Amazon-built Docker container
54+
that executes functions defined in the supplied ``code`` Python script.
55+
56+
The arguments have the same meaning as in ``FrameworkProcessor``, with the following
57+
exceptions.
58+
59+
Args:
60+
transformers_version (str): Transformers version you want to use for
61+
executing your model training code. Defaults to ``None``. Required unless
62+
``image_uri`` is provided. The current supported version is ``4.4.2``.
63+
tensorflow_version (str): TensorFlow version you want to use for
64+
executing your model training code. Defaults to ``None``. Required unless
65+
``pytorch_version`` is provided. The current supported version is ``2.4.1``.
66+
pytorch_version (str): PyTorch version you want to use for
67+
executing your model training code. Defaults to ``None``. Required unless
68+
``tensorflow_version`` is provided. The current supported version is ``1.6.0``.
69+
py_version (str): Python version you want to use for executing your model training
70+
code. Defaults to ``py36``. Required unless ``image_uri`` is provided. If
71+
using PyTorch, the current supported version is ``py36``. If using TensorFlow,
72+
the current supported version is ``py37``.
73+
74+
.. tip::
75+
76+
You can find additional parameters for initializing this class at
77+
:class:`~sagemaker.processing.FrameworkProcessor`.
78+
"""
79+
self.pytorch_version = pytorch_version
80+
self.tensorflow_version = tensorflow_version
81+
super().__init__(
82+
self.estimator_cls,
83+
transformers_version,
84+
role,
85+
instance_count,
86+
instance_type,
87+
py_version,
88+
image_uri,
89+
command,
90+
volume_size_in_gb,
91+
volume_kms_key,
92+
output_kms_key,
93+
code_location,
94+
max_runtime_in_seconds,
95+
base_job_name,
96+
sagemaker_session,
97+
env,
98+
tags,
99+
network_config,
100+
)
101+
102+
def _create_estimator(
103+
self,
104+
entry_point="",
105+
source_dir=None,
106+
dependencies=None,
107+
git_config=None,
108+
):
109+
"""Override default estimator factory function for HuggingFace's different parameters
110+
111+
HuggingFace estimators have 3 framework version parameters instead of one: The version for
112+
Transformers, PyTorch, and TensorFlow.
113+
"""
114+
return self.estimator_cls(
115+
transformers_version=self.framework_version,
116+
tensorflow_version=self.tensorflow_version,
117+
pytorch_version=self.pytorch_version,
118+
py_version=self.py_version,
119+
entry_point=entry_point,
120+
source_dir=source_dir,
121+
dependencies=dependencies,
122+
git_config=git_config,
123+
code_location=self.code_location,
124+
enable_network_isolation=False,
125+
image_uri=self.image_uri,
126+
role=self.role,
127+
instance_count=self.instance_count,
128+
instance_type=self.instance_type,
129+
sagemaker_session=self.sagemaker_session,
130+
debugger_hook_config=False,
131+
disable_profiler=True,
132+
)

src/sagemaker/image_uri_config/xgboost.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,35 @@
183183
"us-west-2": "246618743249"
184184
},
185185
"repository": "sagemaker-xgboost"
186+
},
187+
"1.3-1": {
188+
"registries": {
189+
"af-south-1": "510948584623",
190+
"ap-east-1": "651117190479",
191+
"ap-northeast-1": "354813040037",
192+
"ap-northeast-2": "366743142698",
193+
"ap-south-1": "720646828776",
194+
"ap-southeast-1": "121021644041",
195+
"ap-southeast-2": "783357654285",
196+
"ca-central-1": "341280168497",
197+
"cn-north-1": "450853457545",
198+
"cn-northwest-1": "451049120500",
199+
"eu-central-1": "492215442770",
200+
"eu-north-1": "662702820516",
201+
"eu-west-1": "141502667606",
202+
"eu-west-2": "764974769150",
203+
"eu-west-3": "659782779980",
204+
"eu-south-1": "978288397137",
205+
"me-south-1": "801668240914",
206+
"sa-east-1": "737474898029",
207+
"us-east-1": "683313688378",
208+
"us-east-2": "257758044811",
209+
"us-gov-west-1": "414596584902",
210+
"us-iso-east-1": "833128469047",
211+
"us-west-1": "746614075791",
212+
"us-west-2": "246618743249"
213+
},
214+
"repository": "sagemaker-xgboost"
186215
}
187216
}
188217
}

src/sagemaker/local/local_session.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,10 +475,30 @@ def invoke_endpoint(
475475

476476

477477
class LocalSession(Session):
478-
"""A LocalSession class definition."""
478+
"""A SageMaker ``Session`` class for Local Mode.
479479
480-
def __init__(self, boto_session=None, s3_endpoint_url=None):
480+
This class provides alternative Local Mode implementations for the functionality of
481+
:class:`~sagemaker.session.Session`.
482+
"""
483+
484+
def __init__(self, boto_session=None, s3_endpoint_url=None, disable_local_code=False):
485+
"""Create a Local SageMaker Session.
486+
487+
Args:
488+
boto_session (boto3.session.Session): The underlying Boto3 session which AWS service
489+
calls are delegated to (default: None). If not provided, one is created with
490+
default AWS configuration chain.
491+
s3_endpoint_url (str): Override the default endpoint URL for Amazon S3, if set
492+
(default: None).
493+
disable_local_code (bool): Set ``True`` to override the default AWS configuration
494+
chain to disable the ``local.local_code`` setting, which may not be supported for
495+
some SDK features (default: False).
496+
"""
481497
self.s3_endpoint_url = s3_endpoint_url
498+
# We use this instance attribute to avoid disrupting the __init__->_initialize API of the
499+
# parent class. Overwriting it after construction has no effect, so prefix it with _ to
500+
# discourage external use:
501+
self._disable_local_code = disable_local_code
482502

483503
super(LocalSession, self).__init__(boto_session)
484504

@@ -530,6 +550,8 @@ def _initialize(
530550
raise e
531551

532552
self.config = yaml.load(open(sagemaker_config_file, "r"))
553+
if self._disable_local_code and "local" in self.config:
554+
self.config["local"]["local_code"] = False
533555

534556
def logs_for_job(self, job_name, wait=False, poll=5, log_type="All"):
535557
"""A no-op method meant to override the sagemaker client.

src/sagemaker/mxnet/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
"""Placeholder docstring"""
13+
"""Classes for using MXNet with Amazon SageMaker."""
1414
from __future__ import absolute_import # noqa: F401
1515

1616
from sagemaker.mxnet.estimator import MXNet # noqa: F401
1717
from sagemaker.mxnet.model import MXNetModel, MXNetPredictor # noqa: F401
18+
from sagemaker.mxnet.processing import MXNetProcessor # noqa: F401

src/sagemaker/mxnet/processing.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ class MXNetProcessor(FrameworkProcessor):
2929
def __init__(
3030
self,
3131
framework_version, # New arg
32-
s3_prefix, # New arg
3332
role,
3433
instance_count,
3534
instance_type,
3635
py_version="py3", # New kwarg
3736
image_uri=None,
37+
command=None,
3838
volume_size_in_gb=30,
3939
volume_kms_key=None,
4040
output_kms_key=None,
41+
code_location=None, # New arg
4142
max_runtime_in_seconds=None,
4243
base_job_name=None,
4344
sagemaker_session=None,
@@ -49,27 +50,28 @@ def __init__(
4950
5051
Unless ``image_uri`` is specified, the MXNet environment is an
5152
Amazon-built Docker container that executes functions defined in the supplied
52-
``entry_point`` Python script.
53+
``code`` Python script.
5354
5455
The arguments have the exact same meaning as in ``FrameworkProcessor``.
5556
5657
.. tip::
5758
5859
You can find additional parameters for initializing this class at
59-
:class:`~smallmatter.ds.FrameworkProcessor`.
60+
:class:`~sagemaker.processing.FrameworkProcessor`.
6061
"""
6162
super().__init__(
6263
self.estimator_cls,
6364
framework_version,
64-
s3_prefix,
6565
role,
6666
instance_count,
6767
instance_type,
6868
py_version,
6969
image_uri,
70+
command,
7071
volume_size_in_gb,
7172
volume_kms_key,
7273
output_kms_key,
74+
code_location,
7375
max_runtime_in_seconds,
7476
base_job_name,
7577
sagemaker_session,

0 commit comments

Comments
 (0)