Skip to content

Commit 2762955

Browse files
authored
Merge branch 'master' into update_HF_image
2 parents 436b49b + f7a57c2 commit 2762955

File tree

7 files changed

+111
-12
lines changed

7 files changed

+111
-12
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## v2.59.3.post0 (2021-09-22)
4+
5+
### Documentation Changes
6+
7+
* Info about offline s3 bucket key when creating feature group
8+
9+
## v2.59.3 (2021-09-20)
10+
311
## v2.59.2 (2021-09-15)
412

513
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.59.3.dev0
1+
2.59.4.dev0

src/sagemaker/algorithm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,8 @@ def __init__(
7171
access training data and model artifacts. After the endpoint
7272
is created, the inference code might use the IAM role, if it
7373
needs to access an AWS resource.
74-
instance_count (int): Number of Amazon EC2 instances to
75-
use for training. instance_type (str): Type of EC2
76-
instance to use for training, for example, 'ml.c4.xlarge'.
74+
instance_count (int): Number of Amazon EC2 instances to use for training.
75+
instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
7776
volume_size (int): Size in GB of the EBS volume to use for
7877
storing input data during training (default: 30). Must be large enough to store
7978
training data if File Mode is used (which is the default).

src/sagemaker/feature_store/feature_group.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,13 @@ def create(
457457
online_store_kms_key_id (str): KMS key id for online store.
458458
enable_online_store (bool): whether to enable online store or not.
459459
offline_store_kms_key_id (str): KMS key id for offline store.
460+
If a KMS encryption key is not specified, SageMaker encrypts all data at
461+
rest using the default AWS KMS key. By defining your bucket-level key for
462+
SSE, you can reduce the cost of AWS KMS requests.
463+
For more information, see
464+
`Bucket Key
465+
<https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-key.html>`_
466+
in the Amazon S3 User Guide.
460467
disable_glue_table_creation (bool): whether to turn off Glue table creation or not.
461468
data_catalog_config (DataCatalogConfig): configuration for Metadata store.
462469
description (str): description of the FeatureGroup.

src/sagemaker/image_uri_config/huggingface.json

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@
179179
"us-west-1": "763104351884",
180180
"us-west-2": "763104351884"
181181
},
182-
"repository": "huggingface-pytorch-training"
182+
"repository": "huggingface-pytorch-training",
183+
"container_version": {"gpu":"cu110-ubuntu18.04"}
183184
},
184185
"pytorch1.7.1": {
185186
"py_versions": ["py36"],
@@ -210,7 +211,40 @@
210211
"us-west-1": "763104351884",
211212
"us-west-2": "763104351884"
212213
},
213-
"repository": "huggingface-pytorch-training"
214+
"repository": "huggingface-pytorch-training",
215+
"container_version": {"gpu":"cu110-ubuntu18.04"}
216+
},
217+
"pytorch1.8.1": {
218+
"py_versions": ["py36"],
219+
"registries": {
220+
"af-south-1": "626614931356",
221+
"ap-east-1": "871362719292",
222+
"ap-northeast-1": "763104351884",
223+
"ap-northeast-2": "763104351884",
224+
"ap-northeast-3": "364406365360",
225+
"ap-south-1": "763104351884",
226+
"ap-southeast-1": "763104351884",
227+
"ap-southeast-2": "763104351884",
228+
"ca-central-1": "763104351884",
229+
"cn-north-1": "727897471807",
230+
"cn-northwest-1": "727897471807",
231+
"eu-central-1": "763104351884",
232+
"eu-north-1": "763104351884",
233+
"eu-west-1": "763104351884",
234+
"eu-west-2": "763104351884",
235+
"eu-west-3": "763104351884",
236+
"eu-south-1": "692866216735",
237+
"me-south-1": "217643126080",
238+
"sa-east-1": "763104351884",
239+
"us-east-1": "763104351884",
240+
"us-east-2": "763104351884",
241+
"us-gov-west-1": "442386744353",
242+
"us-iso-east-1": "886529160074",
243+
"us-west-1": "763104351884",
244+
"us-west-2": "763104351884"
245+
},
246+
"repository": "huggingface-pytorch-training",
247+
"container_version": {"gpu":"cu111-ubuntu18.04"}
214248
},
215249
"pytorch1.8.1": {
216250
"py_versions": ["py36"],
@@ -272,7 +306,8 @@
272306
"us-west-1": "763104351884",
273307
"us-west-2": "763104351884"
274308
},
275-
"repository": "huggingface-tensorflow-training"
309+
"repository": "huggingface-tensorflow-training",
310+
"container_version": {"gpu":"cu110-ubuntu18.04"}
276311
}
277312
}
278313
}
@@ -319,7 +354,40 @@
319354
"us-west-1": "763104351884",
320355
"us-west-2": "763104351884"
321356
},
322-
"repository": "huggingface-pytorch-inference"
357+
"repository": "huggingface-pytorch-inference",
358+
"container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
359+
},
360+
"pytorch1.8.1": {
361+
"py_versions": ["py36"],
362+
"registries": {
363+
"af-south-1": "626614931356",
364+
"ap-east-1": "871362719292",
365+
"ap-northeast-1": "763104351884",
366+
"ap-northeast-2": "763104351884",
367+
"ap-northeast-3": "364406365360",
368+
"ap-south-1": "763104351884",
369+
"ap-southeast-1": "763104351884",
370+
"ap-southeast-2": "763104351884",
371+
"ca-central-1": "763104351884",
372+
"cn-north-1": "727897471807",
373+
"cn-northwest-1": "727897471807",
374+
"eu-central-1": "763104351884",
375+
"eu-north-1": "763104351884",
376+
"eu-west-1": "763104351884",
377+
"eu-west-2": "763104351884",
378+
"eu-west-3": "763104351884",
379+
"eu-south-1": "692866216735",
380+
"me-south-1": "217643126080",
381+
"sa-east-1": "763104351884",
382+
"us-east-1": "763104351884",
383+
"us-east-2": "763104351884",
384+
"us-gov-west-1": "442386744353",
385+
"us-iso-east-1": "886529160074",
386+
"us-west-1": "763104351884",
387+
"us-west-2": "763104351884"
388+
},
389+
"repository": "huggingface-pytorch-inference",
390+
"container_version": {"gpu":"cu111-ubuntu18.04", "cpu":"ubuntu18.04" }
323391
},
324392
"pytorch1.8.1": {
325393
"py_versions": ["py36"],
@@ -381,7 +449,8 @@
381449
"us-west-1": "763104351884",
382450
"us-west-2": "763104351884"
383451
},
384-
"repository": "huggingface-tensorflow-inference"
452+
"repository": "huggingface-tensorflow-inference",
453+
"container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
385454
}
386455
}
387456
}

src/sagemaker/image_uris.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def retrieve(
4141
):
4242
"""Retrieves the ECR URI for the Docker image matching the given arguments.
4343
44+
Ideally this function should not be called directly; rather, it should be called from the
45+
fit() function inside a framework estimator.
46+
4447
Args:
4548
framework (str): The name of the framework or algorithm.
4649
region (str): The AWS region.
@@ -56,7 +59,11 @@ def retrieve(
5659
image_scope (str): The image type, i.e. what it is used for.
5760
Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
5861
``image_scope`` is ignored.
59-
container_version (str): the version of docker image
62+
container_version (str): the version of docker image.
63+
Ideally the value of this parameter should be created inside the framework.
64+
For custom use, see the list of supported container versions:
65+
https://github.com/aws/deep-learning-containers/blob/master/available_images.md
66+
(default: None).
6067
distribution (dict): A dictionary with information on how to run distributed training
6168
(default: None).
6269
@@ -66,10 +73,12 @@ def retrieve(
6673
Raises:
6774
ValueError: If the combination of arguments specified is not supported.
6875
"""
76+
6977
config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)
7078
original_version = version
7179
version = _validate_version_and_set_if_needed(version, config, framework)
7280
version_config = config["versions"][_version_for_config(version, config)]
81+
7382
if framework == HUGGING_FACE_FRAMEWORK:
7483
if version_config.get("version_aliases"):
7584
full_base_framework_version = version_config["version_aliases"].get(
@@ -81,7 +90,6 @@ def retrieve(
8190

8291
py_version = _validate_py_version_and_set_if_needed(py_version, version_config, framework)
8392
version_config = version_config.get(py_version) or version_config
84-
8593
registry = _registry_from_region(region, version_config["registries"])
8694
hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]
8795

@@ -91,11 +99,16 @@ def retrieve(
9199
instance_type, config.get("processors") or version_config.get("processors")
92100
)
93101

102+
# if container version is available in .json file, utilize that
103+
if version_config.get("container_version"):
104+
container_version = version_config["container_version"][processor]
105+
94106
if framework == HUGGING_FACE_FRAMEWORK:
95107
pt_or_tf_version = (
96108
re.compile("^(pytorch|tensorflow)(.*)$").match(base_framework_version).group(2)
97109
)
98110
tag_prefix = f"{pt_or_tf_version}-transformers{original_version}"
111+
99112
else:
100113
tag_prefix = version_config.get("tag_prefix", version)
101114

@@ -105,6 +118,7 @@ def retrieve(
105118
py_version,
106119
container_version,
107120
)
121+
108122
if _should_auto_select_container_version(instance_type, distribution):
109123
container_versions = {
110124
"tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
@@ -120,7 +134,9 @@ def retrieve(
120134
"pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
121135
"pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
122136
}
137+
123138
key = "-".join([framework, tag])
139+
124140
if key in container_versions:
125141
tag = "-".join([tag, container_versions[key]])
126142

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def _huggingface_base_fm_version(huggingface_version, base_fw, fixture_prefix):
400400
if len(original_version.split(".")) == 2:
401401
base_fw_version = ".".join(base_fw_version.split(".")[:-1])
402402
versions.append(base_fw_version)
403-
return versions
403+
return sorted(versions, reverse=True)
404404

405405

406406
def _generate_huggingface_base_fw_latest_versions(

0 commit comments

Comments
 (0)