Skip to content

Commit 2762955

Browse files
authored
Merge branch 'master' into update_HF_image
2 parents 436b49b + f7a57c2 commit 2762955

File tree

7 files changed

+111
-12
lines changed

7 files changed

+111
-12
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## v2.59.3.post0 (2021-09-22)
4+
5+
### Documentation Changes
6+
7+
* Info about offline s3 bucket key when creating feature group
8+
9+
## v2.59.3 (2021-09-20)
10+
311
## v2.59.2 (2021-09-15)
412

513
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.59.3.dev0
1+
2.59.4.dev0

src/sagemaker/algorithm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,8 @@ def __init__(
7171
access training data and model artifacts. After the endpoint
7272
is created, the inference code might use the IAM role, if it
7373
needs to access an AWS resource.
74-
instance_count (int): Number of Amazon EC2 instances to
75-
use for training. instance_type (str): Type of EC2
76-
instance to use for training, for example, 'ml.c4.xlarge'.
74+
instance_count (int): Number of Amazon EC2 instances to use for training.
75+
instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
7776
volume_size (int): Size in GB of the EBS volume to use for
7877
storing input data during training (default: 30). Must be large enough to store
7978
training data if File Mode is used (which is the default).

src/sagemaker/feature_store/feature_group.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,13 @@ def create(
457457
online_store_kms_key_id (str): KMS key id for online store.
458458
enable_online_store (bool): whether to enable online store or not.
459459
offline_store_kms_key_id (str): KMS key id for offline store.
460+
If a KMS encryption key is not specified, SageMaker encrypts all data at
461+
rest using the default AWS KMS key. By defining your bucket-level key for
462+
SSE, you can reduce the cost of AWS KMS requests.
463+
For more information, see
464+
`Bucket Key
465+
<https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-key.html>`_
466+
in the Amazon S3 User Guide.
460467
disable_glue_table_creation (bool): whether to turn off Glue table creation or not.
461468
data_catalog_config (DataCatalogConfig): configuration for Metadata store.
462469
description (str): description of the FeatureGroup.

src/sagemaker/image_uri_config/huggingface.json

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@
179179
"us-west-1": "763104351884",
180180
"us-west-2": "763104351884"
181181
},
182-
"repository": "huggingface-pytorch-training"
182+
"repository": "huggingface-pytorch-training",
183+
"container_version": {"gpu":"cu110-ubuntu18.04"}
183184
},
184185
"pytorch1.7.1": {
185186
"py_versions": ["py36"],
@@ -210,7 +211,40 @@
210211
"us-west-1": "763104351884",
211212
"us-west-2": "763104351884"
212213
},
213-
"repository": "huggingface-pytorch-training"
214+
"repository": "huggingface-pytorch-training",
215+
"container_version": {"gpu":"cu110-ubuntu18.04"}
216+
},
217+
"pytorch1.8.1": {
218+
"py_versions": ["py36"],
219+
"registries": {
220+
"af-south-1": "626614931356",
221+
"ap-east-1": "871362719292",
222+
"ap-northeast-1": "763104351884",
223+
"ap-northeast-2": "763104351884",
224+
"ap-northeast-3": "364406365360",
225+
"ap-south-1": "763104351884",
226+
"ap-southeast-1": "763104351884",
227+
"ap-southeast-2": "763104351884",
228+
"ca-central-1": "763104351884",
229+
"cn-north-1": "727897471807",
230+
"cn-northwest-1": "727897471807",
231+
"eu-central-1": "763104351884",
232+
"eu-north-1": "763104351884",
233+
"eu-west-1": "763104351884",
234+
"eu-west-2": "763104351884",
235+
"eu-west-3": "763104351884",
236+
"eu-south-1": "692866216735",
237+
"me-south-1": "217643126080",
238+
"sa-east-1": "763104351884",
239+
"us-east-1": "763104351884",
240+
"us-east-2": "763104351884",
241+
"us-gov-west-1": "442386744353",
242+
"us-iso-east-1": "886529160074",
243+
"us-west-1": "763104351884",
244+
"us-west-2": "763104351884"
245+
},
246+
"repository": "huggingface-pytorch-training",
247+
"container_version": {"gpu":"cu111-ubuntu18.04"}
214248
},
215249
"pytorch1.8.1": {
216250
"py_versions": ["py36"],
@@ -272,7 +306,8 @@
272306
"us-west-1": "763104351884",
273307
"us-west-2": "763104351884"
274308
},
275-
"repository": "huggingface-tensorflow-training"
309+
"repository": "huggingface-tensorflow-training",
310+
"container_version": {"gpu":"cu110-ubuntu18.04"}
276311
}
277312
}
278313
}
@@ -319,7 +354,40 @@
319354
"us-west-1": "763104351884",
320355
"us-west-2": "763104351884"
321356
},
322-
"repository": "huggingface-pytorch-inference"
357+
"repository": "huggingface-pytorch-inference",
358+
"container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
359+
},
360+
"pytorch1.8.1": {
361+
"py_versions": ["py36"],
362+
"registries": {
363+
"af-south-1": "626614931356",
364+
"ap-east-1": "871362719292",
365+
"ap-northeast-1": "763104351884",
366+
"ap-northeast-2": "763104351884",
367+
"ap-northeast-3": "364406365360",
368+
"ap-south-1": "763104351884",
369+
"ap-southeast-1": "763104351884",
370+
"ap-southeast-2": "763104351884",
371+
"ca-central-1": "763104351884",
372+
"cn-north-1": "727897471807",
373+
"cn-northwest-1": "727897471807",
374+
"eu-central-1": "763104351884",
375+
"eu-north-1": "763104351884",
376+
"eu-west-1": "763104351884",
377+
"eu-west-2": "763104351884",
378+
"eu-west-3": "763104351884",
379+
"eu-south-1": "692866216735",
380+
"me-south-1": "217643126080",
381+
"sa-east-1": "763104351884",
382+
"us-east-1": "763104351884",
383+
"us-east-2": "763104351884",
384+
"us-gov-west-1": "442386744353",
385+
"us-iso-east-1": "886529160074",
386+
"us-west-1": "763104351884",
387+
"us-west-2": "763104351884"
388+
},
389+
"repository": "huggingface-pytorch-inference",
390+
"container_version": {"gpu":"cu111-ubuntu18.04", "cpu":"ubuntu18.04" }
323391
},
324392
"pytorch1.8.1": {
325393
"py_versions": ["py36"],
@@ -381,7 +449,8 @@
381449
"us-west-1": "763104351884",
382450
"us-west-2": "763104351884"
383451
},
384-
"repository": "huggingface-tensorflow-inference"
452+
"repository": "huggingface-tensorflow-inference",
453+
"container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
385454
}
386455
}
387456
}

src/sagemaker/image_uris.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def retrieve(
4141
):
4242
"""Retrieves the ECR URI for the Docker image matching the given arguments.
4343
44+
Ideally this function should not be called directly; rather, it should be called from the
45+
fit() function inside a framework estimator.
46+
4447
Args:
4548
framework (str): The name of the framework or algorithm.
4649
region (str): The AWS region.
@@ -56,7 +59,11 @@ def retrieve(
5659
image_scope (str): The image type, i.e. what it is used for.
5760
Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
5861
``image_scope`` is ignored.
59-
container_version (str): the version of docker image
62+
container_version (str): the version of docker image.
63+
Ideally the value of this parameter should be created inside the framework.
64+
For custom use, see the list of supported container versions:
65+
https://github.com/aws/deep-learning-containers/blob/master/available_images.md
66+
(default: None).
6067
distribution (dict): A dictionary with information on how to run distributed training
6168
(default: None).
6269
@@ -66,10 +73,12 @@ def retrieve(
6673
Raises:
6774
ValueError: If the combination of arguments specified is not supported.
6875
"""
76+
6977
config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)
7078
original_version = version
7179
version = _validate_version_and_set_if_needed(version, config, framework)
7280
version_config = config["versions"][_version_for_config(version, config)]
81+
7382
if framework == HUGGING_FACE_FRAMEWORK:
7483
if version_config.get("version_aliases"):
7584
full_base_framework_version = version_config["version_aliases"].get(
@@ -81,7 +90,6 @@ def retrieve(
8190

8291
py_version = _validate_py_version_and_set_if_needed(py_version, version_config, framework)
8392
version_config = version_config.get(py_version) or version_config
84-
8593
registry = _registry_from_region(region, version_config["registries"])
8694
hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]
8795

@@ -91,11 +99,16 @@ def retrieve(
9199
instance_type, config.get("processors") or version_config.get("processors")
92100
)
93101

102+
# if container version is available in .json file, utilize that
103+
if version_config.get("container_version"):
104+
container_version = version_config["container_version"][processor]
105+
94106
if framework == HUGGING_FACE_FRAMEWORK:
95107
pt_or_tf_version = (
96108
re.compile("^(pytorch|tensorflow)(.*)$").match(base_framework_version).group(2)
97109
)
98110
tag_prefix = f"{pt_or_tf_version}-transformers{original_version}"
111+
99112
else:
100113
tag_prefix = version_config.get("tag_prefix", version)
101114

@@ -105,6 +118,7 @@ def retrieve(
105118
py_version,
106119
container_version,
107120
)
121+
108122
if _should_auto_select_container_version(instance_type, distribution):
109123
container_versions = {
110124
"tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
@@ -120,7 +134,9 @@ def retrieve(
120134
"pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
121135
"pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
122136
}
137+
123138
key = "-".join([framework, tag])
139+
124140
if key in container_versions:
125141
tag = "-".join([tag, container_versions[key]])
126142

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def _huggingface_base_fm_version(huggingface_version, base_fw, fixture_prefix):
400400
if len(original_version.split(".")) == 2:
401401
base_fw_version = ".".join(base_fw_version.split(".")[:-1])
402402
versions.append(base_fw_version)
403-
return versions
403+
return sorted(versions, reverse=True)
404404

405405

406406
def _generate_huggingface_base_fw_latest_versions(

0 commit comments

Comments
 (0)