aws
diff --git a/‎CHANGELOG.md
Lines changed: 21 additions & 1 deletion b/‎CHANGELOG.md
Lines changed: 21 additions & 1 deletion
diff --git a/‎VERSION
Lines changed: 1 addition & 1 deletion b/‎VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/api/training/distributed.rst
Lines changed: 2 additions & 2 deletions b/‎doc/api/training/distributed.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst
Lines changed: 40 additions & 6 deletions b/‎doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst
Lines changed: 40 additions & 6 deletions
diff --git a/‎doc/api/training/smp_versions/latest.rst
Lines changed: 2 additions & 2 deletions b/‎doc/api/training/smp_versions/latest.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/doc_utils/jumpstart_doc_utils.py
Lines changed: 74 additions & 5 deletions b/‎doc/doc_utils/jumpstart_doc_utils.py
Lines changed: 74 additions & 5 deletions
diff --git a/‎doc/overview.rst
Lines changed: 1 addition & 1 deletion b/‎doc/overview.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/estimator.py
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/estimator.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/image_uri_config/huggingface-training-compiler.json
Lines changed: 64 additions & 1 deletion b/‎src/sagemaker/image_uri_config/huggingface-training-compiler.json
Lines changed: 64 additions & 1 deletion
@@ -1,5 +1,25 @@
 # Changelog
 
+## v2.87.0 (2022-04-20)
+
+### Features
+
+ * Add JumpStart example notebooks
+ * Add TensorFlow and PyTorch versions for SageMaker Training Compiler and expand to regular regions
+
+### Bug Fixes and Other Changes
+
+ * integs for training compiler in non-PDX regions
+ * TrainingStep cache misses due to timestamp based job name
+ * retry context delete
+ * Add more logging when unexpected number of artifacts found
+
+## v2.86.2 (2022-04-14)
+
+### Bug Fixes and Other Changes
+
+ * using uuid to randomize, otherwise system timestamp is used
+
 ## v2.86.1 (2022-04-13)
 
 ### Bug Fixes and Other Changes
@@ -159,7 +179,7 @@
 ### Features
 
  * override jumpstart content bucket
- * jumpstart model id suggestions
+ * jumpstart model ID suggestions
  * adding customer metadata support to registermodel step
 
 ### Bug Fixes and Other Changes
 
@@ -1 +1 @@
-2.86.2.dev0
+2.87.1.dev0
@@ -10,7 +10,7 @@ The SageMaker Distributed Data Parallel Library
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. toctree::
-    :maxdepth: 3
+    :maxdepth: 2
 
     smd_data_parallel
     sdp_versions/latest
@@ -23,7 +23,7 @@ The SageMaker Distributed Model Parallel Library
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
 
    smd_model_parallel
    smp_versions/latest
 
@@ -5,9 +5,48 @@ Release Notes
 New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed model parallel library.
 
-SageMaker Distributed Model Parallel 1.7.0 Release Notes
+SageMaker Distributed Model Parallel 1.8.0 Release Notes
 ========================================================
 
+*Date: March. 23. 2022*
+
+**New Features**
+
+* Added tensor parallelism support for the `GPT-J model
+  <https://huggingface.co/docs/transformers/model_doc/gptj>`_.
+  When using the GPT-J model of Hugging Face Transformers v4.17.0 with
+  tensor parallelism, the SageMaker model parallel library automatically
+  replaces the model with a tensor parallel distributed GPT-J model.
+  For more information, see `Support for Hugging Face Transformer Models
+  <https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-extended-features-pytorch-hugging-face.html>`_
+  in the *Amazon SageMaker Model Parallel Training developer guide*.
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers:
+
+* HuggingFace 4.17.0 DLC with PyTorch 1.10.2
+
+    .. code::
+
+      763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04
+
+
+    The binary file of this version of the library for custom container users:
+
+    .. code::
+
+      https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.10.0/build-artifacts/2022-03-12-00-33/smdistributed_modelparallel-1.8.0-cp38-cp38-linux_x86_64.whl
+
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Model Parallel 1.7.0 Release Notes
+--------------------------------------------------------
+
 *Date: March. 07. 2022*
 
 **Currency Updates**
@@ -49,11 +88,6 @@ This version passed benchmark testing and is migrated to the following AWS Deep
     763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.10.2-gpu-py38-cu113-ubuntu20.04-sagemaker
 
 
-----
-
-Release History
-===============
-
 SageMaker Distributed Model Parallel 1.6.0 Release Notes
 --------------------------------------------------------
 
 
@@ -10,8 +10,8 @@ depending on which version of the library you need to use.
 To use the library, reference the
 **Common API** documentation alongside the framework specific API documentation.
 
-Version 1.7.0 (Latest)
-======================
+Version 1.7.0, 1.8.0 (Latest)
+=============================
 
 To use the library, reference the Common API documentation alongside the framework specific API documentation.
 
 
@@ -14,12 +14,73 @@
 from urllib import request
 import json
 from packaging.version import Version
+from enum import Enum
+
+
+class Tasks(str, Enum):
+    """The ML task name as referenced in the infix of the model ID.
+
+    Each value is the short task token that ``get_model_task`` extracts from
+    a model ID (the second ``-``-separated component). The class mixes in
+    ``str``, so members compare equal to their plain-string values; this is
+    what lets ``get_model_task`` match a raw token against the ``TASK_MAP``
+    keys.
+    """
+
+    IC = "ic"  # image classification
+    OD = "od"  # object detection
+    OD1 = "od1"  # also mapped to object detection in TASK_MAP
+    SEMSEG = "semseg"  # semantic segmentation
+    IS = "is"  # instance segmentation
+    TC = "tc"  # text classification
+    SPC = "spc"  # sentence pair classification
+    EQA = "eqa"  # question answering
+    TEXT_GENERATION = "textgeneration"
+    IC_EMBEDDING = "icembedding"  # image embedding
+    TC_EMBEDDING = "tcembedding"  # text embedding
+    NER = "ner"  # named entity recognition
+    SUMMARIZATION = "summarization"
+    TRANSLATION = "translation"
+    TABULAR_REGRESSION = "regression"
+    TABULAR_CLASSIFICATION = "classification"
+
+
+class ProblemTypes(str, Enum):
+    """Possible problem types for JumpStart models.
+
+    The values are the human-readable labels that ``TASK_MAP`` associates
+    with each ``Tasks`` member and that ``create_jumpstart_model_table``
+    writes into the "Problem Type/Source" column.
+    """
+
+    # NOTE(review): members are formatted with ``str.format`` when the table is
+    # generated. On Python 3.11+ formatting a ``(str, Enum)`` member yields
+    # "ProblemTypes.NAME" instead of the value -- confirm the docs build
+    # interpreter, or format ``member.value`` explicitly.
+    IMAGE_CLASSIFICATION = "Image Classification"
+    IMAGE_EMBEDDING = "Image Embedding"
+    OBJECT_DETECTION = "Object Detection"
+    SEMANTIC_SEGMENTATION = "Semantic Segmentation"
+    INSTANCE_SEGMENTATION = "Instance Segmentation"
+    TEXT_CLASSIFICATION = "Text Classification"
+    TEXT_EMBEDDING = "Text Embedding"
+    QUESTION_ANSWERING = "Question Answering"
+    SENTENCE_PAIR_CLASSIFICATION = "Sentence Pair Classification"
+    TEXT_GENERATION = "Text Generation"
+    TEXT_SUMMARIZATION = "Text Summarization"
+    MACHINE_TRANSLATION = "Machine Translation"
+    NAMED_ENTITY_RECOGNITION = "Named Entity Recognition"
+    TABULAR_REGRESSION = "Regression"
+    TABULAR_CLASSIFICATION = "Classification"
+
 
 JUMPSTART_REGION = "eu-west-2"
 SDK_MANIFEST_FILE = "models_manifest.json"
 JUMPSTART_BUCKET_BASE_URL = "https://jumpstart-cache-prod-{}.s3.{}.amazonaws.com".format(
     JUMPSTART_REGION, JUMPSTART_REGION
 )
+# Maps the short task token found in a model ID (a ``Tasks`` member) to the
+# human-readable problem type shown in the generated model table. ``OD`` and
+# ``OD1`` intentionally share the same problem type. Task tokens that are not
+# listed here are reported as "Source" by ``get_model_task``.
+TASK_MAP = {
+    Tasks.IC: ProblemTypes.IMAGE_CLASSIFICATION,
+    Tasks.IC_EMBEDDING: ProblemTypes.IMAGE_EMBEDDING,
+    Tasks.OD: ProblemTypes.OBJECT_DETECTION,
+    Tasks.OD1: ProblemTypes.OBJECT_DETECTION,
+    Tasks.SEMSEG: ProblemTypes.SEMANTIC_SEGMENTATION,
+    Tasks.IS: ProblemTypes.INSTANCE_SEGMENTATION,
+    Tasks.TC: ProblemTypes.TEXT_CLASSIFICATION,
+    Tasks.TC_EMBEDDING: ProblemTypes.TEXT_EMBEDDING,
+    Tasks.EQA: ProblemTypes.QUESTION_ANSWERING,
+    Tasks.SPC: ProblemTypes.SENTENCE_PAIR_CLASSIFICATION,
+    Tasks.TEXT_GENERATION: ProblemTypes.TEXT_GENERATION,
+    Tasks.SUMMARIZATION: ProblemTypes.TEXT_SUMMARIZATION,
+    Tasks.TRANSLATION: ProblemTypes.MACHINE_TRANSLATION,
+    Tasks.NER: ProblemTypes.NAMED_ENTITY_RECOGNITION,
+    Tasks.TABULAR_REGRESSION: ProblemTypes.TABULAR_REGRESSION,
+    Tasks.TABULAR_CLASSIFICATION: ProblemTypes.TABULAR_CLASSIFICATION,
+}
 
 
 def get_jumpstart_sdk_manifest():
@@ -36,6 +97,11 @@ def get_jumpstart_sdk_spec(key):
     return json.loads(model_spec)
 
 
+def get_model_task(id):
+    """Return the problem-type label for a JumpStart model ID.
+
+    The task token is the second ``-``-separated component of the model ID
+    and is looked up in ``TASK_MAP``.
+
+    Args:
+        id (str): JumpStart model ID. The name shadows the ``id`` builtin but
+            is kept so existing keyword callers keep working.
+
+    Returns:
+        str: The plain-string problem type (for example
+        ``"Image Classification"``), or ``"Source"`` when the ID has no task
+        component or the task token is not present in ``TASK_MAP``.
+    """
+    parts = id.split("-")
+    # An ID without any "-" has no task component; fall back instead of
+    # raising IndexError.
+    if len(parts) < 2:
+        return "Source"
+    problem_type = TASK_MAP.get(parts[1])
+    if problem_type is None:
+        return "Source"
+    # Return the underlying string rather than the enum member: formatting a
+    # (str, Enum) member on Python 3.11+ renders "ProblemTypes.NAME", which
+    # would leak into the generated table.
+    return problem_type.value
+
+
 def create_jumpstart_model_table():
     sdk_manifest = get_jumpstart_sdk_manifest()
     sdk_manifest_top_versions_for_models = {}
@@ -56,9 +122,9 @@ def create_jumpstart_model_table():
     file_content.append("==================================\n")
     file_content.append(
         """
-    JumpStart for the SageMaker Python SDK uses model ids and model versions to access the necessary
+    JumpStart for the SageMaker Python SDK uses model IDs and model versions to access the necessary
     utilities. This table serves to provide the core material plus some extra information that can be useful
-    in selecting the correct model id and corresponding parameters.\n
+    in selecting the correct model ID and corresponding parameters.\n
     """
     )
     file_content.append(
@@ -69,26 +135,29 @@ def create_jumpstart_model_table():
     )
     file_content.append(
         """
-    Each model id is linked to an external page that describes the model.\n
+    Click on the Problem Type to navigate to the source of the model.\n
     """
     )
     file_content.append("\n")
     file_content.append(".. list-table:: Available Models\n")
-    file_content.append("   :widths: 50 20 20 20\n")
+    file_content.append("   :widths: 50 20 20 20 30\n")
     file_content.append("   :header-rows: 1\n")
     file_content.append("   :class: datatable\n")
     file_content.append("\n")
     file_content.append("   * - Model ID\n")
     file_content.append("     - Fine Tunable?\n")
     file_content.append("     - Latest Version\n")
     file_content.append("     - Min SDK Version\n")
+    file_content.append("     - Problem Type/Source\n")
 
     for model in sdk_manifest_top_versions_for_models.values():
         model_spec = get_jumpstart_sdk_spec(model["spec_key"])
-        file_content.append("   * - `{} <{}>`_\n".format(model_spec["model_id"], model_spec["url"]))
+        model_task = get_model_task(model_spec["model_id"])
+        file_content.append("   * - {}\n".format(model_spec["model_id"]))
         file_content.append("     - {}\n".format(model_spec["training_supported"]))
         file_content.append("     - {}\n".format(model["version"]))
         file_content.append("     - {}\n".format(model["min_version"]))
+        file_content.append("     - `{} <{}>`__\n".format(model_task, model_spec["url"]))
 
     f = open("doc_utils/jumpstart.rst", "w")
     f.writelines(file_content)
@@ -670,7 +670,7 @@ the ``model_id`` and ``model_version`` needed to retrieve the URI.
       model. To use the latest version, enter ``"*"``. This is a
       required parameter.
 
-To retrieve a model, first select a ``model id`` and ``version`` from
+To retrieve a model, first select a model ID and version from
 the :doc:`available models <./doc_utils/jumpstart>`.
 
 .. code:: python
 
@@ -457,7 +457,7 @@ def __init__(
         self._hyperparameters = hyperparameters.copy() if hyperparameters else {}
         self.code_location = code_location
         self.entry_point = entry_point
-        self.dependencies = dependencies
+        self.dependencies = dependencies or []
         self.uploaded_code = None
         self.tags = add_jumpstart_tags(
             tags=tags, training_model_uri=self.model_uri, training_script_uri=self.source_dir
 
@@ -2,7 +2,8 @@
     "training": {
         "processors": ["gpu"],
         "version_aliases": {
-            "4.11": "4.11.0"
+            "4.11": "4.11.0",
+            "4.17": "4.17.0"
         },
         "versions": {
             "4.11.0": {
@@ -32,6 +33,68 @@
                     "repository": "huggingface-tensorflow-trcomp-training",
                     "container_version": {"gpu":"cu112-ubuntu18.04"}
                 }
+            },
+            "4.17.0": {
+                "version_aliases": {
+                    "pytorch1.10": "pytorch1.10.2",
+                    "tensorflow2.6": "tensorflow2.6.3"
+                },
+                "pytorch1.10.2": {
+                    "py_versions": ["py38"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-trcomp-training",
+                    "container_version": {"gpu":"cu113-ubuntu20.04"}
+                },
+                "tensorflow2.6.3": {
+                    "py_versions": ["py38"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-tensorflow-trcomp-training",
+                    "container_version": {"gpu":"cu112-ubuntu20.04"}
+                }
             }
         }
     }