aws
diff --git a/‎.github/ISSUE_TEMPLATE/config.yml
Lines changed: 2 additions & 2 deletions b/‎.github/ISSUE_TEMPLATE/config.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 29 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 29 additions & 0 deletions
diff --git a/‎VERSION
Lines changed: 1 addition & 1 deletion b/‎VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/api/training/sdp_versions/latest/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion b/‎doc/api/training/sdp_versions/latest/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/api/training/sdp_versions/v1.0.0/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion b/‎doc/api/training/sdp_versions/v1.0.0/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/api/training/sdp_versions/v1.1.x/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion b/‎doc/api/training/sdp_versions/v1.1.x/smd_data_parallel_tensorflow.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/frameworks/sklearn/using_sklearn.rst
Lines changed: 4 additions & 0 deletions b/‎doc/frameworks/sklearn/using_sklearn.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎doc/frameworks/xgboost/using_xgboost.rst
Lines changed: 4 additions & 0 deletions b/‎doc/frameworks/xgboost/using_xgboost.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎doc/overview.rst
Lines changed: 2 additions & 2 deletions b/‎doc/overview.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/sagemaker/estimator.py
Lines changed: 31 additions & 0 deletions b/‎src/sagemaker/estimator.py
Lines changed: 31 additions & 0 deletions
diff --git a/‎src/sagemaker/image_uri_config/data-wrangler.json
Lines changed: 33 additions & 0 deletions b/‎src/sagemaker/image_uri_config/data-wrangler.json
Lines changed: 33 additions & 0 deletions
diff --git a/‎src/sagemaker/processing.py
Lines changed: 26 additions & 11 deletions b/‎src/sagemaker/processing.py
Lines changed: 26 additions & 11 deletions
@@ -1,5 +1,5 @@
 blank_issues_enabled: false
 contact_links:
   - name: Ask a question
-    url: https://stackoverflow.com/questions/tagged/amazon-sagemaker
-    about: Use Stack Overflow to ask and answer questions
+    url: https://github.com/aws/sagemaker-python-sdk/discussions
+    about: Use GitHub Discussions to ask and answer questions
@@ -1,5 +1,34 @@
 # Changelog
 
+## v2.41.0 (2021-05-17)
+
+### Features
+
+ * add pipeline experiment config
+ * add data wrangler processor
+ * support RetryStrategy for training jobs
+
+### Bug Fixes and Other Changes
+
+ * fix repack pipeline step by putting inference.py in "code" sub dir
+ * add data wrangler image uri
+ * fix black-check errors
+
+## v2.40.0 (2021-05-11)
+
+### Features
+
+ * add xgboost framework version 1.2-2
+
+### Bug Fixes and Other Changes
+
+ * fix get_execution_role on Studio
+ * [fix] Check py_version existence in RegisterModel step
+
+### Documentation Changes
+
+ * SM Distributed EFA Launch
+
 ## v2.39.1 (2021-05-05)
 
 ### Bug Fixes and Other Changes
 
@@ -1 +1 @@
-2.39.2.dev0
+2.41.1.dev0
@@ -443,7 +443,7 @@ TensorFlow API
 
       *   Supported compression types - ``none``, ``fp16``
 
-   - ``sparse_as_dense:`` Not supported. Raises not supported error.
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
 
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
@@ -482,6 +482,8 @@ TensorFlow API
 
       *   Supported compression types - ``none``, ``fp16``
 
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
+
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
       *  Supported ops: ``AVERAGE``
 
@@ -456,7 +456,7 @@ TensorFlow API
 
       *   Supported compression types - ``none``, ``fp16``
 
-   - ``sparse_as_dense:`` Not supported. Raises not supported error.
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
 
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
@@ -496,6 +496,8 @@ TensorFlow API
 
       *   Supported compression types - ``none``, ``fp16``
 
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
+
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
       *  Supported ops: ``AVERAGE``
 
@@ -459,7 +459,7 @@ library with TensorFlow.
 
       *   Supported compression types - ``none``, ``fp16``
 
-   - ``sparse_as_dense:`` Not supported. Raises not supported error.
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
 
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
@@ -499,6 +499,8 @@ library with TensorFlow.
 
       *   Supported compression types - ``none``, ``fp16``
 
+   - ``sparse_as_dense:`` Treats sparse gradient tensor as dense tensor. Defaults to ``False``.
+
    - ``op (smdistributed.dataparallel.tensorflow.ReduceOp)(optional)``: The reduction operation to combine tensors across different ranks. Defaults to ``Average`` if None is given.
 
       *  Supported ops: ``AVERAGE``
 
@@ -84,6 +84,10 @@ inadvertently run your training code at the wrong point in execution.
 
 For more on training environment variables, please visit https://github.com/aws/sagemaker-containers.
 
+.. important::
+    The sagemaker-containers repository has been deprecated;
+    however, it is still used to define Scikit-learn and XGBoost environment variables.
+
 Save the Model
 --------------
 
 
@@ -88,6 +88,10 @@ but you can access useful properties about the training environment through vari
 
 For the exhaustive list of available environment variables, see the `SageMaker Containers documentation <https://github.com/aws/sagemaker-containers#list-of-provided-environment-variables-by-sagemaker-containers>`__.
 
+.. important::
+    The sagemaker-containers repository has been deprecated;
+    however, it is still used to define Scikit-learn and XGBoost environment variables.
+
 Let's look at the main elements of the script. Starting with the ``__main__`` guard,
 use a parser to read the hyperparameters passed to the estimator when creating the training job.
 These hyperparameters are made available as arguments to our input script.
 
@@ -374,7 +374,7 @@ Here are examples of how to use Amazon FSx for Lustre as input for training:
 
         file_system_input = FileSystemInput(file_system_id='fs-2',
                                             file_system_type='FSxLustre',
-                                            directory_path='/fsx/tensorflow',
+                                            directory_path='/<mount-id>/tensorflow',
                                             file_system_access_mode='ro')
 
         # Start an Amazon SageMaker training job with FSx using the FileSystemInput class
@@ -394,7 +394,7 @@ Here are examples of how to use Amazon FSx for Lustre as input for training:
 
         records = FileSystemRecordSet(file_system_id='fs-2',
                                       file_system_type='FSxLustre',
-                                      directory_path='/fsx/kmeans',
+                                      directory_path='/<mount-id>/kmeans',
                                       num_records=784,
                                       feature_dim=784)
 
 
@@ -124,6 +124,7 @@ def __init__(
         profiler_config=None,
         disable_profiler=False,
         environment=None,
+        max_retry_attempts=None,
         **kwargs,
     ):
         """Initialize an ``EstimatorBase`` instance.
@@ -269,6 +270,13 @@ def __init__(
                 will be disabled (default: ``False``).
             environment (dict[str, str]) : Environment variables to be set for
                 use during training job (default: ``None``)
+            max_retry_attempts (int): The number of times to move a job to the STARTING status.
+                You can specify between 1 and 30 attempts.
+                If the value of attempts is greater than zero,
+                the job is retried on InternalServerFailure
+                the same number of attempts as the value.
+                You can cap the total duration for your job by setting ``max_wait`` and ``max_run``
+                (default: ``None``)
 
         """
         instance_count = renamed_kwargs(
@@ -357,6 +365,8 @@ def __init__(
 
         self.environment = environment
 
+        self.max_retry_attempts = max_retry_attempts
+
         if not _region_supports_profiler(self.sagemaker_session.boto_region_name):
             self.disable_profiler = True
 
@@ -1114,6 +1124,13 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na
             if max_wait:
                 init_params["max_wait"] = max_wait
 
+        if job_details.get("RetryStrategy", False):
+            init_params["max_retry_attempts"] = job_details.get("RetryStrategy", {}).get(
+                "MaximumRetryAttempts"
+            )
+            max_wait = job_details.get("StoppingCondition", {}).get("MaxWaitTimeInSeconds")
+            if max_wait:
+                init_params["max_wait"] = max_wait
         return init_params
 
     def transformer(
@@ -1489,6 +1506,11 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
         if estimator.enable_network_isolation():
             train_args["enable_network_isolation"] = True
 
+        if estimator.max_retry_attempts is not None:
+            train_args["retry_strategy"] = {"MaximumRetryAttempts": estimator.max_retry_attempts}
+        else:
+            train_args["retry_strategy"] = None
+
         if estimator.encrypt_inter_container_traffic:
             train_args["encrypt_inter_container_traffic"] = True
 
@@ -1666,6 +1688,7 @@ def __init__(
         profiler_config=None,
         disable_profiler=False,
         environment=None,
+        max_retry_attempts=None,
         **kwargs,
     ):
         """Initialize an ``Estimator`` instance.
@@ -1816,6 +1839,13 @@ def __init__(
                 will be disabled (default: ``False``).
             environment (dict[str, str]) : Environment variables to be set for
                 use during training job (default: ``None``)
+            max_retry_attempts (int): The number of times to move a job to the STARTING status.
+                You can specify between 1 and 30 attempts.
+                If the value of attempts is greater than zero,
+                the job is retried on InternalServerFailure
+                the same number of attempts as the value.
+                You can cap the total duration for your job by setting ``max_wait`` and ``max_run``
+                (default: ``None``)
         """
         self.image_uri = image_uri
         self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {}
@@ -1850,6 +1880,7 @@ def __init__(
             profiler_config=profiler_config,
             disable_profiler=disable_profiler,
             environment=environment,
+            max_retry_attempts=max_retry_attempts,
             **kwargs,
         )
 
 
@@ -0,0 +1,33 @@
+{
+  "processing": {
+    "versions": {
+      "1.x": {
+        "registries": {
+          "af-south-1": "143210264188",
+          "ap-east-1": "707077482487",
+          "ap-northeast-1": "649008135260",
+          "ap-northeast-2": "131546521161",
+          "ap-south-1": "089933028263",
+          "ap-southeast-1": "119527597002",
+          "ap-southeast-2": "422173101802",
+          "ca-central-1": "557239378090",
+          "eu-central-1": "024640144536",
+          "eu-north-1": "054986407534",
+          "eu-south-1": "488287956546",
+          "eu-west-1": "245179582081",
+          "eu-west-2": "894491911112",
+          "eu-west-3": "807237891255",
+          "me-south-1": "376037874950",
+          "sa-east-1": "424196993095",
+          "us-east-1": "663277389841",
+          "us-east-2": "415577184552",
+          "us-west-1": "926135532090",
+          "us-west-2": "174368400705",
+          "cn-north-1": "245909111842",
+          "cn-northwest-1": "249157047649"
+        },
+        "repository": "sagemaker-data-wrangler-container"
+      }
+    }
+  }
+}
@@ -30,7 +30,6 @@
 from sagemaker.local import LocalSession
 from sagemaker.utils import base_name_from_image, name_from_base
 from sagemaker.session import Session
-from sagemaker.network import NetworkConfig  # noqa: F401 # pylint: disable=unused-import
 from sagemaker.workflow.properties import Properties
 from sagemaker.workflow.parameters import Parameter
 from sagemaker.workflow.entities import Expression
@@ -219,14 +218,14 @@ def _normalize_args(
         """
         self._current_job_name = self._generate_current_job_name(job_name=job_name)
 
-        inputs_with_code = self._include_code_in_inputs(inputs, code)
+        inputs_with_code = self._include_code_in_inputs(inputs, code, kms_key)
         normalized_inputs = self._normalize_inputs(inputs_with_code, kms_key)
         normalized_outputs = self._normalize_outputs(outputs)
         self.arguments = arguments
 
         return normalized_inputs, normalized_outputs
 
-    def _include_code_in_inputs(self, inputs, _code):
+    def _include_code_in_inputs(self, inputs, _code, _kms_key):
         """A no op in the base class to include code in the processing job inputs.
 
         Args:
@@ -235,6 +234,8 @@ def _include_code_in_inputs(self, inputs, _code):
                 :class:`~sagemaker.processing.ProcessingInput` objects.
             _code (str): This can be an S3 URI or a local path to a file with the framework
                 script to run (default: None). A no op in the base class.
+            _kms_key (str): The ARN of the KMS key that is used to encrypt the
+                user code file (default: None).
 
         Returns:
             list[:class:`~sagemaker.processing.ProcessingInput`]: inputs
@@ -528,7 +529,7 @@ def run(
         if wait:
             self.latest_job.wait(logs=logs)
 
-    def _include_code_in_inputs(self, inputs, code):
+    def _include_code_in_inputs(self, inputs, code, kms_key=None):
         """Converts code to appropriate input and includes in input list.
 
         Side effects include:
@@ -541,12 +542,14 @@ def _include_code_in_inputs(self, inputs, code):
                 :class:`~sagemaker.processing.ProcessingInput` objects.
             code (str): This can be an S3 URI or a local path to a file with the framework
                 script to run (default: None).
+            kms_key (str): The ARN of the KMS key that is used to encrypt the
+                user code file (default: None).
 
         Returns:
             list[:class:`~sagemaker.processing.ProcessingInput`]: inputs together with the
                 code as `ProcessingInput`.
         """
-        user_code_s3_uri = self._handle_user_code_url(code)
+        user_code_s3_uri = self._handle_user_code_url(code, kms_key)
         user_script_name = self._get_user_code_name(code)
 
         inputs_with_code = self._convert_code_and_add_to_inputs(inputs, user_code_s3_uri)
@@ -567,14 +570,16 @@ def _get_user_code_name(self, code):
         code_url = urlparse(code)
         return os.path.basename(code_url.path)
 
-    def _handle_user_code_url(self, code):
+    def _handle_user_code_url(self, code, kms_key=None):
         """Gets the S3 URL containing the user's code.
 
            Inspects the scheme the customer passed in ("s3://" for code in S3, "file://" or nothing
            for absolute or local file paths). Uploads the code to S3 if the code is a local file.
 
         Args:
             code (str): A URL to the customer's code.
+            kms_key (str): The ARN of the KMS key that is used to encrypt the
+                user code file (default: None).
 
         Returns:
             str: The S3 URL to the customer's code.
@@ -603,7 +608,7 @@ def _handle_user_code_url(self, code):
                         code
                     )
                 )
-            user_code_s3_uri = self._upload_code(code_path)
+            user_code_s3_uri = self._upload_code(code_path, kms_key)
         else:
             raise ValueError(
                 "code {} url scheme {} is not recognized. Please pass a file path or S3 url".format(
@@ -612,11 +617,13 @@ def _handle_user_code_url(self, code):
             )
         return user_code_s3_uri
 
-    def _upload_code(self, code):
+    def _upload_code(self, code, kms_key=None):
         """Uploads a code file or directory specified as a string and returns the S3 URI.
 
         Args:
             code (str): A file or directory to be uploaded to S3.
+            kms_key (str): The ARN of the KMS key that is used to encrypt the
+                user code file (default: None).
 
         Returns:
             str: The S3 URI of the uploaded file or directory.
@@ -630,7 +637,10 @@ def _upload_code(self, code):
             self._CODE_CONTAINER_INPUT_NAME,
         )
         return s3.S3Uploader.upload(
-            local_path=code, desired_s3_uri=desired_s3_uri, sagemaker_session=self.sagemaker_session
+            local_path=code,
+            desired_s3_uri=desired_s3_uri,
+            kms_key=kms_key,
+            sagemaker_session=self.sagemaker_session,
         )
 
     def _convert_code_and_add_to_inputs(self, inputs, s3_uri):
@@ -666,7 +676,9 @@ def _set_entrypoint(self, command, user_script_name):
         """
         user_script_location = str(
             pathlib.PurePosixPath(
-                self._CODE_CONTAINER_BASE_PATH, self._CODE_CONTAINER_INPUT_NAME, user_script_name
+                self._CODE_CONTAINER_BASE_PATH,
+                self._CODE_CONTAINER_INPUT_NAME,
+                user_script_name,
             )
         )
         self.entrypoint = command + [user_script_location]
@@ -1066,7 +1078,10 @@ def _to_request_dict(self):
         """Generates a request dictionary using the parameters provided to the class."""
 
         # Create the request dictionary.
-        s3_input_request = {"InputName": self.input_name, "AppManaged": self.app_managed}
+        s3_input_request = {
+            "InputName": self.input_name,
+            "AppManaged": self.app_managed,
+        }
 
         if self.s3_input:
             # Check the compression type, then add it to the dictionary.