Merge branch 'master' into framework-processor-python3

ahsan-z-khan · web-flow · commit 30417db600f9 · 2021-08-15T21:23:30.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,30 @@
 # Changelog
 
+## v2.53.0 (2021-08-12)
+
+### Features
+
+ * support tuning step parameter range parameterization + support retry strategy in tuner
+
+## v2.52.2.post0 (2021-08-11)
+
+### Documentation Changes
+
+ * clarify that default_bucket creates a bucket
+ * Minor updates to Clarify API documentation
+
+## v2.52.2 (2021-08-10)
+
+### Bug Fixes and Other Changes
+
+ * sklearn integ tests, remove swallowing exception on feature group delete attempt
+ * sklearn integ test for custom bucket
+
+### Documentation Changes
+
+ * Fix dataset_definition links
+ * Document LambdaModel and LambdaPredictor classes
+
 ## v2.52.1 (2021-08-06)
 
 ### Bug Fixes and Other Changes
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.52.2.dev0
+2.53.1.dev0
diff --git a/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst b/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
@@ -5,7 +5,6 @@ ConditionStep
 -------------
 
 .. autoclass:: sagemaker.workflow.condition_step.ConditionStep
-
 .. deprecated:: sagemaker.workflow.condition_step.JsonGet
 
 Conditions
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -48,12 +48,17 @@ def __init__(
             headers (list[str]): A list of column names in the input dataset.
             features (str): JSONPath for locating the feature columns for bias metrics if the
                 dataset format is JSONLines.
-            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
-                and "application/jsonlines" for JSONLines.
+            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
+                "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
             s3_data_distribution_type (str): Valid options are "FullyReplicated" or
                 "ShardedByS3Key".
             s3_compression_type (str): Valid options are "None" or "Gzip".
         """
+        if dataset_type not in ["text/csv", "application/jsonlines", "application/x-parquet"]:
+            raise ValueError(
+                f"Invalid dataset_type '{dataset_type}'."
+                f" Please check the API documentation for the supported dataset types."
+            )
         self.s3_data_input_path = s3_data_input_path
         self.s3_output_path = s3_output_path
         self.s3_data_distribution_type = s3_data_distribution_type
@@ -508,7 +513,7 @@ def run_pre_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the input data.
+        """Runs a ProcessingJob to compute the pre-training bias methods of the input data.
 
         Computes the requested methods that compare 'methods' (e.g. fraction of examples) for the
         sensitive group vs the other examples.
@@ -517,14 +522,14 @@ def run_pre_training_bias(
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
             data_bias_config (:class:`~sagemaker.clarify.BiasConfig`): Config of sensitive groups.
             methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
@@ -538,7 +543,7 @@ def run_pre_training_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(data_bias_config.get_config())
         analysis_config["methods"] = {"pre_training_bias": {"methods": methods}}
@@ -562,7 +567,7 @@ def run_post_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the post-training bias methods of the model predictions.
 
         Spins up a model endpoint, runs inference over the input example in the
         's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
@@ -633,12 +638,11 @@ def run_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the requested bias methods.
 
-        Spins up a model endpoint, runs inference over the input example in the
-        's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
-        compare 'methods' (e.g. accuracy, precision, recall) for the sensitive group vs the other
-        examples.
+        It computes the metrics of both the pre-training methods and the post-training methods.
+        To calculate post-training methods, it needs to spin up a model endpoint and run inference
+        over the input example in the 's3_data_input_path' to obtain predicted labels.
 
         Args:
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
@@ -648,14 +652,14 @@ def run_bias(
             model_predicted_label_config (:class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
                 Config of how to extract the predicted label from the model output.
             pre_training_methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             post_training_methods (str or list[str]): Selector of a subset of potential metrics:
                 ["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
@@ -682,7 +686,7 @@ def run_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(bias_config.get_config())
         analysis_config["predictor"] = model_config.get_predictor_config()
diff --git a/src/sagemaker/dataset_definition/inputs.py b/src/sagemaker/dataset_definition/inputs.py
@@ -99,9 +99,9 @@ class DatasetDefinition(ApiObject):
             Definition inputs to run a processing job. LocalPath is an absolute path to the input
             data. This is a required parameter when `AppManaged` is False (default).
         redshift_dataset_definition
-            (:class:`~sagemaker.dataset_definition.RedshiftDatasetDefinition`): Redshift
+            (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`): Redshift
             dataset definition.
-        athena_dataset_definition (:class:`~sagemaker.dataset_definition.AthenaDatasetDefinition`):
+        athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
             Configuration for Athena Dataset Definition input.
     """
 
diff --git a/src/sagemaker/image_uri_config/pytorch.json b/src/sagemaker/image_uri_config/pytorch.json
@@ -4,16 +4,15 @@
             "cpu"
         ],
         "version_aliases": {
-            "1.3": "1.3.1"
+            "1.3": "1.3.1",
+            "1.5": "1.5.1"
         },
         "versions": {
             "1.3.1": {
                 "py_versions": [
                     "py3"
                 ],
                 "registries": {
-                    "af-south-1": "626614931356",
-                    "ap-east-1": "871362719292",
                     "ap-northeast-1": "763104351884",
                     "ap-northeast-2": "763104351884",
                     "ap-northeast-3": "364406365360",
@@ -26,16 +25,22 @@
                     "eu-central-1": "763104351884",
                     "eu-north-1": "763104351884",
                     "eu-west-1": "763104351884",
-                    "eu-west-2": "763104351884",
-                    "eu-west-3": "763104351884",
-                    "eu-south-1": "692866216735",
-                    "me-south-1": "217643126080",
-                    "sa-east-1": "763104351884",
                     "us-east-1": "763104351884",
                     "us-east-2": "763104351884",
-                    "us-gov-west-1": "442386744353",
-                    "us-iso-east-1": "886529160074",
-                    "us-west-1": "763104351884",
+                    "us-west-2": "763104351884"
+                },
+                "repository": "pytorch-inference-eia"
+            },
+            "1.5.1": {
+                "py_versions": [
+                    "py3"
+                ],
+                "registries": {
+                    "ap-northeast-1": "763104351884",
+                    "ap-northeast-2": "763104351884",
+                    "eu-west-1": "763104351884",
+                    "us-east-1": "763104351884",
+                    "us-east-2": "763104351884",
                     "us-west-2": "763104351884"
                 },
                 "repository": "pytorch-inference-eia"
diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py
@@ -571,6 +571,21 @@ def logs_for_job(self, job_name, wait=False, poll=5, log_type="All"):
         # on local mode.
         pass  # pylint: disable=unnecessary-pass
 
+    def logs_for_processing_job(self, job_name, wait=False, poll=10):
+        """A no-op method meant to override the sagemaker client.
+
+        Args:
+          job_name:
+          wait:  (Default value = False)
+          poll:  (Default value = 10)
+
+        Returns:
+
+        """
+        # override logs_for_processing_job() as it doesn't need to perform any action
+        # on local mode.
+        pass  # pylint: disable=unnecessary-pass
+
 
 class file_input(object):
     """Amazon SageMaker channel configuration for FILE data sources, used in local mode."""
diff --git a/src/sagemaker/parameter.py b/src/sagemaker/parameter.py
@@ -12,7 +12,9 @@
 # language governing permissions and limitations under the License.
 """Placeholder docstring"""
 from __future__ import absolute_import
+
 import json
+from sagemaker.workflow.parameters import Parameter as PipelineParameter
 
 
 class ParameterRange(object):
@@ -68,8 +70,12 @@ def as_tuning_range(self, name):
         """
         return {
             "Name": name,
-            "MinValue": str(self.min_value),
-            "MaxValue": str(self.max_value),
+            "MinValue": str(self.min_value)
+            if not isinstance(self.min_value, PipelineParameter)
+            else self.min_value,
+            "MaxValue": str(self.max_value)
+            if not isinstance(self.max_value, PipelineParameter)
+            else self.max_value,
             "ScalingType": self.scaling_type,
         }
 
@@ -103,9 +109,9 @@ def __init__(self, values):  # pylint: disable=super-init-not-called
                 This input will be converted into a list of strings.
         """
         if isinstance(values, list):
-            self.values = [str(v) for v in values]
+            self.values = [str(v) if not isinstance(v, PipelineParameter) else v for v in values]
         else:
-            self.values = [str(values)]
+            self.values = [str(values) if not isinstance(values, PipelineParameter) else values]
 
     def as_tuning_range(self, name):
         """Represent the parameter range as a dictionary.
diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py
@@ -1066,9 +1066,9 @@ def __init__(
             s3_data_distribution_type (str): Valid options are "FullyReplicated"
                 or "ShardedByS3Key".
             s3_compression_type (str): Valid options are "None" or "Gzip".
-            s3_input (:class:`~sagemaker.dataset_definition.S3Input`)
+            s3_input (:class:`~sagemaker.dataset_definition.inputs.S3Input`)
                 Metadata of data objects stored in S3
-            dataset_definition (:class:`~sagemaker.dataset_definition.DatasetDefinition`)
+            dataset_definition (:class:`~sagemaker.dataset_definition.inputs.DatasetDefinition`)
                 DatasetDefinition input
             app_managed (bool): Whether the input are managed by SageMaker or application
         """
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -357,6 +357,8 @@ def list_s3_files(self, bucket, key_prefix):
     def default_bucket(self):
         """Return the name of the default bucket to use in relevant Amazon SageMaker interactions.
 
+        This function will create the s3 bucket if it does not exist.
+
         Returns:
             str: The name of the default bucket, which is of the form:
                 ``sagemaker-{region}-{AWS account ID}``.
@@ -2211,6 +2213,7 @@ def _map_training_config(
         use_spot_instances=False,
         checkpoint_s3_uri=None,
         checkpoint_local_path=None,
+        max_retry_attempts=None,
     ):
         """Construct a dictionary of training job configuration from the arguments.
 
@@ -2264,6 +2267,7 @@ def _map_training_config(
             objective_metric_name (str): Name of the metric for evaluating training jobs.
             parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
                 be one of three types: Continuous, Integer, or Categorical.
+            max_retry_attempts (int): The number of times to retry the job.
 
         Returns:
             A dictionary of training job configuration. For format details, please refer to
@@ -2320,6 +2324,8 @@ def _map_training_config(
         if parameter_ranges is not None:
             training_job_definition["HyperParameterRanges"] = parameter_ranges
 
+        if max_retry_attempts is not None:
+            training_job_definition["RetryStrategy"] = {"MaximumRetryAttempts": max_retry_attempts}
         return training_job_definition
 
     def stop_tuning_job(self, name):
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
@@ -1507,7 +1507,10 @@ def _get_tuner_args(cls, tuner, inputs):
 
         if tuner.estimator is not None:
             tuner_args["training_config"] = cls._prepare_training_config(
-                inputs, tuner.estimator, tuner.static_hyperparameters, tuner.metric_definitions
+                inputs=inputs,
+                estimator=tuner.estimator,
+                static_hyperparameters=tuner.static_hyperparameters,
+                metric_definitions=tuner.metric_definitions,
             )
 
         if tuner.estimator_dict is not None:
@@ -1580,6 +1583,9 @@ def _prepare_training_config(
         if parameter_ranges is not None:
             training_config["parameter_ranges"] = parameter_ranges
 
+        if estimator.max_retry_attempts is not None:
+            training_config["max_retry_attempts"] = estimator.max_retry_attempts
+
         return training_config
 
     def stop(self):
diff --git a/src/sagemaker/workflow/pipeline.py b/src/sagemaker/workflow/pipeline.py
@@ -320,6 +320,7 @@ def _interpolate(
     """
     if isinstance(obj, (Expression, Parameter, Properties)):
         return obj.expr
+
     if isinstance(obj, CallbackOutput):
         step_name = callback_output_to_step_map[obj.output_name]
         return obj.expr(step_name)
diff --git a/tests/integ/test_feature_store.py b/tests/integ/test_feature_store.py
@@ -312,4 +312,4 @@ def cleanup_feature_group(feature_group: FeatureGroup):
         try:
             feature_group.delete()
         except Exception:
-            pass
+            raise RuntimeError(f"Failed to delete feature group with name {feature_group.name}")
diff --git a/tests/integ/test_processing.py b/tests/integ/test_processing.py
@@ -162,6 +162,7 @@ def test_sklearn_with_customizations(
     sklearn_processor = SKLearnProcessor(
         framework_version=sklearn_latest_version,
         role=ROLE,
+        command=["python3"],
         instance_type=cpu_instance_type,
         instance_count=1,
         volume_size_in_gb=100,
@@ -685,6 +686,7 @@ def test_sklearn_with_network_config(sagemaker_session, sklearn_latest_version,
     sklearn_processor = SKLearnProcessor(
         framework_version=sklearn_latest_version,
         role=ROLE,
+        command=["python3"],
         instance_type=cpu_instance_type,
         instance_count=1,
         sagemaker_session=sagemaker_session,
diff --git a/tests/integ/test_workflow.py b/tests/integ/test_workflow.py
diff --git a/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py b/tests/unit/sagemaker/image_uris/test_dlc_frameworks.py
diff --git a/tests/unit/sagemaker/workflow/test_steps.py b/tests/unit/sagemaker/workflow/test_steps.py
diff --git a/tests/unit/test_clarify.py b/tests/unit/test_clarify.py
diff --git a/tests/unit/test_local_session.py b/tests/unit/test_local_session.py