Merge branch 'master' into accept-step-object-in-dependson-list

ahsan-z-khan · web-flow · commit 5c45e2c97a21 · 2021-07-17T18:01:25.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## v2.49.0 (2021-07-15)
+
+### Features
+
+ * Adding serial inference pipeline support to RegisterModel Step
+
+### Documentation Changes
+
+ * add tuning step get_top_model_s3_uri and callback step to doc
+ * links for HF in sdk
+ * Add Clarify module to Model Monitoring API docs
+
 ## v2.48.2 (2021-07-12)
 
 ### Bug Fixes and Other Changes
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.48.3.dev0
+2.49.1.dev0
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -88,21 +88,34 @@ def __init__(
         Args:
             label_values_or_threshold (Any): List of label values or threshold to indicate positive
                 outcome used for bias metrics.
-            facet_name (str): Sensitive attribute in the input data for which we like to compare
-                metrics.
+            facet_name (str or [str]): String or List of strings of sensitive attribute(s) in the
+                input data for which we would like to compare metrics.
             facet_values_or_threshold (list): Optional list of values to form a sensitive group or
                 threshold for a numeric facet column that defines the lower bound of a sensitive
                 group. Defaults to considering each possible value as sensitive group and
                 computing metrics vs all the other examples.
+                If facet_name is a list, this must be None or a list of the same length as
+                facet_name, in which each element is a list or None.
             group_name (str): Optional column name or index to indicate a group column to be used
                 for the bias metric 'Conditional Demographic Disparity in Labels - CDDL' or
                 'Conditional Demographic Disparity in Predicted Labels - CDDPL'.
         """
-        facet = {"name_or_index": facet_name}
-        _set(facet_values_or_threshold, "value_or_threshold", facet)
+        if isinstance(facet_name, str):
+            facet = {"name_or_index": facet_name}
+            _set(facet_values_or_threshold, "value_or_threshold", facet)
+            facet_list = [facet]
+        elif facet_values_or_threshold is None or len(facet_name) == len(facet_values_or_threshold):
+            facet_list = []
+            for i, single_facet_name in enumerate(facet_name):
+                facet = {"name_or_index": single_facet_name}
+                if facet_values_or_threshold is not None:
+                    _set(facet_values_or_threshold[i], "value_or_threshold", facet)
+                facet_list.append(facet)
+        else:
+            raise ValueError("Wrong combination of argument values passed")
         self.analysis_config = {
             "label_values_or_threshold": label_values_or_threshold,
-            "facet": [facet],
+            "facet": facet_list,
         }
         _set(group_name, "group_variable", self.analysis_config)
 
diff --git a/src/sagemaker/debugger/__init__.py b/src/sagemaker/debugger/__init__.py
@@ -15,6 +15,7 @@
 
 from sagemaker.debugger.debugger import (  # noqa: F401
     CollectionConfig,
+    DEBUGGER_FLAG,
     DebuggerHookConfig,
     framework_name,
     get_default_profiler_rule,
diff --git a/src/sagemaker/debugger/debugger.py b/src/sagemaker/debugger/debugger.py
@@ -32,6 +32,7 @@
 from sagemaker.utils import build_dict
 
 framework_name = "debugger"
+DEBUGGER_FLAG = "USE_SMDEBUG"
 
 
 def get_rule_container_image_uri(region):
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -29,6 +29,7 @@
 from sagemaker.analytics import TrainingJobAnalytics
 from sagemaker.debugger import TensorBoardOutputConfig  # noqa: F401 # pylint: disable=unused-import
 from sagemaker.debugger import (
+    DEBUGGER_FLAG,
     DebuggerHookConfig,
     FrameworkProfile,
     get_default_profiler_rule,
@@ -2269,6 +2270,11 @@ def _validate_and_set_debugger_configs(self):
                     )
                     self.debugger_hook_config = False
 
+        if self.debugger_hook_config is False:
+            if self.environment is None:
+                self.environment = {}
+            self.environment[DEBUGGER_FLAG] = "0"
+
     def _stage_user_code_in_s3(self):
         """Upload the user training script to s3 and return the location.
 
diff --git a/src/sagemaker/huggingface/estimator.py b/src/sagemaker/huggingface/estimator.py
@@ -70,10 +70,10 @@ def __init__(
                 ``image_uri`` is provided. The current supported version is ``4.6.1``.
             tensorflow_version (str): TensorFlow version you want to use for
                 executing your model training code. Defaults to ``None``. Required unless
-                ``pytorch_version`` is provided. The current supported version is ``1.6.0``.
+                ``pytorch_version`` is provided. The current supported version is ``2.4.1``.
             pytorch_version (str): PyTorch version you want to use for
                 executing your model training code. Defaults to ``None``. Required unless
-                ``tensorflow_version`` is provided. The current supported version is ``2.4.1``.
+                ``tensorflow_version`` is provided. The current supported versions are ``1.7.1`` and ``1.6.0``.
             source_dir (str): Path (absolute, relative or an S3 URI) to a directory
                 with any other training source code dependencies aside from the entry
                 point file (default: None). If ``source_dir`` is an S3 URI, it must
diff --git a/src/sagemaker/workflow/step_collections.py b/src/sagemaker/workflow/step_collections.py
@@ -112,8 +112,8 @@ def __init__(
         if "entry_point" in kwargs:
             repack_model = True
             entry_point = kwargs.pop("entry_point", None)
-            source_dir = kwargs.get("source_dir")
-            dependencies = kwargs.get("dependencies")
+            source_dir = kwargs.pop("source_dir", None)
+            dependencies = kwargs.pop("dependencies", None)
             kwargs = dict(**kwargs, output_kms_key=kwargs.pop("model_kms_key", None))
 
             repack_model_step = _RepackModelStep(
@@ -130,13 +130,10 @@ def __init__(
             steps.append(repack_model_step)
             model_data = repack_model_step.properties.ModelArtifacts.S3ModelArtifacts
 
-        # remove kwargs consumed by model repacking step
-        kwargs.pop("entry_point", None)
-        kwargs.pop("source_dir", None)
-        kwargs.pop("dependencies", None)
-        kwargs.pop("output_kms_key", None)
+            # remove kwargs consumed by model repacking step
+            kwargs.pop("output_kms_key", None)
 
-        if model is not None:
+        elif model is not None:
             if isinstance(model, PipelineModel):
                 self.model_list = model.models
                 self.container_def_list = model.pipeline_container_def(inference_instances[0])
@@ -156,7 +153,9 @@ def __init__(
                     entry_point = model_entity.entry_point
                     source_dir = model_entity.source_dir
                     dependencies = model_entity.dependencies
+                    kwargs = dict(**kwargs, output_kms_key=model_entity.model_kms_key)
                     name = model_entity.name or model_entity._framework_name
+
                     repack_model_step = _RepackModelStep(
                         name=f"{name}RepackModel",
                         depends_on=depends_on,
@@ -166,12 +165,16 @@ def __init__(
                         entry_point=entry_point,
                         source_dir=source_dir,
                         dependencies=dependencies,
+                        **kwargs,
                     )
                     steps.append(repack_model_step)
                     model_entity.model_data = (
                         repack_model_step.properties.ModelArtifacts.S3ModelArtifacts
                     )
 
+                    # remove kwargs consumed by model repacking step
+                    kwargs.pop("output_kms_key", None)
+
         register_model_step = _RegisterModelStep(
             name=name,
             estimator=estimator,
diff --git a/tests/integ/test_debugger.py b/tests/integ/test_debugger.py
@@ -18,6 +18,7 @@
 import pytest
 
 from sagemaker.debugger.debugger import (
+    DEBUGGER_FLAG,
     DebuggerHookConfig,
     Rule,
     rule_configs,
@@ -748,6 +749,7 @@ def test_mxnet_with_debugger_hook_config_disabled(
         job_description = mx.latest_training_job.describe()
 
         assert job_description.get("DebugHookConfig") is None
+        assert job_description.get("Environment", {}).get(DEBUGGER_FLAG) == "0"
 
 
 def _get_rule_evaluation_statuses(job_description):
diff --git a/tests/unit/sagemaker/workflow/test_steps.py b/tests/unit/sagemaker/workflow/test_steps.py
@@ -23,7 +23,7 @@
     patch,
 )
 
-from sagemaker.debugger import ProfilerConfig
+from sagemaker.debugger import DEBUGGER_FLAG, ProfilerConfig
 from sagemaker.estimator import Estimator
 from sagemaker.tensorflow import TensorFlow
 from sagemaker.inputs import TrainingInput, TransformInput, CreateModelInput
@@ -275,6 +275,7 @@ def test_training_step_tensorflow(sagemaker_session):
                 "sagemaker_distributed_dataparallel_custom_mpi_options": '""',
             },
             "ProfilerConfig": {"S3OutputPath": "s3://my-bucket/"},
+            "Environment": {DEBUGGER_FLAG: "0"},
         },
         "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
     }
diff --git a/tests/unit/test_clarify.py b/tests/unit/test_clarify.py
@@ -89,6 +89,54 @@ def test_data_bias_config():
     assert expected_config == data_bias_config.get_config()
 
 
+def test_data_bias_config_multi_facet():
+    label_values = [1]
+    facet_name = ["Facet1", "Facet2"]
+    facet_threshold = [[0], [1, 2]]
+    group_name = "A151"
+
+    data_bias_config = BiasConfig(
+        label_values_or_threshold=label_values,
+        facet_name=facet_name,
+        facet_values_or_threshold=facet_threshold,
+        group_name=group_name,
+    )
+
+    expected_config = {
+        "label_values_or_threshold": label_values,
+        "facet": [
+            {"name_or_index": facet_name[0], "value_or_threshold": facet_threshold[0]},
+            {"name_or_index": facet_name[1], "value_or_threshold": facet_threshold[1]},
+        ],
+        "group_variable": group_name,
+    }
+    assert expected_config == data_bias_config.get_config()
+
+
+def test_data_bias_config_multi_facet_not_all_with_value():
+    label_values = [1]
+    facet_name = ["Facet1", "Facet2"]
+    facet_threshold = [[0], None]
+    group_name = "A151"
+
+    data_bias_config = BiasConfig(
+        label_values_or_threshold=label_values,
+        facet_name=facet_name,
+        facet_values_or_threshold=facet_threshold,
+        group_name=group_name,
+    )
+
+    expected_config = {
+        "label_values_or_threshold": label_values,
+        "facet": [
+            {"name_or_index": facet_name[0], "value_or_threshold": facet_threshold[0]},
+            {"name_or_index": facet_name[1]},
+        ],
+        "group_variable": group_name,
+    }
+    assert expected_config == data_bias_config.get_config()
+
+
 def test_model_config():
     model_name = "xgboost-model"
     instance_type = "ml.c5.xlarge"