
change: update master from dev #2836


Merged: 14 commits, Jan 10, 2022
3 changes: 2 additions & 1 deletion .gitignore
@@ -27,4 +27,5 @@ venv/
*.swp
.docker/
env/
.vscode/
.vscode/
.python-version
6 changes: 6 additions & 0 deletions doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
@@ -82,6 +82,12 @@ Pipeline
.. autoclass:: sagemaker.workflow.pipeline._PipelineExecution
:members:

Parallelism Configuration
-------------------------

.. autoclass:: sagemaker.workflow.parallelism_config.ParallelismConfiguration
:members:

Pipeline Experiment Config
--------------------------

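For context on the class documented above: ParallelismConfiguration caps how many steps of a pipeline execution run concurrently. Below is a minimal usage sketch, assuming Pipeline.create() and Pipeline.start() accept a parallelism_config argument as in SDK releases from this period; the pipeline name, empty step list, and role ARN are placeholders, not values from this PR.

```python
from sagemaker.workflow.parallelism_config import ParallelismConfiguration
from sagemaker.workflow.pipeline import Pipeline

# Placeholder pipeline; real step objects would go in `steps`.
pipeline = Pipeline(name="MyPipeline", steps=[])

# Allow at most 5 steps to execute in parallel.
parallelism_config = ParallelismConfiguration(max_parallel_execution_steps=5)

# The config can be attached to the pipeline definition and/or a single execution.
pipeline.create(
    role_arn="arn:aws:iam::123456789012:role/SageMakerPipelineRole",  # placeholder role
    parallelism_config=parallelism_config,
)
execution = pipeline.start(parallelism_config=parallelism_config)
```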
14 changes: 9 additions & 5 deletions src/sagemaker/clarify.py
@@ -290,11 +290,15 @@ def __init__(
probability_threshold (float): An optional value for binary prediction tasks in which
the model returns a probability, to indicate the threshold to convert the
prediction to a boolean value. Default is 0.5.
label_headers (list): List of label values - one for each score of the ``probability``.
label_headers (list[str]): List of headers, each for a predicted score in model output.
For bias analysis, it is used to extract the label value with the highest score as
predicted label. For an explainability job, it is used to beautify the analysis report
by replacing placeholders like "label0".
"""
self.label = label
self.probability = probability
self.probability_threshold = probability_threshold
self.label_headers = label_headers
if probability_threshold is not None:
try:
float(probability_threshold)
@@ -1060,10 +1064,10 @@ def run_explainability(
explainability_config (:class:`~sagemaker.clarify.ExplainabilityConfig` or list):
Config of the specific explainability method or a list of ExplainabilityConfig
objects. Currently, SHAP and PDP are the two methods supported.
model_scores(str|int|ModelPredictedLabelConfig): Index or JSONPath location in the
model output for the predicted scores to be explained. This is not required if the
model output is a single score. Alternatively, an instance of
ModelPredictedLabelConfig can be provided.
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
Index or JSONPath to locate the predicted scores in the model output. This is not
required if the model output is a single score. Alternatively, it can be an instance
of ModelPredictedLabelConfig to provide more parameters like label_headers.
wait (bool): Whether the call should wait until the job completes (default: True).
logs (bool): Whether to show the logs produced by the job.
Only meaningful when ``wait`` is True (default: True).
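To illustrate the clarified model_scores parameter, here is a hedged sketch of an explainability run in which a ModelPredictedLabelConfig supplies both the score location and label_headers. The role, bucket, model name, and config values are illustrative placeholders, not part of this diff.

```python
from sagemaker.clarify import (
    DataConfig,
    ModelConfig,
    ModelPredictedLabelConfig,
    SageMakerClarifyProcessor,
    SHAPConfig,
)

# Placeholder processor; role and instance settings are illustrative only.
clarify_processor = SageMakerClarifyProcessor(
    role="arn:aws:iam::123456789012:role/SageMakerRole",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

data_config = DataConfig(
    s3_data_input_path="s3://my-bucket/clarify/input.csv",
    s3_output_path="s3://my-bucket/clarify/output",
    headers=["f0", "f1", "f2", "label"],
    label="label",
    dataset_type="text/csv",
)

model_config = ModelConfig(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    accept_type="text/csv",
)

shap_config = SHAPConfig(baseline=[[0.0, 0.0, 0.0]], num_samples=100, agg_method="mean_abs")

# Multi-class output: take the probabilities at index 1 and name each score so the
# report shows "cat"/"dog"/"bird" instead of "label0"/"label1"/"label2".
model_scores = ModelPredictedLabelConfig(
    probability=1,
    label_headers=["cat", "dog", "bird"],
)

clarify_processor.run_explainability(
    data_config=data_config,
    model_config=model_config,
    explainability_config=shap_config,
    model_scores=model_scores,
)
```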
1 change: 1 addition & 0 deletions src/sagemaker/estimator.py
@@ -2343,6 +2343,7 @@ def _stage_user_code_in_s3(self):
dependencies=self.dependencies,
kms_key=kms_key,
s3_resource=self.sagemaker_session.s3_resource,
settings=self.sagemaker_session.settings,
)

def _model_source_dir(self):
26 changes: 25 additions & 1 deletion src/sagemaker/fw_utils.py
@@ -19,8 +19,10 @@
import shutil
import tempfile
from collections import namedtuple
from typing import Optional

import sagemaker.image_uris
from sagemaker.session_settings import SessionSettings
import sagemaker.utils

from sagemaker.deprecations import renamed_warning
@@ -73,7 +75,20 @@
"2.6.0",
"2.6.2",
],
"pytorch": ["1.6", "1.6.0", "1.7", "1.7.1", "1.8", "1.8.0", "1.8.1", "1.9", "1.9.0", "1.9.1"],
"pytorch": [
"1.6",
"1.6.0",
"1.7",
"1.7.1",
"1.8",
"1.8.0",
"1.8.1",
"1.9",
"1.9.0",
"1.9.1",
"1.10",
"1.10.0",
],
}
SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]

@@ -203,6 +218,7 @@ def tar_and_upload_dir(
dependencies=None,
kms_key=None,
s3_resource=None,
settings: Optional[SessionSettings] = None,
):
"""Package source files and upload a compressed tar file to S3.

@@ -230,6 +246,9 @@
s3_resource (boto3.resource("s3")): Optional. Pre-instantiated Boto3 Resource
for S3 connections, can be used to customize the configuration,
e.g. set the endpoint URL (default: None).
settings (sagemaker.session_settings.SessionSettings): Optional. The settings
of the SageMaker ``Session``, can be used to override the default encryption
behavior (default: None).
Returns:
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
script name.
@@ -241,6 +260,7 @@
dependencies = dependencies or []
key = "%s/sourcedir.tar.gz" % s3_key_prefix
tmp = tempfile.mkdtemp()
encrypt_artifact = True if settings is None else settings.encrypt_repacked_artifacts

try:
source_files = _list_files_to_compress(script, directory) + dependencies
@@ -250,6 +270,10 @@

if kms_key:
extra_args = {"ServerSideEncryption": "aws:kms", "SSEKMSKeyId": kms_key}
elif encrypt_artifact:
# encrypt the tarball at rest in S3 with the default AWS managed KMS key for S3
# see https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#API_PutObject_RequestSyntax
extra_args = {"ServerSideEncryption": "aws:kms"}
else:
extra_args = None

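The practical effect of the new settings parameter, in a brief sketch: by default the source bundle is now uploaded with SSE-KMS using the AWS managed S3 key, and a SessionSettings with encrypt_repacked_artifacts=False opts out. The bucket, prefix, and file names below are placeholders, and the sketch assumes Session exposes the settings attribute added alongside this change.

```python
from sagemaker.fw_utils import tar_and_upload_dir
from sagemaker.session import Session
from sagemaker.session_settings import SessionSettings

# Default: with no explicit opt-out, the sourcedir.tar.gz is uploaded with
# ServerSideEncryption="aws:kms" (AWS managed key), even when kms_key is None.
sagemaker_session = Session()
uploaded = tar_and_upload_dir(
    session=sagemaker_session.boto_session,
    bucket="my-bucket",              # placeholder bucket
    s3_key_prefix="training/source",
    script="train.py",               # placeholder entry point
    directory="src",                 # placeholder source directory
    settings=sagemaker_session.settings,
)
print(uploaded.s3_prefix, uploaded.script_name)

# Opt out: a session created with encrypt_repacked_artifacts=False makes
# tar_and_upload_dir skip the default SSE-KMS header entirely.
plain_session = Session(settings=SessionSettings(encrypt_repacked_artifacts=False))
```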
72 changes: 70 additions & 2 deletions src/sagemaker/image_uri_config/pytorch.json
@@ -63,7 +63,8 @@
"1.6": "1.6.0",
"1.7": "1.7.1",
"1.8": "1.8.1",
"1.9": "1.9.1"
"1.9": "1.9.1",
"1.10": "1.10.0"
},
"versions": {
"0.4.0": {
@@ -500,6 +501,39 @@
"us-west-2": "763104351884"
},
"repository": "pytorch-inference"
},
"1.10.0": {
"py_versions": [
"py38"
],
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"repository": "pytorch-inference"
}
}
},
@@ -519,7 +553,8 @@
"1.6": "1.6.0",
"1.7": "1.7.1",
"1.8": "1.8.1",
"1.9": "1.9.1"
"1.9": "1.9.1",
"1.10": "1.10.0"
},
"versions": {
"0.4.0": {
@@ -957,6 +992,39 @@
"us-west-2": "763104351884"
},
"repository": "pytorch-training"
},
"1.10.0": {
"py_versions": [
"py38"
],
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"repository": "pytorch-training"
}
}
}
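With the registry entries above in place, the 1.10 containers should resolve through the usual image URI lookup. A short sketch follows; the region, instance types, and the example URI in the comment are illustrative.

```python
from sagemaker import image_uris

# Resolve the new PyTorch 1.10 training image; py38 is the only listed Python version.
training_uri = image_uris.retrieve(
    framework="pytorch",
    region="us-west-2",
    version="1.10",
    py_version="py38",
    instance_type="ml.p3.2xlarge",
    image_scope="training",
)

inference_uri = image_uris.retrieve(
    framework="pytorch",
    region="us-west-2",
    version="1.10",
    py_version="py38",
    instance_type="ml.m5.xlarge",
    image_scope="inference",
)

# Both URIs point at account 763104351884 in us-west-2, per the registries above,
# e.g. 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.10.0-gpu-py38-...
print(training_uri, inference_uri)
```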
48 changes: 46 additions & 2 deletions src/sagemaker/lineage/query.py
@@ -83,10 +83,11 @@ def __init__(
self._session = sagemaker_session

def to_lineage_object(self):
"""Convert the ``Vertex`` object to its corresponding ``Artifact`` or ``Context`` object."""
"""Convert the ``Vertex`` object to its corresponding Artifact, Action, or Context object."""
from sagemaker.lineage.artifact import Artifact, ModelArtifact
from sagemaker.lineage.context import Context, EndpointContext
from sagemaker.lineage.artifact import DatasetArtifact
from sagemaker.lineage.action import Action

if self.lineage_entity == LineageEntityEnum.CONTEXT.value:
resource_name = get_resource_name_from_arn(self.arn)
@@ -103,6 +104,9 @@ def to_lineage_object(self):
return DatasetArtifact.load(artifact_arn=self.arn, sagemaker_session=self._session)
return Artifact.load(artifact_arn=self.arn, sagemaker_session=self._session)

if self.lineage_entity == LineageEntityEnum.ACTION.value:
return Action.load(action_name=self.arn.split("/")[1], sagemaker_session=self._session)

raise ValueError("Vertex cannot be converted to a lineage object.")


@@ -208,6 +212,44 @@ def _convert_api_response(self, response) -> LineageQueryResult:

return converted

def _collapse_cross_account_artifacts(self, query_response):
"""Collapse duplicate vertices and edges that appear for cross-account artifacts."""
for edge in query_response.edges:
if (
"artifact" in edge.source_arn
and "artifact" in edge.destination_arn
and edge.source_arn.split("/")[1] == edge.destination_arn.split("/")[1]
and edge.source_arn != edge.destination_arn
):
edge_source_arn = edge.source_arn
edge_destination_arn = edge.destination_arn
self._update_cross_account_edge(
edges=query_response.edges,
arn=edge_source_arn,
duplicate_arn=edge_destination_arn,
)
self._update_cross_account_vertex(
query_response=query_response, duplicate_arn=edge_destination_arn
)

# remove the duplicate edges introduced by cross-account artifacts
new_edge = [e for e in query_response.edges if not e.source_arn == e.destination_arn]
query_response.edges = new_edge

return query_response

def _update_cross_account_edge(self, edges, arn, duplicate_arn):
"""Replace the duplicate ARN with the original ARN in the edges list."""
for idx, e in enumerate(edges):
if e.destination_arn == duplicate_arn:
edges[idx].destination_arn = arn
elif e.source_arn == duplicate_arn:
edges[idx].source_arn = arn

def _update_cross_account_vertex(self, query_response, duplicate_arn):
"""Remove the vertex with the duplicate ARN from the vertices list."""
query_response.vertices = [v for v in query_response.vertices if not v.arn == duplicate_arn]

def query(
self,
start_arns: List[str],
@@ -235,5 +277,7 @@ def query(
Filters=query_filter._to_request_dict() if query_filter else {},
MaxDepth=max_depth,
)
query_response = self._convert_api_response(query_response)
query_response = self._collapse_cross_account_artifacts(query_response)

return self._convert_api_response(query_response)
return query_response
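A hedged usage sketch of the query path this change extends: action vertices returned by a lineage query can now be materialized with to_lineage_object(), and cross-account duplicates are collapsed before the result is returned. The start ARN below is a placeholder, and the sketch assumes an AWS environment with lineage entities present.

```python
from sagemaker.lineage.query import LineageEntityEnum, LineageQuery
from sagemaker.session import Session

query = LineageQuery(Session())

# Cross-account duplicate vertices/edges in the response are collapsed by
# _collapse_cross_account_artifacts() before this call returns.
result = query.query(
    start_arns=["arn:aws:sagemaker:us-west-2:123456789012:artifact/abc123"],  # placeholder
)

for vertex in result.vertices:
    # Action vertices now convert cleanly alongside artifacts and contexts.
    if vertex.lineage_entity == LineageEntityEnum.ACTION.value:
        action = vertex.to_lineage_object()
        print(action.action_name)
```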
1 change: 1 addition & 0 deletions src/sagemaker/model.py
@@ -1131,6 +1131,7 @@ def _upload_code(self, key_prefix, repack=False):
script=self.entry_point,
directory=self.source_dir,
dependencies=self.dependencies,
settings=self.sagemaker_session.settings,
)

if repack and self.model_data is not None and self.entry_point is not None:
31 changes: 24 additions & 7 deletions src/sagemaker/model_monitor/clarify_model_monitoring.py
@@ -26,7 +26,7 @@
from sagemaker import image_uris, s3
from sagemaker.session import Session
from sagemaker.utils import name_from_base
from sagemaker.clarify import SageMakerClarifyProcessor
from sagemaker.clarify import SageMakerClarifyProcessor, ModelPredictedLabelConfig

_LOGGER = logging.getLogger(__name__)

@@ -833,9 +833,10 @@ def suggest_baseline(
specific explainability method. Currently, only SHAP is supported.
model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
endpoint to be created.
model_scores (int or str): Index or JSONPath location in the model output for the
predicted scores to be explained. This is not required if the model output is
a single score.
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
Index or JSONPath to locate the predicted scores in the model output. This is not
required if the model output is a single score. Alternatively, it can be an instance
of ModelPredictedLabelConfig to provide more parameters like label_headers.
wait (bool): Whether the call should wait until the job completes (default: False).
logs (bool): Whether to show the logs produced by the job.
Only meaningful when wait is True (default: False).
@@ -865,14 +866,24 @@
headers = copy.deepcopy(data_config.headers)
if headers and data_config.label in headers:
headers.remove(data_config.label)
if model_scores is None:
inference_attribute = None
label_headers = None
elif isinstance(model_scores, ModelPredictedLabelConfig):
inference_attribute = str(model_scores.label)
label_headers = model_scores.label_headers
else:
inference_attribute = str(model_scores)
label_headers = None
self.latest_baselining_job_config = ClarifyBaseliningConfig(
analysis_config=ExplainabilityAnalysisConfig(
explainability_config=explainability_config,
model_config=model_config,
headers=headers,
label_headers=label_headers,
),
features_attribute=data_config.features,
inference_attribute=model_scores if model_scores is None else str(model_scores),
inference_attribute=inference_attribute,
)
self.latest_baselining_job_name = baselining_job_name
self.latest_baselining_job = ClarifyBaseliningJob(
@@ -1166,7 +1177,7 @@ def attach(cls, monitor_schedule_name, sagemaker_session=None):
class ExplainabilityAnalysisConfig:
"""Analysis configuration for ModelExplainabilityMonitor."""

def __init__(self, explainability_config, model_config, headers=None):
def __init__(self, explainability_config, model_config, headers=None, label_headers=None):
"""Creates an analysis config dictionary.

Args:
@@ -1175,13 +1186,19 @@ def __init__(self, explainability_config, model_config, headers=None):
model_config (sagemaker.clarify.ModelConfig): Config object related to bias
configurations.
headers (list[str]): A list of feature names (without the label) of the model/endpoint input.
label_headers (list[str]): List of headers, each for a predicted score in model output.
It is used to beautify the analysis report by replacing placeholders like "label0".

"""
predictor_config = model_config.get_predictor_config()
self.analysis_config = {
"methods": explainability_config.get_explainability_config(),
"predictor": model_config.get_predictor_config(),
"predictor": predictor_config,
}
if headers is not None:
self.analysis_config["headers"] = headers
if label_headers is not None:
predictor_config["label_headers"] = label_headers

def _to_dict(self):
"""Generates a request dictionary using the parameters provided to the class."""
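Finally, a sketch of what the updated suggest_baseline path enables, reusing the placeholder data_config, model_config, and shap_config from the Clarify sketch earlier: passing a ModelPredictedLabelConfig carries label_headers into the ExplainabilityAnalysisConfig, which now injects them into the predictor section of the analysis config. The role and instance settings are illustrative only.

```python
from sagemaker.clarify import ModelPredictedLabelConfig
from sagemaker.model_monitor import ModelExplainabilityMonitor

# Placeholder monitor; role and instance settings are illustrative only.
model_explainability_monitor = ModelExplainabilityMonitor(
    role="arn:aws:iam::123456789012:role/SageMakerRole",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

model_explainability_monitor.suggest_baseline(
    data_config=data_config,            # placeholders defined in the earlier Clarify sketch
    explainability_config=shap_config,
    model_config=model_config,
    # label_headers end up in the analysis config's "predictor" section, so the
    # baselining report shows these names instead of "label0"/"label1".
    model_scores=ModelPredictedLabelConfig(
        probability=0,
        label_headers=["negative", "positive"],
    ),
)
```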