
change: update master from dev #2836


Merged: 14 commits, Jan 10, 2022
3 changes: 2 additions & 1 deletion .gitignore
@@ -27,4 +27,5 @@ venv/
*.swp
.docker/
env/
.vscode/
.vscode/
.python-version
6 changes: 6 additions & 0 deletions doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
@@ -82,6 +82,12 @@ Pipeline
.. autoclass:: sagemaker.workflow.pipeline._PipelineExecution
:members:

Parallelism Configuration
-------------------------

.. autoclass:: sagemaker.workflow.parallelism_config.ParallelismConfiguration
:members:

Pipeline Experiment Config
--------------------------

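For context on the class documented above: ParallelismConfiguration caps how many steps of a pipeline execution run concurrently. Below is a minimal usage sketch, assuming Pipeline.create() and Pipeline.start() accept a parallelism_config argument as in SDK releases from this period; the pipeline name, empty step list, and role ARN are placeholders, not values from this PR.

```python
from sagemaker.workflow.parallelism_config import ParallelismConfiguration
from sagemaker.workflow.pipeline import Pipeline

# Placeholder pipeline; real step objects would go in `steps`.
pipeline = Pipeline(name="MyPipeline", steps=[])

# Allow at most 5 steps to execute in parallel.
parallelism_config = ParallelismConfiguration(max_parallel_execution_steps=5)

# The config can be attached to the pipeline definition and/or a single execution.
pipeline.create(
    role_arn="arn:aws:iam::123456789012:role/SageMakerPipelineRole",  # placeholder role
    parallelism_config=parallelism_config,
)
execution = pipeline.start(parallelism_config=parallelism_config)
```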
14 changes: 9 additions & 5 deletions src/sagemaker/clarify.py
@@ -290,11 +290,15 @@ def __init__(
probability_threshold (float): An optional value for binary prediction tasks in which
the model returns a probability, to indicate the threshold to convert the
prediction to a boolean value. Default is 0.5.
label_headers (list): List of label values - one for each score of the ``probability``.
label_headers (list[str]): List of headers, each for a predicted score in model output.
For bias analysis, it is used to extract the label value with the highest score as
predicted label. For an explainability job, it is used to beautify the analysis report
by replacing placeholders like "label0".
"""
self.label = label
self.probability = probability
self.probability_threshold = probability_threshold
self.label_headers = label_headers
if probability_threshold is not None:
try:
float(probability_threshold)
@@ -1060,10 +1064,10 @@ def run_explainability(
explainability_config (:class:`~sagemaker.clarify.ExplainabilityConfig` or list):
Config of the specific explainability method or a list of ExplainabilityConfig
objects. Currently, SHAP and PDP are the two methods supported.
model_scores(str|int|ModelPredictedLabelConfig): Index or JSONPath location in the
model output for the predicted scores to be explained. This is not required if the
model output is a single score. Alternatively, an instance of
ModelPredictedLabelConfig can be provided.
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
Index or JSONPath to locate the predicted scores in the model output. This is not
required if the model output is a single score. Alternatively, it can be an instance
of ModelPredictedLabelConfig to provide more parameters like label_headers.
wait (bool): Whether the call should wait until the job completes (default: True).
logs (bool): Whether to show the logs produced by the job.
Only meaningful when ``wait`` is True (default: True).
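To illustrate the clarified model_scores parameter, here is a hedged sketch of an explainability run in which a ModelPredictedLabelConfig supplies both the score location and label_headers. The role, bucket, model name, and config values are illustrative placeholders, not part of this diff.

```python
from sagemaker.clarify import (
    DataConfig,
    ModelConfig,
    ModelPredictedLabelConfig,
    SageMakerClarifyProcessor,
    SHAPConfig,
)

# Placeholder processor; role and instance settings are illustrative only.
clarify_processor = SageMakerClarifyProcessor(
    role="arn:aws:iam::123456789012:role/SageMakerRole",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

data_config = DataConfig(
    s3_data_input_path="s3://my-bucket/clarify/input.csv",
    s3_output_path="s3://my-bucket/clarify/output",
    headers=["f0", "f1", "f2", "label"],
    label="label",
    dataset_type="text/csv",
)

model_config = ModelConfig(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    accept_type="text/csv",
)

shap_config = SHAPConfig(baseline=[[0.0, 0.0, 0.0]], num_samples=100, agg_method="mean_abs")

# Multi-class output: take the probabilities at index 1 and name each score so the
# report shows "cat"/"dog"/"bird" instead of "label0"/"label1"/"label2".
model_scores = ModelPredictedLabelConfig(
    probability=1,
    label_headers=["cat", "dog", "bird"],
)

clarify_processor.run_explainability(
    data_config=data_config,
    model_config=model_config,
    explainability_config=shap_config,
    model_scores=model_scores,
)
```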
1 change: 1 addition & 0 deletions src/sagemaker/estimator.py
@@ -2343,6 +2343,7 @@ def _stage_user_code_in_s3(self):
dependencies=self.dependencies,
kms_key=kms_key,
s3_resource=self.sagemaker_session.s3_resource,
settings=self.sagemaker_session.settings,
)

def _model_source_dir(self):
26 changes: 25 additions & 1 deletion src/sagemaker/fw_utils.py
@@ -19,8 +19,10 @@
import shutil
import tempfile
from collections import namedtuple
from typing import Optional

import sagemaker.image_uris
from sagemaker.session_settings import SessionSettings
import sagemaker.utils

from sagemaker.deprecations import renamed_warning
@@ -73,7 +75,20 @@
"2.6.0",
"2.6.2",
],
"pytorch": ["1.6", "1.6.0", "1.7", "1.7.1", "1.8", "1.8.0", "1.8.1", "1.9", "1.9.0", "1.9.1"],
"pytorch": [
"1.6",
"1.6.0",
"1.7",
"1.7.1",
"1.8",
"1.8.0",
"1.8.1",
"1.9",
"1.9.0",
"1.9.1",
"1.10",
"1.10.0",
],
}
SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]

@@ -203,6 +218,7 @@ def tar_and_upload_dir(
dependencies=None,
kms_key=None,
s3_resource=None,
settings: Optional[SessionSettings] = None,
):
"""Package source files and upload a compressed tar file to S3.

@@ -230,6 +246,9 @@
s3_resource (boto3.resource("s3")): Optional. Pre-instantiated Boto3 Resource
for S3 connections, can be used to customize the configuration,
e.g. set the endpoint URL (default: None).
settings (sagemaker.session_settings.SessionSettings): Optional. The settings
of the SageMaker ``Session``, can be used to override the default encryption
behavior (default: None).
Returns:
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
script name.
@@ -241,6 +260,7 @@
dependencies = dependencies or []
key = "%s/sourcedir.tar.gz" % s3_key_prefix
tmp = tempfile.mkdtemp()
encrypt_artifact = True if settings is None else settings.encrypt_repacked_artifacts

try:
source_files = _list_files_to_compress(script, directory) + dependencies
@@ -250,6 +270,10 @@

if kms_key:
extra_args = {"ServerSideEncryption": "aws:kms", "SSEKMSKeyId": kms_key}
elif encrypt_artifact:
# encrypt the tarball at rest in S3 with the default AWS managed KMS key for S3
# see https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#API_PutObject_RequestSyntax
extra_args = {"ServerSideEncryption": "aws:kms"}
else:
extra_args = None

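The practical effect of the new settings parameter, in a brief sketch: by default the source bundle is now uploaded with SSE-KMS using the AWS managed S3 key, and a SessionSettings with encrypt_repacked_artifacts=False opts out. The bucket, prefix, and file names below are placeholders, and the sketch assumes Session exposes the settings attribute added alongside this change.

```python
from sagemaker.fw_utils import tar_and_upload_dir
from sagemaker.session import Session
from sagemaker.session_settings import SessionSettings

# Default: with no explicit opt-out, the sourcedir.tar.gz is uploaded with
# ServerSideEncryption="aws:kms" (AWS managed key), even when kms_key is None.
sagemaker_session = Session()
uploaded = tar_and_upload_dir(
    session=sagemaker_session.boto_session,
    bucket="my-bucket",              # placeholder bucket
    s3_key_prefix="training/source",
    script="train.py",               # placeholder entry point
    directory="src",                 # placeholder source directory
    settings=sagemaker_session.settings,
)
print(uploaded.s3_prefix, uploaded.script_name)

# Opt out: a session created with encrypt_repacked_artifacts=False makes
# tar_and_upload_dir skip the default SSE-KMS header entirely.
plain_session = Session(settings=SessionSettings(encrypt_repacked_artifacts=False))
```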
72 changes: 70 additions & 2 deletions src/sagemaker/image_uri_config/pytorch.json
@@ -63,7 +63,8 @@
"1.6": "1.6.0",
"1.7": "1.7.1",
"1.8": "1.8.1",
"1.9": "1.9.1"
"1.9": "1.9.1",
"1.10": "1.10.0"
},
"versions": {
"0.4.0": {
@@ -500,6 +501,39 @@
"us-west-2": "763104351884"
},
"repository": "pytorch-inference"
},
"1.10.0": {
"py_versions": [
"py38"
],
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"repository": "pytorch-inference"
}
}
},
@@ -519,7 +553,8 @@
"1.6": "1.6.0",
"1.7": "1.7.1",
"1.8": "1.8.1",
"1.9": "1.9.1"
"1.9": "1.9.1",
"1.10": "1.10.0"
},
"versions": {
"0.4.0": {
@@ -957,6 +992,39 @@
"us-west-2": "763104351884"
},
"repository": "pytorch-training"
},
"1.10.0": {
"py_versions": [
"py38"
],
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"me-south-1": "217643126080",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"repository": "pytorch-training"
}
}
}
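With the registry entries above in place, the 1.10 containers should resolve through the usual image URI lookup. A short sketch follows; the region, instance types, and the example URI in the comment are illustrative.

```python
from sagemaker import image_uris

# Resolve the new PyTorch 1.10 training image; py38 is the only listed Python version.
training_uri = image_uris.retrieve(
    framework="pytorch",
    region="us-west-2",
    version="1.10",
    py_version="py38",
    instance_type="ml.p3.2xlarge",
    image_scope="training",
)

inference_uri = image_uris.retrieve(
    framework="pytorch",
    region="us-west-2",
    version="1.10",
    py_version="py38",
    instance_type="ml.m5.xlarge",
    image_scope="inference",
)

# Both URIs point at account 763104351884 in us-west-2, per the registries above,
# e.g. 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.10.0-gpu-py38-...
print(training_uri, inference_uri)
```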
48 changes: 46 additions & 2 deletions src/sagemaker/lineage/query.py
@@ -83,10 +83,11 @@ def __init__(
self._session = sagemaker_session

def to_lineage_object(self):
"""Convert the ``Vertex`` object to its corresponding ``Artifact`` or ``Context`` object."""
"""Convert the ``Vertex`` object to its corresponding Artifact, Action, or Context object."""
from sagemaker.lineage.artifact import Artifact, ModelArtifact
from sagemaker.lineage.context import Context, EndpointContext
from sagemaker.lineage.artifact import DatasetArtifact
from sagemaker.lineage.action import Action

if self.lineage_entity == LineageEntityEnum.CONTEXT.value:
resource_name = get_resource_name_from_arn(self.arn)
@@ -103,6 +104,9 @@ def to_lineage_object(self):
return DatasetArtifact.load(artifact_arn=self.arn, sagemaker_session=self._session)
return Artifact.load(artifact_arn=self.arn, sagemaker_session=self._session)

if self.lineage_entity == LineageEntityEnum.ACTION.value:
return Action.load(action_name=self.arn.split("/")[1], sagemaker_session=self._session)

raise ValueError("Vertex cannot be converted to a lineage object.")


@@ -208,6 +212,44 @@ def _convert_api_response(self, response) -> LineageQueryResult:

return converted

def _collapse_cross_account_artifacts(self, query_response):
"""Collapse duplicate vertices and edges that appear for cross-account artifacts."""
for edge in query_response.edges:
if (
"artifact" in edge.source_arn
and "artifact" in edge.destination_arn
and edge.source_arn.split("/")[1] == edge.destination_arn.split("/")[1]
and edge.source_arn != edge.destination_arn
):
edge_source_arn = edge.source_arn
edge_destination_arn = edge.destination_arn
self._update_cross_account_edge(
edges=query_response.edges,
arn=edge_source_arn,
duplicate_arn=edge_destination_arn,
)
self._update_cross_account_vertex(
query_response=query_response, duplicate_arn=edge_destination_arn
)

# remove the duplicate edges introduced by cross-account artifacts
new_edge = [e for e in query_response.edges if not e.source_arn == e.destination_arn]
query_response.edges = new_edge

return query_response

def _update_cross_account_edge(self, edges, arn, duplicate_arn):
"""Replace the duplicate ARN with the original ARN in the edges list."""
for idx, e in enumerate(edges):
if e.destination_arn == duplicate_arn:
edges[idx].destination_arn = arn
elif e.source_arn == duplicate_arn:
edges[idx].source_arn = arn

def _update_cross_account_vertex(self, query_response, duplicate_arn):
"""Remove the vertex with the duplicate ARN from the vertices list."""
query_response.vertices = [v for v in query_response.vertices if not v.arn == duplicate_arn]

def query(
self,
start_arns: List[str],
@@ -235,5 +277,7 @@ def query(
Filters=query_filter._to_request_dict() if query_filter else {},
MaxDepth=max_depth,
)
query_response = self._convert_api_response(query_response)
query_response = self._collapse_cross_account_artifacts(query_response)

return self._convert_api_response(query_response)
return query_response
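A hedged usage sketch of the query path this change extends: action vertices returned by a lineage query can now be materialized with to_lineage_object(), and cross-account duplicates are collapsed before the result is returned. The start ARN below is a placeholder, and the sketch assumes an AWS environment with lineage entities present.

```python
from sagemaker.lineage.query import LineageEntityEnum, LineageQuery
from sagemaker.session import Session

query = LineageQuery(Session())

# Cross-account duplicate vertices/edges in the response are collapsed by
# _collapse_cross_account_artifacts() before this call returns.
result = query.query(
    start_arns=["arn:aws:sagemaker:us-west-2:123456789012:artifact/abc123"],  # placeholder
)

for vertex in result.vertices:
    # Action vertices now convert cleanly alongside artifacts and contexts.
    if vertex.lineage_entity == LineageEntityEnum.ACTION.value:
        action = vertex.to_lineage_object()
        print(action.action_name)
```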
1 change: 1 addition & 0 deletions src/sagemaker/model.py
@@ -1131,6 +1131,7 @@ def _upload_code(self, key_prefix, repack=False):
script=self.entry_point,
directory=self.source_dir,
dependencies=self.dependencies,
settings=self.sagemaker_session.settings,
)

if repack and self.model_data is not None and self.entry_point is not None:
31 changes: 24 additions & 7 deletions src/sagemaker/model_monitor/clarify_model_monitoring.py
@@ -26,7 +26,7 @@
from sagemaker import image_uris, s3
from sagemaker.session import Session
from sagemaker.utils import name_from_base
from sagemaker.clarify import SageMakerClarifyProcessor
from sagemaker.clarify import SageMakerClarifyProcessor, ModelPredictedLabelConfig

_LOGGER = logging.getLogger(__name__)

@@ -833,9 +833,10 @@ def suggest_baseline(
specific explainability method. Currently, only SHAP is supported.
model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
endpoint to be created.
model_scores (int or str): Index or JSONPath location in the model output for the
predicted scores to be explained. This is not required if the model output is
a single score.
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
Index or JSONPath to locate the predicted scores in the model output. This is not
required if the model output is a single score. Alternatively, it can be an instance
of ModelPredictedLabelConfig to provide more parameters like label_headers.
wait (bool): Whether the call should wait until the job completes (default: False).
logs (bool): Whether to show the logs produced by the job.
Only meaningful when wait is True (default: False).
@@ -865,14 +866,24 @@
headers = copy.deepcopy(data_config.headers)
if headers and data_config.label in headers:
headers.remove(data_config.label)
if model_scores is None:
inference_attribute = None
label_headers = None
elif isinstance(model_scores, ModelPredictedLabelConfig):
inference_attribute = str(model_scores.label)
label_headers = model_scores.label_headers
else:
inference_attribute = str(model_scores)
label_headers = None
self.latest_baselining_job_config = ClarifyBaseliningConfig(
analysis_config=ExplainabilityAnalysisConfig(
explainability_config=explainability_config,
model_config=model_config,
headers=headers,
label_headers=label_headers,
),
features_attribute=data_config.features,
inference_attribute=model_scores if model_scores is None else str(model_scores),
inference_attribute=inference_attribute,
)
self.latest_baselining_job_name = baselining_job_name
self.latest_baselining_job = ClarifyBaseliningJob(
@@ -1166,7 +1177,7 @@ def attach(cls, monitor_schedule_name, sagemaker_session=None):
class ExplainabilityAnalysisConfig:
"""Analysis configuration for ModelExplainabilityMonitor."""

def __init__(self, explainability_config, model_config, headers=None):
def __init__(self, explainability_config, model_config, headers=None, label_headers=None):
"""Creates an analysis config dictionary.

Args:
@@ -1175,13 +1186,19 @@ def __init__(self, explainability_config, model_config, headers=None):
model_config (sagemaker.clarify.ModelConfig): Config object related to bias
configurations.
headers (list[str]): A list of feature names (without the label) of the model/endpoint input.
label_headers (list[str]): List of headers, each for a predicted score in model output.
It is used to beautify the analysis report by replacing placeholders like "label0".

"""
predictor_config = model_config.get_predictor_config()
self.analysis_config = {
"methods": explainability_config.get_explainability_config(),
"predictor": model_config.get_predictor_config(),
"predictor": predictor_config,
}
if headers is not None:
self.analysis_config["headers"] = headers
if label_headers is not None:
predictor_config["label_headers"] = label_headers

def _to_dict(self):
"""Generates a request dictionary using the parameters provided to the class."""
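Finally, a sketch of what the updated suggest_baseline path enables, reusing the placeholder data_config, model_config, and shap_config from the Clarify sketch earlier: passing a ModelPredictedLabelConfig carries label_headers into the ExplainabilityAnalysisConfig, which now injects them into the predictor section of the analysis config. The role and instance settings are illustrative only.

```python
from sagemaker.clarify import ModelPredictedLabelConfig
from sagemaker.model_monitor import ModelExplainabilityMonitor

# Placeholder monitor; role and instance settings are illustrative only.
model_explainability_monitor = ModelExplainabilityMonitor(
    role="arn:aws:iam::123456789012:role/SageMakerRole",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

model_explainability_monitor.suggest_baseline(
    data_config=data_config,            # placeholders defined in the earlier Clarify sketch
    explainability_config=shap_config,
    model_config=model_config,
    # label_headers end up in the analysis config's "predictor" section, so the
    # baselining report shows these names instead of "label0"/"label1".
    model_scores=ModelPredictedLabelConfig(
        probability=0,
        label_headers=["negative", "positive"],
    ),
)
```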