Skip to content

Commit 5c7a9fd

Browse files
authored
Merge branch 'master' into fix/remove-unnecessary-get-caller-identity-call
2 parents 37ed673 + 0967a93 commit 5c7a9fd

File tree

15 files changed

+1936
-75
lines changed

15 files changed

+1936
-75
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
# Changelog
22

3+
## v2.169.0 (2023-06-29)
4+
5+
### Features
6+
7+
* Add support for tags in to_pipeline API for feature processor
8+
* model registry integration to model cards to support model packages
9+
* SDK Defaults - DebugHookConfig defaults in TrainingJob API
10+
* Add segment config for Clarify
11+
12+
### Bug Fixes and Other Changes
13+
14+
* Neuronx image retrieval missing sdk information
15+
16+
### Documentation Changes
17+
18+
* Doc updates for SDK defaults - S3 Params, Env Variables, Disable Profiler, and DebugHookConfig
19+
320
## v2.168.0 (2023-06-22)
421

522
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.168.1.dev0
1+
2.169.1.dev0

src/sagemaker/feature_store/feature_processor/_constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,8 @@
4040
S3_DATA_DISTRIBUTION_TYPE = "FullyReplicated"
4141
PIPELINE_CONTEXT_NAME_TAG_KEY = "sm-fs-fe:feature-engineering-pipeline-context-name"
4242
PIPELINE_VERSION_CONTEXT_NAME_TAG_KEY = "sm-fs-fe:feature-engineering-pipeline-version-context-name"
43+
TO_PIPELINE_RESERVED_TAG_KEYS = [
44+
FEATURE_PROCESSOR_TAG_KEY,
45+
PIPELINE_CONTEXT_NAME_TAG_KEY,
46+
PIPELINE_VERSION_CONTEXT_NAME_TAG_KEY,
47+
]

src/sagemaker/feature_store/feature_processor/feature_scheduler.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import json
1717
import re
1818
from datetime import datetime
19-
from typing import Callable, List, Optional, Dict, Sequence, Union, Any
19+
from typing import Callable, List, Optional, Dict, Sequence, Union, Any, Tuple
2020

2121
import pytz
2222
from botocore.exceptions import ClientError
@@ -58,6 +58,7 @@
5858
PIPELINE_NAME_MAXIMUM_LENGTH,
5959
RESOURCE_NOT_FOUND,
6060
FEATURE_GROUP_ARN_REGEX_PATTERN,
61+
TO_PIPELINE_RESERVED_TAG_KEYS,
6162
)
6263
from sagemaker.feature_store.feature_processor._feature_processor_config import (
6364
FeatureProcessorConfig,
@@ -107,6 +108,7 @@ def to_pipeline(
107108
role: Optional[str] = None,
108109
transformation_code: Optional[TransformationCode] = None,
109110
max_retries: Optional[int] = None,
111+
tags: Optional[List[Tuple[str, str]]] = None,
110112
sagemaker_session: Optional[Session] = None,
111113
) -> str:
112114
"""Creates a sagemaker pipeline that takes in a callable as a training step.
@@ -127,6 +129,8 @@ def to_pipeline(
127129
code for Lineage tracking. This code is not used for actual transformation.
128130
max_retries (Optional[int]): The number of times to retry sagemaker pipeline step.
129131
If not specified, sagemaker pipeline step will not retry.
132+
tags (List[Tuple[str, str]]): A list of tags attached to the pipeline. If not specified,
133+
no custom tags will be attached to the pipeline.
130134
sagemaker_session (Optional[Session]): Session object which manages interactions
131135
with Amazon SageMaker APIs and any other AWS services needed. If not specified, the
132136
function creates one using the default AWS configuration chain.
@@ -135,6 +139,8 @@ def to_pipeline(
135139
"""
136140

137141
_validate_input_for_to_pipeline_api(pipeline_name, step)
142+
if tags:
143+
_validate_tags_for_to_pipeline_api(tags)
138144

139145
_sagemaker_session = sagemaker_session or Session()
140146

@@ -200,12 +206,15 @@ def to_pipeline(
200206
sagemaker_session=_sagemaker_session,
201207
parameters=[SCHEDULED_TIME_PIPELINE_PARAMETER],
202208
)
209+
pipeline_tags = [dict(Key=FEATURE_PROCESSOR_TAG_KEY, Value=FEATURE_PROCESSOR_TAG_VALUE)]
210+
if tags:
211+
pipeline_tags.extend([dict(Key=k, Value=v) for k, v in tags])
203212

204213
pipeline = Pipeline(**pipeline_request_dict)
205214
logger.info("Creating/Updating sagemaker pipeline %s", pipeline_name)
206215
pipeline.upsert(
207216
role_arn=_role,
208-
tags=[dict(Key=FEATURE_PROCESSOR_TAG_KEY, Value=FEATURE_PROCESSOR_TAG_VALUE)],
217+
tags=pipeline_tags,
209218
)
210219
logger.info("Created sagemaker pipeline %s", pipeline_name)
211220

@@ -514,6 +523,23 @@ def _validate_input_for_to_pipeline_api(pipeline_name: str, step: Callable) -> N
514523
)
515524

516525

526+
def _validate_tags_for_to_pipeline_api(tags: List[Tuple[str, str]]) -> None:
527+
"""Validate tags provided to to_pipeline API.
528+
529+
Args:
530+
tags (List[Tuple[str, str]]): A list of tags attached to the pipeline.
531+
532+
Raises (ValueError): raises ValueError when any of the following scenario happen:
533+
1. reserved tag keys are provided to API.
534+
"""
535+
provided_tag_keys = [tag_key_value_pair[0] for tag_key_value_pair in tags]
536+
for reserved_tag_key in TO_PIPELINE_RESERVED_TAG_KEYS:
537+
if reserved_tag_key in provided_tag_keys:
538+
raise ValueError(
539+
f"{reserved_tag_key} is a reserved tag key for to_pipeline API. Please choose another tag."
540+
)
541+
542+
517543
def _validate_lineage_resources_for_to_pipeline_api(
518544
feature_processor_config: FeatureProcessorConfig, sagemaker_session: Session
519545
) -> None:

src/sagemaker/model_card/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
EvaluationJob,
2929
AdditionalInformation,
3030
ModelCard,
31+
ModelPackage,
3132
)
3233

3334
from sagemaker.model_card.schema_constraints import ( # noqa: F401 # pylint: disable=unused-import

src/sagemaker/model_card/evaluation_metric_parsers.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class EvaluationMetricTypeEnum(str, Enum):
3131
MODEL_CARD_METRIC_SCHEMA = "Model Card Metric Schema"
3232
CLARIFY_BIAS = "Clarify Bias"
3333
CLARIFY_EXPLAINABILITY = "Clarify Explainability"
34+
MODEL_MONITOR_MODEL_QUALITY = "Model Monitor Model Quality"
3435
REGRESSION = "Model Monitor Model Quality Regression"
3536
BINARY_CLASSIFICATION = "Model Monitor Model Quality Binary Classification"
3637
MULTICLASS_CLASSIFICATION = "Model Monitor Model Quality Multiclass Classification"
@@ -138,6 +139,7 @@ def _parse(self, json_data: dict):
138139
[
139140
{"name": i["name"], "value": i["value"], "type": "number"}
140141
for i in item["metrics"]
142+
if i["value"] is not None
141143
]
142144
)
143145
for group_name, metric_data in group_data.items():
@@ -368,9 +370,10 @@ def _parse(self, json_data: dict):
368370
result = {"metric_groups": []}
369371
for group_name, group_data in json_data.items():
370372
metric_data = []
371-
for metric_name, raw_data in group_data.item():
372-
metric_data.extend(self._parse_basic_metric(metric_name, raw_data))
373-
result["metric_groups"].append({"name": group_name, "metric_data": metric_data})
373+
if group_name == "regression_metrics":
374+
for metric_name, raw_data in group_data.items():
375+
metric_data.extend(self._parse_basic_metric(metric_name, raw_data))
376+
result["metric_groups"].append({"name": group_name, "metric_data": metric_data})
374377
return result
375378

376379

@@ -388,7 +391,7 @@ def _validate(self, json_data: dict):
388391
"""
389392
if (
390393
"binary_classification_metrics" not in json_data
391-
and "multiclass_classification_metrics" in json_data
394+
and "multiclass_classification_metrics" not in json_data
392395
):
393396
raise ValueError("Missing *_classification_metrics from the metric data.")
394397

@@ -401,6 +404,11 @@ def _parse(self, json_data: dict):
401404
result = {"metric_groups": []}
402405
for group_name, group_data in json_data.items():
403406
metric_data = []
407+
if group_name not in (
408+
"binary_classification_metrics",
409+
"multiclass_classification_metrics",
410+
):
411+
continue
404412
for metric_name, raw_data in group_data.items():
405413
metric_data.extend(self._parse_confusion_matrix(metric_name, raw_data))
406414
metric_data.extend(
@@ -506,11 +514,45 @@ def _parse_precision_recall_curve(self, metric_name, raw_data):
506514
return metric_data
507515

508516

517+
class ModelMonitorModelQualityParser(ParserBase):
518+
"""Top level parser for model monitor model quality metric type"""
519+
520+
def _validate(self, json_data: dict):
521+
"""Implement ParserBase._validate.
522+
523+
Args:
524+
json_data (dict): Metric data to be validated.
525+
526+
Raises:
527+
ValueError: missing model monitor model quality metrics.
528+
"""
529+
if len(json_data) == 0:
530+
raise ValueError("Missing model monitor model quality metrics from the metric data.")
531+
532+
def _parse(self, json_data: dict):
533+
"""Implement ParserBase._parse.
534+
535+
Args:
536+
json_data (dict): Raw metric data.
537+
"""
538+
result = {"metric_groups": []}
539+
if "regression_metrics" in json_data:
540+
result = RegressionParser().run(json_data)
541+
elif (
542+
"binary_classification_metrics" in json_data
543+
or "multiclass_classification_metrics" in json_data
544+
):
545+
result = ClassificationParser().run(json_data)
546+
547+
return result
548+
549+
509550
EVALUATION_METRIC_PARSERS = {
510551
EvaluationMetricTypeEnum.MODEL_CARD_METRIC_SCHEMA: DefaultParser(),
511552
EvaluationMetricTypeEnum.CLARIFY_BIAS: ClarifyBiasParser(),
512553
EvaluationMetricTypeEnum.CLARIFY_EXPLAINABILITY: ClarifyExplainabilityParser(),
513554
EvaluationMetricTypeEnum.REGRESSION: RegressionParser(),
514555
EvaluationMetricTypeEnum.BINARY_CLASSIFICATION: ClassificationParser(),
515556
EvaluationMetricTypeEnum.MULTICLASS_CLASSIFICATION: ClassificationParser(),
557+
EvaluationMetricTypeEnum.MODEL_MONITOR_MODEL_QUALITY: ModelMonitorModelQualityParser(),
516558
}

src/sagemaker/model_card/helpers.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,21 @@ def _clean_descriptor_name(self, name: str):
6262

6363
return name
6464

65+
def _skip_encoding(self, attr: str):
66+
"""Skip encoding if the attribute is an instance of _SkipEncodingDecoding descriptor"""
67+
if attr in self.__class__.__dict__:
68+
return isinstance(self.__class__.__dict__[attr], _SkipEncodingDecoding)
69+
70+
return False
71+
6572
def _to_request_dict(self):
6673
"""Implement this method in a subclass to return a custom request_dict."""
6774
request_data = {}
6875
for attr, value in self.__dict__.items():
6976
if value is not None:
7077
name = self._clean_descriptor_name(attr)
71-
request_data[name] = value
78+
if not self._skip_encoding(name):
79+
request_data[name] = value
7280

7381
return request_data
7482

@@ -149,6 +157,38 @@ def decode(self, value: dict):
149157
pass # pylint: disable=W0107
150158

151159

160+
class _SkipEncodingDecoding(_DescriptorBase):
161+
"""Object that skip the encoding/decoding in model card attributes."""
162+
163+
def __init__(self, value_type: Any):
164+
"""Initialize an SkipEncodingDecoding descriptor.
165+
166+
Args:
167+
value_type (Any): Value type of the attribute.
168+
"""
169+
self.value_type = value_type
170+
171+
def validate(self, value: Any):
172+
"""Check if value type is valid.
173+
174+
Args:
175+
value (Any): value type depends on self.value_type
176+
177+
Raises:
178+
ValueError: value is not a self.value_type.
179+
"""
180+
if value is not None and not isinstance(value, self.value_type):
181+
raise ValueError(f"Please assign a {self.value_type} to {self.private_name[1:]}")
182+
183+
def require_decode(self, value: Any):
184+
"""No decoding is required."""
185+
return False
186+
187+
def decode(self, value: Any):
188+
"""No decoding is required. Required placeholder for abstractmethod"""
189+
pass # pylint: disable=W0107
190+
191+
152192
class _OneOf(_DescriptorBase):
153193
"""Verifies that a value is one of a restricted set of options"""
154194

@@ -463,9 +503,12 @@ def _read_s3_json(session: Session, bucket: str, key: str):
463503
raise
464504

465505
result = {}
466-
if data["ContentType"] == "application/json":
506+
if data["ContentType"] == "application/json" or data["ContentType"] == "binary/octet-stream":
467507
result = json.loads(data["Body"].read().decode("utf-8"))
468508
else:
469-
logger.warning("Invalid file type %s. application/json is expected.", data["ContentType"])
509+
logger.warning(
510+
"Invalid file type %s. application/json or binary/octet-stream is expected.",
511+
data["ContentType"],
512+
)
470513

471514
return result

0 commit comments

Comments
 (0)