Skip to content

Commit 30417db

Browse files
authored
Merge branch 'master' into framework-processor-python3
2 parents 4a562bd + c62ce81 commit 30417db

File tree

19 files changed

+260
-60
lines changed

19 files changed

+260
-60
lines changed

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,30 @@
11
# Changelog
22

3+
## v2.53.0 (2021-08-12)
4+
5+
### Features
6+
7+
* support tuning step parameter range parameterization + support retry strategy in tuner
8+
9+
## v2.52.2.post0 (2021-08-11)
10+
11+
### Documentation Changes
12+
13+
* clarify that default_bucket creates a bucket
14+
* Minor updates to Clarify API documentation
15+
16+
## v2.52.2 (2021-08-10)
17+
18+
### Bug Fixes and Other Changes
19+
20+
* sklearn integ tests, remove swallowing exception on feature group delete attempt
21+
* sklearn integ test for custom bucket
22+
23+
### Documentation Changes
24+
25+
* Fix dataset_definition links
26+
* Document LambdaModel and LambdaPredictor classes
27+
328
## v2.52.1 (2021-08-06)
429

530
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.52.2.dev0
1+
2.53.1.dev0

doc/workflows/pipelines/sagemaker.workflow.pipelines.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ ConditionStep
55
-------------
66

77
.. autoclass:: sagemaker.workflow.condition_step.ConditionStep
8-
98
.. deprecated:: sagemaker.workflow.condition_step.JsonGet
109

1110
Conditions

src/sagemaker/clarify.py

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,17 @@ def __init__(
4848
headers (list[str]): A list of column names in the input dataset.
4949
features (str): JSONPath for locating the feature columns for bias metrics if the
5050
dataset format is JSONLines.
51-
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
52-
and "application/jsonlines" for JSONLines.
51+
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
52+
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
5353
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
5454
"ShardedByS3Key".
5555
s3_compression_type (str): Valid options are "None" or "Gzip".
5656
"""
57+
if dataset_type not in ["text/csv", "application/jsonlines", "application/x-parquet"]:
58+
raise ValueError(
59+
f"Invalid dataset_type '{dataset_type}'."
60+
f" Please check the API documentation for the supported dataset types."
61+
)
5762
self.s3_data_input_path = s3_data_input_path
5863
self.s3_output_path = s3_output_path
5964
self.s3_data_distribution_type = s3_data_distribution_type
@@ -508,7 +513,7 @@ def run_pre_training_bias(
508513
kms_key=None,
509514
experiment_config=None,
510515
):
511-
"""Runs a ProcessingJob to compute the requested bias 'methods' of the input data.
516+
"""Runs a ProcessingJob to compute the pre-training bias methods of the input data.
512517
513518
Computes the requested methods that compare 'methods' (e.g. fraction of examples) for the
514519
sensitive group vs the other examples.
@@ -517,14 +522,14 @@ def run_pre_training_bias(
517522
data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
518523
data_bias_config (:class:`~sagemaker.clarify.BiasConfig`): Config of sensitive groups.
519524
methods (str or list[str]): Selector of a subset of potential metrics:
520-
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
521-
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
522-
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
523-
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
524-
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
525-
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
526-
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
527-
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
525+
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
526+
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
527+
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
528+
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
529+
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
530+
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
531+
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
532+
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
528533
Defaults to computing all.
529534
wait (bool): Whether the call should wait until the job completes (default: True).
530535
logs (bool): Whether to show the logs produced by the job.
@@ -538,7 +543,7 @@ def run_pre_training_bias(
538543
experiment_config (dict[str, str]): Experiment management configuration.
539544
Dictionary contains three optional keys:
540545
'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
541-
"""
546+
""" # noqa E501
542547
analysis_config = data_config.get_config()
543548
analysis_config.update(data_bias_config.get_config())
544549
analysis_config["methods"] = {"pre_training_bias": {"methods": methods}}
@@ -562,7 +567,7 @@ def run_post_training_bias(
562567
kms_key=None,
563568
experiment_config=None,
564569
):
565-
"""Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
570+
"""Runs a ProcessingJob to compute the post-training bias methods of the model predictions.
566571
567572
Spins up a model endpoint, runs inference over the input example in the
568573
's3_data_input_path' to obtain predicted labels. Computes the requested methods that
@@ -633,12 +638,11 @@ def run_bias(
633638
kms_key=None,
634639
experiment_config=None,
635640
):
636-
"""Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
641+
"""Runs a ProcessingJob to compute the requested bias methods.
637642
638-
Spins up a model endpoint, runs inference over the input example in the
639-
's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
640-
compare 'methods' (e.g. accuracy, precision, recall) for the sensitive group vs the other
641-
examples.
643+
It computes the metrics of both the pre-training methods and the post-training methods.
644+
To calculate post-training methods, it spins up a model endpoint and runs inference
645+
over the input example in the 's3_data_input_path' to obtain predicted labels.
642646
643647
Args:
644648
data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
@@ -648,14 +652,14 @@ def run_bias(
648652
model_predicted_label_config (:class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
649653
Config of how to extract the predicted label from the model output.
650654
pre_training_methods (str or list[str]): Selector of a subset of potential metrics:
651-
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
652-
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
653-
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
654-
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
655-
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
656-
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
657-
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
658-
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
655+
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
656+
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
657+
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
658+
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
659+
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
660+
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
661+
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
662+
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
659663
Defaults to computing all.
660664
post_training_methods (str or list[str]): Selector of a subset of potential metrics:
661665
["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
@@ -682,7 +686,7 @@ def run_bias(
682686
experiment_config (dict[str, str]): Experiment management configuration.
683687
Dictionary contains three optional keys:
684688
'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
685-
"""
689+
""" # noqa E501
686690
analysis_config = data_config.get_config()
687691
analysis_config.update(bias_config.get_config())
688692
analysis_config["predictor"] = model_config.get_predictor_config()

src/sagemaker/dataset_definition/inputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ class DatasetDefinition(ApiObject):
9999
Definition inputs to run a processing job. LocalPath is an absolute path to the input
100100
data. This is a required parameter when `AppManaged` is False (default).
101101
redshift_dataset_definition
102-
(:class:`~sagemaker.dataset_definition.RedshiftDatasetDefinition`): Redshift
102+
(:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`): Redshift
103103
dataset definition.
104-
athena_dataset_definition (:class:`~sagemaker.dataset_definition.AthenaDatasetDefinition`):
104+
athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
105105
Configuration for Athena Dataset Definition input.
106106
"""
107107

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,15 @@
44
"cpu"
55
],
66
"version_aliases": {
7-
"1.3": "1.3.1"
7+
"1.3": "1.3.1",
8+
"1.5": "1.5.1"
89
},
910
"versions": {
1011
"1.3.1": {
1112
"py_versions": [
1213
"py3"
1314
],
1415
"registries": {
15-
"af-south-1": "626614931356",
16-
"ap-east-1": "871362719292",
1716
"ap-northeast-1": "763104351884",
1817
"ap-northeast-2": "763104351884",
1918
"ap-northeast-3": "364406365360",
@@ -26,16 +25,22 @@
2625
"eu-central-1": "763104351884",
2726
"eu-north-1": "763104351884",
2827
"eu-west-1": "763104351884",
29-
"eu-west-2": "763104351884",
30-
"eu-west-3": "763104351884",
31-
"eu-south-1": "692866216735",
32-
"me-south-1": "217643126080",
33-
"sa-east-1": "763104351884",
3428
"us-east-1": "763104351884",
3529
"us-east-2": "763104351884",
36-
"us-gov-west-1": "442386744353",
37-
"us-iso-east-1": "886529160074",
38-
"us-west-1": "763104351884",
30+
"us-west-2": "763104351884"
31+
},
32+
"repository": "pytorch-inference-eia"
33+
},
34+
"1.5.1": {
35+
"py_versions": [
36+
"py3"
37+
],
38+
"registries": {
39+
"ap-northeast-1": "763104351884",
40+
"ap-northeast-2": "763104351884",
41+
"eu-west-1": "763104351884",
42+
"us-east-1": "763104351884",
43+
"us-east-2": "763104351884",
3944
"us-west-2": "763104351884"
4045
},
4146
"repository": "pytorch-inference-eia"

src/sagemaker/local/local_session.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,21 @@ def logs_for_job(self, job_name, wait=False, poll=5, log_type="All"):
571571
# on local mode.
572572
pass # pylint: disable=unnecessary-pass
573573

574+
def logs_for_processing_job(self, job_name, wait=False, poll=10):
575+
"""A no-op method meant to override the sagemaker client.
576+
577+
Args:
578+
job_name:
579+
wait: (Default value = False)
580+
poll: (Default value = 10)
581+
582+
Returns:
583+
584+
"""
585+
# override logs_for_processing_job() as it doesn't need to perform any action
586+
# on local mode.
587+
pass # pylint: disable=unnecessary-pass
588+
574589

575590
class file_input(object):
576591
"""Amazon SageMaker channel configuration for FILE data sources, used in local mode."""

src/sagemaker/parameter.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
# language governing permissions and limitations under the License.
1313
"""Placeholder docstring"""
1414
from __future__ import absolute_import
15+
1516
import json
17+
from sagemaker.workflow.parameters import Parameter as PipelineParameter
1618

1719

1820
class ParameterRange(object):
@@ -68,8 +70,12 @@ def as_tuning_range(self, name):
6870
"""
6971
return {
7072
"Name": name,
71-
"MinValue": str(self.min_value),
72-
"MaxValue": str(self.max_value),
73+
"MinValue": str(self.min_value)
74+
if not isinstance(self.min_value, PipelineParameter)
75+
else self.min_value,
76+
"MaxValue": str(self.max_value)
77+
if not isinstance(self.max_value, PipelineParameter)
78+
else self.max_value,
7379
"ScalingType": self.scaling_type,
7480
}
7581

@@ -103,9 +109,9 @@ def __init__(self, values): # pylint: disable=super-init-not-called
103109
This input will be converted into a list of strings.
104110
"""
105111
if isinstance(values, list):
106-
self.values = [str(v) for v in values]
112+
self.values = [str(v) if not isinstance(v, PipelineParameter) else v for v in values]
107113
else:
108-
self.values = [str(values)]
114+
self.values = [str(values) if not isinstance(values, PipelineParameter) else values]
109115

110116
def as_tuning_range(self, name):
111117
"""Represent the parameter range as a dictionary.

src/sagemaker/processing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,9 +1066,9 @@ def __init__(
10661066
s3_data_distribution_type (str): Valid options are "FullyReplicated"
10671067
or "ShardedByS3Key".
10681068
s3_compression_type (str): Valid options are "None" or "Gzip".
1069-
s3_input (:class:`~sagemaker.dataset_definition.S3Input`)
1069+
s3_input (:class:`~sagemaker.dataset_definition.inputs.S3Input`)
10701070
Metadata of data objects stored in S3
1071-
dataset_definition (:class:`~sagemaker.dataset_definition.DatasetDefinition`)
1071+
dataset_definition (:class:`~sagemaker.dataset_definition.inputs.DatasetDefinition`)
10721072
DatasetDefinition input
10731073
app_managed (bool): Whether the input is managed by SageMaker or by the application
10741074
"""

src/sagemaker/session.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ def list_s3_files(self, bucket, key_prefix):
357357
def default_bucket(self):
358358
"""Return the name of the default bucket to use in relevant Amazon SageMaker interactions.
359359
360+
This function will create the s3 bucket if it does not exist.
361+
360362
Returns:
361363
str: The name of the default bucket, which is of the form:
362364
``sagemaker-{region}-{AWS account ID}``.
@@ -2211,6 +2213,7 @@ def _map_training_config(
22112213
use_spot_instances=False,
22122214
checkpoint_s3_uri=None,
22132215
checkpoint_local_path=None,
2216+
max_retry_attempts=None,
22142217
):
22152218
"""Construct a dictionary of training job configuration from the arguments.
22162219
@@ -2264,6 +2267,7 @@ def _map_training_config(
22642267
objective_metric_name (str): Name of the metric for evaluating training jobs.
22652268
parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
22662269
be one of three types: Continuous, Integer, or Categorical.
2270+
max_retry_attempts (int): The number of times to retry the job.
22672271
22682272
Returns:
22692273
A dictionary of training job configuration. For format details, please refer to
@@ -2320,6 +2324,8 @@ def _map_training_config(
23202324
if parameter_ranges is not None:
23212325
training_job_definition["HyperParameterRanges"] = parameter_ranges
23222326

2327+
if max_retry_attempts is not None:
2328+
training_job_definition["RetryStrategy"] = {"MaximumRetryAttempts": max_retry_attempts}
23232329
return training_job_definition
23242330

23252331
def stop_tuning_job(self, name):

src/sagemaker/tuner.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1507,7 +1507,10 @@ def _get_tuner_args(cls, tuner, inputs):
15071507

15081508
if tuner.estimator is not None:
15091509
tuner_args["training_config"] = cls._prepare_training_config(
1510-
inputs, tuner.estimator, tuner.static_hyperparameters, tuner.metric_definitions
1510+
inputs=inputs,
1511+
estimator=tuner.estimator,
1512+
static_hyperparameters=tuner.static_hyperparameters,
1513+
metric_definitions=tuner.metric_definitions,
15111514
)
15121515

15131516
if tuner.estimator_dict is not None:
@@ -1580,6 +1583,9 @@ def _prepare_training_config(
15801583
if parameter_ranges is not None:
15811584
training_config["parameter_ranges"] = parameter_ranges
15821585

1586+
if estimator.max_retry_attempts is not None:
1587+
training_config["max_retry_attempts"] = estimator.max_retry_attempts
1588+
15831589
return training_config
15841590

15851591
def stop(self):

src/sagemaker/workflow/pipeline.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ def _interpolate(
320320
"""
321321
if isinstance(obj, (Expression, Parameter, Properties)):
322322
return obj.expr
323+
323324
if isinstance(obj, CallbackOutput):
324325
step_name = callback_output_to_step_map[obj.output_name]
325326
return obj.expr(step_name)

tests/integ/test_feature_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,4 +312,4 @@ def cleanup_feature_group(feature_group: FeatureGroup):
312312
try:
313313
feature_group.delete()
314314
except Exception:
315-
pass
315+
raise RuntimeError(f"Failed to delete feature group with name {feature_group.name}")

tests/integ/test_processing.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ def test_sklearn_with_customizations(
162162
sklearn_processor = SKLearnProcessor(
163163
framework_version=sklearn_latest_version,
164164
role=ROLE,
165+
command=["python3"],
165166
instance_type=cpu_instance_type,
166167
instance_count=1,
167168
volume_size_in_gb=100,
@@ -685,6 +686,7 @@ def test_sklearn_with_network_config(sagemaker_session, sklearn_latest_version,
685686
sklearn_processor = SKLearnProcessor(
686687
framework_version=sklearn_latest_version,
687688
role=ROLE,
689+
command=["python3"],
688690
instance_type=cpu_instance_type,
689691
instance_count=1,
690692
sagemaker_session=sagemaker_session,

0 commit comments

Comments
 (0)