Skip to content

Commit 5c45e2c

Browse files
authored
Merge branch 'master' into accept-step-object-in-dependson-list
2 parents dad08c4 + 8414360 commit 5c45e2c

File tree

11 files changed: 104 additions and 17 deletions.

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## v2.49.0 (2021-07-15)
4+
5+
### Features
6+
7+
* Adding serial inference pipeline support to RegisterModel Step
8+
9+
### Documentation Changes
10+
11+
* Add tuning step get_top_model_s3_uri and callback step to docs
12+
* links for HF in sdk
13+
* Add Clarify module to Model Monitoring API docs
14+
315
## v2.48.2 (2021-07-12)
416

517
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.48.3.dev0
1+
2.49.1.dev0

src/sagemaker/clarify.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,21 +88,34 @@ def __init__(
8888
Args:
8989
label_values_or_threshold (Any): List of label values or threshold to indicate positive
9090
outcome used for bias metrics.
91-
facet_name (str): Sensitive attribute in the input data for which we like to compare
92-
metrics.
91+
facet_name (str or [str]): String or List of strings of sensitive attribute(s) in the
92+
input data for which we would like to compare metrics.
9393
facet_values_or_threshold (list): Optional list of values to form a sensitive group or
9494
threshold for a numeric facet column that defines the lower bound of a sensitive
9595
group. Defaults to considering each possible value as a sensitive group and
9696
computing metrics vs all the other examples.
97+
If facet_name is a list, this must be either None or a list with the same length as
98+
facet_name, in which each element is itself a list or None.
9799
group_name (str): Optional column name or index to indicate a group column to be used
98100
for the bias metric 'Conditional Demographic Disparity in Labels - CDDL' or
99101
'Conditional Demographic Disparity in Predicted Labels - CDDPL'.
100102
"""
101-
facet = {"name_or_index": facet_name}
102-
_set(facet_values_or_threshold, "value_or_threshold", facet)
103+
if isinstance(facet_name, str):
104+
facet = {"name_or_index": facet_name}
105+
_set(facet_values_or_threshold, "value_or_threshold", facet)
106+
facet_list = [facet]
107+
elif facet_values_or_threshold is None or len(facet_name) == len(facet_values_or_threshold):
108+
facet_list = []
109+
for i, single_facet_name in enumerate(facet_name):
110+
facet = {"name_or_index": single_facet_name}
111+
if facet_values_or_threshold is not None:
112+
_set(facet_values_or_threshold[i], "value_or_threshold", facet)
113+
facet_list.append(facet)
114+
else:
115+
raise ValueError("Wrong combination of argument values passed")
103116
self.analysis_config = {
104117
"label_values_or_threshold": label_values_or_threshold,
105-
"facet": [facet],
118+
"facet": facet_list,
106119
}
107120
_set(group_name, "group_variable", self.analysis_config)
108121

src/sagemaker/debugger/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from sagemaker.debugger.debugger import ( # noqa: F401
1717
CollectionConfig,
18+
DEBUGGER_FLAG,
1819
DebuggerHookConfig,
1920
framework_name,
2021
get_default_profiler_rule,

src/sagemaker/debugger/debugger.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from sagemaker.utils import build_dict
3333

3434
framework_name = "debugger"
35+
DEBUGGER_FLAG = "USE_SMDEBUG"
3536

3637

3738
def get_rule_container_image_uri(region):

src/sagemaker/estimator.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from sagemaker.analytics import TrainingJobAnalytics
3030
from sagemaker.debugger import TensorBoardOutputConfig # noqa: F401 # pylint: disable=unused-import
3131
from sagemaker.debugger import (
32+
DEBUGGER_FLAG,
3233
DebuggerHookConfig,
3334
FrameworkProfile,
3435
get_default_profiler_rule,
@@ -2269,6 +2270,11 @@ def _validate_and_set_debugger_configs(self):
22692270
)
22702271
self.debugger_hook_config = False
22712272

2273+
if self.debugger_hook_config is False:
2274+
if self.environment is None:
2275+
self.environment = {}
2276+
self.environment[DEBUGGER_FLAG] = "0"
2277+
22722278
def _stage_user_code_in_s3(self):
22732279
"""Upload the user training script to s3 and return the location.
22742280

src/sagemaker/huggingface/estimator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ def __init__(
7070
``image_uri`` is provided. The current supported version is ``4.6.1``.
7171
tensorflow_version (str): TensorFlow version you want to use for
7272
executing your model training code. Defaults to ``None``. Required unless
73-
``pytorch_version`` is provided. The current supported version is ``1.6.0``.
73+
``pytorch_version`` is provided. The current supported version is ``2.4.1``.
7474
pytorch_version (str): PyTorch version you want to use for
7575
executing your model training code. Defaults to ``None``. Required unless
76-
``tensorflow_version`` is provided. The current supported version is ``2.4.1``.
76+
``tensorflow_version`` is provided. The current supported versions are ``1.7.1`` and ``1.6.0``.
7777
source_dir (str): Path (absolute, relative or an S3 URI) to a directory
7878
with any other training source code dependencies aside from the entry
7979
point file (default: None). If ``source_dir`` is an S3 URI, it must

src/sagemaker/workflow/step_collections.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ def __init__(
112112
if "entry_point" in kwargs:
113113
repack_model = True
114114
entry_point = kwargs.pop("entry_point", None)
115-
source_dir = kwargs.get("source_dir")
116-
dependencies = kwargs.get("dependencies")
115+
source_dir = kwargs.pop("source_dir", None)
116+
dependencies = kwargs.pop("dependencies", None)
117117
kwargs = dict(**kwargs, output_kms_key=kwargs.pop("model_kms_key", None))
118118

119119
repack_model_step = _RepackModelStep(
@@ -130,13 +130,10 @@ def __init__(
130130
steps.append(repack_model_step)
131131
model_data = repack_model_step.properties.ModelArtifacts.S3ModelArtifacts
132132

133-
# remove kwargs consumed by model repacking step
134-
kwargs.pop("entry_point", None)
135-
kwargs.pop("source_dir", None)
136-
kwargs.pop("dependencies", None)
137-
kwargs.pop("output_kms_key", None)
133+
# remove kwargs consumed by model repacking step
134+
kwargs.pop("output_kms_key", None)
138135

139-
if model is not None:
136+
elif model is not None:
140137
if isinstance(model, PipelineModel):
141138
self.model_list = model.models
142139
self.container_def_list = model.pipeline_container_def(inference_instances[0])
@@ -156,7 +153,9 @@ def __init__(
156153
entry_point = model_entity.entry_point
157154
source_dir = model_entity.source_dir
158155
dependencies = model_entity.dependencies
156+
kwargs = dict(**kwargs, output_kms_key=model_entity.model_kms_key)
159157
name = model_entity.name or model_entity._framework_name
158+
160159
repack_model_step = _RepackModelStep(
161160
name=f"{name}RepackModel",
162161
depends_on=depends_on,
@@ -166,12 +165,16 @@ def __init__(
166165
entry_point=entry_point,
167166
source_dir=source_dir,
168167
dependencies=dependencies,
168+
**kwargs,
169169
)
170170
steps.append(repack_model_step)
171171
model_entity.model_data = (
172172
repack_model_step.properties.ModelArtifacts.S3ModelArtifacts
173173
)
174174

175+
# remove kwargs consumed by model repacking step
176+
kwargs.pop("output_kms_key", None)
177+
175178
register_model_step = _RegisterModelStep(
176179
name=name,
177180
estimator=estimator,

tests/integ/test_debugger.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pytest
1919

2020
from sagemaker.debugger.debugger import (
21+
DEBUGGER_FLAG,
2122
DebuggerHookConfig,
2223
Rule,
2324
rule_configs,
@@ -748,6 +749,7 @@ def test_mxnet_with_debugger_hook_config_disabled(
748749
job_description = mx.latest_training_job.describe()
749750

750751
assert job_description.get("DebugHookConfig") is None
752+
assert job_description.get("Environment", {}).get(DEBUGGER_FLAG) == "0"
751753

752754

753755
def _get_rule_evaluation_statuses(job_description):

tests/unit/sagemaker/workflow/test_steps.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
patch,
2424
)
2525

26-
from sagemaker.debugger import ProfilerConfig
26+
from sagemaker.debugger import DEBUGGER_FLAG, ProfilerConfig
2727
from sagemaker.estimator import Estimator
2828
from sagemaker.tensorflow import TensorFlow
2929
from sagemaker.inputs import TrainingInput, TransformInput, CreateModelInput
@@ -275,6 +275,7 @@ def test_training_step_tensorflow(sagemaker_session):
275275
"sagemaker_distributed_dataparallel_custom_mpi_options": '""',
276276
},
277277
"ProfilerConfig": {"S3OutputPath": "s3://my-bucket/"},
278+
"Environment": {DEBUGGER_FLAG: "0"},
278279
},
279280
"CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
280281
}

tests/unit/test_clarify.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,54 @@ def test_data_bias_config():
8989
assert expected_config == data_bias_config.get_config()
9090

9191

92+
def test_data_bias_config_multi_facet():
93+
label_values = [1]
94+
facet_name = ["Facet1", "Facet2"]
95+
facet_threshold = [[0], [1, 2]]
96+
group_name = "A151"
97+
98+
data_bias_config = BiasConfig(
99+
label_values_or_threshold=label_values,
100+
facet_name=facet_name,
101+
facet_values_or_threshold=facet_threshold,
102+
group_name=group_name,
103+
)
104+
105+
expected_config = {
106+
"label_values_or_threshold": label_values,
107+
"facet": [
108+
{"name_or_index": facet_name[0], "value_or_threshold": facet_threshold[0]},
109+
{"name_or_index": facet_name[1], "value_or_threshold": facet_threshold[1]},
110+
],
111+
"group_variable": group_name,
112+
}
113+
assert expected_config == data_bias_config.get_config()
114+
115+
116+
def test_data_bias_config_multi_facet_not_all_with_value():
117+
label_values = [1]
118+
facet_name = ["Facet1", "Facet2"]
119+
facet_threshold = [[0], None]
120+
group_name = "A151"
121+
122+
data_bias_config = BiasConfig(
123+
label_values_or_threshold=label_values,
124+
facet_name=facet_name,
125+
facet_values_or_threshold=facet_threshold,
126+
group_name=group_name,
127+
)
128+
129+
expected_config = {
130+
"label_values_or_threshold": label_values,
131+
"facet": [
132+
{"name_or_index": facet_name[0], "value_or_threshold": facet_threshold[0]},
133+
{"name_or_index": facet_name[1]},
134+
],
135+
"group_variable": group_name,
136+
}
137+
assert expected_config == data_bias_config.get_config()
138+
139+
92140
def test_model_config():
93141
model_name = "xgboost-model"
94142
instance_type = "ml.c5.xlarge"

0 commit comments

Comments
 (0)