
Commit 62cfe98

Merge branch 'master' into master
2 parents 1d4e23a + 5adc2d3 commit 62cfe98

39 files changed: +1916 −94 lines

.gitignore

Lines changed: 3 additions & 1 deletion

@@ -29,4 +29,6 @@ venv/
 env/
 .vscode/
 **/tmp
-.python-version
+.python-version
+**/_repack_model.py
+**/_repack_script_launcher.sh

CHANGELOG.md

Lines changed: 15 additions & 0 deletions

@@ -1,5 +1,20 @@
 # Changelog
 
+## v2.113.0 (2022-10-21)
+
+### Features
+
+* support torch_distributed distribution for Trainium instances
+
+### Bug Fixes and Other Changes
+
+* bump apache-airflow from 2.4.0 to 2.4.1 in /requirements/extras
+
+### Documentation Changes
+
+* fix kwargs and descriptions of the smdmp checkpoint function
+* add the doc for the MonitorBatchTransformStep
+
 ## v2.112.2 (2022-10-11)
 
 ### Bug Fixes and Other Changes
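
The torch_distributed feature noted above is enabled through the PyTorch estimator's distribution argument. A minimal sketch, assuming a Trainium instance type such as ml.trn1.2xlarge; the role ARN, entry point, and S3 path are placeholders, not values from this commit:

from sagemaker.pytorch import PyTorch

# Placeholder role/script/data; framework version 1.11.0 is listed in
# TORCH_DISTRIBUTED_SUPPORTED_FRAMEWORK_VERSIONS further down in this diff.
estimator = PyTorch(
    entry_point="train.py",
    role="arn:aws:iam::111122223333:role/SageMakerRole",
    framework_version="1.11.0",
    py_version="py38",
    instance_type="ml.trn1.2xlarge",  # Trainium instance
    instance_count=1,
    distribution={"torch_distributed": {"enabled": True}},
)
estimator.fit("s3://my-bucket/training-data")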

VERSION

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-2.112.3.dev0
+2.113.1.dev0

src/sagemaker/fw_utils.py

Lines changed: 9 additions & 0 deletions

@@ -134,10 +134,13 @@
     "1.12.0",
 ]
 
+
 TORCH_DISTRIBUTED_SUPPORTED_FRAMEWORK_VERSIONS = ["1.11", "1.11.0"]
 
+
 TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]
 
+
 SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]
 
 
@@ -160,6 +163,12 @@ def validate_source_dir(script, directory):
     return True
 
 
+GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY = ["c6g", "t4g", "r6g", "m6g"]
+
+
+GRAVITON_ALLOWED_FRAMEWORKS = set(["tensorflow", "pytorch"])
+
+
 def validate_source_code_input_against_pipeline_variables(
     entry_point: Optional[Union[str, PipelineVariable]] = None,
     source_dir: Optional[Union[str, PipelineVariable]] = None,

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 45 additions & 0 deletions

@@ -654,6 +654,51 @@
       }
     }
   },
+  "inference_graviton": {
+    "processors": [
+      "cpu"
+    ],
+    "version_aliases": {
+      "1.12": "1.12.1"
+    },
+    "versions": {
+      "1.12.1": {
+        "py_versions": [
+          "py38"
+        ],
+        "registries": {
+          "af-south-1": "626614931356",
+          "ap-east-1": "871362719292",
+          "ap-northeast-1": "763104351884",
+          "ap-northeast-2": "763104351884",
+          "ap-northeast-3": "364406365360",
+          "ap-south-1": "763104351884",
+          "ap-southeast-1": "763104351884",
+          "ap-southeast-2": "763104351884",
+          "ap-southeast-3": "907027046896",
+          "ca-central-1": "763104351884",
+          "cn-north-1": "727897471807",
+          "cn-northwest-1": "727897471807",
+          "eu-central-1": "763104351884",
+          "eu-north-1": "763104351884",
+          "eu-west-1": "763104351884",
+          "eu-west-2": "763104351884",
+          "eu-west-3": "763104351884",
+          "eu-south-1": "692866216735",
+          "me-south-1": "217643126080",
+          "sa-east-1": "763104351884",
+          "us-east-1": "763104351884",
+          "us-east-2": "763104351884",
+          "us-gov-west-1": "442386744353",
+          "us-iso-east-1": "886529160074",
+          "us-west-1": "763104351884",
+          "us-west-2": "763104351884"
+        },
+        "repository": "pytorch-inference-graviton",
+        "container_version": {"cpu": "ubuntu20.04"}
+      }
+    }
+  },
   "training": {
     "processors": [
       "cpu",

src/sagemaker/image_uri_config/tensorflow.json

Lines changed: 45 additions & 0 deletions

@@ -1471,6 +1471,51 @@
       }
     }
   },
+  "inference_graviton": {
+    "processors": [
+      "cpu"
+    ],
+    "version_aliases": {
+      "2.9": "2.9.1"
+    },
+    "versions": {
+      "2.9.1": {
+        "py_versions": [
+          "py38"
+        ],
+        "registries": {
+          "af-south-1": "626614931356",
+          "ap-east-1": "871362719292",
+          "ap-northeast-1": "763104351884",
+          "ap-northeast-2": "763104351884",
+          "ap-northeast-3": "364406365360",
+          "ap-south-1": "763104351884",
+          "ap-southeast-1": "763104351884",
+          "ap-southeast-2": "763104351884",
+          "ap-southeast-3": "907027046896",
+          "ca-central-1": "763104351884",
+          "cn-north-1": "727897471807",
+          "cn-northwest-1": "727897471807",
+          "eu-central-1": "763104351884",
+          "eu-north-1": "763104351884",
+          "eu-west-1": "763104351884",
+          "eu-west-2": "763104351884",
+          "eu-west-3": "763104351884",
+          "eu-south-1": "692866216735",
+          "me-south-1": "217643126080",
+          "sa-east-1": "763104351884",
+          "us-east-1": "763104351884",
+          "us-east-2": "763104351884",
+          "us-gov-west-1": "442386744353",
+          "us-iso-east-1": "886529160074",
+          "us-west-1": "763104351884",
+          "us-west-2": "763104351884"
+        },
+        "repository": "tensorflow-inference-graviton",
+        "container_version": {"cpu": "ubuntu20.04"}
+      }
+    }
+  },
   "training": {
     "processors": [
       "cpu",

src/sagemaker/image_uris.py

Lines changed: 14 additions & 0 deletions

@@ -25,6 +25,7 @@
 from sagemaker.jumpstart import artifacts
 from sagemaker.workflow import is_pipeline_variable
 from sagemaker.workflow.utilities import override_pipeline_parameter_var
+from sagemaker.fw_utils import GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY, GRAVITON_ALLOWED_FRAMEWORKS
 
 logger = logging.getLogger(__name__)
 
@@ -151,6 +152,7 @@ def retrieve(
     inference_tool = _get_inference_tool(inference_tool, instance_type)
     if inference_tool == "neuron":
         _framework = f"{framework}-{inference_tool}"
+    image_scope = _get_image_scope_for_instance_type(_framework, instance_type, image_scope)
     config = _config_for_framework_and_scope(_framework, image_scope, accelerator_type)
 
     original_version = version
@@ -216,6 +218,9 @@ def retrieve(
     else:
         tag_prefix = version_config.get("tag_prefix", version)
 
+    if repo == f"{framework}-inference-graviton":
+        container_version = f"{container_version}-sagemaker"
+
     tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool)
 
     if instance_type is not None and _should_auto_select_container_version(
@@ -287,6 +292,15 @@ def config_for_framework(framework):
         return json.load(f)
 
 
+def _get_image_scope_for_instance_type(framework, instance_type, image_scope):
+    """Extract the image scope from instance type."""
+    if framework in GRAVITON_ALLOWED_FRAMEWORKS and isinstance(instance_type, str):
+        match = re.match(r"^ml[\._]([a-z\d]+)\.?\w*$", instance_type)
+        if match and match[1] in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY:
+            return "inference_graviton"
+    return image_scope
+
+
 def _get_inference_tool(inference_tool, instance_type):
     """Extract the inference tool name from instance type."""
     if not inference_tool and instance_type:
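
The new _get_image_scope_for_instance_type helper keys the scope switch off the instance family extracted by the regex above. A standalone sketch of the intended behavior (re-implemented here for illustration, not imported from the module):

import re

GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY = ["c6g", "t4g", "r6g", "m6g"]

def scope_for(instance_type, image_scope="inference"):
    # "ml.c6g.xlarge" -> family "c6g"; "ml.c5.xlarge" -> family "c5"
    match = re.match(r"^ml[\._]([a-z\d]+)\.?\w*$", instance_type)
    if match and match[1] in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY:
        return "inference_graviton"
    return image_scope

print(scope_for("ml.c6g.xlarge"))  # inference_graviton
print(scope_for("ml.c5.xlarge"))   # inference (scope unchanged)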

src/sagemaker/inputs.py

Lines changed: 43 additions & 0 deletions

@@ -176,6 +176,7 @@ class TransformInput(object):
     output_filter: str = attr.ib(default=None)
     join_source: str = attr.ib(default=None)
     model_client_config: dict = attr.ib(default=None)
+    batch_data_capture_config: dict = attr.ib(default=None)
 
 
 class FileSystemInput(object):
@@ -232,3 +233,45 @@ def __init__(
 
         if content_type:
             self.config["ContentType"] = content_type
+
+
+class BatchDataCaptureConfig(object):
+    """Configuration object passed in when create a batch transform job.
+
+    Specifies configuration related to batch transform job data capture for use with
+    Amazon SageMaker Model Monitoring
+    """
+
+    def __init__(
+        self,
+        destination_s3_uri: str,
+        kms_key_id: str = None,
+        generate_inference_id: bool = None,
+    ):
+        """Create new BatchDataCaptureConfig
+
+        Args:
+            destination_s3_uri (str): S3 Location to store the captured data
+            kms_key_id (str): The KMS key to use when writing to S3.
+                KmsKeyId can be an ID of a KMS key, ARN of a KMS key, alias of a KMS key,
+                or alias of a KMS key. The KmsKeyId is applied to all outputs.
+                (default: None)
+            generate_inference_id (bool): Flag to generate an inference id
+                (default: None)
+        """
+        self.destination_s3_uri = destination_s3_uri
+        self.kms_key_id = kms_key_id
+        self.generate_inference_id = generate_inference_id
+
+    def _to_request_dict(self):
+        """Generates a request dictionary using the parameters provided to the class."""
+        batch_data_capture_config = {
+            "DestinationS3Uri": self.destination_s3_uri,
+        }
+
+        if self.kms_key_id is not None:
+            batch_data_capture_config["KmsKeyId"] = self.kms_key_id
+        if self.generate_inference_id is not None:
+            batch_data_capture_config["GenerateInferenceId"] = self.generate_inference_id
+
+        return batch_data_capture_config
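
A short sketch of how the new BatchDataCaptureConfig maps onto its request payload; the S3 URI and KMS alias are placeholders:

from sagemaker.inputs import BatchDataCaptureConfig

capture_config = BatchDataCaptureConfig(
    destination_s3_uri="s3://my-bucket/transform-data-capture",  # placeholder bucket
    kms_key_id="alias/my-kms-key",                               # placeholder key
    generate_inference_id=True,
)

print(capture_config._to_request_dict())
# {'DestinationS3Uri': 's3://my-bucket/transform-data-capture',
#  'KmsKeyId': 'alias/my-kms-key',
#  'GenerateInferenceId': True}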

src/sagemaker/job.py

Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ def _load_config(inputs, estimator, expand_role=True, validate_uri=True):
     input_config = _Job._format_inputs_to_input_config(inputs, validate_uri)
     role = (
         estimator.sagemaker_session.expand_role(estimator.role)
-        if expand_role
+        if (expand_role and not is_pipeline_variable(estimator.role))
        else estimator.role
     )
     output_config = _Job._prepare_output_config(estimator.output_path, estimator.output_kms_key)
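
The added check matters when the estimator's role is a pipeline variable rather than a literal ARN: such a value has no concrete string at pipeline-definition time, so expand_role() cannot resolve it and is now skipped. A minimal illustration (the parameter name is made up):

from sagemaker.workflow import is_pipeline_variable
from sagemaker.workflow.parameters import ParameterString

role = ParameterString(name="ExecutionRole")
# Pipeline parameters are only resolved at execution time, so the new
# condition leaves them untouched instead of calling expand_role().
assert is_pipeline_variable(role)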

src/sagemaker/model_monitor/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -23,6 +23,7 @@
 from sagemaker.model_monitor.model_monitoring import BaseliningJob  # noqa: F401
 from sagemaker.model_monitor.model_monitoring import MonitoringExecution  # noqa: F401
 from sagemaker.model_monitor.model_monitoring import EndpointInput  # noqa: F401
+from sagemaker.model_monitor.model_monitoring import BatchTransformInput  # noqa: F401
 from sagemaker.model_monitor.model_monitoring import MonitoringOutput  # noqa: F401
 from sagemaker.model_monitor.model_monitoring import ModelQualityMonitor  # noqa: F401
 
@@ -42,5 +43,6 @@
 
 from sagemaker.model_monitor.data_capture_config import DataCaptureConfig  # noqa: F401
 from sagemaker.model_monitor.dataset_format import DatasetFormat  # noqa: F401
+from sagemaker.model_monitor.dataset_format import MonitoringDatasetFormat  # noqa: F401
 
 from sagemaker.network import NetworkConfig  # noqa: F401
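
BatchTransformInput and MonitoringDatasetFormat are now importable from the package root. A hedged sketch of how they might be combined to point a monitor at data captured from a batch transform job; the constructor arguments and paths shown are assumptions, not taken from this diff:

from sagemaker.model_monitor import BatchTransformInput, MonitoringDatasetFormat

# Assumed signature: the monitor reads captured batch transform data instead
# of real-time endpoint data capture.
batch_input = BatchTransformInput(
    data_captured_destination_s3_uri="s3://my-bucket/transform-data-capture",  # placeholder
    destination="/opt/ml/processing/input",
    dataset_format=MonitoringDatasetFormat.csv(header=False),
)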
