
Commit b236877

Merge branch 'master' into trt
2 parents 99fa6cd + 486cf44

File tree: 11 files changed (+92, -36 lines)

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
@@ -1,5 +1,21 @@
 # Changelog

+## v2.32.0 (2021-03-26)
+
+### Features
+
+* upgrade neo mxnet to 1.8
+* Enable Profiler in China Regions
+
+### Bug Fixes and Other Changes
+
+* use workflow parameters in training hyperparameters (#2114) (#2115)
+* skip HuggingFace tests in regions without p2 instances
+
+### Documentation Changes
+
+* add Feature Store methods docs
+
 ## v2.31.1 (2021-03-23)

 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.31.2.dev0
+2.32.1.dev0

src/sagemaker/estimator.py

Lines changed: 7 additions & 1 deletion
@@ -52,6 +52,9 @@
     _region_supports_profiler,
     get_mp_parameters,
 )
+from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import Parameter
+from sagemaker.workflow.entities import Expression
 from sagemaker.inputs import TrainingInput
 from sagemaker.job import _Job
 from sagemaker.local import LocalSession
@@ -1456,7 +1459,10 @@ def _get_train_args(cls, estimator, inputs, experiment_config):

         current_hyperparameters = estimator.hyperparameters()
         if current_hyperparameters is not None:
-            hyperparameters = {str(k): str(v) for (k, v) in current_hyperparameters.items()}
+            hyperparameters = {
+                str(k): (v if isinstance(v, (Parameter, Expression, Properties)) else str(v))
+                for (k, v) in current_hyperparameters.items()
+            }

         train_args = config.copy()
         train_args["input_mode"] = estimator.input_mode

src/sagemaker/image_uri_config/neo-mxnet.json

Lines changed: 13 additions & 12 deletions
@@ -2,20 +2,21 @@
     "processors": ["cpu", "gpu"],
     "scope": ["inference"],
     "version_aliases": {
-        "0.12.1": "1.7",
-        "1.0.0": "1.7",
-        "1.1.0": "1.7",
-        "1.2": "1.7",
-        "1.2.0": "1.7",
-        "1.2.1": "1.7",
-        "1.3": "1.7",
-        "1.3.0": "1.7",
-        "1.4": "1.7",
-        "1.4.0": "1.7",
-        "1.4.1": "1.7"
+        "0.12.1": "1.8",
+        "1.0.0": "1.8",
+        "1.1.0": "1.8",
+        "1.2": "1.8",
+        "1.2.0": "1.8",
+        "1.2.1": "1.8",
+        "1.3": "1.8",
+        "1.3.0": "1.8",
+        "1.4": "1.8",
+        "1.4.0": "1.8",
+        "1.4.1": "1.8",
+        "1.7": "1.8"
     },
     "versions": {
-        "1.7": {
+        "1.8": {
             "py_versions": ["py3"],
             "registries": {
                 "af-south-1": "774647643957",

src/sagemaker/processing.py

Lines changed: 6 additions & 4 deletions
@@ -32,6 +32,7 @@
 from sagemaker.session import Session
 from sagemaker.network import NetworkConfig  # noqa: F401 # pylint: disable=unused-import
 from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import Parameter
 from sagemaker.workflow.entities import Expression
 from sagemaker.dataset_definition.inputs import S3Input, DatasetDefinition
 from sagemaker.apiutils._base_types import ApiObject
@@ -292,7 +293,9 @@ def _normalize_inputs(self, inputs=None, kms_key=None):
             if isinstance(file_input.source, Properties) or file_input.dataset_definition:
                 normalized_inputs.append(file_input)
                 continue
-
+            if isinstance(file_input.s3_input.s3_uri, (Parameter, Expression, Properties)):
+                normalized_inputs.append(file_input)
+                continue
             # If the source is a local path, upload it to S3
             # and save the S3 uri in the ProcessingInput source.
             parse_result = urlparse(file_input.s3_input.s3_uri)
@@ -340,8 +343,7 @@ def _normalize_outputs(self, outputs=None):
             # Generate a name for the ProcessingOutput if it doesn't have one.
             if output.output_name is None:
                 output.output_name = "output-{}".format(count)
-            # if the output's destination is a workflow expression, do no normalization
-            if isinstance(output.destination, Expression):
+            if isinstance(output.destination, (Parameter, Expression, Properties)):
                 normalized_outputs.append(output)
                 continue
             # If the output's destination is not an s3_uri, create one.
@@ -1099,7 +1101,7 @@ def _create_s3_input(self):
             self.s3_data_type = self.s3_input.s3_data_type
             self.s3_input_mode = self.s3_input.s3_input_mode
             self.s3_data_distribution_type = self.s3_input.s3_data_distribution_type
-        elif self.source and self.destination:
+        elif self.source is not None and self.destination is not None:
             self.s3_input = S3Input(
                 s3_uri=self.source,
                 local_path=self.destination,
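Note: taken together, these hunks let pipeline entities used as input sources or output destinations flow through normalization untouched. A minimal sketch under that assumption (bucket, parameter names, and container paths are illustrative):

from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.parameters import ParameterString

input_uri = ParameterString(
    name="ProcessingInputDataUri", default_value="s3://my-bucket/processing_manifest"
)
output_uri = ParameterString(name="ProcessingOutputUri")

# With this change, _normalize_inputs/_normalize_outputs append these objects
# as-is instead of parsing or uploading them, so the S3 URIs are resolved only
# when the pipeline executes.
processing_input = ProcessingInput(source=input_uri, destination="/opt/ml/processing/input")
processing_output = ProcessingOutput(output_name="train", destination=output_uri)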

src/sagemaker/workflow/pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ def create(

         Args:
             role_arn (str): The role arn that is assumed by the pipeline to create step artifacts.
-            pipeline_description (str): A description of the pipeline.
+            description (str): A description of the pipeline.
             experiment_name (str): The name of the experiment.
             tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
                 tags.
tests/data/mxnet_mnist/mnist_neo.py

Lines changed: 2 additions & 2 deletions
@@ -105,7 +105,7 @@ def train(


 def model_fn(path_to_model_files):
-    import neomxnet  # noqa: F401
+    import neomx  # noqa: F401

     ctx = mx.cpu()
     sym, arg_params, aux_params = mx.model.load_checkpoint(
@@ -120,7 +120,7 @@ def model_fn(path_to_model_files):


 def transform_fn(mod, payload, input_content_type, requested_output_content_type):
-    import neomxnet  # noqa: F401
+    import neomx  # noqa: F401

     if input_content_type != "application/vnd+python.numpy+binary":
         raise RuntimeError("Input content type must be application/vnd+python.numpy+binary")

tests/integ/test_huggingface.py

Lines changed: 4 additions & 0 deletions
@@ -17,11 +17,15 @@
 import pytest

 from sagemaker.huggingface import HuggingFace
+from tests import integ
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout


 @pytest.mark.release
+@pytest.mark.skipif(
+    integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
+)
 def test_huggingface_training(
     sagemaker_session,
     gpu_instance_type,

tests/integ/test_neo_mxnet.py

Lines changed: 3 additions & 3 deletions
@@ -71,7 +71,7 @@ def test_attach_deploy(

     estimator.compile_model(
         target_instance_family=cpu_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         output_path=estimator.output_path,
     )

@@ -121,7 +121,7 @@ def test_deploy_model(

     model.compile(
         target_instance_family=cpu_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         role=role,
         job_name=unique_name_from_base("test-deploy-model-compilation-job"),
         output_path="/".join(model_data.split("/")[:-1]),
@@ -163,7 +163,7 @@ def test_inferentia_deploy_model(

     model.compile(
         target_instance_family=inf_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         role=role,
         job_name=unique_name_from_base("test-deploy-model-compilation-job"),
         output_path="/".join(model_data.split("/")[:-1]),

tests/integ/test_tfs.py

Lines changed: 6 additions & 0 deletions
@@ -161,6 +161,9 @@ def test_predict_with_accelerator(tfs_predictor_with_accelerator):


 @pytest.mark.local_mode
+@pytest.mark.skip(
+    reason="This test is broken due to a regression." "This test should be reenabled later."
+)
 def test_predict_with_entry_point(tfs_predictor_with_model_and_entry_point_same_tar):
     input_data = {"instances": [1.0, 2.0, 5.0]}
     expected_result = {"predictions": [4.0, 4.5, 6.0]}
@@ -170,6 +173,9 @@ def test_predict_with_entry_point(tfs_predictor_with_model_and_entry_point_same_


 @pytest.mark.local_mode
+@pytest.mark.skip(
+    reason="This test is broken due to a regression." "This test should be reenabled later."
+)
 def test_predict_with_model_and_entry_point_and_dependencies_separated(
     tfs_predictor_with_model_and_entry_point_and_dependencies,
 ):

tests/unit/sagemaker/workflow/test_steps.py

Lines changed: 33 additions & 12 deletions
@@ -35,6 +35,7 @@
 from sagemaker.network import NetworkConfig
 from sagemaker.transformer import Transformer
 from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import ParameterString, ParameterInteger
 from sagemaker.workflow.steps import (
     ProcessingStep,
     Step,
@@ -112,16 +113,27 @@ def test_custom_step():


 def test_training_step(sagemaker_session):
+    instance_type_parameter = ParameterString(name="InstanceType", default_value="c4.4xlarge")
+    instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
+    data_source_uri_parameter = ParameterString(
+        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest"
+    )
+    training_epochs_parameter = ParameterInteger(name="TrainingEpochs", default_value=5)
+    training_batch_size_parameter = ParameterInteger(name="TrainingBatchSize", default_value=500)
     estimator = Estimator(
         image_uri=IMAGE_URI,
         role=ROLE,
-        instance_count=1,
-        instance_type="c4.4xlarge",
+        instance_count=instance_count_parameter,
+        instance_type=instance_type_parameter,
         profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
+        hyperparameters={
+            "batch-size": training_batch_size_parameter,
+            "epochs": training_epochs_parameter,
+        },
         rules=[],
         sagemaker_session=sagemaker_session,
     )
-    inputs = TrainingInput(f"s3://{BUCKET}/train_manifest")
+    inputs = TrainingInput(s3_data=data_source_uri_parameter)
     cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
     step = TrainingStep(
         name="MyTrainingStep", estimator=estimator, inputs=inputs, cache_config=cache_config
@@ -131,22 +143,26 @@ def test_training_step(sagemaker_session):
         "Type": "Training",
         "Arguments": {
             "AlgorithmSpecification": {"TrainingImage": IMAGE_URI, "TrainingInputMode": "File"},
+            "HyperParameters": {
+                "batch-size": training_batch_size_parameter,
+                "epochs": training_epochs_parameter,
+            },
             "InputDataConfig": [
                 {
                     "ChannelName": "training",
                     "DataSource": {
                         "S3DataSource": {
                             "S3DataDistributionType": "FullyReplicated",
                             "S3DataType": "S3Prefix",
-                            "S3Uri": f"s3://{BUCKET}/train_manifest",
+                            "S3Uri": data_source_uri_parameter,
                         }
                     },
                 }
             ],
             "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
             "ResourceConfig": {
-                "InstanceCount": 1,
-                "InstanceType": "c4.4xlarge",
+                "InstanceCount": instance_count_parameter,
+                "InstanceType": instance_type_parameter,
                 "VolumeSizeInGB": 30,
             },
             "RoleArn": ROLE,
@@ -162,16 +178,21 @@ def test_training_step(sagemaker_session):


 def test_processing_step(sagemaker_session):
+    processing_input_data_uri_parameter = ParameterString(
+        name="ProcessingInputDataUri", default_value=f"s3://{BUCKET}/processing_manifest"
+    )
+    instance_type_parameter = ParameterString(name="InstanceType", default_value="ml.m4.4xlarge")
+    instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
     processor = Processor(
         image_uri=IMAGE_URI,
         role=ROLE,
-        instance_count=1,
-        instance_type="ml.m4.4xlarge",
+        instance_count=instance_count_parameter,
+        instance_type=instance_type_parameter,
         sagemaker_session=sagemaker_session,
     )
     inputs = [
         ProcessingInput(
-            source=f"s3://{BUCKET}/processing_manifest",
+            source=processing_input_data_uri_parameter,
             destination="processing_manifest",
         )
     ]
@@ -198,14 +219,14 @@ def test_processing_step(sagemaker_session):
                         "S3DataDistributionType": "FullyReplicated",
                         "S3DataType": "S3Prefix",
                         "S3InputMode": "File",
-                        "S3Uri": "s3://my-bucket/processing_manifest",
+                        "S3Uri": processing_input_data_uri_parameter,
                     },
                 }
             ],
             "ProcessingResources": {
                 "ClusterConfig": {
-                    "InstanceCount": 1,
-                    "InstanceType": "ml.m4.4xlarge",
+                    "InstanceCount": instance_count_parameter,
+                    "InstanceType": instance_type_parameter,
                     "VolumeSizeInGB": 30,
                 }
             },
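Note: the updated tests assert that the parameter objects survive into each step's request dict. A hedged end-to-end sketch of how such parameters are typically defined, attached to a pipeline, and overridden per execution (the image URI, role ARN, bucket, and names below are placeholders, not part of this commit):

from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.parameters import ParameterInteger, ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import TrainingStep

instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
epochs = ParameterInteger(name="TrainingEpochs", default_value=5)

estimator = Estimator(
    image_uri="<training-image-uri>",
    role="<execution-role-arn>",
    instance_count=instance_count,
    instance_type=instance_type,
    hyperparameters={"epochs": epochs},  # passed through thanks to the estimator.py change
)
step = TrainingStep(
    name="MyTrainingStep",
    estimator=estimator,
    inputs=TrainingInput(s3_data="s3://my-bucket/train"),
)

pipeline = Pipeline(
    name="MyPipeline",
    parameters=[instance_type, instance_count, epochs],
    steps=[step],
)
pipeline.upsert(role_arn="<execution-role-arn>")

# Parameter defaults can be overridden per execution without redefining the pipeline.
execution = pipeline.start(parameters={"TrainingEpochs": 10})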

0 commit comments
