Skip to content

Commit b16eac4

Browse files
authored
Merge branch 'master' into change/dw_image_uri
2 parents 5aa794a + 7b1e5c1 commit b16eac4

25 files changed

+206
-42
lines changed

src/sagemaker/estimator.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2443,7 +2443,13 @@ def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="m
24432443
@staticmethod
24442444
def _json_encode_hyperparameters(hyperparameters):
24452445
"""Placeholder docstring"""
2446-
return {str(k): json.dumps(v) for (k, v) in hyperparameters.items()}
2446+
current_hyperparameters = hyperparameters
2447+
if current_hyperparameters is not None:
2448+
hyperparameters = {
2449+
str(k): (v if isinstance(v, (Parameter, Expression, Properties)) else json.dumps(v))
2450+
for (k, v) in current_hyperparameters.items()
2451+
}
2452+
return hyperparameters
24472453

24482454
@classmethod
24492455
def _update_init_params(cls, hp, tf_arguments):

src/sagemaker/image_uri_config/huggingface.json

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
"training": {
33
"processors": ["gpu"],
44
"version_aliases": {
5-
"4.4": "4.4.2"
5+
"4.4": "4.4.2",
6+
"4.5": "4.5.0"
67
},
78
"versions": {
89
"4.4.2": {
@@ -70,6 +71,72 @@
7071
},
7172
"repository": "huggingface-tensorflow-training"
7273
}
74+
},
75+
"4.5.0": {
76+
"version_aliases": {
77+
"pytorch1.6": "pytorch1.6.0",
78+
"tensorflow2.4": "tensorflow2.4.1"
79+
},
80+
"pytorch1.6.0": {
81+
"py_versions": ["py36"],
82+
"registries": {
83+
"af-south-1": "626614931356",
84+
"ap-east-1": "871362719292",
85+
"ap-northeast-1": "763104351884",
86+
"ap-northeast-2": "763104351884",
87+
"ap-south-1": "763104351884",
88+
"ap-southeast-1": "763104351884",
89+
"ap-southeast-2": "763104351884",
90+
"ca-central-1": "763104351884",
91+
"cn-north-1": "727897471807",
92+
"cn-northwest-1": "727897471807",
93+
"eu-central-1": "763104351884",
94+
"eu-north-1": "763104351884",
95+
"eu-west-1": "763104351884",
96+
"eu-west-2": "763104351884",
97+
"eu-west-3": "763104351884",
98+
"eu-south-1": "692866216735",
99+
"me-south-1": "217643126080",
100+
"sa-east-1": "763104351884",
101+
"us-east-1": "763104351884",
102+
"us-east-2": "763104351884",
103+
"us-gov-west-1": "442386744353",
104+
"us-iso-east-1": "886529160074",
105+
"us-west-1": "763104351884",
106+
"us-west-2": "763104351884"
107+
},
108+
"repository": "huggingface-pytorch-training"
109+
},
110+
"tensorflow2.4.1": {
111+
"py_versions": ["py37"],
112+
"registries": {
113+
"af-south-1": "626614931356",
114+
"ap-east-1": "871362719292",
115+
"ap-northeast-1": "763104351884",
116+
"ap-northeast-2": "763104351884",
117+
"ap-south-1": "763104351884",
118+
"ap-southeast-1": "763104351884",
119+
"ap-southeast-2": "763104351884",
120+
"ca-central-1": "763104351884",
121+
"cn-north-1": "727897471807",
122+
"cn-northwest-1": "727897471807",
123+
"eu-central-1": "763104351884",
124+
"eu-north-1": "763104351884",
125+
"eu-south-1": "692866216735",
126+
"eu-west-1": "763104351884",
127+
"eu-west-2": "763104351884",
128+
"eu-west-3": "763104351884",
129+
"me-south-1": "217643126080",
130+
"sa-east-1": "763104351884",
131+
"us-east-1": "763104351884",
132+
"us-east-2": "763104351884",
133+
"us-gov-west-1": "442386744353",
134+
"us-iso-east-1": "886529160074",
135+
"us-west-1": "763104351884",
136+
"us-west-2": "763104351884"
137+
},
138+
"repository": "huggingface-tensorflow-training"
139+
}
73140
}
74141
}
75142
}

tests/unit/sagemaker/model/test_framework_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def __init__(self, sagemaker_session, **kwargs):
5656
ROLE,
5757
ENTRY_POINT,
5858
sagemaker_session=sagemaker_session,
59-
**kwargs
59+
**kwargs,
6060
)
6161

6262
def create_predictor(self, endpoint_name):
@@ -71,7 +71,7 @@ def __init__(self, sagemaker_session, entry_point, **kwargs):
7171
ROLE,
7272
entry_point=entry_point,
7373
sagemaker_session=sagemaker_session,
74-
**kwargs
74+
**kwargs,
7575
)
7676

7777
def create_predictor(self, endpoint_name):

tests/unit/sagemaker/tensorflow/test_estimator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _build_tf(
159159
py_version=None,
160160
instance_type=None,
161161
base_job_name=None,
162-
**kwargs
162+
**kwargs,
163163
):
164164
return TensorFlow(
165165
entry_point=SCRIPT_PATH,
@@ -170,7 +170,7 @@ def _build_tf(
170170
instance_count=INSTANCE_COUNT,
171171
instance_type=instance_type if instance_type else INSTANCE_TYPE,
172172
base_job_name=base_job_name,
173-
**kwargs
173+
**kwargs,
174174
)
175175

176176

tests/unit/sagemaker/tensorflow/test_estimator_init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def _build_tf(sagemaker_session, **kwargs):
3535
role="dummy-role",
3636
instance_count=1,
3737
instance_type="ml.c4.xlarge",
38-
**kwargs
38+
**kwargs,
3939
)
4040

4141

tests/unit/sagemaker/workflow/test_steps.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import pytest
1717
import sagemaker
18+
import os
1819

1920
from mock import (
2021
Mock,
@@ -24,6 +25,7 @@
2425

2526
from sagemaker.debugger import ProfilerConfig
2627
from sagemaker.estimator import Estimator
28+
from sagemaker.tensorflow import TensorFlow
2729
from sagemaker.inputs import TrainingInput, TransformInput, CreateModelInput
2830
from sagemaker.model import Model
2931
from sagemaker.processing import (
@@ -45,6 +47,10 @@
4547
CreateModelStep,
4648
CacheConfig,
4749
)
50+
from tests.unit import DATA_DIR
51+
52+
SCRIPT_FILE = "dummy_script.py"
53+
SCRIPT_PATH = os.path.join(DATA_DIR, SCRIPT_FILE)
4854

4955
REGION = "us-west-2"
5056
BUCKET = "my-bucket"
@@ -112,7 +118,7 @@ def test_custom_step():
112118
assert step.to_request() == {"Name": "MyStep", "Type": "Training", "Arguments": dict()}
113119

114120

115-
def test_training_step(sagemaker_session):
121+
def test_training_step_base_estimator(sagemaker_session):
116122
instance_type_parameter = ParameterString(name="InstanceType", default_value="c4.4xlarge")
117123
instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
118124
data_source_uri_parameter = ParameterString(
@@ -177,6 +183,91 @@ def test_training_step(sagemaker_session):
177183
assert step.properties.TrainingJobName.expr == {"Get": "Steps.MyTrainingStep.TrainingJobName"}
178184

179185

186+
def test_training_step_tensorflow(sagemaker_session):
187+
instance_type_parameter = ParameterString(name="InstanceType", default_value="ml.p3.16xlarge")
188+
instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
189+
data_source_uri_parameter = ParameterString(
190+
name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest"
191+
)
192+
training_epochs_parameter = ParameterInteger(name="TrainingEpochs", default_value=5)
193+
training_batch_size_parameter = ParameterInteger(name="TrainingBatchSize", default_value=500)
194+
estimator = TensorFlow(
195+
entry_point=os.path.join(DATA_DIR, SCRIPT_FILE),
196+
role=ROLE,
197+
model_dir=False,
198+
image_uri=IMAGE_URI,
199+
source_dir="s3://mybucket/source",
200+
framework_version="2.4.1",
201+
py_version="py37",
202+
instance_count=instance_count_parameter,
203+
instance_type=instance_type_parameter,
204+
sagemaker_session=sagemaker_session,
205+
# subnets=subnets,
206+
hyperparameters={
207+
"batch-size": training_batch_size_parameter,
208+
"epochs": training_epochs_parameter,
209+
},
210+
# security_group_ids=security_group_ids,
211+
debugger_hook_config=False,
212+
# Training using SMDataParallel Distributed Training Framework
213+
distribution={"smdistributed": {"dataparallel": {"enabled": True}}},
214+
)
215+
216+
inputs = TrainingInput(s3_data=data_source_uri_parameter)
217+
cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
218+
step = TrainingStep(
219+
name="MyTrainingStep", estimator=estimator, inputs=inputs, cache_config=cache_config
220+
)
221+
step_request = step.to_request()
222+
step_request["Arguments"]["HyperParameters"].pop("sagemaker_job_name", None)
223+
step_request["Arguments"]["HyperParameters"].pop("sagemaker_program", None)
224+
step_request["Arguments"].pop("ProfilerRuleConfigurations", None)
225+
assert step_request == {
226+
"Name": "MyTrainingStep",
227+
"Type": "Training",
228+
"Arguments": {
229+
"AlgorithmSpecification": {
230+
"TrainingInputMode": "File",
231+
"TrainingImage": "fakeimage",
232+
"EnableSageMakerMetricsTimeSeries": True,
233+
},
234+
"OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"},
235+
"StoppingCondition": {"MaxRuntimeInSeconds": 86400},
236+
"ResourceConfig": {
237+
"InstanceCount": instance_count_parameter,
238+
"InstanceType": instance_type_parameter,
239+
"VolumeSizeInGB": 30,
240+
},
241+
"RoleArn": "DummyRole",
242+
"InputDataConfig": [
243+
{
244+
"DataSource": {
245+
"S3DataSource": {
246+
"S3DataType": "S3Prefix",
247+
"S3Uri": data_source_uri_parameter,
248+
"S3DataDistributionType": "FullyReplicated",
249+
}
250+
},
251+
"ChannelName": "training",
252+
}
253+
],
254+
"HyperParameters": {
255+
"batch-size": training_batch_size_parameter,
256+
"epochs": training_epochs_parameter,
257+
"sagemaker_submit_directory": '"s3://mybucket/source"',
258+
"sagemaker_container_log_level": "20",
259+
"sagemaker_region": '"us-west-2"',
260+
"sagemaker_distributed_dataparallel_enabled": "true",
261+
"sagemaker_instance_type": instance_type_parameter,
262+
"sagemaker_distributed_dataparallel_custom_mpi_options": '""',
263+
},
264+
"ProfilerConfig": {"S3OutputPath": "s3://my-bucket/"},
265+
},
266+
"CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
267+
}
268+
assert step.properties.TrainingJobName.expr == {"Get": "Steps.MyTrainingStep.TrainingJobName"}
269+
270+
180271
def test_processing_step(sagemaker_session):
181272
processing_input_data_uri_parameter = ParameterString(
182273
name="ProcessingInputDataUri", default_value=f"s3://{BUCKET}/processing_manifest"

tests/unit/test_amazon_estimator.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def test_init_enable_network_isolation(sagemaker_session):
8686
num_components=55,
8787
sagemaker_session=sagemaker_session,
8888
enable_network_isolation=True,
89-
**COMMON_ARGS
89+
**COMMON_ARGS,
9090
)
9191
assert pca.num_components == 55
9292
assert pca.enable_network_isolation() is True
@@ -99,7 +99,7 @@ def test_init_all_pca_hyperparameters(sagemaker_session):
9999
subtract_mean=True,
100100
extra_components=33,
101101
sagemaker_session=sagemaker_session,
102-
**COMMON_ARGS
102+
**COMMON_ARGS,
103103
)
104104
assert pca.num_components == 55
105105
assert pca.algorithm_mode == "randomized"
@@ -112,7 +112,7 @@ def test_init_estimator_args(sagemaker_session):
112112
max_run=1234,
113113
sagemaker_session=sagemaker_session,
114114
data_location="s3://some-bucket/some-key/",
115-
**COMMON_ARGS
115+
**COMMON_ARGS,
116116
)
117117
assert pca.instance_type == COMMON_ARGS["instance_type"]
118118
assert pca.instance_count == COMMON_ARGS["instance_count"]
@@ -133,7 +133,7 @@ def test_data_location_does_not_call_default_bucket(sagemaker_session):
133133
num_components=2,
134134
sagemaker_session=sagemaker_session,
135135
data_location=data_location,
136-
**COMMON_ARGS
136+
**COMMON_ARGS,
137137
)
138138
assert pca.data_location == data_location
139139
assert not sagemaker_session.default_bucket.called
@@ -205,7 +205,7 @@ def test_fit_ndarray(time, sagemaker_session):
205205
num_components=55,
206206
sagemaker_session=sagemaker_session,
207207
data_location="s3://{}/key-prefix/".format(BUCKET_NAME),
208-
**kwargs
208+
**kwargs,
209209
)
210210
train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
211211
labels = [99, 85, 87, 2]
@@ -233,7 +233,7 @@ def test_fit_pass_experiment_config(sagemaker_session):
233233
num_components=55,
234234
sagemaker_session=sagemaker_session,
235235
data_location="s3://{}/key-prefix/".format(BUCKET_NAME),
236-
**kwargs
236+
**kwargs,
237237
)
238238
train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
239239
labels = [99, 85, 87, 2]

tests/unit/test_chainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _chainer_estimator(
8888
num_processes=None,
8989
process_slots_per_host=None,
9090
additional_mpi_options=None,
91-
**kwargs
91+
**kwargs,
9292
):
9393
return Chainer(
9494
entry_point=SCRIPT_PATH,
@@ -103,7 +103,7 @@ def _chainer_estimator(
103103
num_processes=num_processes,
104104
process_slots_per_host=process_slots_per_host,
105105
additional_mpi_options=additional_mpi_options,
106-
**kwargs
106+
**kwargs,
107107
)
108108

109109

tests/unit/test_estimator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def create_model(
140140
vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT,
141141
enable_network_isolation=None,
142142
model_dir=None,
143-
**kwargs
143+
**kwargs,
144144
):
145145
if enable_network_isolation is None:
146146
enable_network_isolation = self.enable_network_isolation()
@@ -151,7 +151,7 @@ def create_model(
151151
entry_point=entry_point,
152152
enable_network_isolation=enable_network_isolation,
153153
role=role,
154-
**kwargs
154+
**kwargs,
155155
)
156156

157157
@classmethod
@@ -171,7 +171,7 @@ def __init__(self, sagemaker_session, entry_point=None, role=ROLE, **kwargs):
171171
role,
172172
entry_point or ENTRY_POINT,
173173
sagemaker_session=sagemaker_session,
174-
**kwargs
174+
**kwargs,
175175
)
176176

177177
def create_predictor(self, endpoint_name):

tests/unit/test_fm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_all_hyperparameters(sagemaker_session):
115115
factors_init_scale=1.101,
116116
factors_init_sigma=1.202,
117117
factors_init_value=1.303,
118-
**ALL_REQ_ARGS
118+
**ALL_REQ_ARGS,
119119
)
120120
assert fm.hyperparameters() == dict(
121121
num_factors=str(ALL_REQ_ARGS["num_factors"]),

tests/unit/test_image.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -751,10 +751,10 @@ def test_ecr_login_needed(check_output):
751751
token_response = "AWS:%s" % token
752752
b64_token = base64.b64encode(token_response.encode("utf-8"))
753753
response = {
754-
u"authorizationData": [
754+
"authorizationData": [
755755
{
756-
u"authorizationToken": b64_token,
757-
u"proxyEndpoint": u"https://520713654638.dkr.ecr.us-east-1.amazonaws.com",
756+
"authorizationToken": b64_token,
757+
"proxyEndpoint": "https://520713654638.dkr.ecr.us-east-1.amazonaws.com",
758758
}
759759
],
760760
"ResponseMetadata": {

0 commit comments

Comments
 (0)