Skip to content

Commit 20dfac8

Browse files
authored
Merge branch 'master' into tf242ioc
2 parents a70bd89 + 80c3c13 commit 20dfac8

26 files changed

+672
-134
lines changed

.githooks/pre-push

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ start_time=`date +%s`
1212
tox -e sphinx,doc8 --parallel all
1313
./ci-scripts/displaytime.sh 'sphinx,doc8' $start_time
1414
start_time=`date +%s`
15-
tox -e py36,py37,py38 --parallel all -- tests/unit
16-
./ci-scripts/displaytime.sh 'py36,py37,py38 unit' $start_time
15+
tox -e py36,py37,py38,py39 --parallel all -- tests/unit
16+
./ci-scripts/displaytime.sh 'py36,py37,py38,py39 unit' $start_time

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
# Changelog
22

3+
## v2.82.2 (2022-04-01)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* Revert "fix: Fix Pipeline variables related customer issues (#2959)"
8+
* Refactor repack_model script injection, fixes tar.gz error
9+
10+
## v2.82.1 (2022-03-31)
11+
12+
### Bug Fixes and Other Changes
13+
14+
* Update Inferentia Image URI Config
15+
* Fix Pipeline variables related customer issues
16+
* More logging info for static pipeline test data setup
17+
18+
## v2.82.0 (2022-03-30)
19+
20+
### Features
21+
22+
* pluggable instance fallback mechanism, add CapacityError
23+
* support passing Env Vars to local mode training
24+
25+
## v2.81.1 (2022-03-29)
26+
27+
### Bug Fixes and Other Changes
28+
29+
* Update black-check version, add support for Spark 3.1 Processing
30+
331
## v2.81.0 (2022-03-26)
432

533
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.81.1.dev0
1+
2.82.3.dev0

src/sagemaker/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def __init__(self, message, allowed_statuses, actual_status):
2323
super(UnexpectedStatusException, self).__init__(message)
2424

2525

26+
class CapacityError(UnexpectedStatusException):
27+
"""Raised when resource status is not expected and fails with a reason of CapacityError"""
28+
29+
2630
class AsyncInferenceError(Exception):
2731
"""The base exception class for Async Inference exceptions."""
2832

src/sagemaker/image_uri_config/huggingface.json

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,8 @@
561561
},
562562
"4.17.0": {
563563
"version_aliases": {
564-
"pytorch1.10": "pytorch1.10.2"
564+
"pytorch1.10": "pytorch1.10.2",
565+
"tensorflow2.6": "tensorflow2.6.3"
565566
},
566567
"pytorch1.10.2": {
567568
"py_versions": ["py38"],
@@ -594,6 +595,38 @@
594595
},
595596
"repository": "huggingface-pytorch-training",
596597
"container_version": {"gpu": "cu113-ubuntu20.04"}
598+
},
599+
"tensorflow2.6.3": {
600+
"py_versions": ["py38"],
601+
"registries": {
602+
"af-south-1": "626614931356",
603+
"ap-east-1": "871362719292",
604+
"ap-northeast-1": "763104351884",
605+
"ap-northeast-2": "763104351884",
606+
"ap-northeast-3": "364406365360",
607+
"ap-south-1": "763104351884",
608+
"ap-southeast-1": "763104351884",
609+
"ap-southeast-2": "763104351884",
610+
"ca-central-1": "763104351884",
611+
"cn-north-1": "727897471807",
612+
"cn-northwest-1": "727897471807",
613+
"eu-central-1": "763104351884",
614+
"eu-north-1": "763104351884",
615+
"eu-south-1": "692866216735",
616+
"eu-west-1": "763104351884",
617+
"eu-west-2": "763104351884",
618+
"eu-west-3": "763104351884",
619+
"me-south-1": "217643126080",
620+
"sa-east-1": "763104351884",
621+
"us-east-1": "763104351884",
622+
"us-east-2": "763104351884",
623+
"us-gov-west-1": "442386744353",
624+
"us-iso-east-1": "886529160074",
625+
"us-west-1": "763104351884",
626+
"us-west-2": "763104351884"
627+
},
628+
"repository": "huggingface-tensorflow-training",
629+
"container_version": {"gpu": "cu112-ubuntu20.04"}
597630
}
598631
}
599632
}
@@ -989,7 +1022,8 @@
9891022
},
9901023
"4.17.0": {
9911024
"version_aliases": {
992-
"pytorch1.10": "pytorch1.10.2"
1025+
"pytorch1.10": "pytorch1.10.2",
1026+
"tensorflow2.6": "tensorflow2.6.3"
9931027
},
9941028
"pytorch1.10.2": {
9951029
"py_versions": ["py38"],
@@ -1022,6 +1056,38 @@
10221056
},
10231057
"repository": "huggingface-pytorch-inference",
10241058
"container_version": {"gpu": "cu113-ubuntu20.04", "cpu": "ubuntu20.04" }
1059+
},
1060+
"tensorflow2.6.3": {
1061+
"py_versions": ["py38"],
1062+
"registries": {
1063+
"af-south-1": "626614931356",
1064+
"ap-east-1": "871362719292",
1065+
"ap-northeast-1": "763104351884",
1066+
"ap-northeast-2": "763104351884",
1067+
"ap-northeast-3": "364406365360",
1068+
"ap-south-1": "763104351884",
1069+
"ap-southeast-1": "763104351884",
1070+
"ap-southeast-2": "763104351884",
1071+
"ca-central-1": "763104351884",
1072+
"cn-north-1": "727897471807",
1073+
"cn-northwest-1": "727897471807",
1074+
"eu-central-1": "763104351884",
1075+
"eu-north-1": "763104351884",
1076+
"eu-south-1": "692866216735",
1077+
"eu-west-1": "763104351884",
1078+
"eu-west-2": "763104351884",
1079+
"eu-west-3": "763104351884",
1080+
"me-south-1": "217643126080",
1081+
"sa-east-1": "763104351884",
1082+
"us-east-1": "763104351884",
1083+
"us-east-2": "763104351884",
1084+
"us-gov-west-1": "442386744353",
1085+
"us-iso-east-1": "886529160074",
1086+
"us-west-1": "763104351884",
1087+
"us-west-2": "763104351884"
1088+
},
1089+
"repository": "huggingface-tensorflow-inference",
1090+
"container_version": {"gpu": "cu112-ubuntu20.04", "cpu": "ubuntu20.04" }
10251091
}
10261092
}
10271093
}

src/sagemaker/image_uri_config/inferentia-mxnet.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,36 @@
3131
"us-west-2": "301217895009"
3232
},
3333
"repository": "sagemaker-neo-mxnet"
34+
},
35+
"1.8": {
36+
"py_versions": ["py3"],
37+
"registries": {
38+
"af-south-1": "774647643957",
39+
"ap-east-1": "110948597952",
40+
"ap-northeast-1": "941853720454",
41+
"ap-northeast-2": "151534178276",
42+
"ap-northeast-3": "925152966179",
43+
"ap-south-1": "763008648453",
44+
"ap-southeast-1": "324986816169",
45+
"ap-southeast-2": "355873309152",
46+
"ca-central-1": "464438896020",
47+
"cn-north-1": "472730292857",
48+
"cn-northwest-1": "474822919863",
49+
"eu-central-1": "746233611703",
50+
"eu-north-1": "601324751636",
51+
"eu-south-1": "966458181534",
52+
"eu-west-1": "802834080501",
53+
"eu-west-2": "205493899709",
54+
"eu-west-3": "254080097072",
55+
"me-south-1": "836785723513",
56+
"sa-east-1": "756306329178",
57+
"us-east-1": "785573368785",
58+
"us-east-2": "007439368137",
59+
"us-gov-west-1": "263933020539",
60+
"us-west-1": "710691900526",
61+
"us-west-2": "301217895009"
62+
},
63+
"repository": "sagemaker-neo-mxnet"
3464
}
3565
}
3666
}

src/sagemaker/image_uri_config/inferentia-pytorch.json

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,67 @@
22
"processors": ["inf"],
33
"scope": ["inference"],
44
"versions": {
5-
"1.5.1": {
5+
"1.7": {
6+
"py_versions": ["py3"],
7+
"registries": {
8+
"af-south-1": "774647643957",
9+
"ap-east-1": "110948597952",
10+
"ap-northeast-1": "941853720454",
11+
"ap-northeast-2": "151534178276",
12+
"ap-northeast-3": "925152966179",
13+
"ap-south-1": "763008648453",
14+
"ap-southeast-1": "324986816169",
15+
"ap-southeast-2": "355873309152",
16+
"ca-central-1": "464438896020",
17+
"cn-north-1": "472730292857",
18+
"cn-northwest-1": "474822919863",
19+
"eu-central-1": "746233611703",
20+
"eu-north-1": "601324751636",
21+
"eu-south-1": "966458181534",
22+
"eu-west-1": "802834080501",
23+
"eu-west-2": "205493899709",
24+
"eu-west-3": "254080097072",
25+
"me-south-1": "836785723513",
26+
"sa-east-1": "756306329178",
27+
"us-east-1": "785573368785",
28+
"us-east-2": "007439368137",
29+
"us-gov-west-1": "263933020539",
30+
"us-west-1": "710691900526",
31+
"us-west-2": "301217895009"
32+
},
33+
"repository": "sagemaker-neo-pytorch"
34+
},
35+
"1.8": {
36+
"py_versions": ["py3"],
37+
"registries": {
38+
"af-south-1": "774647643957",
39+
"ap-east-1": "110948597952",
40+
"ap-northeast-1": "941853720454",
41+
"ap-northeast-2": "151534178276",
42+
"ap-northeast-3": "925152966179",
43+
"ap-south-1": "763008648453",
44+
"ap-southeast-1": "324986816169",
45+
"ap-southeast-2": "355873309152",
46+
"ca-central-1": "464438896020",
47+
"cn-north-1": "472730292857",
48+
"cn-northwest-1": "474822919863",
49+
"eu-central-1": "746233611703",
50+
"eu-north-1": "601324751636",
51+
"eu-south-1": "966458181534",
52+
"eu-west-1": "802834080501",
53+
"eu-west-2": "205493899709",
54+
"eu-west-3": "254080097072",
55+
"me-south-1": "836785723513",
56+
"sa-east-1": "756306329178",
57+
"us-east-1": "785573368785",
58+
"us-east-2": "007439368137",
59+
"us-gov-west-1": "263933020539",
60+
"us-west-1": "710691900526",
61+
"us-west-2": "301217895009"
62+
},
63+
"repository": "sagemaker-neo-pytorch"
64+
},
65+
"1.9": {
666
"py_versions": ["py3"],
767
"registries": {
868
"af-south-1": "774647643957",

src/sagemaker/image_uri_config/inferentia-tensorflow.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,36 @@
3131
"us-west-2": "301217895009"
3232
},
3333
"repository": "sagemaker-neo-tensorflow"
34+
},
35+
"2.5.2": {
36+
"py_versions": ["py3"],
37+
"registries": {
38+
"af-south-1": "774647643957",
39+
"ap-east-1": "110948597952",
40+
"ap-northeast-1": "941853720454",
41+
"ap-northeast-2": "151534178276",
42+
"ap-northeast-3": "925152966179",
43+
"ap-south-1": "763008648453",
44+
"ap-southeast-1": "324986816169",
45+
"ap-southeast-2": "355873309152",
46+
"ca-central-1": "464438896020",
47+
"cn-north-1": "472730292857",
48+
"cn-northwest-1": "474822919863",
49+
"eu-central-1": "746233611703",
50+
"eu-north-1": "601324751636",
51+
"eu-south-1": "966458181534",
52+
"eu-west-1": "802834080501",
53+
"eu-west-2": "205493899709",
54+
"eu-west-3": "254080097072",
55+
"me-south-1": "836785723513",
56+
"sa-east-1": "756306329178",
57+
"us-east-1": "785573368785",
58+
"us-east-2": "007439368137",
59+
"us-gov-west-1": "263933020539",
60+
"us-west-1": "710691900526",
61+
"us-west-2": "301217895009"
62+
},
63+
"repository": "sagemaker-neo-tensorflow"
3464
}
3565
}
3666
}

src/sagemaker/image_uri_config/spark.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,35 @@
5959
"us-gov-west-1": "271483468897"
6060
},
6161
"repository": "sagemaker-spark-processing"
62+
},
63+
"3.1": {
64+
"py_versions": ["py37"],
65+
"registries": {
66+
"me-south-1": "750251592176",
67+
"ap-south-1": "105495057255",
68+
"eu-north-1": "330188676905",
69+
"eu-west-3": "136845547031",
70+
"us-east-2": "314815235551",
71+
"eu-west-1": "571004829621",
72+
"eu-central-1": "906073651304",
73+
"sa-east-1": "737130764395",
74+
"ap-east-1": "732049463269",
75+
"us-east-1": "173754725891",
76+
"ap-northeast-2": "860869212795",
77+
"eu-west-2": "836651553127",
78+
"ap-northeast-1": "411782140378",
79+
"us-west-2": "153931337802",
80+
"us-west-1": "667973535471",
81+
"ap-southeast-1": "759080221371",
82+
"ap-southeast-2": "440695851116",
83+
"ca-central-1": "446299261295",
84+
"cn-north-1": "671472414489",
85+
"cn-northwest-1": "844356804704",
86+
"eu-south-1": "753923664805",
87+
"af-south-1": "309385258863",
88+
"us-gov-west-1": "271483468897"
89+
},
90+
"repository": "sagemaker-spark-processing"
6291
}
6392
}
6493
}

src/sagemaker/local/entities.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,22 +175,37 @@ def describe(self):
175175

176176

177177
class _LocalTrainingJob(object):
178-
"""Placeholder docstring"""
178+
"""Defines and starts a local training job."""
179179

180180
_STARTING = "Starting"
181181
_TRAINING = "Training"
182182
_COMPLETED = "Completed"
183183
_states = ["Starting", "Training", "Completed"]
184184

185185
def __init__(self, container):
186+
"""Creates a local training job.
187+
188+
Args:
189+
container: the local container object.
190+
"""
186191
self.container = container
187192
self.model_artifacts = None
188193
self.state = "created"
189194
self.start_time = None
190195
self.end_time = None
196+
self.environment = None
197+
198+
def start(self, input_data_config, output_data_config, hyperparameters, environment, job_name):
199+
"""Starts a local training job.
191200
192-
def start(self, input_data_config, output_data_config, hyperparameters, job_name):
193-
"""Placeholder docstring."""
201+
Args:
202+
input_data_config (dict): The input data configuration; this contains data such as the
203+
channels to be used for training.
204+
output_data_config (dict): The configuration of the output data.
205+
hyperparameters (dict): The HyperParameters for the training job.
206+
environment (dict): The collection of environment variables passed to the job.
207+
job_name (str): Name of the local training job being run.
208+
"""
194209
for channel in input_data_config:
195210
if channel["DataSource"] and "S3DataSource" in channel["DataSource"]:
196211
data_distribution = channel["DataSource"]["S3DataSource"]["S3DataDistributionType"]
@@ -216,9 +231,10 @@ def start(self, input_data_config, output_data_config, hyperparameters, job_name
216231

217232
self.start_time = datetime.datetime.now()
218233
self.state = self._TRAINING
234+
self.environment = environment
219235

220236
self.model_artifacts = self.container.train(
221-
input_data_config, output_data_config, hyperparameters, job_name
237+
input_data_config, output_data_config, hyperparameters, environment, job_name
222238
)
223239
self.end_time = datetime.datetime.now()
224240
self.state = self._COMPLETED

0 commit comments

Comments
 (0)