Skip to content

Commit 0c5e022

Browse files
authored
Merge branch 'aws:master' into master
2 parents 937fe57 + da08405 commit 0c5e022

File tree

12 files changed

+75
-27
lines changed

12 files changed

+75
-27
lines changed

CHANGELOG.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
11
# Changelog
22

3+
## v2.55.0 (2021-08-25)
4+
5+
### Features
6+
7+
* Add information of Amazon-provided analysis image used by Model Monitor
8+
9+
### Bug Fixes and Other Changes
10+
11+
* Update Changelog to fix release
12+
* Fixing the order of populating container list
13+
* pass network isolation config to pipelineModel
14+
* Deference symbolic link when create tar file
15+
* multiprocess issue in feature_group.py
16+
* deprecate tag logic on Association
17+
18+
### Documentation Changes
19+
20+
* add dataset_definition to processing page
21+
322
## v2.54.0 (2021-08-16)
423

524
### Features
@@ -8,7 +27,7 @@
827

928
### Bug Fixes and Other Changes
1029

11-
* issue #2253 where Processing job in Local mode would call Describe
30+
* issue #2253 where Processing job in Local mode would call Describe API
1231

1332
## v2.53.0 (2021-08-12)
1433

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.54.1.dev0
1+
2.55.1.dev0

doc/api/utility/inputs.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,8 @@ Inputs
55
:members:
66
:undoc-members:
77
:show-inheritance:
8-
:noindex:
8+
9+
.. automodule:: sagemaker.dataset_definition.inputs
10+
:members:
11+
:undoc-members:
12+
:show-inheritance:

src/sagemaker/automl/automl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ def create_model(
328328
predictor_cls=predictor_cls,
329329
name=name,
330330
vpc_config=vpc_config,
331+
enable_network_isolation=enable_network_isolation,
331332
sagemaker_session=sagemaker_session or self.sagemaker_session,
332333
)
333334
return pipeline

src/sagemaker/dataset_definition/inputs.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class RedshiftDatasetDefinition(ApiObject):
2727
2828
With this input, SQL queries will be executed using Redshift to generate datasets to S3.
2929
30-
Attributes:
30+
Parameters:
3131
cluster_id (str): The Redshift cluster Identifier.
3232
database (str): The name of the Redshift database used in Redshift query execution.
3333
db_user (str): The database user name used in Redshift query execution.
@@ -60,7 +60,7 @@ class AthenaDatasetDefinition(ApiObject):
6060
6161
With this input, SQL queries will be executed using Athena to generate datasets to S3.
6262
63-
Attributes:
63+
Parameters:
6464
catalog (str): The name of the data catalog used in Athena query execution.
6565
database (str): The name of the database used in the Athena query execution.
6666
query_string (str): The SQL query statements, to be executed.
@@ -87,7 +87,7 @@ class AthenaDatasetDefinition(ApiObject):
8787
class DatasetDefinition(ApiObject):
8888
"""DatasetDefinition input.
8989
90-
Attributes:
90+
Parameters:
9191
data_distribution_type (str): Whether the generated dataset is FullyReplicated or
9292
ShardedByS3Key (default).
9393
input_mode (str): Whether to use File or Pipe input mode. In File (default) mode, Amazon
@@ -98,9 +98,8 @@ class DatasetDefinition(ApiObject):
9898
local_path (str): The local path where you want Amazon SageMaker to download the Dataset
9999
Definition inputs to run a processing job. LocalPath is an absolute path to the input
100100
data. This is a required parameter when `AppManaged` is False (default).
101-
redshift_dataset_definition
102-
(:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`): Redshift
103-
dataset definition.
101+
redshift_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`):
102+
Configuration for Redshift Dataset Definition input.
104103
athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
105104
Configuration for Athena Dataset Definition input.
106105
"""
@@ -126,7 +125,7 @@ class S3Input(ApiObject):
126125
S3 list operations are not strongly consistent.
127126
Use ManifestFile if strong consistency is required.
128127
129-
Attributes:
128+
Parameters:
130129
s3_uri (str): the path to a specific S3 object or a S3 prefix
131130
local_path (str): the path to a local directory. If not provided, skips data download
132131
by SageMaker platform.

src/sagemaker/feature_store/feature_group.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ def _ingest_single_batch(
207207
for row in data_frame[start_index:end_index].itertuples():
208208
record = [
209209
FeatureValue(
210-
feature_name=data_frame.columns[index - 1], value_as_string=str(row[index])
210+
feature_name=data_frame.columns[index - 1],
211+
value_as_string=str(row[index]),
211212
)
212213
for index in range(1, len(row))
213214
if pd.notna(row[index])
@@ -270,13 +271,24 @@ def _run_multi_process(self, data_frame: DataFrame, wait=True, timeout=None):
270271
timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
271272
if timeout is reached.
272273
"""
274+
# pylint: disable=I1101
273275
batch_size = math.ceil(data_frame.shape[0] / self.max_processes)
276+
# pylint: enable=I1101
274277

275278
args = []
276279
for i in range(self.max_processes):
277280
start_index = min(i * batch_size, data_frame.shape[0])
278281
end_index = min(i * batch_size + batch_size, data_frame.shape[0])
279-
args += [(data_frame[start_index:end_index], start_index, timeout)]
282+
args += [
283+
(
284+
self.max_workers,
285+
self.feature_group_name,
286+
self.sagemaker_fs_runtime_client_config,
287+
data_frame[start_index:end_index],
288+
start_index,
289+
timeout,
290+
)
291+
]
280292

281293
def init_worker():
282294
# ignore keyboard interrupts in child processes.
@@ -285,13 +297,21 @@ def init_worker():
285297
self._processing_pool = ProcessingPool(self.max_processes, init_worker)
286298
self._processing_pool.restart(force=True)
287299

288-
f = lambda x: self._run_multi_threaded(*x) # noqa: E731
300+
f = lambda x: IngestionManagerPandas._run_multi_threaded(*x) # noqa: E731
289301
self._async_result = self._processing_pool.amap(f, args)
290302

291303
if wait:
292304
self.wait(timeout=timeout)
293305

294-
def _run_multi_threaded(self, data_frame: DataFrame, row_offset=0, timeout=None) -> List[int]:
306+
@staticmethod
307+
def _run_multi_threaded(
308+
max_workers: int,
309+
feature_group_name: str,
310+
sagemaker_fs_runtime_client_config: Config,
311+
data_frame: DataFrame,
312+
row_offset=0,
313+
timeout=None,
314+
) -> List[int]:
295315
"""Start the ingestion process.
296316
297317
Args:
@@ -305,21 +325,23 @@ def _run_multi_threaded(self, data_frame: DataFrame, row_offset=0, timeout=None)
305325
Returns:
306326
List of row indices that failed to be ingested.
307327
"""
308-
executor = ThreadPoolExecutor(max_workers=self.max_workers)
309-
batch_size = math.ceil(data_frame.shape[0] / self.max_workers)
328+
executor = ThreadPoolExecutor(max_workers=max_workers)
329+
# pylint: disable=I1101
330+
batch_size = math.ceil(data_frame.shape[0] / max_workers)
331+
# pylint: enable=I1101
310332

311333
futures = {}
312-
for i in range(self.max_workers):
334+
for i in range(max_workers):
313335
start_index = min(i * batch_size, data_frame.shape[0])
314336
end_index = min(i * batch_size + batch_size, data_frame.shape[0])
315337
futures[
316338
executor.submit(
317-
self._ingest_single_batch,
318-
feature_group_name=self.feature_group_name,
339+
IngestionManagerPandas._ingest_single_batch,
340+
feature_group_name=feature_group_name,
319341
data_frame=data_frame,
320342
start_index=start_index,
321343
end_index=end_index,
322-
client_config=self.sagemaker_fs_runtime_client_config,
344+
client_config=sagemaker_fs_runtime_client_config,
323345
)
324346
] = (start_index + row_offset, end_index + row_offset)
325347

src/sagemaker/image_uri_config/model-monitor.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"ap-east-1": "001633400207",
88
"ap-northeast-1": "574779866223",
99
"ap-northeast-2": "709848358524",
10+
"ap-northeast-3": "990339680094",
1011
"ap-south-1": "126357580389",
1112
"ap-southeast-1": "245545462676",
1213
"ap-southeast-2": "563025443158",

src/sagemaker/inputs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def __init__(self, seed):
127127
class CreateModelInput(object):
128128
"""A class containing parameters which can be used to create a SageMaker Model
129129
130-
Attributes:
130+
Parameters:
131131
instance_type (str): type or EC2 instance will be used for model deployment.
132132
accelerator_type (str): elastic inference accelerator type.
133133
"""

src/sagemaker/model.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,8 @@ def _upload_code(self, key_prefix, repack=False):
10971097

10981098
def _framework_env_vars(self):
10991099
"""Placeholder docstring"""
1100+
script_name = None
1101+
dir_name = None
11001102
if self.uploaded_code:
11011103
script_name = self.uploaded_code.script_name
11021104
if self.enable_network_isolation():
@@ -1105,10 +1107,8 @@ def _framework_env_vars(self):
11051107
dir_name = self.uploaded_code.s3_prefix
11061108
elif self.entry_point is not None:
11071109
script_name = self.entry_point
1108-
dir_name = "file://" + self.source_dir
1109-
else:
1110-
script_name = None
1111-
dir_name = None
1110+
if self.source_dir is not None:
1111+
dir_name = "file://" + self.source_dir
11121112

11131113
return {
11141114
SCRIPT_PARAM_NAME.upper(): script_name or str(),

src/sagemaker/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def create_tar_file(source_files, target=None):
331331
else:
332332
_, filename = tempfile.mkstemp()
333333

334-
with tarfile.open(filename, mode="w:gz") as t:
334+
with tarfile.open(filename, mode="w:gz", dereference=True) as t:
335335
for sf in source_files:
336336
# Add all files from the directory into the root of the directory structure of the tar
337337
t.add(sf, arcname=os.path.basename(sf))

tests/scripts/run-notebook-test.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@ echo "set SAGEMAKER_ROLE_ARN=$SAGEMAKER_ROLE_ARN"
3030
./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_moving_from_framework_mode_to_script_mode/tensorflow_moving_from_framework_mode_to_script_mode.ipynb \
3131
./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_script_mode_pipe_mode/tensorflow_script_mode_pipe_mode.ipynb \
3232
./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_script_mode_quickstart/tensorflow_script_mode_quickstart.ipynb \
33-
./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_serving_using_elastic_inference_with_your_own_model/tensorflow_serving_pretrained_model_elastic_inference.ipynb
33+
./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_serving_using_elastic_inference_with_your_own_model/tensorflow_serving_pretrained_model_elastic_inference.ipynb \
34+
./amazon-sagemaker-examples/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb

tests/unit/sagemaker/image_uris/test_model_monitor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"ap-east-1": "001633400207",
2121
"ap-northeast-1": "574779866223",
2222
"ap-northeast-2": "709848358524",
23+
"ap-northeast-3": "990339680094",
2324
"ap-south-1": "126357580389",
2425
"ap-southeast-1": "245545462676",
2526
"ap-southeast-2": "563025443158",

0 commit comments

Comments
 (0)