Commit a31074e

Merge branch 'master' into keras_fn

2 parents b293a39 + 021b67d
10 files changed: +35 additions, −48 deletions

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,11 @@
 CHANGELOG
 =========
 
+1.9.1dev
+========
+
+* bug-fix: Estimators: Fix serialization of single records
+
 1.9.0
 =====
 

src/sagemaker/amazon/common.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def __init__(self, content_type='application/x-recordio-protobuf'):
 
     def __call__(self, array):
         if len(array.shape) == 1:
-            array.reshape(1, array.shape[0])
+            array = array.reshape(1, array.shape[0])
         assert len(array.shape) == 2, "Expecting a 1 or 2 dimensional array"
         buf = io.BytesIO()
         write_numpy_to_dense_tensor(buf, array)
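This one-line change is the bug fix the changelog entry refers to: numpy's ndarray.reshape returns a new array rather than reshaping in place, so the old code silently discarded the result and passed the original 1-D array along. A minimal sketch of the difference (illustrative, outside the diff):

import numpy as np

array = np.array([1.0, 2.0, 3.0])
array.reshape(1, 3)          # returns a (1, 3) view; `array` itself is untouched
assert array.shape == (3,)   # still 1-D, which broke single-record serialization

array = array.reshape(1, 3)  # rebinding the name keeps the 2-D result
assert array.shape == (1, 3)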

tests/data/dummy_tensor

311 KB
Binary file not shown.
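The integration tests below now read this checked-in RecordIO-protobuf fixture instead of regenerating training data from MNIST on every run. A sketch of how such a fixture could be produced with the SDK's write_numpy_to_dense_tensor helper (shapes and file name are illustrative; this is not necessarily how the committed file was built):

import numpy as np
from sagemaker.amazon.common import write_numpy_to_dense_tensor

# 100 example rows plus one label per row, serialized as RecordIO-protobuf
vectors = np.random.rand(100, 784).astype('float32')
labels = np.zeros(100).astype('float32')

with open('tests/data/dummy_tensor', 'wb') as f:
    write_numpy_to_dense_tensor(f, vectors, labels)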

tests/integ/test_byo_estimator.py

Lines changed: 8 additions & 26 deletions
@@ -13,19 +13,15 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 
-import boto3
-import numpy as np
 import pytest
 
 import sagemaker
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import write_numpy_to_dense_tensor
 from sagemaker.estimator import Estimator
 from sagemaker.utils import name_from_base
 from tests.integ import DATA_DIR
@@ -57,6 +53,7 @@ def test_byo_estimator(sagemaker_session, region):
 
     """
     image_name = registry(region) + "/factorization-machines:1"
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -65,19 +62,11 @@ def test_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
@@ -111,6 +100,7 @@ def test_byo_estimator(sagemaker_session, region):
 def test_async_byo_estimator(sagemaker_session, region):
     image_name = registry(region) + "/factorization-machines:1"
     endpoint_name = name_from_base('byo')
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
     training_job_name = ""
 
     with timeout(minutes=5):
@@ -120,19 +110,11 @@ def test_async_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
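Both tests swap roughly ten lines of in-test data preparation and raw boto3 S3 calls for a single sagemaker_session.upload_data call. A minimal sketch of that pattern in isolation (bucket contents and paths are illustrative):

import os
import sagemaker

session = sagemaker.Session()

# upload_data pushes a local file (or directory) to the session's default
# bucket under key_prefix and returns the resulting 's3://...' URI
s3_train_data = session.upload_data(
    path=os.path.join('tests', 'data', 'dummy_tensor'),
    key_prefix='test_byo_estimator/train/recordio-pb-data')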

tests/integ/test_chainer_train.py

Lines changed: 3 additions & 3 deletions
@@ -66,15 +66,15 @@ def test_training_with_additional_hyperparameters(sagemaker_session, chainer_ful
 def test_attach_deploy(chainer_training_job, sagemaker_session):
     endpoint_name = 'test-chainer-attach-deploy-{}'.format(sagemaker_timestamp())
 
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         estimator = Chainer.attach(chainer_training_job, sagemaker_session=sagemaker_session)
         predictor = estimator.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
         _predict_and_assert(predictor)
 
 
 def test_deploy_model(chainer_training_job, sagemaker_session):
     endpoint_name = 'test-chainer-deploy-model-{}'.format(sagemaker_timestamp())
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=chainer_training_job)
         model_data = desc['ModelArtifacts']['S3ModelArtifacts']
         script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
@@ -93,7 +93,7 @@ def test_async_fit(sagemaker_session):
     print("Waiting to re-attach to the training job: %s" % training_job_name)
     time.sleep(20)
 
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35):
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         print("Re-attaching now to: %s" % training_job_name)
         estimator = Chainer.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
         predictor = estimator.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)

tests/integ/test_pytorch_train.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def fixture_training_job(sagemaker_session, pytorch_full_version):
 def test_sync_fit_deploy(pytorch_training_job, sagemaker_session):
     # TODO: add tests against local mode when it's ready to be used
     endpoint_name = 'test-pytorch-sync-fit-attach-deploy{}'.format(sagemaker_timestamp())
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         estimator = PyTorch.attach(pytorch_training_job, sagemaker_session=sagemaker_session)
         predictor = estimator.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
         data = numpy.zeros(shape=(1, 1, 28, 28), dtype=numpy.float32)

tests/integ/test_randomcutforest.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ def test_randomcutforest(sagemaker_session):
     rcf.fit(rcf.record_set(train_input))
 
     endpoint_name = name_from_base('randomcutforest')
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         model = RandomCutForestModel(rcf.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
         predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
 

tests/integ/test_tuner.py

Lines changed: 5 additions & 15 deletions
@@ -13,20 +13,18 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 import time
 
-import boto3
 import numpy as np
 import pytest
 
 from sagemaker import KMeans, LDA, RandomCutForest
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import read_records, write_numpy_to_dense_tensor
+from sagemaker.amazon.common import read_records
 from sagemaker.chainer import Chainer
 from sagemaker.estimator import Estimator
 from sagemaker.mxnet.estimator import MXNet
@@ -344,7 +342,7 @@ def test_attach_tuning_pytorch(sagemaker_session):
 
     attached_tuner = HyperparameterTuner.attach(tuning_job_name, sagemaker_session=sagemaker_session)
     best_training_job = tuner.best_training_job()
-    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session, minutes=20):
+    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
         data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
         predictor.predict(data)
@@ -368,6 +366,7 @@ def test_tuning_byo_estimator(sagemaker_session):
     Default predictor is updated with json serializer and deserializer.
     """
     image_name = registry(sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -376,19 +375,10 @@ def test_tuning_byo_estimator(sagemaker_session):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
     estimator = Estimator(image_name=image_name,
                           role='SageMakerRole', train_instance_count=1,

tests/integ/timeout.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ def handler(signum, frame):
 
 
 @contextmanager
-def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=35, hours=0):
+def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=45, hours=0):
     with timeout(seconds=seconds, minutes=minutes, hours=hours) as t:
         no_errors = False
         try:
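Raising the default here from 35 to 45 minutes is what lets the call sites above drop their explicit minutes= arguments. For context, a sketch of how a SIGALRM-based timeout context manager like the one this helper wraps is typically built (an illustration, not the repository's exact implementation):

import signal
from contextlib import contextmanager

@contextmanager
def timeout(seconds=0, minutes=0, hours=0):
    limit = seconds + 60 * minutes + 3600 * hours

    def handler(signum, frame):
        raise RuntimeError('timed out after %d seconds' % limit)

    # arm a SIGALRM deadline for the duration of the block (POSIX only)
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(limit)
    try:
        yield
    finally:
        signal.alarm(0)  # always disarm on exit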

tests/unit/test_common.py

Lines changed: 10 additions & 0 deletions
@@ -32,6 +32,16 @@ def test_serializer():
         assert record.features["values"].float64_tensor.values == expected
 
 
+def test_serializer_accepts_one_dimensional_array():
+    s = numpy_to_record_serializer()
+    array_data = [1.0, 2.0, 3.0]
+    buf = s(np.array(array_data))
+    record_data = next(_read_recordio(buf))
+    record = Record()
+    record.ParseFromString(record_data)
+    assert record.features["values"].float64_tensor.values == array_data
+
+
 def test_deserializer():
     array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
     s = numpy_to_record_serializer()
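This regression test covers the common.py fix end to end: a 1-D input must come back as exactly one record. The same round trip can be sketched standalone with the public helpers (assuming the sagemaker package and its protobuf bindings are installed):

import numpy as np
from sagemaker.amazon.common import numpy_to_record_serializer, read_records

# the serializer now reshapes 1-D input to (1, n), yielding one record per row
buf = numpy_to_record_serializer()(np.array([1.0, 2.0, 3.0]))
records = read_records(buf)

assert len(records) == 1
assert list(records[0].features["values"].float64_tensor.values) == [1.0, 2.0, 3.0]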
