 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 
-import boto3
-import numpy as np
 import pytest
 
 import sagemaker
@@ -57,6 +54,7 @@ def test_byo_estimator(sagemaker_session, region):
 
     """
     image_name = registry(region) + "/factorization-machines:1"
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -65,19 +63,11 @@ def test_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
@@ -111,6 +101,7 @@ def test_byo_estimator(sagemaker_session, region):
 def test_async_byo_estimator(sagemaker_session, region):
     image_name = registry(region) + "/factorization-machines:1"
     endpoint_name = name_from_base('byo')
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
     training_job_name = ""
 
     with timeout(minutes=5):
@@ -120,19 +111,11 @@ def test_async_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
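
The net effect in both tests is the same: instead of slicing 100 MNIST examples, serializing them to a RecordIO-protobuf buffer in memory, and uploading that buffer with raw boto3, each test now uploads a pre-generated dummy_tensor fixture through sagemaker_session.upload_data, which resolves the default bucket and returns the s3:// URI in one call. A minimal before/after sketch of the two approaches, assuming valid AWS credentials and a local dummy_tensor file (illustrative only, not part of the diff):

import os

import boto3
import sagemaker

session = sagemaker.Session()        # resolves credentials/region from the environment
bucket = session.default_bucket()    # e.g. sagemaker-<region>-<account-id>
prefix, key = 'test_byo_estimator', 'recordio-pb-data'

# Before: upload a file object with raw boto3, then assemble the URI by hand.
with open('dummy_tensor', 'rb') as buf:
    boto3.resource('s3').Bucket(bucket).Object(
        os.path.join(prefix, 'train', key)).upload_fileobj(buf)
s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

# After: one SDK call uploads the file and hands back the URI for Estimator.fit().
s3_train_data = session.upload_data(path='dummy_tensor',
                                    key_prefix=os.path.join(prefix, 'train', key))

Note that when path is a single file, upload_data returns a URI that includes the uploaded file's name, so it may differ slightly from the hand-built prefix URI the old code produced.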