Skip to content

Commit 08828ed

Browse files
author
Chuyang Deng
committed
recover test_common tests
1 parent 8b59aea commit 08828ed

File tree

3 files changed

+194
-2
lines changed

3 files changed

+194
-2
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def read_version():
5252
"PyYAML>=5.3, <6", # PyYAML version has to match docker-compose requirements
5353
],
5454
"tensorflow": ["tensorflow>=1.3.0"],
55+
"scipy" : ["scipy>=0.19.0"],
5556
}
5657
# Meta dependency groups
5758
extras["all"] = [item for group in extras.values() for item in group]

src/sagemaker/amazon/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,8 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None):
175175
try:
176176
import scipy
177177
except ImportError as e:
178-
logging.warning("urllib3 failed to import. Local mode features will be impaired or broken.")
179-
# Any subsequent attempt to use urllib3 will raise the ImportError
178+
logging.warning("scipy is removed from SageMaker Python SDK v2.")
179+
# Any subsequent attempt to use scipy will raise the ImportError
180180
scipy = DeferredError(e)
181181

182182
if not scipy.sparse.issparse(array):

tests/unit/test_common.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import numpy as np
1616
import tempfile
1717
import pytest
18+
import itertools
19+
from scipy.sparse import coo_matrix
1820
from sagemaker.amazon.common import (
1921
record_deserializer,
2022
write_numpy_to_dense_tensor,
@@ -150,6 +152,195 @@ def test_invalid_label():
150152
write_numpy_to_dense_tensor(f, array, label_data)
151153

152154

155+
def test_dense_float_write_spmatrix_to_sparse_tensor():
    """Round-trip a fully populated float ``coo_matrix`` through the sparse writer.

    The matrix is written with ``write_spmatrix_to_sparse_tensor``, read back
    with ``read_recordio``, and every record's ``float64_tensor`` values, keys
    and shape are compared against the matching input row.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float64_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [len(expected_data)]
170+
171+
172+
def test_dense_float32_write_spmatrix_to_sparse_tensor():
    """Round-trip a fully populated float32 ``coo_matrix`` through the sparse writer.

    The matrix is written with ``write_spmatrix_to_sparse_tensor``, read back
    with ``read_recordio``, and every record's ``float32_tensor`` values, keys
    and shape are compared against the matching input row.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype(np.dtype("float32")))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [len(expected_data)]
187+
188+
189+
def test_dense_int_write_spmatrix_to_sparse_tensor():
    """Round-trip a fully populated integer ``coo_matrix`` through the sparse writer.

    The input rows are cast to ``int`` before being written with
    ``write_spmatrix_to_sparse_tensor``; every record read back with
    ``read_recordio`` must expose matching ``int32_tensor`` values, keys and
    shape.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype(np.dtype("int")))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].int32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [len(expected_data)]
204+
205+
206+
def test_dense_int_spmatrix_to_sparse_label():
    """Round-trip an integer ``coo_matrix`` together with integer labels.

    Every record read back with ``read_recordio`` must expose the matching
    ``int32_tensor`` feature values, keys and shape, and carry the label at the
    same position in ``label_data``.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data))
    # NOTE: label_data holds three labels while the matrix has two rows; zip()
    # below stops at the shortest input, so the third label is never compared.
    label_data = np.array([99, 98, 97])
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array, label_data)
        f.seek(0)
        records = list(read_recordio(f))

    # Without this check the test would pass vacuously if fewer records (or
    # none at all) were read back, because zip() silently truncates.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys, label in zip(
        records, array_data, keys_data, label_data
    ):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].int32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert record.label["values"].int32_tensor.values == [label]
        assert tensor.shape == [len(expected_data)]
223+
224+
225+
def test_dense_float32_spmatrix_to_sparse_label():
    """Round-trip a float32 ``coo_matrix`` together with integer labels.

    Every record read back with ``read_recordio`` must expose the matching
    ``float32_tensor`` feature values, keys and shape, and carry the label at
    the same position in ``label_data`` as an ``int32_tensor``.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype("float32"))
    # NOTE: label_data holds three labels while the matrix has two rows; zip()
    # below stops at the shortest input, so the third label is never compared.
    label_data = np.array([99, 98, 97])
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array, label_data)
        f.seek(0)
        records = list(read_recordio(f))

    # Without this check the test would pass vacuously if fewer records (or
    # none at all) were read back, because zip() silently truncates.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys, label in zip(
        records, array_data, keys_data, label_data
    ):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert record.label["values"].int32_tensor.values == [label]
        assert tensor.shape == [len(expected_data)]
242+
243+
244+
def test_dense_float64_spmatrix_to_sparse_label():
    """Round-trip a float64 ``coo_matrix`` together with integer labels.

    Every record read back with ``read_recordio`` must expose the matching
    ``float64_tensor`` feature values, keys and shape, and carry the label at
    the same position in ``label_data`` as an ``int32_tensor``.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype("float64"))
    # NOTE: label_data holds three labels while the matrix has two rows; zip()
    # below stops at the shortest input, so the third label is never compared.
    label_data = np.array([99, 98, 97])
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array, label_data)
        f.seek(0)
        records = list(read_recordio(f))

    # Without this check the test would pass vacuously if fewer records (or
    # none at all) were read back, because zip() silently truncates.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys, label in zip(
        records, array_data, keys_data, label_data
    ):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float64_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert record.label["values"].int32_tensor.values == [label]
        assert tensor.shape == [len(expected_data)]
261+
262+
263+
def test_invalid_sparse_label():
    """Expect ``ValueError`` when four labels accompany a 2x3 sparse matrix."""
    rows = [[1, 2, 3], [10, 20, 3]]
    sparse_rows = coo_matrix(np.array(rows))
    bad_labels = np.array([99, 98, 97, 1000]).astype(np.dtype("float64"))
    # pytest.raises is entered last and therefore exits first, exactly as when
    # it is nested inside the temporary-file context.
    with tempfile.TemporaryFile() as f, pytest.raises(ValueError):
        write_spmatrix_to_sparse_tensor(f, sparse_rows, bad_labels)
270+
271+
272+
def test_sparse_float_write_spmatrix_to_sparse_tensor():
    """Round-trip a genuinely sparse float64 ``coo_matrix`` through the sparse writer.

    Rows hold different numbers of stored entries. Every record read back with
    ``read_recordio`` must expose that row's values and column keys in its
    ``float64_tensor`` and report shape ``[n]``.
    """
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten into COO triplets: the values, plus one column key and one row
    # index per stored value.
    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    x_indices = list(
        itertools.chain.from_iterable([row] * len(keys) for row, keys in enumerate(keys_data))
    )

    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float64")
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float64_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [n]
294+
295+
296+
def test_sparse_float32_write_spmatrix_to_sparse_tensor():
    """Round-trip a genuinely sparse float32 ``coo_matrix`` through the sparse writer.

    Rows hold different numbers of stored entries. Every record read back with
    ``read_recordio`` must expose that row's values and column keys in its
    ``float32_tensor`` and report shape ``[n]``.
    """
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten into COO triplets: the values, plus one column key and one row
    # index per stored value.
    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    x_indices = list(
        itertools.chain.from_iterable([row] * len(keys) for row, keys in enumerate(keys_data))
    )

    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float32")
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].float32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [n]
318+
319+
320+
def test_sparse_int_write_spmatrix_to_sparse_tensor():
    """Round-trip a genuinely sparse integer ``coo_matrix`` through the sparse writer.

    Rows hold different numbers of stored entries and the matrix is built with
    ``dtype="int"``. Every record read back with ``read_recordio`` must expose
    that row's values and column keys in its ``int32_tensor`` and report shape
    ``[n]``.
    """
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten into COO triplets: the values, plus one column key and one row
    # index per stored value.
    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    x_indices = list(
        itertools.chain.from_iterable([row] * len(keys) for row, keys in enumerate(keys_data))
    )

    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="int")
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        records = list(read_recordio(f))

    # zip() stops at the shortest input, so without this check the test would
    # pass vacuously if fewer records (or none at all) were read back.
    assert len(records) == len(array_data)
    for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        tensor = record.features["values"].int32_tensor
        assert tensor.values == expected_data
        assert tensor.keys == expected_keys
        assert tensor.shape == [n]
342+
343+
153344
def test_dense_to_sparse():
154345
array_data = [[1, 2, 3], [10, 20, 3]]
155346
array = np.array(array_data)

0 commit comments

Comments
 (0)