-
Notifications
You must be signed in to change notification settings - Fork 1.2k
fix: remove scipy from dependency #1518
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
sphinx==2.2.2 | ||
numpy | ||
scipy | ||
requests==2.20 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,13 +14,14 @@ | |
from __future__ import absolute_import | ||
|
||
import io | ||
import logging | ||
import struct | ||
import sys | ||
|
||
import numpy as np | ||
from scipy.sparse import issparse | ||
|
||
from sagemaker.amazon.record_pb2 import Record | ||
from sagemaker.utils import DeferredError | ||
|
||
|
||
class numpy_to_record_serializer(object): | ||
|
@@ -171,8 +172,14 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None): | |
array: | ||
labels: | ||
""" | ||
|
||
if not issparse(array): | ||
try: | ||
import scipy | ||
except ImportError as e: | ||
logging.warning("urllib3 failed to import. Local mode features will be impaired or broken.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: update the warning message :) (it still names urllib3, but the import guarded here is scipy) |
||
# Any subsequent attempt to use urllib3 will raise the ImportError | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: update the comment :) (it still refers to urllib3 rather than scipy) |
||
scipy = DeferredError(e) | ||
|
||
if not scipy.sparse.issparse(array): | ||
raise TypeError("Array must be sparse") | ||
|
||
# Validate shape of array and labels, resolve array and label types | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,6 @@ | |
import numpy as np | ||
import tempfile | ||
import pytest | ||
import itertools | ||
from scipy.sparse import coo_matrix | ||
from sagemaker.amazon.common import ( | ||
record_deserializer, | ||
write_numpy_to_dense_tensor, | ||
|
@@ -152,195 +150,6 @@ def test_invalid_label(): | |
write_numpy_to_dense_tensor(f, array, label_data) | ||
|
||
|
||
def test_dense_float_write_spmatrix_to_sparse_tensor(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: we should still keep the tests (the sparse-tensor tests removed in this diff). |
||
array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data)) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float64_tensor.values == expected_data | ||
assert record.features["values"].float64_tensor.keys == expected_keys | ||
assert record.features["values"].float64_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_dense_float32_write_spmatrix_to_sparse_tensor(): | ||
array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data).astype(np.dtype("float32"))) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float32_tensor.values == expected_data | ||
assert record.features["values"].float32_tensor.keys == expected_keys | ||
assert record.features["values"].float32_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_dense_int_write_spmatrix_to_sparse_tensor(): | ||
array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data).astype(np.dtype("int"))) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].int32_tensor.values == expected_data | ||
assert record.features["values"].int32_tensor.keys == expected_keys | ||
assert record.features["values"].int32_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_dense_int_spmatrix_to_sparse_label(): | ||
array_data = [[1, 2, 3], [10, 20, 3]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data)) | ||
label_data = np.array([99, 98, 97]) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array, label_data) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys, label in zip( | ||
read_recordio(f), array_data, keys_data, label_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].int32_tensor.values == expected_data | ||
assert record.features["values"].int32_tensor.keys == expected_keys | ||
assert record.label["values"].int32_tensor.values == [label] | ||
assert record.features["values"].int32_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_dense_float32_spmatrix_to_sparse_label(): | ||
array_data = [[1, 2, 3], [10, 20, 3]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data).astype("float32")) | ||
label_data = np.array([99, 98, 97]) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array, label_data) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys, label in zip( | ||
read_recordio(f), array_data, keys_data, label_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float32_tensor.values == expected_data | ||
assert record.features["values"].float32_tensor.keys == expected_keys | ||
assert record.label["values"].int32_tensor.values == [label] | ||
assert record.features["values"].float32_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_dense_float64_spmatrix_to_sparse_label(): | ||
array_data = [[1, 2, 3], [10, 20, 3]] | ||
keys_data = [[0, 1, 2], [0, 1, 2]] | ||
array = coo_matrix(np.array(array_data).astype("float64")) | ||
label_data = np.array([99, 98, 97]) | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array, label_data) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys, label in zip( | ||
read_recordio(f), array_data, keys_data, label_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float64_tensor.values == expected_data | ||
assert record.features["values"].float64_tensor.keys == expected_keys | ||
assert record.label["values"].int32_tensor.values == [label] | ||
assert record.features["values"].float64_tensor.shape == [len(expected_data)] | ||
|
||
|
||
def test_invalid_sparse_label(): | ||
array_data = [[1, 2, 3], [10, 20, 3]] | ||
array = coo_matrix(np.array(array_data)) | ||
label_data = np.array([99, 98, 97, 1000]).astype(np.dtype("float64")) | ||
with tempfile.TemporaryFile() as f: | ||
with pytest.raises(ValueError): | ||
write_spmatrix_to_sparse_tensor(f, array, label_data) | ||
|
||
|
||
def test_sparse_float_write_spmatrix_to_sparse_tensor(): | ||
n = 4 | ||
array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] | ||
keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] | ||
|
||
flatten_data = list(itertools.chain.from_iterable(array_data)) | ||
y_indices = list(itertools.chain.from_iterable(keys_data)) | ||
x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] | ||
x_indices = list(itertools.chain.from_iterable(x_indices)) | ||
|
||
array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float64") | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float64_tensor.values == expected_data | ||
assert record.features["values"].float64_tensor.keys == expected_keys | ||
assert record.features["values"].float64_tensor.shape == [n] | ||
|
||
|
||
def test_sparse_float32_write_spmatrix_to_sparse_tensor(): | ||
n = 4 | ||
array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] | ||
keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] | ||
|
||
flatten_data = list(itertools.chain.from_iterable(array_data)) | ||
y_indices = list(itertools.chain.from_iterable(keys_data)) | ||
x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] | ||
x_indices = list(itertools.chain.from_iterable(x_indices)) | ||
|
||
array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float32") | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].float32_tensor.values == expected_data | ||
assert record.features["values"].float32_tensor.keys == expected_keys | ||
assert record.features["values"].float32_tensor.shape == [n] | ||
|
||
|
||
def test_sparse_int_write_spmatrix_to_sparse_tensor(): | ||
n = 4 | ||
array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] | ||
keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] | ||
|
||
flatten_data = list(itertools.chain.from_iterable(array_data)) | ||
y_indices = list(itertools.chain.from_iterable(keys_data)) | ||
x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] | ||
x_indices = list(itertools.chain.from_iterable(x_indices)) | ||
|
||
array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="int") | ||
with tempfile.TemporaryFile() as f: | ||
write_spmatrix_to_sparse_tensor(f, array) | ||
f.seek(0) | ||
for record_data, expected_data, expected_keys in zip( | ||
read_recordio(f), array_data, keys_data | ||
): | ||
record = Record() | ||
record.ParseFromString(record_data) | ||
assert record.features["values"].int32_tensor.values == expected_data | ||
assert record.features["values"].int32_tensor.keys == expected_keys | ||
assert record.features["values"].int32_tensor.shape == [n] | ||
|
||
|
||
def test_dense_to_sparse(): | ||
array_data = [[1, 2, 3], [10, 20, 3]] | ||
array = np.array(array_data) | ||
|
Uh oh!
There was an error while loading. Please reload this page.