Skip to content

fix: remove scipy from dependency #1518

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 1, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ def __getattr__(cls, name):
"tensorflow.python.framework",
"tensorflow_serving",
"tensorflow_serving.apis",
"scipy",
"scipy.sparse",
]
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand Down
1 change: 0 additions & 1 deletion doc/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
sphinx==2.2.2
numpy
scipy
requests==2.20
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def read_version():
"boto3>=1.13.6",
"numpy>=1.9.0",
"protobuf>=3.1",
"scipy>=0.19.0",
"protobuf3-to-dict>=0.1.5",
"smdebug-rulesconfig==0.1.2",
"importlib-metadata>=1.4.0",
Expand Down
13 changes: 10 additions & 3 deletions src/sagemaker/amazon/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
from __future__ import absolute_import

import io
import logging
import struct
import sys

import numpy as np
from scipy.sparse import issparse

from sagemaker.amazon.record_pb2 import Record
from sagemaker.utils import DeferredError


class numpy_to_record_serializer(object):
Expand Down Expand Up @@ -171,8 +172,14 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None):
array:
labels:
"""

if not issparse(array):
try:
import scipy.sparse
except ImportError as e:
logging.warning("scipy failed to import. Sparse-matrix features will be impaired or broken.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update the warning message :)

# Any subsequent attempt to use scipy will raise the ImportError
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update the comment :)

scipy = DeferredError(e)

if not scipy.sparse.issparse(array):
raise TypeError("Array must be sparse")

# Validate shape of array and labels, resolve array and label types
Expand Down
191 changes: 0 additions & 191 deletions tests/unit/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import numpy as np
import tempfile
import pytest
import itertools
from scipy.sparse import coo_matrix
from sagemaker.amazon.common import (
record_deserializer,
write_numpy_to_dense_tensor,
Expand Down Expand Up @@ -152,195 +150,6 @@ def test_invalid_label():
write_numpy_to_dense_tensor(f, array, label_data)


def test_dense_float_write_spmatrix_to_sparse_tensor():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should still keep the tests

array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
keys_data = [[0, 1, 2], [0, 1, 2]]
array = coo_matrix(np.array(array_data))
with tempfile.TemporaryFile() as f:
write_spmatrix_to_sparse_tensor(f, array)
f.seek(0)
for record_data, expected_data, expected_keys in zip(
read_recordio(f), array_data, keys_data
):
record = Record()
record.ParseFromString(record_data)
assert record.features["values"].float64_tensor.values == expected_data
assert record.features["values"].float64_tensor.keys == expected_keys
assert record.features["values"].float64_tensor.shape == [len(expected_data)]


def test_dense_float32_write_spmatrix_to_sparse_tensor():
    """A dense float32 COO matrix round-trips through the sparse recordio writer."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    matrix = coo_matrix(np.array(rows).astype(np.dtype("float32")))
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix)
        stream.seek(0)
        # Each serialized record must carry one row's values, keys, and width.
        for payload, values, keys in zip(read_recordio(stream), rows, row_keys):
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].float32_tensor
            assert tensor.values == values
            assert tensor.keys == keys
            assert tensor.shape == [len(values)]


def test_dense_int_write_spmatrix_to_sparse_tensor():
    """A dense int COO matrix is serialized into int32 sparse-tensor records."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    matrix = coo_matrix(np.array(rows).astype(np.dtype("int")))
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix)
        stream.seek(0)
        # Integer input selects the int32_tensor field on each record.
        for payload, values, keys in zip(read_recordio(stream), rows, row_keys):
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].int32_tensor
            assert tensor.values == values
            assert tensor.keys == keys
            assert tensor.shape == [len(values)]


def test_dense_int_spmatrix_to_sparse_label():
    """Labels given with a dense int COO matrix land in each record's label map."""
    rows = [[1, 2, 3], [10, 20, 3]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    labels = np.array([99, 98, 97])
    matrix = coo_matrix(np.array(rows))
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix, labels)
        stream.seek(0)
        paired = zip(read_recordio(stream), rows, row_keys, labels)
        for payload, values, keys, label in paired:
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].int32_tensor
            assert tensor.values == values
            assert tensor.keys == keys
            # One scalar label per record, stored as a length-1 int32 tensor.
            assert rec.label["values"].int32_tensor.values == [label]
            assert tensor.shape == [len(values)]


def test_dense_float32_spmatrix_to_sparse_label():
    """float32 features keep their dtype while labels stay int32 in the label map."""
    rows = [[1, 2, 3], [10, 20, 3]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    labels = np.array([99, 98, 97])
    matrix = coo_matrix(np.array(rows).astype("float32"))
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix, labels)
        stream.seek(0)
        paired = zip(read_recordio(stream), rows, row_keys, labels)
        for payload, values, keys, label in paired:
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].float32_tensor
            assert tensor.values == values
            assert tensor.keys == keys
            assert rec.label["values"].int32_tensor.values == [label]
            assert tensor.shape == [len(values)]


def test_dense_float64_spmatrix_to_sparse_label():
    """float64 features keep their dtype while labels stay int32 in the label map."""
    rows = [[1, 2, 3], [10, 20, 3]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    labels = np.array([99, 98, 97])
    matrix = coo_matrix(np.array(rows).astype("float64"))
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix, labels)
        stream.seek(0)
        paired = zip(read_recordio(stream), rows, row_keys, labels)
        for payload, values, keys, label in paired:
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].float64_tensor
            assert tensor.values == values
            assert tensor.keys == keys
            assert rec.label["values"].int32_tensor.values == [label]
            assert tensor.shape == [len(values)]


def test_invalid_sparse_label():
    """A label vector whose length disagrees with the matrix row count is rejected."""
    matrix = coo_matrix(np.array([[1, 2, 3], [10, 20, 3]]))
    # Four labels against a two-row matrix must raise, not serialize.
    bad_labels = np.array([99, 98, 97, 1000]).astype(np.dtype("float64"))
    with tempfile.TemporaryFile() as stream:
        with pytest.raises(ValueError):
            write_spmatrix_to_sparse_tensor(stream, matrix, bad_labels)


def test_sparse_float_write_spmatrix_to_sparse_tensor():
    """A genuinely sparse float64 COO matrix keeps per-row values, keys, and width n."""
    n = 4
    rows = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    row_keys = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the ragged per-row data into COO (value, (row, col)) triples.
    values = [v for row in rows for v in row]
    cols = [c for keys in row_keys for c in keys]
    row_ids = [i for i, keys in enumerate(row_keys) for _ in keys]

    matrix = coo_matrix((values, (row_ids, cols)), dtype="float64")
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix)
        stream.seek(0)
        for payload, expected_values, expected_keys in zip(read_recordio(stream), rows, row_keys):
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].float64_tensor
            assert tensor.values == expected_values
            assert tensor.keys == expected_keys
            # Every record reports the full matrix width, not the row's nnz.
            assert tensor.shape == [n]


def test_sparse_float32_write_spmatrix_to_sparse_tensor():
    """A genuinely sparse float32 COO matrix keeps per-row values, keys, and width n."""
    n = 4
    rows = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    row_keys = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the ragged per-row data into COO (value, (row, col)) triples.
    values = [v for row in rows for v in row]
    cols = [c for keys in row_keys for c in keys]
    row_ids = [i for i, keys in enumerate(row_keys) for _ in keys]

    matrix = coo_matrix((values, (row_ids, cols)), dtype="float32")
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix)
        stream.seek(0)
        for payload, expected_values, expected_keys in zip(read_recordio(stream), rows, row_keys):
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].float32_tensor
            assert tensor.values == expected_values
            assert tensor.keys == expected_keys
            # Every record reports the full matrix width, not the row's nnz.
            assert tensor.shape == [n]


def test_sparse_int_write_spmatrix_to_sparse_tensor():
    """A genuinely sparse int COO matrix keeps per-row values, keys, and width n."""
    n = 4
    rows = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    row_keys = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the ragged per-row data into COO (value, (row, col)) triples.
    values = [v for row in rows for v in row]
    cols = [c for keys in row_keys for c in keys]
    row_ids = [i for i, keys in enumerate(row_keys) for _ in keys]

    matrix = coo_matrix((values, (row_ids, cols)), dtype="int")
    with tempfile.TemporaryFile() as stream:
        write_spmatrix_to_sparse_tensor(stream, matrix)
        stream.seek(0)
        for payload, expected_values, expected_keys in zip(read_recordio(stream), rows, row_keys):
            rec = Record()
            rec.ParseFromString(payload)
            tensor = rec.features["values"].int32_tensor
            assert tensor.values == expected_values
            assert tensor.keys == expected_keys
            # Every record reports the full matrix width, not the row's nnz.
            assert tensor.shape == [n]


def test_dense_to_sparse():
array_data = [[1, 2, 3], [10, 20, 3]]
array = np.array(array_data)
Expand Down