
Commit 8b59aea

Committed by: Chuyang Deng
fix: remove scipy from dependency
1 parent: 57b2a22

5 files changed: 10 additions, 198 deletions


doc/conf.py

Lines changed: 0 additions & 2 deletions
@@ -40,8 +40,6 @@ def __getattr__(cls, name):
     "tensorflow.python.framework",
     "tensorflow_serving",
     "tensorflow_serving.apis",
-    "scipy",
-    "scipy.sparse",
 ]
 sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 

doc/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
 sphinx==2.2.2
 numpy
-scipy
 requests==2.20

setup.py

Lines changed: 0 additions & 1 deletion
@@ -37,7 +37,6 @@ def read_version():
     "boto3>=1.13.6",
     "numpy>=1.9.0",
     "protobuf>=3.1",
-    "scipy>=0.19.0",
     "protobuf3-to-dict>=0.1.5",
     "smdebug-rulesconfig==0.1.2",
     "importlib-metadata>=1.4.0",

src/sagemaker/amazon/common.py

Lines changed: 10 additions & 3 deletions
@@ -14,13 +14,14 @@
 from __future__ import absolute_import
 
 import io
+import logging
 import struct
 import sys
 
 import numpy as np
-from scipy.sparse import issparse
 
 from sagemaker.amazon.record_pb2 import Record
+from sagemaker.utils import DeferredError
 
 
 class numpy_to_record_serializer(object):
@@ -171,8 +172,14 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None):
         array:
         labels:
     """
-
-    if not issparse(array):
+    try:
+        import scipy.sparse
+    except ImportError as e:
+        logging.warning("scipy failed to import. Sparse matrix features will be impaired or broken.")
+        # Any subsequent attempt to use scipy will raise the ImportError
+        scipy = DeferredError(e)
+
+    if not scipy.sparse.issparse(array):
         raise TypeError("Array must be sparse")
 
     # Validate shape of array and labels, resolve array and label types
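
For context, here is a minimal sketch of the deferred-import pattern this change relies on. It is illustrative only and assumes a simplified DeferredError; the actual sagemaker.utils.DeferredError implementation may differ.

# Illustrative sketch only; the real sagemaker.utils.DeferredError may differ.
import logging


class DeferredError(object):
    """Stores an ImportError and re-raises it on any attribute access."""

    def __init__(self, exception):
        self.exc = exception

    def __getattr__(self, name):
        # e.g. scipy.sparse.issparse(...) on the placeholder re-raises the original error
        raise self.exc


try:
    import scipy.sparse
except ImportError as e:
    logging.warning("scipy failed to import. Sparse matrix features will be impaired or broken.")
    scipy = DeferredError(e)  # any later scipy.<attr> access re-raises the ImportError

With this pattern, importing sagemaker.amazon.common no longer fails when scipy is missing; the ImportError only surfaces if write_spmatrix_to_sparse_tensor actually touches scipy.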

tests/unit/test_common.py

Lines changed: 0 additions & 191 deletions
@@ -15,8 +15,6 @@
 import numpy as np
 import tempfile
 import pytest
-import itertools
-from scipy.sparse import coo_matrix
 from sagemaker.amazon.common import (
     record_deserializer,
     write_numpy_to_dense_tensor,
@@ -152,195 +150,6 @@ def test_invalid_label():
             write_numpy_to_dense_tensor(f, array, label_data)
 
 
-def test_dense_float_write_spmatrix_to_sparse_tensor():
-    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data))
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float64_tensor.values == expected_data
-            assert record.features["values"].float64_tensor.keys == expected_keys
-            assert record.features["values"].float64_tensor.shape == [len(expected_data)]
-
-
-def test_dense_float32_write_spmatrix_to_sparse_tensor():
-    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data).astype(np.dtype("float32")))
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float32_tensor.values == expected_data
-            assert record.features["values"].float32_tensor.keys == expected_keys
-            assert record.features["values"].float32_tensor.shape == [len(expected_data)]
-
-
-def test_dense_int_write_spmatrix_to_sparse_tensor():
-    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data).astype(np.dtype("int")))
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].int32_tensor.values == expected_data
-            assert record.features["values"].int32_tensor.keys == expected_keys
-            assert record.features["values"].int32_tensor.shape == [len(expected_data)]
-
-
-def test_dense_int_spmatrix_to_sparse_label():
-    array_data = [[1, 2, 3], [10, 20, 3]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data))
-    label_data = np.array([99, 98, 97])
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array, label_data)
-        f.seek(0)
-        for record_data, expected_data, expected_keys, label in zip(
-            read_recordio(f), array_data, keys_data, label_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].int32_tensor.values == expected_data
-            assert record.features["values"].int32_tensor.keys == expected_keys
-            assert record.label["values"].int32_tensor.values == [label]
-            assert record.features["values"].int32_tensor.shape == [len(expected_data)]
-
-
-def test_dense_float32_spmatrix_to_sparse_label():
-    array_data = [[1, 2, 3], [10, 20, 3]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data).astype("float32"))
-    label_data = np.array([99, 98, 97])
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array, label_data)
-        f.seek(0)
-        for record_data, expected_data, expected_keys, label in zip(
-            read_recordio(f), array_data, keys_data, label_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float32_tensor.values == expected_data
-            assert record.features["values"].float32_tensor.keys == expected_keys
-            assert record.label["values"].int32_tensor.values == [label]
-            assert record.features["values"].float32_tensor.shape == [len(expected_data)]
-
-
-def test_dense_float64_spmatrix_to_sparse_label():
-    array_data = [[1, 2, 3], [10, 20, 3]]
-    keys_data = [[0, 1, 2], [0, 1, 2]]
-    array = coo_matrix(np.array(array_data).astype("float64"))
-    label_data = np.array([99, 98, 97])
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array, label_data)
-        f.seek(0)
-        for record_data, expected_data, expected_keys, label in zip(
-            read_recordio(f), array_data, keys_data, label_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float64_tensor.values == expected_data
-            assert record.features["values"].float64_tensor.keys == expected_keys
-            assert record.label["values"].int32_tensor.values == [label]
-            assert record.features["values"].float64_tensor.shape == [len(expected_data)]
-
-
-def test_invalid_sparse_label():
-    array_data = [[1, 2, 3], [10, 20, 3]]
-    array = coo_matrix(np.array(array_data))
-    label_data = np.array([99, 98, 97, 1000]).astype(np.dtype("float64"))
-    with tempfile.TemporaryFile() as f:
-        with pytest.raises(ValueError):
-            write_spmatrix_to_sparse_tensor(f, array, label_data)
-
-
-def test_sparse_float_write_spmatrix_to_sparse_tensor():
-    n = 4
-    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
-    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]
-
-    flatten_data = list(itertools.chain.from_iterable(array_data))
-    y_indices = list(itertools.chain.from_iterable(keys_data))
-    x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))]
-    x_indices = list(itertools.chain.from_iterable(x_indices))
-
-    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float64")
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float64_tensor.values == expected_data
-            assert record.features["values"].float64_tensor.keys == expected_keys
-            assert record.features["values"].float64_tensor.shape == [n]
-
-
-def test_sparse_float32_write_spmatrix_to_sparse_tensor():
-    n = 4
-    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
-    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]
-
-    flatten_data = list(itertools.chain.from_iterable(array_data))
-    y_indices = list(itertools.chain.from_iterable(keys_data))
-    x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))]
-    x_indices = list(itertools.chain.from_iterable(x_indices))
-
-    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float32")
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].float32_tensor.values == expected_data
-            assert record.features["values"].float32_tensor.keys == expected_keys
-            assert record.features["values"].float32_tensor.shape == [n]
-
-
-def test_sparse_int_write_spmatrix_to_sparse_tensor():
-    n = 4
-    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
-    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]
-
-    flatten_data = list(itertools.chain.from_iterable(array_data))
-    y_indices = list(itertools.chain.from_iterable(keys_data))
-    x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))]
-    x_indices = list(itertools.chain.from_iterable(x_indices))
-
-    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="int")
-    with tempfile.TemporaryFile() as f:
-        write_spmatrix_to_sparse_tensor(f, array)
-        f.seek(0)
-        for record_data, expected_data, expected_keys in zip(
-            read_recordio(f), array_data, keys_data
-        ):
-            record = Record()
-            record.ParseFromString(record_data)
-            assert record.features["values"].int32_tensor.values == expected_data
-            assert record.features["values"].int32_tensor.keys == expected_keys
-            assert record.features["values"].int32_tensor.shape == [n]
-
-
 def test_dense_to_sparse():
     array_data = [[1, 2, 3], [10, 20, 3]]
     array = np.array(array_data)
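
The scipy-backed tests are deleted outright here. A hypothetical alternative, not part of this commit, would be to keep a scipy test and skip it when scipy is not installed, for example with pytest.importorskip:

# Hypothetical sketch, not part of this commit: skip instead of delete when scipy is missing.
import tempfile

import numpy as np
import pytest

from sagemaker.amazon.common import write_spmatrix_to_sparse_tensor


def test_spmatrix_roundtrip_when_scipy_available():
    sparse = pytest.importorskip("scipy.sparse")  # skips this test if scipy is not installed
    array = sparse.coo_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)  # should serialize without raising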
