Skip to content

Commit 91dcc3a

Browse files
authored
BUG: Fix wrong reading sparse matrix (#31991)
1 parent a046596 commit 91dcc3a

File tree

4 files changed

+29
-5
lines changed

4 files changed

+29
-5
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ Reshaping
623623
Sparse
624624
^^^^^^
625625
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
626-
-
626+
- Bug in :meth:`arrays.SparseArray.from_spmatrix` where a scipy sparse matrix was read incorrectly (:issue:`31991`)
627627
-
628628

629629
ExtensionArray

pandas/core/arrays/sparse/array.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -439,11 +439,10 @@ def from_spmatrix(cls, data):
439439

440440
# our sparse index classes require that the positions be strictly
441441
# increasing. So we need to sort loc, and arr accordingly.
442+
data = data.tocsc()
443+
data.sort_indices()
442444
arr = data.data
443-
idx, _ = data.nonzero()
444-
loc = np.argsort(idx)
445-
arr = arr.take(loc)
446-
idx.sort()
445+
idx = data.indices
447446

448447
zero = np.array(0, dtype=arr.dtype).item()
449448
dtype = SparseDtype(arr.dtype, zero)

pandas/tests/arrays/sparse/test_accessor.py

+12
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ def test_from_spmatrix(self, format, labels, dtype):
4141
).astype(sp_dtype)
4242
tm.assert_frame_equal(result, expected)
4343

44+
@pytest.mark.parametrize("format", ["csc", "csr", "coo"])
45+
@td.skip_if_no_scipy
46+
def test_from_spmatrix_including_explicit_zero(self, format):
47+
import scipy.sparse
48+
49+
mat = scipy.sparse.random(10, 2, density=0.5, format=format)
50+
mat.data[0] = 0
51+
result = pd.DataFrame.sparse.from_spmatrix(mat)
52+
dtype = SparseDtype("float64", 0.0)
53+
expected = pd.DataFrame(mat.todense()).astype(dtype)
54+
tm.assert_frame_equal(result, expected)
55+
4456
@pytest.mark.parametrize(
4557
"columns",
4658
[["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],

pandas/tests/arrays/sparse/test_array.py

+13
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,19 @@ def test_from_spmatrix(self, size, format):
208208
expected = mat.toarray().ravel()
209209
tm.assert_numpy_array_equal(result, expected)
210210

211+
@pytest.mark.parametrize("format", ["coo", "csc", "csr"])
212+
@td.skip_if_no_scipy
213+
def test_from_spmatrix_including_explicit_zero(self, format):
214+
import scipy.sparse
215+
216+
mat = scipy.sparse.random(10, 1, density=0.5, format=format)
217+
mat.data[0] = 0
218+
result = SparseArray.from_spmatrix(mat)
219+
220+
result = np.asarray(result)
221+
expected = mat.toarray().ravel()
222+
tm.assert_numpy_array_equal(result, expected)
223+
211224
@td.skip_if_no_scipy
212225
def test_from_spmatrix_raises(self):
213226
import scipy.sparse

0 commit comments

Comments
 (0)