diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 718de09a0c3e4..0dd24bcc54933 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -572,7 +572,7 @@ Reshaping Sparse ^^^^^^ - Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`) -- +- Bug in :meth:`arrays.SparseArray.from_spmatrix` where a scipy sparse matrix was read incorrectly (:issue:`31991`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index a98875ace09aa..620e157ee54ec 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -439,11 +439,10 @@ def from_spmatrix(cls, data): # our sparse index classes require that the positions be strictly # increasing. So we need to sort loc, and arr accordingly. + data = data.tocsc() + data.sort_indices() arr = data.data - idx, _ = data.nonzero() - loc = np.argsort(idx) - arr = arr.take(loc) - idx.sort() + idx = data.indices zero = np.array(0, dtype=arr.dtype).item() dtype = SparseDtype(arr.dtype, zero) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index d8a1831cd61ec..2a81b94ce779c 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -41,6 +41,18 @@ def test_from_spmatrix(self, format, labels, dtype): ).astype(sp_dtype) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 2, density=0.5, format=format) + mat.data[0] = 0 + result = pd.DataFrame.sparse.from_spmatrix(mat) + dtype = SparseDtype("float64", 0.0) + expected = pd.DataFrame(mat.todense()).astype(dtype) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( 
"columns", [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]], diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index cb3a70e934dcb..f1e5050fa8a2e 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -208,6 +208,19 @@ def test_from_spmatrix(self, size, format): expected = mat.toarray().ravel() tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @td.skip_if_no_scipy + def test_from_spmatrix_including_explicit_zero(self, format): + import scipy.sparse + + mat = scipy.sparse.random(10, 1, density=0.5, format=format) + mat.data[0] = 0 + result = SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + @td.skip_if_no_scipy def test_from_spmatrix_raises(self): import scipy.sparse