diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index ff1c4c92fe551..d882942a03ced 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -95,11 +95,11 @@ class ToCooFrame: def setup(self): N = 10000 k = 10 - arr = np.full((N, k), np.nan) + arr = np.zeros((N, k), dtype=float) arr[0, 0] = 3.0 arr[12, 7] = -1.0 arr[0, 9] = 11.2 - self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float")) + self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float", fill_value=0.0)) def time_to_coo(self): self.df.sparse.to_coo() diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 243bcf6900d2e..de815a0a9d3a8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -498,7 +498,7 @@ Sparse ^^^^^^ - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`) - Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`) -- +- Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 90af1cf082981..3bbe936befea9 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -340,10 +340,14 @@ def to_coo(self): cols, rows, data = [], [], [] for col, (_, ser) in enumerate(self._parent.iteritems()): - row = ser.array.sp_index.to_int_index().indices + sp_arr = ser.array + if sp_arr.fill_value != 0: + raise ValueError("fill value must be 0 when converting to COO matrix") + + row = sp_arr.sp_index.to_int_index().indices cols.append(np.repeat(col, len(row))) rows.append(row) - data.append(ser.array.sp_values.astype(dtype, copy=False)) + data.append(sp_arr.sp_values.astype(dtype, copy=False)) cols = np.concatenate(cols) rows = np.concatenate(rows) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 6b8dc8821d4fa..e45dbb393a8de 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -85,6 +85,22 @@ def test_to_coo(self, colnames): expected = scipy.sparse.coo_matrix(np.asarray(df)) assert (result != expected).nnz == 0 + @pytest.mark.parametrize("fill_value", [1, np.nan]) + @td.skip_if_no_scipy + def test_to_coo_nonzero_fill_val_raises(self, fill_value): + df = pd.DataFrame( + { + "A": SparseArray( + [fill_value, fill_value, fill_value, 2], fill_value=fill_value + ), + "B": SparseArray( + [fill_value, 2, fill_value, fill_value], fill_value=fill_value + ), + } + ) + with pytest.raises(ValueError, match="fill value must be 0"): + df.sparse.to_coo() + def test_to_dense(self): df = pd.DataFrame( {