Skip to content

Commit 6b75ed6

Browse files
authored
BUG: to_coo silently converting non-zero fill values (#43763)
* BUG: to_coo silently converting non-zero fill values
* fix to_coo benchmark to have 0 fill value
1 parent e727a56 commit 6b75ed6

File tree

4 files changed: +25 -5 lines changed

asv_bench/benchmarks/sparse.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ class ToCooFrame:
9595
def setup(self):
9696
N = 10000
9797
k = 10
98-
arr = np.full((N, k), np.nan)
98+
arr = np.zeros((N, k), dtype=float)
9999
arr[0, 0] = 3.0
100100
arr[12, 7] = -1.0
101101
arr[0, 9] = 11.2
102-
self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
102+
self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float", fill_value=0.0))
103103

104104
def time_to_coo(self):
105105
self.df.sparse.to_coo()

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ Sparse
499499
^^^^^^
500500
- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
501501
- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
502-
-
502+
- Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`)
503503
-
504504

505505
ExtensionArray

pandas/core/arrays/sparse/accessor.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -340,10 +340,14 @@ def to_coo(self):
340340

341341
cols, rows, data = [], [], []
342342
for col, (_, ser) in enumerate(self._parent.iteritems()):
343-
row = ser.array.sp_index.to_int_index().indices
343+
sp_arr = ser.array
344+
if sp_arr.fill_value != 0:
345+
raise ValueError("fill value must be 0 when converting to COO matrix")
346+
347+
row = sp_arr.sp_index.to_int_index().indices
344348
cols.append(np.repeat(col, len(row)))
345349
rows.append(row)
346-
data.append(ser.array.sp_values.astype(dtype, copy=False))
350+
data.append(sp_arr.sp_values.astype(dtype, copy=False))
347351

348352
cols = np.concatenate(cols)
349353
rows = np.concatenate(rows)

pandas/tests/arrays/sparse/test_accessor.py

+16
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,22 @@ def test_to_coo(self, colnames):
8585
expected = scipy.sparse.coo_matrix(np.asarray(df))
8686
assert (result != expected).nnz == 0
8787

88+
@pytest.mark.parametrize("fill_value", [1, np.nan])
89+
@td.skip_if_no_scipy
90+
def test_to_coo_nonzero_fill_val_raises(self, fill_value):
91+
df = pd.DataFrame(
92+
{
93+
"A": SparseArray(
94+
[fill_value, fill_value, fill_value, 2], fill_value=fill_value
95+
),
96+
"B": SparseArray(
97+
[fill_value, 2, fill_value, fill_value], fill_value=fill_value
98+
),
99+
}
100+
)
101+
with pytest.raises(ValueError, match="fill value must be 0"):
102+
df.sparse.to_coo()
103+
88104
def test_to_dense(self):
89105
df = pd.DataFrame(
90106
{

0 commit comments

Comments
 (0)