Skip to content

Commit 0072fa8

Browse files
authored
BUG: df.sparse.to_coo() raising on duplicate colnames (#43491)
1 parent df1de66 commit 0072fa8

File tree

4 files changed

+21
-5
lines changed

4 files changed

+21
-5
lines changed

asv_bench/benchmarks/sparse.py

+14
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,20 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
9191
self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
9292

9393

94+
class ToCooFrame:
95+
def setup(self):
96+
N = 10000
97+
k = 10
98+
arr = np.full((N, k), np.nan)
99+
arr[0, 0] = 3.0
100+
arr[12, 7] = -1.0
101+
arr[0, 9] = 11.2
102+
self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
103+
104+
def time_to_coo(self):
105+
self.df.sparse.to_coo()
106+
107+
94108
class Arithmetic:
95109

96110
params = ([0.1, 0.01], [0, np.nan])

doc/source/whatsnew/v1.4.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -431,6 +431,7 @@ Reshaping
431431

432432
Sparse
433433
^^^^^^
434+
- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
434435
-
435436
-
436437

pandas/core/arrays/sparse/accessor.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -339,12 +339,11 @@ def to_coo(self):
339339
dtype = dtype.subtype
340340

341341
cols, rows, data = [], [], []
342-
for col, name in enumerate(self._parent):
343-
s = self._parent[name]
344-
row = s.array.sp_index.to_int_index().indices
342+
for col, (_, ser) in enumerate(self._parent.iteritems()):
343+
row = ser.array.sp_index.to_int_index().indices
345344
cols.append(np.repeat(col, len(row)))
346345
rows.append(row)
347-
data.append(s.array.sp_values.astype(dtype, copy=False))
346+
data.append(ser.array.sp_values.astype(dtype, copy=False))
348347

349348
cols = np.concatenate(cols)
350349
rows = np.concatenate(rows)

pandas/tests/arrays/sparse/test_accessor.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,9 @@ def test_from_spmatrix_columns(self, columns):
7171
expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
7272
tm.assert_frame_equal(result, expected)
7373

74-
@pytest.mark.parametrize("colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2)])
74+
@pytest.mark.parametrize(
75+
"colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
76+
)
7577
@td.skip_if_no_scipy
7678
def test_to_coo(self, colnames):
7779
import scipy.sparse

0 commit comments

Comments
 (0)