BUG: df.sparse.to_coo() raising on duplicate colnames (#43491)

mzeitlin11 · web-flow · commit 0072fa8cb5ec · 2021-09-10T14:48:15.000-04:00
diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
@@ -91,6 +91,20 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
         self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
 
 
+class ToCooFrame:
+    def setup(self):
+        N = 10000
+        k = 10
+        arr = np.full((N, k), np.nan)
+        arr[0, 0] = 3.0
+        arr[12, 7] = -1.0
+        arr[0, 9] = 11.2
+        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
+
+    def time_to_coo(self):
+        self.df.sparse.to_coo()
+
+
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -431,6 +431,7 @@ Reshaping
 
 Sparse
 ^^^^^^
+- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
 -
 -
 
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -339,12 +339,11 @@ def to_coo(self):
             dtype = dtype.subtype
 
         cols, rows, data = [], [], []
-        for col, name in enumerate(self._parent):
-            s = self._parent[name]
-            row = s.array.sp_index.to_int_index().indices
+        for col, (_, ser) in enumerate(self._parent.iteritems()):
+            row = ser.array.sp_index.to_int_index().indices
             cols.append(np.repeat(col, len(row)))
             rows.append(row)
-            data.append(s.array.sp_values.astype(dtype, copy=False))
+            data.append(ser.array.sp_values.astype(dtype, copy=False))
 
         cols = np.concatenate(cols)
         rows = np.concatenate(rows)
diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
@@ -71,7 +71,9 @@ def test_from_spmatrix_columns(self, columns):
         expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize("colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2)])
+    @pytest.mark.parametrize(
+        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
+    )
     @td.skip_if_no_scipy
     def test_to_coo(self, colnames):
         import scipy.sparse

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -431,6 +431,7 @@ Reshaping` |
| `431` | `431` | |
| `432` | `432` | `Sparse` |
| `433` | `433` | `^^^^^^` |
| | `434` | ```+- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)``` |
| `434` | `435` | `-` |
| `435` | `436` | `-` |
| `436` | `437` | |