Skip to content

Commit 7dbfe9f

Browse files
authored
ENH: Add allow_duplicates to MultiIndex.to_frame (#45318)
1 parent 243b5bc commit 7dbfe9f

File tree

4 files changed

+55
-7
lines changed

4 files changed

+55
-7
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ enhancement2
3131

3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
34+
- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`)
3435
- :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
3536
- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
3637
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)

pandas/core/indexes/base.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,18 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
13541354
attrs.append(("length", len(self)))
13551355
return attrs
13561356

1357+
@final
def _get_level_names(self) -> Hashable | Sequence[Hashable]:
    """
    Return the label(s) identifying this index's level(s).

    A level without a name (``None``) is represented by its integer
    position instead. A multi-level index yields a list with one entry
    per level; a flat index yields a single label, ``0`` when unnamed.
    """
    # Flat index: a single label, falling back to position 0.
    if not self._is_multi:
        return self.name if self.name is not None else 0

    # Multi-level index: one label per level, in level order.
    labels = []
    for position, label in enumerate(self.names):
        labels.append(position if label is None else label)
    return labels
1368+
13571369
@final
13581370
def _mpl_repr(self) -> np.ndarray:
13591371
# how to represent ourselves to matplotlib
@@ -1640,7 +1652,7 @@ def to_frame(
16401652
name = lib.no_default
16411653

16421654
if name is lib.no_default:
1643-
name = self.name or 0
1655+
name = self._get_level_names()
16441656
result = DataFrame({name: self._values.copy()})
16451657

16461658
if index:

pandas/core/indexes/multi.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -1710,7 +1710,12 @@ def unique(self, level=None):
17101710
level = self._get_level_number(level)
17111711
return self._get_level_values(level=level, unique=True)
17121712

1713-
def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame:
1713+
def to_frame(
1714+
self,
1715+
index: bool = True,
1716+
name=lib.no_default,
1717+
allow_duplicates: bool = False,
1718+
) -> DataFrame:
17141719
"""
17151720
Create a DataFrame with the levels of the MultiIndex as columns.
17161721
@@ -1725,6 +1730,11 @@ def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame:
17251730
name : list / sequence of str, optional
17261731
The passed names should substitute index level names.
17271732
1733+
allow_duplicates : bool, optional, default False
1734+
Allow duplicate column labels to be created.
1735+
1736+
.. versionadded:: 1.5.0
1737+
17281738
Returns
17291739
-------
17301740
DataFrame : a DataFrame containing the original MultiIndex data.
@@ -1783,16 +1793,19 @@ def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame:
17831793
)
17841794
idx_names = name
17851795
else:
1786-
idx_names = self.names
1796+
idx_names = self._get_level_names()
1797+
1798+
if not allow_duplicates and len(set(idx_names)) != len(idx_names):
1799+
raise ValueError(
1800+
"Cannot create duplicate column labels if allow_duplicates is False"
1801+
)
17871802

17881803
# Guarantee resulting column order - PY36+ dict maintains insertion order
17891804
result = DataFrame(
1790-
{
1791-
(level if lvlname is None else lvlname): self._get_level_values(level)
1792-
for lvlname, level in zip(idx_names, range(len(self.levels)))
1793-
},
1805+
{level: self._get_level_values(level) for level in range(len(self.levels))},
17941806
copy=False,
17951807
)
1808+
result.columns = idx_names
17961809

17971810
if index:
17981811
result.index = self

pandas/tests/indexes/multi/test_conversion.py

+22
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,28 @@ def test_to_frame_resulting_column_order():
126126
assert result == expected
127127

128128

129+
def test_to_frame_duplicate_labels():
    # GH 45245
    # to_frame must refuse to build duplicate column labels unless the
    # caller opts in with allow_duplicates=True.
    data = [(1, 2), (3, 4)]
    msg = "Cannot create duplicate column labels"

    # (level names given to the MultiIndex, column labels expected in the frame)
    scenarios = [
        # two levels sharing an explicit name
        (["a", "a"], ["a", "a"]),
        # an unnamed level falls back to its position (0), clashing with
        # the level explicitly named 0
        ([None, 0], [0, 0]),
    ]

    for names, columns in scenarios:
        index = MultiIndex.from_tuples(data, names=names)

        with pytest.raises(ValueError, match=msg):
            index.to_frame()

        result = index.to_frame(allow_duplicates=True)
        expected = DataFrame(data, index=index, columns=columns)
        tm.assert_frame_equal(result, expected)
149+
150+
129151
def test_to_flat_index(idx):
130152
expected = pd.Index(
131153
(

0 commit comments

Comments
 (0)