diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1ae76984484af..dec0ede2cddf3 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -31,6 +31,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises a ``ValueError`` on duplicate labels if it is missing or ``False`` (:issue:`45245`) - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) - :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 717abb998dd65..e612f5ee10181 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1354,6 +1354,18 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: attrs.append(("length", len(self))) return attrs + @final + def _get_level_names(self) -> Hashable | Sequence[Hashable]: + """ + Return a name or list of names with None replaced by the level number. 
+ """ + if self._is_multi: + return [ + level if name is None else name for level, name in enumerate(self.names) + ] + else: + return 0 if self.name is None else self.name + @final def _mpl_repr(self) -> np.ndarray: # how to represent ourselves to matplotlib @@ -1629,7 +1641,7 @@ def to_frame( from pandas import DataFrame if name is lib.no_default: - name = self.name or 0 + name = self._get_level_names() result = DataFrame({name: self._values.copy()}) if index: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 676fa4962cd56..9da513c60c997 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1710,7 +1710,12 @@ def unique(self, level=None): level = self._get_level_number(level) return self._get_level_values(level=level, unique=True) - def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame: + def to_frame( + self, + index: bool = True, + name=lib.no_default, + allow_duplicates: bool = False, + ) -> DataFrame: """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -1725,6 +1730,11 @@ def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame: name : list / sequence of str, optional The passed names should substitute index level names. + allow_duplicates : bool, optional, default False + Allow duplicate column labels to be created. + + .. versionadded:: 1.5.0 + Returns ------- DataFrame : a DataFrame containing the original MultiIndex data. 
@@ -1772,16 +1782,19 @@ def to_frame(self, index: bool = True, name=lib.no_default) -> DataFrame: ) idx_names = name else: - idx_names = self.names + idx_names = self._get_level_names() + + if not allow_duplicates and len(set(idx_names)) != len(idx_names): + raise ValueError( + "Cannot create duplicate column labels if allow_duplicates is False" + ) # Guarantee resulting column order - PY36+ dict maintains insertion order result = DataFrame( - { - (level if lvlname is None else lvlname): self._get_level_values(level) - for lvlname, level in zip(idx_names, range(len(self.levels))) - }, + {level: self._get_level_values(level) for level in range(len(self.levels))}, copy=False, ) + result.columns = idx_names if index: result.index = self diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 072055e4824a7..3c2ca045d6f99 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -126,6 +126,28 @@ def test_to_frame_resulting_column_order(): assert result == expected +def test_to_frame_duplicate_labels(): + # GH 45245 + data = [(1, 2), (3, 4)] + names = ["a", "a"] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=names) + tm.assert_frame_equal(result, expected) + + names = [None, 0] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=[0, 0]) + tm.assert_frame_equal(result, expected) + + def test_to_flat_index(idx): expected = pd.Index( (