Skip to content

Add 'name' as argument for index 'to_frame' method #22580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Other Enhancements
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).

.. _whatsnew_0240.api_breaking:

Expand Down
19 changes: 16 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None):

return Series(self._to_embed(), index=index, name=name)

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with a column containing the Index.

.. versionadded:: 0.21.0
.. versionchanged:: 0.24.0
   Added the ``name`` parameter.

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original Index.

name : object, default None
Name of the resulting column. If None, the name of the Index is
used, falling back to 0 when the Index has no name.

Returns
-------
DataFrame
Expand Down Expand Up @@ -1153,10 +1157,19 @@ def to_frame(self, index=True):
0 Ant
1 Bear
2 Cow

To override the name of the resulting column, specify `name`:

>>> idx.to_frame(index=False, name='zoo')
zoo
0 Ant
1 Bear
2 Cow
"""

from pandas import DataFrame
name = self.name or 0
if name is None:
name = self.name or 0
result = DataFrame({name: self.values.copy()})

if index:
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,20 +1126,23 @@ def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with the levels of the MultiIndex as columns.

Column ordering is determined by the DataFrame constructor with data as
a dict.

.. versionadded:: 0.20.0
.. versionchanged:: 0.24.0
   Added the ``name`` parameter.

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original MultiIndex.

name : list / sequence of strings, optional
Column names to use in place of the index level names. Must be
list-like, with one entry per level of the MultiIndex.

Returns
-------
DataFrame : a DataFrame containing the original MultiIndex data.
Expand All @@ -1150,10 +1153,22 @@ def to_frame(self, index=True):
"""

from pandas import DataFrame
if name is not None:
if not is_list_like(name):
raise TypeError("'name' must be a list / sequence "
"of column names.")

if len(name) != len(self.levels):
raise ValueError("'name' should have same length as "
"number of levels on index.")
idx_names = name
else:
idx_names = self.names

result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
zip(idx_names, range(len(self.levels)))},
copy=False)
if index:
result.index = self
Expand Down
19 changes: 12 additions & 7 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,24 @@ def test_to_series_with_arguments(self):
assert s.index is not idx
assert s.name != idx.name

def test_to_frame(self):
# see gh-15230
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name):
# see GH-15230, GH-22580
idx = self.create_index()
name = idx.name or 0

df = idx.to_frame()
if name:
idx_name = name
else:
idx_name = idx.name or 0

df = idx.to_frame(name=idx_name)

assert df.index is idx
assert len(df.columns) == 1
assert df.columns[0] == name
assert df[name].values is not idx.values
assert df.columns[0] == idx_name
assert df[idx_name].values is not idx.values

df = idx.to_frame(index=False)
df = idx.to_frame(index=False, name=idx_name)
assert df.index is not idx

def test_shift(self):
Expand Down
34 changes: 32 additions & 2 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ def test_to_frame():
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

msg = "'name' must be a list / sequence of column names."
with tm.assert_raises_regex(TypeError, msg):
index.to_frame(name='first')

msg = "'name' should have same length as number of levels on index."
with tm.assert_raises_regex(ValueError, msg):
index.to_frame(name=['first'])

# Tests for datetime index
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
Expand All @@ -45,12 +66,21 @@ def test_to_frame():
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(
{'first': np.repeat(np.arange(5, dtype='int64'), 3),
'second': np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
tm.assert_frame_equal(result, expected)


def test_to_hierarchical():
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
Expand Down