Skip to content

Add 'name' as argument for index 'to_frame' method #22580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Other Enhancements
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` is now iterable like :class:`GroupBy` (:issue:`15314`).
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).

.. _whatsnew_0240.api_breaking:

Expand Down
19 changes: 16 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None):

return Series(self._to_embed(), index=index, name=name)

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with a column containing the Index.

.. versionadded:: 0.21.0
.. versionadded:: 0.24.0

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original Index.

name : object, default None
Label to use for the resulting column in place of the index name.
If None, the index name is used, falling back to 0 when the index
has no name.

Returns
-------
DataFrame
Expand Down Expand Up @@ -1153,10 +1157,19 @@ def to_frame(self, index=True):
0 Ant
1 Bear
2 Cow

To override the name of the resulting column, specify `name`:

>>> idx.to_frame(index=False, name='zoo')
zoo
0 Ant
1 Bear
2 Cow
"""

from pandas import DataFrame
name = self.name or 0
if name is None:
name = self.name or 0
result = DataFrame({name: self.values.copy()})

if index:
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,27 +1126,42 @@ def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

def to_frame(self, index=True):
def to_frame(self, index=True, name=None):
"""
Create a DataFrame with the levels of the MultiIndex as columns.

.. versionadded:: 0.20.0
.. versionadded:: 0.24.0

Parameters
----------
index : boolean, default True
Set the index of the returned DataFrame as the original MultiIndex.

name : list / sequence of strings, optional
Names to use for the resulting columns in place of the index level
names. Must be list-like and contain exactly one entry per level.

Returns
-------
DataFrame : a DataFrame containing the original MultiIndex data.
"""

from pandas import DataFrame
if name is not None:
if not is_list_like(name):
raise TypeError("'name' must be a list / sequence "
"of array-likes.")

if len(name) != len(self.levels):
raise ValueError("'name' should have same length as "
"number of levels on index")
idx_names = name
else:
idx_names = self.names

result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
zip(idx_names, range(len(self.levels)))},
copy=False)
if index:
result.index = self
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ def test_to_frame(self):
df = idx.to_frame(index=False)
assert df.index is not idx

# See GH-22580
new_idx_name = 'new_name'
df = idx.to_frame(name=new_idx_name)

assert df.index is idx
assert len(df.columns) == 1
assert df.columns[0] == new_idx_name
assert df[new_idx_name].values is not idx.values

df = idx.to_frame(index=False, name=new_idx_name)
assert df.index is not idx

def test_shift(self):

# GH8083 test the base class for shift
Expand Down
34 changes: 32 additions & 2 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ def test_to_frame():
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)

msg = "'name' must be a list / sequence of array-likes."
with tm.assert_raises_regex(TypeError, msg):
index.to_frame(name='first')

msg = "'name' should have same length as number of levels on index"
with tm.assert_raises_regex(ValueError, msg):
index.to_frame(name=['first'])

# Tests for datetime index
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
Expand All @@ -45,12 +66,21 @@ def test_to_frame():
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)

# See GH-22580
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(
{'first': np.repeat(np.arange(5, dtype='int64'), 3),
'second': np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)

result = index.to_frame(name=['first', 'second'])
expected.index = index
tm.assert_frame_equal(result, expected)


def test_to_hierarchical():
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
Expand Down