From 11284ccc851302dec3f9fdf446525a601c257dd7 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 3 Sep 2018 12:08:32 +0100 Subject: [PATCH 01/14] add the possibility to change name when converting index to frame --- pandas/core/indexes/base.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7b7fb968b3050..b8149ba1cfd04 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1116,7 +1116,7 @@ def to_series(self, index=None, name=None): return Series(self._to_embed(), index=index, name=name) - def to_frame(self, index=True): + def to_frame(self, index=True, name=None): """ Create a DataFrame with a column containing the Index. @@ -1126,6 +1126,10 @@ def to_frame(self, index=True): ---------- index : boolean, default True Set the index of the returned DataFrame as the original Index. + + name : object, default None + The passed name should substitute for the series name (if it has + one). Returns ------- @@ -1154,10 +1158,17 @@ def to_frame(self, index=True): 0 Ant 1 Bear 2 Cow + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow """ from pandas import DataFrame - name = self.name or 0 + if not name: + name = self.name or 0 result = DataFrame({name: self.values.copy()}) if index: From 36bbc14851619ae463316282b92157fdfce4f53b Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 3 Sep 2018 12:11:24 +0100 Subject: [PATCH 02/14] fix flake8 --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b8149ba1cfd04..372fce845be86 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1126,7 +1126,7 @@ def to_frame(self, index=True, name=None): ---------- index : boolean, default True Set the index of the returned DataFrame as the original Index. 
- + name : object, default None The passed name should substitute for the series name (if it has one). From ecaaa692ad78babc21bb3bdf467fc0414820b04d Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 3 Sep 2018 14:36:10 +0100 Subject: [PATCH 03/14] change the way of verifying if name is None --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 372fce845be86..0124418a50fa6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1167,7 +1167,7 @@ def to_frame(self, index=True, name=None): """ from pandas import DataFrame - if not name: + if name is None: name = self.name or 0 result = DataFrame({name: self.values.copy()}) From 7d24424f8ef760cf8031ac559d421c67eb68bd37 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 6 Sep 2018 14:36:35 +0100 Subject: [PATCH 04/14] add 'names' argument on to_frame function for MultiIndex --- pandas/core/indexes/multi.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5b2e3a76adf05..da8e97d586647 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1126,7 +1126,7 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True): + def to_frame(self, index=True, names=None): """ Create a DataFrame with the levels of the MultiIndex as columns. 
@@ -1143,11 +1143,21 @@ def to_frame(self, index=True): """ from pandas import DataFrame - result = DataFrame({(name or level): - self._get_level_values(level) - for name, level in - zip(self.names, range(len(self.levels)))}, - copy=False) + if names is not None: + if len(names) != len(self.levels): + raise AssertionError("'names' should have same lenght as " + "number of levels on index") + result = DataFrame({(name): + self._get_level_values(level) + for name, level in + zip(names, range(len(self.levels)))}, + copy=False) + else: + result = DataFrame({(name or level): + self._get_level_values(level) + for name, level in + zip(self.names, range(len(self.levels)))}, + copy=False) if index: result.index = self return result From bf691f4802ca601ecedadb954018036b0f99d2d3 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 6 Sep 2018 14:37:26 +0100 Subject: [PATCH 05/14] add more tests for indexes' to_frame function using name argument --- pandas/tests/indexes/common.py | 11 +++++++++ pandas/tests/indexes/multi/test_conversion.py | 23 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 56f59851d6d04..1e1bd95f8af5a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -81,6 +81,17 @@ def test_to_frame(self): df = idx.to_frame(index=False) assert df.index is not idx + new_idx_name = 'new_name' + df = idx.to_frame(name=new_idx_name) + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == new_idx_name + assert df[new_idx_name].values is not idx.values + + df = idx.to_frame(index=False, name=new_idx_name) + assert df.index is not idx + def test_shift(self): # GH8083 test the base class for shift diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index fcc22390e17a1..c04340e16c2d4 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ 
b/pandas/tests/indexes/multi/test_conversion.py @@ -37,6 +37,17 @@ def test_to_frame(): expected.index = index tm.assert_frame_equal(result, expected) + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, names=['first', 'second']) + expected = DataFrame(tuples) + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + result = index.to_frame(names=['first', 'second']) + expected.index = index + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + index = MultiIndex.from_product([range(5), pd.date_range('20130101', periods=3)]) result = index.to_frame(index=False) @@ -45,12 +56,20 @@ def test_to_frame(): 1: np.tile(pd.date_range('20130101', periods=3), 5)}) tm.assert_frame_equal(result, expected) - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) result = index.to_frame() expected.index = index tm.assert_frame_equal(result, expected) + result = index.to_frame(index=False, names=['first', 'second']) + expected = DataFrame( + {'first': np.repeat(np.arange(5, dtype='int64'), 3), + 'second': np.tile(pd.date_range('20130101', periods=3), 5)}) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(names=['first', 'second']) + expected.index = index + tm.assert_frame_equal(result, expected) + def test_to_hierarchical(): index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( From edad46b10043d5a56dfc650175b1f48ce21add9d Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Fri, 7 Sep 2018 15:47:39 +0100 Subject: [PATCH 06/14] fix some issues. 
For details check GH #22580 --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 32 ++++++++++--------- pandas/tests/indexes/common.py | 1 + pandas/tests/indexes/multi/test_conversion.py | 15 ++++++--- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0124418a50fa6..189523bd63d8f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1120,7 +1120,7 @@ def to_frame(self, index=True, name=None): """ Create a DataFrame with a column containing the Index. - .. versionadded:: 0.21.0 + .. versionadded:: 0.24.0 Parameters ---------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index da8e97d586647..5962cc2113f8d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1126,38 +1126,40 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True, names=None): + def to_frame(self, index=True, name=None): """ Create a DataFrame with the levels of the MultiIndex as columns. - .. versionadded:: 0.20.0 + .. versionadded:: 0.24.0 Parameters ---------- index : boolean, default True Set the index of the returned DataFrame as the original MultiIndex. + name : list / sequence of strings or None + The passed names should substitute for the series names (if it has + one). + Returns ------- DataFrame : a DataFrame containing the original MultiIndex data. 
""" from pandas import DataFrame - if names is not None: - if len(names) != len(self.levels): - raise AssertionError("'names' should have same lenght as " + if name is not None: + if len(name) != len(self.levels): + raise ValueError("'name' should have same lenght as " "number of levels on index") - result = DataFrame({(name): - self._get_level_values(level) - for name, level in - zip(names, range(len(self.levels)))}, - copy=False) + idx_names = name else: - result = DataFrame({(name or level): - self._get_level_values(level) - for name, level in - zip(self.names, range(len(self.levels)))}, - copy=False) + idx_names = self.names + + result = DataFrame({(name or level): + self._get_level_values(level) + for name, level in + zip(idx_names, range(len(self.levels)))}, + copy=False) if index: result.index = self return result diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1e1bd95f8af5a..0eb4edd545886 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -81,6 +81,7 @@ def test_to_frame(self): df = idx.to_frame(index=False) assert df.index is not idx + # See GH-22580 new_idx_name = 'new_name' df = idx.to_frame(name=new_idx_name) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index c04340e16c2d4..791651b4dd1da 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -37,17 +37,23 @@ def test_to_frame(): expected.index = index tm.assert_frame_equal(result, expected) + # See GH-22580 index = MultiIndex.from_tuples(tuples) - result = index.to_frame(index=False, names=['first', 'second']) + result = index.to_frame(index=False, name=['first', 'second']) expected = DataFrame(tuples) expected.columns = ['first', 'second'] tm.assert_frame_equal(result, expected) - result = index.to_frame(names=['first', 'second']) + result = index.to_frame(name=['first', 'second']) expected.index = index 
expected.columns = ['first', 'second'] tm.assert_frame_equal(result, expected) + msg = "'name' should have same lenght as number of levels on index" + with tm.assert_raises_regex(ValueError, msg): + index.to_frame(name=['first']) + + # Tests for datetime index index = MultiIndex.from_product([range(5), pd.date_range('20130101', periods=3)]) result = index.to_frame(index=False) @@ -60,13 +66,14 @@ def test_to_frame(): expected.index = index tm.assert_frame_equal(result, expected) - result = index.to_frame(index=False, names=['first', 'second']) + # See GH-22580 + result = index.to_frame(index=False, name=['first', 'second']) expected = DataFrame( {'first': np.repeat(np.arange(5, dtype='int64'), 3), 'second': np.tile(pd.date_range('20130101', periods=3), 5)}) tm.assert_frame_equal(result, expected) - result = index.to_frame(names=['first', 'second']) + result = index.to_frame(name=['first', 'second']) expected.index = index tm.assert_frame_equal(result, expected) From b0eaa7cc18d53c93eb0dfab156ad77262af1235f Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Sat, 8 Sep 2018 16:30:51 +0100 Subject: [PATCH 07/14] correct lint --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 05c4fa9b94398..94d41b80b57ce 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1150,7 +1150,7 @@ def to_frame(self, index=True, name=None): if name is not None: if len(name) != len(self.levels): raise ValueError("'name' should have same lenght as " - "number of levels on index") + "number of levels on index") idx_names = name else: idx_names = self.names From 1b5ecf8153cb6864f0c93ebb1a498b3dd9996ef1 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 11 Sep 2018 11:19:20 +0100 Subject: [PATCH 08/14] fix some typos and docstrings --- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/multi.py | 10 ++++++---- 
pandas/tests/indexes/multi/test_conversion.py | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f5a2d757526fb..9fb559b9e75fb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1127,7 +1127,7 @@ def to_frame(self, index=True, name=None): Set the index of the returned DataFrame as the original Index. name : object, default None - The passed name should substitute for the series name (if it has + The passed name should substitute for the index name (if it has one). Returns @@ -1158,6 +1158,8 @@ def to_frame(self, index=True, name=None): 1 Bear 2 Cow + To override the name of the resulting column, specify `name`: + >>> idx.to_frame(index=False, name='zoo') zoo 0 Ant diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 94d41b80b57ce..7701c1033be06 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1137,9 +1137,8 @@ def to_frame(self, index=True, name=None): index : boolean, default True Set the index of the returned DataFrame as the original MultiIndex. - name : list / sequence of strings or None - The passed names should substitute for the series names (if it has - one). + name : list / sequence of strings, optional + The passed names should substitute index level names. 
Returns ------- @@ -1147,9 +1146,12 @@ def to_frame(self, index=True, name=None): """ from pandas import DataFrame + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence of array-likes.") + if name is not None: if len(name) != len(self.levels): - raise ValueError("'name' should have same lenght as " + raise ValueError("'name' should have same length as " "number of levels on index") idx_names = name else: diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 791651b4dd1da..f9f51c6e3d91b 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -49,7 +49,11 @@ def test_to_frame(): expected.columns = ['first', 'second'] tm.assert_frame_equal(result, expected) - msg = "'name' should have same lenght as number of levels on index" + msg = "'name' must be a list / sequence of array-likes." + with tm.assert_raises_regex(TypeError, msg): + index.to_frame(name='first') + + msg = "'name' should have same length as number of levels on index" with tm.assert_raises_regex(ValueError, msg): index.to_frame(name=['first']) From f6ba76646540d42071d057355c77b6e5d54c178c Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 11 Sep 2018 12:06:06 +0100 Subject: [PATCH 09/14] fix wrong checking --- pandas/core/indexes/multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7701c1033be06..41fe1af58bcd8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1146,10 +1146,11 @@ def to_frame(self, index=True, name=None): """ from pandas import DataFrame - if not is_list_like(name): - raise TypeError("'name' must be a list / sequence of array-likes.") - if name is not None: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence " + "of array-likes.") + if len(name) != len(self.levels): raise ValueError("'name' 
should have same length as " "number of levels on index") From 8e12e0f5f0e7a05096d479346681b6352004e96c Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 11 Sep 2018 12:10:08 +0100 Subject: [PATCH 10/14] add release note --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fb7af00f61534..1956578bc6aa8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :meth:Both Index and MultiIndex now support overriding column name(s) when using `to_frame` method (:issue:`22580`). .. _whatsnew_0240.api_breaking: From b46e729729c8cbc82808b612c3178f4a1c05edc4 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Wed, 12 Sep 2018 11:16:16 +0100 Subject: [PATCH 11/14] improve release note message --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1956578bc6aa8..9c802b91fa37f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,7 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). -- :meth:Both Index and MultiIndex now support overriding column name(s) when using `to_frame` method (:issue:`22580`). 
+- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). .. _whatsnew_0240.api_breaking: From b1365ead3b20c3e97d2489c6d9416fc1e4914cb2 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 13 Sep 2018 17:14:53 +0100 Subject: [PATCH 12/14] fix doc string --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 41fe1af58bcd8..033e5e4b51d7c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1149,11 +1149,11 @@ def to_frame(self, index=True, name=None): if name is not None: if not is_list_like(name): raise TypeError("'name' must be a list / sequence " - "of array-likes.") + "of column names.") if len(name) != len(self.levels): raise ValueError("'name' should have same length as " - "number of levels on index") + "number of levels on index.") idx_names = name else: idx_names = self.names From b5e9d9a38912014d287ac4753f218397ff6b11ec Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 13 Sep 2018 17:15:34 +0100 Subject: [PATCH 13/14] fix error messages --- pandas/tests/indexes/multi/test_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index f9f51c6e3d91b..8c9566b7e651f 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -49,11 +49,11 @@ def test_to_frame(): expected.columns = ['first', 'second'] tm.assert_frame_equal(result, expected) - msg = "'name' must be a list / sequence of array-likes." + msg = "'name' must be a list / sequence of column names." with tm.assert_raises_regex(TypeError, msg): index.to_frame(name='first') - msg = "'name' should have same length as number of levels on index" + msg = "'name' should have same length as number of levels on index."
with tm.assert_raises_regex(ValueError, msg): index.to_frame(name=['first']) From 59620b5709fcb2b153c4a73f9c9fa0328eec89b4 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 13 Sep 2018 17:15:43 +0100 Subject: [PATCH 14/14] parametrize tests --- pandas/tests/indexes/common.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 0eb4edd545886..49a247608ab0b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -66,31 +66,24 @@ def test_to_series_with_arguments(self): assert s.index is not idx assert s.name != idx.name - def test_to_frame(self): - # see gh-15230 + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name): + # see GH-15230, GH-22580 idx = self.create_index() - name = idx.name or 0 - df = idx.to_frame() - - assert df.index is idx - assert len(df.columns) == 1 - assert df.columns[0] == name - assert df[name].values is not idx.values - - df = idx.to_frame(index=False) - assert df.index is not idx + if name: + idx_name = name + else: + idx_name = idx.name or 0 - # See GH-22580 - new_idx_name = 'new_name' - df = idx.to_frame(name=new_idx_name) + df = idx.to_frame(name=idx_name) assert df.index is idx assert len(df.columns) == 1 - assert df.columns[0] == new_idx_name - assert df[new_idx_name].values is not idx.values + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values - df = idx.to_frame(index=False, name=new_idx_name) + df = idx.to_frame(index=False, name=idx_name) assert df.index is not idx def test_shift(self):