From b744fb5f63c96a7e28ac7df814874d44e4db0058 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 24 Jan 2017 17:44:31 -0500
Subject: [PATCH] ENH: add MultiIndex.to_frame

closes #12397
---
Reviewer notes (text between the "---" line and the diffstat is commentary
and is not part of the commit message):

- The line structure of this patch was restored; it had been collapsed onto
  three physical lines.
- Subject aligned with the method actually added (to_frame).
- to_frame now keeps the columns in level order and tests ``name is None``
  instead of truthiness, so falsy names such as 0 or '' are kept. Hunk sizes
  and the diffstat were updated to match; the post-image blob ids on the
  "index" lines of multi.py and test_multi.py are therefore stale.
- The ref target on the whatsnew context line (advanced.unsorted), the
  wrapping of that context, and the blank context lines were reconstructed
  -- TODO confirm against the tree before applying.

 doc/source/api.rst                 |  1 +
 doc/source/whatsnew/v0.20.0.txt    |  2 +-
 pandas/indexes/multi.py            | 32 +++++++++++++++++++
 pandas/tests/indexes/test_multi.py | 51 ++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 04a85bf63a6f8..92f290b5ee0a9 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1460,6 +1460,7 @@ MultiIndex Components
    MultiIndex.set_levels
    MultiIndex.set_labels
    MultiIndex.to_hierarchical
+   MultiIndex.to_frame
    MultiIndex.is_lexsorted
    MultiIndex.droplevel
    MultiIndex.swaplevel
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index a73bb24521b0d..86fa919fec304 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -131,7 +131,7 @@ Other enhancements
 - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
   unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
   of sorting or an incorrect key. See :ref:`here <advanced.unsorted>`
-
+- ``MultiIndex`` has gained a ``.to_frame()`` method to convert to a ``DataFrame`` (:issue:`12397`)
 - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`)
 - ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`)
 - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 2afafaeb544d1..d8991a0982db2 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -827,6 +827,38 @@ def _to_safe_for_reshape(self):
         """ convert to object if we are a categorical """
         return self.set_levels([i._to_safe_for_reshape() for i in self.levels])
 
+    def to_frame(self, index=True):
+        """
+        Create a DataFrame with the levels of the MultiIndex as columns.
+
+        Columns appear in level order. A level is labelled by its name,
+        or by its integer position when the name is None.
+
+        .. versionadded:: 0.20.0
+
+        Parameters
+        ----------
+        index : boolean, default True
+            Set this MultiIndex as the index of the returned DataFrame.
+
+        Returns
+        -------
+        DataFrame
+        """
+
+        from collections import OrderedDict
+        from pandas import DataFrame
+
+        # Test ``name is None`` rather than truthiness so that falsy but
+        # valid names (0, '') are kept; OrderedDict keeps level order.
+        columns = OrderedDict(
+            (level if name is None else name, self.get_level_values(level))
+            for level, name in enumerate(self.names))
+        result = DataFrame(columns)
+        if index:
+            result.index = self
+        return result
+
     def to_hierarchical(self, n_repeat, n_shuffle=1):
         """
         Return a MultiIndex reshaped to conform to the
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 343078aeafaf0..7d9ceb526b912 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1348,6 +1348,57 @@ def test_format_sparse_config(self):
 
         warnings.filters = warn_filters
 
+    def test_to_frame(self):
+        tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
+
+        index = MultiIndex.from_tuples(tuples)
+        result = index.to_frame(index=False)
+        expected = DataFrame(tuples)
+        tm.assert_frame_equal(result, expected)
+
+        result = index.to_frame()
+        expected.index = index
+        tm.assert_frame_equal(result, expected)
+
+        tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
+        index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
+        result = index.to_frame(index=False)
+        expected = DataFrame(tuples)
+        expected.columns = ['first', 'second']
+        tm.assert_frame_equal(result, expected)
+
+        result = index.to_frame()
+        expected.index = index
+        tm.assert_frame_equal(result, expected)
+
+        # columns follow level order, not sorted name order
+        index = MultiIndex.from_tuples(tuples, names=['second', 'first'])
+        result = index.to_frame(index=False)
+        expected = DataFrame(tuples)
+        expected.columns = ['second', 'first']
+        tm.assert_frame_equal(result, expected)
+
+        # a falsy name such as 0 is kept, not replaced by the position
+        index = MultiIndex.from_tuples(tuples, names=['first', 0])
+        result = index.to_frame(index=False)
+        expected = DataFrame(tuples)
+        expected.columns = ['first', 0]
+        tm.assert_frame_equal(result, expected)
+
+        index = MultiIndex.from_product([range(5),
+                                         pd.date_range('20130101', periods=3)])
+        result = index.to_frame(index=False)
+        expected = DataFrame(
+            {0: np.repeat(np.arange(5, dtype='int64'), 3),
+             1: np.tile(pd.date_range('20130101', periods=3), 5)})
+        tm.assert_frame_equal(result, expected)
+
+        index = MultiIndex.from_product([range(5),
+                                         pd.date_range('20130101', periods=3)])
+        result = index.to_frame()
+        expected.index = index
+        tm.assert_frame_equal(result, expected)
+
     def test_to_hierarchical(self):
         index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
             2, 'two')])