diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
index 5c11be34e6ed4..4406243aa8b5e 100644
--- a/doc/source/user_guide/reshaping.rst
+++ b/doc/source/user_guide/reshaping.rst
@@ -90,6 +90,19 @@ You can then select subsets from the pivoted ``DataFrame``:
 Note that this returns a view on the underlying data in the case where the data
 are homogeneously-typed.
 
+:meth:`DataFrame.pivot` also accepts a list of columns as ``index``:
+
+.. ipython:: python
+
+   df1 = pd.DataFrame({'variable1': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
+                       'variable2': ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b'],
+                       'variable3': ['C', 'D', 'C', 'D', 'C', 'D', 'C', 'D'],
+                       'value': np.arange(8)})
+   df1
+
+   df1.pivot(index=['variable1', 'variable2'], columns='variable3',
+             values='value')
+
 .. note::
    :func:`~pandas.pivot` will error with a ``ValueError: Index contains duplicate
    entries, cannot reshape`` if the index/column pair is not unique. In this
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 2ed2c21ba5584..8bbd972fa9ed0 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -28,6 +28,7 @@ Other Enhancements
 - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`)
 - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
 - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`)
+- :meth:`DataFrame.pivot` now accepts a list of columns for ``index``, allowing a multi-column index (:issue:`21425`)
 - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`)
 - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
 - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 8d7616c4b6b61..843193a85ed7d 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -368,18 +368,32 @@ def _convert_by(by):
 @Appender(_shared_docs['pivot'], indents=1)
 def pivot(data, index=None, columns=None, values=None):
     if values is None:
-        cols = [columns] if index is None else [index, columns]
+        # Copy into a new list: never mutate the caller's ``index``, and
+        # accept tuples, which have no ``append``.
+        if index is None:
+            cols = []
+        elif is_list_like(index):
+            cols = list(index)
+        else:
+            cols = [index]
+        cols.append(columns)
+
         append = index is None
         indexed = data.set_index(cols, append=append)
     else:
         if index is None:
-            index = data.index
+            arrays = [data.index]
+        elif is_list_like(index):
+            # One index level per requested column.
+            arrays = [data[col] for col in index]
         else:
-            index = data[index]
-        index = MultiIndex.from_arrays([index, data[columns]])
+            arrays = [data[index]]
+        # ``columns`` always supplies the innermost level.
+        arrays.append(data[columns])
+        index = MultiIndex.from_arrays(arrays)
 
         if is_list_like(values) and not isinstance(values, tuple):
-            # Exclude tuple because it is seen as a single column name
+            # Exclude tuple because it is seen as a single column name.
             indexed = data._constructor(data[values].values, index=index,
                                         columns=values)
         else:
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index e4fbb204af533..a897f6465c811 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -301,6 +301,36 @@ def test_pivot_multi_functions(self):
         expected = concat([means, stds], keys=['mean', 'std'], axis=1)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize('method', [True, False])
+    def test_pivot_multiple_columns_as_index(self, method):
+        # GH 21425: a list of columns can be used as the index
+        df = DataFrame({'lev1': [1, 1, 1, 1, 2, 2, 2, 2],
+                        'lev2': [1, 1, 2, 2, 1, 1, 2, 2],
+                        'lev3': [1, 2, 1, 2, 1, 2, 1, 2],
+                        'values': [0, 1, 2, 3, 4, 5, 6, 7]})
+        data = [[0, 1], [2, 3], [4, 5], [6, 7]]
+        exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]],
+                                               names=['lev1', 'lev2'])
+        index_cols = ['lev1', 'lev2']
+        if method:
+            result = df.pivot(index=index_cols,
+                              columns='lev3',
+                              values='values')
+            exp_columns = Index([1, 2], name='lev3')
+
+        else:
+            result = df.pivot(index=index_cols,
+                              columns='lev3')
+            exp_columns = MultiIndex(levels=[['values'], [1, 2]],
+                                     codes=[[0, 0], [0, 1]],
+                                     names=[None, 'lev3'])
+
+        expected = DataFrame(data=data, index=exp_index,
+                             columns=exp_columns)
+        tm.assert_frame_equal(result, expected)
+        # pivot must not mutate the list passed as ``index``
+        assert index_cols == ['lev1', 'lev2']
+
     @pytest.mark.parametrize('method', [True, False])
     def test_pivot_index_with_nan(self, method):
         # GH 3588