From 79dacb287a193f661eaf4ebc744dff70f66e774d Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Wed, 13 Jun 2018 12:49:07 -0400 Subject: [PATCH 01/15] Adding Multiindex support to dataframe pivot function(Fixes #21425) --- pandas/core/reshape/reshape.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 2757e0797a410..1dc9518032ba3 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -392,12 +392,22 @@ def pivot(self, index=None, columns=None, values=None): cols = [columns] if index is None else [index, columns] append = index is None indexed = self.set_index(cols, append=append) + # adding the support for multi-index in pivot function + # assuming that for multi-index, index parameter for pivot function is list else: if index is None: index = self.index + index = MultiIndex.from_arrays([index, self[columns]]) + # added this case to handle multi-index + elif isinstance(index, list): + indexes = [] + for i in index: + indexes.append(self[i]) + indexes.append(self[columns]) + index = MultiIndex.from_arrays(indexes) else: index = self[index] - index = MultiIndex.from_arrays([index, self[columns]]) + index = MultiIndex.from_arrays([index, self[columns]]) if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name From f95f8544e37f1cdc83f76dd4432270fcb20c4326 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Thu, 14 Jun 2018 19:12:17 -0400 Subject: [PATCH 02/15] added whatsnew entry in v0.23.2.txt file --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 3e4326dea2ecc..bfe40cbbd521e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -66,7 +66,7 @@ Bug Fixes **Reshaping** -- +- Bug 
in: DataFrame.pivot() function where error was raised when multiple columns are set as index - **Categorical** From 100a4bcc2c34196c4039218d33a857b903d88ed1 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Thu, 14 Jun 2018 20:35:51 -0400 Subject: [PATCH 03/15] Added test case for testing dataframe pivot function with multiple columns as index --- pandas/tests/reshape/test_pivot.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3ec60d50f2792..2244bbab99163 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -283,6 +283,18 @@ def test_pivot_multi_functions(self): expected = concat([means, stds], keys=['mean', 'std'], axis=1) tm.assert_frame_equal(result, expected) + # adding the test case for multiple columns as index (#21425) + def test_pivot_multiple_columns_as_index(self): + df = DataFrame({'lev1': [1, 1, 1, 1, 2, 2, 2, 2], + 'lev2': [1, 1, 2, 2, 1, 1, 2, 2], + 'lev3': [1, 2, 1, 2, 1, 2, 1, 2], + 'values': [0, 1, 2, 3, 4, 5, 6, 7]}) + result = df.pivot(index=['lev1', 'lev2'], columns='lev3', values='values') + exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]], names=['lev1', 'lev2']) + exp_columns = pd.MultiIndex.from_arrays([[1, 2]], names=['lev3']) + expected = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], exp_index, exp_columns) + tm.assert_frame_equal(result,expected) + def test_pivot_index_with_nan(self): # GH 3588 nan = np.nan From ecc19d2ad197b5931d3c8f1b62aa304d3a4bb36d Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Thu, 14 Jun 2018 21:00:11 -0400 Subject: [PATCH 04/15] Made changes as per PEP8 requirements --- pandas/tests/reshape/test_pivot.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 2244bbab99163..719467086609e 100644 --- 
a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -289,11 +289,15 @@ def test_pivot_multiple_columns_as_index(self): 'lev2': [1, 1, 2, 2, 1, 1, 2, 2], 'lev3': [1, 2, 1, 2, 1, 2, 1, 2], 'values': [0, 1, 2, 3, 4, 5, 6, 7]}) - result = df.pivot(index=['lev1', 'lev2'], columns='lev3', values='values') - exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]], names=['lev1', 'lev2']) - exp_columns = pd.MultiIndex.from_arrays([[1, 2]], names=['lev3']) + result = df.pivot(index=['lev1', 'lev2'], + columns='lev3', + values='values') + exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]], + names=['lev1', 'lev2']) + exp_columns = pd.MultiIndex.from_arrays([[1, 2]], + names=['lev3']) expected = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], exp_index, exp_columns) - tm.assert_frame_equal(result,expected) + tm.assert_frame_equal(result, expected) def test_pivot_index_with_nan(self): # GH 3588 From 7f6c09c5d925985e13a491dd086689afc241147d Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Thu, 14 Jun 2018 21:30:37 -0400 Subject: [PATCH 05/15] changed the type of exp_columns to Index --- pandas/tests/reshape/test_pivot.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 719467086609e..1de10e8ede337 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -294,9 +294,10 @@ def test_pivot_multiple_columns_as_index(self): values='values') exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]], names=['lev1', 'lev2']) - exp_columns = pd.MultiIndex.from_arrays([[1, 2]], - names=['lev3']) - expected = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], exp_index, exp_columns) + exp_columns = Index([1, 2], name='lev3') + expected = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], + exp_index, + exp_columns) tm.assert_frame_equal(result, expected) def test_pivot_index_with_nan(self): From 
34fb002470b1cf95bb5f38385970a5ff5ab263d0 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 15 Jun 2018 12:11:29 -0400 Subject: [PATCH 06/15] Moved whatsnew entry from v0.23.2.txt to v0.24.0.txt --- doc/source/whatsnew/v0.23.2.txt | 2 +- doc/source/whatsnew/v0.24.0.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index bfe40cbbd521e..3e4326dea2ecc 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -66,7 +66,7 @@ Bug Fixes **Reshaping** -- Bug in: DataFrame.pivot() function where error was raised when multiple columns are set as index +- - **Categorical** diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 68c1839221508..ad9afee237848 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -16,6 +16,8 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- :func:'Dataframe.pivot' now supports multiple columns as index. (:issue:'21425') + .. 
_whatsnew_0240.api_breaking: From 787b289aa770fba3b565746e06dc5bcdcd97b1e7 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 15 Jun 2018 12:14:34 -0400 Subject: [PATCH 07/15] Removed the comments that are not required --- pandas/core/reshape/reshape.py | 3 +-- pandas/tests/reshape/test_pivot.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 1dc9518032ba3..2b9dabc121bcf 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -392,8 +392,7 @@ def pivot(self, index=None, columns=None, values=None): cols = [columns] if index is None else [index, columns] append = index is None indexed = self.set_index(cols, append=append) - # adding the support for multi-index in pivot function - # assuming that for multi-index, index parameter for pivot function is list + else: if index is None: index = self.index diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1de10e8ede337..273a95cfad554 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -283,8 +283,8 @@ def test_pivot_multi_functions(self): expected = concat([means, stds], keys=['mean', 'std'], axis=1) tm.assert_frame_equal(result, expected) - # adding the test case for multiple columns as index (#21425) def test_pivot_multiple_columns_as_index(self): + # adding the test case for multiple columns as index (#21425) df = DataFrame({'lev1': [1, 1, 1, 1, 2, 2, 2, 2], 'lev2': [1, 1, 2, 2, 1, 1, 2, 2], 'lev3': [1, 2, 1, 2, 1, 2, 1, 2], From 358e0ea8df85c1e12bc454ed47c80beedf2c6346 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 15 Jun 2018 16:16:53 -0400 Subject: [PATCH 08/15] corrected whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 
ad9afee237848..665da3630f37d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -16,7 +16,7 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) -- :func:'Dataframe.pivot' now supports multiple columns as index. (:issue:'21425') +- :func:'Dataframe.pivot' now supports multiple columns as an index. (:issue:'21425') From 811ab86af17dd37543504bbbd9b0b9ced913239e Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 15 Jun 2018 16:18:15 -0400 Subject: [PATCH 09/15] Changed indexes to list comprehension --- pandas/core/reshape/reshape.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 2b9dabc121bcf..12caa98ac3475 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -397,11 +397,9 @@ def pivot(self, index=None, columns=None, values=None): if index is None: index = self.index index = MultiIndex.from_arrays([index, self[columns]]) - # added this case to handle multi-index elif isinstance(index, list): - indexes = [] - for i in index: - indexes.append(self[i]) + # Iterating through the list of multiple columns of an index + indexes = [self[column] for column in index] indexes.append(self[columns]) index = MultiIndex.from_arrays(indexes) else: From 7b6b5a476edb6da4666a53b5f43499f91238d815 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 29 Jun 2018 11:08:57 -0400 Subject: [PATCH 10/15] used is_list_like to check if input is list --- pandas/core/reshape/reshape.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 12caa98ac3475..35171cf399843 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -397,7 +397,7 @@ def pivot(self, index=None, columns=None, values=None): if index is None: index = self.index index = MultiIndex.from_arrays([index, self[columns]]) - elif isinstance(index, list): + elif is_list_like(index): # Iterating through the list of multiple columns of an index indexes = [self[column] for column in index] indexes.append(self[columns]) From 3bcfc16518fa279834e2f9eb5370229f7a95e95c Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 29 Jun 2018 11:10:48 -0400 Subject: [PATCH 11/15] resolving merge conflict in whatsnew --- doc/source/whatsnew/v0.24.0.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 665da3630f37d..11efe3306a961 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -16,6 +16,14 @@ Other Enhancements - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) +- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) +- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to + reflect changes from the `Pandas-GBQ library version 0.5.0 + `__. 
+ (:issue:`21627`) +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) +- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :func:'Dataframe.pivot' now supports multiple columns as an index. (:issue:'21425') From 0234b38295db83ae2f5bdc1d9db03c2028243ee8 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Fri, 29 Jun 2018 11:16:24 -0400 Subject: [PATCH 12/15] trying to solve merge conflict --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 11efe3306a961..00a04e949802a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -15,7 +15,6 @@ Other Enhancements - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. 
(:issue:`21227`) -- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`) - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to @@ -25,7 +24,7 @@ Other Enhancements - New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :func:'Dataframe.pivot' now supports multiple columns as an index. (:issue:'21425') - +- .. _whatsnew_0240.api_breaking: From 7a050d6f1e05c9834335084a7800a86f83dc4620 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Sat, 30 Jun 2018 17:38:55 -0400 Subject: [PATCH 13/15] Handling the case when there is no values argument in pivot func --- pandas/core/reshape/reshape.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d27dd9e58c27f..e2396693286f1 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -395,7 +395,14 @@ def pivot(self, index=None, columns=None, values=None): See DataFrame.pivot """ if values is None: - cols = [columns] if index is None else [index, columns] + if index is None: + cols = [columns] + else: + if is_list_like(index): + cols = [column for column in index] + else: + cols =[index] + cols.append(columns) append = index is None indexed = self.set_index(cols, append=append) From beb768b0489c0c0844da7228970dfbeb59453ac4 Mon Sep 17 00:00:00 2001 From: NikhilKumarM 
<33mnikhilkumar@gmail.com> Date: Sat, 30 Jun 2018 17:41:46 -0400 Subject: [PATCH 14/15] added one more test case for pivot function with multi column index --- pandas/tests/reshape/test_pivot.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8ebf6a86556ef..4474c61dddfe3 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -310,13 +310,24 @@ def test_pivot_multiple_columns_as_index(self): result = df.pivot(index=['lev1', 'lev2'], columns='lev3', values='values') + result_no_values = df.pivot(index=['lev1', 'lev2'], + columns='lev3') + data = [[0, 1], [2, 3], [4, 5], [6, 7]] exp_index = pd.MultiIndex.from_product([[1, 2], [1, 2]], names=['lev1', 'lev2']) - exp_columns = Index([1, 2], name='lev3') - expected = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - exp_index, - exp_columns) - tm.assert_frame_equal(result, expected) + exp_columns_1 = Index([1, 2], name='lev3') + expected_1 = DataFrame(data=data, index=exp_index, + columns=exp_columns_1) + + exp_columns_2 = MultiIndex(levels=[['values'], [1, 2]], + labels=[[0, 0], [0, 1]], + names=[None, 'lev3']) + + expected_2 = DataFrame(data=data, index=exp_index, + columns=exp_columns_2) + + tm.assert_frame_equal(result, expected_1) + tm.assert_frame_equal(result_no_values, expected_2) def test_pivot_index_with_nan(self): # GH 3588 From faab956f7ea0f5c3e8ad8483904a8897d2632701 Mon Sep 17 00:00:00 2001 From: NikhilKumarM <33mnikhilkumar@gmail.com> Date: Sun, 1 Jul 2018 13:52:08 -0400 Subject: [PATCH 15/15] added whitespace to remove linting error --- pandas/core/reshape/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e2396693286f1..b22d143544b7b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -401,7 +401,7 @@ def pivot(self, index=None, 
columns=None, values=None): if is_list_like(index): cols = [column for column in index] else: - cols =[index] + cols = [index] cols.append(columns) append = index is None indexed = self.set_index(cols, append=append)