From c242a8e68f2bb5fbe5fbaaafacd895014b4efb77 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Thu, 6 Jun 2019 18:25:00 +0800 Subject: [PATCH 01/11] BUG: Cannot use categorical IntervalIndex as index when creating pivot_table (#25814) --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/reshape/test_pivot.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dc77599444505..c079b860bb924 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -181,7 +181,7 @@ def contains(cat, key, container): # can't be in container either. try: loc = cat.categories.get_loc(key) - except KeyError: + except (KeyError, TypeError): return False # loc is the location of key in categories, but also the *value* diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index cc91bef525eff..7b03ac4529f48 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -198,6 +198,17 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) + def test_pivot_with_interval_index(self, dropna): + df = pd.DataFrame( + {'A': pd.Categorical([pd.Interval(0, 1)] * 4), + 'B': [1] * 4}) + result = df.pivot_table(index='A', values='B', dropna=dropna) + expected = pd.DataFrame( + {'B': [1]}, + index=pd.Index(pd.Categorical([pd.Interval(0, 1)]), + name='A')) + tm.assert_frame_equal(result, expected) + def test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) From 28fa0718fe4d10ea1eb50563945bf50379c15035 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Tue, 11 Jun 2019 14:28:08 +0800 Subject: [PATCH 02/11] modify tests for reshape pivot for interval index --- pandas/tests/reshape/test_pivot.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 7b03ac4529f48..da8df5691cf35 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -19,6 +19,18 @@ def dropna(request): return request.param +@pytest.fixture( + params=[ + pd.Categorical([pd.Interval(0, 1, 'right')] * 4), + pd.Categorical([pd.Interval(0, 1, 'left')] * 4), + pd.Categorical([pd.Interval(low, high, 'right') + for low, high in zip(range(0, 3), range(1, 4))]), + pd.Categorical([pd.Interval(low, high, 'left') + for low, high in zip(range(0, 3), range(1, 4))])]) +def interval_values(request): + return request.param + + class TestPivotTable: def setup_method(self, method): @@ -198,14 +210,14 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) - def test_pivot_with_interval_index(self, dropna): + def test_pivot_with_interval_index(self, interval_values, dropna): df = pd.DataFrame( - {'A': pd.Categorical([pd.Interval(0, 1)] * 4), - 'B': [1] * 4}) + {'A': interval_values, + 'B': [1] * interval_values.size}) result = df.pivot_table(index='A', values='B', dropna=dropna) expected = pd.DataFrame( {'B': [1]}, - index=pd.Index(pd.Categorical([pd.Interval(0, 1)]), + index=pd.Index(interval_values.unique(), name='A')) tm.assert_frame_equal(result, expected) From f8b39f3252ecd5e6a4778bb62f2a229094452eb4 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Tue, 11 Jun 2019 15:07:51 +0800 Subject: [PATCH 03/11] add issue number in the updated test for inteval index --- pandas/tests/reshape/test_pivot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index da8df5691cf35..f1f277135785d 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -211,6 +211,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) def test_pivot_with_interval_index(self, interval_values, dropna): + # GH 25814 df = pd.DataFrame( {'A': interval_values, 'B': [1] * interval_values.size}) From e6d1249c7cfe9685c768abc49046a44bece5a5df Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Tue, 11 Jun 2019 15:46:24 +0800 Subject: [PATCH 04/11] update whatsnews's reshaping section --- doc/source/whatsnew/v0.24.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index c3b442e2352bb..e1e087f5600f0 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -59,6 +59,7 @@ Bug Fixes - Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) +- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise TypeError (:issue:`25814`) **Visualization** From 6ddeb26d1fff187d446babd51c8ae69f2a8bc69c Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 11:10:15 +0800 Subject: [PATCH 05/11] update reshape/test_pivot.py --- pandas/tests/reshape/test_pivot.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f1f277135785d..4150552df76d2 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -19,18 +19,17 @@ def dropna(request): return request.param -@pytest.fixture( - params=[ - pd.Categorical([pd.Interval(0, 1, 'right')] * 4), - pd.Categorical([pd.Interval(0, 1, 'left')] * 4), - pd.Categorical([pd.Interval(low, high, 'right') - for low, high in zip(range(0, 3), range(1, 4))]), - pd.Categorical([pd.Interval(low, high, 'left') - for low, high in zip(range(0, 3), range(1, 4))])]) -def interval_values(request): +@pytest.fixture(params=['right', 'left']) +def closed(request): return request.param +@pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))]) +def interval_values(request, closed): + left, right = request.param + return Categorical(pd.IntervalIndex.from_arrays(left, right, closed)) + + class TestPivotTable: def setup_method(self, method): @@ -212,14 +211,14 @@ def test_pivot_with_non_observable_dropna(self, dropna): def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 - df = pd.DataFrame( + df = DataFrame( {'A': interval_values, 'B': [1] * interval_values.size}) result = df.pivot_table(index='A', values='B', dropna=dropna) - expected = pd.DataFrame( + expected = DataFrame( {'B': [1]}, - index=pd.Index(interval_values.unique(), - name='A')) + index=Index(interval_values.unique(), + name='A')) tm.assert_frame_equal(result, expected) def test_pass_array(self): From 44ab1110384a079b30ca9afd8394f43d4df8e553 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 11:10:53 +0800 Subject: [PATCH 06/11] update whatsnew ,add backticks for TypeError --- doc/source/whatsnew/v0.24.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index e1e087f5600f0..658e87369d824 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -59,7 +59,7 @@ Bug Fixes - Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) -- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise TypeError (:issue:`25814`) +- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise `TypeError` (:issue:`25814`) **Visualization** From b011bbc4185b82e96ae1337432bb40f7f32d74cf Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 11:18:31 +0800 Subject: [PATCH 07/11] simplify tests cases when create Dataframe --- pandas/tests/reshape/test_pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 4150552df76d2..0fa4c3bbe027d 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -213,10 +213,10 @@ def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 df = DataFrame( {'A': interval_values, - 'B': [1] * interval_values.size}) + 'B': 1}) result = df.pivot_table(index='A', values='B', dropna=dropna) expected = DataFrame( - {'B': [1]}, + {'B': 1}, index=Index(interval_values.unique(), name='A')) tm.assert_frame_equal(result, expected) From d6b06e4c398fdbce6302054433b0e654472b16bd Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 11:22:24 +0800 Subject: [PATCH 08/11] use double backticks for TypeError in whatsnew --- doc/source/whatsnew/v0.24.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 658e87369d824..705ade9a955cb 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -59,7 +59,7 @@ Bug Fixes - Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) -- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise `TypeError` (:issue:`25814`) +- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) **Visualization** From 72a316e6a11f1012dd8a2fceb9a9f6f609312bf8 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 13:30:06 +0800 Subject: [PATCH 09/11] move whatsnew entry from v0.24.2 to v0.25.0 --- doc/source/whatsnew/v0.24.2.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 705ade9a955cb..c3b442e2352bb 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -59,7 +59,6 @@ Bug Fixes - Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) -- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) **Visualization** diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index df22a21196dab..5ea29db57f29d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -700,6 +700,7 @@ Reshaping - Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) - Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) +- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) Sparse ^^^^^^ From 1e0b5e44e7beb2092d61b908e9a6a0e59c769c7c Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 13:41:04 +0800 Subject: [PATCH 10/11] remove extra fixtures in test_pivot, use top level conftest instead --- pandas/tests/reshape/test_pivot.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 0fa4c3bbe027d..8543d2c2df7d6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -19,11 +19,6 @@ def dropna(request): return request.param -@pytest.fixture(params=['right', 'left']) -def closed(request): - return request.param - - @pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))]) def interval_values(request, closed): left, right = request.param From be7f21a26a4463c348fc940e4b94ca86ca098f67 Mon Sep 17 00:00:00 2001 From: liwenhuan Date: Wed, 12 Jun 2019 14:13:19 +0800 Subject: [PATCH 11/11] modification of whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 5ea29db57f29d..7bde070fd6492 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -700,7 +700,7 @@ Reshaping - Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) - Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) -- Bug in :func:`DataFrame.pivot_table` where use :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) +- Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) Sparse ^^^^^^