From c5f4df6640d900ccdc280406087afc787dbab908 Mon Sep 17 00:00:00 2001
From: liwenhuan
Date: Fri, 5 Jul 2019 16:21:46 +0800
Subject: [PATCH 1/4] TST: add tests to validate margin results for pivot (#25815)

---
 pandas/tests/reshape/test_pivot.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index b497f6c3aa9b4..84df97c79e789 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -286,6 +286,25 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
         expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_with_interval_index_margins(self, dropna):
+        # GH 25815
+        ordered_cat = pd.IntervalIndex.from_arrays(
+            [0, 0, 1, 1], [1, 1, 2, 2])
+        df = pd.DataFrame({
+            'A': np.arange(4, 0, -1),
+            'B': ['a', 'b', 'a', 'b'],
+            'C': pd.Categorical(ordered_cat,
+                                ordered=True).sort_values(ascending=False)
+        })
+
+        pivot_tab = pd.pivot_table(data=df, index='C', columns='B', values='A',
+                                   aggfunc='sum', margins=True)
+
+        result = pivot_tab['All']
+        expected = pivot_tab.iloc[:, :-1].sum(axis=1)
+        tm.assert_series_equal(result, expected, check_dtype=False,
+                               check_names=False)
+
     def test_pass_array(self):
         result = self.data.pivot_table("D", index=self.data.A, columns=self.data.C)
         expected = self.data.pivot_table("D", index="A", columns="C")

From 7547903c830ba8d37ea58dcda74384dfb1d2bc4b Mon Sep 17 00:00:00 2001
From: liwenhuan
Date: Mon, 8 Jul 2019 17:27:52 +0800
Subject: [PATCH 2/4] fix style errors, construct expected results manually

---
 pandas/tests/reshape/test_pivot.py | 36 ++++++++++++++++--------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 84df97c79e789..6b6eea54b95c5 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -286,24 +286,26 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
         expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_with_interval_index_margins(self, dropna):
+    def test_pivot_with_interval_index_margins(self):
         # GH 25815
-        ordered_cat = pd.IntervalIndex.from_arrays(
-            [0, 0, 1, 1], [1, 1, 2, 2])
-        df = pd.DataFrame({
-            'A': np.arange(4, 0, -1),
-            'B': ['a', 'b', 'a', 'b'],
-            'C': pd.Categorical(ordered_cat,
-                                ordered=True).sort_values(ascending=False)
-        })
-
-        pivot_tab = pd.pivot_table(data=df, index='C', columns='B', values='A',
-                                   aggfunc='sum', margins=True)
-
-        result = pivot_tab['All']
-        expected = pivot_tab.iloc[:, :-1].sum(axis=1)
-        tm.assert_series_equal(result, expected, check_dtype=False,
-                               check_names=False)
+        ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2])
+        df = pd.DataFrame(
+            {
+                "A": np.arange(4, 0, -1),
+                "B": ["a", "b", "a", "b"],
+                "C": pd.Categorical(ordered_cat, ordered=True).sort_values(
+                    ascending=False
+                ),
+            }
+        )
+
+        pivot_tab = pd.pivot_table(
+            df, index="C", columns="B", values="A", aggfunc="sum", margins=True
+        )
+
+        result = pivot_tab["All"]
+        expected = pd.Series([3, 7, 10], index=result.index, name="All", dtype="int32")
+        tm.assert_series_equal(result, expected)
 
     def test_pass_array(self):
         result = self.data.pivot_table("D", index=self.data.A, columns=self.data.C)

From 0c8a66cc285ecb959da43e08f9fa18dbefeb0582 Mon Sep 17 00:00:00 2001
From: liwenhuan
Date: Mon, 8 Jul 2019 18:16:16 +0800
Subject: [PATCH 3/4] force to use same dtype in tests

---
 pandas/tests/reshape/test_pivot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 6b6eea54b95c5..5bf22902022ad 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -291,7 +291,7 @@ def test_pivot_with_interval_index_margins(self):
         ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2])
         df = pd.DataFrame(
             {
-                "A": np.arange(4, 0, -1),
+                "A": np.arange(4, 0, -1).astype('int32'),
                 "B": ["a", "b", "a", "b"],
                 "C": pd.Categorical(ordered_cat, ordered=True).sort_values(
                     ascending=False

From c536e78b9856b136f69d4c6b3723a2eda96ea990 Mon Sep 17 00:00:00 2001
From: liwenhuan
Date: Tue, 9 Jul 2019 14:14:04 +0800
Subject: [PATCH 4/4] fix style errors in pivot test

---
 pandas/tests/reshape/test_pivot.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 5bf22902022ad..4e2302472b294 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -289,9 +289,9 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
     def test_pivot_with_interval_index_margins(self):
         # GH 25815
         ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2])
-        df = pd.DataFrame(
+        df = DataFrame(
             {
-                "A": np.arange(4, 0, -1).astype('int32'),
+                "A": np.arange(4, 0, -1, dtype=np.intp),
                 "B": ["a", "b", "a", "b"],
                 "C": pd.Categorical(ordered_cat, ordered=True).sort_values(
                     ascending=False
@@ -304,7 +304,12 @@ def test_pivot_with_interval_index_margins(self):
         )
 
         result = pivot_tab["All"]
-        expected = pd.Series([3, 7, 10], index=result.index, name="All", dtype="int32")
+        expected = Series(
+            [3, 7, 10],
+            index=Index([pd.Interval(0, 1), pd.Interval(1, 2), "All"], name="C"),
+            name="All",
+            dtype=np.intp,
+        )
         tm.assert_series_equal(result, expected)
 
     def test_pass_array(self):