Skip to content

Commit ed2a2e4

Browse files
Nicholas Ver Halen, jreback
Nicholas Ver Halen authored and jreback committed
BUG: pivot_table over Categorical Columns
closes #15193

Author: Nicholas Ver Halen <[email protected]>

Closes #15511 from verhalenn/issue15193 and squashes the following commits:

bf0fdeb [Nicholas Ver Halen] Added description to code change.
adf8616 [Nicholas Ver Halen] Added whatsnew for issue 15193
a643267 [Nicholas Ver Halen] Added test for issue 15193
d605251 [Nicholas Ver Halen] Made sure pivot_table propped na columns
1 parent ca6d88b commit ed2a2e4

File tree

3 files changed

+38
-0
lines changed

3 files changed

+38
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ Bug Fixes
735735

736736

737737
- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`)
738+
- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns were of ``category`` dtype (:issue:`15193`)
738739

739740

740741
- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`)

pandas/tests/tools/test_pivot.py

+33
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,39 @@ def test_pivot_table_dropna(self):
8686
tm.assert_index_equal(pv_col.columns, m)
8787
tm.assert_index_equal(pv_ind.index, m)
8888

89+
def test_pivot_table_dropna_categoricals(self):
90+
# GH 15193
91+
categories = ['a', 'b', 'c', 'd']
92+
93+
df = DataFrame({'A': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
94+
'B': [1, 2, 3, 1, 2, 3, 1, 2, 3],
95+
'C': range(0, 9)})
96+
97+
df['A'] = df['A'].astype('category', ordered=False,
98+
categories=categories)
99+
result_true = df.pivot_table(index='B', columns='A', values='C',
100+
dropna=True)
101+
expected_columns = Series(['a', 'b', 'c'], name='A')
102+
expected_columns = expected_columns.astype('category', ordered=False,
103+
categories=categories)
104+
expected_index = Series([1, 2, 3], name='B')
105+
expected_true = DataFrame([[0.0, 3.0, 6.0],
106+
[1.0, 4.0, 7.0],
107+
[2.0, 5.0, 8.0]],
108+
index=expected_index,
109+
columns=expected_columns,)
110+
tm.assert_frame_equal(expected_true, result_true)
111+
112+
result_false = df.pivot_table(index='B', columns='A', values='C',
113+
dropna=False)
114+
expected_columns = Series(['a', 'b', 'c', 'd'], name='A')
115+
expected_false = DataFrame([[0.0, 3.0, 6.0, np.NaN],
116+
[1.0, 4.0, 7.0, np.NaN],
117+
[2.0, 5.0, 8.0, np.NaN]],
118+
index=expected_index,
119+
columns=expected_columns,)
120+
tm.assert_frame_equal(expected_false, result_false)
121+
89122
def test_pass_array(self):
90123
result = self.data.pivot_table(
91124
'D', index=self.data.A, columns=self.data.C)

pandas/tools/pivot.py

+4
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
175175
if len(index) == 0 and len(columns) > 0:
176176
table = table.T
177177

178+
# GH 15193 Make sure empty columns are removed if dropna=True
179+
if isinstance(table, DataFrame) and dropna:
180+
table = table.dropna(how='all', axis=1)
181+
178182
return table
179183

180184

0 commit comments

Comments
 (0)