Skip to content

Commit 1745fb5

Browse files
committed
BUG: dropna incorrect with categoricals in pivot_table
closes pandas-dev#21133
1 parent 4cbbcc6 commit 1745fb5

File tree

3 files changed

+44
-3
lines changed

3 files changed

+44
-3
lines changed

doc/source/whatsnew/v0.23.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ Reshaping
9898
^^^^^^^^^
9999

100100
- Bug in :func:`concat` where an error was raised when concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`)
101+
- Bug in :func:`pivot_table` where using ``dropna=True`` with an ordered ``Categorical`` as the pivot index and missing values in the ``values`` would give a mis-ordered result (:issue:`21133`)
101102
-
102103

103104
Other

pandas/core/reshape/pivot.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# pylint: disable=E1103
22

33

4-
from pandas.core.dtypes.common import is_list_like, is_scalar
4+
from pandas.core.dtypes.common import (
5+
is_list_like, is_scalar, is_integer_dtype)
56
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
7+
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
68

79
from pandas.core.reshape.concat import concat
810
from pandas.core.series import Series
@@ -79,8 +81,22 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
7981
pass
8082
values = list(values)
8183

82-
grouped = data.groupby(keys, observed=dropna)
84+
# group by the cartesian product of the groupers
85+
# when any of them is a categorical (observed=False)
86+
grouped = data.groupby(keys, observed=False)
8387
agged = grouped.agg(aggfunc)
88+
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
89+
agged = agged.dropna(how='all')
90+
91+
# gh-21133
92+
# downcast back to the original dtype when
93+
# the original values were integers: grouping
94+
# introduced NaN rows that were then dropped,
95+
# leaving the column coerced to float
96+
for v in [v for v in values if v in data and v in agged]:
97+
if (is_integer_dtype(data[v]) and
98+
not is_integer_dtype(agged[v])):
99+
agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
84100

85101
table = agged
86102
if table.index.nlevels > 1:

pandas/tests/reshape/test_pivot.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12

23
from datetime import datetime, date, timedelta
34

@@ -16,6 +17,11 @@
1617
from pandas.api.types import CategoricalDtype as CDT
1718

1819

20+
@pytest.fixture(params=[True, False])
21+
def dropna(request):
22+
return request.param
23+
24+
1925
class TestPivotTable(object):
2026

2127
def setup_method(self, method):
@@ -109,7 +115,6 @@ def test_pivot_table_categorical(self):
109115
index=exp_index)
110116
tm.assert_frame_equal(result, expected)
111117

112-
@pytest.mark.parametrize('dropna', [True, False])
113118
def test_pivot_table_dropna_categoricals(self, dropna):
114119
# GH 15193
115120
categories = ['a', 'b', 'c', 'd']
@@ -137,6 +142,25 @@ def test_pivot_table_dropna_categoricals(self, dropna):
137142

138143
tm.assert_frame_equal(result, expected)
139144

145+
def test_pivot_with_non_observable_dropna(self, dropna):
146+
# gh-21133
147+
df = pd.DataFrame(
148+
{'A': pd.Categorical([np.nan, 'low', 'high', 'low', 'high'],
149+
categories=['low', 'high'],
150+
ordered=True),
151+
'B': range(5)})
152+
153+
result = df.pivot_table(index='A', values='B', dropna=dropna)
154+
expected = pd.DataFrame(
155+
{'B': [2, 3]},
156+
index=pd.Index(
157+
pd.Categorical.from_codes([0, 1],
158+
categories=['low', 'high'],
159+
ordered=True),
160+
name='A'))
161+
162+
tm.assert_frame_equal(result, expected)
163+
140164
def test_pass_array(self):
141165
result = self.data.pivot_table(
142166
'D', index=self.data.A, columns=self.data.C)

0 commit comments

Comments
 (0)