Skip to content

Commit db884d9

Browse files
committed
Merge pull request pandas-dev#11371 from jreback/jakevdp-pivot-table-categorical
BUG: pivot table bug with Categorical indexes, pandas-dev#10993
2 parents 5d953e3 + 7ca878e commit db884d9

File tree

5 files changed

+52
-5
lines changed

5 files changed

+52
-5
lines changed

doc/source/whatsnew/v0.17.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ Bug Fixes
8787

8888
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
8989

90-
90+
- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`)
9191
- Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`)
9292

9393

pandas/core/index.py

+12
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,10 @@ def astype(self, dtype):
627627
return Index(self.values.astype(dtype), name=self.name,
628628
dtype=dtype)
629629

630+
def _to_safe_for_reshape(self):
631+
""" convert to object if we are a categorical; a plain Index needs no coercion, so return self unchanged """
632+
return self
633+
630634
def to_datetime(self, dayfirst=False):
631635
"""
632636
For an Index containing strings or datetime.datetime objects, attempt
@@ -3190,6 +3194,10 @@ def duplicated(self, keep='first'):
31903194
from pandas.hashtable import duplicated_int64
31913195
return duplicated_int64(self.codes.astype('i8'), keep)
31923196

3197+
def _to_safe_for_reshape(self):
3198+
""" convert to object if we are a categorical """
3199+
return self.astype('object')
3200+
31933201
def get_loc(self, key, method=None):
31943202
"""
31953203
Get integer location for requested label
@@ -4529,6 +4537,10 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
45294537
else:
45304538
return result_levels
45314539

4540+
def _to_safe_for_reshape(self):
4541+
""" rebuild the levels by calling _to_safe_for_reshape on each one, so that categorical levels are converted to object """
4542+
return self.set_levels([ i._to_safe_for_reshape() for i in self.levels ])
4543+
45324544
def to_hierarchical(self, n_repeat, n_shuffle=1):
45334545
"""
45344546
Return a MultiIndex reshaped to conform to the

pandas/core/internals.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -3427,6 +3427,9 @@ def insert(self, loc, item, value, allow_duplicates=False):
34273427
if not isinstance(loc, int):
34283428
raise TypeError("loc must be int")
34293429

3430+
# insert to the axis; this could possibly raise a TypeError
3431+
new_axis = self.items.insert(loc, item)
3432+
34303433
block = make_block(values=value,
34313434
ndim=self.ndim,
34323435
placement=slice(loc, loc+1))
@@ -3449,8 +3452,7 @@ def insert(self, loc, item, value, allow_duplicates=False):
34493452
self._blklocs = np.insert(self._blklocs, loc, 0)
34503453
self._blknos = np.insert(self._blknos, loc, len(self.blocks))
34513454

3452-
self.axes[0] = self.items.insert(loc, item)
3453-
3455+
self.axes[0] = new_axis
34543456
self.blocks += (block,)
34553457
self._shape = None
34563458

pandas/tools/pivot.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,13 @@ def _add_margins(table, data, values, rows, cols, aggfunc):
189189
margin_dummy = DataFrame(row_margin, columns=[key]).T
190190

191191
row_names = result.index.names
192-
result = result.append(margin_dummy)
192+
try:
193+
result = result.append(margin_dummy)
194+
except TypeError:
195+
196+
# appending the margin row raised a TypeError, so coerce the index via _to_safe_for_reshape and retry the append
197+
result.index = result.index._to_safe_for_reshape()
198+
result = result.append(margin_dummy)
193199
result.index.names = row_names
194200

195201
return result
@@ -218,6 +224,7 @@ def _compute_grand_margin(data, values, aggfunc):
218224

219225

220226
def _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin):
227+
221228
if len(cols) > 0:
222229
# need to "interleave" the margins
223230
table_pieces = []
@@ -235,7 +242,13 @@ def _all_key(key):
235242

236243
# we are going to mutate this, so need to copy!
237244
piece = piece.copy()
238-
piece[all_key] = margin[key]
245+
try:
246+
piece[all_key] = margin[key]
247+
except TypeError:
248+
249+
# assigning the margin column raised a TypeError, so coerce the axis via _to_safe_for_reshape and retry the assignment
250+
piece.set_axis(cat_axis, piece._get_axis(cat_axis)._to_safe_for_reshape())
251+
piece[all_key] = margin[key]
239252

240253
table_pieces.append(piece)
241254
margin_keys.append(all_key)

pandas/tools/tests/test_pivot.py

+20
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,26 @@ def test_crosstab_dropna(self):
719719
('two', 'dull'), ('two', 'shiny')])
720720
assert_equal(res.columns.values, m.values)
721721

722+
def test_categorical_margins(self):
723+
# GH 10989
724+
df = pd.DataFrame({'x': np.arange(8),
725+
'y': np.arange(8) // 4,
726+
'z': np.arange(8) % 2})
727+
728+
expected = pd.DataFrame([[1.0, 2.0, 1.5],[5, 6, 5.5],[3, 4, 3.5]])
729+
expected.index = Index([0,1,'All'],name='y')
730+
expected.columns = Index([0,1,'All'],name='z')
731+
732+
data = df.copy()
733+
table = data.pivot_table('x', 'y', 'z', margins=True)
734+
tm.assert_frame_equal(table, expected)
735+
736+
data = df.copy()
737+
data.y = data.y.astype('category')
738+
data.z = data.z.astype('category')
739+
table = data.pivot_table('x', 'y', 'z', margins=True)
740+
tm.assert_frame_equal(table, expected)
741+
722742
if __name__ == '__main__':
723743
import nose
724744
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)