diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index ea2b85d983ade..bdfbf08b37e57 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -87,7 +87,7 @@ Bug Fixes - Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) - +- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`) - Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`) diff --git a/pandas/core/index.py b/pandas/core/index.py index ede848c1103ab..7049ac33feac6 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -627,6 +627,10 @@ def astype(self, dtype): return Index(self.values.astype(dtype), name=self.name, dtype=dtype) + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self + def to_datetime(self, dayfirst=False): """ For an Index containing strings or datetime.datetime objects, attempt @@ -3190,6 +3194,10 @@ def duplicated(self, keep='first'): from pandas.hashtable import duplicated_int64 return duplicated_int64(self.codes.astype('i8'), keep) + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.astype('object') + def get_loc(self, key, method=None): """ Get integer location for requested label @@ -4529,6 +4537,10 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: return result_levels + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.set_levels([ i._to_safe_for_reshape() for i in self.levels ]) + def to_hierarchical(self, n_repeat, n_shuffle=1): """ Return a MultiIndex reshaped to conform to the diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d98121520b8b0..f1d82ec1f3b2e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3427,6 +3427,9 @@ def insert(self, loc, item, value, allow_duplicates=False): if not isinstance(loc, int): raise TypeError("loc must be int") + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc+1)) @@ -3449,8 +3452,7 @@ def insert(self, loc, item, value, allow_duplicates=False): self._blklocs = np.insert(self._blklocs, loc, 0) self._blknos = np.insert(self._blknos, loc, len(self.blocks)) - self.axes[0] = self.items.insert(loc, item) - + self.axes[0] = new_axis self.blocks += (block,) self._shape = None diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 89fe9463282b6..de7a5f5a73f3d 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -189,7 +189,13 @@ def _add_margins(table, data, values, rows, cols, aggfunc): margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names - result = result.append(margin_dummy) + try: + result = result.append(margin_dummy) + except TypeError: + + # we cannot reshape, so coerce the axis + result.index = result.index._to_safe_for_reshape() + result = result.append(margin_dummy) result.index.names = row_names return result @@ -218,6 +224,7 @@ def _compute_grand_margin(data, values, aggfunc): def _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin): + if len(cols) > 0: # need to "interleave" the margins table_pieces = [] @@ -235,7 +242,13 @@ def _all_key(key): # we are going to mutate this, so need to copy! piece = piece.copy() - piece[all_key] = margin[key] + try: + piece[all_key] = margin[key] + except TypeError: + + # we cannot reshape, so coerce the axis + piece.set_axis(cat_axis, piece._get_axis(cat_axis)._to_safe_for_reshape()) + piece[all_key] = margin[key] table_pieces.append(piece) margin_keys.append(all_key) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 50ae574c03067..f0052774d66a2 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -719,6 +719,26 @@ def test_crosstab_dropna(self): ('two', 'dull'), ('two', 'shiny')]) assert_equal(res.columns.values, m.values) + def test_categorical_margins(self): + # GH 10989 + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5],[5, 6, 5.5],[3, 4, 3.5]]) + expected.index = Index([0,1,'All'],name='y') + expected.columns = Index([0,1,'All'],name='z') + + data = df.copy() + table = data.pivot_table('x', 'y', 'z', margins=True) + tm.assert_frame_equal(table, expected) + + data = df.copy() + data.y = data.y.astype('category') + data.z = data.z.astype('category') + table = data.pivot_table('x', 'y', 'z', margins=True) + tm.assert_frame_equal(table, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],