diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 5c03408cbf20f..0046e2553ad6b 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -46,3 +46,4 @@ Bug Fixes - Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) - Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) +- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3c376e3188eac..5223c0ac270f3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2201,36 +2201,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, raise AssertionError('Level %s not in index' % str(level)) level = index.names.index(level) - inds = index.labels[level] - level_index = index.levels[level] - if self.name is None: self.name = index.names[level] - # XXX complete hack - - if grouper is not None: - level_values = index.levels[level].take(inds) - self.grouper = level_values.map(self.grouper) - else: - # all levels may not be observed - labels, uniques = algos.factorize(inds, sort=True) - - if len(uniques) > 0 and uniques[0] == -1: - # handle NAs - mask = inds != -1 - ok_labels, uniques = algos.factorize(inds[mask], sort=True) - - labels = np.empty(len(inds), dtype=inds.dtype) - labels[mask] = ok_labels - labels[~mask] = -1 - - if len(uniques) < len(level_index): - level_index = level_index.take(uniques) + self.grouper, self._labels, self._group_index = \ + index._get_grouper_for_level(self.grouper, level) - self._labels = labels - self._group_index = level_index - self.grouper = level_index.take(labels) else: if isinstance(self.grouper, (list, tuple)):
self.grouper = com._asarray_tuplesafe(self.grouper) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5082fc84982c6..1c24a0db34b2b 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -432,6 +432,36 @@ def _update_inplace(self, result, **kwargs): # guard when called from IndexOpsMixin raise TypeError("Index can't be updated inplace") + _index_shared_docs['_get_grouper_for_level'] = """ + Get index grouper corresponding to an index level + + Parameters + ---------- + mapper: Group mapping function or None + Function mapping index values to groups + level : int or None + Index level + + Returns + ------- + grouper : Index + Index of values to group on + labels : ndarray of int or None + Array of locations in level_index + uniques : Index or None + Index of unique values for level + """ + + @Appender(_index_shared_docs['_get_grouper_for_level']) + def _get_grouper_for_level(self, mapper, level=None): + assert level is None or level == 0 + if mapper is None: + grouper = self + else: + grouper = self.map(mapper) + + return grouper, None, None + def is_(self, other): """ More flexible, faster check like ``is`` but that works through views diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 0c465da24a17e..a9f452db69659 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -539,6 +539,37 @@ def _format_native_types(self, na_rep='nan', **kwargs): return mi.values + @Appender(_index_shared_docs['_get_grouper_for_level']) + def _get_grouper_for_level(self, mapper, level): + indexer = self.labels[level] + level_index = self.levels[level] + + if mapper is not None: + # Handle group mapping function and return + level_values = self.levels[level].take(indexer) + grouper = level_values.map(mapper) + return grouper, None, None + + labels, uniques = algos.factorize(indexer, sort=True) + + if len(uniques) > 0 and uniques[0] == -1: + # Handle NAs + mask = indexer != -1 + ok_labels, uniques = 
algos.factorize(indexer[mask], + sort=True) + + labels = np.empty(len(indexer), dtype=indexer.dtype) + labels[mask] = ok_labels + labels[~mask] = -1 + + if len(uniques) < len(level_index): + # Remove unobserved levels from level_index + level_index = level_index.take(uniques) + + grouper = level_index.take(labels) + + return grouper, labels, level_index + @property def _constructor(self): return MultiIndex.from_tuples diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 02917ab18c29f..f3791ee1d5c91 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -458,6 +458,39 @@ def test_grouper_creation_bug(self): expected = s.groupby(level='one').sum() assert_series_equal(result, expected) + def test_grouper_column_and_index(self): + # GH 14327 + + # Grouping a multi-index frame by a column and an index level should + # be equivalent to resetting the index and grouping by two columns + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3)]) + idx.names = ['outer', 'inner'] + df_multi = pd.DataFrame({"A": np.arange(6), + 'B': ['one', 'one', 'two', + 'two', 'one', 'one']}, + index=idx) + result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() + expected = df_multi.reset_index().groupby(['B', 'inner']).mean() + assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() + expected = df_multi.reset_index().groupby(['inner', 'B']).mean() + assert_frame_equal(result, expected) + + # Grouping a single-index frame by a column and the index should + # be equivalent to resetting the index and grouping by two columns + df_single = df_multi.reset_index('outer') + result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() + expected = df_single.reset_index().groupby(['B', 'inner']).mean() + assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = 
df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() + expected = df_single.reset_index().groupby(['inner', 'B']).mean() + assert_frame_equal(result, expected) + def test_grouper_getting_correct_binner(self): # GH 10063