-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Bug: Grouping by index and column fails on DataFrame with single index (GH14327) #14333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
a421a52
ec9340f
848c9bb
0f95bca
75a0390
6b37bd4
897ec1c
05e6557
33eb725
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -524,6 +524,39 @@ def _format_native_types(self, na_rep='nan', **kwargs): | |
|
||
return mi.values | ||
|
||
def _get_grouper_for_level(self, grouper, level): | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a docstring here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, same as above for styling. |
||
inds = self.labels[level] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Call this `indexer` (i.e. rename `inds` to `indexer`). |
||
level_index = self.levels[level] | ||
|
||
# XXX complete hack | ||
|
||
if grouper is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be great if you could put a comment here explaining what is going on (for future readers). Further, you can just return early if `grouper is not None`, rather than using the `else`. |
||
level_values = self.levels[level].take(inds) | ||
grouper = level_values.map(grouper) | ||
labels = None | ||
level_index = None | ||
else: | ||
# all levels may not be observed | ||
labels, uniques = algos.factorize(inds, sort=True) | ||
|
||
if len(uniques) > 0 and uniques[0] == -1: | ||
# handle NAs | ||
mask = inds != -1 | ||
ok_labels, uniques = algos.factorize(inds[mask], | ||
sort=True) | ||
|
||
labels = np.empty(len(inds), dtype=inds.dtype) | ||
labels[mask] = ok_labels | ||
labels[~mask] = -1 | ||
|
||
if len(uniques) < len(level_index): | ||
level_index = level_index.take(uniques) | ||
|
||
grouper = level_index.take(labels) | ||
|
||
return grouper, labels, level_index | ||
|
||
@property | ||
def _constructor(self): | ||
return MultiIndex.from_tuples | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -458,6 +458,28 @@ def test_grouper_creation_bug(self): | |
expected = s.groupby(level='one').sum() | ||
assert_series_equal(result, expected) | ||
|
||
def test_grouper_column_and_index(self): | ||
# GH 14327 | ||
|
||
# Grouping a multi-index frame by a column and an index level should | ||
# be equivalent to resetting the index and grouping by two columns | ||
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), | ||
('b', 1), ('b', 2), ('b', 3)]) | ||
idx.names = ['outer', 'inner'] | ||
df_multi = pd.DataFrame({"A": np.arange(6), | ||
'B': ['one', 'one', 'two', | ||
'two', 'one', 'one']}, | ||
index=idx) | ||
result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you try with these reversed as well, e.g. `df_multi.groupby([pd.Grouper(level='inner'), 'B'])`? |
||
expected = df_multi.reset_index().groupby(['B', 'inner']).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Grouping a single-index frame by a column and the index should | ||
# be equivalent to resetting the index and grouping by two columns | ||
df_single = df_multi.reset_index('outer') | ||
result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
def test_grouper_getting_correct_binner(self): | ||
|
||
# GH 10063 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a `Parameters` section to the docstring, and move the inline comment to the `Returns` part?