Skip to content

Commit 3e31383

Browse files
P-Tillmann authored and jowens committed
Bug: groupby multiindex levels equals rows (pandas-dev#16859)
closes pandas-dev#16843
1 parent a9574b0 commit 3e31383

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

doc/source/whatsnew/v0.21.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ Groupby/Resample/Rolling
384384
- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`)
385385
- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
386386
- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`)
387-
387+
- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
388388

389389
Sparse
390390
^^^^^^

pandas/core/groupby.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
26292629

26302630
try:
26312631
if isinstance(obj, DataFrame):
2632-
all_in_columns = all(g in obj.columns for g in keys)
2632+
all_in_columns_index = all(g in obj.columns or g in obj.index.names
2633+
for g in keys)
26332634
else:
2634-
all_in_columns = False
2635+
all_in_columns_index = False
26352636
except Exception:
2636-
all_in_columns = False
2637+
all_in_columns_index = False
26372638

2638-
if not any_callable and not all_in_columns and \
2639+
if not any_callable and not all_in_columns_index and \
26392640
not any_arraylike and not any_groupers and \
26402641
match_axis_length and level is None:
26412642
keys = [com._asarray_tuplesafe(keys)]

pandas/tests/groupby/test_groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -3891,6 +3891,19 @@ def predictions(tool):
38913891
result = df2.groupby('Key').apply(predictions).p1
38923892
tm.assert_series_equal(expected, result)
38933893

3894+
def test_gb_key_len_equal_axis_len(self):
3895+
# GH16843
3896+
# test ensures that index and column keys are recognized correctly
3897+
# when number of keys equals axis length of groupby
3898+
df = pd.DataFrame([['foo', 'bar', 'B', 1],
3899+
['foo', 'bar', 'B', 2],
3900+
['foo', 'baz', 'C', 3]],
3901+
columns=['first', 'second', 'third', 'one'])
3902+
df = df.set_index(['first', 'second'])
3903+
df = df.groupby(['first', 'second', 'third']).size()
3904+
assert df.loc[('foo', 'bar', 'B')] == 2
3905+
assert df.loc[('foo', 'baz', 'C')] == 1
3906+
38943907

38953908
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
38963909
tups = lmap(tuple, df[keys].values)

0 commit comments

Comments
 (0)