Skip to content

Commit 9b75df4

Browse files
committed
BUG: Groupby.nth includes group key inconsistently pandas-dev#12839
Added tests
1 parent af4ed0f commit 9b75df4

File tree

3 files changed

+44
-1
lines changed

3 files changed

+44
-1
lines changed

doc/source/whatsnew/v0.18.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ Bug Fixes
313313

314314

315315
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
316-
316+
- Bug in ``groupby(..).nth()`` where the group key is included inconsistently (:issue:`12839`)
317317

318318

319319

pandas/core/groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,11 @@ def _selected_obj(self):
457457
else:
458458
return self.obj[self._selection]
459459

460+
def _reset_group_selection(self):
461+
if self._group_selection is not None:
462+
self._group_selection = None
463+
self._reset_cache('_selected_obj')
464+
460465
def _set_selection_from_grouper(self):
461466
""" we may need to create a selection if we have non-level groupers """
462467
grp = self.grouper
@@ -468,6 +473,7 @@ def _set_selection_from_grouper(self):
468473

469474
if len(groupers):
470475
self._group_selection = ax.difference(Index(groupers)).tolist()
476+
self._reset_cache('_selected_obj')
471477

472478
def _set_result_index_ordered(self, result):
473479
# set the result index on the passed values object and
@@ -1402,6 +1408,7 @@ def head(self, n=5):
14021408
0 1 2
14031409
2 5 6
14041410
"""
1411+
self._reset_group_selection()
14051412
mask = self._cumcount_array() < n
14061413
return self._selected_obj[mask]
14071414

@@ -1428,6 +1435,7 @@ def tail(self, n=5):
14281435
0 a 1
14291436
2 b 1
14301437
"""
1438+
self._reset_group_selection()
14311439
mask = self._cumcount_array(ascending=False) < n
14321440
return self._selected_obj[mask]
14331441

pandas/tests/test_groupby.py

+35
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,41 @@ def test_nth_multi_index_as_expected(self):
354354
names=['A', 'B']))
355355
assert_frame_equal(result, expected)
356356

357+
def test_group_selection_cache(self):
358+
# GH 12839: nth, head, and tail should return the same result consistently
359+
df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
360+
expected = df.iloc[[0, 2]].set_index('A')
361+
362+
g = df.groupby('A')
363+
g.head()
364+
result = g.nth(0)
365+
assert_frame_equal(result, expected)
366+
367+
g = df.groupby('A')
368+
g.tail()
369+
result = g.nth(0)
370+
assert_frame_equal(result, expected)
371+
372+
g = df.groupby('A')
373+
g.nth(0)
374+
result = g.head(n=2)
375+
assert_frame_equal(result, df)
376+
377+
g = df.groupby('A')
378+
g.nth(0)
379+
result = g.tail(n=2)
380+
assert_frame_equal(result, df)
381+
382+
g = df.groupby('A')
383+
g.head()
384+
result = g.head(n=2)
385+
assert_frame_equal(result, df)
386+
387+
g = df.groupby('A')
388+
g.tail()
389+
result = g.tail(n=2)
390+
assert_frame_equal(result, df)
391+
357392
def test_grouper_index_types(self):
358393
# related GH5375
359394
# groupby misbehaving when using a Floatlike index

0 commit comments

Comments
 (0)