Skip to content

Commit ea9a5a8

Browse files
sinhrks authored and jreback committed
BUG: GroupBy with TimeGrouper sorts unstably
closes #7453

Author: sinhrks <[email protected]>

Closes #12840 from sinhrks/tgrouper_first and squashes the following commits:

567e823 [sinhrks] BUG: GroupBy with TimeGrouper sorts unstably
1 parent f813425 commit ea9a5a8

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,7 @@ Bug Fixes
207207

208208

209209
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
210+
- Bug in ``GroupBy.first()``, ``.last()`` returning an incorrect row when ``TimeGrouper`` is used (:issue:`7453`)
210211

211212

212213

pandas/core/groupby.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,8 @@ def _set_grouper(self, obj, sort=False):
273273

274274
# possibly sort
275275
if (self.sort or sort) and not ax.is_monotonic:
276-
indexer = self.indexer = ax.argsort(kind='quicksort')
276+
# use stable sort to support first, last, nth
277+
indexer = self.indexer = ax.argsort(kind='mergesort')
277278
ax = ax.take(indexer)
278279
obj = obj.take(indexer, axis=self.axis,
279280
convert=False, is_copy=False)

pandas/tseries/tests/test_resample.py

+27 -3
Original file line number | Diff line number | Diff line change
@@ -2365,6 +2365,28 @@ def test_fails_on_no_datetime_index(self):
23652365
"got an instance of 'PeriodIndex'"):
23662366
df.groupby(TimeGrouper('D'))
23672367

2368+
def test_aaa_group_order(self):
2369+
# GH 12840
2370+
# check that TimeGrouper performs a stable sort
2371+
n = 20
2372+
data = np.random.randn(n, 4)
2373+
df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
2374+
df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
2375+
datetime(2013, 1, 3), datetime(2013, 1, 4),
2376+
datetime(2013, 1, 5)] * 4
2377+
grouped = df.groupby(TimeGrouper(key='key', freq='D'))
2378+
2379+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)),
2380+
df[::5])
2381+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)),
2382+
df[1::5])
2383+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)),
2384+
df[2::5])
2385+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)),
2386+
df[3::5])
2387+
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)),
2388+
df[4::5])
2389+
23682390
def test_aggregate_normal(self):
23692391
# check TimeGrouper's aggregation is identical to a normal groupby
23702392

@@ -2402,23 +2424,25 @@ def test_aggregate_normal(self):
24022424
periods=5, name='key')
24032425
dt_result = getattr(dt_grouped, func)()
24042426
assert_series_equal(expected, dt_result)
2405-
"""
2427+
2428+
# GH 7453
24062429
for func in ['first', 'last']:
24072430
expected = getattr(normal_grouped, func)()
24082431
expected.index = date_range(start='2013-01-01', freq='D',
24092432
periods=5, name='key')
24102433
dt_result = getattr(dt_grouped, func)()
24112434
assert_frame_equal(expected, dt_result)
24122435

2436+
# if TimeGrouper is used, 'nth' doesn't work yet
2437+
2438+
"""
24132439
for func in ['nth']:
24142440
expected = getattr(normal_grouped, func)(3)
24152441
expected.index = date_range(start='2013-01-01',
24162442
freq='D', periods=5, name='key')
24172443
dt_result = getattr(dt_grouped, func)(3)
24182444
assert_frame_equal(expected, dt_result)
24192445
"""
2420-
# if TimeGrouper is used included, 'first','last' and 'nth' doesn't
2421-
# work yet
24222446

24232447
def test_aggregate_with_nat(self):
24242448
# check TimeGrouper's aggregation is identical to a normal groupby

0 commit comments

Comments
 (0)