Skip to content

Commit b56cea2

Browse files
sinhrks authored and jreback committed
BUG/TST: TimeGrouper has erroneous groups if key length is too short
closes #8542

Author: sinhrks <[email protected]>

Closes #13028 from sinhrks/tgrouper_groups and squashes the following commits:

93b41d5 [sinhrks] BUG: TimeGrouper has erroneous groups if key length is too short
1 parent a959bd5 commit b56cea2

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

pandas/tests/test_groupby.py

+51
Original file line number | Diff line number | Diff line change
@@ -4507,6 +4507,57 @@ def test_groupby_with_empty(self):
45074507
grouped = series.groupby(grouper)
45084508
assert next(iter(grouped), None) is None
45094509

4510+
def test_aaa_groupby_with_small_elem(self):
    """Group very short frames by month end and by the 'event' column.

    Regression test for GH 8542. For each scenario the frame is grouped
    by ``[pd.TimeGrouper(freq='M'), 'event']`` and the test checks the
    number of groups, the presence of every expected key in
    ``grouped.groups`` and the rows returned by ``get_group`` for each
    key.
    """
    # GH 8542
    # Each scenario is (index dates, 'change' values, expected groups),
    # where expected groups is an ordered list of
    # (month-end label, positional rows of the frame in that group).
    scenarios = [
        # two rows -> two groups
        (['2014-09-10', '2013-10-10'],
         [1234, 5678],
         [('2014-09-30', [0]),
          ('2013-10-31', [1])]),
        # three rows, two of them in the same month -> two groups
        (['2014-09-10', '2013-10-10', '2014-09-15'],
         [1234, 5678, 9123],
         [('2014-09-30', [0, 2]),
          ('2013-10-31', [1])]),
        # three rows in three different months -> three groups
        (['2014-09-10', '2013-10-10', '2014-08-05'],
         [1234, 5678, 9123],
         [('2014-09-30', [0]),
          ('2013-10-31', [1]),
          ('2014-08-31', [2])]),
    ]

    for dates, changes, expected in scenarios:
        df = pd.DataFrame({'event': ['start'] * len(dates),
                           'change': changes},
                          index=pd.DatetimeIndex(dates))
        grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event'])

        n_expected = len(expected)
        self.assertEqual(len(grouped.groups), n_expected)
        self.assertEqual(grouped.ngroups, n_expected)

        # Build the (month end, event) keys once; they are used both
        # for the membership checks and for the get_group lookups.
        keys = [(pd.Timestamp(label), 'start') for label, _ in expected]
        for key in keys:
            self.assertIn(key, grouped.groups)

        for key, (_, rows) in zip(keys, expected):
            res = grouped.get_group(key)
            tm.assert_frame_equal(res, df.iloc[rows, :])
4560+
45104561
def test_groupby_with_timezone_selection(self):
45114562
# GH 11616
45124563
# Test that column selection returns output in correct timezone.

0 commit comments

Comments
 (0)