Skip to content

Commit 7b23e65

Browse files
committed
TST: Fix existing tests for groupby
1 parent c3a1701 commit 7b23e65

File tree

3 files changed

+47
-27
lines changed

3 files changed

+47
-27
lines changed

pandas/tests/groupby/test_groupby.py

+25-19
Original file line numberDiff line numberDiff line change
@@ -1791,18 +1791,19 @@ def aggfun(ser):
17911791
agged2 = df.groupby(keys).aggregate(aggfun)
17921792
assert len(agged2.columns) + 1 == len(df.columns)
17931793

1794-
def test_groupby_level(self):
1794+
@pytest.mark.parametrize('sort', [True, False])
1795+
def test_groupby_level(self, sort):
17951796
frame = self.mframe
17961797
deleveled = frame.reset_index()
17971798

1798-
result0 = frame.groupby(level=0).sum()
1799-
result1 = frame.groupby(level=1).sum()
1799+
result0 = frame.groupby(level=0, sort=sort).sum()
1800+
result1 = frame.groupby(level=1, sort=sort).sum()
18001801

1801-
expected0 = frame.groupby(deleveled['first'].values).sum()
1802-
expected1 = frame.groupby(deleveled['second'].values).sum()
1802+
expected0 = frame.groupby(deleveled['first'].values, sort=sort).sum()
1803+
expected1 = frame.groupby(deleveled['second'].values, sort=sort).sum()
18031804

1804-
expected0 = expected0.reindex(frame.index.levels[0])
1805-
expected1 = expected1.reindex(frame.index.levels[1])
1805+
expected0.index.name = 'first'
1806+
expected1.index.name = 'second'
18061807

18071808
assert result0.index.name == 'first'
18081809
assert result1.index.name == 'second'
@@ -1813,15 +1814,15 @@ def test_groupby_level(self):
18131814
assert result1.index.name == frame.index.names[1]
18141815

18151816
# groupby level name
1816-
result0 = frame.groupby(level='first').sum()
1817-
result1 = frame.groupby(level='second').sum()
1817+
result0 = frame.groupby(level='first', sort=sort).sum()
1818+
result1 = frame.groupby(level='second', sort=sort).sum()
18181819
assert_frame_equal(result0, expected0)
18191820
assert_frame_equal(result1, expected1)
18201821

18211822
# axis=1
18221823

1823-
result0 = frame.T.groupby(level=0, axis=1).sum()
1824-
result1 = frame.T.groupby(level=1, axis=1).sum()
1824+
result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum()
1825+
result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum()
18251826
assert_frame_equal(result0, expected0.T)
18261827
assert_frame_equal(result1, expected1.T)
18271828

@@ -1835,15 +1836,16 @@ def test_groupby_level_index_names(self):
18351836
df.groupby(level='exp')
18361837
pytest.raises(ValueError, df.groupby, level='foo')
18371838

1838-
def test_groupby_level_with_nas(self):
1839+
@pytest.mark.parametrize('sort', [True, False])
1840+
def test_groupby_level_with_nas(self, sort):
18391841
index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
18401842
labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1,
18411843
2, 3]])
18421844

18431845
# factorizing doesn't confuse things
18441846
s = Series(np.arange(8.), index=index)
1845-
result = s.groupby(level=0).sum()
1846-
expected = Series([22., 6.], index=[1, 0])
1847+
result = s.groupby(level=0, sort=sort).sum()
1848+
expected = Series([6., 22.], index=[0, 1])
18471849
assert_series_equal(result, expected)
18481850

18491851
index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
@@ -1852,8 +1854,8 @@ def test_groupby_level_with_nas(self):
18521854

18531855
# factorizing doesn't confuse things
18541856
s = Series(np.arange(8.), index=index)
1855-
result = s.groupby(level=0).sum()
1856-
expected = Series([18., 6.], index=[1, 0])
1857+
result = s.groupby(level=0, sort=sort).sum()
1858+
expected = Series([6., 18.], index=[0.0, 1.0])
18571859
assert_series_equal(result, expected)
18581860

18591861
def test_groupby_level_apply(self):
@@ -1936,9 +1938,13 @@ def test_groupby_complex(self):
19361938
result = a.sum(level=0)
19371939
assert_series_equal(result, expected)
19381940

1939-
def test_level_preserve_order(self):
1940-
grouped = self.mframe.groupby(level=0)
1941-
exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3], np.intp)
1941+
@pytest.mark.parametrize('sort,labels', [
1942+
[True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
1943+
[False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]]
1944+
])
1945+
def test_level_preserve_order(self, sort, labels):
1946+
grouped = self.mframe.groupby(level=0, sort=sort)
1947+
exp_labels = np.array(labels, np.intp)
19421948
assert_almost_equal(grouped.grouper.labels[0], exp_labels)
19431949

19441950
def test_grouping_labels(self):

pandas/tests/groupby/test_whitelist.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,14 @@ def raw_frame():
174174

175175

176176
@pytest.mark.parametrize(
177-
"op, level, axis, skipna",
177+
"op, level, axis, skipna, sort",
178178
product(AGG_FUNCTIONS,
179179
lrange(2), lrange(2),
180+
[True, False],
180181
[True, False]))
181-
def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna):
182+
def test_regression_whitelist_methods(
183+
raw_frame, op, level,
184+
axis, skipna, sort):
182185
# GH6944
183186
# explicitly test the whitelist methods
184187

@@ -188,15 +191,19 @@ def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna):
188191
frame = raw_frame.T
189192

190193
if op in AGG_FUNCTIONS_WITH_SKIPNA:
191-
grouped = frame.groupby(level=level, axis=axis)
194+
grouped = frame.groupby(level=level, axis=axis, sort=sort)
192195
result = getattr(grouped, op)(skipna=skipna)
193196
expected = getattr(frame, op)(level=level, axis=axis,
194197
skipna=skipna)
198+
if sort:
199+
expected = expected.sort_index(axis=axis, level=level)
195200
tm.assert_frame_equal(result, expected)
196201
else:
197-
grouped = frame.groupby(level=level, axis=axis)
202+
grouped = frame.groupby(level=level, axis=axis, sort=sort)
198203
result = getattr(grouped, op)()
199204
expected = getattr(frame, op)(level=level, axis=axis)
205+
if sort:
206+
expected = expected.sort_index(axis=axis, level=level)
200207
tm.assert_frame_equal(result, expected)
201208

202209

pandas/tests/test_multilevel.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1392,17 +1392,21 @@ def test_count(self):
13921392
AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
13931393
'mad', 'std', 'var', 'sem']
13941394

1395-
def test_series_group_min_max(self):
1395+
@pytest.mark.parametrize('sort', [True, False])
1396+
def test_series_group_min_max(self, sort):
13961397
for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2),
13971398
[False, True]):
1398-
grouped = self.series.groupby(level=level)
1399+
grouped = self.series.groupby(level=level, sort=sort)
13991400
aggf = lambda x: getattr(x, op)(skipna=skipna)
14001401
# skipna=True
14011402
leftside = grouped.agg(aggf)
14021403
rightside = getattr(self.series, op)(level=level, skipna=skipna)
1404+
if sort:
1405+
rightside = rightside.sort_index(level=level)
14031406
tm.assert_series_equal(leftside, rightside)
14041407

1405-
def test_frame_group_ops(self):
1408+
@pytest.mark.parametrize('sort', [True, False])
1409+
def test_frame_group_ops(self, sort):
14061410
self.frame.iloc[1, [1, 2]] = np.nan
14071411
self.frame.iloc[7, [0, 1]] = np.nan
14081412

@@ -1415,7 +1419,7 @@ def test_frame_group_ops(self):
14151419
else:
14161420
frame = self.frame.T
14171421

1418-
grouped = frame.groupby(level=level, axis=axis)
1422+
grouped = frame.groupby(level=level, axis=axis, sort=sort)
14191423

14201424
pieces = []
14211425

@@ -1426,6 +1430,9 @@ def aggf(x):
14261430
leftside = grouped.agg(aggf)
14271431
rightside = getattr(frame, op)(level=level, axis=axis,
14281432
skipna=skipna)
1433+
if sort:
1434+
rightside = rightside.sort_index(level=level, axis=axis)
1435+
frame = frame.sort_index(level=level, axis=axis)
14291436

14301437
# for good measure, groupby detail
14311438
level_index = frame._get_axis(axis).levels[level]

0 commit comments

Comments
 (0)