Skip to content

Commit 689ce83

Browse files
author
tp
committed
Fix tests for bug where df.agg(..., axis=1) gives wrong result
1 parent 42ad231 commit 689ce83

File tree

6 files changed

+236
-159
lines changed

6 files changed

+236
-159
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ Offsets
119119
Numeric
120120
^^^^^^^
121121

122-
-
122+
- :meth:`~DataFrame.agg` now handles built-in methods like ``sum`` in the same manner when ``axis=1`` as when ``axis=0`` (:issue:`21224`)
123123
-
124124
-
125125

pandas/conftest.py

+8
Original file line number | Diff line number | Diff line change
@@ -170,3 +170,11 @@ def string_dtype(request):
170170
* 'U'
171171
"""
172172
return request.param
173+
174+
175+
@pytest.fixture(params=[0, 1], ids="axis {}".format)
def axis(request):
    """
    Fixture yielding each axis number of a dataframe in turn (0, then 1).

    The test id for each parameter is rendered as ``"axis <n>"``.
    """
    return request.param

pandas/core/frame.py

-1
Original file line number | Diff line number | Diff line change
@@ -5829,7 +5829,6 @@ def aggregate(self, func, axis=0, *args, **kwargs):
58295829
return self.apply(func, axis=axis, args=args, **kwargs)
58305830
return result
58315831

5832-
@Appender(NDFrame._aggregate.__doc__, indents=2)
58335832
def _aggregate(self, arg, axis=0, *args, **kwargs):
58345833
obj = self.T if axis == 1 else self
58355834
return super(DataFrame, obj)._aggregate(arg, *args, **kwargs)

pandas/core/indexing.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1795,7 +1795,7 @@ def error():
17951795
error()
17961796
raise
17971797
except:
1798-
error()
1798+
raise
17991799

18001800
def _is_scalar_access(self, key):
18011801
# this is a shortcut accessor to both .loc and .iloc

pandas/tests/frame/test_apply.py

+124-89
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import operator
88
from datetime import datetime
9+
from itertools import chain
910

1011
import warnings
1112
import numpy as np
@@ -21,6 +22,37 @@
2122
from pandas.tests.frame.common import TestData
2223

2324

25+
def _get_cython_table_params(frame, func_names_and_expected):
    """
    Build ``(frame, func, expected)`` parameter triples for agg tests.

    For every ``(func_name, expected)`` pair, one triple is emitted for the
    string name itself, followed by one triple for each key of
    ``SelectionMixin._cython_table`` whose value equals that name.

    Parameters
    ----------
    frame : DataFrame
        A symmetrical DataFrame; it is placed unchanged as the first item
        of every returned triple.
    func_names_and_expected : iterable of two-item sequences
        The first item is the name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list of tuple
        ``(frame, func, expected)`` triples, where ``func`` is either the
        method name or a ``_cython_table`` key mapped to that name.
    """
    table = pd.core.base.SelectionMixin._cython_table
    if compat.PY36:
        # dict iteration follows insertion order on Python 3.6+, so the
        # parameter order is already reproducible between runs
        table = list(table.items())
    else:
        # dicts have random order in Python<3.6, which xdist doesn't like.
        # Sort on each function's own module and name: sorting on
        # ``__class__.__name__`` (the *type* of the key) ties most entries,
        # and a stable sort would then keep the random dict order.
        table = sorted(
            table.items(),
            key=lambda item: (str(getattr(item[0], '__module__', '')),
                              str(getattr(item[0], '__name__', ''))))
    results = []
    for func_name, expected in func_names_and_expected:
        # the string alias first, then every callable mapped to it
        results.append((frame, func_name, expected))
        results.extend((frame, func, expected)
                       for func, name in table if name == func_name)
    return results
54+
55+
2456
class TestDataFrameApply(TestData):
2557

2658
def test_apply(self):
@@ -950,38 +982,47 @@ def test_agg_dict_nested_renaming_depr(self):
950982
df.agg({'A': {'foo': 'min'},
951983
'B': {'bar': 'max'}})
952984

953-
def test_agg_reduce(self):
985+
def test_agg_reduce(self, axis):
986+
other_axis = abs(axis - 1)
987+
name1, name2 = self.frame.axes[other_axis].unique()[:2]
988+
954989
# all reducers
955-
expected = zip_frames(self.frame.mean().to_frame(),
956-
self.frame.max().to_frame(),
957-
self.frame.sum().to_frame()).T
990+
expected = zip_frames(self.frame.mean(axis=axis).to_frame(),
991+
self.frame.max(axis=axis).to_frame(),
992+
self.frame.sum(axis=axis).to_frame()).T
958993
expected.index = ['mean', 'max', 'sum']
959-
result = self.frame.agg(['mean', 'max', 'sum'])
994+
result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
960995
assert_frame_equal(result, expected)
961996

962997
# dict input with scalars
963-
result = self.frame.agg({'A': 'mean', 'B': 'sum'})
964-
expected = Series([self.frame.A.mean(), self.frame.B.sum()],
965-
index=['A', 'B'])
998+
func = {name1: 'mean', name2: 'sum'}
999+
result = self.frame.agg(func, axis=axis)
1000+
expected = Series([self.frame.loc(other_axis)[name1].mean(),
1001+
self.frame.loc(other_axis)[name2].sum()],
1002+
index=[name1, name2])
9661003
assert_series_equal(result.reindex_like(expected), expected)
9671004

9681005
# dict input with lists
969-
result = self.frame.agg({'A': ['mean'], 'B': ['sum']})
970-
expected = DataFrame({'A': Series([self.frame.A.mean()],
971-
index=['mean']),
972-
'B': Series([self.frame.B.sum()],
973-
index=['sum'])})
1006+
func = {name1: ['mean'], name2: ['sum']}
1007+
result = self.frame.agg(func, axis=axis)
1008+
expected = DataFrame({
1009+
name1: Series([self.frame.loc(other_axis)[name1].mean()],
1010+
index=['mean']),
1011+
name2: Series([self.frame.loc(other_axis)[name2].sum()],
1012+
index=['sum'])})
9741013
assert_frame_equal(result.reindex_like(expected), expected)
9751014

9761015
# dict input with lists with multiple
977-
result = self.frame.agg({'A': ['mean', 'sum'],
978-
'B': ['sum', 'max']})
979-
expected = DataFrame({'A': Series([self.frame.A.mean(),
980-
self.frame.A.sum()],
981-
index=['mean', 'sum']),
982-
'B': Series([self.frame.B.sum(),
983-
self.frame.B.max()],
984-
index=['sum', 'max'])})
1016+
func = {name1: ['mean', 'sum'],
1017+
name2: ['sum', 'max']}
1018+
result = self.frame.agg(func, axis=axis)
1019+
expected = DataFrame({
1020+
name1: Series([self.frame.loc(other_axis)[name1].mean(),
1021+
self.frame.loc(other_axis)[name1].sum()],
1022+
index=['mean', 'sum']),
1023+
name2: Series([self.frame.loc(other_axis)[name2].sum(),
1024+
self.frame.loc(other_axis)[name2].max()],
1025+
index=['sum', 'max'])})
9851026
assert_frame_equal(result.reindex_like(expected), expected)
9861027

9871028
def test_nuiscance_columns(self):
@@ -1057,72 +1098,66 @@ def test_non_callable_aggregates(self):
10571098

10581099
assert result == expected
10591100

1060-
@pytest.mark.parametrize("frame, expected_dict", [
1061-
[DataFrame(), {
1062-
'sum': Series(),
1063-
'max': Series(),
1064-
'min': Series(),
1065-
'all': Series(dtype=bool),
1066-
'any': Series(dtype=bool),
1067-
'mean': Series(),
1068-
'prod': Series(),
1069-
'std': Series(),
1070-
'var': Series(),
1071-
'median': Series(),
1072-
'cumprod': DataFrame(),
1073-
'cumsum': DataFrame(),
1074-
}],
1075-
[DataFrame([[np.nan, 1], [1, 2]]), {
1076-
'sum': Series([1., 3]),
1077-
'max': Series([1., 2]),
1078-
'min': Series([1., 1]),
1079-
'all': Series([True, True]),
1080-
'any': Series([True, True]),
1081-
'mean': Series([1, 1.5]),
1082-
'prod': Series([1., 2]),
1083-
'std': Series([np.nan, 0.707107]),
1084-
'var': Series([np.nan, 0.5]),
1085-
'median': Series([1, 1.5]),
1086-
'cumprod': DataFrame([[np.nan, 1], [1., 2.]]),
1087-
'cumsum': DataFrame([[np.nan, 1], [1., 3.]]),
1088-
}],
1089-
[DataFrame([['a', 'b'], ['b', 'a']]), {
1090-
'sum': Series(['ab', 'ba']),
1091-
'max': Series(['b', 'b']),
1092-
'min': Series(['a', 'a']),
1093-
'all': Series([True, True]),
1094-
'any': Series([True, True]),
1095-
'mean': Series([], index=pd.Index([], dtype='int64')),
1096-
'prod': Series([], index=pd.Index([], dtype='int64')),
1097-
'std': Series([], index=pd.Index([], dtype='int64')),
1098-
'var': Series([], index=pd.Index([], dtype='int64')),
1099-
'median': Series([], index=pd.Index([], dtype='int64')),
1100-
'cumprod': TypeError,
1101-
'cumsum': DataFrame([['a', 'b'], ['ab', 'ba']]),
1102-
}],
1103-
])
1104-
@pytest.mark.parametrize("axis", [0, 1], ids=lambda x: "axis {}".format(x))
1105-
def test_agg_cython_table(self, cython_table_items,
1106-
frame, expected_dict, axis):
1101+
@pytest.mark.parametrize("df, func, expected", chain(
1102+
_get_cython_table_params(
1103+
DataFrame(), [
1104+
('sum', Series()),
1105+
('max', Series()),
1106+
('min', Series()),
1107+
('all', Series(dtype=bool)),
1108+
('any', Series(dtype=bool)),
1109+
('mean', Series()),
1110+
('prod', Series()),
1111+
('std', Series()),
1112+
('var', Series()),
1113+
('median', Series()),
1114+
]),
1115+
_get_cython_table_params(
1116+
DataFrame([[np.nan, 1], [1, 2]]), [
1117+
('sum', Series([1., 3])),
1118+
('max', Series([1., 2])),
1119+
('min', Series([1., 1])),
1120+
('all', Series([True, True])),
1121+
('any', Series([True, True])),
1122+
('mean', Series([1, 1.5])),
1123+
('prod', Series([1., 2])),
1124+
('std', Series([np.nan, 0.707107])),
1125+
('var', Series([np.nan, 0.5])),
1126+
('median', Series([1, 1.5])),
1127+
]),
1128+
))
1129+
def test_agg_cython_table(self, df, func, expected, axis):
11071130
# GH21224
1108-
# test if using items in pandas.core.base.SelectionMixin._cython_table
1109-
# in agg gives correct results
1110-
np_func, str_func = cython_table_items
1111-
expected = expected_dict[str_func]
1112-
1113-
if isinstance(expected, type) and issubclass(expected, Exception):
1114-
with pytest.raises(expected):
1115-
# e.g. DataFrame(['a b'.split()]).cumprod() will raise
1116-
frame.agg(np_func, axis=axis)
1117-
with pytest.raises(expected):
1118-
frame.agg(str_func, axis=axis)
1119-
return
1120-
1121-
result = frame.agg(np_func, axis=axis)
1122-
result_str_func = frame.agg(str_func, axis=axis)
1123-
if str_func in ('cumprod', 'cumsum'):
1124-
tm.assert_frame_equal(result, expected)
1125-
tm.assert_frame_equal(result_str_func, expected)
1126-
else:
1127-
tm.assert_series_equal(result, expected)
1128-
tm.assert_series_equal(result_str_func, expected)
1131+
# test reducing functions in
1132+
# pandas.core.base.SelectionMixin._cython_table
1133+
result = df.agg(func, axis=axis)
1134+
tm.assert_series_equal(result, expected)
1135+
1136+
@pytest.mark.parametrize(
    "df, func, expected",
    _get_cython_table_params(
        DataFrame(),
        [('cumprod', DataFrame()),
         ('cumsum', DataFrame())]
    ) + _get_cython_table_params(
        DataFrame([[np.nan, 1], [1, 2]]),
        [('cumprod', DataFrame([[np.nan, 1], [1., 2.]])),
         ('cumsum', DataFrame([[np.nan, 1], [1., 3.]]))]
    ))
def test_agg_cython_table_transform(self, df, func, expected, axis):
    # GH21224
    # Transforming entries (cumprod, cumsum) of
    # pandas.core.base.SelectionMixin._cython_table return a frame, so the
    # outcome of agg is compared as a DataFrame for the given axis.
    tm.assert_frame_equal(df.agg(func, axis=axis), expected)
1154+
1155+
@pytest.mark.parametrize(
    "df, func, expected",
    _get_cython_table_params(
        DataFrame([['a', 'b'], ['b', 'a']]),
        [('cumprod', TypeError)]
    ))
def test_agg_cython_table_raises(self, df, func, expected, axis):
    # GH21224
    # ``expected`` is an exception class here: agg with this function must
    # raise it for the given axis.
    with pytest.raises(expected):
        df.agg(func, axis=axis)

0 commit comments

Comments
 (0)