Skip to content

Commit ad06799

Browse files
author
tp
committed
Fix tests for bug where df.agg(..., axis=1) gives wrong result
1 parent c566215 commit ad06799

File tree

4 files changed

+238
-163
lines changed

4 files changed

+238
-163
lines changed

pandas/core/frame.py

-1
Original file line numberDiff line numberDiff line change
@@ -5832,7 +5832,6 @@ def aggregate(self, func, axis=0, *args, **kwargs):
58325832
return self.apply(func, axis=axis, args=args, **kwargs)
58335833
return result
58345834

5835-
@Appender(NDFrame._aggregate.__doc__, indents=2)
58365835
def _aggregate(self, arg, axis=0, *args, **kwargs):
58375836
obj = self.T if axis == 1 else self
58385837
return super(DataFrame, obj)._aggregate(arg, *args, **kwargs)

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,7 @@ def error():
18291829
error()
18301830
raise
18311831
except:
1832-
error()
1832+
raise
18331833

18341834
if not ax.contains(key):
18351835
error()

pandas/tests/frame/test_apply.py

+130-94
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import operator
88
from datetime import datetime
9+
from itertools import chain
910

1011
import warnings
1112
import numpy as np
@@ -21,6 +22,38 @@
2122
from pandas.tests.frame.common import TestData
2223

2324

25+
def _get_cython_table_params(frame, func_names_and_expected):
    """Build parametrization triples from ``SelectionMixin._cython_table``.

    For every ``(func_name, expected)`` pair one triple is emitted for the
    string name itself, followed by one triple for each key of
    ``pd.core.base.SelectionMixin._cython_table`` whose value equals
    ``func_name``.

    Parameters
    ----------
    frame : DataFrame
        Placed unchanged in the first slot of every returned triple.
        NOTE(review): the callers reuse one ``expected`` for both axes, so
        the frame presumably has to aggregate identically along axis 0 and
        axis 1 ("symmetrical") - confirm against the tests.
    func_names_and_expected : sequence of two-item sequences
        Each item is ``(func_name, expected)``: the name of an NDFrame
        method ('sum', 'prod', ...) and the value the test should expect
        (a result object or an exception class).

    Returns
    -------
    results : list of tuple
        ``(frame, func, expected)`` triples, where ``func`` is either the
        string name or a matching key taken from the table.
    """
    table = pd.core.base.SelectionMixin._cython_table
    if compat.PY36:
        # Iteration order of the dict is relied upon as-is on this branch.
        table = list(table.items())
    else:
        # Dict order is arbitrary in Python < 3.6, which xdist doesn't like.
        # Sorting on the key's class name alone is not enough: many keys
        # share one class name and a stable sort would keep their arbitrary
        # relative order. Sort on (mapped name, module, function name)
        # instead so that the resulting order is reproducible.
        # NOTE(review): assumes every key exposes __name__/__module__ the
        # way plain functions do; repr() is only a fallback.
        table = sorted(
            table.items(),
            key=lambda item: (
                str(item[1]),
                getattr(item[0], '__module__', None) or '',
                getattr(item[0], '__name__', None) or repr(item[0]),
            ))

    results = []
    for func_name, expected in func_names_and_expected:
        # The string alias goes first, then every table key mapped to it.
        results.append((frame, func_name, expected))
        results.extend(
            (frame, func, expected)
            for func, name in table
            if name == func_name)
    return results
55+
56+
2457
class TestDataFrameApply(TestData):
2558

2659
def test_apply(self):
@@ -867,27 +900,27 @@ def test_agg_transform(self):
867900
result = self.frame.transform(['sqrt', np.abs])
868901
assert_frame_equal(result, expected)
869902

870-
def test_transform_and_agg_err(self):
903+
def test_transform_and_agg_err(self, axis):
871904
# cannot both transform and agg
872905
def f():
873-
self.frame.transform(['max', 'min'])
906+
self.frame.transform(['max', 'min'], axis=axis)
874907
pytest.raises(ValueError, f)
875908

876909
def f():
877910
with np.errstate(all='ignore'):
878-
self.frame.agg(['max', 'sqrt'])
911+
self.frame.agg(['max', 'sqrt'], axis=axis)
879912
pytest.raises(ValueError, f)
880913

881914
def f():
882915
with np.errstate(all='ignore'):
883-
self.frame.transform(['max', 'sqrt'])
916+
self.frame.transform(['max', 'sqrt'], axis=axis)
884917
pytest.raises(ValueError, f)
885918

886919
df = pd.DataFrame({'A': range(5), 'B': 5})
887920

888921
def f():
889922
with np.errstate(all='ignore'):
890-
df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']})
923+
df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}, axis=axis)
891924

892925
@pytest.mark.parametrize('method', [
893926
'abs', 'shift', 'pct_change', 'cumsum', 'rank',
@@ -950,38 +983,47 @@ def test_agg_dict_nested_renaming_depr(self):
950983
df.agg({'A': {'foo': 'min'},
951984
'B': {'bar': 'max'}})
952985

953-
def test_agg_reduce(self):
986+
def test_agg_reduce(self, axis):
987+
other_axis = abs(axis - 1)
988+
name1, name2 = self.frame.axes[other_axis].unique()[:2]
989+
954990
# all reducers
955-
expected = zip_frames(self.frame.mean().to_frame(),
956-
self.frame.max().to_frame(),
957-
self.frame.sum().to_frame()).T
991+
expected = zip_frames(self.frame.mean(axis=axis).to_frame(),
992+
self.frame.max(axis=axis).to_frame(),
993+
self.frame.sum(axis=axis).to_frame()).T
958994
expected.index = ['mean', 'max', 'sum']
959-
result = self.frame.agg(['mean', 'max', 'sum'])
995+
result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
960996
assert_frame_equal(result, expected)
961997

962998
# dict input with scalars
963-
result = self.frame.agg({'A': 'mean', 'B': 'sum'})
964-
expected = Series([self.frame.A.mean(), self.frame.B.sum()],
965-
index=['A', 'B'])
999+
func = {name1: 'mean', name2: 'sum'}
1000+
result = self.frame.agg(func, axis=axis)
1001+
expected = Series([self.frame.loc(other_axis)[name1].mean(),
1002+
self.frame.loc(other_axis)[name2].sum()],
1003+
index=[name1, name2])
9661004
assert_series_equal(result.reindex_like(expected), expected)
9671005

9681006
# dict input with lists
969-
result = self.frame.agg({'A': ['mean'], 'B': ['sum']})
970-
expected = DataFrame({'A': Series([self.frame.A.mean()],
971-
index=['mean']),
972-
'B': Series([self.frame.B.sum()],
973-
index=['sum'])})
1007+
func = {name1: ['mean'], name2: ['sum']}
1008+
result = self.frame.agg(func, axis=axis)
1009+
expected = DataFrame({
1010+
name1: Series([self.frame.loc(other_axis)[name1].mean()],
1011+
index=['mean']),
1012+
name2: Series([self.frame.loc(other_axis)[name2].sum()],
1013+
index=['sum'])})
9741014
assert_frame_equal(result.reindex_like(expected), expected)
9751015

9761016
# dict input with lists with multiple
977-
result = self.frame.agg({'A': ['mean', 'sum'],
978-
'B': ['sum', 'max']})
979-
expected = DataFrame({'A': Series([self.frame.A.mean(),
980-
self.frame.A.sum()],
981-
index=['mean', 'sum']),
982-
'B': Series([self.frame.B.sum(),
983-
self.frame.B.max()],
984-
index=['sum', 'max'])})
1017+
func = {name1: ['mean', 'sum'],
1018+
name2: ['sum', 'max']}
1019+
result = self.frame.agg(func, axis=axis)
1020+
expected = DataFrame({
1021+
name1: Series([self.frame.loc(other_axis)[name1].mean(),
1022+
self.frame.loc(other_axis)[name1].sum()],
1023+
index=['mean', 'sum']),
1024+
name2: Series([self.frame.loc(other_axis)[name2].sum(),
1025+
self.frame.loc(other_axis)[name2].max()],
1026+
index=['sum', 'max'])})
9851027
assert_frame_equal(result.reindex_like(expected), expected)
9861028

9871029
def test_nuiscance_columns(self):
@@ -1057,72 +1099,66 @@ def test_non_callable_aggregates(self):
10571099

10581100
assert result == expected
10591101

1060-
@pytest.mark.parametrize("frame, expected_dict", [
1061-
[DataFrame(), {
1062-
'sum': Series(),
1063-
'max': Series(),
1064-
'min': Series(),
1065-
'all': Series(dtype=bool),
1066-
'any': Series(dtype=bool),
1067-
'mean': Series(),
1068-
'prod': Series(),
1069-
'std': Series(),
1070-
'var': Series(),
1071-
'median': Series(),
1072-
'cumprod': DataFrame(),
1073-
'cumsum': DataFrame(),
1074-
}],
1075-
[DataFrame([[np.nan, 1], [1, 2]]), {
1076-
'sum': Series([1., 3]),
1077-
'max': Series([1., 2]),
1078-
'min': Series([1., 1]),
1079-
'all': Series([True, True]),
1080-
'any': Series([True, True]),
1081-
'mean': Series([1, 1.5]),
1082-
'prod': Series([1., 2]),
1083-
'std': Series([np.nan, 0.707107]),
1084-
'var': Series([np.nan, 0.5]),
1085-
'median': Series([1, 1.5]),
1086-
'cumprod': DataFrame([[np.nan, 1], [1., 2.]]),
1087-
'cumsum': DataFrame([[np.nan, 1], [1., 3.]]),
1088-
}],
1089-
[DataFrame([['a', 'b'], ['b', 'a']]), {
1090-
'sum': Series(['ab', 'ba']),
1091-
'max': Series(['b', 'b']),
1092-
'min': Series(['a', 'a']),
1093-
'all': Series([True, True]),
1094-
'any': Series([True, True]),
1095-
'mean': Series([], index=pd.Index([], dtype='int64')),
1096-
'prod': Series([], index=pd.Index([], dtype='int64')),
1097-
'std': Series([], index=pd.Index([], dtype='int64')),
1098-
'var': Series([], index=pd.Index([], dtype='int64')),
1099-
'median': Series([], index=pd.Index([], dtype='int64')),
1100-
'cumprod': TypeError,
1101-
'cumsum': DataFrame([['a', 'b'], ['ab', 'ba']]),
1102-
}],
1103-
])
1104-
@pytest.mark.parametrize("axis", [0, 1], ids=lambda x: "axis {}".format(x))
1105-
def test_agg_cython_table(self, cython_table_items,
1106-
frame, expected_dict, axis):
1102+
@pytest.mark.parametrize("df, func, expected", chain(
1103+
_get_cython_table_params(
1104+
DataFrame(), [
1105+
('sum', Series()),
1106+
('max', Series()),
1107+
('min', Series()),
1108+
('all', Series(dtype=bool)),
1109+
('any', Series(dtype=bool)),
1110+
('mean', Series()),
1111+
('prod', Series()),
1112+
('std', Series()),
1113+
('var', Series()),
1114+
('median', Series()),
1115+
]),
1116+
_get_cython_table_params(
1117+
DataFrame([[np.nan, 1], [1, 2]]), [
1118+
('sum', Series([1., 3])),
1119+
('max', Series([1., 2])),
1120+
('min', Series([1., 1])),
1121+
('all', Series([True, True])),
1122+
('any', Series([True, True])),
1123+
('mean', Series([1, 1.5])),
1124+
('prod', Series([1., 2])),
1125+
('std', Series([np.nan, 0.707107])),
1126+
('var', Series([np.nan, 0.5])),
1127+
('median', Series([1, 1.5])),
1128+
]),
1129+
))
1130+
def test_agg_cython_table(self, df, func, expected, axis):
11071131
# GH21224
1108-
# test if using items in pandas.core.base.SelectionMixin._cython_table
1109-
# in agg gives correct results
1110-
np_func, str_func = cython_table_items
1111-
expected = expected_dict[str_func]
1112-
1113-
if isinstance(expected, type) and issubclass(expected, Exception):
1114-
with pytest.raises(expected):
1115-
# e.g. DataFrame(['a b'.split()]).cumprod() will raise
1116-
frame.agg(np_func, axis=axis)
1117-
with pytest.raises(expected):
1118-
frame.agg(str_func, axis=axis)
1119-
return
1120-
1121-
result = frame.agg(np_func, axis=axis)
1122-
result_str_func = frame.agg(str_func, axis=axis)
1123-
if str_func in ('cumprod', 'cumsum'):
1124-
tm.assert_frame_equal(result, expected)
1125-
tm.assert_frame_equal(result_str_func, expected)
1126-
else:
1127-
tm.assert_series_equal(result, expected)
1128-
tm.assert_series_equal(result_str_func, expected)
1132+
# test reducing functions in
1133+
# pandas.core.base.SelectionMixin._cython_table
1134+
result = df.agg(func, axis=axis)
1135+
tm.assert_series_equal(result, expected)
1136+
1137+
@pytest.mark.parametrize("df, func, expected", chain(
1138+
_get_cython_table_params(
1139+
DataFrame(), [
1140+
('cumprod', DataFrame()),
1141+
('cumsum', DataFrame()),
1142+
]),
1143+
_get_cython_table_params(
1144+
DataFrame([[np.nan, 1], [1, 2]]), [
1145+
('cumprod', DataFrame([[np.nan, 1], [1., 2.]])),
1146+
('cumsum', DataFrame([[np.nan, 1], [1., 3.]])),
1147+
]),
1148+
))
1149+
def test_agg_cython_table_transform(self, df, func, expected, axis):
1150+
# GH21224
1151+
# test transforming functions in
1152+
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
1153+
result = df.agg(func, axis=axis)
1154+
tm.assert_frame_equal(result, expected)
1155+
1156+
@pytest.mark.parametrize("df, func, expected", _get_cython_table_params(
1157+
DataFrame([['a', 'b'], ['b', 'a']]), [
1158+
['cumprod', TypeError],
1159+
]),
1160+
)
1161+
def test_agg_cython_table_raises(self, df, func, expected, axis):
1162+
# GH21224
1163+
with pytest.raises(expected):
1164+
df.agg(func, axis=axis)

0 commit comments

Comments
 (0)