|
6 | 6 |
|
7 | 7 | import operator
|
8 | 8 | from datetime import datetime
|
| 9 | +from itertools import chain |
9 | 10 |
|
10 | 11 | import warnings
|
11 | 12 | import numpy as np
|
|
21 | 22 | from pandas.tests.frame.common import TestData
|
22 | 23 |
|
23 | 24 |
|
def _get_cython_table_params(frame, func_names_and_expected):
    """Expand method names into ``agg`` test parameters via the cython table.

    Each ``(name, expected)`` pair produces one parameter set for the method
    name itself, followed by one parameter set for every key of
    ``SelectionMixin._cython_table`` whose value equals that name.

    Parameters
    ----------
    frame : DataFrame
        A symmetrical DataFrame
    func_names_and_expected : sequence of (str, object) pairs
        The first item of each pair is the name of a NDFrame method
        ('sum', 'prod', etc.). The second item is the expected return value.

    Returns
    -------
    results : list of tuple
        ``(frame, func, expected)`` triples, where ``func`` is either the
        method name or a ``_cython_table`` key mapped to that name.
    """
    def _stable_key(item):
        # Sort on names only: they are identical in every process, whereas
        # dict iteration order over function keys is hash based and may
        # differ between pytest-xdist workers on Python < 3.6.
        func, name = item
        return (name,
                str(getattr(func, '__module__', '')),
                getattr(func, '__name__', type(func).__name__))

    table = sorted(pd.core.base.SelectionMixin._cython_table.items(),
                   key=_stable_key)

    results = []
    for func_name, expected in func_names_and_expected:
        # the string alias first, then every function mapped to it
        results.append((frame, func_name, expected))
        results.extend((frame, func, expected)
                       for func, name in table if name == func_name)
    return results
| 55 | + |
| 56 | + |
24 | 57 | class TestDataFrameApply(TestData):
|
25 | 58 |
|
26 | 59 | def test_apply(self):
|
@@ -950,38 +983,47 @@ def test_agg_dict_nested_renaming_depr(self):
|
950 | 983 | df.agg({'A': {'foo': 'min'},
|
951 | 984 | 'B': {'bar': 'max'}})
|
952 | 985 |
|
953 |
def test_agg_reduce(self, axis):
    """Check ``agg`` with reducing functions along ``axis``.

    Covers a list of reducers, a dict of scalar reducers, and dicts of
    lists of reducers. The dict keys are the first two unique labels of
    the axis perpendicular to ``axis``.
    """
    # Explicit mapping instead of ``abs(axis - 1)`` so the label aliases
    # 'index'/'columns' are handled as well as the integers 0/1.
    other_axis = 1 if axis in (0, 'index') else 0
    name1, name2 = self.frame.axes[other_axis].unique()[:2]

    # slices that the dict-based aggregations below are compared against
    data1 = self.frame.loc(other_axis)[name1]
    data2 = self.frame.loc(other_axis)[name2]

    # all reducers
    expected = zip_frames(self.frame.mean(axis=axis).to_frame(),
                          self.frame.max(axis=axis).to_frame(),
                          self.frame.sum(axis=axis).to_frame()).T
    expected.index = ['mean', 'max', 'sum']
    result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
    assert_frame_equal(result, expected)

    # dict input with scalars
    func = {name1: 'mean', name2: 'sum'}
    result = self.frame.agg(func, axis=axis)
    expected = Series([data1.mean(), data2.sum()],
                      index=[name1, name2])
    assert_series_equal(result.reindex_like(expected), expected)

    # dict input with lists
    func = {name1: ['mean'], name2: ['sum']}
    result = self.frame.agg(func, axis=axis)
    expected = DataFrame({
        name1: Series([data1.mean()], index=['mean']),
        name2: Series([data2.sum()], index=['sum'])})
    assert_frame_equal(result.reindex_like(expected), expected)

    # dict input with lists with multiple
    func = {name1: ['mean', 'sum'],
            name2: ['sum', 'max']}
    result = self.frame.agg(func, axis=axis)
    expected = DataFrame({
        name1: Series([data1.mean(), data1.sum()],
                      index=['mean', 'sum']),
        name2: Series([data2.sum(), data2.max()],
                      index=['sum', 'max'])})
    assert_frame_equal(result.reindex_like(expected), expected)
|
986 | 1028 |
|
987 | 1029 | def test_nuiscance_columns(self):
|
@@ -1057,72 +1099,66 @@ def test_non_callable_aggregates(self):
|
1057 | 1099 |
|
1058 | 1100 | assert result == expected
|
1059 | 1101 |
|
1060 |
@pytest.mark.parametrize("df, func, expected", (
    _get_cython_table_params(
        DataFrame(), [
            ('sum', Series()),
            ('max', Series()),
            ('min', Series()),
            ('all', Series(dtype=bool)),
            ('any', Series(dtype=bool)),
            ('mean', Series()),
            ('prod', Series()),
            ('std', Series()),
            ('var', Series()),
            ('median', Series()),
        ])
    + _get_cython_table_params(
        DataFrame([[np.nan, 1], [1, 2]]), [
            ('sum', Series([1., 3])),
            ('max', Series([1., 2])),
            ('min', Series([1., 1])),
            ('all', Series([True, True])),
            ('any', Series([True, True])),
            ('mean', Series([1, 1.5])),
            ('prod', Series([1., 2])),
            ('std', Series([np.nan, 0.707107])),
            ('var', Series([np.nan, 0.5])),
            ('median', Series([1, 1.5])),
        ])
))
def test_agg_cython_table(self, df, func, expected, axis):
    """Check that ``agg`` with a reducing function gives ``expected``.

    ``func`` is either a method name or a function taken from
    pandas.core.base.SelectionMixin._cython_table; the result is compared
    as a Series.
    """
    # GH21224
    reduced = df.agg(func, axis=axis)
    tm.assert_series_equal(reduced, expected)
| 1136 | + |
@pytest.mark.parametrize("df, func, expected", (
    _get_cython_table_params(
        DataFrame(), [
            ('cumprod', DataFrame()),
            ('cumsum', DataFrame()),
        ])
    + _get_cython_table_params(
        DataFrame([[np.nan, 1], [1, 2]]), [
            ('cumprod', DataFrame([[np.nan, 1], [1., 2.]])),
            ('cumsum', DataFrame([[np.nan, 1], [1., 3.]])),
        ])
))
def test_agg_cython_table_transform(self, df, func, expected, axis):
    """Check that ``agg`` with a transforming function gives ``expected``.

    Covers the transforming entries (cumprod, cumsum) of
    pandas.core.base.SelectionMixin._cython_table; the result is compared
    as a DataFrame.
    """
    # GH21224
    transformed = df.agg(func, axis=axis)
    tm.assert_frame_equal(transformed, expected)
| 1155 | + |
@pytest.mark.parametrize(
    "df, func, expected",
    _get_cython_table_params(
        DataFrame([['a', 'b'], ['b', 'a']]),
        [('cumprod', TypeError)]))
def test_agg_cython_table_raises(self, df, func, expected, axis):
    """Check that ``agg`` raises ``expected`` for the given ``func``."""
    # GH21224
    with pytest.raises(expected):
        df.agg(func, axis=axis)
0 commit comments