|
12 | 12 | )
|
13 | 13 |
|
14 | 14 | from pandas import compat
|
15 |
| -from pandas.compat.numpy import function as nv |
16 |
| -from pandas.compat.numpy import _np_version_under1p8 |
| 15 | +from pandas.compat.numpy import function as nv, _np_version_under1p8 |
| 16 | +from pandas.compat import set_function_name |
17 | 17 |
|
18 | 18 | from pandas.types.common import (is_numeric_dtype,
|
19 | 19 | is_timedelta64_dtype, is_datetime64_dtype,
|
|
172 | 172 | 'cummin', 'cummax'])
|
173 | 173 |
|
174 | 174 |
|
175 |
| -def _groupby_function(name, alias, npfunc, numeric_only=True, |
176 |
| - _convert=False): |
177 |
| - |
178 |
| - _local_template = "Compute %(f)s of group values" |
179 |
| - |
180 |
| - @Substitution(name='groupby', f=name) |
181 |
| - @Appender(_doc_template) |
182 |
| - @Appender(_local_template) |
183 |
| - def f(self, **kwargs): |
184 |
| - if 'numeric_only' not in kwargs: |
185 |
| - kwargs['numeric_only'] = numeric_only |
186 |
| - self._set_group_selection() |
187 |
| - try: |
188 |
| - return self._cython_agg_general(alias, alt=npfunc, **kwargs) |
189 |
| - except AssertionError as e: |
190 |
| - raise SpecificationError(str(e)) |
191 |
| - except Exception: |
192 |
| - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) |
193 |
| - if _convert: |
194 |
| - result = result._convert(datetime=True) |
195 |
| - return result |
196 |
| - |
197 |
| - f.__name__ = name |
198 |
| - |
199 |
| - return f |
200 |
| - |
201 |
| - |
202 |
| -def _first_compat(x, axis=0): |
203 |
| - |
204 |
| - def _first(x): |
205 |
| - |
206 |
| - x = np.asarray(x) |
207 |
| - x = x[notnull(x)] |
208 |
| - if len(x) == 0: |
209 |
| - return np.nan |
210 |
| - return x[0] |
211 |
| - |
212 |
| - if isinstance(x, DataFrame): |
213 |
| - return x.apply(_first, axis=axis) |
214 |
| - else: |
215 |
| - return _first(x) |
216 |
| - |
217 |
| - |
218 |
| -def _last_compat(x, axis=0): |
219 |
| - def _last(x): |
220 |
| - |
221 |
| - x = np.asarray(x) |
222 |
| - x = x[notnull(x)] |
223 |
| - if len(x) == 0: |
224 |
| - return np.nan |
225 |
| - return x[-1] |
226 |
| - |
227 |
| - if isinstance(x, DataFrame): |
228 |
| - return x.apply(_last, axis=axis) |
229 |
| - else: |
230 |
| - return _last(x) |
231 |
| - |
232 |
| - |
233 | 175 | class Grouper(object):
|
234 | 176 | """
|
235 | 177 | A Grouper allows the user to specify a groupby instruction for a target
|
@@ -1184,14 +1126,76 @@ def size(self):
|
1184 | 1126 | result.name = getattr(self, 'name', None)
|
1185 | 1127 | return result
|
1186 | 1128 |
|
1187 |
| - sum = _groupby_function('sum', 'add', np.sum) |
1188 |
| - prod = _groupby_function('prod', 'prod', np.prod) |
1189 |
| - min = _groupby_function('min', 'min', np.min, numeric_only=False) |
1190 |
| - max = _groupby_function('max', 'max', np.max, numeric_only=False) |
1191 |
| - first = _groupby_function('first', 'first', _first_compat, |
1192 |
| - numeric_only=False, _convert=True) |
1193 |
| - last = _groupby_function('last', 'last', _last_compat, numeric_only=False, |
1194 |
| - _convert=True) |
@classmethod
def _add_numeric_operations(cls):
    """Attach the generated reduction methods to ``cls``.

    Builds ``sum``, ``prod``, ``min``, ``max``, ``first`` and ``last``
    from one shared factory and stores each of them as an attribute of
    ``cls`` under its own name.
    """

    def groupby_function(name, alias, npfunc,
                         numeric_only=True, _convert=False):
        # Factory for one reduction method.
        #   name         -- attribute / function name of the generated method
        #   alias        -- first argument given to ``_cython_agg_general``
        #   npfunc       -- passed as ``alt=`` and used by the fallback path
        #   numeric_only -- default injected when the caller omits it
        #   _convert     -- run ``_convert(datetime=True)`` on fallback output
        _local_template = "Compute %(f)s of group values"

        # ``f`` intentionally has no docstring of its own: the decorators
        # below are what supply its documentation text.
        @Substitution(name='groupby', f=name)
        @Appender(_doc_template)
        @Appender(_local_template)
        def f(self, **kwargs):
            kwargs.setdefault('numeric_only', numeric_only)
            self._set_group_selection()
            try:
                return self._cython_agg_general(
                    alias, alt=npfunc, **kwargs)
            except AssertionError as e:
                # Surface assertion failures as a SpecificationError.
                raise SpecificationError(str(e))
            except Exception:
                # Anything else: fall back to ``aggregate`` with ``npfunc``
                # applied along ``self.axis``.
                result = self.aggregate(
                    lambda x: npfunc(x, axis=self.axis))
                if _convert:
                    result = result._convert(datetime=True)
                return result

        set_function_name(f, name, cls)
        return f

    def non_null(values):
        # Array view of ``values`` restricted to entries ``notnull`` accepts.
        values = np.asarray(values)
        return values[notnull(values)]

    def first_compat(x, axis=0):
        # First non-null entry, or NaN when there is none; a DataFrame is
        # reduced through ``apply`` along ``axis``.
        def first(values):
            valid = non_null(values)
            return valid[0] if len(valid) else np.nan

        if isinstance(x, DataFrame):
            return x.apply(first, axis=axis)
        return first(x)

    def last_compat(x, axis=0):
        # Last non-null entry, or NaN when there is none; a DataFrame is
        # reduced through ``apply`` along ``axis``.
        def last(values):
            valid = non_null(values)
            return valid[-1] if len(valid) else np.nan

        if isinstance(x, DataFrame):
            return x.apply(last, axis=axis)
        return last(x)

    # (method name, alias, npfunc, factory keyword overrides), kept in the
    # order the methods are attached to the class.
    registrations = (
        ('sum', 'add', np.sum, {}),
        ('prod', 'prod', np.prod, {}),
        ('min', 'min', np.min, {'numeric_only': False}),
        ('max', 'max', np.max, {'numeric_only': False}),
        ('first', 'first', first_compat,
         {'numeric_only': False, '_convert': True}),
        ('last', 'last', last_compat,
         {'numeric_only': False, '_convert': True}),
    )
    for name, alias, npfunc, options in registrations:
        setattr(cls, name, groupby_function(name, alias, npfunc, **options))
1195 | 1199 |
|
1196 | 1200 | @Substitution(name='groupby')
|
1197 | 1201 | @Appender(_doc_template)
|
@@ -1604,6 +1608,9 @@ def tail(self, n=5):
|
1604 | 1608 | return self._selected_obj[mask]
|
1605 | 1609 |
|
1606 | 1610 |
|
| 1611 | +GroupBy._add_numeric_operations() |
| 1612 | + |
| 1613 | + |
1607 | 1614 | @Appender(GroupBy.__doc__)
|
1608 | 1615 | def groupby(obj, by, **kwds):
|
1609 | 1616 | if isinstance(obj, Series):
|
|
0 commit comments