Skip to content

Commit 2e64614

Browse files
committed
COMPAT: Fix indent level bug preventing wrapper function rename
The original code intends to rename the wrapper function `f` using the provided name, but this isn't happening because the code is incorrectly indented an extra level.

Example: after `from pandas.core.groupby import GroupBy`, the value of `GroupBy.sum.__name__` should be `'sum'`.

Author: Jeff Reback <[email protected]>
Author: Matt Hagy <[email protected]>
Author: Matt Hagy <[email protected]>

Closes #14620 from matthagy/patch-1 and squashes the following commits:

- db3c6e4 [Jeff Reback] clean/reorg tests
- 205489b [Jeff Reback] doc
- 8b185b4 [Jeff Reback] PEP
- 781b9b3 [Jeff Reback] Move _groupby_function inside GroupBy
- 68013bf [Matt Hagy] Added a test for known inconsistent attribute/method names
- 3bf8993 [Matt Hagy] Revise attribute/method consistency check to skip known inconsistencies
- 033e42d [Matt Hagy] Test for consistency of attribute and method names
- 2a54b77 [Matt Hagy] Test renaming of _groupby_function wrapper function
- a492b5a [Matt Hagy] Fix indent level bug preventing wrapper function rename
1 parent 34c6bd0 commit 2e64614

File tree

5 files changed

+400
-304
lines changed

5 files changed

+400
-304
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,7 @@ Bug Fixes
887887

888888
- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`)
889889

890+
- Properly set ``__name__`` and ``__qualname__`` for ``GroupBy.*`` functions (:issue:`14620`)
890891
- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`)
891892
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
892893
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)

pandas/core/groupby.py

+75-68
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
)
1313

1414
from pandas import compat
15-
from pandas.compat.numpy import function as nv
16-
from pandas.compat.numpy import _np_version_under1p8
15+
from pandas.compat.numpy import function as nv, _np_version_under1p8
16+
from pandas.compat import set_function_name
1717

1818
from pandas.types.common import (is_numeric_dtype,
1919
is_timedelta64_dtype, is_datetime64_dtype,
@@ -172,64 +172,6 @@
172172
'cummin', 'cummax'])
173173

174174

175-
def _groupby_function(name, alias, npfunc, numeric_only=True,
176-
_convert=False):
177-
178-
_local_template = "Compute %(f)s of group values"
179-
180-
@Substitution(name='groupby', f=name)
181-
@Appender(_doc_template)
182-
@Appender(_local_template)
183-
def f(self, **kwargs):
184-
if 'numeric_only' not in kwargs:
185-
kwargs['numeric_only'] = numeric_only
186-
self._set_group_selection()
187-
try:
188-
return self._cython_agg_general(alias, alt=npfunc, **kwargs)
189-
except AssertionError as e:
190-
raise SpecificationError(str(e))
191-
except Exception:
192-
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
193-
if _convert:
194-
result = result._convert(datetime=True)
195-
return result
196-
197-
f.__name__ = name
198-
199-
return f
200-
201-
202-
def _first_compat(x, axis=0):
203-
204-
def _first(x):
205-
206-
x = np.asarray(x)
207-
x = x[notnull(x)]
208-
if len(x) == 0:
209-
return np.nan
210-
return x[0]
211-
212-
if isinstance(x, DataFrame):
213-
return x.apply(_first, axis=axis)
214-
else:
215-
return _first(x)
216-
217-
218-
def _last_compat(x, axis=0):
219-
def _last(x):
220-
221-
x = np.asarray(x)
222-
x = x[notnull(x)]
223-
if len(x) == 0:
224-
return np.nan
225-
return x[-1]
226-
227-
if isinstance(x, DataFrame):
228-
return x.apply(_last, axis=axis)
229-
else:
230-
return _last(x)
231-
232-
233175
class Grouper(object):
234176
"""
235177
A Grouper allows the user to specify a groupby instruction for a target
@@ -1184,14 +1126,76 @@ def size(self):
11841126
result.name = getattr(self, 'name', None)
11851127
return result
11861128

1187-
sum = _groupby_function('sum', 'add', np.sum)
1188-
prod = _groupby_function('prod', 'prod', np.prod)
1189-
min = _groupby_function('min', 'min', np.min, numeric_only=False)
1190-
max = _groupby_function('max', 'max', np.max, numeric_only=False)
1191-
first = _groupby_function('first', 'first', _first_compat,
1192-
numeric_only=False, _convert=True)
1193-
last = _groupby_function('last', 'last', _last_compat, numeric_only=False,
1194-
_convert=True)
1129+
@classmethod
1130+
def _add_numeric_operations(cls):
1131+
""" add numeric operations to the GroupBy generically """
1132+
1133+
def groupby_function(name, alias, npfunc,
1134+
numeric_only=True, _convert=False):
1135+
1136+
_local_template = "Compute %(f)s of group values"
1137+
1138+
@Substitution(name='groupby', f=name)
1139+
@Appender(_doc_template)
1140+
@Appender(_local_template)
1141+
def f(self, **kwargs):
1142+
if 'numeric_only' not in kwargs:
1143+
kwargs['numeric_only'] = numeric_only
1144+
self._set_group_selection()
1145+
try:
1146+
return self._cython_agg_general(
1147+
alias, alt=npfunc, **kwargs)
1148+
except AssertionError as e:
1149+
raise SpecificationError(str(e))
1150+
except Exception:
1151+
result = self.aggregate(
1152+
lambda x: npfunc(x, axis=self.axis))
1153+
if _convert:
1154+
result = result._convert(datetime=True)
1155+
return result
1156+
1157+
set_function_name(f, name, cls)
1158+
1159+
return f
1160+
1161+
def first_compat(x, axis=0):
1162+
1163+
def first(x):
1164+
1165+
x = np.asarray(x)
1166+
x = x[notnull(x)]
1167+
if len(x) == 0:
1168+
return np.nan
1169+
return x[0]
1170+
1171+
if isinstance(x, DataFrame):
1172+
return x.apply(first, axis=axis)
1173+
else:
1174+
return first(x)
1175+
1176+
def last_compat(x, axis=0):
1177+
1178+
def last(x):
1179+
1180+
x = np.asarray(x)
1181+
x = x[notnull(x)]
1182+
if len(x) == 0:
1183+
return np.nan
1184+
return x[-1]
1185+
1186+
if isinstance(x, DataFrame):
1187+
return x.apply(last, axis=axis)
1188+
else:
1189+
return last(x)
1190+
1191+
cls.sum = groupby_function('sum', 'add', np.sum)
1192+
cls.prod = groupby_function('prod', 'prod', np.prod)
1193+
cls.min = groupby_function('min', 'min', np.min, numeric_only=False)
1194+
cls.max = groupby_function('max', 'max', np.max, numeric_only=False)
1195+
cls.first = groupby_function('first', 'first', first_compat,
1196+
numeric_only=False, _convert=True)
1197+
cls.last = groupby_function('last', 'last', last_compat,
1198+
numeric_only=False, _convert=True)
11951199

11961200
@Substitution(name='groupby')
11971201
@Appender(_doc_template)
@@ -1604,6 +1608,9 @@ def tail(self, n=5):
16041608
return self._selected_obj[mask]
16051609

16061610

1611+
GroupBy._add_numeric_operations()
1612+
1613+
16071614
@Appender(GroupBy.__doc__)
16081615
def groupby(obj, by, **kwds):
16091616
if isinstance(obj, Series):

pandas/tests/groupby/common.py

+23-13
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
""" Base setup """
22

3+
import pytest
34
import numpy as np
45
from pandas.util import testing as tm
56
from pandas import DataFrame, MultiIndex
67

78

9+
@pytest.fixture
10+
def mframe():
11+
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
12+
'three']],
13+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
14+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
15+
names=['first', 'second'])
16+
return DataFrame(np.random.randn(10, 3), index=index,
17+
columns=['A', 'B', 'C'])
18+
19+
20+
@pytest.fixture
21+
def df():
22+
return DataFrame(
23+
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
24+
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
25+
'C': np.random.randn(8),
26+
'D': np.random.randn(8)})
27+
28+
829
class MixIn(object):
930

1031
def setUp(self):
@@ -15,26 +36,15 @@ def setUp(self):
1536
self.frame = DataFrame(self.seriesd)
1637
self.tsframe = DataFrame(self.tsd)
1738

18-
self.df = DataFrame(
19-
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
20-
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
21-
'C': np.random.randn(8),
22-
'D': np.random.randn(8)})
23-
39+
self.df = df()
2440
self.df_mixed_floats = DataFrame(
2541
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
2642
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
2743
'C': np.random.randn(8),
2844
'D': np.array(
2945
np.random.randn(8), dtype='float32')})
3046

31-
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
32-
'three']],
33-
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
34-
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
35-
names=['first', 'second'])
36-
self.mframe = DataFrame(np.random.randn(10, 3), index=index,
37-
columns=['A', 'B', 'C'])
47+
self.mframe = mframe()
3848

3949
self.three_group = DataFrame(
4050
{'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',

0 commit comments

Comments
 (0)