Skip to content

COMPAT: Fix indent level bug preventing wrapper function rename #14620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ Bug Fixes

- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`)

- Properly set ``__name__`` and ``__qualname__`` for ``GroupBy.*`` functions (:issue:`14620`)
- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`)
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
- Bug in ``.replace()`` that may result in incorrect dtypes (:issue:`12747`, :issue:`15765`)
Expand Down
143 changes: 75 additions & 68 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
)

from pandas import compat
from pandas.compat.numpy import function as nv
from pandas.compat.numpy import _np_version_under1p8
from pandas.compat.numpy import function as nv, _np_version_under1p8
from pandas.compat import set_function_name

from pandas.types.common import (is_numeric_dtype,
is_timedelta64_dtype, is_datetime64_dtype,
Expand Down Expand Up @@ -172,64 +172,6 @@
'cummin', 'cummax'])


def _groupby_function(name, alias, npfunc, numeric_only=True,
                      _convert=False):
    """
    Build a GroupBy reduction method called ``name``.

    Parameters
    ----------
    name : str
        Name given to the returned function; also substituted into the
        generated docstring ("Compute <name> of group values").
    alias : str
        Operation name handed to ``self._cython_agg_general``.
    npfunc : callable
        Fallback reduction. It is passed as ``alt`` to the cython path and,
        if that path fails, applied per group as
        ``npfunc(x, axis=self.axis)`` through ``self.aggregate``.
    numeric_only : bool, default True
        Value used for the ``numeric_only`` keyword when the caller does
        not pass one explicitly.
    _convert : bool, default False
        If True, the fallback result goes through
        ``result._convert(datetime=True)`` before being returned.

    Returns
    -------
    function
        ``f(self, **kwargs)`` with ``__name__`` set to ``name``.

    Raises
    ------
    SpecificationError
        Raised by the returned function when the cython path raises an
        ``AssertionError``; the original message is preserved.
    """
    _local_template = "Compute %(f)s of group values"

    @Substitution(name='groupby', f=name)
    @Appender(_doc_template)
    @Appender(_local_template)
    def f(self, **kwargs):
        # An explicit numeric_only from the caller wins over the default
        # chosen when the method was built.
        kwargs.setdefault('numeric_only', numeric_only)
        self._set_group_selection()
        try:
            return self._cython_agg_general(alias, alt=npfunc, **kwargs)
        except AssertionError as e:
            raise SpecificationError(str(e))
        except Exception:
            # Deliberate best-effort: any other failure of the cython path
            # falls back to a plain per-group aggregation with ``npfunc``.
            result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
            if _convert:
                result = result._convert(datetime=True)
            return result

    # The rename has to happen here, in the factory body. Placed inside
    # ``f`` it would sit after the ``return`` statements and never run,
    # leaving every generated method named "f".
    f.__name__ = name

    return f


def _first_compat(x, axis=0):

def _first(x):

x = np.asarray(x)
x = x[notnull(x)]
if len(x) == 0:
return np.nan
return x[0]

if isinstance(x, DataFrame):
return x.apply(_first, axis=axis)
else:
return _first(x)


def _last_compat(x, axis=0):
def _last(x):

x = np.asarray(x)
x = x[notnull(x)]
if len(x) == 0:
return np.nan
return x[-1]

if isinstance(x, DataFrame):
return x.apply(_last, axis=axis)
else:
return _last(x)


class Grouper(object):
"""
A Grouper allows the user to specify a groupby instruction for a target
Expand Down Expand Up @@ -1184,14 +1126,76 @@ def size(self):
result.name = getattr(self, 'name', None)
return result

# Basic reductions, each built by
# ``_groupby_function(name, alias, npfunc, numeric_only=True, _convert=False)``:
#   * ``name``  - name given to the generated method and used in its docstring
#   * ``alias`` - operation name passed to ``_cython_agg_general``
#                 (note that 'sum' is dispatched under the alias 'add')
#   * ``npfunc`` - fallback reduction used when the cython path fails
# min/max/first/last default to numeric_only=False; first/last additionally
# pass their fallback result through ``_convert(datetime=True)``.
sum = _groupby_function('sum', 'add', np.sum)
prod = _groupby_function('prod', 'prod', np.prod)
min = _groupby_function('min', 'min', np.min, numeric_only=False)
max = _groupby_function('max', 'max', np.max, numeric_only=False)
first = _groupby_function('first', 'first', _first_compat,
numeric_only=False, _convert=True)
last = _groupby_function('last', 'last', _last_compat, numeric_only=False,
_convert=True)
@classmethod
def _add_numeric_operations(cls):
    """
    Attach ``sum``, ``prod``, ``min``, ``max``, ``first`` and ``last``
    to ``cls``.

    Each method is produced by a local factory, renamed through
    ``set_function_name(f, name, cls)`` and then assigned on the class.
    """

    def groupby_function(name, alias, npfunc,
                         numeric_only=True, _convert=False):
        # name    -> method name, also substituted into the docstring
        # alias   -> operation name given to ``_cython_agg_general``
        # npfunc  -> fallback reduction (``alt`` for the cython path, and
        #            the per-group function if that path fails)
        _local_template = "Compute %(f)s of group values"

        @Substitution(name='groupby', f=name)
        @Appender(_doc_template)
        @Appender(_local_template)
        def f(self, **kwargs):
            # caller-supplied numeric_only wins over the factory default
            kwargs.setdefault('numeric_only', numeric_only)
            self._set_group_selection()
            try:
                return self._cython_agg_general(
                    alias, alt=npfunc, **kwargs)
            except AssertionError as err:
                raise SpecificationError(str(err))
            except Exception:
                # any other failure: aggregate group by group with npfunc
                fallback = self.aggregate(
                    lambda grp: npfunc(grp, axis=self.axis))
                return (fallback._convert(datetime=True)
                        if _convert else fallback)

        set_function_name(f, name, cls)

        return f

    def first_compat(x, axis=0):
        # first non-null value; NaN when there is none

        def first(values):
            arr = np.asarray(values)
            arr = arr[notnull(arr)]
            return arr[0] if len(arr) else np.nan

        if not isinstance(x, DataFrame):
            return first(x)
        return x.apply(first, axis=axis)

    def last_compat(x, axis=0):
        # last non-null value; NaN when there is none

        def last(values):
            arr = np.asarray(values)
            arr = arr[notnull(arr)]
            return arr[-1] if len(arr) else np.nan

        if not isinstance(x, DataFrame):
            return last(x)
        return x.apply(last, axis=axis)

    # (method name, cython alias, fallback, extra factory options)
    reductions = (
        ('sum', 'add', np.sum, {}),
        ('prod', 'prod', np.prod, {}),
        ('min', 'min', np.min, {'numeric_only': False}),
        ('max', 'max', np.max, {'numeric_only': False}),
        ('first', 'first', first_compat,
         {'numeric_only': False, '_convert': True}),
        ('last', 'last', last_compat,
         {'numeric_only': False, '_convert': True}),
    )
    for method_name, alias, fallback, options in reductions:
        setattr(cls, method_name,
                groupby_function(method_name, alias, fallback, **options))

@Substitution(name='groupby')
@Appender(_doc_template)
Expand Down Expand Up @@ -1604,6 +1608,9 @@ def tail(self, n=5):
return self._selected_obj[mask]


# Attach the generated reductions (sum, prod, min, max, first, last) to
# GroupBy; the classmethod assigns them on the class it is called on.
GroupBy._add_numeric_operations()


@Appender(GroupBy.__doc__)
def groupby(obj, by, **kwds):
if isinstance(obj, Series):
Expand Down
36 changes: 23 additions & 13 deletions pandas/tests/groupby/common.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
""" Base setup """

import pytest
import numpy as np
from pandas.util import testing as tm
from pandas import DataFrame, MultiIndex


@pytest.fixture
def mframe():
    """
    Return a 10 x 3 frame of random normal values with columns A, B, C,
    indexed by a two-level MultiIndex named ('first', 'second').
    """
    outer = ['foo', 'bar', 'baz', 'qux']
    inner = ['one', 'two', 'three']
    index = MultiIndex(levels=[outer, inner],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    values = np.random.randn(10, 3)
    return DataFrame(values, index=index, columns=['A', 'B', 'C'])


@pytest.fixture
def df():
    """
    Return an 8-row frame with two string key columns (A, B) and two
    random normal value columns (C, D).
    """
    data = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        # C is drawn before D, matching the order of the random stream
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
    return DataFrame(data)


class MixIn(object):

def setUp(self):
Expand All @@ -15,26 +36,15 @@ def setUp(self):
self.frame = DataFrame(self.seriesd)
self.tsframe = DataFrame(self.tsd)

self.df = DataFrame(
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
'C': np.random.randn(8),
'D': np.random.randn(8)})

self.df = df()
self.df_mixed_floats = DataFrame(
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
'C': np.random.randn(8),
'D': np.array(
np.random.randn(8), dtype='float32')})

index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
self.mframe = DataFrame(np.random.randn(10, 3), index=index,
columns=['A', 'B', 'C'])
self.mframe = mframe()

self.three_group = DataFrame(
{'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
Expand Down
Loading