Skip to content

Commit d73f332

Browse files
ENH BinGrouper use BaseGrouper's apply
`BinGrouper.apply` and `BaseGrouper.apply` have different output types. To make them consistent, remove `BinGrouper.apply` so that `BinGrouper` inherits the method from its superclass `BaseGrouper`. This requires changing `BinGrouper.groupings` to return a list of `Grouping` objects (there will always be exactly one) instead of `None`.
1 parent 22cf50b commit d73f332

File tree

3 files changed

+102
-21
lines changed

3 files changed

+102
-21
lines changed

doc/source/whatsnew/v0.18.1.txt

+66
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,81 @@ API changes
7272

7373

7474
- ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`)
75+
7576
- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
7677

78+
- Using ``apply`` on resampling groupby operations (e.g. ``df.groupby(pd.TimeGrouper(freq='M', key='date')).apply(...)``) now returns the same output types as a similar ``apply`` on other groupby operations (e.g. ``df.groupby(pd.Grouper(key='color')).apply(...)``) (:issue:`11742`).
79+
80+
Previous behavior:
81+
82+
.. code-block:: python
83+
84+
In [1]: df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
85+
86+
In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
87+
Out[2]:
88+
...
89+
TypeError: cannot concatenate a non-NDFrame object
90+
91+
In [3]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
92+
Out[3]:
93+
date
94+
2000-10-31 value 10
95+
2000-11-30 value 13
96+
dtype: int64
97+
98+
In [3]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()))
99+
Out[3]: pandas.core.series.Series
100+
101+
102+
In [4]: df.groupby(pd.Grouper(key='date')).apply(lambda x: x.value.sum())
103+
Out[4]:
104+
date
105+
2000-10-10 10
106+
2000-11-10 13
107+
dtype: int64
108+
109+
In [5]: type(df.groupby(pd.Grouper(key='date')).apply(lambda x: x.value.sum()))
110+
Out[5]: pandas.core.series.Series
111+
112+
113+
In [6]: df.groupby(pd.Grouper(key='date')).apply(lambda x: x[['value']].sum())
114+
Out[6]:
115+
value
116+
date
117+
2000-10-10 10
118+
2000-11-10 13
119+
120+
In [7]: type(df.groupby(pd.Grouper(key='date')).apply(lambda x: x[['value']].sum()))
121+
Out[7]: pandas.core.frame.DataFrame
122+
123+
124+
New behavior:
77125

126+
.. code-block:: python
78127

128+
In [1]: df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
79129

130+
In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
131+
Out[2]:
132+
date
133+
2000-10-31 10
134+
2000-11-30 13
135+
Freq: M, dtype: int64
80136

137+
In [3]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum()))
138+
Out[3]: pandas.core.series.Series
81139

82140

141+
In [4]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
142+
Out[4]:
143+
value
144+
date
145+
2000-10-31 10
146+
2000-11-30 13
83147

148+
In [5]: type(df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum()))
149+
Out[5]: pandas.core.frame.DataFrame
84150

85151

86152
.. _whatsnew_0181.deprecations:

pandas/core/groupby.py

+2-21
Original file line numberDiff line numberDiff line change
@@ -2004,25 +2004,6 @@ def get_iterator(self, data, axis=0):
20042004
if start < length:
20052005
yield self.binlabels[-1], slicer(start, None)
20062006

2007-
def apply(self, f, data, axis=0):
2008-
result_keys = []
2009-
result_values = []
2010-
mutated = False
2011-
for key, group in self.get_iterator(data, axis=axis):
2012-
object.__setattr__(group, 'name', key)
2013-
2014-
# group might be modified
2015-
group_axes = _get_axes(group)
2016-
res = f(group)
2017-
2018-
if not _is_indexed_like(res, group_axes):
2019-
mutated = True
2020-
2021-
result_keys.append(key)
2022-
result_values.append(res)
2023-
2024-
return result_keys, result_values, mutated
2025-
20262007
@cache_readonly
20272008
def indices(self):
20282009
indices = collections.defaultdict(list)
@@ -2071,8 +2052,8 @@ def names(self):
20712052

20722053
@property
20732054
def groupings(self):
2074-
# for compat
2075-
return None
2055+
return [Grouping(lvl, lvl, in_axis=False, level=None, name=name)
2056+
for lvl, name in zip(self.levels, self.names)]
20762057

20772058
def agg_series(self, obj, func):
20782059
dummy = obj[:0]

pandas/tests/test_groupby.py

+34
Original file line numberDiff line numberDiff line change
@@ -4824,6 +4824,40 @@ def test_timegrouper_get_group(self):
48244824
result = grouped.get_group(dt)
48254825
assert_frame_equal(result, expected)
48264826

4827+
def test_timegrouper_apply_return_type_series(self):
4828+
# Using `apply` with the `TimeGrouper` should give the
4829+
# same return type as an `apply` with a `Grouper`.
4830+
df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
4831+
'value': [10, 13]})
4832+
df_dt = df.copy()
4833+
df_dt['date'] = pd.to_datetime(df_dt['date'])
4834+
4835+
def sumfunc_series(x):
4836+
return pd.Series([x['value'].sum()], ('sum',))
4837+
4838+
expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
4839+
result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
4840+
.apply(sumfunc_series))
4841+
assert_frame_equal(result.reset_index(drop=True),
4842+
expected.reset_index(drop=True))
4843+
4844+
def test_timegrouper_apply_return_type_value(self):
4845+
# Using `apply` with the `TimeGrouper` should give the
4846+
# same return type as an `apply` with a `Grouper`.
4847+
df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
4848+
'value': [10, 13]})
4849+
df_dt = df.copy()
4850+
df_dt['date'] = pd.to_datetime(df_dt['date'])
4851+
4852+
def sumfunc_value(x):
4853+
return x.value.sum()
4854+
4855+
expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
4856+
result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
4857+
.apply(sumfunc_value))
4858+
assert_series_equal(result.reset_index(drop=True),
4859+
expected.reset_index(drop=True))
4860+
48274861
def test_cumcount(self):
48284862
df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
48294863
g = df.groupby('A')

0 commit comments

Comments
 (0)