Skip to content

Commit 8654a9e

Browse files
chris-b1jreback
authored and committed
API: Expanded resample
closes #13500 Author: Chris <[email protected]> Closes #13961 from chris-b1/resample-api and squashes the following commits: b8dd114 [Chris] make _from_selection a property 10c7280 [Chris] NotImp -> ValueError e203fcf [Chris] doc updates 384026b [Chris] remove PeriodIndex workaround c7b299e [Chris] cleanup debugging 5fd97d9 [Chris] add from_selection bookkeeping 7f9add4 [Chris] more wip b55309a [Chris] wip c4db0e7 [Chris] move error handling; doc fixups def74de [Chris] API: Expanded resample
1 parent f92cd7e commit 8654a9e

File tree

6 files changed

+221
-54
lines changed

6 files changed

+221
-54
lines changed

doc/source/timeseries.rst

+24
Original file line numberDiff line numberDiff line change
@@ -1473,6 +1473,30 @@ Furthermore, you can also specify multiple aggregation functions for each column
14731473
r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })
14741474
14751475
1476+
If a ``DataFrame`` does not have a datetimelike index, but instead you want
1477+
to resample based on a datetimelike column in the frame, it can be passed to the
1478+
``on`` keyword.
1479+
1480+
.. ipython:: python
1481+
1482+
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
1483+
'a': np.arange(5)},
1484+
index=pd.MultiIndex.from_arrays([
1485+
[1,2,3,4,5],
1486+
pd.date_range('2015-01-01', freq='W', periods=5)],
1487+
names=['v','d']))
1488+
df
1489+
df.resample('M', on='date').sum()
1490+
1491+
Similarly, if you instead want to resample by a datetimelike
1492+
level of a ``MultiIndex``, its name or location can be passed to the
1493+
``level`` keyword.
1494+
1495+
.. ipython:: python
1496+
1497+
df.resample('M', level='d').sum()
1498+
1499+
14761500
.. _timeseries.periods:
14771501

14781502
Time Span Representation

doc/source/whatsnew/v0.19.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,20 @@ Other enhancements
397397

398398
pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30)
399399

400+
- The ``.resample()`` function now accepts an ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`)
401+
402+
.. ipython:: python
403+
404+
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
405+
'a': np.arange(5)},
406+
index=pd.MultiIndex.from_arrays([
407+
[1,2,3,4,5],
408+
pd.date_range('2015-01-01', freq='W', periods=5)],
409+
names=['v','d']))
410+
df
411+
df.resample('M', on='date').sum()
412+
df.resample('M', level='d').sum()
413+
400414
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
401415
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`)
402416
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`)

pandas/core/generic.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -4047,10 +4047,12 @@ def between_time(self, start_time, end_time, include_start=True,
40474047

40484048
def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
40494049
label=None, convention='start', kind=None, loffset=None,
4050-
limit=None, base=0):
4050+
limit=None, base=0, on=None, level=None):
40514051
"""
4052-
Convenience method for frequency conversion and resampling of regular
4053-
time-series data.
4052+
Convenience method for frequency conversion and resampling of time
4053+
series. Object must have a datetime-like index (DatetimeIndex,
4054+
PeriodIndex, or TimedeltaIndex), or pass datetime-like values
4055+
to the on or level keyword.
40544056
40554057
Parameters
40564058
----------
@@ -4068,7 +4070,17 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
40684070
For frequencies that evenly subdivide 1 day, the "origin" of the
40694071
aggregated intervals. For example, for '5min' frequency, base could
40704072
range from 0 through 4. Defaults to 0
4073+
on : string, optional
4074+
For a DataFrame, column to use instead of index for resampling.
4075+
Column must be datetime-like.
40714076
4077+
.. versionadded:: 0.19.0
4078+
4079+
level : string or int, optional
4080+
For a MultiIndex, level (name or number) to use for
4081+
resampling. Level must be datetime-like.
4082+
4083+
.. versionadded:: 0.19.0
40724084
40734085
To learn more about the offset strings, please see `this link
40744086
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -4173,12 +4185,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
41734185
"""
41744186
from pandas.tseries.resample import (resample,
41754187
_maybe_process_deprecations)
4176-
41774188
axis = self._get_axis_number(axis)
41784189
r = resample(self, freq=rule, label=label, closed=closed,
41794190
axis=axis, kind=kind, loffset=loffset,
41804191
convention=convention,
4181-
base=base)
4192+
base=base, key=on, level=level)
41824193
return _maybe_process_deprecations(r,
41834194
how=how,
41844195
fill_method=fill_method,

pandas/core/groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ def _set_grouper(self, obj, sort=False):
255255
Parameters
256256
----------
257257
obj : the subject object
258-
258+
sort : bool, default False
259+
whether the resulting grouper should be sorted
259260
"""
260261

261262
if self.key is not None and self.level is not None:

pandas/tseries/resample.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,15 @@ def _typ(self):
112112
return 'series'
113113
return 'dataframe'
114114

115+
@property
116+
def _from_selection(self):
117+
""" is the resampling from a DataFrame column or MultiIndex level """
118+
# upsampling and PeriodIndex resampling do not work
119+
# with selection; this state is used to catch and raise an error
120+
return (self.groupby is not None and
121+
(self.groupby.key is not None or
122+
self.groupby.level is not None))
123+
115124
def _deprecated(self, op):
116125
warnings.warn(("\n.resample() is now a deferred operation\n"
117126
"You called {op}(...) on this deferred object "
@@ -207,6 +216,10 @@ def _convert_obj(self, obj):
207216
Parameters
208217
----------
209218
obj : the object to be resampled
219+
220+
Returns
221+
-------
222+
obj : converted object
210223
"""
211224
obj = obj.consolidate()
212225
return obj
@@ -706,6 +719,11 @@ def _upsample(self, method, limit=None):
706719
self._set_binner()
707720
if self.axis:
708721
raise AssertionError('axis must be 0')
722+
if self._from_selection:
723+
raise ValueError("Upsampling from level= or on= selection"
724+
" is not supported, use .set_index(...)"
725+
" to explicitly set index to"
726+
" datetime-like")
709727

710728
ax = self.ax
711729
obj = self._selected_obj
@@ -763,7 +781,15 @@ def _convert_obj(self, obj):
763781

764782
# convert to timestamp
765783
if not (self.kind is None or self.kind == 'period'):
766-
obj = obj.to_timestamp(how=self.convention)
784+
if self._from_selection:
785+
# see GH 14008, GH 12871
786+
msg = ("Resampling from level= or on= selection"
787+
" with a PeriodIndex is not currently supported,"
788+
" use .set_index(...) to explicitly set index")
789+
raise NotImplementedError(msg)
790+
else:
791+
obj = obj.to_timestamp(how=self.convention)
792+
767793
return obj
768794

769795
def aggregate(self, arg, *args, **kwargs):
@@ -841,6 +867,11 @@ def _upsample(self, method, limit=None):
841867
.fillna
842868
843869
"""
870+
if self._from_selection:
871+
raise ValueError("Upsampling from level= or on= selection"
872+
" is not supported, use .set_index(...)"
873+
" to explicitly set index to"
874+
" datetime-like")
844875
# we may need to actually resample as if we are timestamps
845876
if self.kind == 'timestamp':
846877
return super(PeriodIndexResampler, self)._upsample(method,

0 commit comments

Comments
 (0)