Skip to content

Commit def74de

Browse files
committed
API: Expanded resample
1 parent 257ac88 commit def74de

File tree

3 files changed

+77
-14
lines changed

3 files changed

+77
-14
lines changed

doc/source/whatsnew/v0.19.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,20 @@ Other enhancements
377377

378378
pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30)
379379

380+
- The ``.resample()`` function now accepts an ``on=`` or ``level=`` parameter for resampling on a column or ``MultiIndex`` level (:issue:`13500`)
381+
382+
.. ipython:: python
383+
384+
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
385+
'a': np.arange(5)},
386+
index=pd.MultiIndex.from_arrays([
387+
[1,2,3,4,5],
388+
pd.date_range('2015-01-01', freq='W', periods=5)],
389+
names=['v','d']))
390+
df
391+
df.resample('M', on='date').sum()
392+
df.resample('M', level='d').sum()
393+
380394
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
381395
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`)
382396
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`)

pandas/core/generic.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -4038,7 +4038,7 @@ def between_time(self, start_time, end_time, include_start=True,
40384038

40394039
def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
40404040
label=None, convention='start', kind=None, loffset=None,
4041-
limit=None, base=0):
4041+
limit=None, base=0, on=None, level=None):
40424042
"""
40434043
Convenience method for frequency conversion and resampling of regular
40444044
time-series data.
@@ -4059,7 +4059,12 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
40594059
For frequencies that evenly subdivide 1 day, the "origin" of the
40604060
aggregated intervals. For example, for '5min' frequency, base could
40614061
range from 0 through 4. Defaults to 0
4062-
4062+
on : string, optional
4063+
For a DataFrame, column to use for resampling, rather than
4064+
the index
4065+
level : string or int, optional
4066+
For a MultiIndex, level (name or number) to use for
4067+
resampling
40634068
40644069
To learn more about the offset strings, please see `this link
40654070
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -4164,12 +4169,16 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
41644169
"""
41654170
from pandas.tseries.resample import (resample,
41664171
_maybe_process_deprecations)
4172+
if is_list_like(on):
4173+
raise ValueError("Only a single column may be passed to on")
4174+
if is_list_like(level):
4175+
raise ValueError("Only a single column may be passed to level")
41674176

41684177
axis = self._get_axis_number(axis)
41694178
r = resample(self, freq=rule, label=label, closed=closed,
41704179
axis=axis, kind=kind, loffset=loffset,
41714180
convention=convention,
4172-
base=base)
4181+
base=base, key=on, level=level)
41734182
return _maybe_process_deprecations(r,
41744183
how=how,
41754184
fill_method=fill_method,

pandas/tseries/tests/test_resample.py

+51-11
Original file line numberDiff line numberDiff line change
@@ -450,20 +450,30 @@ def test_agg(self):
450450
('r2', 'B', 'sum')])
451451

452452
def test_agg_misc(self):
453-
# test with both a Resampler and a TimeGrouper
453+
# test with all three Resampler apis and TimeGrouper
454454

455455
np.random.seed(1234)
456456
df = pd.DataFrame(np.random.rand(10, 2),
457457
columns=list('AB'),
458458
index=pd.date_range('2010-01-01 09:00:00',
459459
periods=10,
460-
freq='s'))
460+
freq='s',
461+
name='date'))
462+
df_col = df.reset_index()
463+
df_mult = df_col.copy()
464+
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
465+
names=['index', 'date'])
461466

462467
r = df.resample('2s')
463-
g = df.groupby(pd.Grouper(freq='2s'))
468+
cases = [
469+
r,
470+
df_col.resample('2s', on='date'),
471+
df_mult.resample('2s', level='date'),
472+
df.groupby(pd.Grouper(freq='2s'))
473+
]
464474

465475
# passed lambda
466-
for t in [r, g]:
476+
for t in cases:
467477
result = t.agg({'A': np.sum,
468478
'B': lambda x: np.std(x, ddof=1)})
469479
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
@@ -480,7 +490,7 @@ def test_agg_misc(self):
480490
('result1', 'B'),
481491
('result2', 'A'),
482492
('result2', 'B')])
483-
for t in [r, g]:
493+
for t in cases:
484494
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum),
485495
('result2', np.mean)]))
486496
assert_frame_equal(result, expected, check_like=True)
@@ -495,19 +505,19 @@ def test_agg_misc(self):
495505
('A', 'std'),
496506
('B', 'mean'),
497507
('B', 'std')])
498-
for t in [r, g]:
508+
for t in cases:
499509
result = t.agg(OrderedDict([('A', ['sum', 'std']),
500510
('B', ['mean', 'std'])]))
501511
assert_frame_equal(result, expected, check_like=True)
502512

503513
# equivalent of using a selection list / or not
504-
for t in [r, g]:
505-
result = g[['A', 'B']].agg({'A': ['sum', 'std'],
514+
for t in cases:
515+
result = t[['A', 'B']].agg({'A': ['sum', 'std'],
506516
'B': ['mean', 'std']})
507517
assert_frame_equal(result, expected, check_like=True)
508518

509519
# series like aggs
510-
for t in [r, g]:
520+
for t in cases:
511521
result = t['A'].agg({'A': ['sum', 'std']})
512522
expected = pd.concat([t['A'].sum(),
513523
t['A'].std()],
@@ -528,9 +538,9 @@ def test_agg_misc(self):
528538

529539
# errors
530540
# invalid names in the agg specification
531-
for t in [r, g]:
541+
for t in cases:
532542
def f():
533-
r[['A']].agg({'A': ['sum', 'std'],
543+
t[['A']].agg({'A': ['sum', 'std'],
534544
'B': ['mean', 'std']})
535545

536546
self.assertRaises(SpecificationError, f)
@@ -581,6 +591,36 @@ def test_agg_consistency(self):
581591
result = r.agg({'r1': 'mean', 'r2': 'sum'})
582592
assert_frame_equal(result, expected)
583593

594+
def test_api_validation(self):
595+
# GH 13500
596+
dates = pd.date_range('2015-01-01', freq='W', periods=10)
597+
df = pd.DataFrame({'date': dates,
598+
'a': np.arange(10, dtype='int64')},
599+
index=pd.MultiIndex.from_arrays([
600+
np.arange(10),
601+
dates], names=['v', 'd']))
602+
603+
exp_index = pd.date_range('2015-01-31', periods=3,
604+
freq='M', name='date')
605+
expected = pd.DataFrame({'a': [6, 22, 17]},
606+
index=exp_index)
607+
608+
actual = df.resample('M', on='date').sum()
609+
assert_frame_equal(actual, expected)
610+
611+
actual = df.resample('M', level='d').sum()
612+
expected.index.name = 'd'
613+
assert_frame_equal(actual, expected)
614+
615+
with tm.assertRaises(ValueError):
616+
df.resample('M', on='date', level='d')
617+
618+
with tm.assertRaises(ValueError):
619+
df.resample('M', on=['a', 'date'])
620+
621+
with tm.assertRaises(ValueError):
622+
df.resample('M', level=['a', 'date'])
623+
584624

585625
class Base(object):
586626
"""

0 commit comments

Comments
 (0)