-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API: Expanded resample #13961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API: Expanded resample #13961
Changes from 1 commit
def74de
c4db0e7
b55309a
7f9add4
5fd97d9
c7b299e
384026b
e203fcf
10c7280
b8dd114
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -377,6 +377,20 @@ Other enhancements | |
|
||
pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) | ||
|
||
- the ``.resample()`` function now accepts an ``on=`` or ``level=`` parameter for resampling on a column or ``MultiIndex`` level (:issue:`13500`) | ||
|
||
.. ipython:: python | ||
|
||
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), | ||
'a': np.arange(5)}, | ||
index=pd.MultiIndex.from_arrays([ | ||
[1,2,3,4,5], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would add a similar example to the main docs. |
||
pd.date_range('2015-01-01', freq='W', periods=5)], | ||
names=['v','d'])) | ||
df | ||
df.resample('M', on='date').sum() | ||
df.resample('M', level='d').sum() | ||
|
||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) | ||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`) | ||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4038,7 +4038,7 @@ def between_time(self, start_time, end_time, include_start=True, | |
|
||
def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, | ||
label=None, convention='start', kind=None, loffset=None, | ||
limit=None, base=0): | ||
limit=None, base=0, on=None, level=None): | ||
""" | ||
Convenience method for frequency conversion and resampling of regular | ||
time-series data. | ||
|
@@ -4059,7 +4059,12 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, | |
For frequencies that evenly subdivide 1 day, the "origin" of the | ||
aggregated intervals. For example, for '5min' frequency, base could | ||
range from 0 through 4. Defaults to 0 | ||
|
||
on : string, optional | ||
For a DataFrame, column to use for resampling, rather than | ||
the index | ||
level : string or int, optional | ||
For a MultiIndex, level (name or number) to use for | ||
resampling | ||
|
||
To learn more about the offset strings, please see `this link | ||
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. | ||
|
@@ -4164,12 +4169,16 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, | |
""" | ||
from pandas.tseries.resample import (resample, | ||
_maybe_process_deprecations) | ||
if is_list_like(on): | ||
raise ValueError("Only a single column may be passed to on") | ||
if is_list_like(level): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would move these inside resample There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually I think might be able to remove these entirely. When |
||
raise ValueError("Only a single column may be passed to level") | ||
|
||
axis = self._get_axis_number(axis) | ||
r = resample(self, freq=rule, label=label, closed=closed, | ||
axis=axis, kind=kind, loffset=loffset, | ||
convention=convention, | ||
base=base) | ||
base=base, key=on, level=level) | ||
return _maybe_process_deprecations(r, | ||
how=how, | ||
fill_method=fill_method, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -450,20 +450,30 @@ def test_agg(self): | |
('r2', 'B', 'sum')]) | ||
|
||
def test_agg_misc(self): | ||
# test with both a Resampler and a TimeGrouper | ||
# test with all three Resampler apis and TimeGrouper | ||
|
||
np.random.seed(1234) | ||
df = pd.DataFrame(np.random.rand(10, 2), | ||
columns=list('AB'), | ||
index=pd.date_range('2010-01-01 09:00:00', | ||
periods=10, | ||
freq='s')) | ||
freq='s', | ||
name='date')) | ||
df_col = df.reset_index() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you might be able to move this to |
||
df_mult = df_col.copy() | ||
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], | ||
names=['index', 'date']) | ||
|
||
r = df.resample('2s') | ||
g = df.groupby(pd.Grouper(freq='2s')) | ||
cases = [ | ||
r, | ||
df_col.resample('2s', on='date'), | ||
df_mult.resample('2s', level='date'), | ||
df.groupby(pd.Grouper(freq='2s')) | ||
] | ||
|
||
# passed lambda | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t.agg({'A': np.sum, | ||
'B': lambda x: np.std(x, ddof=1)}) | ||
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) | ||
|
@@ -480,7 +490,7 @@ def test_agg_misc(self): | |
('result1', 'B'), | ||
('result2', 'A'), | ||
('result2', 'B')]) | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), | ||
('result2', np.mean)])) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
@@ -495,19 +505,19 @@ def test_agg_misc(self): | |
('A', 'std'), | ||
('B', 'mean'), | ||
('B', 'std')]) | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t.agg(OrderedDict([('A', ['sum', 'std']), | ||
('B', ['mean', 'std'])])) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
||
# equivalent of using a selection list / or not | ||
for t in [r, g]: | ||
result = g[['A', 'B']].agg({'A': ['sum', 'std'], | ||
for t in cases: | ||
result = t[['A', 'B']].agg({'A': ['sum', 'std'], | ||
'B': ['mean', 'std']}) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
||
# series like aggs | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t['A'].agg({'A': ['sum', 'std']}) | ||
expected = pd.concat([t['A'].sum(), | ||
t['A'].std()], | ||
|
@@ -528,9 +538,9 @@ def test_agg_misc(self): | |
|
||
# errors | ||
# invalid names in the agg specification | ||
for t in [r, g]: | ||
for t in cases: | ||
def f(): | ||
r[['A']].agg({'A': ['sum', 'std'], | ||
t[['A']].agg({'A': ['sum', 'std'], | ||
'B': ['mean', 'std']}) | ||
|
||
self.assertRaises(SpecificationError, f) | ||
|
@@ -581,6 +591,36 @@ def test_agg_consistency(self): | |
result = r.agg({'r1': 'mean', 'r2': 'sum'}) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_api_validation(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
# GH 13500 | ||
dates = pd.date_range('2015-01-01', freq='W', periods=10) | ||
df = pd.DataFrame({'date': dates, | ||
'a': np.arange(10, dtype='int64')}, | ||
index=pd.MultiIndex.from_arrays([ | ||
np.arange(10), | ||
dates], names=['v', 'd'])) | ||
|
||
exp_index = pd.date_range('2015-01-31', periods=3, | ||
freq='M', name='date') | ||
expected = pd.DataFrame({'a': [6, 22, 17]}, | ||
index=exp_index) | ||
|
||
actual = df.resample('M', on='date').sum() | ||
assert_frame_equal(actual, expected) | ||
|
||
actual = df.resample('M', level='d').sum() | ||
expected.index.name = 'd' | ||
assert_frame_equal(actual, expected) | ||
|
||
with tm.assertRaises(ValueError): | ||
df.resample('M', on='date', level='d') | ||
|
||
with tm.assertRaises(ValueError): | ||
df.resample('M', on=['a', 'date']) | ||
|
||
with tm.assertRaises(ValueError): | ||
df.resample('M', level=['a', 'date']) | ||
|
||
|
||
class Base(object): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
key -> level