-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API: Expanded resample #13961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API: Expanded resample #13961
Changes from 2 commits
def74de
c4db0e7
b55309a
7f9add4
5fd97d9
c7b299e
384026b
e203fcf
10c7280
b8dd114
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -377,6 +377,20 @@ Other enhancements | |
|
||
pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) | ||
|
||
- The ``.resample()`` function now accepts an ``on=`` or ``level=`` parameter for resampling on a column or ``MultiIndex`` level (:issue:`13500`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. again would say datetimelike as well. Further I think the doc-string of what we have now
|
||
|
||
.. ipython:: python | ||
|
||
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), | ||
'a': np.arange(5)}, | ||
index=pd.MultiIndex.from_arrays([ | ||
[1,2,3,4,5], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would add to the main docs a similar example |
||
pd.date_range('2015-01-01', freq='W', periods=5)], | ||
names=['v','d'])) | ||
df | ||
df.resample('M', on='date').sum() | ||
df.resample('M', level='d').sum() | ||
|
||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) | ||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`) | ||
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -450,20 +450,30 @@ def test_agg(self): | |
('r2', 'B', 'sum')]) | ||
|
||
def test_agg_misc(self): | ||
# test with both a Resampler and a TimeGrouper | ||
# test with all three Resampler apis and TimeGrouper | ||
|
||
np.random.seed(1234) | ||
df = pd.DataFrame(np.random.rand(10, 2), | ||
columns=list('AB'), | ||
index=pd.date_range('2010-01-01 09:00:00', | ||
periods=10, | ||
freq='s')) | ||
freq='s', | ||
name='date')) | ||
df_col = df.reset_index() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you might be able to move this to |
||
df_mult = df_col.copy() | ||
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], | ||
names=['index', 'date']) | ||
|
||
r = df.resample('2s') | ||
g = df.groupby(pd.Grouper(freq='2s')) | ||
cases = [ | ||
r, | ||
df_col.resample('2s', on='date'), | ||
df_mult.resample('2s', level='date'), | ||
df.groupby(pd.Grouper(freq='2s')) | ||
] | ||
|
||
# passed lambda | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t.agg({'A': np.sum, | ||
'B': lambda x: np.std(x, ddof=1)}) | ||
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) | ||
|
@@ -480,7 +490,7 @@ def test_agg_misc(self): | |
('result1', 'B'), | ||
('result2', 'A'), | ||
('result2', 'B')]) | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), | ||
('result2', np.mean)])) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
@@ -495,19 +505,19 @@ def test_agg_misc(self): | |
('A', 'std'), | ||
('B', 'mean'), | ||
('B', 'std')]) | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t.agg(OrderedDict([('A', ['sum', 'std']), | ||
('B', ['mean', 'std'])])) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
||
# equivalent of using a selection list / or not | ||
for t in [r, g]: | ||
result = g[['A', 'B']].agg({'A': ['sum', 'std'], | ||
for t in cases: | ||
result = t[['A', 'B']].agg({'A': ['sum', 'std'], | ||
'B': ['mean', 'std']}) | ||
assert_frame_equal(result, expected, check_like=True) | ||
|
||
# series like aggs | ||
for t in [r, g]: | ||
for t in cases: | ||
result = t['A'].agg({'A': ['sum', 'std']}) | ||
expected = pd.concat([t['A'].sum(), | ||
t['A'].std()], | ||
|
@@ -528,9 +538,9 @@ def test_agg_misc(self): | |
|
||
# errors | ||
# invalid names in the agg specification | ||
for t in [r, g]: | ||
for t in cases: | ||
def f(): | ||
r[['A']].agg({'A': ['sum', 'std'], | ||
t[['A']].agg({'A': ['sum', 'std'], | ||
'B': ['mean', 'std']}) | ||
|
||
self.assertRaises(SpecificationError, f) | ||
|
@@ -581,6 +591,43 @@ def test_agg_consistency(self): | |
result = r.agg({'r1': 'mean', 'r2': 'sum'}) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_api_validation(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
# GH 13500 | ||
dates = pd.date_range('2015-01-01', freq='W', periods=10) | ||
df = pd.DataFrame({'date': dates, | ||
'a': np.arange(10, dtype='int64')}, | ||
index=pd.MultiIndex.from_arrays([ | ||
np.arange(10), | ||
dates], names=['v', 'd'])) | ||
|
||
exp_index = pd.date_range('2015-01-31', periods=3, | ||
freq='M', name='date') | ||
expected = pd.DataFrame({'a': [6, 22, 17]}, | ||
index=exp_index) | ||
|
||
actual = df.resample('M', on='date').sum() | ||
assert_frame_equal(actual, expected) | ||
|
||
expected.index.name = 'd' | ||
actual = df.resample('M', level='d').sum() | ||
assert_frame_equal(actual, expected) | ||
|
||
actual = df.resample('M', level=1).sum() | ||
assert_frame_equal(actual, expected) | ||
|
||
# non DatetimeIndex | ||
with tm.assertRaises(TypeError): | ||
df.resample('M', level='v') | ||
|
||
with tm.assertRaises(ValueError): | ||
df.resample('M', on='date', level='d') | ||
|
||
with tm.assertRaises(TypeError): | ||
df.resample('M', on=['a', 'date']) | ||
|
||
with tm.assertRaises(KeyError): | ||
df.resample('M', level=['a', 'date']) | ||
|
||
|
||
class Base(object): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make it clear that the `on` (currently) still must be a datetimelike (so we of course accept PeriodIndex/TimedeltaIndex here as well; add tests if we don't have them for those as well)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use `datetimelike` rather than `DatetimeIndex`