From 5205a3609ce8ba3055183f5dab5e12968207faf3 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 15:57:23 +0000 Subject: [PATCH 01/17] move file to subdirectory --- pandas/tests/{ => resample}/test_resample.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/{ => resample}/test_resample.py (100%) diff --git a/pandas/tests/test_resample.py b/pandas/tests/resample/test_resample.py similarity index 100% rename from pandas/tests/test_resample.py rename to pandas/tests/resample/test_resample.py From b3a7ff97b0138de8f81cb0d0bae9ee5b7ac134ee Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 16:31:17 +0000 Subject: [PATCH 02/17] split off TestTimeGrouper class --- pandas/tests/resample/test_resample.py | 307 ------------------- pandas/tests/resample/test_time_grouper.py | 325 +++++++++++++++++++++ 2 files changed, 325 insertions(+), 307 deletions(-) create mode 100644 pandas/tests/resample/test_time_grouper.py diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index d38f2a237c31d..bd35cad1fbd75 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -4,7 +4,6 @@ from datetime import datetime, timedelta from functools import partial from textwrap import dedent -from operator import methodcaller import pytz import pytest @@ -3221,309 +3220,3 @@ def test_median_duplicate_columns(self): result = df.resample('5s').median() expected.columns = result.columns assert_frame_equal(result, expected) - - -class TestTimeGrouper(object): - - def setup_method(self, method): - self.ts = Series(np.random.randn(1000), - index=date_range('1/1/2000', periods=1000)) - - def test_apply(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') - - grouped = self.ts.groupby(grouper) - - f = lambda x: x.sort_values()[-3:] - - applied = grouped.apply(f) - expected = self.ts.groupby(lambda x: x.year).apply(f) - - applied.index = applied.index.droplevel(0) - expected.index = expected.index.droplevel(0) - assert_series_equal(applied, expected) - - def test_count(self): - self.ts[::3] = np.nan - - expected = self.ts.groupby(lambda x: x.year).count() - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') - result = self.ts.groupby(grouper).count() - expected.index = result.index - assert_series_equal(result, expected) - - result = self.ts.resample('A').count() - expected.index = result.index - assert_series_equal(result, expected) - - def test_numpy_reduction(self): - result = self.ts.resample('A', closed='right').prod() - - expected = self.ts.groupby(lambda x: x.year).agg(np.prod) - expected.index = result.index - - assert_series_equal(result, expected) - - def test_apply_iteration(self): - # #2300 - N = 1000 - ind = pd.date_range(start="2000-01-01", freq="D", periods=N) - df = DataFrame({'open': 1, 'close': 2}, index=ind) - tg = TimeGrouper('M') - - _, grouper, _ = tg._get_grouper(df) - - # Errors - grouped = df.groupby(grouper, group_keys=False) - f = lambda df: df['close'] / df['open'] - - # it works! - result = grouped.apply(f) - tm.assert_index_equal(result.index, df.index) - - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") - def test_panel_aggregation(self): - ind = pd.date_range('1/1/2000', periods=100) - data = np.random.randn(2, len(ind), 4) - - wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, - minor_axis=['A', 'B', 'C', 'D']) - - tg = TimeGrouper('M', axis=1) - _, grouper, _ = tg._get_grouper(wp) - bingrouped = wp.groupby(grouper) - binagg = bingrouped.mean() - - def f(x): - assert (isinstance(x, Panel)) - return x.mean(1) - - result = bingrouped.agg(f) - tm.assert_panel_equal(result, binagg) - - def test_fails_on_no_datetime_index(self): - index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') - index_funcs = (tm.makeIntIndex, - tm.makeUnicodeIndex, tm.makeFloatIndex, - lambda m: tm.makeCustomIndex(m, 2)) - n = 2 - for name, func in zip(index_names, index_funcs): - index = func(n) - df = DataFrame({'a': np.random.randn(n)}, index=index) - - msg = ("Only valid with DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an instance of %r" % name) - with pytest.raises(TypeError, match=msg): - df.groupby(TimeGrouper('D')) - - def test_aaa_group_order(self): - # GH 12840 - # check TimeGrouper perform stable sorts - n = 20 - data = np.random.randn(n, 4) - df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), - datetime(2013, 1, 3), datetime(2013, 1, 4), - datetime(2013, 1, 5)] * 4 - grouped = df.groupby(TimeGrouper(key='key', freq='D')) - - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), - df[::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), - df[1::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), - df[2::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), - df[3::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), - df[4::5]) - - def test_aggregate_normal(self): - # check TimeGrouper's aggregation is identical as normal groupby - - n = 20 - data = np.random.randn(n, 4) - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, 3, 4, 5] * 4 - - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), - datetime(2013, 1, 3), datetime(2013, 1, 4), - datetime(2013, 1, 5)] * 4 - - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - - for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: - expected = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) - - for func in ['count', 'sum']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) - - # GH 7453 - for func in ['size']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_series_equal(expected, dt_result) - - # GH 7453 - for func in ['first', 'last']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) - - # if TimeGrouper is used included, 'nth' doesn't work yet - - """ - for func in ['nth']: - expected = getattr(normal_grouped, func)(3) - expected.index = date_range(start='2013-01-01', - freq='D', periods=5, name='key') - dt_result = getattr(dt_grouped, func)(3) - assert_frame_equal(expected, dt_result) - """ - - @pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), - ]) - def test_resample_entirly_nat_window(self, method, unit): - s = pd.Series([0] * 2 + [np.nan] * 2, - index=pd.date_range('2017', periods=4)) - # 0 / 1 by default - result = methodcaller(method)(s.resample("2d")) - expected = pd.Series([0.0, unit], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(s.resample("2d")) - expected = pd.Series([0.0, unit], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - # min_count=1 - result = methodcaller(method, min_count=1)(s.resample("2d")) - expected = pd.Series([0.0, np.nan], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('func, fill_value', [ - ('min', np.nan), - ('max', np.nan), - ('sum', 0), - ('prod', 1), - ('count', 0), - ]) - def test_aggregate_with_nat(self, func, fill_value): - # check TimeGrouper's aggregation is identical as normal groupby - # if NaT is included, 'var', 'std', 'mean', 'first','last' - # and 'nth' doesn't work yet - - n = 20 - data = np.random.randn(n, 4).astype('int64') - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 - - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, - datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 - - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - - normal_result = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() - - pad = DataFrame([[fill_value] * 4], index=[3], - columns=['A', 'B', 'C', 'D']) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) - assert dt_result.index.name == 'key' - - def test_aggregate_with_nat_size(self): - # GH 9925 - n = 20 - data = np.random.randn(n, 4).astype('int64') - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 - - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, - datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 - - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - - normal_result = normal_grouped.size() - dt_result = dt_grouped.size() - - pad = Series([0], index=[3]) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_series_equal(expected, dt_result) - assert dt_result.index.name == 'key' - - def test_repr(self): - # GH18203 - result = repr(TimeGrouper(key='A', freq='H')) - expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " - "closed='left', label='left', how='mean', " - "convention='e', base=0)") - assert result == expected - - @pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), - ]) - def test_upsample_sum(self, method, unit): - s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) - resampled = s.resample("30T") - index = pd.to_datetime(['2017-01-01T00:00:00', - '2017-01-01T00:30:00', - '2017-01-01T01:00:00']) - - # 0 / 1 by default - result = methodcaller(method)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=1 - result = methodcaller(method, min_count=1)(resampled) - expected = pd.Series([1, np.nan, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count>1 - result = methodcaller(method, min_count=2)(resampled) - expected = pd.Series([np.nan, np.nan, np.nan], index=index) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py new file mode 100644 index 0000000000000..d39d3294d48b9 --- /dev/null +++ b/pandas/tests/resample/test_time_grouper.py @@ -0,0 +1,325 @@ +# pylint: disable=E1101 + +from datetime import datetime +from operator import methodcaller + +import pytest +import numpy as np + +import pandas.util.testing as tm +from pandas.util.testing import assert_series_equal, assert_frame_equal + +import pandas as pd + +from pandas import Series, DataFrame, Panel + +from pandas.compat import zip + +from pandas.core.indexes.datetimes import date_range +from pandas.core.resample import TimeGrouper + + +class TestTimeGrouper(object): + + def setup_method(self, method): + self.ts = Series(np.random.randn(1000), + index=date_range('1/1/2000', periods=1000)) + + def test_apply(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') + + grouped = self.ts.groupby(grouper) + + f = lambda x: x.sort_values()[-3:] + + applied = grouped.apply(f) + expected = self.ts.groupby(lambda x: x.year).apply(f) + + applied.index = applied.index.droplevel(0) + expected.index = expected.index.droplevel(0) + assert_series_equal(applied, expected) + + def test_count(self): + self.ts[::3] = np.nan + + expected = self.ts.groupby(lambda x: x.year).count() + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') + result = self.ts.groupby(grouper).count() + expected.index = result.index + assert_series_equal(result, expected) + + result = self.ts.resample('A').count() + expected.index = result.index + assert_series_equal(result, expected) + + def test_numpy_reduction(self): + result = self.ts.resample('A', closed='right').prod() + + expected = self.ts.groupby(lambda x: x.year).agg(np.prod) + expected.index = result.index + + assert_series_equal(result, expected) + + def test_apply_iteration(self): + # #2300 + N = 1000 + ind = pd.date_range(start="2000-01-01", freq="D", periods=N) + df = DataFrame({'open': 1, 'close': 2}, index=ind) + tg = TimeGrouper('M') + + _, grouper, _ = tg._get_grouper(df) + + # Errors + grouped = df.groupby(grouper, group_keys=False) + f = lambda df: df['close'] / df['open'] + + # it works! + result = grouped.apply(f) + tm.assert_index_equal(result.index, df.index) + + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") + def test_panel_aggregation(self): + ind = pd.date_range('1/1/2000', periods=100) + data = np.random.randn(2, len(ind), 4) + + wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, + minor_axis=['A', 'B', 'C', 'D']) + + tg = TimeGrouper('M', axis=1) + _, grouper, _ = tg._get_grouper(wp) + bingrouped = wp.groupby(grouper) + binagg = bingrouped.mean() + + def f(x): + assert (isinstance(x, Panel)) + return x.mean(1) + + result = bingrouped.agg(f) + tm.assert_panel_equal(result, binagg) + + def test_fails_on_no_datetime_index(self): + index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') + index_funcs = (tm.makeIntIndex, + tm.makeUnicodeIndex, tm.makeFloatIndex, + lambda m: tm.makeCustomIndex(m, 2)) + n = 2 + for name, func in zip(index_names, index_funcs): + index = func(n) + df = DataFrame({'a': np.random.randn(n)}, index=index) + + msg = ("Only valid with DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an instance of %r" % name) + with pytest.raises(TypeError, match=msg): + df.groupby(TimeGrouper('D')) + + def test_aaa_group_order(self): + # GH 12840 + # check TimeGrouper perform stable sorts + n = 20 + data = np.random.randn(n, 4) + df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), + datetime(2013, 1, 3), datetime(2013, 1, 4), + datetime(2013, 1, 5)] * 4 + grouped = df.groupby(TimeGrouper(key='key', freq='D')) + + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), + df[::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), + df[1::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), + df[2::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), + df[3::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), + df[4::5]) + + def test_aggregate_normal(self): + # check TimeGrouper's aggregation is identical as normal groupby + + n = 20 + data = np.random.randn(n, 4) + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, 3, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), + datetime(2013, 1, 3), datetime(2013, 1, 4), + datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: + expected = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_frame_equal(expected, dt_result) + + for func in ['count', 'sum']: + expected = getattr(normal_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + dt_result = getattr(dt_grouped, func)() + assert_frame_equal(expected, dt_result) + + # GH 7453 + for func in ['size']: + expected = getattr(normal_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + dt_result = getattr(dt_grouped, func)() + assert_series_equal(expected, dt_result) + + # GH 7453 + for func in ['first', 'last']: + expected = getattr(normal_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + dt_result = getattr(dt_grouped, func)() + assert_frame_equal(expected, dt_result) + + # if TimeGrouper is used included, 'nth' doesn't work yet + + """ + for func in ['nth']: + expected = getattr(normal_grouped, func)(3) + expected.index = date_range(start='2013-01-01', + freq='D', periods=5, name='key') + dt_result = getattr(dt_grouped, func)(3) + assert_frame_equal(expected, dt_result) + """ + + @pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), + ]) + def test_resample_entirly_nat_window(self, method, unit): + s = pd.Series([0] * 2 + [np.nan] * 2, + index=pd.date_range('2017', periods=4)) + # 0 / 1 by default + result = methodcaller(method)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(s.resample("2d")) + expected = pd.Series([0.0, np.nan], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('func, fill_value', [ + ('min', np.nan), + ('max', np.nan), + ('sum', 0), + ('prod', 1), + ('count', 0), + ]) + def test_aggregate_with_nat(self, func, fill_value): + # check TimeGrouper's aggregation is identical as normal groupby + # if NaT is included, 'var', 'std', 'mean', 'first','last' + # and 'nth' doesn't work yet + + n = 20 + data = np.random.randn(n, 4).astype('int64') + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, + datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + + pad = DataFrame([[fill_value] * 4], index=[3], + columns=['A', 'B', 'C', 'D']) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_frame_equal(expected, dt_result) + assert dt_result.index.name == 'key' + + def test_aggregate_with_nat_size(self): + # GH 9925 + n = 20 + data = np.random.randn(n, 4).astype('int64') + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, + datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + normal_result = normal_grouped.size() + dt_result = dt_grouped.size() + + pad = Series([0], index=[3]) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_series_equal(expected, dt_result) + assert dt_result.index.name == 'key' + + def test_repr(self): + # GH18203 + result = repr(TimeGrouper(key='A', freq='H')) + expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', base=0)") + assert result == expected + + @pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), + ]) + def test_upsample_sum(self, method, unit): + s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) + resampled = s.resample("30T") + index = pd.to_datetime(['2017-01-01T00:00:00', + '2017-01-01T00:30:00', + '2017-01-01T01:00:00']) + + # 0 / 1 by default + result = methodcaller(method)(resampled) + expected = pd.Series([1, unit, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(resampled) + expected = pd.Series([1, unit, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(resampled) + expected = pd.Series([1, np.nan, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count>1 + result = methodcaller(method, min_count=2)(resampled) + expected = pd.Series([np.nan, np.nan, np.nan], index=index) + tm.assert_series_equal(result, expected) From f6b8da647e7631334d03f8746516bcec420c5b84 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 16:42:48 +0000 Subject: [PATCH 03/17] split off TestResamplerGrouper class --- pandas/tests/resample/test_resample.py | 237 ---------------- .../tests/resample/test_resampler_grouper.py | 252 ++++++++++++++++++ 2 files changed, 252 insertions(+), 237 deletions(-) create mode 100644 pandas/tests/resample/test_resampler_grouper.py diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index bd35cad1fbd75..55ccf9d61cf4b 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -3,7 +3,6 @@ from warnings import catch_warnings, simplefilter from datetime import datetime, timedelta from functools import partial -from textwrap import dedent import pytz import pytest @@ -2984,239 +2983,3 @@ def test_resample_as_freq_with_subperiod(self): index=timedelta_range('00:00:00', '00:10:00', freq='2T')) tm.assert_frame_equal(result, expected) - - -class TestResamplerGrouper(object): - - def setup_method(self, method): - self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, - 'B': np.arange(40)}, - index=date_range('1/1/2000', - freq='s', - periods=40)) - - def test_tab_complete_ipython6_warning(self, ip): - from IPython.core.completer import provisionalcompleter - code = dedent("""\ - import pandas.util.testing as tm - s = tm.makeTimeSeries() - rs = s.resample("D") - """) - ip.run_code(code) - - with tm.assert_produces_warning(None): - with provisionalcompleter('ignore'): - list(ip.Completer.completions('rs.', 1)) - - def test_deferred_with_groupby(self): - - # GH 12486 - # support deferred resample ops with groupby - data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3], - ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7], - ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5], - ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1], - ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]] - - df = DataFrame(data, columns=['date', 'id', 'score']) - df.date = pd.to_datetime(df.date) - f = lambda x: x.set_index('date').resample('D').asfreq() - expected = df.groupby('id').apply(f) - result = df.set_index('date').groupby('id').resample('D').asfreq() - assert_frame_equal(result, expected) - - df = DataFrame({'date': pd.date_range(start='2016-01-01', - periods=4, - freq='W'), - 'group': [1, 1, 2, 2], - 'val': [5, 6, 7, 8]}).set_index('date') - - f = lambda x: x.resample('1D').ffill() - expected = df.groupby('group').apply(f) - result = df.groupby('group').resample('1D').ffill() - assert_frame_equal(result, expected) - - def test_getitem(self): - g = self.frame.groupby('A') - - expected = g.B.apply(lambda x: x.resample('2s').mean()) - - result = g.resample('2s').B.mean() - assert_series_equal(result, expected) - - result = g.B.resample('2s').mean() - assert_series_equal(result, expected) - - result = g.resample('2s').mean().B - assert_series_equal(result, expected) - - def test_getitem_multiple(self): - - # GH 13174 - # multiple calls after selection causing an issue with aliasing - data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}] - df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) - r = df.groupby('id').resample('1D') - result = r['buyer'].count() - expected = Series([1, 1], - index=pd.MultiIndex.from_tuples( - [(1, Timestamp('2016-01-01')), - (2, Timestamp('2016-01-02'))], - names=['id', None]), - name='buyer') - assert_series_equal(result, expected) - - result = r['buyer'].count() - assert_series_equal(result, expected) - - def test_groupby_resample_on_api_with_getitem(self): - # GH 17813 - df = pd.DataFrame({'id': list('aabbb'), - 'date': pd.date_range('1-1-2016', periods=5), - 'data': 1}) - exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() - result = df.groupby('id').resample('2D', on='date')['data'].sum() - assert_series_equal(result, exp) - - def test_nearest(self): - - # GH 17496 - # Resample nearest - index = pd.date_range('1/1/2000', periods=3, freq='T') - result = Series(range(3), index=index).resample('20s').nearest() - - expected = Series( - [0, 0, 1, 1, 1, 2, 2], - index=pd.DatetimeIndex( - ['2000-01-01 00:00:00', '2000-01-01 00:00:20', - '2000-01-01 00:00:40', '2000-01-01 00:01:00', - '2000-01-01 00:01:20', '2000-01-01 00:01:40', - '2000-01-01 00:02:00'], - dtype='datetime64[ns]', - freq='20S')) - assert_series_equal(result, expected) - - def test_methods(self): - g = self.frame.groupby('A') - r = g.resample('2s') - - for f in ['first', 'last', 'median', 'sem', 'sum', 'mean', - 'min', 'max']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - for f in ['size']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_series_equal(result, expected) - - for f in ['count']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - # series only - for f in ['nunique']: - result = getattr(r.B, f)() - expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_series_equal(result, expected) - - for f in ['nearest', 'backfill', 'ffill', 'asfreq']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - result = r.ohlc() - expected = g.apply(lambda x: x.resample('2s').ohlc()) - assert_frame_equal(result, expected) - - for f in ['std', 'var']: - result = getattr(r, f)(ddof=1) - expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1)) - assert_frame_equal(result, expected) - - def test_apply(self): - - g = self.frame.groupby('A') - r = g.resample('2s') - - # reduction - expected = g.resample('2s').sum() - - def f(x): - return x.resample('2s').sum() - - result = r.apply(f) - assert_frame_equal(result, expected) - - def f(x): - return x.resample('2s').apply(lambda y: y.sum()) - - result = g.apply(f) - assert_frame_equal(result, expected) - - def test_apply_with_mutated_index(self): - # GH 15169 - index = pd.date_range('1-1-2015', '12-31-15', freq='D') - df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) - - def f(x): - s = Series([1, 2], index=['a', 'b']) - return s - - expected = df.groupby(pd.Grouper(freq='M')).apply(f) - - result = df.resample('M').apply(f) - assert_frame_equal(result, expected) - - # A case for series - expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f) - result = df['col1'].resample('M').apply(f) - assert_series_equal(result, expected) - - def test_resample_groupby_with_label(self): - # GH 13235 - index = date_range('2000-01-01', freq='2D', periods=5) - df = DataFrame(index=index, - data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]} - ) - result = df.groupby('col0').resample('1W', label='left').sum() - - mi = [np.array([0, 0, 1, 2]), - pd.to_datetime(np.array(['1999-12-26', '2000-01-02', - '2000-01-02', '2000-01-02']) - ) - ] - mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None]) - expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]}, - index=mindex - ) - - assert_frame_equal(result, expected) - - def test_consistency_with_window(self): - - # consistent return values with window - df = self.frame - expected = pd.Int64Index([1, 2, 3], name='A') - result = df.groupby('A').resample('2s').mean() - assert result.index.nlevels == 2 - tm.assert_index_equal(result.index.levels[0], expected) - - result = df.groupby('A').rolling(20).mean() - assert result.index.nlevels == 2 - tm.assert_index_equal(result.index.levels[0], expected) - - def test_median_duplicate_columns(self): - # GH 14233 - - df = DataFrame(np.random.randn(20, 3), - columns=list('aaa'), - index=pd.date_range('2012-01-01', periods=20, freq='s')) - df2 = df.copy() - df2.columns = ['a', 'b', 'c'] - expected = df2.resample('5s').median() - result = df.resample('5s').median() - expected.columns = result.columns - assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py new file mode 100644 index 0000000000000..03deaa16ab4e0 --- /dev/null +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -0,0 +1,252 @@ +# pylint: disable=E1101 + +from textwrap import dedent + +import numpy as np + +import pandas.util.testing as tm +from pandas.util.testing import assert_series_equal, assert_frame_equal + +import pandas as pd + +from pandas import Series, DataFrame, Timestamp + +from pandas.compat import range + +from pandas.core.indexes.datetimes import date_range + + +class TestResamplerGrouper(object): + + def setup_method(self, method): + self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.arange(40)}, + index=date_range('1/1/2000', + freq='s', + periods=40)) + + def test_tab_complete_ipython6_warning(self, ip): + from IPython.core.completer import provisionalcompleter + code = dedent("""\ + import pandas.util.testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """) + ip.run_code(code) + + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('rs.', 1)) + + def test_deferred_with_groupby(self): + + # GH 12486 + # support deferred resample ops with groupby + data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3], + ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7], + ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5], + ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1], + ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]] + + df = DataFrame(data, columns=['date', 'id', 'score']) + df.date = pd.to_datetime(df.date) + f = lambda x: x.set_index('date').resample('D').asfreq() + expected = df.groupby('id').apply(f) + result = df.set_index('date').groupby('id').resample('D').asfreq() + assert_frame_equal(result, expected) + + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + + f = lambda x: x.resample('1D').ffill() + expected = df.groupby('group').apply(f) + result = df.groupby('group').resample('1D').ffill() + assert_frame_equal(result, expected) + + def test_getitem(self): + g = self.frame.groupby('A') + + expected = g.B.apply(lambda x: x.resample('2s').mean()) + + result = g.resample('2s').B.mean() + assert_series_equal(result, expected) + + result = g.B.resample('2s').mean() + assert_series_equal(result, expected) + + result = g.resample('2s').mean().B + assert_series_equal(result, expected) + + def test_getitem_multiple(self): + + # GH 13174 + # multiple calls after selection causing an issue with aliasing + data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}] + df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) + r = df.groupby('id').resample('1D') + result = r['buyer'].count() + expected = Series([1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp('2016-01-01')), + (2, Timestamp('2016-01-02'))], + names=['id', None]), + name='buyer') + assert_series_equal(result, expected) + + result = r['buyer'].count() + assert_series_equal(result, expected) + + def test_groupby_resample_on_api_with_getitem(self): + # GH 17813 + df = pd.DataFrame({'id': list('aabbb'), + 'date': pd.date_range('1-1-2016', periods=5), + 'data': 1}) + exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() + result = df.groupby('id').resample('2D', on='date')['data'].sum() + assert_series_equal(result, exp) + + def test_nearest(self): + + # GH 17496 + # Resample nearest + index = pd.date_range('1/1/2000', periods=3, freq='T') + result = Series(range(3), index=index).resample('20s').nearest() + + expected = Series( + [0, 0, 1, 1, 1, 2, 2], + index=pd.DatetimeIndex( + ['2000-01-01 00:00:00', '2000-01-01 00:00:20', + '2000-01-01 00:00:40', '2000-01-01 00:01:00', + '2000-01-01 00:01:20', '2000-01-01 00:01:40', + '2000-01-01 00:02:00'], + dtype='datetime64[ns]', + freq='20S')) + assert_series_equal(result, expected) + + def test_methods(self): + g = self.frame.groupby('A') + r = g.resample('2s') + + for f in ['first', 'last', 'median', 'sem', 'sum', 'mean', + 'min', 'max']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_frame_equal(result, expected) + + for f in ['size']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_series_equal(result, expected) + + for f in ['count']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_frame_equal(result, expected) + + # series only + for f in ['nunique']: + result = getattr(r.B, f)() + expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_series_equal(result, expected) + + for f in ['nearest', 'backfill', 'ffill', 'asfreq']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_frame_equal(result, expected) + + result = r.ohlc() + expected = g.apply(lambda x: x.resample('2s').ohlc()) + assert_frame_equal(result, expected) + + for f in ['std', 'var']: + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1)) + assert_frame_equal(result, expected) + + def test_apply(self): + + g = self.frame.groupby('A') + r = g.resample('2s') + + # reduction + expected = g.resample('2s').sum() + + def f(x): + return x.resample('2s').sum() + + result = r.apply(f) + assert_frame_equal(result, expected) + + def f(x): + return x.resample('2s').apply(lambda y: y.sum()) + + result = g.apply(f) + assert_frame_equal(result, expected) + + def test_apply_with_mutated_index(self): + # GH 15169 + index = pd.date_range('1-1-2015', '12-31-15', freq='D') + df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) + + def f(x): + s = Series([1, 2], index=['a', 'b']) + return s + + expected = df.groupby(pd.Grouper(freq='M')).apply(f) + + result = df.resample('M').apply(f) + assert_frame_equal(result, expected) + + # A case for series + expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f) + result = df['col1'].resample('M').apply(f) + assert_series_equal(result, expected) + + def test_resample_groupby_with_label(self): + # GH 13235 + index = date_range('2000-01-01', freq='2D', periods=5) + df = DataFrame(index=index, + data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]} + ) + result = df.groupby('col0').resample('1W', label='left').sum() + + mi = [np.array([0, 0, 1, 2]), + pd.to_datetime(np.array(['1999-12-26', '2000-01-02', + '2000-01-02', '2000-01-02']) + ) + ] + mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None]) + expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]}, + index=mindex + ) + + assert_frame_equal(result, expected) + + def test_consistency_with_window(self): + + # consistent return values with window + df = self.frame + expected = pd.Int64Index([1, 2, 3], name='A') + result = df.groupby('A').resample('2s').mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + result = df.groupby('A').rolling(20).mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + def test_median_duplicate_columns(self): + # GH 14233 + + df = DataFrame(np.random.randn(20, 3), + columns=list('aaa'), + index=pd.date_range('2012-01-01', periods=20, freq='s')) + df2 = df.copy() + df2.columns = ['a', 'b', 'c'] + expected = df2.resample('5s').median() + result = df.resample('5s').median() + expected.columns = result.columns + assert_frame_equal(result, expected) From da3f61761caefb080d80de7ef465f4909e1190e4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 17:03:59 +0000 Subject: [PATCH 04/17] isort imports --- pandas/tests/resample/test_resample.py | 37 +++++++++---------- .../tests/resample/test_resampler_grouper.py | 11 ++---- pandas/tests/resample/test_time_grouper.py | 13 +++---- 3 files changed, 26 insertions(+), 35 deletions(-) diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index 55ccf9d61cf4b..60df5eddb4f35 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -1,37 +1,34 @@ # pylint: disable=E1101 -from warnings import catch_warnings, simplefilter from datetime import datetime, timedelta from functools import partial +from warnings import catch_warnings, simplefilter -import pytz -import pytest import dateutil import numpy as np +import pytest +import pytz -from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.ccalendar import DAYS, MONTHS - -import pandas.util.testing as tm -from pandas.util.testing import (assert_series_equal, assert_almost_equal, - assert_frame_equal, assert_index_equal) - -import pandas as pd - -from pandas import (Series, DataFrame, Panel, Index, isna, - notna, Timestamp, Timedelta) - -from pandas.compat import range, lrange, zip, OrderedDict +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas.compat import OrderedDict, lrange, range, zip from pandas.errors import AbstractMethodError, UnsupportedFunctionCall -import pandas.tseries.offsets as offsets -from pandas.tseries.offsets import Minute, BDay +import pandas as pd +from pandas import ( + DataFrame, Index, Panel, Series, Timedelta, Timestamp, isna, notna) from pandas.core.groupby.groupby import DataError - from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import period_range, PeriodIndex, Period +from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper -from pandas.core.indexes.timedeltas import timedelta_range, TimedeltaIndex +import pandas.util.testing as tm +from pandas.util.testing import ( + assert_almost_equal, assert_frame_equal, assert_index_equal, + assert_series_equal) + +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import BDay, Minute bday = BDay() diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 03deaa16ab4e0..395e05aa280b4 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -4,16 +4,13 @@ import numpy as np -import pandas.util.testing as tm -from pandas.util.testing import assert_series_equal, assert_frame_equal - -import pandas as pd - -from pandas import Series, DataFrame, Timestamp - from pandas.compat import range +import pandas as pd +from pandas import DataFrame, Series, Timestamp from pandas.core.indexes.datetimes import date_range +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal class TestResamplerGrouper(object): diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index d39d3294d48b9..0c507b58d6ec5 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -3,20 +3,17 @@ from datetime import datetime from operator import methodcaller -import pytest import numpy as np - -import pandas.util.testing as tm -from pandas.util.testing import assert_series_equal, assert_frame_equal - -import pandas as pd - -from pandas import Series, DataFrame, Panel +import pytest from pandas.compat import zip +import pandas as pd +from pandas import DataFrame, Panel, Series from pandas.core.indexes.datetimes import date_range from pandas.core.resample import TimeGrouper +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal class TestTimeGrouper(object): From d356fd74f62d2e3fbe73e99df7a67e83fbe9239a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 17:15:37 +0000 Subject: [PATCH 05/17] split off TestTimedeltaIndex class --- pandas/tests/resample/test_resample.py | 54 --------------- pandas/tests/resample/test_timedelta_index.py | 65 +++++++++++++++++++ 2 files changed, 65 insertions(+), 54 deletions(-) create mode 100644 pandas/tests/resample/test_timedelta_index.py diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index 60df5eddb4f35..1077bf2f6590c 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -2926,57 +2926,3 @@ def test_resample_with_only_nat(self): expected = DataFrame([], index=expected_index) result = frame.resample('1s').mean() assert_frame_equal(result, expected) - - -class TestTimedeltaIndex(Base): - _index_factory = lambda x: timedelta_range - - @pytest.fixture - def _index_start(self): - return '1 day' - - @pytest.fixture - def _index_end(self): - return '10 day' - - @pytest.fixture - def _series_name(self): - return 'tdi' - - def create_series(self): - i = timedelta_range('1 day', - '10 day', freq='D') - - return Series(np.arange(len(i)), index=i, name='tdi') - - def test_asfreq_bug(self): - import datetime as dt - df = DataFrame(data=[1, 3], - index=[dt.timedelta(), dt.timedelta(minutes=3)]) - result = df.resample('1T').asfreq() - expected = DataFrame(data=[1, np.nan, np.nan, 3], - index=timedelta_range('0 day', - periods=4, - freq='1T')) - assert_frame_equal(result, expected) - - def test_resample_with_nat(self): - # GH 13223 - index = pd.to_timedelta(['0s', pd.NaT, '2s']) - result = DataFrame({'value': [2, 3, 5]}, index).resample('1s').mean() - expected = DataFrame({'value': [2.5, np.nan, 5.0]}, - index=timedelta_range('0 day', - periods=3, - freq='1S')) - assert_frame_equal(result, expected) - - def test_resample_as_freq_with_subperiod(self): - # GH 13022 - index = timedelta_range('00:00:00', '00:10:00', freq='5T') - df = DataFrame(data={'value': [1, 5, 10]}, index=index) - result = df.resample('2T').asfreq() - expected_data = {'value': [1, np.nan, np.nan, np.nan, np.nan, 10]} - expected = DataFrame(data=expected_data, - index=timedelta_range('00:00:00', - '00:10:00', freq='2T')) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_timedelta_index.py b/pandas/tests/resample/test_timedelta_index.py new file mode 100644 index 0000000000000..6e9eb562ab31c --- /dev/null +++ b/pandas/tests/resample/test_timedelta_index.py @@ -0,0 +1,65 @@ +# pylint: disable=E1101 + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +from pandas.core.indexes.timedeltas import timedelta_range +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal +from test_resample import Base + + +class TestTimedeltaIndex(Base): + _index_factory = lambda x: timedelta_range + + @pytest.fixture + def _index_start(self): + return '1 day' + + @pytest.fixture + def _index_end(self): + return '10 day' + + @pytest.fixture + def _series_name(self): + return 'tdi' + + def create_series(self): + i = timedelta_range('1 day', + '10 day', freq='D') + + return Series(np.arange(len(i)), index=i, name='tdi') + + def test_asfreq_bug(self): + import datetime as dt + df = DataFrame(data=[1, 3], + index=[dt.timedelta(), dt.timedelta(minutes=3)]) + result = df.resample('1T').asfreq() + expected = DataFrame(data=[1, np.nan, np.nan, 3], + index=timedelta_range('0 day', + periods=4, + freq='1T')) + assert_frame_equal(result, expected) + + def test_resample_with_nat(self): + # GH 13223 + index = pd.to_timedelta(['0s', pd.NaT, '2s']) + result = DataFrame({'value': [2, 3, 5]}, index).resample('1s').mean() + expected = DataFrame({'value': [2.5, np.nan, 5.0]}, + index=timedelta_range('0 day', + periods=3, + freq='1S')) + assert_frame_equal(result, expected) + + def test_resample_as_freq_with_subperiod(self): + # GH 13022 + index = timedelta_range('00:00:00', '00:10:00', freq='5T') + df = DataFrame(data={'value': [1, 5, 10]}, index=index) + result = df.resample('2T').asfreq() + expected_data = {'value': [1, np.nan, np.nan, np.nan, np.nan, 10]} + expected = DataFrame(data=expected_data, + index=timedelta_range('00:00:00', + '00:10:00', freq='2T')) + tm.assert_frame_equal(result, expected) From d51b65478ca07b88ab1a0565f406ca674d18972f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 17:27:48 +0000 Subject: [PATCH 06/17] split off TestPeriodIndex class --- pandas/tests/resample/test_period_index.py | 739 +++++++++++++++++++++ pandas/tests/resample/test_resample.py | 720 +------------------- 2 files changed, 740 insertions(+), 719 deletions(-) create mode 100644 pandas/tests/resample/test_period_index.py diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py new file mode 100644 index 0000000000000..2b9eea3ac2228 --- /dev/null +++ b/pandas/tests/resample/test_period_index.py @@ -0,0 +1,739 @@ +# pylint: disable=E1101 + +from datetime import datetime, timedelta + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas.compat import lrange, range, zip + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.resample import DatetimeIndex +import pandas.util.testing as tm +from pandas.util.testing import ( + assert_almost_equal, assert_frame_equal, assert_series_equal) +from test_resample import Base, _simple_pts, resample_methods + +import pandas.tseries.offsets as offsets + + +class TestPeriodIndex(Base): + _index_factory = lambda x: period_range + + @pytest.fixture + def _series_name(self): + return 'pi' + + def create_series(self): + # TODO: replace calls to .create_series() by injecting the series + # fixture + i = period_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + return Series(np.arange(len(i)), index=i, name='pi') + + @pytest.mark.parametrize('freq', ['2D', '1H', '2H']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_asfreq(self, series_and_frame, freq, kind): + # GH 12884, 15944 + # make sure .asfreq() returns PeriodIndex (except kind='timestamp') + + obj = series_and_frame + if kind == 'timestamp': + expected = obj.to_timestamp().resample(freq).asfreq() + else: + start = obj.index[0].to_timestamp(how='start') + end = (obj.index[-1] + obj.index.freq).to_timestamp(how='start') + new_index = date_range(start=start, end=end, freq=freq, + closed='left') + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + result = obj.resample(freq, kind=kind).asfreq() + assert_almost_equal(result, expected) + + def test_asfreq_fill_value(self): + # test for fill value during resampling, issue 3715 + + s = self.create_series() + new_index = date_range(s.index[0].to_timestamp(how='start'), + (s.index[-1]).to_timestamp(how='start'), + freq='1H') + expected = s.to_timestamp().reindex(new_index, fill_value=4.0) + result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0) + assert_series_equal(result, expected) + + frame = s.to_frame('value') + new_index = date_range(frame.index[0].to_timestamp(how='start'), + (frame.index[-1]).to_timestamp(how='start'), + freq='1H') + expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) + result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W']) + @pytest.mark.parametrize('kind', [None, 'period', 'timestamp']) + def test_selection(self, index, freq, kind): + # This is a bug, these should be implemented + # GH 14008 + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + with pytest.raises(NotImplementedError): + df.resample(freq, on='date', kind=kind) + with pytest.raises(NotImplementedError): + df.resample(freq, level='d', kind=kind) + + def test_annual_upsample_D_s_f(self): + self._check_annual_upsample_cases('D', 'start', 'ffill') + + def test_annual_upsample_D_e_f(self): + self._check_annual_upsample_cases('D', 'end', 'ffill') + + def test_annual_upsample_D_s_b(self): + self._check_annual_upsample_cases('D', 'start', 'bfill') + + def test_annual_upsample_D_e_b(self): + self._check_annual_upsample_cases('D', 'end', 'bfill') + + def test_annual_upsample_B_s_f(self): + self._check_annual_upsample_cases('B', 'start', 'ffill') + + def test_annual_upsample_B_e_f(self): + self._check_annual_upsample_cases('B', 'end', 'ffill') + + def test_annual_upsample_B_s_b(self): + self._check_annual_upsample_cases('B', 'start', 'bfill') + + def test_annual_upsample_B_e_b(self): + self._check_annual_upsample_cases('B', 'end', 'bfill') + + def test_annual_upsample_M_s_f(self): + self._check_annual_upsample_cases('M', 'start', 'ffill') + + def test_annual_upsample_M_e_f(self): + self._check_annual_upsample_cases('M', 'end', 'ffill') + + def test_annual_upsample_M_s_b(self): + self._check_annual_upsample_cases('M', 'start', 'bfill') + + def test_annual_upsample_M_e_b(self): + self._check_annual_upsample_cases('M', 'end', 'bfill') + + def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'): + for month in MONTHS: + ts = _simple_pts('1/1/1990', end, freq='A-%s' % month) + + result = getattr(ts.resample(targ, convention=conv), meth)() + expected = result.to_timestamp(targ, how=conv) + expected = expected.asfreq(targ, meth).to_period() + assert_series_equal(result, expected) + + def test_basic_downsample(self): + ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') + result = ts.resample('a-dec').mean() + + expected = ts.groupby(ts.index.year).mean() + expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec') + assert_series_equal(result, expected) + + # this is ok + assert_series_equal(ts.resample('a-dec').mean(), result) + assert_series_equal(ts.resample('a').mean(), result) + + def test_not_subperiod(self): + # These are incompatible period rules for resampling + ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed') + pytest.raises(ValueError, lambda: ts.resample('a-dec').mean()) + pytest.raises(ValueError, lambda: ts.resample('q-mar').mean()) + pytest.raises(ValueError, lambda: ts.resample('M').mean()) + pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) + + @pytest.mark.parametrize('freq', ['D', '2D']) + def test_basic_upsample(self, freq): + ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') + result = ts.resample('a-dec').mean() + + resampled = result.resample(freq, convention='end').ffill() + expected = result.to_timestamp(freq, how='end') + expected = expected.asfreq(freq, 'ffill').to_period(freq) + assert_series_equal(resampled, expected) + + def test_upsample_with_limit(self): + rng = period_range('1/1/2000', periods=5, freq='A') + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample('M', convention='end').ffill(limit=2) + expected = ts.asfreq('M').reindex(result.index, method='ffill', + limit=2) + assert_series_equal(result, expected) + + def test_annual_upsample(self): + ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC') + df = DataFrame({'a': ts}) + rdf = df.resample('D').ffill() + exp = df['a'].resample('D').ffill() + assert_series_equal(rdf['a'], exp) + + rng = period_range('2000', '2003', freq='A-DEC') + ts = Series([1, 2, 3, 4], index=rng) + + result = ts.resample('M').ffill() + ex_index = period_range('2000-01', '2003-12', freq='M') + + expected = ts.asfreq('M', how='start').reindex(ex_index, + method='ffill') + assert_series_equal(result, expected) + + @pytest.mark.parametrize('month', MONTHS) + @pytest.mark.parametrize('target', ['D', 'B', 'M']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_quarterly_upsample(self, month, target, convention): + freq = 'Q-{month}'.format(month=month) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) + + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_monthly_upsample(self, target, convention): + ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) + + def test_resample_basic(self): + # GH3609 + s = Series(range(100), index=date_range( + '20130101', freq='s', periods=100, name='idx'), dtype='float') + s[10:30] = np.nan + index = PeriodIndex([ + Period('2013-01-01 00:00', 'T'), + Period('2013-01-01 00:01', 'T')], name='idx') + expected = Series([34.5, 79.5], index=index) + result = s.to_period().resample('T', kind='period').mean() + assert_series_equal(result, expected) + result2 = s.resample('T', kind='period').mean() + assert_series_equal(result2, expected) + + @pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]), + ('2M', [31 + 29, 31 + 9])]) + def test_resample_count(self, freq, expected_vals): + # GH12774 + series = Series(1, index=pd.period_range(start='2000', periods=100)) + result = series.resample(freq).count() + expected_index = pd.period_range(start='2000', freq=freq, + periods=len(expected_vals)) + expected = Series(expected_vals, index=expected_index) + assert_series_equal(result, expected) + + def test_resample_same_freq(self): + + # GH12770 + series = Series(range(3), index=pd.period_range( + start='2000', periods=3, freq='M')) + expected = series + + for method in resample_methods: + result = getattr(series.resample('M'), method)() + assert_series_equal(result, expected) + + def test_resample_incompat_freq(self): + + with pytest.raises(IncompatibleFrequency): + Series(range(3), index=pd.period_range( + start='2000', periods=3, freq='M')).resample('W').mean() + + def test_with_local_timezone_pytz(self): + # see gh-5430 + local_timezone = pytz.timezone('America/Los_Angeles') + + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, + tzinfo=pytz.utc) + # 1 day later + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, + tzinfo=pytz.utc) + + index = pd.date_range(start, end, freq='H') + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample('D', kind='period').mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = (pd.period_range(start=start, end=end, freq='D') - + offsets.Day()) + expected = Series(1, index=expected_index) + assert_series_equal(result, expected) + + def test_resample_with_pytz(self): + # GH 13238 + s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H", + tz="US/Eastern")) + result = s.resample("D").mean() + expected = Series(2, index=pd.DatetimeIndex(['2017-01-01', + '2017-01-02'], + tz="US/Eastern")) + assert_series_equal(result, expected) + # Especially assert that the timezone is LMT for pytz + assert result.index.tz == pytz.timezone('US/Eastern') + + def test_with_local_timezone_dateutil(self): + # see gh-5430 + local_timezone = 'dateutil/America/Los_Angeles' + + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, + tzinfo=dateutil.tz.tzutc()) + # 1 day later + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, + tzinfo=dateutil.tz.tzutc()) + + index = pd.date_range(start, end, freq='H', name='idx') + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample('D', kind='period').mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = (pd.period_range(start=start, end=end, freq='D', + name='idx') - offsets.Day()) + expected = Series(1, index=expected_index) + assert_series_equal(result, expected) + + def test_resample_nonexistent_time_bin_edge(self): + # GH 19375 + index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T') + s = Series(np.zeros(len(index)), index=index) + expected = s.tz_localize('US/Pacific') + result = expected.resample('900S').mean() + tm.assert_series_equal(result, expected) + + # GH 23742 + index = date_range(start='2017-10-10', end='2017-10-20', freq='1H') + index = index.tz_localize('UTC').tz_convert('America/Sao_Paulo') + df = DataFrame(data=list(range(len(index))), index=index) + result = df.groupby(pd.Grouper(freq='1D')) + expected = date_range(start='2017-10-09', end='2017-10-20', freq='D', + tz="America/Sao_Paulo") + tm.assert_index_equal(result.count().index, expected) + + def test_resample_ambiguous_time_bin_edge(self): + # GH 10117 + idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00", + freq="30T", tz="Europe/London") + expected = Series(np.zeros(len(idx)), index=idx) + result = expected.resample('30T').mean() + tm.assert_series_equal(result, expected) + + def test_fill_method_and_how_upsample(self): + # GH2073 + s = Series(np.arange(9, dtype='int64'), + index=date_range('2010-01-01', periods=9, freq='Q')) + last = s.resample('M').ffill() + both = s.resample('M').ffill().resample('M').last().astype('int64') + assert_series_equal(last, both) + + @pytest.mark.parametrize('day', DAYS) + @pytest.mark.parametrize('target', ['D', 'B']) + @pytest.mark.parametrize('convention', ['start', 'end']) + def test_weekly_upsample(self, day, target, convention): + freq = 'W-{day}'.format(day=day) + ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, 'ffill').to_period() + assert_series_equal(result, expected) + + def test_resample_to_timestamps(self): + ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') + + result = ts.resample('A-DEC', kind='timestamp').mean() + expected = ts.to_timestamp(how='start').resample('A-DEC').mean() + assert_series_equal(result, expected) + + def test_resample_to_quarterly(self): + for month in MONTHS: + ts = _simple_pts('1990', '1992', freq='A-%s' % month) + quar_ts = ts.resample('Q-%s' % month).ffill() + + stamps = ts.to_timestamp('D', how='start') + qdates = period_range(ts.index[0].asfreq('D', 'start'), + ts.index[-1].asfreq('D', 'end'), + freq='Q-%s' % month) + + expected = stamps.reindex(qdates.to_timestamp('D', 's'), + method='ffill') + expected.index = qdates + + assert_series_equal(quar_ts, expected) + + # conforms, but different month + ts = _simple_pts('1990', '1992', freq='A-JUN') + + for how in ['start', 'end']: + result = ts.resample('Q-MAR', convention=how).ffill() + expected = ts.asfreq('Q-MAR', how=how) + expected = expected.reindex(result.index, method='ffill') + + # .to_timestamp('D') + # expected = expected.resample('Q-MAR').ffill() + + assert_series_equal(result, expected) + + def test_resample_fill_missing(self): + rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A') + + s = Series(np.random.randn(4), index=rng) + + stamps = s.to_timestamp() + filled = s.resample('A').ffill() + expected = stamps.resample('A').ffill().to_period('A') + assert_series_equal(filled, expected) + + def test_cant_fill_missing_dups(self): + rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A') + s = Series(np.random.randn(5), index=rng) + pytest.raises(Exception, lambda: s.resample('A').ffill()) + + @pytest.mark.parametrize('freq', ['5min']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_resample_5minute(self, freq, kind): + rng = period_range('1/1/2000', '1/5/2000', freq='T') + ts = Series(np.random.randn(len(rng)), index=rng) + expected = ts.to_timestamp().resample(freq).mean() + if kind != 'timestamp': + expected = expected.to_period(freq) + result = ts.resample(freq, kind=kind).mean() + assert_series_equal(result, expected) + + def test_upsample_daily_business_daily(self): + ts = _simple_pts('1/1/2000', '2/1/2000', freq='B') + + result = ts.resample('D').asfreq() + expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000')) + assert_series_equal(result, expected) + + ts = _simple_pts('1/1/2000', '2/1/2000') + result = ts.resample('H', convention='s').asfreq() + exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H') + expected = ts.asfreq('H', how='s').reindex(exp_rng) + assert_series_equal(result, expected) + + def test_resample_irregular_sparse(self): + dr = date_range(start='1/1/2012', freq='5min', periods=1000) + s = Series(np.array(100), index=dr) + # subset the data. + subset = s[:'2012-01-04 06:55'] + + result = subset.resample('10min').apply(len) + expected = s.resample('10min').apply(len).loc[result.index] + assert_series_equal(result, expected) + + def test_resample_weekly_all_na(self): + rng = date_range('1/1/2000', periods=10, freq='W-WED') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('W-THU').asfreq() + + assert result.isna().all() + + result = ts.resample('W-THU').asfreq().ffill()[:-1] + expected = ts.asfreq('W-THU').ffill() + assert_series_equal(result, expected) + + def test_resample_tz_localized(self): + dr = date_range(start='2012-4-13', end='2012-5-1') + ts = Series(lrange(len(dr)), dr) + + ts_utc = ts.tz_localize('UTC') + ts_local = ts_utc.tz_convert('America/Los_Angeles') + + result = ts_local.resample('W').mean() + + ts_local_naive = ts_local.copy() + ts_local_naive.index = [x.replace(tzinfo=None) + for x in ts_local_naive.index.to_pydatetime()] + + exp = ts_local_naive.resample( + 'W').mean().tz_localize('America/Los_Angeles') + + assert_series_equal(result, exp) + + # it works + result = ts_local.resample('D').mean() + + # #2245 + idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T', + tz='Australia/Sydney') + s = Series([1, 2], index=idx) + + result = s.resample('D', closed='right', label='right').mean() + ex_index = date_range('2001-09-21', periods=1, freq='D', + tz='Australia/Sydney') + expected = Series([1.5], index=ex_index) + + assert_series_equal(result, expected) + + # for good measure + result = s.resample('D', kind='period').mean() + ex_index = period_range('2001-09-20', periods=1, freq='D') + expected = Series([1.5], index=ex_index) + assert_series_equal(result, expected) + + # GH 6397 + # comparing an offset that doesn't propagate tz's + rng = date_range('1/1/2011', periods=20000, freq='H') + rng = rng.tz_localize('EST') + ts = DataFrame(index=rng) + ts['first'] = np.random.randn(len(rng)) + ts['second'] = np.cumsum(np.random.randn(len(rng))) + expected = DataFrame( + { + 'first': ts.resample('A').sum()['first'], + 'second': ts.resample('A').mean()['second']}, + columns=['first', 'second']) + result = ts.resample( + 'A').agg({'first': np.sum, + 'second': np.mean}).reindex(columns=['first', 'second']) + assert_frame_equal(result, expected) + + def test_closed_left_corner(self): + # #1465 + s = Series(np.random.randn(21), + index=date_range(start='1/1/2012 9:30', + freq='1min', periods=21)) + s[0] = np.nan + + result = s.resample('10min', closed='left', label='right').mean() + exp = s[1:].resample('10min', closed='left', label='right').mean() + assert_series_equal(result, exp) + + result = s.resample('10min', closed='left', label='left').mean() + exp = s[1:].resample('10min', closed='left', label='left').mean() + + ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3) + + tm.assert_index_equal(result.index, ex_index) + assert_series_equal(result, exp) + + def test_quarterly_resampling(self): + rng = period_range('2000Q1', periods=10, freq='Q-DEC') + ts = Series(np.arange(10), index=rng) + + result = ts.resample('A').mean() + exp = ts.to_timestamp().resample('A').mean().to_period() + assert_series_equal(result, exp) + + def test_resample_weekly_bug_1726(self): + # 8/6/12 is a Monday + ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D") + n = len(ind) + data = [[x] * 5 for x in range(n)] + df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'], + index=ind) + + # it works! + df.resample('W-MON', closed='left', label='left').first() + + def test_resample_with_dst_time_change(self): + # GH 15549 + index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], + tz='UTC').tz_convert('America/Chicago') + df = pd.DataFrame([1, 2], index=index) + result = df.resample('12h', closed='right', + label='right').last().ffill() + + expected_index_values = ['2016-03-09 12:00:00-06:00', + '2016-03-10 00:00:00-06:00', + '2016-03-10 12:00:00-06:00', + '2016-03-11 00:00:00-06:00', + '2016-03-11 12:00:00-06:00', + '2016-03-12 00:00:00-06:00', + '2016-03-12 12:00:00-06:00', + '2016-03-13 00:00:00-06:00', + '2016-03-13 13:00:00-05:00', + '2016-03-14 01:00:00-05:00', + '2016-03-14 13:00:00-05:00', + '2016-03-15 01:00:00-05:00', + '2016-03-15 13:00:00-05:00'] + index = pd.to_datetime(expected_index_values, utc=True).tz_convert( + 'America/Chicago') + expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0], index=index) + assert_frame_equal(result, expected) + + def test_resample_bms_2752(self): + # GH2753 + foo = Series(index=pd.bdate_range('20000101', '20000201')) + res1 = foo.resample("BMS").mean() + res2 = foo.resample("BMS").mean().resample("B").mean() + assert res1.index[0] == Timestamp('20000103') + assert res1.index[0] == res2.index[0] + + # def test_monthly_convention_span(self): + # rng = period_range('2000-01', periods=3, freq='M') + # ts = Series(np.arange(3), index=rng) + + # # hacky way to get same thing + # exp_index = period_range('2000-01-01', '2000-03-31', freq='D') + # expected = ts.asfreq('D', how='end').reindex(exp_index) + # expected = expected.fillna(method='bfill') + + # result = ts.resample('D', convention='span').mean() + + # assert_series_equal(result, expected) + + def test_default_right_closed_label(self): + end_freq = ['D', 'Q', 'M', 'D'] + end_types = ['M', 'A', 'Q', 'W'] + + for from_freq, to_freq in zip(end_freq, end_types): + idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + assert_frame_equal(resampled, df.resample(to_freq, closed='right', + label='right').mean()) + + def test_default_left_closed_label(self): + others = ['MS', 'AS', 'QS', 'D', 'H'] + others_freq = ['D', 'Q', 'M', 'H', 'T'] + + for from_freq, to_freq in zip(others_freq, others): + idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + assert_frame_equal(resampled, df.resample(to_freq, closed='left', + label='left').mean()) + + def test_all_values_single_bin(self): + # 2070 + index = period_range(start="2012-01-01", end="2012-12-31", freq="M") + s = Series(np.random.randn(len(index)), index=index) + + result = s.resample("A").mean() + tm.assert_almost_equal(result[0], s.mean()) + + def test_evenly_divisible_with_no_extra_bins(self): + # 4076 + # when the frequency is evenly divisible, sometimes extra bins + + df = DataFrame(np.random.randn(9, 3), + index=date_range('2000-1-1', periods=9)) + result = df.resample('5D').mean() + expected = pd.concat( + [df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T + expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')] + assert_frame_equal(result, expected) + + index = date_range(start='2001-5-4', periods=28) + df = DataFrame( + [{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90, + 'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 + + [{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10, + 'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28, + index=index.append(index)).sort_index() + + index = date_range('2001-5-4', periods=4, freq='7D') + expected = DataFrame( + [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14, + 'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4, + index=index) + result = df.resample('7D').count() + assert_frame_equal(result, expected) + + expected = DataFrame( + [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700, + 'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4, + index=index) + result = df.resample('7D').sum() + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + @pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']]) + def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): + # make sure passing loffset returns DatetimeIndex in all cases + # basic method taken from Base.test_resample_loffset_arg_type() + df = frame + expected_means = [df.values[i:i + 2].mean() + for i in range(0, len(df.values), 2)] + expected_index = self.create_index(df.index[0], + periods=len(df.index) / 2, + freq='2D') + + # loffset coerces PeriodIndex to DateTimeIndex + expected_index = expected_index.to_timestamp() + expected_index += timedelta(hours=2) + expected = DataFrame({'value': expected_means}, index=expected_index) + + result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result_how = df.resample('2D', how=agg_arg, loffset='2H', + kind=kind) + if isinstance(agg_arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')]) + assert_frame_equal(result_agg, expected) + assert_frame_equal(result_how, expected) + + @pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)]) + @pytest.mark.parametrize('kind', [None, 'period']) + def test_upsampling_ohlc(self, freq, period_mult, kind): + # GH 13083 + pi = PeriodIndex(start='2000', freq='D', periods=10) + s = Series(range(len(pi)), index=pi) + expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) + + # timestamp-based resampling doesn't include all sub-periods + # of the last original period, so extend accordingly: + new_index = PeriodIndex(start='2000', freq=freq, + periods=period_mult * len(pi)) + expected = expected.reindex(new_index) + result = s.resample(freq, kind=kind).ohlc() + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('periods, values', + [([pd.NaT, '1970-01-01 00:00:00', pd.NaT, + '1970-01-01 00:00:02', '1970-01-01 00:00:03'], + [2, 3, 5, 7, 11]), + ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT, + pd.NaT, pd.NaT, '1970-01-01 00:00:02', + '1970-01-01 00:00:03', pd.NaT, pd.NaT], + [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])]) + @pytest.mark.parametrize('freq, expected_values', + [('1s', [3, np.NaN, 7, 11]), + ('2s', [3, int((7 + 11) / 2)]), + ('3s', [int((3 + 7) / 2), 11])]) + def test_resample_with_nat(self, periods, values, freq, expected_values): + # GH 13224 + index = PeriodIndex(periods, freq='S') + frame = DataFrame(values, index=index) + + expected_index = period_range('1970-01-01 00:00:00', + periods=len(expected_values), freq=freq) + expected = DataFrame(expected_values, index=expected_index) + result = frame.resample(freq).mean() + assert_frame_equal(result, expected) + + def test_resample_with_only_nat(self): + # GH 13224 + pi = PeriodIndex([pd.NaT] * 3, freq='S') + frame = DataFrame([2, 3, 5], index=pi) + expected_index = PeriodIndex(data=[], freq=pi.freq) + expected = DataFrame([], index=expected_index) + result = frame.resample('1s').mean() + assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index 1077bf2f6590c..96e25957fa2d3 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -4,14 +4,11 @@ from functools import partial from warnings import catch_warnings, simplefilter -import dateutil import numpy as np import pytest import pytz -from pandas._libs.tslibs.ccalendar import DAYS, MONTHS -from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.compat import OrderedDict, lrange, range, zip +from pandas.compat import OrderedDict, range, zip from pandas.errors import AbstractMethodError, UnsupportedFunctionCall import pandas as pd @@ -2211,718 +2208,3 @@ def f(data, add_arg): result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample('D').mean().multiply(multiplier) assert_frame_equal(result, expected) - - -class TestPeriodIndex(Base): - _index_factory = lambda x: period_range - - @pytest.fixture - def _series_name(self): - return 'pi' - - def create_series(self): - # TODO: replace calls to .create_series() by injecting the series - # fixture - i = period_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - - return Series(np.arange(len(i)), index=i, name='pi') - - @pytest.mark.parametrize('freq', ['2D', '1H', '2H']) - @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) - def test_asfreq(self, series_and_frame, freq, kind): - # GH 12884, 15944 - # make sure .asfreq() returns PeriodIndex (except kind='timestamp') - - obj = series_and_frame - if kind == 'timestamp': - expected = obj.to_timestamp().resample(freq).asfreq() - else: - start = obj.index[0].to_timestamp(how='start') - end = (obj.index[-1] + obj.index.freq).to_timestamp(how='start') - new_index = date_range(start=start, end=end, freq=freq, - closed='left') - expected = obj.to_timestamp().reindex(new_index).to_period(freq) - result = obj.resample(freq, kind=kind).asfreq() - assert_almost_equal(result, expected) - - def test_asfreq_fill_value(self): - # test for fill value during resampling, issue 3715 - - s = self.create_series() - new_index = date_range(s.index[0].to_timestamp(how='start'), - (s.index[-1]).to_timestamp(how='start'), - freq='1H') - expected = s.to_timestamp().reindex(new_index, fill_value=4.0) - result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0) - assert_series_equal(result, expected) - - frame = s.to_frame('value') - new_index = date_range(frame.index[0].to_timestamp(how='start'), - (frame.index[-1]).to_timestamp(how='start'), - freq='1H') - expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) - result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0) - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W']) - @pytest.mark.parametrize('kind', [None, 'period', 'timestamp']) - def test_selection(self, index, freq, kind): - # This is a bug, these should be implemented - # GH 14008 - rng = np.arange(len(index), dtype=np.int64) - df = DataFrame({'date': index, 'a': rng}, - index=pd.MultiIndex.from_arrays([rng, index], - names=['v', 'd'])) - with pytest.raises(NotImplementedError): - df.resample(freq, on='date', kind=kind) - with pytest.raises(NotImplementedError): - df.resample(freq, level='d', kind=kind) - - def test_annual_upsample_D_s_f(self): - self._check_annual_upsample_cases('D', 'start', 'ffill') - - def test_annual_upsample_D_e_f(self): - self._check_annual_upsample_cases('D', 'end', 'ffill') - - def test_annual_upsample_D_s_b(self): - self._check_annual_upsample_cases('D', 'start', 'bfill') - - def test_annual_upsample_D_e_b(self): - self._check_annual_upsample_cases('D', 'end', 'bfill') - - def test_annual_upsample_B_s_f(self): - self._check_annual_upsample_cases('B', 'start', 'ffill') - - def test_annual_upsample_B_e_f(self): - self._check_annual_upsample_cases('B', 'end', 'ffill') - - def test_annual_upsample_B_s_b(self): - self._check_annual_upsample_cases('B', 'start', 'bfill') - - def test_annual_upsample_B_e_b(self): - self._check_annual_upsample_cases('B', 'end', 'bfill') - - def test_annual_upsample_M_s_f(self): - self._check_annual_upsample_cases('M', 'start', 'ffill') - - def test_annual_upsample_M_e_f(self): - self._check_annual_upsample_cases('M', 'end', 'ffill') - - def test_annual_upsample_M_s_b(self): - self._check_annual_upsample_cases('M', 'start', 'bfill') - - def test_annual_upsample_M_e_b(self): - self._check_annual_upsample_cases('M', 'end', 'bfill') - - def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'): - for month in MONTHS: - ts = _simple_pts('1/1/1990', end, freq='A-%s' % month) - - result = getattr(ts.resample(targ, convention=conv), meth)() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, meth).to_period() - assert_series_equal(result, expected) - - def test_basic_downsample(self): - ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') - result = ts.resample('a-dec').mean() - - expected = ts.groupby(ts.index.year).mean() - expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec') - assert_series_equal(result, expected) - - # this is ok - assert_series_equal(ts.resample('a-dec').mean(), result) - assert_series_equal(ts.resample('a').mean(), result) - - def test_not_subperiod(self): - # These are incompatible period rules for resampling - ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed') - pytest.raises(ValueError, lambda: ts.resample('a-dec').mean()) - pytest.raises(ValueError, lambda: ts.resample('q-mar').mean()) - pytest.raises(ValueError, lambda: ts.resample('M').mean()) - pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) - - @pytest.mark.parametrize('freq', ['D', '2D']) - def test_basic_upsample(self, freq): - ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') - result = ts.resample('a-dec').mean() - - resampled = result.resample(freq, convention='end').ffill() - expected = result.to_timestamp(freq, how='end') - expected = expected.asfreq(freq, 'ffill').to_period(freq) - assert_series_equal(resampled, expected) - - def test_upsample_with_limit(self): - rng = period_range('1/1/2000', periods=5, freq='A') - ts = Series(np.random.randn(len(rng)), rng) - - result = ts.resample('M', convention='end').ffill(limit=2) - expected = ts.asfreq('M').reindex(result.index, method='ffill', - limit=2) - assert_series_equal(result, expected) - - def test_annual_upsample(self): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC') - df = DataFrame({'a': ts}) - rdf = df.resample('D').ffill() - exp = df['a'].resample('D').ffill() - assert_series_equal(rdf['a'], exp) - - rng = period_range('2000', '2003', freq='A-DEC') - ts = Series([1, 2, 3, 4], index=rng) - - result = ts.resample('M').ffill() - ex_index = period_range('2000-01', '2003-12', freq='M') - - expected = ts.asfreq('M', how='start').reindex(ex_index, - method='ffill') - assert_series_equal(result, expected) - - @pytest.mark.parametrize('month', MONTHS) - @pytest.mark.parametrize('target', ['D', 'B', 'M']) - @pytest.mark.parametrize('convention', ['start', 'end']) - def test_quarterly_upsample(self, month, target, convention): - freq = 'Q-{month}'.format(month=month) - ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) - result = ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, 'ffill').to_period() - assert_series_equal(result, expected) - - @pytest.mark.parametrize('target', ['D', 'B']) - @pytest.mark.parametrize('convention', ['start', 'end']) - def test_monthly_upsample(self, target, convention): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') - result = ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, 'ffill').to_period() - assert_series_equal(result, expected) - - def test_resample_basic(self): - # GH3609 - s = Series(range(100), index=date_range( - '20130101', freq='s', periods=100, name='idx'), dtype='float') - s[10:30] = np.nan - index = PeriodIndex([ - Period('2013-01-01 00:00', 'T'), - Period('2013-01-01 00:01', 'T')], name='idx') - expected = Series([34.5, 79.5], index=index) - result = s.to_period().resample('T', kind='period').mean() - assert_series_equal(result, expected) - result2 = s.resample('T', kind='period').mean() - assert_series_equal(result2, expected) - - @pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]), - ('2M', [31 + 29, 31 + 9])]) - def test_resample_count(self, freq, expected_vals): - # GH12774 - series = Series(1, index=pd.period_range(start='2000', periods=100)) - result = series.resample(freq).count() - expected_index = pd.period_range(start='2000', freq=freq, - periods=len(expected_vals)) - expected = Series(expected_vals, index=expected_index) - assert_series_equal(result, expected) - - def test_resample_same_freq(self): - - # GH12770 - series = Series(range(3), index=pd.period_range( - start='2000', periods=3, freq='M')) - expected = series - - for method in resample_methods: - result = getattr(series.resample('M'), method)() - assert_series_equal(result, expected) - - def test_resample_incompat_freq(self): - - with pytest.raises(IncompatibleFrequency): - Series(range(3), index=pd.period_range( - start='2000', periods=3, freq='M')).resample('W').mean() - - def test_with_local_timezone_pytz(self): - # see gh-5430 - local_timezone = pytz.timezone('America/Los_Angeles') - - start = datetime(year=2013, month=11, day=1, hour=0, minute=0, - tzinfo=pytz.utc) - # 1 day later - end = datetime(year=2013, month=11, day=2, hour=0, minute=0, - tzinfo=pytz.utc) - - index = pd.date_range(start, end, freq='H') - - series = Series(1, index=index) - series = series.tz_convert(local_timezone) - result = series.resample('D', kind='period').mean() - - # Create the expected series - # Index is moved back a day with the timezone conversion from UTC to - # Pacific - expected_index = (pd.period_range(start=start, end=end, freq='D') - - offsets.Day()) - expected = Series(1, index=expected_index) - assert_series_equal(result, expected) - - def test_resample_with_pytz(self): - # GH 13238 - s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H", - tz="US/Eastern")) - result = s.resample("D").mean() - expected = Series(2, index=pd.DatetimeIndex(['2017-01-01', - '2017-01-02'], - tz="US/Eastern")) - assert_series_equal(result, expected) - # Especially assert that the timezone is LMT for pytz - assert result.index.tz == pytz.timezone('US/Eastern') - - def test_with_local_timezone_dateutil(self): - # see gh-5430 - local_timezone = 'dateutil/America/Los_Angeles' - - start = datetime(year=2013, month=11, day=1, hour=0, minute=0, - tzinfo=dateutil.tz.tzutc()) - # 1 day later - end = datetime(year=2013, month=11, day=2, hour=0, minute=0, - tzinfo=dateutil.tz.tzutc()) - - index = pd.date_range(start, end, freq='H', name='idx') - - series = Series(1, index=index) - series = series.tz_convert(local_timezone) - result = series.resample('D', kind='period').mean() - - # Create the expected series - # Index is moved back a day with the timezone conversion from UTC to - # Pacific - expected_index = (pd.period_range(start=start, end=end, freq='D', - name='idx') - offsets.Day()) - expected = Series(1, index=expected_index) - assert_series_equal(result, expected) - - def test_resample_nonexistent_time_bin_edge(self): - # GH 19375 - index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T') - s = Series(np.zeros(len(index)), index=index) - expected = s.tz_localize('US/Pacific') - result = expected.resample('900S').mean() - tm.assert_series_equal(result, expected) - - # GH 23742 - index = date_range(start='2017-10-10', end='2017-10-20', freq='1H') - index = index.tz_localize('UTC').tz_convert('America/Sao_Paulo') - df = DataFrame(data=list(range(len(index))), index=index) - result = df.groupby(pd.Grouper(freq='1D')) - expected = date_range(start='2017-10-09', end='2017-10-20', freq='D', - tz="America/Sao_Paulo") - tm.assert_index_equal(result.count().index, expected) - - def test_resample_ambiguous_time_bin_edge(self): - # GH 10117 - idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00", - freq="30T", tz="Europe/London") - expected = Series(np.zeros(len(idx)), index=idx) - result = expected.resample('30T').mean() - tm.assert_series_equal(result, expected) - - def test_fill_method_and_how_upsample(self): - # GH2073 - s = Series(np.arange(9, dtype='int64'), - index=date_range('2010-01-01', periods=9, freq='Q')) - last = s.resample('M').ffill() - both = s.resample('M').ffill().resample('M').last().astype('int64') - assert_series_equal(last, both) - - @pytest.mark.parametrize('day', DAYS) - @pytest.mark.parametrize('target', ['D', 'B']) - @pytest.mark.parametrize('convention', ['start', 'end']) - def test_weekly_upsample(self, day, target, convention): - freq = 'W-{day}'.format(day=day) - ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) - result = ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, 'ffill').to_period() - assert_series_equal(result, expected) - - def test_resample_to_timestamps(self): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') - - result = ts.resample('A-DEC', kind='timestamp').mean() - expected = ts.to_timestamp(how='start').resample('A-DEC').mean() - assert_series_equal(result, expected) - - def test_resample_to_quarterly(self): - for month in MONTHS: - ts = _simple_pts('1990', '1992', freq='A-%s' % month) - quar_ts = ts.resample('Q-%s' % month).ffill() - - stamps = ts.to_timestamp('D', how='start') - qdates = period_range(ts.index[0].asfreq('D', 'start'), - ts.index[-1].asfreq('D', 'end'), - freq='Q-%s' % month) - - expected = stamps.reindex(qdates.to_timestamp('D', 's'), - method='ffill') - expected.index = qdates - - assert_series_equal(quar_ts, expected) - - # conforms, but different month - ts = _simple_pts('1990', '1992', freq='A-JUN') - - for how in ['start', 'end']: - result = ts.resample('Q-MAR', convention=how).ffill() - expected = ts.asfreq('Q-MAR', how=how) - expected = expected.reindex(result.index, method='ffill') - - # .to_timestamp('D') - # expected = expected.resample('Q-MAR').ffill() - - assert_series_equal(result, expected) - - def test_resample_fill_missing(self): - rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A') - - s = Series(np.random.randn(4), index=rng) - - stamps = s.to_timestamp() - filled = s.resample('A').ffill() - expected = stamps.resample('A').ffill().to_period('A') - assert_series_equal(filled, expected) - - def test_cant_fill_missing_dups(self): - rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A') - s = Series(np.random.randn(5), index=rng) - pytest.raises(Exception, lambda: s.resample('A').ffill()) - - @pytest.mark.parametrize('freq', ['5min']) - @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) - def test_resample_5minute(self, freq, kind): - rng = period_range('1/1/2000', '1/5/2000', freq='T') - ts = Series(np.random.randn(len(rng)), index=rng) - expected = ts.to_timestamp().resample(freq).mean() - if kind != 'timestamp': - expected = expected.to_period(freq) - result = ts.resample(freq, kind=kind).mean() - assert_series_equal(result, expected) - - def test_upsample_daily_business_daily(self): - ts = _simple_pts('1/1/2000', '2/1/2000', freq='B') - - result = ts.resample('D').asfreq() - expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000')) - assert_series_equal(result, expected) - - ts = _simple_pts('1/1/2000', '2/1/2000') - result = ts.resample('H', convention='s').asfreq() - exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H') - expected = ts.asfreq('H', how='s').reindex(exp_rng) - assert_series_equal(result, expected) - - def test_resample_irregular_sparse(self): - dr = date_range(start='1/1/2012', freq='5min', periods=1000) - s = Series(np.array(100), index=dr) - # subset the data. - subset = s[:'2012-01-04 06:55'] - - result = subset.resample('10min').apply(len) - expected = s.resample('10min').apply(len).loc[result.index] - assert_series_equal(result, expected) - - def test_resample_weekly_all_na(self): - rng = date_range('1/1/2000', periods=10, freq='W-WED') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('W-THU').asfreq() - - assert result.isna().all() - - result = ts.resample('W-THU').asfreq().ffill()[:-1] - expected = ts.asfreq('W-THU').ffill() - assert_series_equal(result, expected) - - def test_resample_tz_localized(self): - dr = date_range(start='2012-4-13', end='2012-5-1') - ts = Series(lrange(len(dr)), dr) - - ts_utc = ts.tz_localize('UTC') - ts_local = ts_utc.tz_convert('America/Los_Angeles') - - result = ts_local.resample('W').mean() - - ts_local_naive = ts_local.copy() - ts_local_naive.index = [x.replace(tzinfo=None) - for x in ts_local_naive.index.to_pydatetime()] - - exp = ts_local_naive.resample( - 'W').mean().tz_localize('America/Los_Angeles') - - assert_series_equal(result, exp) - - # it works - result = ts_local.resample('D').mean() - - # #2245 - idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T', - tz='Australia/Sydney') - s = Series([1, 2], index=idx) - - result = s.resample('D', closed='right', label='right').mean() - ex_index = date_range('2001-09-21', periods=1, freq='D', - tz='Australia/Sydney') - expected = Series([1.5], index=ex_index) - - assert_series_equal(result, expected) - - # for good measure - result = s.resample('D', kind='period').mean() - ex_index = period_range('2001-09-20', periods=1, freq='D') - expected = Series([1.5], index=ex_index) - assert_series_equal(result, expected) - - # GH 6397 - # comparing an offset that doesn't propagate tz's - rng = date_range('1/1/2011', periods=20000, freq='H') - rng = rng.tz_localize('EST') - ts = DataFrame(index=rng) - ts['first'] = np.random.randn(len(rng)) - ts['second'] = np.cumsum(np.random.randn(len(rng))) - expected = DataFrame( - { - 'first': ts.resample('A').sum()['first'], - 'second': ts.resample('A').mean()['second']}, - columns=['first', 'second']) - result = ts.resample( - 'A').agg({'first': np.sum, - 'second': np.mean}).reindex(columns=['first', 'second']) - assert_frame_equal(result, expected) - - def test_closed_left_corner(self): - # #1465 - s = Series(np.random.randn(21), - index=date_range(start='1/1/2012 9:30', - freq='1min', periods=21)) - s[0] = np.nan - - result = s.resample('10min', closed='left', label='right').mean() - exp = s[1:].resample('10min', closed='left', label='right').mean() - assert_series_equal(result, exp) - - result = s.resample('10min', closed='left', label='left').mean() - exp = s[1:].resample('10min', closed='left', label='left').mean() - - ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3) - - tm.assert_index_equal(result.index, ex_index) - assert_series_equal(result, exp) - - def test_quarterly_resampling(self): - rng = period_range('2000Q1', periods=10, freq='Q-DEC') - ts = Series(np.arange(10), index=rng) - - result = ts.resample('A').mean() - exp = ts.to_timestamp().resample('A').mean().to_period() - assert_series_equal(result, exp) - - def test_resample_weekly_bug_1726(self): - # 8/6/12 is a Monday - ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D") - n = len(ind) - data = [[x] * 5 for x in range(n)] - df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'], - index=ind) - - # it works! - df.resample('W-MON', closed='left', label='left').first() - - def test_resample_with_dst_time_change(self): - # GH 15549 - index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], - tz='UTC').tz_convert('America/Chicago') - df = pd.DataFrame([1, 2], index=index) - result = df.resample('12h', closed='right', - label='right').last().ffill() - - expected_index_values = ['2016-03-09 12:00:00-06:00', - '2016-03-10 00:00:00-06:00', - '2016-03-10 12:00:00-06:00', - '2016-03-11 00:00:00-06:00', - '2016-03-11 12:00:00-06:00', - '2016-03-12 00:00:00-06:00', - '2016-03-12 12:00:00-06:00', - '2016-03-13 00:00:00-06:00', - '2016-03-13 13:00:00-05:00', - '2016-03-14 01:00:00-05:00', - '2016-03-14 13:00:00-05:00', - '2016-03-15 01:00:00-05:00', - '2016-03-15 13:00:00-05:00'] - index = pd.to_datetime(expected_index_values, utc=True).tz_convert( - 'America/Chicago') - expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 2.0], index=index) - assert_frame_equal(result, expected) - - def test_resample_bms_2752(self): - # GH2753 - foo = Series(index=pd.bdate_range('20000101', '20000201')) - res1 = foo.resample("BMS").mean() - res2 = foo.resample("BMS").mean().resample("B").mean() - assert res1.index[0] == Timestamp('20000103') - assert res1.index[0] == res2.index[0] - - # def test_monthly_convention_span(self): - # rng = period_range('2000-01', periods=3, freq='M') - # ts = Series(np.arange(3), index=rng) - - # # hacky way to get same thing - # exp_index = period_range('2000-01-01', '2000-03-31', freq='D') - # expected = ts.asfreq('D', how='end').reindex(exp_index) - # expected = expected.fillna(method='bfill') - - # result = ts.resample('D', convention='span').mean() - - # assert_series_equal(result, expected) - - def test_default_right_closed_label(self): - end_freq = ['D', 'Q', 'M', 'D'] - end_types = ['M', 'A', 'Q', 'W'] - - for from_freq, to_freq in zip(end_freq, end_types): - idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq) - df = DataFrame(np.random.randn(len(idx), 2), idx) - - resampled = df.resample(to_freq).mean() - assert_frame_equal(resampled, df.resample(to_freq, closed='right', - label='right').mean()) - - def test_default_left_closed_label(self): - others = ['MS', 'AS', 'QS', 'D', 'H'] - others_freq = ['D', 'Q', 'M', 'H', 'T'] - - for from_freq, to_freq in zip(others_freq, others): - idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq) - df = DataFrame(np.random.randn(len(idx), 2), idx) - - resampled = df.resample(to_freq).mean() - assert_frame_equal(resampled, df.resample(to_freq, closed='left', - label='left').mean()) - - def test_all_values_single_bin(self): - # 2070 - index = period_range(start="2012-01-01", end="2012-12-31", freq="M") - s = Series(np.random.randn(len(index)), index=index) - - result = s.resample("A").mean() - tm.assert_almost_equal(result[0], s.mean()) - - def test_evenly_divisible_with_no_extra_bins(self): - # 4076 - # when the frequency is evenly divisible, sometimes extra bins - - df = DataFrame(np.random.randn(9, 3), - index=date_range('2000-1-1', periods=9)) - result = df.resample('5D').mean() - expected = pd.concat( - [df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T - expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')] - assert_frame_equal(result, expected) - - index = date_range(start='2001-5-4', periods=28) - df = DataFrame( - [{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90, - 'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 + - [{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10, - 'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28, - index=index.append(index)).sort_index() - - index = date_range('2001-5-4', periods=4, freq='7D') - expected = DataFrame( - [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14, - 'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4, - index=index) - result = df.resample('7D').count() - assert_frame_equal(result, expected) - - expected = DataFrame( - [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700, - 'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4, - index=index) - result = df.resample('7D').sum() - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) - @pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']]) - def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): - # make sure passing loffset returns DatetimeIndex in all cases - # basic method taken from Base.test_resample_loffset_arg_type() - df = frame - expected_means = [df.values[i:i + 2].mean() - for i in range(0, len(df.values), 2)] - expected_index = self.create_index(df.index[0], - periods=len(df.index) / 2, - freq='2D') - - # loffset coerces PeriodIndex to DateTimeIndex - expected_index = expected_index.to_timestamp() - expected_index += timedelta(hours=2) - expected = DataFrame({'value': expected_means}, index=expected_index) - - result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result_how = df.resample('2D', how=agg_arg, loffset='2H', - kind=kind) - if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')]) - assert_frame_equal(result_agg, expected) - assert_frame_equal(result_how, expected) - - @pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)]) - @pytest.mark.parametrize('kind', [None, 'period']) - def test_upsampling_ohlc(self, freq, period_mult, kind): - # GH 13083 - pi = PeriodIndex(start='2000', freq='D', periods=10) - s = Series(range(len(pi)), index=pi) - expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) - - # timestamp-based resampling doesn't include all sub-periods - # of the last original period, so extend accordingly: - new_index = PeriodIndex(start='2000', freq=freq, - periods=period_mult * len(pi)) - expected = expected.reindex(new_index) - result = s.resample(freq, kind=kind).ohlc() - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('periods, values', - [([pd.NaT, '1970-01-01 00:00:00', pd.NaT, - '1970-01-01 00:00:02', '1970-01-01 00:00:03'], - [2, 3, 5, 7, 11]), - ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT, - pd.NaT, pd.NaT, '1970-01-01 00:00:02', - '1970-01-01 00:00:03', pd.NaT, pd.NaT], - [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])]) - @pytest.mark.parametrize('freq, expected_values', - [('1s', [3, np.NaN, 7, 11]), - ('2s', [3, int((7 + 11) / 2)]), - ('3s', [int((3 + 7) / 2), 11])]) - def test_resample_with_nat(self, periods, values, freq, expected_values): - # GH 13224 - index = PeriodIndex(periods, freq='S') - frame = DataFrame(values, index=index) - - expected_index = period_range('1970-01-01 00:00:00', - periods=len(expected_values), freq=freq) - expected = DataFrame(expected_values, index=expected_index) - result = frame.resample(freq).mean() - assert_frame_equal(result, expected) - - def test_resample_with_only_nat(self): - # GH 13224 - pi = PeriodIndex([pd.NaT] * 3, freq='S') - frame = DataFrame([2, 3, 5], index=pi) - expected_index = PeriodIndex(data=[], freq=pi.freq) - expected = DataFrame([], index=expected_index) - result = frame.resample('1s').mean() - assert_frame_equal(result, expected) From a3fab2f075fd0b66e605c500f059fb8b20c83eea Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 17:40:56 +0000 Subject: [PATCH 07/17] split off TestDatetimeIndex class --- pandas/tests/resample/test_datetime_index.py | 1461 ++++++++++++++++++ pandas/tests/resample/test_resample.py | 1448 +---------------- 2 files changed, 1466 insertions(+), 1443 deletions(-) create mode 100644 pandas/tests/resample/test_datetime_index.py diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py new file mode 100644 index 0000000000000..cc68f4119fba3 --- /dev/null +++ b/pandas/tests/resample/test_datetime_index.py @@ -0,0 +1,1461 @@ +# pylint: disable=E1101 + +from datetime import datetime, timedelta +from functools import partial +from warnings import catch_warnings, simplefilter + +import numpy as np +import pytest +import pytz + +from pandas.compat import range +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, Index, Panel, Series, Timedelta, Timestamp, isna, notna) +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import Period, period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.resample import DatetimeIndex, TimeGrouper +import pandas.util.testing as tm +from pandas.util.testing import ( + assert_almost_equal, assert_frame_equal, assert_series_equal) +from test_resample import ( + Base, _simple_pts, _simple_ts, bday, downsample_methods) + +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import Minute + + +class TestDatetimeIndex(Base): + _index_factory = lambda x: date_range + + @pytest.fixture + def _series_name(self): + return 'dti' + + def setup_method(self, method): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') + + self.series = Series(np.random.rand(len(dti)), dti) + + def create_series(self): + i = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + return Series(np.arange(len(i)), index=i, name='dti') + + def test_custom_grouper(self): + + dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10)) + + s = Series(np.array([1] * len(dti)), index=dti, dtype='int64') + + b = TimeGrouper(Minute(5)) + g = s.groupby(b) + + # check all cython functions work + funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] + for f in funcs: + g._cython_agg_general(f) + + b = TimeGrouper(Minute(5), closed='right', label='right') + g = s.groupby(b) + # check all cython functions work + funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] + for f in funcs: + g._cython_agg_general(f) + + assert g.ngroups == 2593 + assert notna(g.mean()).all() + + # construct expected val + arr = [1] + [5] * 2592 + idx = dti[0:-1:5] + idx = idx.append(dti[-1:]) + expect = Series(arr, index=idx) + + # GH2763 - return in put dtype if we can + result = g.agg(np.sum) + assert_series_equal(result, expect) + + df = DataFrame(np.random.rand(len(dti), 10), + index=dti, dtype='float64') + r = df.groupby(b).agg(np.sum) + + assert len(r.columns) == 10 + assert len(r.index) == 2593 + + def test_resample_basic(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + + result = s.resample('5min', closed='right', label='right').mean() + + exp_idx = date_range('1/1/2000', periods=4, freq='5min', name='index') + expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=exp_idx) + assert_series_equal(result, expected) + assert result.index.name == 'index' + + result = s.resample('5min', closed='left', label='right').mean() + + exp_idx = date_range('1/1/2000 00:05', periods=3, freq='5min', + name='index') + expected = Series([s[:5].mean(), s[5:10].mean(), + s[10:].mean()], index=exp_idx) + assert_series_equal(result, expected) + + s = self.series + result = s.resample('5Min').last() + grouper = TimeGrouper(Minute(5), closed='left', label='left') + expect = s.groupby(grouper).agg(lambda x: x[-1]) + assert_series_equal(result, expect) + + def test_resample_string_kwargs(self): + # Test for issue #19303 + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + + # Check that wrong keyword argument strings raise an error + with pytest.raises(ValueError): + s.resample('5min', label='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', closed='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', convention='starttt').mean() + + def test_resample_how(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + args = downsample_methods + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + inds = date_range('1/1/2000', periods=4, freq='5min', name='index') + + for arg in args: + if arg == 'ohlc': + func = _ohlc + else: + func = arg + try: + result = getattr(s.resample( + '5min', closed='right', label='right'), arg)() + + expected = s.groupby(grouplist).agg(func) + assert result.index.name == 'index' + if arg == 'ohlc': + expected = DataFrame(expected.values.tolist()) + expected.columns = ['open', 'high', 'low', 'close'] + expected.index = Index(inds, name='index') + assert_frame_equal(result, expected) + else: + expected.index = inds + assert_series_equal(result, expected) + except BaseException as exc: + + exc.args += ('how=%s' % arg,) + raise + + def test_numpy_compat(self): + # see gh-12811 + s = Series([1, 2, 3, 4, 5], index=date_range( + '20130101', periods=5, freq='s')) + r = s.resample('2s') + + msg = "numpy operations are not valid with resample" + + for func in ('min', 'max', 'sum', 'prod', + 'mean', 'var', 'std'): + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(func, 1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(axis=1) + + def test_resample_how_callables(self): + # GH 7929 + data = np.arange(5, dtype=np.int64) + ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d') + df = DataFrame({"A": data, "B": data}, index=ind) + + def fn(x, a=1): + return str(type(x)) + + class FnClass(object): + + def __call__(self, x): + return str(type(x)) + + df_standard = df.resample("M").apply(fn) + df_lambda = df.resample("M").apply(lambda x: str(type(x))) + df_partial = df.resample("M").apply(partial(fn)) + df_partial2 = df.resample("M").apply(partial(fn, a=2)) + df_class = df.resample("M").apply(FnClass()) + + assert_frame_equal(df_standard, df_lambda) + assert_frame_equal(df_standard, df_partial) + assert_frame_equal(df_standard, df_partial2) + assert_frame_equal(df_standard, df_class) + + def test_resample_with_timedeltas(self): + + expected = DataFrame({'A': np.arange(1480)}) + expected = expected.groupby(expected.index // 30).sum() + expected.index = pd.timedelta_range('0 days', freq='30T', periods=50) + + df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta( + np.arange(1480), unit='T')) + result = df.resample('30T').sum() + + assert_frame_equal(result, expected) + + s = df['A'] + result = s.resample('30T').sum() + assert_series_equal(result, expected['A']) + + def test_resample_single_period_timedelta(self): + + s = Series(list(range(5)), index=pd.timedelta_range( + '1 day', freq='s', periods=5)) + result = s.resample('2s').sum() + expected = Series([1, 5, 4], index=pd.timedelta_range( + '1 day', freq='2s', periods=3)) + assert_series_equal(result, expected) + + def test_resample_timedelta_idempotency(self): + + # GH 12072 + index = pd.timedelta_range('0', periods=9, freq='10L') + series = Series(range(9), index=index) + result = series.resample('10L').mean() + expected = series + assert_series_equal(result, expected) + + def test_resample_rounding(self): + # GH 8371 + # odd results when rounding is needed + + data = """date,time,value +11-08-2014,00:00:01.093,1 +11-08-2014,00:00:02.159,1 +11-08-2014,00:00:02.667,1 +11-08-2014,00:00:03.175,1 +11-08-2014,00:00:07.058,1 +11-08-2014,00:00:07.362,1 +11-08-2014,00:00:08.324,1 +11-08-2014,00:00:08.830,1 +11-08-2014,00:00:08.982,1 +11-08-2014,00:00:09.815,1 +11-08-2014,00:00:10.540,1 +11-08-2014,00:00:11.061,1 +11-08-2014,00:00:11.617,1 +11-08-2014,00:00:13.607,1 +11-08-2014,00:00:14.535,1 +11-08-2014,00:00:15.525,1 +11-08-2014,00:00:17.960,1 +11-08-2014,00:00:20.674,1 +11-08-2014,00:00:21.191,1""" + + from pandas.compat import StringIO + df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [ + 'date', 'time']}, index_col='timestamp') + df.index.name = None + result = df.resample('6s').sum() + expected = DataFrame({'value': [ + 4, 9, 4, 2 + ]}, index=date_range('2014-11-08', freq='6s', periods=4)) + assert_frame_equal(result, expected) + + result = df.resample('7s').sum() + expected = DataFrame({'value': [ + 4, 10, 4, 1 + ]}, index=date_range('2014-11-08', freq='7s', periods=4)) + assert_frame_equal(result, expected) + + result = df.resample('11s').sum() + expected = DataFrame({'value': [ + 11, 8 + ]}, index=date_range('2014-11-08', freq='11s', periods=2)) + assert_frame_equal(result, expected) + + result = df.resample('13s').sum() + expected = DataFrame({'value': [ + 13, 6 + ]}, index=date_range('2014-11-08', freq='13s', periods=2)) + assert_frame_equal(result, expected) + + result = df.resample('17s').sum() + expected = DataFrame({'value': [ + 16, 3 + ]}, index=date_range('2014-11-08', freq='17s', periods=2)) + assert_frame_equal(result, expected) + + def test_resample_basic_from_daily(self): + # from daily + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='D', name='index') + + s = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = s.resample('w-sun').last() + + assert len(result) == 3 + assert (result.index.dayofweek == [6, 6, 6]).all() + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/9/2005'] + assert result.iloc[2] == s.iloc[-1] + + result = s.resample('W-MON').last() + assert len(result) == 2 + assert (result.index.dayofweek == [0, 0]).all() + assert result.iloc[0] == s['1/3/2005'] + assert result.iloc[1] == s['1/10/2005'] + + result = s.resample('W-TUE').last() + assert len(result) == 2 + assert (result.index.dayofweek == [1, 1]).all() + assert result.iloc[0] == s['1/4/2005'] + assert result.iloc[1] == s['1/10/2005'] + + result = s.resample('W-WED').last() + assert len(result) == 2 + assert (result.index.dayofweek == [2, 2]).all() + assert result.iloc[0] == s['1/5/2005'] + assert result.iloc[1] == s['1/10/2005'] + + result = s.resample('W-THU').last() + assert len(result) == 2 + assert (result.index.dayofweek == [3, 3]).all() + assert result.iloc[0] == s['1/6/2005'] + assert result.iloc[1] == s['1/10/2005'] + + result = s.resample('W-FRI').last() + assert len(result) == 2 + assert (result.index.dayofweek == [4, 4]).all() + assert result.iloc[0] == s['1/7/2005'] + assert result.iloc[1] == s['1/10/2005'] + + # to biz day + result = s.resample('B').last() + assert len(result) == 7 + assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() + + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/3/2005'] + assert result.iloc[5] == s['1/9/2005'] + assert result.index.name == 'index' + + def test_resample_upsampling_picked_but_not_correct(self): + + # Test for issue #3020 + dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D') + series = Series(1, index=dates) + + result = series.resample('D').mean() + assert result.index[0] == dates[0] + + # GH 5955 + # incorrect deciding to upsample when the axis frequency matches the + # resample frequency + + import datetime + s = Series(np.arange(1., 6), index=[datetime.datetime( + 1975, 1, i, 12, 0) for i in range(1, 6)]) + expected = Series(np.arange(1., 6), index=date_range( + '19750101', periods=5, freq='D')) + + result = s.resample('D').count() + assert_series_equal(result, Series(1, index=expected.index)) + + result1 = s.resample('D').sum() + result2 = s.resample('D').mean() + assert_series_equal(result1, expected) + assert_series_equal(result2, expected) + + def test_resample_frame_basic(self): + df = tm.makeTimeDataFrame() + + b = TimeGrouper('M') + g = df.groupby(b) + + # check all cython functions work + funcs = ['add', 'mean', 'prod', 'min', 'max', 'var'] + for f in funcs: + g._cython_agg_general(f) + + result = df.resample('A').mean() + assert_series_equal(result['A'], df['A'].resample('A').mean()) + + result = df.resample('M').mean() + assert_series_equal(result['A'], df['A'].resample('M').mean()) + + df.resample('M', kind='period').mean() + df.resample('W-WED', kind='period').mean() + + @pytest.mark.parametrize('loffset', [timedelta(minutes=1), + '1min', Minute(1), + np.timedelta64(1, 'm')]) + def test_resample_loffset(self, loffset): + # GH 7687 + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min') + s = Series(np.random.randn(14), index=rng) + + result = s.resample('5min', closed='right', label='right', + loffset=loffset).mean() + idx = date_range('1/1/2000', periods=4, freq='5min') + expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=idx + timedelta(minutes=1)) + assert_series_equal(result, expected) + assert result.index.freq == Minute(5) + + # from daily + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='D') + ser = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = ser.resample('w-sun').last() + expected = ser.resample('w-sun', loffset=-bday).last() + assert result.index[0] - bday == expected.index[0] + + def test_resample_loffset_upsample(self): + # GH 20744 + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min') + s = Series(np.random.randn(14), index=rng) + + result = s.resample('5min', closed='right', label='right', + loffset=timedelta(minutes=1)).ffill() + idx = date_range('1/1/2000', periods=4, freq='5min') + expected = Series([s[0], s[5], s[10], s[-1]], + index=idx + timedelta(minutes=1)) + + assert_series_equal(result, expected) + + def test_resample_loffset_count(self): + # GH 12725 + start_time = '1/1/2000 00:00:00' + rng = date_range(start_time, periods=100, freq='S') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('10S', loffset='1s').count() + + expected_index = ( + date_range(start_time, periods=10, freq='10S') + + timedelta(seconds=1) + ) + expected = Series(10, index=expected_index) + + assert_series_equal(result, expected) + + # Same issue should apply to .size() since it goes through + # same code path + result = ts.resample('10S', loffset='1s').size() + + assert_series_equal(result, expected) + + def test_resample_upsample(self): + # from daily + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='D', name='index') + + s = Series(np.random.rand(len(dti)), dti) + + # to minutely, by padding + result = s.resample('Min').pad() + assert len(result) == 12961 + assert result[0] == s[0] + assert result[-1] == s[-1] + + assert result.index.name == 'index' + + def test_resample_how_method(self): + # GH9915 + s = Series([11, 22], + index=[Timestamp('2015-03-31 21:48:52.672000'), + Timestamp('2015-03-31 21:49:52.739000')]) + expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], + index=[Timestamp('2015-03-31 21:48:50'), + Timestamp('2015-03-31 21:49:00'), + Timestamp('2015-03-31 21:49:10'), + Timestamp('2015-03-31 21:49:20'), + Timestamp('2015-03-31 21:49:30'), + Timestamp('2015-03-31 21:49:40'), + Timestamp('2015-03-31 21:49:50')]) + assert_series_equal(s.resample("10S").mean(), expected) + + def test_resample_extra_index_point(self): + # GH 9756 + index = DatetimeIndex(start='20150101', end='20150331', freq='BM') + expected = DataFrame({'A': Series([21, 41, 63], index=index)}) + + index = DatetimeIndex(start='20150101', end='20150331', freq='B') + df = DataFrame( + {'A': Series(range(len(index)), index=index)}, dtype='int64') + result = df.resample('BM').last() + assert_frame_equal(result, expected) + + def test_upsample_with_limit(self): + rng = date_range('1/1/2000', periods=3, freq='5t') + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample('t').ffill(limit=2) + expected = ts.reindex(result.index, method='ffill', limit=2) + assert_series_equal(result, expected) + + def test_nearest_upsample_with_limit(self): + rng = date_range('1/1/2000', periods=3, freq='5t') + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample('t').nearest(limit=2) + expected = ts.reindex(result.index, method='nearest', limit=2) + assert_series_equal(result, expected) + + def test_resample_ohlc(self): + s = self.series + + grouper = TimeGrouper(Minute(5)) + expect = s.groupby(grouper).agg(lambda x: x[-1]) + result = s.resample('5Min').ohlc() + + assert len(result) == len(expect) + assert len(result.columns) == 4 + + xs = result.iloc[-2] + assert xs['open'] == s[-6] + assert xs['high'] == s[-6:-1].max() + assert xs['low'] == s[-6:-1].min() + assert xs['close'] == s[-2] + + xs = result.iloc[0] + assert xs['open'] == s[0] + assert xs['high'] == s[:5].max() + assert xs['low'] == s[:5].min() + assert xs['close'] == s[4] + + def test_resample_ohlc_result(self): + + # GH 12332 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h')) + s = Series(range(len(index)), index=index) + + a = s.loc[:'4-15-2000'].resample('30T').ohlc() + assert isinstance(a, DataFrame) + + b = s.loc[:'4-14-2000'].resample('30T').ohlc() + assert isinstance(b, DataFrame) + + # GH12348 + # raising on odd period + rng = date_range('2013-12-30', '2014-01-07') + index = rng.drop([Timestamp('2014-01-01'), + Timestamp('2013-12-31'), + Timestamp('2014-01-04'), + Timestamp('2014-01-05')]) + df = DataFrame(data=np.arange(len(index)), index=index) + result = df.resample('B').mean() + expected = df.reindex(index=date_range(rng[0], rng[-1], freq='B')) + assert_frame_equal(result, expected) + + def test_resample_ohlc_dataframe(self): + df = ( + DataFrame({ + 'PRICE': { + Timestamp('2011-01-06 10:59:05', tz=None): 24990, + Timestamp('2011-01-06 12:43:33', tz=None): 25499, + Timestamp('2011-01-06 12:54:09', tz=None): 25499}, + 'VOLUME': { + Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + Timestamp('2011-01-06 12:54:09', tz=None): 100000000}}) + ).reindex(['VOLUME', 'PRICE'], axis=1) + res = df.resample('H').ohlc() + exp = pd.concat([df['VOLUME'].resample('H').ohlc(), + df['PRICE'].resample('H').ohlc()], + axis=1, + keys=['VOLUME', 'PRICE']) + assert_frame_equal(exp, res) + + df.columns = [['a', 'b'], ['c', 'd']] + res = df.resample('H').ohlc() + exp.columns = pd.MultiIndex.from_tuples([ + ('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'), + ('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'), + ('b', 'd', 'low'), ('b', 'd', 'close')]) + assert_frame_equal(exp, res) + + # dupe columns fail atm + # df.columns = ['PRICE', 'PRICE'] + + def test_resample_dup_index(self): + + # GH 4812 + # dup columns with resample raising + df = DataFrame(np.random.randn(4, 12), index=[2000, 2000, 2000, 2000], + columns=[Period(year=2000, month=i + 1, freq='M') + for i in range(12)]) + df.iloc[3, :] = np.nan + result = df.resample('Q', axis=1).mean() + expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() + expected.columns = [ + Period(year=2000, quarter=i + 1, freq='Q') for i in range(4)] + assert_frame_equal(result, expected) + + def test_resample_reresample(self): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='D') + s = Series(np.random.rand(len(dti)), dti) + bs = s.resample('B', closed='right', label='right').mean() + result = bs.resample('8H').mean() + assert len(result) == 22 + assert isinstance(result.index.freq, offsets.DateOffset) + assert result.index.freq == offsets.Hour(8) + + def test_resample_timestamp_to_period(self): + ts = _simple_ts('1/1/1990', '1/1/2000') + + result = ts.resample('A-DEC', kind='period').mean() + expected = ts.resample('A-DEC').mean() + expected.index = period_range('1990', '2000', freq='a-dec') + assert_series_equal(result, expected) + + result = ts.resample('A-JUN', kind='period').mean() + expected = ts.resample('A-JUN').mean() + expected.index = period_range('1990', '2000', freq='a-jun') + assert_series_equal(result, expected) + + result = ts.resample('M', kind='period').mean() + expected = ts.resample('M').mean() + expected.index = period_range('1990-01', '2000-01', freq='M') + assert_series_equal(result, expected) + + result = ts.resample('M', kind='period').mean() + expected = ts.resample('M').mean() + expected.index = period_range('1990-01', '2000-01', freq='M') + assert_series_equal(result, expected) + + def test_ohlc_5min(self): + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s') + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample('5min', closed='right', + label='right').ohlc() + + assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all() + + exp = _ohlc(ts[1:31]) + assert (resampled.loc['1/1/2000 00:05'] == exp).all() + + exp = _ohlc(ts['1/1/2000 5:55:01':]) + assert (resampled.loc['1/1/2000 6:00:00'] == exp).all() + + def test_downsample_non_unique(self): + rng = date_range('1/1/2000', '2/29/2000') + rng2 = rng.repeat(5).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + result = ts.resample('M').mean() + + expected = ts.groupby(lambda x: x.month).mean() + assert len(result) == 2 + assert_almost_equal(result[0], expected[1]) + assert_almost_equal(result[1], expected[2]) + + def test_asfreq_non_unique(self): + # GH #1077 + rng = date_range('1/1/2000', '2/29/2000') + rng2 = rng.repeat(2).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + pytest.raises(Exception, ts.asfreq, 'B') + + def test_resample_axis1(self): + rng = date_range('1/1/2000', '2/29/2000') + df = DataFrame(np.random.randn(3, len(rng)), columns=rng, + index=['a', 'b', 'c']) + + result = df.resample('M', axis=1).mean() + expected = df.T.resample('M').mean().T + tm.assert_frame_equal(result, expected) + + def test_resample_panel(self): + rng = date_range('1/1/2000', '6/30/2000') + n = len(rng) + + with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) + + result = panel.resample('M', axis=1).mean() + + def p_apply(panel, f): + result = {} + for item in panel.items: + result[item] = f(panel[item]) + return Panel(result, items=panel.items) + + expected = p_apply(panel, lambda x: x.resample('M').mean()) + tm.assert_panel_equal(result, expected) + + panel2 = panel.swapaxes(1, 2) + result = panel2.resample('M', axis=2).mean() + expected = p_apply(panel2, + lambda x: x.resample('M', axis=1).mean()) + tm.assert_panel_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") + def test_resample_panel_numpy(self): + rng = date_range('1/1/2000', '6/30/2000') + n = len(rng) + + with catch_warnings(record=True): + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) + + result = panel.resample('M', axis=1).apply(lambda x: x.mean(1)) + expected = panel.resample('M', axis=1).mean() + tm.assert_panel_equal(result, expected) + + panel = panel.swapaxes(1, 2) + result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) + expected = panel.resample('M', axis=2).mean() + tm.assert_panel_equal(result, expected) + + def test_resample_anchored_ticks(self): + # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should + # "anchor" the origin at midnight so we get regular intervals rather + # than starting from the first timestamp which might start in the + # middle of a desired interval + + rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s') + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + freqs = ['t', '5t', '15t', '30t', '4h', '12h'] + for freq in freqs: + result = ts[2:].resample(freq, closed='left', label='left').mean() + expected = ts.resample(freq, closed='left', label='left').mean() + assert_series_equal(result, expected) + + def test_resample_single_group(self): + mysum = lambda x: x.sum() + + rng = date_range('2000-1-1', '2000-2-10', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + assert_series_equal(ts.resample('M').sum(), + ts.resample('M').apply(mysum)) + + rng = date_range('2000-1-1', '2000-1-10', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + assert_series_equal(ts.resample('M').sum(), + ts.resample('M').apply(mysum)) + + # GH 3849 + s = Series([30.1, 31.6], index=[Timestamp('20070915 15:30:00'), + Timestamp('20070915 15:40:00')]) + expected = Series([0.75], index=[Timestamp('20070915')]) + result = s.resample('D').apply(lambda x: np.std(x)) + assert_series_equal(result, expected) + + def test_resample_base(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s') + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample('5min', base=2).mean() + exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57', + freq='5min') + tm.assert_index_equal(resampled.index, exp_rng) + + def test_resample_base_with_timedeltaindex(self): + + # GH 10530 + rng = timedelta_range(start='0s', periods=25, freq='s') + ts = Series(np.random.randn(len(rng)), index=rng) + + with_base = ts.resample('2s', base=5).mean() + without_base = ts.resample('2s').mean() + + exp_without_base = timedelta_range(start='0s', end='25s', freq='2s') + exp_with_base = timedelta_range(start='5s', end='29s', freq='2s') + + tm.assert_index_equal(without_base.index, exp_without_base) + tm.assert_index_equal(with_base.index, exp_with_base) + + def test_resample_categorical_data_with_timedeltaindex(self): + # GH #12169 + df = DataFrame({'Group_obj': 'A'}, + index=pd.to_timedelta(list(range(20)), unit='s')) + df['Group'] = df['Group_obj'].astype('category') + result = df.resample('10s').agg(lambda x: (x.value_counts().index[0])) + expected = DataFrame({'Group_obj': ['A', 'A'], + 'Group': ['A', 'A']}, + index=pd.to_timedelta([0, 10], unit='s')) + expected = expected.reindex(['Group_obj', 'Group'], axis=1) + expected['Group'] = expected['Group_obj'].astype('category') + tm.assert_frame_equal(result, expected) + + def test_resample_daily_anchored(self): + rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T') + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + result = ts[2:].resample('D', closed='left', label='left').mean() + expected = ts.resample('D', closed='left', label='left').mean() + assert_series_equal(result, expected) + + def test_resample_to_period_monthly_buglet(self): + # GH #1259 + + rng = date_range('1/1/2000', '12/31/2000') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('M', kind='period').mean() + exp_index = period_range('Jan-2000', 'Dec-2000', freq='M') + tm.assert_index_equal(result.index, exp_index) + + def test_period_with_agg(self): + + # aggregate a period resampler with a lambda + s2 = Series(np.random.randint(0, 5, 50), + index=pd.period_range('2012-01-01', freq='H', periods=50), + dtype='float64') + + expected = s2.to_timestamp().resample('D').mean().to_period() + result = s2.resample('D').agg(lambda x: x.mean()) + assert_series_equal(result, expected) + + def test_resample_segfault(self): + # GH 8573 + # segfaulting in older versions + all_wins_and_wagers = [ + (1, datetime(2013, 10, 1, 16, 20), 1, 0), + (2, datetime(2013, 10, 1, 16, 10), 1, 0), + (2, datetime(2013, 10, 1, 18, 15), 1, 0), + (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)] + + df = DataFrame.from_records(all_wins_and_wagers, + columns=("ID", "timestamp", "A", "B") + ).set_index("timestamp") + result = df.groupby("ID").resample("5min").sum() + expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) + assert_frame_equal(result, expected) + + def test_resample_dtype_preservation(self): + + # GH 12202 + # validation tests for dtype preservation + + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, freq='W'), + 'group': [1, 1, 2, 2], + 'val': Series([5, 6, 7, 8], + dtype='int32')} + ).set_index('date') + + result = df.resample('1D').ffill() + assert result.val.dtype == np.int32 + + result = df.groupby('group').resample('1D').ffill() + assert result.val.dtype == np.int32 + + def test_resample_dtype_coerceion(self): + + pytest.importorskip('scipy.interpolate') + + # GH 16361 + df = {"a": [1, 3, 1, 4]} + df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04")) + + expected = (df.astype("float64") + .resample("H") + .mean() + ["a"] + .interpolate("cubic") + ) + + result = df.resample("H")["a"].mean().interpolate("cubic") + tm.assert_series_equal(result, expected) + + result = df.resample("H").mean()["a"].interpolate("cubic") + tm.assert_series_equal(result, expected) + + def test_weekly_resample_buglet(self): + # #1327 + rng = date_range('1/1/2000', freq='B', periods=20) + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample('W').mean() + expected = ts.resample('W-SUN').mean() + assert_series_equal(resampled, expected) + + def test_monthly_resample_error(self): + # #1451 + dates = date_range('4/16/2012 20:00', periods=5000, freq='h') + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! + ts.resample('M') + + def test_nanosecond_resample_error(self): + # GH 12307 - Values falls after last bin when + # Resampling using pd.tseries.offsets.Nano as period + start = 1443707890427 + exp_start = 1443707890400 + indx = pd.date_range( + start=pd.to_datetime(start), + periods=10, + freq='100n' + ) + ts = Series(range(len(indx)), index=indx) + r = ts.resample(pd.tseries.offsets.Nano(100)) + result = r.agg('mean') + + exp_indx = pd.date_range( + start=pd.to_datetime(exp_start), + periods=10, + freq='100n' + ) + exp = Series(range(len(exp_indx)), index=exp_indx) + + assert_series_equal(result, exp) + + def test_resample_anchored_intraday(self): + # #1471, #1458 + + rng = date_range('1/1/2012', '4/1/2012', freq='100min') + df = DataFrame(rng.month, index=rng) + + result = df.resample('M').mean() + expected = df.resample( + 'M', kind='period').mean().to_timestamp(how='end') + expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') + tm.assert_frame_equal(result, expected) + + result = df.resample('M', closed='left').mean() + exp = df.tshift(1, freq='D').resample('M', kind='period').mean() + exp = exp.to_timestamp(how='end') + + exp.index = exp.index + Timedelta(1, 'ns') - Timedelta(1, 'D') + tm.assert_frame_equal(result, exp) + + rng = date_range('1/1/2012', '4/1/2012', freq='100min') + df = DataFrame(rng.month, index=rng) + + result = df.resample('Q').mean() + expected = df.resample( + 'Q', kind='period').mean().to_timestamp(how='end') + expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') + tm.assert_frame_equal(result, expected) + + result = df.resample('Q', closed='left').mean() + expected = df.tshift(1, freq='D').resample('Q', kind='period', + closed='left').mean() + expected = expected.to_timestamp(how='end') + expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') + tm.assert_frame_equal(result, expected) + + ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') + resampled = ts.resample('M').mean() + assert len(resampled) == 1 + + def test_resample_anchored_monthstart(self): + ts = _simple_ts('1/1/2000', '12/31/2002') + + freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN'] + + for freq in freqs: + ts.resample(freq).mean() + + def test_resample_anchored_multiday(self): + # When resampling a range spanning multiple days, ensure that the + # start date gets used to determine the offset. Fixes issue where + # a one day period is not a multiple of the frequency. + # + # See: https://github.com/pandas-dev/pandas/issues/8683 + + index = pd.date_range( + '2014-10-14 23:06:23.206', periods=3, freq='400L' + ) | pd.date_range( + '2014-10-15 23:00:00', periods=2, freq='2200L') + + s = Series(np.random.randn(5), index=index) + + # Ensure left closing works + result = s.resample('2200L').mean() + assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000') + + # Ensure right closing works + result = s.resample('2200L', label='right').mean() + assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200') + + def test_corner_cases(self): + # miscellaneous test coverage + + rng = date_range('1/1/2000', periods=12, freq='t') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('5t', closed='right', label='left').mean() + ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t') + tm.assert_index_equal(result.index, ex_index) + + len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] + # it works + result = len0pts.resample('A-DEC').mean() + assert len(result) == 0 + + # resample to periods + ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') + result = ts.resample('M', kind='period').mean() + assert len(result) == 1 + assert result.index[0] == Period('2000-04', freq='M') + + def test_anchored_lowercase_buglet(self): + dates = date_range('4/16/2012 20:00', periods=50000, freq='s') + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! + ts.resample('d').mean() + + def test_upsample_apply_functions(self): + # #1596 + rng = pd.date_range('2012-06-12', periods=4, freq='h') + + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('20min').aggregate(['mean', 'sum']) + assert isinstance(result, DataFrame) + + def test_resample_not_monotonic(self): + rng = pd.date_range('2012-06-12', periods=200, freq='h') + ts = Series(np.random.randn(len(rng)), index=rng) + + ts = ts.take(np.random.permutation(len(ts))) + + result = ts.resample('D').sum() + exp = ts.sort_index().resample('D').sum() + assert_series_equal(result, exp) + + def test_resample_median_bug_1688(self): + + for dtype in ['int64', 'int32', 'float64', 'float32']: + df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), + datetime(2012, 1, 1, 0, 5, 0)], + dtype=dtype) + + result = df.resample("T").apply(lambda x: x.mean()) + exp = df.asfreq('T') + tm.assert_frame_equal(result, exp) + + result = df.resample("T").median() + exp = df.asfreq('T') + tm.assert_frame_equal(result, exp) + + def test_how_lambda_functions(self): + + ts = _simple_ts('1/1/2000', '4/1/2000') + + result = ts.resample('M').apply(lambda x: x.mean()) + exp = ts.resample('M').mean() + tm.assert_series_equal(result, exp) + + foo_exp = ts.resample('M').mean() + foo_exp.name = 'foo' + bar_exp = ts.resample('M').std() + bar_exp.name = 'bar' + + result = ts.resample('M').apply( + [lambda x: x.mean(), lambda x: x.std(ddof=1)]) + result.columns = ['foo', 'bar'] + tm.assert_series_equal(result['foo'], foo_exp) + tm.assert_series_equal(result['bar'], bar_exp) + + # this is a MI Series, so comparing the names of the results + # doesn't make sense + result = ts.resample('M').aggregate({'foo': lambda x: x.mean(), + 'bar': lambda x: x.std(ddof=1)}) + tm.assert_series_equal(result['foo'], foo_exp, check_names=False) + tm.assert_series_equal(result['bar'], bar_exp, check_names=False) + + def test_resample_unequal_times(self): + # #1772 + start = datetime(1999, 3, 1, 5) + # end hour is less than start + end = datetime(2012, 7, 31, 4) + bad_ind = date_range(start, end, freq="30min") + df = DataFrame({'close': 1}, index=bad_ind) + + # it works! + df.resample('AS').sum() + + def test_resample_consistency(self): + + # GH 6418 + # resample with bfill / limit / reindex consistency + + i30 = pd.date_range('2002-02-02', periods=4, freq='30T') + s = Series(np.arange(4.), index=i30) + s[2] = np.NaN + + # Upsample by factor 3 with reindex() and resample() methods: + i10 = pd.date_range(i30[0], i30[-1], freq='10T') + + s10 = s.reindex(index=i10, method='bfill') + s10_2 = s.reindex(index=i10, method='bfill', limit=2) + rl = s.reindex_like(s10, method='bfill', limit=2) + r10_2 = s.resample('10Min').bfill(limit=2) + r10 = s.resample('10Min').bfill() + + # s10_2, r10, r10_2, rl should all be equal + assert_series_equal(s10_2, r10) + assert_series_equal(s10_2, r10_2) + assert_series_equal(s10_2, rl) + + def test_resample_timegrouper(self): + # GH 7227 + dates1 = [datetime(2014, 10, 1), datetime(2014, 9, 3), + datetime(2014, 11, 5), datetime(2014, 9, 5), + datetime(2014, 10, 8), datetime(2014, 7, 15)] + + dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] + dates3 = [pd.NaT] + dates1 + [pd.NaT] + + for dates in [dates1, dates2, dates3]: + df = DataFrame(dict(A=dates, B=np.arange(len(dates)))) + result = df.set_index('A').resample('M').count() + exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31', + '2014-09-30', + '2014-10-31', '2014-11-30'], + freq='M', name='A') + expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx) + assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq='M', key='A')).count() + assert_frame_equal(result, expected) + + df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange( + len(dates)))) + result = df.set_index('A').resample('M').count() + expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]}, + index=exp_idx, columns=['B', 'C']) + assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq='M', key='A')).count() + assert_frame_equal(result, expected) + + def test_resample_nunique(self): + + # GH 12352 + df = DataFrame({ + 'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903', + Timestamp('2015-06-08 00:00:00'): '0010150847'}, + 'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05', + Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) + r = df.resample('D') + g = df.groupby(pd.Grouper(freq='D')) + expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: + x.nunique()) + assert expected.name == 'ID' + + for t in [r, g]: + result = r.ID.nunique() + assert_series_equal(result, expected) + + result = df.ID.resample('D').nunique() + assert_series_equal(result, expected) + + result = df.ID.groupby(pd.Grouper(freq='D')).nunique() + assert_series_equal(result, expected) + + def test_resample_nunique_with_date_gap(self): + # GH 13453 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') + index3 = index.append(index2) + s = Series(range(len(index3)), index=index3, dtype='int64') + r = s.resample('M') + + # Since all elements are unique, these should all be the same + results = [ + r.count(), + r.nunique(), + r.agg(Series.nunique), + r.agg('nunique') + ] + + assert_series_equal(results[0], results[1]) + assert_series_equal(results[0], results[2]) + assert_series_equal(results[0], results[3]) + + @pytest.mark.parametrize('n', [10000, 100000]) + @pytest.mark.parametrize('k', [10, 100, 1000]) + def test_resample_group_info(self, n, k): + # GH10914 + dr = date_range(start='2015-08-27', periods=n // 10, freq='T') + ts = Series(np.random.randint(0, n // k, n).astype('int64'), + index=np.random.choice(dr, n)) + + left = ts.resample('30T').nunique() + ix = date_range(start=ts.index.min(), end=ts.index.max(), + freq='30T') + + vals = ts.values + bins = np.searchsorted(ix.values, ts.index, side='right') + + sorter = np.lexsort((vals, bins)) + vals, bins = vals[sorter], bins[sorter] + + mask = np.r_[True, vals[1:] != vals[:-1]] + mask |= np.r_[True, bins[1:] != bins[:-1]] + + arr = np.bincount(bins[mask] - 1, + minlength=len(ix)).astype('int64', copy=False) + right = Series(arr, index=ix) + + assert_series_equal(left, right) + + def test_resample_size(self): + n = 10000 + dr = date_range('2015-09-19', periods=n, freq='T') + ts = Series(np.random.randn(n), index=np.random.choice(dr, n)) + + left = ts.resample('7T').size() + ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T') + + bins = np.searchsorted(ix.values, ts.index.values, side='right') + val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64', + copy=False) + + right = Series(val, index=ix) + assert_series_equal(left, right) + + def test_resample_across_dst(self): + # The test resamples a DatetimeIndex with values before and after a + # DST change + # Issue: 14682 + + # The DatetimeIndex we will start with + # (note that DST happens at 03:00+02:00 -> 02:00+01:00) + # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 + df1 = DataFrame([1477786980, 1477790580], columns=['ts']) + dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s') + .dt.tz_localize('UTC') + .dt.tz_convert('Europe/Madrid')) + + # The expected DatetimeIndex after resampling. + # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 + df2 = DataFrame([1477785600, 1477789200], columns=['ts']) + dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s') + .dt.tz_localize('UTC') + .dt.tz_convert('Europe/Madrid')) + df = DataFrame([5, 5], index=dti1) + + result = df.resample(rule='H').sum() + expected = DataFrame([5, 5], index=dti2) + + assert_frame_equal(result, expected) + + def test_resample_dst_anchor(self): + # 5172 + dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') + df = DataFrame([5], index=dti) + assert_frame_equal(df.resample(rule='CD').sum(), + DataFrame([5], index=df.index.normalize())) + df.resample(rule='MS').sum() + assert_frame_equal( + df.resample(rule='MS').sum(), + DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], + tz='US/Eastern'))) + + dti = date_range('2013-09-30', '2013-11-02', freq='30Min', + tz='Europe/Paris') + values = range(dti.size) + df = DataFrame({"a": values, + "b": values, + "c": values}, index=dti, dtype='int64') + how = {"a": "min", "b": "max", "c": "count"} + + assert_frame_equal( + df.resample("W-MON").agg(how)[["a", "b", "c"]], + DataFrame({"a": [0, 48, 384, 720, 1056, 1394], + "b": [47, 383, 719, 1055, 1393, 1586], + "c": [48, 336, 336, 336, 338, 193]}, + index=date_range('9/30/2013', '11/4/2013', + freq='W-MON', tz='Europe/Paris')), + 'W-MON Frequency') + + assert_frame_equal( + df.resample("2W-MON").agg(how)[["a", "b", "c"]], + DataFrame({"a": [0, 48, 720, 1394], + "b": [47, 719, 1393, 1586], + "c": [48, 672, 674, 193]}, + index=date_range('9/30/2013', '11/11/2013', + freq='2W-MON', tz='Europe/Paris')), + '2W-MON Frequency') + + assert_frame_equal( + df.resample("MS").agg(how)[["a", "b", "c"]], + DataFrame({"a": [0, 48, 1538], + "b": [47, 1537, 1586], + "c": [48, 1490, 49]}, + index=date_range('9/1/2013', '11/1/2013', + freq='MS', tz='Europe/Paris')), + 'MS Frequency') + + assert_frame_equal( + df.resample("2MS").agg(how)[["a", "b", "c"]], + DataFrame({"a": [0, 1538], + "b": [1537, 1586], + "c": [1538, 49]}, + index=date_range('9/1/2013', '11/1/2013', + freq='2MS', tz='Europe/Paris')), + '2MS Frequency') + + df_daily = df['10/26/2013':'10/29/2013'] + assert_frame_equal( + df_daily.resample("CD").agg({"a": "min", "b": "max", "c": "count"}) + [["a", "b", "c"]], + DataFrame({"a": [1248, 1296, 1346, 1394], + "b": [1295, 1345, 1393, 1441], + "c": [48, 50, 48, 48]}, + index=date_range('10/26/2013', '10/29/2013', + freq='CD', tz='Europe/Paris')), + 'CD Frequency') + + def test_downsample_across_dst(self): + # GH 8531 + tz = pytz.timezone('Europe/Berlin') + dt = datetime(2014, 10, 26) + dates = date_range(tz.localize(dt), periods=4, freq='2H') + result = Series(5, index=dates).resample('H').mean() + expected = Series([5., np.nan] * 3 + [5.], + index=date_range(tz.localize(dt), periods=7, + freq='H')) + tm.assert_series_equal(result, expected) + + def test_downsample_across_dst_weekly(self): + # GH 9119, GH 21459 + df = DataFrame(index=DatetimeIndex([ + '2017-03-25', '2017-03-26', '2017-03-27', + '2017-03-28', '2017-03-29' + ], tz='Europe/Amsterdam'), + data=[11, 12, 13, 14, 15]) + result = df.resample('1W').sum() + expected = DataFrame([23, 42], index=pd.DatetimeIndex([ + '2017-03-26', '2017-04-02' + ], tz='Europe/Amsterdam')) + tm.assert_frame_equal(result, expected) + + idx = pd.date_range("2013-04-01", "2013-05-01", tz='Europe/London', + freq='H') + s = Series(index=idx) + result = s.resample('W').mean() + expected = Series(index=pd.date_range( + '2013-04-07', freq='W', periods=5, tz='Europe/London' + )) + tm.assert_series_equal(result, expected) + + def test_resample_with_nat(self): + # GH 13020 + index = DatetimeIndex([pd.NaT, + '1970-01-01 00:00:00', + pd.NaT, + '1970-01-01 00:00:01', + '1970-01-01 00:00:02']) + frame = DataFrame([2, 3, 5, 7, 11], index=index) + + index_1s = DatetimeIndex(['1970-01-01 00:00:00', + '1970-01-01 00:00:01', + '1970-01-01 00:00:02']) + frame_1s = DataFrame([3, 7, 11], index=index_1s) + assert_frame_equal(frame.resample('1s').mean(), frame_1s) + + index_2s = DatetimeIndex(['1970-01-01 00:00:00', + '1970-01-01 00:00:02']) + frame_2s = DataFrame([5, 11], index=index_2s) + assert_frame_equal(frame.resample('2s').mean(), frame_2s) + + index_3s = DatetimeIndex(['1970-01-01 00:00:00']) + frame_3s = DataFrame([7], index=index_3s) + assert_frame_equal(frame.resample('3s').mean(), frame_3s) + + assert_frame_equal(frame.resample('60s').mean(), frame_3s) + + def test_resample_timedelta_values(self): + # GH 13119 + # check that timedelta dtype is preserved when NaT values are + # introduced by the resampling + + times = timedelta_range('1 day', '4 day', freq='4D') + df = DataFrame({'time': times}, index=times) + + times2 = timedelta_range('1 day', '4 day', freq='2D') + exp = Series(times2, index=times2, name='time') + exp.iloc[1] = pd.NaT + + res = df.resample('2D').first()['time'] + tm.assert_series_equal(res, exp) + res = df['time'].resample('2D').first() + tm.assert_series_equal(res, exp) + + def test_resample_datetime_values(self): + # GH 13119 + # check that datetime dtype is preserved when NaT values are + # introduced by the resampling + + dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)] + df = DataFrame({'timestamp': dates}, index=dates) + + exp = Series([datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)], + index=date_range('2016-01-15', periods=3, freq='2D'), + name='timestamp') + + res = df.resample('2D').first()['timestamp'] + tm.assert_series_equal(res, exp) + res = df['timestamp'].resample('2D').first() + tm.assert_series_equal(res, exp) + + def test_resample_apply_with_additional_args(self): + # GH 14615 + def f(data, add_arg): + return np.mean(data) * add_arg + + multiplier = 10 + result = self.series.resample('D').apply(f, multiplier) + expected = self.series.resample('D').mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing as kwarg + result = self.series.resample('D').apply(f, add_arg=multiplier) + expected = self.series.resample('D').mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing dataframe + df = pd.DataFrame({"A": 1, "B": 2}, + index=pd.date_range('2017', periods=10)) + result = df.groupby("A").resample("D").agg(f, multiplier) + expected = df.groupby("A").resample('D').mean().multiply(multiplier) + assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index 96e25957fa2d3..1be36ee4b8c7b 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -1,31 +1,26 @@ # pylint: disable=E1101 from datetime import datetime, timedelta -from functools import partial -from warnings import catch_warnings, simplefilter import numpy as np import pytest -import pytz from pandas.compat import OrderedDict, range, zip -from pandas.errors import AbstractMethodError, UnsupportedFunctionCall +from pandas.errors import AbstractMethodError import pandas as pd -from pandas import ( - DataFrame, Index, Panel, Series, Timedelta, Timestamp, isna, notna) +from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import Period, PeriodIndex, period_range -from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range +from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.resample import DatetimeIndex, TimeGrouper import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_index_equal, assert_series_equal) -import pandas.tseries.offsets as offsets -from pandas.tseries.offsets import BDay, Minute +from pandas.tseries.offsets import BDay bday = BDay() @@ -775,1436 +770,3 @@ def test_resample_quantile(self): result = s.resample(freq).quantile(q) expected = s.resample(freq).agg(lambda x: x.quantile(q)) tm.assert_series_equal(result, expected) - - -class TestDatetimeIndex(Base): - _index_factory = lambda x: date_range - - @pytest.fixture - def _series_name(self): - return 'dti' - - def setup_method(self, method): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - - self.series = Series(np.random.rand(len(dti)), dti) - - def create_series(self): - i = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - - return Series(np.arange(len(i)), index=i, name='dti') - - def test_custom_grouper(self): - - dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10)) - - s = Series(np.array([1] * len(dti)), index=dti, dtype='int64') - - b = TimeGrouper(Minute(5)) - g = s.groupby(b) - - # check all cython functions work - funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] - for f in funcs: - g._cython_agg_general(f) - - b = TimeGrouper(Minute(5), closed='right', label='right') - g = s.groupby(b) - # check all cython functions work - funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] - for f in funcs: - g._cython_agg_general(f) - - assert g.ngroups == 2593 - assert notna(g.mean()).all() - - # construct expected val - arr = [1] + [5] * 2592 - idx = dti[0:-1:5] - idx = idx.append(dti[-1:]) - expect = Series(arr, index=idx) - - # GH2763 - return in put dtype if we can - result = g.agg(np.sum) - assert_series_equal(result, expect) - - df = DataFrame(np.random.rand(len(dti), 10), - index=dti, dtype='float64') - r = df.groupby(b).agg(np.sum) - - assert len(r.columns) == 10 - assert len(r.index) == 2593 - - def test_resample_basic(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', - name='index') - s = Series(np.random.randn(14), index=rng) - - result = s.resample('5min', closed='right', label='right').mean() - - exp_idx = date_range('1/1/2000', periods=4, freq='5min', name='index') - expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], - index=exp_idx) - assert_series_equal(result, expected) - assert result.index.name == 'index' - - result = s.resample('5min', closed='left', label='right').mean() - - exp_idx = date_range('1/1/2000 00:05', periods=3, freq='5min', - name='index') - expected = Series([s[:5].mean(), s[5:10].mean(), - s[10:].mean()], index=exp_idx) - assert_series_equal(result, expected) - - s = self.series - result = s.resample('5Min').last() - grouper = TimeGrouper(Minute(5), closed='left', label='left') - expect = s.groupby(grouper).agg(lambda x: x[-1]) - assert_series_equal(result, expect) - - def test_resample_string_kwargs(self): - # Test for issue #19303 - rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', - name='index') - s = Series(np.random.randn(14), index=rng) - - # Check that wrong keyword argument strings raise an error - with pytest.raises(ValueError): - s.resample('5min', label='righttt').mean() - with pytest.raises(ValueError): - s.resample('5min', closed='righttt').mean() - with pytest.raises(ValueError): - s.resample('5min', convention='starttt').mean() - - def test_resample_how(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', - name='index') - s = Series(np.random.randn(14), index=rng) - grouplist = np.ones_like(s) - grouplist[0] = 0 - grouplist[1:6] = 1 - grouplist[6:11] = 2 - grouplist[11:] = 3 - args = downsample_methods - - def _ohlc(group): - if isna(group).all(): - return np.repeat(np.nan, 4) - return [group[0], group.max(), group.min(), group[-1]] - - inds = date_range('1/1/2000', periods=4, freq='5min', name='index') - - for arg in args: - if arg == 'ohlc': - func = _ohlc - else: - func = arg - try: - result = getattr(s.resample( - '5min', closed='right', label='right'), arg)() - - expected = s.groupby(grouplist).agg(func) - assert result.index.name == 'index' - if arg == 'ohlc': - expected = DataFrame(expected.values.tolist()) - expected.columns = ['open', 'high', 'low', 'close'] - expected.index = Index(inds, name='index') - assert_frame_equal(result, expected) - else: - expected.index = inds - assert_series_equal(result, expected) - except BaseException as exc: - - exc.args += ('how=%s' % arg,) - raise - - def test_numpy_compat(self): - # see gh-12811 - s = Series([1, 2, 3, 4, 5], index=date_range( - '20130101', periods=5, freq='s')) - r = s.resample('2s') - - msg = "numpy operations are not valid with resample" - - for func in ('min', 'max', 'sum', 'prod', - 'mean', 'var', 'std'): - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, func)(func, 1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, func)(axis=1) - - def test_resample_how_callables(self): - # GH 7929 - data = np.arange(5, dtype=np.int64) - ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d') - df = DataFrame({"A": data, "B": data}, index=ind) - - def fn(x, a=1): - return str(type(x)) - - class FnClass(object): - - def __call__(self, x): - return str(type(x)) - - df_standard = df.resample("M").apply(fn) - df_lambda = df.resample("M").apply(lambda x: str(type(x))) - df_partial = df.resample("M").apply(partial(fn)) - df_partial2 = df.resample("M").apply(partial(fn, a=2)) - df_class = df.resample("M").apply(FnClass()) - - assert_frame_equal(df_standard, df_lambda) - assert_frame_equal(df_standard, df_partial) - assert_frame_equal(df_standard, df_partial2) - assert_frame_equal(df_standard, df_class) - - def test_resample_with_timedeltas(self): - - expected = DataFrame({'A': np.arange(1480)}) - expected = expected.groupby(expected.index // 30).sum() - expected.index = pd.timedelta_range('0 days', freq='30T', periods=50) - - df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta( - np.arange(1480), unit='T')) - result = df.resample('30T').sum() - - assert_frame_equal(result, expected) - - s = df['A'] - result = s.resample('30T').sum() - assert_series_equal(result, expected['A']) - - def test_resample_single_period_timedelta(self): - - s = Series(list(range(5)), index=pd.timedelta_range( - '1 day', freq='s', periods=5)) - result = s.resample('2s').sum() - expected = Series([1, 5, 4], index=pd.timedelta_range( - '1 day', freq='2s', periods=3)) - assert_series_equal(result, expected) - - def test_resample_timedelta_idempotency(self): - - # GH 12072 - index = pd.timedelta_range('0', periods=9, freq='10L') - series = Series(range(9), index=index) - result = series.resample('10L').mean() - expected = series - assert_series_equal(result, expected) - - def test_resample_rounding(self): - # GH 8371 - # odd results when rounding is needed - - data = """date,time,value -11-08-2014,00:00:01.093,1 -11-08-2014,00:00:02.159,1 -11-08-2014,00:00:02.667,1 -11-08-2014,00:00:03.175,1 -11-08-2014,00:00:07.058,1 -11-08-2014,00:00:07.362,1 -11-08-2014,00:00:08.324,1 -11-08-2014,00:00:08.830,1 -11-08-2014,00:00:08.982,1 -11-08-2014,00:00:09.815,1 -11-08-2014,00:00:10.540,1 -11-08-2014,00:00:11.061,1 -11-08-2014,00:00:11.617,1 -11-08-2014,00:00:13.607,1 -11-08-2014,00:00:14.535,1 -11-08-2014,00:00:15.525,1 -11-08-2014,00:00:17.960,1 -11-08-2014,00:00:20.674,1 -11-08-2014,00:00:21.191,1""" - - from pandas.compat import StringIO - df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [ - 'date', 'time']}, index_col='timestamp') - df.index.name = None - result = df.resample('6s').sum() - expected = DataFrame({'value': [ - 4, 9, 4, 2 - ]}, index=date_range('2014-11-08', freq='6s', periods=4)) - assert_frame_equal(result, expected) - - result = df.resample('7s').sum() - expected = DataFrame({'value': [ - 4, 10, 4, 1 - ]}, index=date_range('2014-11-08', freq='7s', periods=4)) - assert_frame_equal(result, expected) - - result = df.resample('11s').sum() - expected = DataFrame({'value': [ - 11, 8 - ]}, index=date_range('2014-11-08', freq='11s', periods=2)) - assert_frame_equal(result, expected) - - result = df.resample('13s').sum() - expected = DataFrame({'value': [ - 13, 6 - ]}, index=date_range('2014-11-08', freq='13s', periods=2)) - assert_frame_equal(result, expected) - - result = df.resample('17s').sum() - expected = DataFrame({'value': [ - 16, 3 - ]}, index=date_range('2014-11-08', freq='17s', periods=2)) - assert_frame_equal(result, expected) - - def test_resample_basic_from_daily(self): - # from daily - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='D', name='index') - - s = Series(np.random.rand(len(dti)), dti) - - # to weekly - result = s.resample('w-sun').last() - - assert len(result) == 3 - assert (result.index.dayofweek == [6, 6, 6]).all() - assert result.iloc[0] == s['1/2/2005'] - assert result.iloc[1] == s['1/9/2005'] - assert result.iloc[2] == s.iloc[-1] - - result = s.resample('W-MON').last() - assert len(result) == 2 - assert (result.index.dayofweek == [0, 0]).all() - assert result.iloc[0] == s['1/3/2005'] - assert result.iloc[1] == s['1/10/2005'] - - result = s.resample('W-TUE').last() - assert len(result) == 2 - assert (result.index.dayofweek == [1, 1]).all() - assert result.iloc[0] == s['1/4/2005'] - assert result.iloc[1] == s['1/10/2005'] - - result = s.resample('W-WED').last() - assert len(result) == 2 - assert (result.index.dayofweek == [2, 2]).all() - assert result.iloc[0] == s['1/5/2005'] - assert result.iloc[1] == s['1/10/2005'] - - result = s.resample('W-THU').last() - assert len(result) == 2 - assert (result.index.dayofweek == [3, 3]).all() - assert result.iloc[0] == s['1/6/2005'] - assert result.iloc[1] == s['1/10/2005'] - - result = s.resample('W-FRI').last() - assert len(result) == 2 - assert (result.index.dayofweek == [4, 4]).all() - assert result.iloc[0] == s['1/7/2005'] - assert result.iloc[1] == s['1/10/2005'] - - # to biz day - result = s.resample('B').last() - assert len(result) == 7 - assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() - - assert result.iloc[0] == s['1/2/2005'] - assert result.iloc[1] == s['1/3/2005'] - assert result.iloc[5] == s['1/9/2005'] - assert result.index.name == 'index' - - def test_resample_upsampling_picked_but_not_correct(self): - - # Test for issue #3020 - dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D') - series = Series(1, index=dates) - - result = series.resample('D').mean() - assert result.index[0] == dates[0] - - # GH 5955 - # incorrect deciding to upsample when the axis frequency matches the - # resample frequency - - import datetime - s = Series(np.arange(1., 6), index=[datetime.datetime( - 1975, 1, i, 12, 0) for i in range(1, 6)]) - expected = Series(np.arange(1., 6), index=date_range( - '19750101', periods=5, freq='D')) - - result = s.resample('D').count() - assert_series_equal(result, Series(1, index=expected.index)) - - result1 = s.resample('D').sum() - result2 = s.resample('D').mean() - assert_series_equal(result1, expected) - assert_series_equal(result2, expected) - - def test_resample_frame_basic(self): - df = tm.makeTimeDataFrame() - - b = TimeGrouper('M') - g = df.groupby(b) - - # check all cython functions work - funcs = ['add', 'mean', 'prod', 'min', 'max', 'var'] - for f in funcs: - g._cython_agg_general(f) - - result = df.resample('A').mean() - assert_series_equal(result['A'], df['A'].resample('A').mean()) - - result = df.resample('M').mean() - assert_series_equal(result['A'], df['A'].resample('M').mean()) - - df.resample('M', kind='period').mean() - df.resample('W-WED', kind='period').mean() - - @pytest.mark.parametrize('loffset', [timedelta(minutes=1), - '1min', Minute(1), - np.timedelta64(1, 'm')]) - def test_resample_loffset(self, loffset): - # GH 7687 - rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min') - s = Series(np.random.randn(14), index=rng) - - result = s.resample('5min', closed='right', label='right', - loffset=loffset).mean() - idx = date_range('1/1/2000', periods=4, freq='5min') - expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], - index=idx + timedelta(minutes=1)) - assert_series_equal(result, expected) - assert result.index.freq == Minute(5) - - # from daily - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='D') - ser = Series(np.random.rand(len(dti)), dti) - - # to weekly - result = ser.resample('w-sun').last() - expected = ser.resample('w-sun', loffset=-bday).last() - assert result.index[0] - bday == expected.index[0] - - def test_resample_loffset_upsample(self): - # GH 20744 - rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min') - s = Series(np.random.randn(14), index=rng) - - result = s.resample('5min', closed='right', label='right', - loffset=timedelta(minutes=1)).ffill() - idx = date_range('1/1/2000', periods=4, freq='5min') - expected = Series([s[0], s[5], s[10], s[-1]], - index=idx + timedelta(minutes=1)) - - assert_series_equal(result, expected) - - def test_resample_loffset_count(self): - # GH 12725 - start_time = '1/1/2000 00:00:00' - rng = date_range(start_time, periods=100, freq='S') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('10S', loffset='1s').count() - - expected_index = ( - date_range(start_time, periods=10, freq='10S') + - timedelta(seconds=1) - ) - expected = Series(10, index=expected_index) - - assert_series_equal(result, expected) - - # Same issue should apply to .size() since it goes through - # same code path - result = ts.resample('10S', loffset='1s').size() - - assert_series_equal(result, expected) - - def test_resample_upsample(self): - # from daily - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='D', name='index') - - s = Series(np.random.rand(len(dti)), dti) - - # to minutely, by padding - result = s.resample('Min').pad() - assert len(result) == 12961 - assert result[0] == s[0] - assert result[-1] == s[-1] - - assert result.index.name == 'index' - - def test_resample_how_method(self): - # GH9915 - s = Series([11, 22], - index=[Timestamp('2015-03-31 21:48:52.672000'), - Timestamp('2015-03-31 21:49:52.739000')]) - expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], - index=[Timestamp('2015-03-31 21:48:50'), - Timestamp('2015-03-31 21:49:00'), - Timestamp('2015-03-31 21:49:10'), - Timestamp('2015-03-31 21:49:20'), - Timestamp('2015-03-31 21:49:30'), - Timestamp('2015-03-31 21:49:40'), - Timestamp('2015-03-31 21:49:50')]) - assert_series_equal(s.resample("10S").mean(), expected) - - def test_resample_extra_index_point(self): - # GH 9756 - index = DatetimeIndex(start='20150101', end='20150331', freq='BM') - expected = DataFrame({'A': Series([21, 41, 63], index=index)}) - - index = DatetimeIndex(start='20150101', end='20150331', freq='B') - df = DataFrame( - {'A': Series(range(len(index)), index=index)}, dtype='int64') - result = df.resample('BM').last() - assert_frame_equal(result, expected) - - def test_upsample_with_limit(self): - rng = date_range('1/1/2000', periods=3, freq='5t') - ts = Series(np.random.randn(len(rng)), rng) - - result = ts.resample('t').ffill(limit=2) - expected = ts.reindex(result.index, method='ffill', limit=2) - assert_series_equal(result, expected) - - def test_nearest_upsample_with_limit(self): - rng = date_range('1/1/2000', periods=3, freq='5t') - ts = Series(np.random.randn(len(rng)), rng) - - result = ts.resample('t').nearest(limit=2) - expected = ts.reindex(result.index, method='nearest', limit=2) - assert_series_equal(result, expected) - - def test_resample_ohlc(self): - s = self.series - - grouper = TimeGrouper(Minute(5)) - expect = s.groupby(grouper).agg(lambda x: x[-1]) - result = s.resample('5Min').ohlc() - - assert len(result) == len(expect) - assert len(result.columns) == 4 - - xs = result.iloc[-2] - assert xs['open'] == s[-6] - assert xs['high'] == s[-6:-1].max() - assert xs['low'] == s[-6:-1].min() - assert xs['close'] == s[-2] - - xs = result.iloc[0] - assert xs['open'] == s[0] - assert xs['high'] == s[:5].max() - assert xs['low'] == s[:5].min() - assert xs['close'] == s[4] - - def test_resample_ohlc_result(self): - - # GH 12332 - index = pd.date_range('1-1-2000', '2-15-2000', freq='h') - index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h')) - s = Series(range(len(index)), index=index) - - a = s.loc[:'4-15-2000'].resample('30T').ohlc() - assert isinstance(a, DataFrame) - - b = s.loc[:'4-14-2000'].resample('30T').ohlc() - assert isinstance(b, DataFrame) - - # GH12348 - # raising on odd period - rng = date_range('2013-12-30', '2014-01-07') - index = rng.drop([Timestamp('2014-01-01'), - Timestamp('2013-12-31'), - Timestamp('2014-01-04'), - Timestamp('2014-01-05')]) - df = DataFrame(data=np.arange(len(index)), index=index) - result = df.resample('B').mean() - expected = df.reindex(index=date_range(rng[0], rng[-1], freq='B')) - assert_frame_equal(result, expected) - - def test_resample_ohlc_dataframe(self): - df = ( - DataFrame({ - 'PRICE': { - Timestamp('2011-01-06 10:59:05', tz=None): 24990, - Timestamp('2011-01-06 12:43:33', tz=None): 25499, - Timestamp('2011-01-06 12:54:09', tz=None): 25499}, - 'VOLUME': { - Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, - Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, - Timestamp('2011-01-06 12:54:09', tz=None): 100000000}}) - ).reindex(['VOLUME', 'PRICE'], axis=1) - res = df.resample('H').ohlc() - exp = pd.concat([df['VOLUME'].resample('H').ohlc(), - df['PRICE'].resample('H').ohlc()], - axis=1, - keys=['VOLUME', 'PRICE']) - assert_frame_equal(exp, res) - - df.columns = [['a', 'b'], ['c', 'd']] - res = df.resample('H').ohlc() - exp.columns = pd.MultiIndex.from_tuples([ - ('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'), - ('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'), - ('b', 'd', 'low'), ('b', 'd', 'close')]) - assert_frame_equal(exp, res) - - # dupe columns fail atm - # df.columns = ['PRICE', 'PRICE'] - - def test_resample_dup_index(self): - - # GH 4812 - # dup columns with resample raising - df = DataFrame(np.random.randn(4, 12), index=[2000, 2000, 2000, 2000], - columns=[Period(year=2000, month=i + 1, freq='M') - for i in range(12)]) - df.iloc[3, :] = np.nan - result = df.resample('Q', axis=1).mean() - expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() - expected.columns = [ - Period(year=2000, quarter=i + 1, freq='Q') for i in range(4)] - assert_frame_equal(result, expected) - - def test_resample_reresample(self): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='D') - s = Series(np.random.rand(len(dti)), dti) - bs = s.resample('B', closed='right', label='right').mean() - result = bs.resample('8H').mean() - assert len(result) == 22 - assert isinstance(result.index.freq, offsets.DateOffset) - assert result.index.freq == offsets.Hour(8) - - def test_resample_timestamp_to_period(self): - ts = _simple_ts('1/1/1990', '1/1/2000') - - result = ts.resample('A-DEC', kind='period').mean() - expected = ts.resample('A-DEC').mean() - expected.index = period_range('1990', '2000', freq='a-dec') - assert_series_equal(result, expected) - - result = ts.resample('A-JUN', kind='period').mean() - expected = ts.resample('A-JUN').mean() - expected.index = period_range('1990', '2000', freq='a-jun') - assert_series_equal(result, expected) - - result = ts.resample('M', kind='period').mean() - expected = ts.resample('M').mean() - expected.index = period_range('1990-01', '2000-01', freq='M') - assert_series_equal(result, expected) - - result = ts.resample('M', kind='period').mean() - expected = ts.resample('M').mean() - expected.index = period_range('1990-01', '2000-01', freq='M') - assert_series_equal(result, expected) - - def test_ohlc_5min(self): - def _ohlc(group): - if isna(group).all(): - return np.repeat(np.nan, 4) - return [group[0], group.max(), group.min(), group[-1]] - - rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s') - ts = Series(np.random.randn(len(rng)), index=rng) - - resampled = ts.resample('5min', closed='right', - label='right').ohlc() - - assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all() - - exp = _ohlc(ts[1:31]) - assert (resampled.loc['1/1/2000 00:05'] == exp).all() - - exp = _ohlc(ts['1/1/2000 5:55:01':]) - assert (resampled.loc['1/1/2000 6:00:00'] == exp).all() - - def test_downsample_non_unique(self): - rng = date_range('1/1/2000', '2/29/2000') - rng2 = rng.repeat(5).values - ts = Series(np.random.randn(len(rng2)), index=rng2) - - result = ts.resample('M').mean() - - expected = ts.groupby(lambda x: x.month).mean() - assert len(result) == 2 - assert_almost_equal(result[0], expected[1]) - assert_almost_equal(result[1], expected[2]) - - def test_asfreq_non_unique(self): - # GH #1077 - rng = date_range('1/1/2000', '2/29/2000') - rng2 = rng.repeat(2).values - ts = Series(np.random.randn(len(rng2)), index=rng2) - - pytest.raises(Exception, ts.asfreq, 'B') - - def test_resample_axis1(self): - rng = date_range('1/1/2000', '2/29/2000') - df = DataFrame(np.random.randn(3, len(rng)), columns=rng, - index=['a', 'b', 'c']) - - result = df.resample('M', axis=1).mean() - expected = df.T.resample('M').mean().T - tm.assert_frame_equal(result, expected) - - def test_resample_panel(self): - rng = date_range('1/1/2000', '6/30/2000') - n = len(rng) - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) - - result = panel.resample('M', axis=1).mean() - - def p_apply(panel, f): - result = {} - for item in panel.items: - result[item] = f(panel[item]) - return Panel(result, items=panel.items) - - expected = p_apply(panel, lambda x: x.resample('M').mean()) - tm.assert_panel_equal(result, expected) - - panel2 = panel.swapaxes(1, 2) - result = panel2.resample('M', axis=2).mean() - expected = p_apply(panel2, - lambda x: x.resample('M', axis=1).mean()) - tm.assert_panel_equal(result, expected) - - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") - def test_resample_panel_numpy(self): - rng = date_range('1/1/2000', '6/30/2000') - n = len(rng) - - with catch_warnings(record=True): - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) - - result = panel.resample('M', axis=1).apply(lambda x: x.mean(1)) - expected = panel.resample('M', axis=1).mean() - tm.assert_panel_equal(result, expected) - - panel = panel.swapaxes(1, 2) - result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) - expected = panel.resample('M', axis=2).mean() - tm.assert_panel_equal(result, expected) - - def test_resample_anchored_ticks(self): - # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should - # "anchor" the origin at midnight so we get regular intervals rather - # than starting from the first timestamp which might start in the - # middle of a desired interval - - rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s') - ts = Series(np.random.randn(len(rng)), index=rng) - ts[:2] = np.nan # so results are the same - - freqs = ['t', '5t', '15t', '30t', '4h', '12h'] - for freq in freqs: - result = ts[2:].resample(freq, closed='left', label='left').mean() - expected = ts.resample(freq, closed='left', label='left').mean() - assert_series_equal(result, expected) - - def test_resample_single_group(self): - mysum = lambda x: x.sum() - - rng = date_range('2000-1-1', '2000-2-10', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - assert_series_equal(ts.resample('M').sum(), - ts.resample('M').apply(mysum)) - - rng = date_range('2000-1-1', '2000-1-10', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - assert_series_equal(ts.resample('M').sum(), - ts.resample('M').apply(mysum)) - - # GH 3849 - s = Series([30.1, 31.6], index=[Timestamp('20070915 15:30:00'), - Timestamp('20070915 15:40:00')]) - expected = Series([0.75], index=[Timestamp('20070915')]) - result = s.resample('D').apply(lambda x: np.std(x)) - assert_series_equal(result, expected) - - def test_resample_base(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s') - ts = Series(np.random.randn(len(rng)), index=rng) - - resampled = ts.resample('5min', base=2).mean() - exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57', - freq='5min') - tm.assert_index_equal(resampled.index, exp_rng) - - def test_resample_base_with_timedeltaindex(self): - - # GH 10530 - rng = timedelta_range(start='0s', periods=25, freq='s') - ts = Series(np.random.randn(len(rng)), index=rng) - - with_base = ts.resample('2s', base=5).mean() - without_base = ts.resample('2s').mean() - - exp_without_base = timedelta_range(start='0s', end='25s', freq='2s') - exp_with_base = timedelta_range(start='5s', end='29s', freq='2s') - - tm.assert_index_equal(without_base.index, exp_without_base) - tm.assert_index_equal(with_base.index, exp_with_base) - - def test_resample_categorical_data_with_timedeltaindex(self): - # GH #12169 - df = DataFrame({'Group_obj': 'A'}, - index=pd.to_timedelta(list(range(20)), unit='s')) - df['Group'] = df['Group_obj'].astype('category') - result = df.resample('10s').agg(lambda x: (x.value_counts().index[0])) - expected = DataFrame({'Group_obj': ['A', 'A'], - 'Group': ['A', 'A']}, - index=pd.to_timedelta([0, 10], unit='s')) - expected = expected.reindex(['Group_obj', 'Group'], axis=1) - expected['Group'] = expected['Group_obj'].astype('category') - tm.assert_frame_equal(result, expected) - - def test_resample_daily_anchored(self): - rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T') - ts = Series(np.random.randn(len(rng)), index=rng) - ts[:2] = np.nan # so results are the same - - result = ts[2:].resample('D', closed='left', label='left').mean() - expected = ts.resample('D', closed='left', label='left').mean() - assert_series_equal(result, expected) - - def test_resample_to_period_monthly_buglet(self): - # GH #1259 - - rng = date_range('1/1/2000', '12/31/2000') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('M', kind='period').mean() - exp_index = period_range('Jan-2000', 'Dec-2000', freq='M') - tm.assert_index_equal(result.index, exp_index) - - def test_period_with_agg(self): - - # aggregate a period resampler with a lambda - s2 = Series(np.random.randint(0, 5, 50), - index=pd.period_range('2012-01-01', freq='H', periods=50), - dtype='float64') - - expected = s2.to_timestamp().resample('D').mean().to_period() - result = s2.resample('D').agg(lambda x: x.mean()) - assert_series_equal(result, expected) - - def test_resample_segfault(self): - # GH 8573 - # segfaulting in older versions - all_wins_and_wagers = [ - (1, datetime(2013, 10, 1, 16, 20), 1, 0), - (2, datetime(2013, 10, 1, 16, 10), 1, 0), - (2, datetime(2013, 10, 1, 18, 15), 1, 0), - (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)] - - df = DataFrame.from_records(all_wins_and_wagers, - columns=("ID", "timestamp", "A", "B") - ).set_index("timestamp") - result = df.groupby("ID").resample("5min").sum() - expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) - assert_frame_equal(result, expected) - - def test_resample_dtype_preservation(self): - - # GH 12202 - # validation tests for dtype preservation - - df = DataFrame({'date': pd.date_range(start='2016-01-01', - periods=4, freq='W'), - 'group': [1, 1, 2, 2], - 'val': Series([5, 6, 7, 8], - dtype='int32')} - ).set_index('date') - - result = df.resample('1D').ffill() - assert result.val.dtype == np.int32 - - result = df.groupby('group').resample('1D').ffill() - assert result.val.dtype == np.int32 - - def test_resample_dtype_coerceion(self): - - pytest.importorskip('scipy.interpolate') - - # GH 16361 - df = {"a": [1, 3, 1, 4]} - df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04")) - - expected = (df.astype("float64") - .resample("H") - .mean() - ["a"] - .interpolate("cubic") - ) - - result = df.resample("H")["a"].mean().interpolate("cubic") - tm.assert_series_equal(result, expected) - - result = df.resample("H").mean()["a"].interpolate("cubic") - tm.assert_series_equal(result, expected) - - def test_weekly_resample_buglet(self): - # #1327 - rng = date_range('1/1/2000', freq='B', periods=20) - ts = Series(np.random.randn(len(rng)), index=rng) - - resampled = ts.resample('W').mean() - expected = ts.resample('W-SUN').mean() - assert_series_equal(resampled, expected) - - def test_monthly_resample_error(self): - # #1451 - dates = date_range('4/16/2012 20:00', periods=5000, freq='h') - ts = Series(np.random.randn(len(dates)), index=dates) - # it works! - ts.resample('M') - - def test_nanosecond_resample_error(self): - # GH 12307 - Values falls after last bin when - # Resampling using pd.tseries.offsets.Nano as period - start = 1443707890427 - exp_start = 1443707890400 - indx = pd.date_range( - start=pd.to_datetime(start), - periods=10, - freq='100n' - ) - ts = Series(range(len(indx)), index=indx) - r = ts.resample(pd.tseries.offsets.Nano(100)) - result = r.agg('mean') - - exp_indx = pd.date_range( - start=pd.to_datetime(exp_start), - periods=10, - freq='100n' - ) - exp = Series(range(len(exp_indx)), index=exp_indx) - - assert_series_equal(result, exp) - - def test_resample_anchored_intraday(self): - # #1471, #1458 - - rng = date_range('1/1/2012', '4/1/2012', freq='100min') - df = DataFrame(rng.month, index=rng) - - result = df.resample('M').mean() - expected = df.resample( - 'M', kind='period').mean().to_timestamp(how='end') - expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') - tm.assert_frame_equal(result, expected) - - result = df.resample('M', closed='left').mean() - exp = df.tshift(1, freq='D').resample('M', kind='period').mean() - exp = exp.to_timestamp(how='end') - - exp.index = exp.index + Timedelta(1, 'ns') - Timedelta(1, 'D') - tm.assert_frame_equal(result, exp) - - rng = date_range('1/1/2012', '4/1/2012', freq='100min') - df = DataFrame(rng.month, index=rng) - - result = df.resample('Q').mean() - expected = df.resample( - 'Q', kind='period').mean().to_timestamp(how='end') - expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') - tm.assert_frame_equal(result, expected) - - result = df.resample('Q', closed='left').mean() - expected = df.tshift(1, freq='D').resample('Q', kind='period', - closed='left').mean() - expected = expected.to_timestamp(how='end') - expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') - tm.assert_frame_equal(result, expected) - - ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') - resampled = ts.resample('M').mean() - assert len(resampled) == 1 - - def test_resample_anchored_monthstart(self): - ts = _simple_ts('1/1/2000', '12/31/2002') - - freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN'] - - for freq in freqs: - ts.resample(freq).mean() - - def test_resample_anchored_multiday(self): - # When resampling a range spanning multiple days, ensure that the - # start date gets used to determine the offset. Fixes issue where - # a one day period is not a multiple of the frequency. - # - # See: https://github.com/pandas-dev/pandas/issues/8683 - - index = pd.date_range( - '2014-10-14 23:06:23.206', periods=3, freq='400L' - ) | pd.date_range( - '2014-10-15 23:00:00', periods=2, freq='2200L') - - s = Series(np.random.randn(5), index=index) - - # Ensure left closing works - result = s.resample('2200L').mean() - assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000') - - # Ensure right closing works - result = s.resample('2200L', label='right').mean() - assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200') - - def test_corner_cases(self): - # miscellaneous test coverage - - rng = date_range('1/1/2000', periods=12, freq='t') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('5t', closed='right', label='left').mean() - ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t') - tm.assert_index_equal(result.index, ex_index) - - len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] - # it works - result = len0pts.resample('A-DEC').mean() - assert len(result) == 0 - - # resample to periods - ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') - result = ts.resample('M', kind='period').mean() - assert len(result) == 1 - assert result.index[0] == Period('2000-04', freq='M') - - def test_anchored_lowercase_buglet(self): - dates = date_range('4/16/2012 20:00', periods=50000, freq='s') - ts = Series(np.random.randn(len(dates)), index=dates) - # it works! - ts.resample('d').mean() - - def test_upsample_apply_functions(self): - # #1596 - rng = pd.date_range('2012-06-12', periods=4, freq='h') - - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('20min').aggregate(['mean', 'sum']) - assert isinstance(result, DataFrame) - - def test_resample_not_monotonic(self): - rng = pd.date_range('2012-06-12', periods=200, freq='h') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts = ts.take(np.random.permutation(len(ts))) - - result = ts.resample('D').sum() - exp = ts.sort_index().resample('D').sum() - assert_series_equal(result, exp) - - def test_resample_median_bug_1688(self): - - for dtype in ['int64', 'int32', 'float64', 'float32']: - df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0), - datetime(2012, 1, 1, 0, 5, 0)], - dtype=dtype) - - result = df.resample("T").apply(lambda x: x.mean()) - exp = df.asfreq('T') - tm.assert_frame_equal(result, exp) - - result = df.resample("T").median() - exp = df.asfreq('T') - tm.assert_frame_equal(result, exp) - - def test_how_lambda_functions(self): - - ts = _simple_ts('1/1/2000', '4/1/2000') - - result = ts.resample('M').apply(lambda x: x.mean()) - exp = ts.resample('M').mean() - tm.assert_series_equal(result, exp) - - foo_exp = ts.resample('M').mean() - foo_exp.name = 'foo' - bar_exp = ts.resample('M').std() - bar_exp.name = 'bar' - - result = ts.resample('M').apply( - [lambda x: x.mean(), lambda x: x.std(ddof=1)]) - result.columns = ['foo', 'bar'] - tm.assert_series_equal(result['foo'], foo_exp) - tm.assert_series_equal(result['bar'], bar_exp) - - # this is a MI Series, so comparing the names of the results - # doesn't make sense - result = ts.resample('M').aggregate({'foo': lambda x: x.mean(), - 'bar': lambda x: x.std(ddof=1)}) - tm.assert_series_equal(result['foo'], foo_exp, check_names=False) - tm.assert_series_equal(result['bar'], bar_exp, check_names=False) - - def test_resample_unequal_times(self): - # #1772 - start = datetime(1999, 3, 1, 5) - # end hour is less than start - end = datetime(2012, 7, 31, 4) - bad_ind = date_range(start, end, freq="30min") - df = DataFrame({'close': 1}, index=bad_ind) - - # it works! - df.resample('AS').sum() - - def test_resample_consistency(self): - - # GH 6418 - # resample with bfill / limit / reindex consistency - - i30 = pd.date_range('2002-02-02', periods=4, freq='30T') - s = Series(np.arange(4.), index=i30) - s[2] = np.NaN - - # Upsample by factor 3 with reindex() and resample() methods: - i10 = pd.date_range(i30[0], i30[-1], freq='10T') - - s10 = s.reindex(index=i10, method='bfill') - s10_2 = s.reindex(index=i10, method='bfill', limit=2) - rl = s.reindex_like(s10, method='bfill', limit=2) - r10_2 = s.resample('10Min').bfill(limit=2) - r10 = s.resample('10Min').bfill() - - # s10_2, r10, r10_2, rl should all be equal - assert_series_equal(s10_2, r10) - assert_series_equal(s10_2, r10_2) - assert_series_equal(s10_2, rl) - - def test_resample_timegrouper(self): - # GH 7227 - dates1 = [datetime(2014, 10, 1), datetime(2014, 9, 3), - datetime(2014, 11, 5), datetime(2014, 9, 5), - datetime(2014, 10, 8), datetime(2014, 7, 15)] - - dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] - dates3 = [pd.NaT] + dates1 + [pd.NaT] - - for dates in [dates1, dates2, dates3]: - df = DataFrame(dict(A=dates, B=np.arange(len(dates)))) - result = df.set_index('A').resample('M').count() - exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31', - '2014-09-30', - '2014-10-31', '2014-11-30'], - freq='M', name='A') - expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx) - assert_frame_equal(result, expected) - - result = df.groupby(pd.Grouper(freq='M', key='A')).count() - assert_frame_equal(result, expected) - - df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange( - len(dates)))) - result = df.set_index('A').resample('M').count() - expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]}, - index=exp_idx, columns=['B', 'C']) - assert_frame_equal(result, expected) - - result = df.groupby(pd.Grouper(freq='M', key='A')).count() - assert_frame_equal(result, expected) - - def test_resample_nunique(self): - - # GH 12352 - df = DataFrame({ - 'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903', - Timestamp('2015-06-08 00:00:00'): '0010150847'}, - 'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05', - Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) - r = df.resample('D') - g = df.groupby(pd.Grouper(freq='D')) - expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: - x.nunique()) - assert expected.name == 'ID' - - for t in [r, g]: - result = r.ID.nunique() - assert_series_equal(result, expected) - - result = df.ID.resample('D').nunique() - assert_series_equal(result, expected) - - result = df.ID.groupby(pd.Grouper(freq='D')).nunique() - assert_series_equal(result, expected) - - def test_resample_nunique_with_date_gap(self): - # GH 13453 - index = pd.date_range('1-1-2000', '2-15-2000', freq='h') - index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') - index3 = index.append(index2) - s = Series(range(len(index3)), index=index3, dtype='int64') - r = s.resample('M') - - # Since all elements are unique, these should all be the same - results = [ - r.count(), - r.nunique(), - r.agg(Series.nunique), - r.agg('nunique') - ] - - assert_series_equal(results[0], results[1]) - assert_series_equal(results[0], results[2]) - assert_series_equal(results[0], results[3]) - - @pytest.mark.parametrize('n', [10000, 100000]) - @pytest.mark.parametrize('k', [10, 100, 1000]) - def test_resample_group_info(self, n, k): - # GH10914 - dr = date_range(start='2015-08-27', periods=n // 10, freq='T') - ts = Series(np.random.randint(0, n // k, n).astype('int64'), - index=np.random.choice(dr, n)) - - left = ts.resample('30T').nunique() - ix = date_range(start=ts.index.min(), end=ts.index.max(), - freq='30T') - - vals = ts.values - bins = np.searchsorted(ix.values, ts.index, side='right') - - sorter = np.lexsort((vals, bins)) - vals, bins = vals[sorter], bins[sorter] - - mask = np.r_[True, vals[1:] != vals[:-1]] - mask |= np.r_[True, bins[1:] != bins[:-1]] - - arr = np.bincount(bins[mask] - 1, - minlength=len(ix)).astype('int64', copy=False) - right = Series(arr, index=ix) - - assert_series_equal(left, right) - - def test_resample_size(self): - n = 10000 - dr = date_range('2015-09-19', periods=n, freq='T') - ts = Series(np.random.randn(n), index=np.random.choice(dr, n)) - - left = ts.resample('7T').size() - ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T') - - bins = np.searchsorted(ix.values, ts.index.values, side='right') - val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64', - copy=False) - - right = Series(val, index=ix) - assert_series_equal(left, right) - - def test_resample_across_dst(self): - # The test resamples a DatetimeIndex with values before and after a - # DST change - # Issue: 14682 - - # The DatetimeIndex we will start with - # (note that DST happens at 03:00+02:00 -> 02:00+01:00) - # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 - df1 = DataFrame([1477786980, 1477790580], columns=['ts']) - dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s') - .dt.tz_localize('UTC') - .dt.tz_convert('Europe/Madrid')) - - # The expected DatetimeIndex after resampling. - # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 - df2 = DataFrame([1477785600, 1477789200], columns=['ts']) - dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s') - .dt.tz_localize('UTC') - .dt.tz_convert('Europe/Madrid')) - df = DataFrame([5, 5], index=dti1) - - result = df.resample(rule='H').sum() - expected = DataFrame([5, 5], index=dti2) - - assert_frame_equal(result, expected) - - def test_resample_dst_anchor(self): - # 5172 - dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') - df = DataFrame([5], index=dti) - assert_frame_equal(df.resample(rule='CD').sum(), - DataFrame([5], index=df.index.normalize())) - df.resample(rule='MS').sum() - assert_frame_equal( - df.resample(rule='MS').sum(), - DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], - tz='US/Eastern'))) - - dti = date_range('2013-09-30', '2013-11-02', freq='30Min', - tz='Europe/Paris') - values = range(dti.size) - df = DataFrame({"a": values, - "b": values, - "c": values}, index=dti, dtype='int64') - how = {"a": "min", "b": "max", "c": "count"} - - assert_frame_equal( - df.resample("W-MON").agg(how)[["a", "b", "c"]], - DataFrame({"a": [0, 48, 384, 720, 1056, 1394], - "b": [47, 383, 719, 1055, 1393, 1586], - "c": [48, 336, 336, 336, 338, 193]}, - index=date_range('9/30/2013', '11/4/2013', - freq='W-MON', tz='Europe/Paris')), - 'W-MON Frequency') - - assert_frame_equal( - df.resample("2W-MON").agg(how)[["a", "b", "c"]], - DataFrame({"a": [0, 48, 720, 1394], - "b": [47, 719, 1393, 1586], - "c": [48, 672, 674, 193]}, - index=date_range('9/30/2013', '11/11/2013', - freq='2W-MON', tz='Europe/Paris')), - '2W-MON Frequency') - - assert_frame_equal( - df.resample("MS").agg(how)[["a", "b", "c"]], - DataFrame({"a": [0, 48, 1538], - "b": [47, 1537, 1586], - "c": [48, 1490, 49]}, - index=date_range('9/1/2013', '11/1/2013', - freq='MS', tz='Europe/Paris')), - 'MS Frequency') - - assert_frame_equal( - df.resample("2MS").agg(how)[["a", "b", "c"]], - DataFrame({"a": [0, 1538], - "b": [1537, 1586], - "c": [1538, 49]}, - index=date_range('9/1/2013', '11/1/2013', - freq='2MS', tz='Europe/Paris')), - '2MS Frequency') - - df_daily = df['10/26/2013':'10/29/2013'] - assert_frame_equal( - df_daily.resample("CD").agg({"a": "min", "b": "max", "c": "count"}) - [["a", "b", "c"]], - DataFrame({"a": [1248, 1296, 1346, 1394], - "b": [1295, 1345, 1393, 1441], - "c": [48, 50, 48, 48]}, - index=date_range('10/26/2013', '10/29/2013', - freq='CD', tz='Europe/Paris')), - 'CD Frequency') - - def test_downsample_across_dst(self): - # GH 8531 - tz = pytz.timezone('Europe/Berlin') - dt = datetime(2014, 10, 26) - dates = date_range(tz.localize(dt), periods=4, freq='2H') - result = Series(5, index=dates).resample('H').mean() - expected = Series([5., np.nan] * 3 + [5.], - index=date_range(tz.localize(dt), periods=7, - freq='H')) - tm.assert_series_equal(result, expected) - - def test_downsample_across_dst_weekly(self): - # GH 9119, GH 21459 - df = DataFrame(index=DatetimeIndex([ - '2017-03-25', '2017-03-26', '2017-03-27', - '2017-03-28', '2017-03-29' - ], tz='Europe/Amsterdam'), - data=[11, 12, 13, 14, 15]) - result = df.resample('1W').sum() - expected = DataFrame([23, 42], index=pd.DatetimeIndex([ - '2017-03-26', '2017-04-02' - ], tz='Europe/Amsterdam')) - tm.assert_frame_equal(result, expected) - - idx = pd.date_range("2013-04-01", "2013-05-01", tz='Europe/London', - freq='H') - s = Series(index=idx) - result = s.resample('W').mean() - expected = Series(index=pd.date_range( - '2013-04-07', freq='W', periods=5, tz='Europe/London' - )) - tm.assert_series_equal(result, expected) - - def test_resample_with_nat(self): - # GH 13020 - index = DatetimeIndex([pd.NaT, - '1970-01-01 00:00:00', - pd.NaT, - '1970-01-01 00:00:01', - '1970-01-01 00:00:02']) - frame = DataFrame([2, 3, 5, 7, 11], index=index) - - index_1s = DatetimeIndex(['1970-01-01 00:00:00', - '1970-01-01 00:00:01', - '1970-01-01 00:00:02']) - frame_1s = DataFrame([3, 7, 11], index=index_1s) - assert_frame_equal(frame.resample('1s').mean(), frame_1s) - - index_2s = DatetimeIndex(['1970-01-01 00:00:00', - '1970-01-01 00:00:02']) - frame_2s = DataFrame([5, 11], index=index_2s) - assert_frame_equal(frame.resample('2s').mean(), frame_2s) - - index_3s = DatetimeIndex(['1970-01-01 00:00:00']) - frame_3s = DataFrame([7], index=index_3s) - assert_frame_equal(frame.resample('3s').mean(), frame_3s) - - assert_frame_equal(frame.resample('60s').mean(), frame_3s) - - def test_resample_timedelta_values(self): - # GH 13119 - # check that timedelta dtype is preserved when NaT values are - # introduced by the resampling - - times = timedelta_range('1 day', '4 day', freq='4D') - df = DataFrame({'time': times}, index=times) - - times2 = timedelta_range('1 day', '4 day', freq='2D') - exp = Series(times2, index=times2, name='time') - exp.iloc[1] = pd.NaT - - res = df.resample('2D').first()['time'] - tm.assert_series_equal(res, exp) - res = df['time'].resample('2D').first() - tm.assert_series_equal(res, exp) - - def test_resample_datetime_values(self): - # GH 13119 - # check that datetime dtype is preserved when NaT values are - # introduced by the resampling - - dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)] - df = DataFrame({'timestamp': dates}, index=dates) - - exp = Series([datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)], - index=date_range('2016-01-15', periods=3, freq='2D'), - name='timestamp') - - res = df.resample('2D').first()['timestamp'] - tm.assert_series_equal(res, exp) - res = df['timestamp'].resample('2D').first() - tm.assert_series_equal(res, exp) - - def test_resample_apply_with_additional_args(self): - # GH 14615 - def f(data, add_arg): - return np.mean(data) * add_arg - - multiplier = 10 - result = self.series.resample('D').apply(f, multiplier) - expected = self.series.resample('D').mean().multiply(multiplier) - tm.assert_series_equal(result, expected) - - # Testing as kwarg - result = self.series.resample('D').apply(f, add_arg=multiplier) - expected = self.series.resample('D').mean().multiply(multiplier) - tm.assert_series_equal(result, expected) - - # Testing dataframe - df = pd.DataFrame({"A": 1, "B": 2}, - index=pd.date_range('2017', periods=10)) - result = df.groupby("A").resample("D").agg(f, multiplier) - expected = df.groupby("A").resample('D').mean().multiply(multiplier) - assert_frame_equal(result, expected) From 150a214e2107a5b301ec60cfe4546e84de8d683e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 19:41:31 +0000 Subject: [PATCH 08/17] split off TestResampleAPI class --- pandas/tests/resample/test_resample.py | 518 +------------------- pandas/tests/resample/test_resample_api.py | 529 +++++++++++++++++++++ 2 files changed, 531 insertions(+), 516 deletions(-) create mode 100644 pandas/tests/resample/test_resample_api.py diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_resample.py index 1be36ee4b8c7b..3931f461f5b09 100644 --- a/pandas/tests/resample/test_resample.py +++ b/pandas/tests/resample/test_resample.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from pandas.compat import OrderedDict, range, zip +from pandas.compat import range, zip from pandas.errors import AbstractMethodError import pandas as pd @@ -14,7 +14,7 @@ from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import PeriodIndex, period_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core.resample import DatetimeIndex, TimeGrouper +from pandas.core.resample import TimeGrouper import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_index_equal, @@ -42,520 +42,6 @@ def _simple_pts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) -class TestResampleAPI(object): - - def setup_method(self, method): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - - self.series = Series(np.random.rand(len(dti)), dti) - self.frame = DataFrame( - {'A': self.series, 'B': self.series, 'C': np.arange(len(dti))}) - - def test_str(self): - - r = self.series.resample('H') - assert ('DatetimeIndexResampler [freq=, axis=0, closed=left, ' - 'label=left, convention=start, base=0]' in str(r)) - - def test_api(self): - - r = self.series.resample('H') - result = r.mean() - assert isinstance(result, Series) - assert len(result) == 217 - - r = self.series.to_frame().resample('H') - result = r.mean() - assert isinstance(result, DataFrame) - assert len(result) == 217 - - def test_groupby_resample_api(self): - - # GH 12448 - # .groupby(...).resample(...) hitting warnings - # when appropriate - df = DataFrame({'date': pd.date_range(start='2016-01-01', - periods=4, - freq='W'), - 'group': [1, 1, 2, 2], - 'val': [5, 6, 7, 8]}).set_index('date') - - # replication step - i = pd.date_range('2016-01-03', periods=8).tolist() + \ - pd.date_range('2016-01-17', periods=8).tolist() - index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], - names=['group', 'date']) - expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]}, - index=index) - result = df.groupby('group').apply( - lambda x: x.resample('1D').ffill())[['val']] - assert_frame_equal(result, expected) - - def test_groupby_resample_on_api(self): - - # GH 15021 - # .groupby(...).resample(on=...) results in an unexpected - # keyword warning. - df = DataFrame({'key': ['A', 'B'] * 5, - 'dates': pd.date_range('2016-01-01', periods=10), - 'values': np.random.randn(10)}) - - expected = df.set_index('dates').groupby('key').resample('D').mean() - - result = df.groupby('key').resample('D', on='dates').mean() - assert_frame_equal(result, expected) - - def test_pipe(self): - # GH17905 - - # series - r = self.series.resample('H') - expected = r.max() - r.mean() - result = r.pipe(lambda x: x.max() - x.mean()) - tm.assert_series_equal(result, expected) - - # dataframe - r = self.frame.resample('H') - expected = r.max() - r.mean() - result = r.pipe(lambda x: x.max() - x.mean()) - tm.assert_frame_equal(result, expected) - - def test_getitem(self): - - r = self.frame.resample('H') - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) - - r = self.frame.resample('H')['B'] - assert r._selected_obj.name == self.frame.columns[1] - - # technically this is allowed - r = self.frame.resample('H')['A', 'B'] - tm.assert_index_equal(r._selected_obj.columns, - self.frame.columns[[0, 1]]) - - r = self.frame.resample('H')['A', 'B'] - tm.assert_index_equal(r._selected_obj.columns, - self.frame.columns[[0, 1]]) - - def test_select_bad_cols(self): - - g = self.frame.resample('H') - pytest.raises(KeyError, g.__getitem__, ['D']) - - pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with pytest.raises(KeyError, match='^[^A]+$'): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[['A', 'D']] - - def test_attribute_access(self): - - r = self.frame.resample('H') - tm.assert_series_equal(r.A.sum(), r['A'].sum()) - - def test_api_compat_before_use(self): - - # make sure that we are setting the binner - # on these attributes - for attr in ['groups', 'ngroups', 'indices']: - rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = Series(np.arange(len(rng)), index=rng) - rs = ts.resample('30s') - - # before use - getattr(rs, attr) - - # after grouper is initialized is ok - rs.mean() - getattr(rs, attr) - - def tests_skip_nuisance(self): - - df = self.frame - df['D'] = 'foo' - r = df.resample('H') - result = r[['A', 'B']].sum() - expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) - assert_frame_equal(result, expected) - - expected = r[['A', 'B', 'C']].sum() - result = r.sum() - assert_frame_equal(result, expected) - - def test_downsample_but_actually_upsampling(self): - - # this is reindex / asfreq - rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = Series(np.arange(len(rng), dtype='int64'), index=rng) - result = ts.resample('20s').asfreq() - expected = Series([0, 20, 40, 60, 80], - index=pd.date_range('2012-01-01 00:00:00', - freq='20s', - periods=5)) - assert_series_equal(result, expected) - - def test_combined_up_downsampling_of_irregular(self): - - # since we are reallydoing an operation like this - # ts2.resample('2s').mean().ffill() - # preserve these semantics - - rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = Series(np.arange(len(rng)), index=rng) - ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = ts2.resample('2s', how='mean', fill_method='ffill') - expected = ts2.resample('2s').mean().ffill() - assert_series_equal(result, expected) - - def test_transform(self): - - r = self.series.resample('20min') - expected = self.series.groupby( - pd.Grouper(freq='20min')).transform('mean') - result = r.transform('mean') - assert_series_equal(result, expected) - - def test_fillna(self): - - # need to upsample here - rng = pd.date_range('1/1/2012', periods=10, freq='2S') - ts = Series(np.arange(len(rng), dtype='int64'), index=rng) - r = ts.resample('s') - - expected = r.ffill() - result = r.fillna(method='ffill') - assert_series_equal(result, expected) - - expected = r.bfill() - result = r.fillna(method='bfill') - assert_series_equal(result, expected) - - with pytest.raises(ValueError): - r.fillna(0) - - def test_apply_without_aggregation(self): - - # both resample and groupby should work w/o aggregation - r = self.series.resample('20min') - g = self.series.groupby(pd.Grouper(freq='20min')) - - for t in [g, r]: - result = t.apply(lambda x: x) - assert_series_equal(result, self.series) - - def test_agg_consistency(self): - - # make sure that we are consistent across - # similar aggregations with and w/o selection list - df = DataFrame(np.random.randn(1000, 3), - index=pd.date_range('1/1/2012', freq='S', periods=1000), - columns=['A', 'B', 'C']) - - r = df.resample('3T') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) - result = r.agg({'r1': 'mean', 'r2': 'sum'}) - assert_frame_equal(result, expected) - - # TODO: once GH 14008 is fixed, move these tests into - # `Base` test class - def test_agg(self): - # test with all three Resampler apis and TimeGrouper - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - a_mean = r['A'].mean() - a_std = r['A'].std() - a_sum = r['A'].sum() - b_mean = r['B'].mean() - b_std = r['B'].std() - b_sum = r['B'].sum() - - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([['A', 'B'], - ['mean', 'std']]) - for t in cases: - result = t.aggregate([np.mean, np.std]) - assert_frame_equal(result, expected) - - expected = pd.concat([a_mean, b_std], axis=1) - for t in cases: - result = t.aggregate({'A': np.mean, - 'B': np.std}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'std')]) - for t in cases: - result = t.aggregate({'A': ['mean', 'std']}) - assert_frame_equal(result, expected) - - expected = pd.concat([a_mean, a_sum], axis=1) - expected.columns = ['mean', 'sum'] - for t in cases: - result = t['A'].aggregate(['mean', 'sum']) - assert_frame_equal(result, expected) - - expected = pd.concat([a_mean, a_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'sum')]) - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'sum'), - ('B', 'mean2'), - ('B', 'sum2')]) - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, - 'B': {'mean2': 'mean', 'sum2': 'sum'}}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'std'), - ('B', 'mean'), - ('B', 'std')]) - for t in cases: - result = t.aggregate({'A': ['mean', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'), - ('r1', 'A', 'sum'), - ('r2', 'B', 'mean'), - ('r2', 'B', 'sum')]) - - def test_agg_misc(self): - # test with all three Resampler apis and TimeGrouper - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - # passed lambda - for t in cases: - result = t.agg({'A': np.sum, - 'B': lambda x: np.std(x, ddof=1)}) - rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) - expected = pd.concat([r['A'].sum(), rcustom], axis=1) - assert_frame_equal(result, expected, check_like=True) - - # agg with renamers - expected = pd.concat([t['A'].sum(), - t['B'].sum(), - t['A'].mean(), - t['B'].mean()], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'), - ('result1', 'B'), - ('result2', 'A'), - ('result2', 'B')]) - - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), - ('result2', np.mean)])) - assert_frame_equal(result, expected, check_like=True) - - # agg with different hows - expected = pd.concat([t['A'].sum(), - t['A'].std(), - t['B'].mean(), - t['B'].std()], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), - ('A', 'std'), - ('B', 'mean'), - ('B', 'std')]) - for t in cases: - result = t.agg(OrderedDict([('A', ['sum', 'std']), - ('B', ['mean', 'std'])])) - assert_frame_equal(result, expected, check_like=True) - - # equivalent of using a selection list / or not - for t in cases: - result = t[['A', 'B']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - # series like aggs - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t['A'].agg({'A': ['sum', 'std']}) - expected = pd.concat([t['A'].sum(), - t['A'].std()], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), - ('A', 'std')]) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([t['A'].agg(['sum', 'std']), - t['A'].agg(['mean', 'std'])], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), - ('A', 'std'), - ('B', 'mean'), - ('B', 'std')]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t['A'].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - # errors - # invalid names in the agg specification - for t in cases: - def f(): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - t[['A']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - - pytest.raises(KeyError, f) - - def test_agg_nested_dicts(self): - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - for t in cases: - def f(): - t.aggregate({'r1': {'A': ['mean', 'sum']}, - 'r2': {'B': ['mean', 'sum']}}) - pytest.raises(ValueError, f) - - for t in cases: - expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), - t['B'].std()], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( - 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) - assert_frame_equal(result, expected, check_like=True) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) - assert_frame_equal(result, expected, check_like=True) - - def test_try_aggregate_non_existing_column(self): - # GH 16766 - data = [ - {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, - {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, - {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} - ] - df = DataFrame(data).set_index('dt') - - # Error as we don't have 'z' column - with pytest.raises(KeyError): - df.resample('30T').agg({'x': ['mean'], - 'y': ['median'], - 'z': ['sum']}) - - def test_selection_api_validation(self): - # GH 13500 - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - - rng = np.arange(len(index), dtype=np.int64) - df = DataFrame({'date': index, 'a': rng}, - index=pd.MultiIndex.from_arrays([rng, index], - names=['v', 'd'])) - df_exp = DataFrame({'a': rng}, index=index) - - # non DatetimeIndex - with pytest.raises(TypeError): - df.resample('2D', level='v') - - with pytest.raises(ValueError): - df.resample('2D', on='date', level='d') - - with pytest.raises(TypeError): - df.resample('2D', on=['a', 'date']) - - with pytest.raises(KeyError): - df.resample('2D', level=['a', 'date']) - - # upsampling not allowed - with pytest.raises(ValueError): - df.resample('2D', level='d').asfreq() - - with pytest.raises(ValueError): - df.resample('2D', on='date').asfreq() - - exp = df_exp.resample('2D').sum() - exp.index.name = 'date' - assert_frame_equal(exp, df.resample('2D', on='date').sum()) - - exp.index.name = 'd' - assert_frame_equal(exp, df.resample('2D', level='d').sum()) - - class Base(object): """ base class for resampling testing, calling diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py new file mode 100644 index 0000000000000..94a3d7db7eeef --- /dev/null +++ b/pandas/tests/resample/test_resample_api.py @@ -0,0 +1,529 @@ +# pylint: disable=E1101 + +from datetime import datetime + +import numpy as np +import pytest + +from pandas.compat import OrderedDict, range + +import pandas as pd +from pandas import DataFrame, Series +from pandas.core.indexes.datetimes import date_range +from pandas.core.resample import DatetimeIndex +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal + + +class TestResampleAPI(object): + + def setup_method(self, method): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') + + self.series = Series(np.random.rand(len(dti)), dti) + self.frame = DataFrame( + {'A': self.series, 'B': self.series, 'C': np.arange(len(dti))}) + + def test_str(self): + + r = self.series.resample('H') + assert ('DatetimeIndexResampler [freq=, axis=0, closed=left, ' + 'label=left, convention=start, base=0]' in str(r)) + + def test_api(self): + + r = self.series.resample('H') + result = r.mean() + assert isinstance(result, Series) + assert len(result) == 217 + + r = self.series.to_frame().resample('H') + result = r.mean() + assert isinstance(result, DataFrame) + assert len(result) == 217 + + def test_groupby_resample_api(self): + + # GH 12448 + # .groupby(...).resample(...) hitting warnings + # when appropriate + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + + # replication step + i = pd.date_range('2016-01-03', periods=8).tolist() + \ + pd.date_range('2016-01-17', periods=8).tolist() + index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], + names=['group', 'date']) + expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]}, + index=index) + result = df.groupby('group').apply( + lambda x: x.resample('1D').ffill())[['val']] + assert_frame_equal(result, expected) + + def test_groupby_resample_on_api(self): + + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. + df = DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) + + expected = df.set_index('dates').groupby('key').resample('D').mean() + + result = df.groupby('key').resample('D', on='dates').mean() + assert_frame_equal(result, expected) + + def test_pipe(self): + # GH17905 + + # series + r = self.series.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) + + # dataframe + r = self.frame.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + + def test_getitem(self): + + r = self.frame.resample('H') + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) + + r = self.frame.resample('H')['B'] + assert r._selected_obj.name == self.frame.columns[1] + + # technically this is allowed + r = self.frame.resample('H')['A', 'B'] + tm.assert_index_equal(r._selected_obj.columns, + self.frame.columns[[0, 1]]) + + r = self.frame.resample('H')['A', 'B'] + tm.assert_index_equal(r._selected_obj.columns, + self.frame.columns[[0, 1]]) + + def test_select_bad_cols(self): + + g = self.frame.resample('H') + pytest.raises(KeyError, g.__getitem__, ['D']) + + pytest.raises(KeyError, g.__getitem__, ['A', 'D']) + with pytest.raises(KeyError, match='^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[['A', 'D']] + + def test_attribute_access(self): + + r = self.frame.resample('H') + tm.assert_series_equal(r.A.sum(), r['A'].sum()) + + def test_api_compat_before_use(self): + + # make sure that we are setting the binner + # on these attributes + for attr in ['groups', 'ngroups', 'indices']: + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = Series(np.arange(len(rng)), index=rng) + rs = ts.resample('30s') + + # before use + getattr(rs, attr) + + # after grouper is initialized is ok + rs.mean() + getattr(rs, attr) + + def tests_skip_nuisance(self): + + df = self.frame + df['D'] = 'foo' + r = df.resample('H') + result = r[['A', 'B']].sum() + expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) + assert_frame_equal(result, expected) + + expected = r[['A', 'B', 'C']].sum() + result = r.sum() + assert_frame_equal(result, expected) + + def test_downsample_but_actually_upsampling(self): + + # this is reindex / asfreq + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) + result = ts.resample('20s').asfreq() + expected = Series([0, 20, 40, 60, 80], + index=pd.date_range('2012-01-01 00:00:00', + freq='20s', + periods=5)) + assert_series_equal(result, expected) + + def test_combined_up_downsampling_of_irregular(self): + + # since we are reallydoing an operation like this + # ts2.resample('2s').mean().ffill() + # preserve these semantics + + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = Series(np.arange(len(rng)), index=rng) + ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = ts2.resample('2s', how='mean', fill_method='ffill') + expected = ts2.resample('2s').mean().ffill() + assert_series_equal(result, expected) + + def test_transform(self): + + r = self.series.resample('20min') + expected = self.series.groupby( + pd.Grouper(freq='20min')).transform('mean') + result = r.transform('mean') + assert_series_equal(result, expected) + + def test_fillna(self): + + # need to upsample here + rng = pd.date_range('1/1/2012', periods=10, freq='2S') + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) + r = ts.resample('s') + + expected = r.ffill() + result = r.fillna(method='ffill') + assert_series_equal(result, expected) + + expected = r.bfill() + result = r.fillna(method='bfill') + assert_series_equal(result, expected) + + with pytest.raises(ValueError): + r.fillna(0) + + def test_apply_without_aggregation(self): + + # both resample and groupby should work w/o aggregation + r = self.series.resample('20min') + g = self.series.groupby(pd.Grouper(freq='20min')) + + for t in [g, r]: + result = t.apply(lambda x: x) + assert_series_equal(result, self.series) + + def test_agg_consistency(self): + + # make sure that we are consistent across + # similar aggregations with and w/o selection list + df = DataFrame(np.random.randn(1000, 3), + index=pd.date_range('1/1/2012', freq='S', periods=1000), + columns=['A', 'B', 'C']) + + r = df.resample('3T') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) + result = r.agg({'r1': 'mean', 'r2': 'sum'}) + assert_frame_equal(result, expected) + + # TODO: once GH 14008 is fixed, move these tests into + # `Base` test class + def test_agg(self): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + a_mean = r['A'].mean() + a_std = r['A'].std() + a_sum = r['A'].sum() + b_mean = r['B'].mean() + b_std = r['B'].std() + b_sum = r['B'].sum() + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([['A', 'B'], + ['mean', 'std']]) + for t in cases: + result = t.aggregate([np.mean, np.std]) + assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, b_std], axis=1) + for t in cases: + result = t.aggregate({'A': np.mean, + 'B': np.std}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'std')]) + for t in cases: + result = t.aggregate({'A': ['mean', 'std']}) + assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = ['mean', 'sum'] + for t in cases: + result = t['A'].aggregate(['mean', 'sum']) + assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'sum')]) + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'sum'), + ('B', 'mean2'), + ('B', 'sum2')]) + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, + 'B': {'mean2': 'mean', 'sum2': 'sum'}}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + for t in cases: + result = t.aggregate({'A': ['mean', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'), + ('r1', 'A', 'sum'), + ('r2', 'B', 'mean'), + ('r2', 'B', 'sum')]) + + def test_agg_misc(self): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + # passed lambda + for t in cases: + result = t.agg({'A': np.sum, + 'B': lambda x: np.std(x, ddof=1)}) + rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) + expected = pd.concat([r['A'].sum(), rcustom], axis=1) + assert_frame_equal(result, expected, check_like=True) + + # agg with renamers + expected = pd.concat([t['A'].sum(), + t['B'].sum(), + t['A'].mean(), + t['B'].mean()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'), + ('result1', 'B'), + ('result2', 'A'), + ('result2', 'B')]) + + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), + ('result2', np.mean)])) + assert_frame_equal(result, expected, check_like=True) + + # agg with different hows + expected = pd.concat([t['A'].sum(), + t['A'].std(), + t['B'].mean(), + t['B'].std()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + for t in cases: + result = t.agg(OrderedDict([('A', ['sum', 'std']), + ('B', ['mean', 'std'])])) + assert_frame_equal(result, expected, check_like=True) + + # equivalent of using a selection list / or not + for t in cases: + result = t[['A', 'B']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + # series like aggs + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std']}) + expected = pd.concat([t['A'].sum(), + t['A'].std()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std')]) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([t['A'].agg(['sum', 'std']), + t['A'].agg(['mean', 'std'])], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + # errors + # invalid names in the agg specification + for t in cases: + def f(): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + t[['A']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + + pytest.raises(KeyError, f) + + def test_agg_nested_dicts(self): + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + for t in cases: + def f(): + t.aggregate({'r1': {'A': ['mean', 'sum']}, + 'r2': {'B': ['mean', 'sum']}}) + pytest.raises(ValueError, f) + + for t in cases: + expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), + t['B'].std()], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( + 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) + assert_frame_equal(result, expected, check_like=True) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) + assert_frame_equal(result, expected, check_like=True) + + def test_try_aggregate_non_existing_column(self): + # GH 16766 + data = [ + {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} + ] + df = DataFrame(data).set_index('dt') + + # Error as we don't have 'z' column + with pytest.raises(KeyError): + df.resample('30T').agg({'x': ['mean'], + 'y': ['median'], + 'z': ['sum']}) + + def test_selection_api_validation(self): + # GH 13500 + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + df_exp = DataFrame({'a': rng}, index=index) + + # non DatetimeIndex + with pytest.raises(TypeError): + df.resample('2D', level='v') + + with pytest.raises(ValueError): + df.resample('2D', on='date', level='d') + + with pytest.raises(TypeError): + df.resample('2D', on=['a', 'date']) + + with pytest.raises(KeyError): + df.resample('2D', level=['a', 'date']) + + # upsampling not allowed + with pytest.raises(ValueError): + df.resample('2D', level='d').asfreq() + + with pytest.raises(ValueError): + df.resample('2D', on='date').asfreq() + + exp = df_exp.resample('2D').sum() + exp.index.name = 'date' + assert_frame_equal(exp, df.resample('2D', on='date').sum()) + + exp.index.name = 'd' + assert_frame_equal(exp, df.resample('2D', level='d').sum()) From e71882f2e331c26df742b25f5395ce0e574d3c9e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 20:39:21 +0000 Subject: [PATCH 09/17] test_resample.py renamed test_base.py --- pandas/tests/resample/{test_resample.py => test_base.py} | 0 pandas/tests/resample/test_datetime_index.py | 4 ++-- pandas/tests/resample/test_period_index.py | 2 +- pandas/tests/resample/test_timedelta_index.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename pandas/tests/resample/{test_resample.py => test_base.py} (100%) diff --git a/pandas/tests/resample/test_resample.py b/pandas/tests/resample/test_base.py similarity index 100% rename from pandas/tests/resample/test_resample.py rename to pandas/tests/resample/test_base.py diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index cc68f4119fba3..45e6ef4a60bb7 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -18,11 +18,11 @@ from pandas.core.indexes.period import Period, period_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper +from pandas.tests.resample.test_base import ( + Base, _simple_pts, _simple_ts, bday, downsample_methods) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) -from test_resample import ( - Base, _simple_pts, _simple_ts, bday, downsample_methods) import pandas.tseries.offsets as offsets from pandas.tseries.offsets import Minute diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 2b9eea3ac2228..dffec6300d772 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -16,10 +16,10 @@ from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range from pandas.core.resample import DatetimeIndex +from pandas.tests.resample.test_base import Base, _simple_pts, resample_methods import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) -from test_resample import Base, _simple_pts, resample_methods import pandas.tseries.offsets as offsets diff --git a/pandas/tests/resample/test_timedelta_index.py b/pandas/tests/resample/test_timedelta_index.py index 6e9eb562ab31c..6cc920d66aeb3 100644 --- a/pandas/tests/resample/test_timedelta_index.py +++ b/pandas/tests/resample/test_timedelta_index.py @@ -6,9 +6,9 @@ import pandas as pd from pandas import DataFrame, Series from pandas.core.indexes.timedeltas import timedelta_range +from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal -from test_resample import Base class TestTimedeltaIndex(Base): From 549781132a08ea59e5f66fd0836547abce4c15c3 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:19:31 +0000 Subject: [PATCH 10/17] remove class from test_resample_api.py --- pandas/tests/resample/test_resample_api.py | 917 +++++++++++---------- 1 file changed, 468 insertions(+), 449 deletions(-) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 94a3d7db7eeef..0c265ba9e94c3 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -15,515 +15,534 @@ from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestResampleAPI(object): - - def setup_method(self, method): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - - self.series = Series(np.random.rand(len(dti)), dti) - self.frame = DataFrame( - {'A': self.series, 'B': self.series, 'C': np.arange(len(dti))}) - - def test_str(self): - - r = self.series.resample('H') - assert ('DatetimeIndexResampler [freq=, axis=0, closed=left, ' - 'label=left, convention=start, base=0]' in str(r)) - - def test_api(self): - - r = self.series.resample('H') - result = r.mean() - assert isinstance(result, Series) - assert len(result) == 217 - - r = self.series.to_frame().resample('H') - result = r.mean() - assert isinstance(result, DataFrame) - assert len(result) == 217 - - def test_groupby_resample_api(self): - - # GH 12448 - # .groupby(...).resample(...) hitting warnings - # when appropriate - df = DataFrame({'date': pd.date_range(start='2016-01-01', - periods=4, - freq='W'), - 'group': [1, 1, 2, 2], - 'val': [5, 6, 7, 8]}).set_index('date') - - # replication step - i = pd.date_range('2016-01-03', periods=8).tolist() + \ - pd.date_range('2016-01-17', periods=8).tolist() - index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], - names=['group', 'date']) - expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]}, - index=index) - result = df.groupby('group').apply( - lambda x: x.resample('1D').ffill())[['val']] - assert_frame_equal(result, expected) +dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') - def test_groupby_resample_on_api(self): +test_series = Series(np.random.rand(len(dti)), dti) +test_frame = DataFrame( + {'A': test_series, 'B': test_series, 'C': np.arange(len(dti))}) - # GH 15021 - # .groupby(...).resample(on=...) results in an unexpected - # keyword warning. - df = DataFrame({'key': ['A', 'B'] * 5, - 'dates': pd.date_range('2016-01-01', periods=10), - 'values': np.random.randn(10)}) - expected = df.set_index('dates').groupby('key').resample('D').mean() +def test_str(): - result = df.groupby('key').resample('D', on='dates').mean() - assert_frame_equal(result, expected) + r = test_series.resample('H') + assert ('DatetimeIndexResampler [freq=, axis=0, closed=left, ' + 'label=left, convention=start, base=0]' in str(r)) - def test_pipe(self): - # GH17905 - # series - r = self.series.resample('H') - expected = r.max() - r.mean() - result = r.pipe(lambda x: x.max() - x.mean()) - tm.assert_series_equal(result, expected) +def test_api(): - # dataframe - r = self.frame.resample('H') - expected = r.max() - r.mean() - result = r.pipe(lambda x: x.max() - x.mean()) - tm.assert_frame_equal(result, expected) + r = test_series.resample('H') + result = r.mean() + assert isinstance(result, Series) + assert len(result) == 217 - def test_getitem(self): + r = test_series.to_frame().resample('H') + result = r.mean() + assert isinstance(result, DataFrame) + assert len(result) == 217 - r = self.frame.resample('H') - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) - r = self.frame.resample('H')['B'] - assert r._selected_obj.name == self.frame.columns[1] +def test_groupby_resample_api(): - # technically this is allowed - r = self.frame.resample('H')['A', 'B'] - tm.assert_index_equal(r._selected_obj.columns, - self.frame.columns[[0, 1]]) + # GH 12448 + # .groupby(...).resample(...) hitting warnings + # when appropriate + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') - r = self.frame.resample('H')['A', 'B'] - tm.assert_index_equal(r._selected_obj.columns, - self.frame.columns[[0, 1]]) + # replication step + i = pd.date_range('2016-01-03', periods=8).tolist() + \ + pd.date_range('2016-01-17', periods=8).tolist() + index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], + names=['group', 'date']) + expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]}, + index=index) + result = df.groupby('group').apply( + lambda x: x.resample('1D').ffill())[['val']] + assert_frame_equal(result, expected) - def test_select_bad_cols(self): - g = self.frame.resample('H') - pytest.raises(KeyError, g.__getitem__, ['D']) +def test_groupby_resample_on_api(): - pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with pytest.raises(KeyError, match='^[^A]+$'): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[['A', 'D']] + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. + df = DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) - def test_attribute_access(self): + expected = df.set_index('dates').groupby('key').resample('D').mean() - r = self.frame.resample('H') - tm.assert_series_equal(r.A.sum(), r['A'].sum()) + result = df.groupby('key').resample('D', on='dates').mean() + assert_frame_equal(result, expected) - def test_api_compat_before_use(self): - # make sure that we are setting the binner - # on these attributes - for attr in ['groups', 'ngroups', 'indices']: - rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = Series(np.arange(len(rng)), index=rng) - rs = ts.resample('30s') +def test_pipe(): + # GH17905 - # before use - getattr(rs, attr) + # series + r = test_series.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) - # after grouper is initialized is ok - rs.mean() - getattr(rs, attr) + # dataframe + r = test_frame.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) - def tests_skip_nuisance(self): - df = self.frame - df['D'] = 'foo' - r = df.resample('H') - result = r[['A', 'B']].sum() - expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) - assert_frame_equal(result, expected) +def test_getitem(): - expected = r[['A', 'B', 'C']].sum() - result = r.sum() - assert_frame_equal(result, expected) + r = test_frame.resample('H') + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns) - def test_downsample_but_actually_upsampling(self): + r = test_frame.resample('H')['B'] + assert r._selected_obj.name == test_frame.columns[1] + + # technically this is allowed + r = test_frame.resample('H')['A', 'B'] + tm.assert_index_equal(r._selected_obj.columns, + test_frame.columns[[0, 1]]) + + r = test_frame.resample('H')['A', 'B'] + tm.assert_index_equal(r._selected_obj.columns, + test_frame.columns[[0, 1]]) + + +def test_select_bad_cols(): + + g = test_frame.resample('H') + pytest.raises(KeyError, g.__getitem__, ['D']) + + pytest.raises(KeyError, g.__getitem__, ['A', 'D']) + with pytest.raises(KeyError, match='^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[['A', 'D']] - # this is reindex / asfreq - rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = Series(np.arange(len(rng), dtype='int64'), index=rng) - result = ts.resample('20s').asfreq() - expected = Series([0, 20, 40, 60, 80], - index=pd.date_range('2012-01-01 00:00:00', - freq='20s', - periods=5)) - assert_series_equal(result, expected) - def test_combined_up_downsampling_of_irregular(self): +def test_attribute_access(): - # since we are reallydoing an operation like this - # ts2.resample('2s').mean().ffill() - # preserve these semantics + r = test_frame.resample('H') + tm.assert_series_equal(r.A.sum(), r['A'].sum()) + +def test_api_compat_before_use(): + + # make sure that we are setting the binner + # on these attributes + for attr in ['groups', 'ngroups', 'indices']: rng = pd.date_range('1/1/2012', periods=100, freq='S') ts = Series(np.arange(len(rng)), index=rng) - ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] + rs = ts.resample('30s') - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = ts2.resample('2s', how='mean', fill_method='ffill') - expected = ts2.resample('2s').mean().ffill() - assert_series_equal(result, expected) + # before use + getattr(rs, attr) - def test_transform(self): + # after grouper is initialized is ok + rs.mean() + getattr(rs, attr) - r = self.series.resample('20min') - expected = self.series.groupby( - pd.Grouper(freq='20min')).transform('mean') - result = r.transform('mean') - assert_series_equal(result, expected) - def test_fillna(self): +def tests_skip_nuisance(): - # need to upsample here - rng = pd.date_range('1/1/2012', periods=10, freq='2S') - ts = Series(np.arange(len(rng), dtype='int64'), index=rng) - r = ts.resample('s') + df = test_frame + df['D'] = 'foo' + r = df.resample('H') + result = r[['A', 'B']].sum() + expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) + assert_frame_equal(result, expected) - expected = r.ffill() - result = r.fillna(method='ffill') - assert_series_equal(result, expected) + expected = r[['A', 'B', 'C']].sum() + result = r.sum() + assert_frame_equal(result, expected) - expected = r.bfill() - result = r.fillna(method='bfill') - assert_series_equal(result, expected) - with pytest.raises(ValueError): - r.fillna(0) +def test_downsample_but_actually_upsampling(): - def test_apply_without_aggregation(self): + # this is reindex / asfreq + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) + result = ts.resample('20s').asfreq() + expected = Series([0, 20, 40, 60, 80], + index=pd.date_range('2012-01-01 00:00:00', + freq='20s', + periods=5)) + assert_series_equal(result, expected) - # both resample and groupby should work w/o aggregation - r = self.series.resample('20min') - g = self.series.groupby(pd.Grouper(freq='20min')) - for t in [g, r]: - result = t.apply(lambda x: x) - assert_series_equal(result, self.series) +def test_combined_up_downsampling_of_irregular(): - def test_agg_consistency(self): + # since we are reallydoing an operation like this + # ts2.resample('2s').mean().ffill() + # preserve these semantics - # make sure that we are consistent across - # similar aggregations with and w/o selection list - df = DataFrame(np.random.randn(1000, 3), - index=pd.date_range('1/1/2012', freq='S', periods=1000), - columns=['A', 'B', 'C']) + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = Series(np.arange(len(rng)), index=rng) + ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] - r = df.resample('3T') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = ts2.resample('2s', how='mean', fill_method='ffill') + expected = ts2.resample('2s').mean().ffill() + assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) - result = r.agg({'r1': 'mean', 'r2': 'sum'}) + +def test_transform(): + + r = test_series.resample('20min') + expected = test_series.groupby( + pd.Grouper(freq='20min')).transform('mean') + result = r.transform('mean') + assert_series_equal(result, expected) + + +def test_fillna(): + + # need to upsample here + rng = pd.date_range('1/1/2012', periods=10, freq='2S') + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) + r = ts.resample('s') + + expected = r.ffill() + result = r.fillna(method='ffill') + assert_series_equal(result, expected) + + expected = r.bfill() + result = r.fillna(method='bfill') + assert_series_equal(result, expected) + + with pytest.raises(ValueError): + r.fillna(0) + + +def test_apply_without_aggregation(): + + # both resample and groupby should work w/o aggregation + r = test_series.resample('20min') + g = test_series.groupby(pd.Grouper(freq='20min')) + + for t in [g, r]: + result = t.apply(lambda x: x) + assert_series_equal(result, test_series) + + +def test_agg_consistency(): + + # make sure that we are consistent across + # similar aggregations with and w/o selection list + df = DataFrame(np.random.randn(1000, 3), + index=pd.date_range('1/1/2012', freq='S', periods=1000), + columns=['A', 'B', 'C']) + + r = df.resample('3T') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) + result = r.agg({'r1': 'mean', 'r2': 'sum'}) + assert_frame_equal(result, expected) + +# TODO: once GH 14008 is fixed, move these tests into +# `Base` test class + + +def test_agg(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + a_mean = r['A'].mean() + a_std = r['A'].std() + a_sum = r['A'].sum() + b_mean = r['B'].mean() + b_std = r['B'].std() + b_sum = r['B'].sum() + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([['A', 'B'], + ['mean', 'std']]) + for t in cases: + result = t.aggregate([np.mean, np.std]) assert_frame_equal(result, expected) - # TODO: once GH 14008 is fixed, move these tests into - # `Base` test class - def test_agg(self): - # test with all three Resampler apis and TimeGrouper - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - a_mean = r['A'].mean() - a_std = r['A'].std() - a_sum = r['A'].sum() - b_mean = r['B'].mean() - b_std = r['B'].std() - b_sum = r['B'].sum() - - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([['A', 'B'], - ['mean', 'std']]) - for t in cases: - result = t.aggregate([np.mean, np.std]) - assert_frame_equal(result, expected) - - expected = pd.concat([a_mean, b_std], axis=1) - for t in cases: - result = t.aggregate({'A': np.mean, - 'B': np.std}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'std')]) - for t in cases: - result = t.aggregate({'A': ['mean', 'std']}) - assert_frame_equal(result, expected) - - expected = pd.concat([a_mean, a_sum], axis=1) - expected.columns = ['mean', 'sum'] - for t in cases: - result = t['A'].aggregate(['mean', 'sum']) + expected = pd.concat([a_mean, b_std], axis=1) + for t in cases: + result = t.aggregate({'A': np.mean, + 'B': np.std}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'std')]) + for t in cases: + result = t.aggregate({'A': ['mean', 'std']}) assert_frame_equal(result, expected) - expected = pd.concat([a_mean, a_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'sum')]) - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'sum'), - ('B', 'mean2'), - ('B', 'sum2')]) - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, - 'B': {'mean2': 'mean', 'sum2': 'sum'}}) - assert_frame_equal(result, expected, check_like=True) + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = ['mean', 'sum'] + for t in cases: + result = t['A'].aggregate(['mean', 'sum']) + assert_frame_equal(result, expected) - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), - ('A', 'std'), - ('B', 'mean'), - ('B', 'std')]) - for t in cases: - result = t.aggregate({'A': ['mean', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'), - ('r1', 'A', 'sum'), - ('r2', 'B', 'mean'), - ('r2', 'B', 'sum')]) - - def test_agg_misc(self): - # test with all three Resampler apis and TimeGrouper - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - # passed lambda - for t in cases: - result = t.agg({'A': np.sum, - 'B': lambda x: np.std(x, ddof=1)}) - rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) - expected = pd.concat([r['A'].sum(), rcustom], axis=1) - assert_frame_equal(result, expected, check_like=True) - - # agg with renamers + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'sum')]) + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'sum'), + ('B', 'mean2'), + ('B', 'sum2')]) + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, + 'B': {'mean2': 'mean', 'sum2': 'sum'}}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + for t in cases: + result = t.aggregate({'A': ['mean', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'), + ('r1', 'A', 'sum'), + ('r2', 'B', 'mean'), + ('r2', 'B', 'sum')]) + + +def test_agg_misc(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + # passed lambda + for t in cases: + result = t.agg({'A': np.sum, + 'B': lambda x: np.std(x, ddof=1)}) + rcustom = t['B'].apply(lambda x: np.std(x, ddof=1)) + expected = pd.concat([r['A'].sum(), rcustom], axis=1) + assert_frame_equal(result, expected, check_like=True) + + # agg with renamers + expected = pd.concat([t['A'].sum(), + t['B'].sum(), + t['A'].mean(), + t['B'].mean()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'), + ('result1', 'B'), + ('result2', 'A'), + ('result2', 'B')]) + + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), + ('result2', np.mean)])) + assert_frame_equal(result, expected, check_like=True) + + # agg with different hows + expected = pd.concat([t['A'].sum(), + t['A'].std(), + t['B'].mean(), + t['B'].std()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std'), + ('B', 'mean'), + ('B', 'std')]) + for t in cases: + result = t.agg(OrderedDict([('A', ['sum', 'std']), + ('B', ['mean', 'std'])])) + assert_frame_equal(result, expected, check_like=True) + + # equivalent of using a selection list / or not + for t in cases: + result = t[['A', 'B']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + # series like aggs + for t in cases: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std']}) expected = pd.concat([t['A'].sum(), - t['B'].sum(), - t['A'].mean(), - t['B'].mean()], + t['A'].std()], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'), - ('result1', 'B'), - ('result2', 'A'), - ('result2', 'B')]) - - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), - ('result2', np.mean)])) - assert_frame_equal(result, expected, check_like=True) + expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), + ('A', 'std')]) + assert_frame_equal(result, expected, check_like=True) - # agg with different hows - expected = pd.concat([t['A'].sum(), - t['A'].std(), - t['B'].mean(), - t['B'].std()], + expected = pd.concat([t['A'].agg(['sum', 'std']), + t['A'].agg(['mean', 'std'])], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), ('A', 'std'), ('B', 'mean'), ('B', 'std')]) - for t in cases: - result = t.agg(OrderedDict([('A', ['sum', 'std']), - ('B', ['mean', 'std'])])) - assert_frame_equal(result, expected, check_like=True) - - # equivalent of using a selection list / or not - for t in cases: - result = t[['A', 'B']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - # series like aggs - for t in cases: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t['A'].agg({'A': ['sum', 'std']}) - expected = pd.concat([t['A'].sum(), - t['A'].std()], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), - ('A', 'std')]) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([t['A'].agg(['sum', 'std']), - t['A'].agg(['mean', 'std'])], - axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'), - ('A', 'std'), - ('B', 'mean'), - ('B', 'std')]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + assert_frame_equal(result, expected, check_like=True) + + # errors + # invalid names in the agg specification + for t in cases: + def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = t['A'].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - # errors - # invalid names in the agg specification - for t in cases: - def f(): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - t[['A']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) - - pytest.raises(KeyError, f) - - def test_agg_nested_dicts(self): - - np.random.seed(1234) - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - index.name = 'date' - df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) - df_col = df.reset_index() - df_mult = df_col.copy() - df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], - names=['index', 'date']) - r = df.resample('2D') - cases = [ - r, - df_col.resample('2D', on='date'), - df_mult.resample('2D', level='date'), - df.groupby(pd.Grouper(freq='2D')) - ] - - for t in cases: - def f(): - t.aggregate({'r1': {'A': ['mean', 'sum']}, - 'r2': {'B': ['mean', 'sum']}}) - pytest.raises(ValueError, f) - - for t in cases: - expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), - t['B'].std()], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( - 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) + t[['A']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) + + pytest.raises(KeyError, f) + + +def test_agg_nested_dicts(): + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + index.name = 'date' + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], + names=['index', 'date']) + r = df.resample('2D') + cases = [ + r, + df_col.resample('2D', on='date'), + df_mult.resample('2D', level='date'), + df.groupby(pd.Grouper(freq='2D')) + ] + + for t in cases: + def f(): + t.aggregate({'r1': {'A': ['mean', 'sum']}, + 'r2': {'B': ['mean', 'sum']}}) + pytest.raises(ValueError, f) + + for t in cases: + expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), + t['B'].std()], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( + 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) - assert_frame_equal(result, expected, check_like=True) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) + assert_frame_equal(result, expected, check_like=True) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = t.agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) - assert_frame_equal(result, expected, check_like=True) - - def test_try_aggregate_non_existing_column(self): - # GH 16766 - data = [ - {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, - {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, - {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} - ] - df = DataFrame(data).set_index('dt') - - # Error as we don't have 'z' column - with pytest.raises(KeyError): - df.resample('30T').agg({'x': ['mean'], - 'y': ['median'], - 'z': ['sum']}) - - def test_selection_api_validation(self): - # GH 13500 - index = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - - rng = np.arange(len(index), dtype=np.int64) - df = DataFrame({'date': index, 'a': rng}, - index=pd.MultiIndex.from_arrays([rng, index], - names=['v', 'd'])) - df_exp = DataFrame({'a': rng}, index=index) - - # non DatetimeIndex - with pytest.raises(TypeError): - df.resample('2D', level='v') - - with pytest.raises(ValueError): - df.resample('2D', on='date', level='d') - - with pytest.raises(TypeError): - df.resample('2D', on=['a', 'date']) - - with pytest.raises(KeyError): - df.resample('2D', level=['a', 'date']) - - # upsampling not allowed - with pytest.raises(ValueError): - df.resample('2D', level='d').asfreq() - - with pytest.raises(ValueError): - df.resample('2D', on='date').asfreq() - - exp = df_exp.resample('2D').sum() - exp.index.name = 'date' - assert_frame_equal(exp, df.resample('2D', on='date').sum()) - - exp.index.name = 'd' - assert_frame_equal(exp, df.resample('2D', level='d').sum()) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) + assert_frame_equal(result, expected, check_like=True) + + +def test_try_aggregate_non_existing_column(): + # GH 16766 + data = [ + {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} + ] + df = DataFrame(data).set_index('dt') + + # Error as we don't have 'z' column + with pytest.raises(KeyError): + df.resample('30T').agg({'x': ['mean'], + 'y': ['median'], + 'z': ['sum']}) + + +def test_selection_api_validation(): + # GH 13500 + index = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + df_exp = DataFrame({'a': rng}, index=index) + + # non DatetimeIndex + with pytest.raises(TypeError): + df.resample('2D', level='v') + + with pytest.raises(ValueError): + df.resample('2D', on='date', level='d') + + with pytest.raises(TypeError): + df.resample('2D', on=['a', 'date']) + + with pytest.raises(KeyError): + df.resample('2D', level=['a', 'date']) + + # upsampling not allowed + with pytest.raises(ValueError): + df.resample('2D', level='d').asfreq() + + with pytest.raises(ValueError): + df.resample('2D', on='date').asfreq() + + exp = df_exp.resample('2D').sum() + exp.index.name = 'date' + assert_frame_equal(exp, df.resample('2D', on='date').sum()) + + exp.index.name = 'd' + assert_frame_equal(exp, df.resample('2D', level='d').sum()) From b85459c80665691bcff04b902a588d3432a13fb1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:28:46 +0000 Subject: [PATCH 11/17] remove class from test_resampler_grouper.py --- .../tests/resample/test_resampler_grouper.py | 421 +++++++++--------- 1 file changed, 216 insertions(+), 205 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 395e05aa280b4..b61acfc3d2c5e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -12,238 +12,249 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal +test_frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.arange(40)}, + index=date_range('1/1/2000', + freq='s', + periods=40)) -class TestResamplerGrouper(object): - - def setup_method(self, method): - self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, - 'B': np.arange(40)}, - index=date_range('1/1/2000', - freq='s', - periods=40)) - - def test_tab_complete_ipython6_warning(self, ip): - from IPython.core.completer import provisionalcompleter - code = dedent("""\ - import pandas.util.testing as tm - s = tm.makeTimeSeries() - rs = s.resample("D") - """) - ip.run_code(code) - - with tm.assert_produces_warning(None): - with provisionalcompleter('ignore'): - list(ip.Completer.completions('rs.', 1)) - - def test_deferred_with_groupby(self): - - # GH 12486 - # support deferred resample ops with groupby - data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3], - ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7], - ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5], - ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1], - ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]] - - df = DataFrame(data, columns=['date', 'id', 'score']) - df.date = pd.to_datetime(df.date) - f = lambda x: x.set_index('date').resample('D').asfreq() - expected = df.groupby('id').apply(f) - result = df.set_index('date').groupby('id').resample('D').asfreq() - assert_frame_equal(result, expected) - df = DataFrame({'date': pd.date_range(start='2016-01-01', - periods=4, - freq='W'), - 'group': [1, 1, 2, 2], - 'val': [5, 6, 7, 8]}).set_index('date') +def test_tab_complete_ipython6_warning(ip): + from IPython.core.completer import provisionalcompleter + code = dedent("""\ + import pandas.util.testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """) + ip.run_code(code) + + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('rs.', 1)) + + +def test_deferred_with_groupby(): + + # GH 12486 + # support deferred resample ops with groupby + data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3], + ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7], + ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5], + ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1], + ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]] + + df = DataFrame(data, columns=['date', 'id', 'score']) + df.date = pd.to_datetime(df.date) + + def f(x): + return x.set_index('date').resample('D').asfreq() + expected = df.groupby('id').apply(f) + result = df.set_index('date').groupby('id').resample('D').asfreq() + assert_frame_equal(result, expected) + + df = DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + + def f(x): + return x.resample('1D').ffill() + expected = df.groupby('group').apply(f) + result = df.groupby('group').resample('1D').ffill() + assert_frame_equal(result, expected) + + +def test_getitem(): + g = test_frame.groupby('A') + + expected = g.B.apply(lambda x: x.resample('2s').mean()) + + result = g.resample('2s').B.mean() + assert_series_equal(result, expected) + + result = g.B.resample('2s').mean() + assert_series_equal(result, expected) + + result = g.resample('2s').mean().B + assert_series_equal(result, expected) + + +def test_getitem_multiple(): + + # GH 13174 + # multiple calls after selection causing an issue with aliasing + data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}] + df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) + r = df.groupby('id').resample('1D') + result = r['buyer'].count() + expected = Series([1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp('2016-01-01')), + (2, Timestamp('2016-01-02'))], + names=['id', None]), + name='buyer') + assert_series_equal(result, expected) + + result = r['buyer'].count() + assert_series_equal(result, expected) + + +def test_groupby_resample_on_api_with_getitem(): + # GH 17813 + df = pd.DataFrame({'id': list('aabbb'), + 'date': pd.date_range('1-1-2016', periods=5), + 'data': 1}) + exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() + result = df.groupby('id').resample('2D', on='date')['data'].sum() + assert_series_equal(result, exp) + + +def test_nearest(): + + # GH 17496 + # Resample nearest + index = pd.date_range('1/1/2000', periods=3, freq='T') + result = Series(range(3), index=index).resample('20s').nearest() + + expected = Series( + [0, 0, 1, 1, 1, 2, 2], + index=pd.DatetimeIndex( + ['2000-01-01 00:00:00', '2000-01-01 00:00:20', + '2000-01-01 00:00:40', '2000-01-01 00:01:00', + '2000-01-01 00:01:20', '2000-01-01 00:01:40', + '2000-01-01 00:02:00'], + dtype='datetime64[ns]', + freq='20S')) + assert_series_equal(result, expected) - f = lambda x: x.resample('1D').ffill() - expected = df.groupby('group').apply(f) - result = df.groupby('group').resample('1D').ffill() + +def test_methods(): + g = test_frame.groupby('A') + r = g.resample('2s') + + for f in ['first', 'last', 'median', 'sem', 'sum', 'mean', + 'min', 'max']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) assert_frame_equal(result, expected) - def test_getitem(self): - g = self.frame.groupby('A') + for f in ['size']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_series_equal(result, expected) - expected = g.B.apply(lambda x: x.resample('2s').mean()) + for f in ['count']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_frame_equal(result, expected) - result = g.resample('2s').B.mean() + # series only + for f in ['nunique']: + result = getattr(r.B, f)() + expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)()) assert_series_equal(result, expected) - result = g.B.resample('2s').mean() - assert_series_equal(result, expected) + for f in ['nearest', 'backfill', 'ffill', 'asfreq']: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) + assert_frame_equal(result, expected) - result = g.resample('2s').mean().B - assert_series_equal(result, expected) + result = r.ohlc() + expected = g.apply(lambda x: x.resample('2s').ohlc()) + assert_frame_equal(result, expected) - def test_getitem_multiple(self): - - # GH 13174 - # multiple calls after selection causing an issue with aliasing - data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}] - df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) - r = df.groupby('id').resample('1D') - result = r['buyer'].count() - expected = Series([1, 1], - index=pd.MultiIndex.from_tuples( - [(1, Timestamp('2016-01-01')), - (2, Timestamp('2016-01-02'))], - names=['id', None]), - name='buyer') - assert_series_equal(result, expected) + for f in ['std', 'var']: + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1)) + assert_frame_equal(result, expected) - result = r['buyer'].count() - assert_series_equal(result, expected) - def test_groupby_resample_on_api_with_getitem(self): - # GH 17813 - df = pd.DataFrame({'id': list('aabbb'), - 'date': pd.date_range('1-1-2016', periods=5), - 'data': 1}) - exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() - result = df.groupby('id').resample('2D', on='date')['data'].sum() - assert_series_equal(result, exp) - - def test_nearest(self): - - # GH 17496 - # Resample nearest - index = pd.date_range('1/1/2000', periods=3, freq='T') - result = Series(range(3), index=index).resample('20s').nearest() - - expected = Series( - [0, 0, 1, 1, 1, 2, 2], - index=pd.DatetimeIndex( - ['2000-01-01 00:00:00', '2000-01-01 00:00:20', - '2000-01-01 00:00:40', '2000-01-01 00:01:00', - '2000-01-01 00:01:20', '2000-01-01 00:01:40', - '2000-01-01 00:02:00'], - dtype='datetime64[ns]', - freq='20S')) - assert_series_equal(result, expected) +def test_apply(): - def test_methods(self): - g = self.frame.groupby('A') - r = g.resample('2s') - - for f in ['first', 'last', 'median', 'sem', 'sum', 'mean', - 'min', 'max']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - for f in ['size']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_series_equal(result, expected) - - for f in ['count']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - # series only - for f in ['nunique']: - result = getattr(r.B, f)() - expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_series_equal(result, expected) - - for f in ['nearest', 'backfill', 'ffill', 'asfreq']: - result = getattr(r, f)() - expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) - assert_frame_equal(result, expected) - - result = r.ohlc() - expected = g.apply(lambda x: x.resample('2s').ohlc()) - assert_frame_equal(result, expected) + g = test_frame.groupby('A') + r = g.resample('2s') - for f in ['std', 'var']: - result = getattr(r, f)(ddof=1) - expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1)) - assert_frame_equal(result, expected) + # reduction + expected = g.resample('2s').sum() - def test_apply(self): + def f(x): + return x.resample('2s').sum() - g = self.frame.groupby('A') - r = g.resample('2s') + result = r.apply(f) + assert_frame_equal(result, expected) - # reduction - expected = g.resample('2s').sum() + def f(x): + return x.resample('2s').apply(lambda y: y.sum()) - def f(x): - return x.resample('2s').sum() + result = g.apply(f) + assert_frame_equal(result, expected) - result = r.apply(f) - assert_frame_equal(result, expected) - def f(x): - return x.resample('2s').apply(lambda y: y.sum()) +def test_apply_with_mutated_index(): + # GH 15169 + index = pd.date_range('1-1-2015', '12-31-15', freq='D') + df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) - result = g.apply(f) - assert_frame_equal(result, expected) + def f(x): + s = Series([1, 2], index=['a', 'b']) + return s - def test_apply_with_mutated_index(self): - # GH 15169 - index = pd.date_range('1-1-2015', '12-31-15', freq='D') - df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) + expected = df.groupby(pd.Grouper(freq='M')).apply(f) - def f(x): - s = Series([1, 2], index=['a', 'b']) - return s + result = df.resample('M').apply(f) + assert_frame_equal(result, expected) - expected = df.groupby(pd.Grouper(freq='M')).apply(f) + # A case for series + expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f) + result = df['col1'].resample('M').apply(f) + assert_series_equal(result, expected) - result = df.resample('M').apply(f) - assert_frame_equal(result, expected) - # A case for series - expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f) - result = df['col1'].resample('M').apply(f) - assert_series_equal(result, expected) +def test_resample_groupby_with_label(): + # GH 13235 + index = date_range('2000-01-01', freq='2D', periods=5) + df = DataFrame(index=index, + data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]} + ) + result = df.groupby('col0').resample('1W', label='left').sum() - def test_resample_groupby_with_label(self): - # GH 13235 - index = date_range('2000-01-01', freq='2D', periods=5) - df = DataFrame(index=index, - data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]} - ) - result = df.groupby('col0').resample('1W', label='left').sum() - - mi = [np.array([0, 0, 1, 2]), - pd.to_datetime(np.array(['1999-12-26', '2000-01-02', - '2000-01-02', '2000-01-02']) - ) - ] - mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None]) - expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]}, - index=mindex - ) + mi = [np.array([0, 0, 1, 2]), + pd.to_datetime(np.array(['1999-12-26', '2000-01-02', + '2000-01-02', '2000-01-02']) + ) + ] + mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None]) + expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]}, + index=mindex + ) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected) - def test_consistency_with_window(self): - - # consistent return values with window - df = self.frame - expected = pd.Int64Index([1, 2, 3], name='A') - result = df.groupby('A').resample('2s').mean() - assert result.index.nlevels == 2 - tm.assert_index_equal(result.index.levels[0], expected) - - result = df.groupby('A').rolling(20).mean() - assert result.index.nlevels == 2 - tm.assert_index_equal(result.index.levels[0], expected) - - def test_median_duplicate_columns(self): - # GH 14233 - - df = DataFrame(np.random.randn(20, 3), - columns=list('aaa'), - index=pd.date_range('2012-01-01', periods=20, freq='s')) - df2 = df.copy() - df2.columns = ['a', 'b', 'c'] - expected = df2.resample('5s').median() - result = df.resample('5s').median() - expected.columns = result.columns - assert_frame_equal(result, expected) + +def test_consistency_with_window(): + + # consistent return values with window + df = test_frame + expected = pd.Int64Index([1, 2, 3], name='A') + result = df.groupby('A').resample('2s').mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + result = df.groupby('A').rolling(20).mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + +def test_median_duplicate_columns(): + # GH 14233 + + df = DataFrame(np.random.randn(20, 3), + columns=list('aaa'), + index=pd.date_range('2012-01-01', periods=20, freq='s')) + df2 = df.copy() + df2.columns = ['a', 'b', 'c'] + expected = df2.resample('5s').median() + result = df.resample('5s').median() + expected.columns = result.columns + assert_frame_equal(result, expected) From 5efb53b58df52ae479744d15535a542200ba6162 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:37:18 +0000 Subject: [PATCH 12/17] remove class from test_time_grouper.py --- pandas/tests/resample/test_time_grouper.py | 598 +++++++++++---------- 1 file changed, 305 insertions(+), 293 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 0c507b58d6ec5..927060609822e 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -15,308 +15,320 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal +test_series = Series(np.random.randn(1000), + index=date_range('1/1/2000', periods=1000)) -class TestTimeGrouper(object): - def setup_method(self, method): - self.ts = Series(np.random.randn(1000), - index=date_range('1/1/2000', periods=1000)) +def test_apply(): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') - def test_apply(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') - - grouped = self.ts.groupby(grouper) - - f = lambda x: x.sort_values()[-3:] - - applied = grouped.apply(f) - expected = self.ts.groupby(lambda x: x.year).apply(f) - - applied.index = applied.index.droplevel(0) - expected.index = expected.index.droplevel(0) - assert_series_equal(applied, expected) - - def test_count(self): - self.ts[::3] = np.nan - - expected = self.ts.groupby(lambda x: x.year).count() - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - grouper = pd.TimeGrouper(freq='A', label='right', closed='right') - result = self.ts.groupby(grouper).count() - expected.index = result.index - assert_series_equal(result, expected) - - result = self.ts.resample('A').count() - expected.index = result.index - assert_series_equal(result, expected) - - def test_numpy_reduction(self): - result = self.ts.resample('A', closed='right').prod() - - expected = self.ts.groupby(lambda x: x.year).agg(np.prod) - expected.index = result.index - - assert_series_equal(result, expected) - - def test_apply_iteration(self): - # #2300 - N = 1000 - ind = pd.date_range(start="2000-01-01", freq="D", periods=N) - df = DataFrame({'open': 1, 'close': 2}, index=ind) - tg = TimeGrouper('M') - - _, grouper, _ = tg._get_grouper(df) - - # Errors - grouped = df.groupby(grouper, group_keys=False) - f = lambda df: df['close'] / df['open'] - - # it works! - result = grouped.apply(f) - tm.assert_index_equal(result.index, df.index) - - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") - def test_panel_aggregation(self): - ind = pd.date_range('1/1/2000', periods=100) - data = np.random.randn(2, len(ind), 4) - - wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, - minor_axis=['A', 'B', 'C', 'D']) - - tg = TimeGrouper('M', axis=1) - _, grouper, _ = tg._get_grouper(wp) - bingrouped = wp.groupby(grouper) - binagg = bingrouped.mean() - - def f(x): - assert (isinstance(x, Panel)) - return x.mean(1) - - result = bingrouped.agg(f) - tm.assert_panel_equal(result, binagg) - - def test_fails_on_no_datetime_index(self): - index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') - index_funcs = (tm.makeIntIndex, - tm.makeUnicodeIndex, tm.makeFloatIndex, - lambda m: tm.makeCustomIndex(m, 2)) - n = 2 - for name, func in zip(index_names, index_funcs): - index = func(n) - df = DataFrame({'a': np.random.randn(n)}, index=index) - - msg = ("Only valid with DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an instance of %r" % name) - with pytest.raises(TypeError, match=msg): - df.groupby(TimeGrouper('D')) - - def test_aaa_group_order(self): - # GH 12840 - # check TimeGrouper perform stable sorts - n = 20 - data = np.random.randn(n, 4) - df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), - datetime(2013, 1, 3), datetime(2013, 1, 4), - datetime(2013, 1, 5)] * 4 - grouped = df.groupby(TimeGrouper(key='key', freq='D')) - - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), - df[::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), - df[1::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), - df[2::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), - df[3::5]) - tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), - df[4::5]) - - def test_aggregate_normal(self): - # check TimeGrouper's aggregation is identical as normal groupby - - n = 20 - data = np.random.randn(n, 4) - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, 3, 4, 5] * 4 - - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), - datetime(2013, 1, 3), datetime(2013, 1, 4), - datetime(2013, 1, 5)] * 4 - - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - - for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: - expected = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) - - for func in ['count', 'sum']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) - - # GH 7453 - for func in ['size']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_series_equal(expected, dt_result) - - # GH 7453 - for func in ['first', 'last']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) - - # if TimeGrouper is used included, 'nth' doesn't work yet - - """ - for func in ['nth']: - expected = getattr(normal_grouped, func)(3) - expected.index = date_range(start='2013-01-01', - freq='D', periods=5, name='key') - dt_result = getattr(dt_grouped, func)(3) - assert_frame_equal(expected, dt_result) - """ - - @pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), - ]) - def test_resample_entirly_nat_window(self, method, unit): - s = pd.Series([0] * 2 + [np.nan] * 2, - index=pd.date_range('2017', periods=4)) - # 0 / 1 by default - result = methodcaller(method)(s.resample("2d")) - expected = pd.Series([0.0, unit], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(s.resample("2d")) - expected = pd.Series([0.0, unit], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - # min_count=1 - result = methodcaller(method, min_count=1)(s.resample("2d")) - expected = pd.Series([0.0, np.nan], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('func, fill_value', [ - ('min', np.nan), - ('max', np.nan), - ('sum', 0), - ('prod', 1), - ('count', 0), - ]) - def test_aggregate_with_nat(self, func, fill_value): - # check TimeGrouper's aggregation is identical as normal groupby - # if NaT is included, 'var', 'std', 'mean', 'first','last' - # and 'nth' doesn't work yet - - n = 20 - data = np.random.randn(n, 4).astype('int64') - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 - - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, - datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 - - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - - normal_result = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() + grouped = test_series.groupby(grouper) - pad = DataFrame([[fill_value] * 4], index=[3], - columns=['A', 'B', 'C', 'D']) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) - assert dt_result.index.name == 'key' + def f(x): + return x.sort_values()[-3:] + + applied = grouped.apply(f) + expected = test_series.groupby(lambda x: x.year).apply(f) + + applied.index = applied.index.droplevel(0) + expected.index = expected.index.droplevel(0) + assert_series_equal(applied, expected) + + +def test_count(): + test_series[::3] = np.nan + + expected = test_series.groupby(lambda x: x.year).count() + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') + result = test_series.groupby(grouper).count() + expected.index = result.index + assert_series_equal(result, expected) + + result = test_series.resample('A').count() + expected.index = result.index + assert_series_equal(result, expected) + + +def test_numpy_reduction(): + result = test_series.resample('A', closed='right').prod() + + expected = test_series.groupby(lambda x: x.year).agg(np.prod) + expected.index = result.index + + assert_series_equal(result, expected) + + +def test_apply_iteration(): + # #2300 + N = 1000 + ind = pd.date_range(start="2000-01-01", freq="D", periods=N) + df = DataFrame({'open': 1, 'close': 2}, index=ind) + tg = TimeGrouper('M') + + _, grouper, _ = tg._get_grouper(df) + + # Errors + grouped = df.groupby(grouper, group_keys=False) + + def f(df): + return df['close'] / df['open'] + + # it works! + result = grouped.apply(f) + tm.assert_index_equal(result.index, df.index) + + +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") +def test_panel_aggregation(): + ind = pd.date_range('1/1/2000', periods=100) + data = np.random.randn(2, len(ind), 4) + + wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, + minor_axis=['A', 'B', 'C', 'D']) + + tg = TimeGrouper('M', axis=1) + _, grouper, _ = tg._get_grouper(wp) + bingrouped = wp.groupby(grouper) + binagg = bingrouped.mean() + + def f(x): + assert (isinstance(x, Panel)) + return x.mean(1) + + result = bingrouped.agg(f) + tm.assert_panel_equal(result, binagg) - def test_aggregate_with_nat_size(self): - # GH 9925 - n = 20 - data = np.random.randn(n, 4).astype('int64') - normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 - dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) - dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, - datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 +def test_fails_on_no_datetime_index(): + index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') + index_funcs = (tm.makeIntIndex, + tm.makeUnicodeIndex, tm.makeFloatIndex, + lambda m: tm.makeCustomIndex(m, 2)) + n = 2 + for name, func in zip(index_names, index_funcs): + index = func(n) + df = DataFrame({'a': np.random.randn(n)}, index=index) - normal_grouped = normal_df.groupby('key') - dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + msg = ("Only valid with DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an instance of %r" % name) + with pytest.raises(TypeError, match=msg): + df.groupby(TimeGrouper('D')) - normal_result = normal_grouped.size() - dt_result = dt_grouped.size() - pad = Series([0], index=[3]) - expected = normal_result.append(pad) - expected = expected.sort_index() +def test_aaa_group_order(): + # GH 12840 + # check TimeGrouper perform stable sorts + n = 20 + data = np.random.randn(n, 4) + df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), + datetime(2013, 1, 3), datetime(2013, 1, 4), + datetime(2013, 1, 5)] * 4 + grouped = df.groupby(TimeGrouper(key='key', freq='D')) + + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), + df[::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), + df[1::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), + df[2::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), + df[3::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), + df[4::5]) + + +def test_aggregate_normal(): + # check TimeGrouper's aggregation is identical as normal groupby + + n = 20 + data = np.random.randn(n, 4) + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, 3, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), + datetime(2013, 1, 3), datetime(2013, 1, 4), + datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: + expected = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_frame_equal(expected, dt_result) + + for func in ['count', 'sum']: + expected = getattr(normal_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + dt_result = getattr(dt_grouped, func)() + assert_frame_equal(expected, dt_result) + + # GH 7453 + for func in ['size']: + expected = getattr(normal_grouped, func)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') + dt_result = getattr(dt_grouped, func)() assert_series_equal(expected, dt_result) - assert dt_result.index.name == 'key' - - def test_repr(self): - # GH18203 - result = repr(TimeGrouper(key='A', freq='H')) - expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " - "closed='left', label='left', how='mean', " - "convention='e', base=0)") - assert result == expected - - @pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), - ]) - def test_upsample_sum(self, method, unit): - s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) - resampled = s.resample("30T") - index = pd.to_datetime(['2017-01-01T00:00:00', - '2017-01-01T00:30:00', - '2017-01-01T01:00:00']) - - # 0 / 1 by default - result = methodcaller(method)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=1 - result = methodcaller(method, min_count=1)(resampled) - expected = pd.Series([1, np.nan, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count>1 - result = methodcaller(method, min_count=2)(resampled) - expected = pd.Series([np.nan, np.nan, np.nan], index=index) - tm.assert_series_equal(result, expected) + + # GH 7453 + for func in ['first', 'last']: + expected = getattr(normal_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + dt_result = getattr(dt_grouped, func)() + assert_frame_equal(expected, dt_result) + + # if TimeGrouper is used included, 'nth' doesn't work yet + + """ + for func in ['nth']: + expected = getattr(normal_grouped, func)(3) + expected.index = date_range(start='2013-01-01', + freq='D', periods=5, name='key') + dt_result = getattr(dt_grouped, func)(3) + assert_frame_equal(expected, dt_result) + """ + + +@pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), +]) +def test_resample_entirly_nat_window(method, unit): + s = pd.Series([0] * 2 + [np.nan] * 2, + index=pd.date_range('2017', periods=4)) + # 0 / 1 by default + result = methodcaller(method)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(s.resample("2d")) + expected = pd.Series([0.0, np.nan], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('func, fill_value', [ + ('min', np.nan), + ('max', np.nan), + ('sum', 0), + ('prod', 1), + ('count', 0), +]) +def test_aggregate_with_nat(func, fill_value): + # check TimeGrouper's aggregation is identical as normal groupby + # if NaT is included, 'var', 'std', 'mean', 'first','last' + # and 'nth' doesn't work yet + + n = 20 + data = np.random.randn(n, 4).astype('int64') + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, + datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + + pad = DataFrame([[fill_value] * 4], index=[3], + columns=['A', 'B', 'C', 'D']) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_frame_equal(expected, dt_result) + assert dt_result.index.name == 'key' + + +def test_aggregate_with_nat_size(): + # GH 9925 + n = 20 + data = np.random.randn(n, 4).astype('int64') + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, + datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) + + normal_result = normal_grouped.size() + dt_result = dt_grouped.size() + + pad = Series([0], index=[3]) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_series_equal(expected, dt_result) + assert dt_result.index.name == 'key' + + +def test_repr(): + # GH18203 + result = repr(TimeGrouper(key='A', freq='H')) + expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', base=0)") + assert result == expected + + +@pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), +]) +def test_upsample_sum(method, unit): + s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) + resampled = s.resample("30T") + index = pd.to_datetime(['2017-01-01T00:00:00', + '2017-01-01T00:30:00', + '2017-01-01T01:00:00']) + + # 0 / 1 by default + result = methodcaller(method)(resampled) + expected = pd.Series([1, unit, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(resampled) + expected = pd.Series([1, unit, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(resampled) + expected = pd.Series([1, np.nan, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count>1 + result = methodcaller(method, min_count=2)(resampled) + expected = pd.Series([np.nan, np.nan, np.nan], index=index) + tm.assert_series_equal(result, expected) From 759128b36979fb2ed918d58262d5a243675dfb5e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:38:40 +0000 Subject: [PATCH 13/17] isort pandas\tests\resample\test_resample_api.py --- pandas/tests/resample/test_resample_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 0c265ba9e94c3..51cf09c7640e5 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -14,7 +14,6 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal - dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') From dbf0d64115040b267fbb59a821b405cf64fc9889 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:40:10 +0000 Subject: [PATCH 14/17] test_timedelta_index.py renamed test_timedelta.py --- .../tests/resample/{test_timedelta_index.py => test_timedelta.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/resample/{test_timedelta_index.py => test_timedelta.py} (100%) diff --git a/pandas/tests/resample/test_timedelta_index.py b/pandas/tests/resample/test_timedelta.py similarity index 100% rename from pandas/tests/resample/test_timedelta_index.py rename to pandas/tests/resample/test_timedelta.py From b66efb75baa7de8ce5dd98e5dfbcf307c7812c61 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:43:02 +0000 Subject: [PATCH 15/17] add __init__.py --- pandas/tests/resample/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/resample/__init__.py diff --git a/pandas/tests/resample/__init__.py b/pandas/tests/resample/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 489979a84befb2370b8a759d413790d990fb15d6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 21:46:27 +0000 Subject: [PATCH 16/17] remove pandas/tests/test_resample.py from setup.cfg --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4027e040cf421..a0b6558032f44 100644 --- a/setup.cfg +++ b/setup.cfg @@ -122,7 +122,6 @@ skip= pandas/tests/test_common.py, pandas/tests/test_compat.py, pandas/tests/test_sorting.py, - pandas/tests/test_resample.py, pandas/tests/test_algos.py, pandas/tests/test_expressions.py, pandas/tests/test_strings.py, From 7c3b79bffaeda42426cd9478a53f5184059e93b0 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Nov 2018 23:25:43 +0000 Subject: [PATCH 17/17] rename imports --- pandas/tests/resample/test_base.py | 12 ++++++-- pandas/tests/resample/test_datetime_index.py | 22 ++++++++------ pandas/tests/resample/test_period_index.py | 31 +++++++++++--------- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 3931f461f5b09..8d710289aecc1 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -22,7 +22,7 @@ from pandas.tseries.offsets import BDay -bday = BDay() +business_day_offset = BDay() # The various methods we support downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', @@ -32,12 +32,18 @@ resample_methods = downsample_methods + upsample_methods + series_methods -def _simple_ts(start, end, freq='D'): +def simple_date_range_series(start, end, freq='D'): + """ + Series with date range index and random data for test purposes. + """ rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) -def _simple_pts(start, end, freq='D'): +def simple_period_range_series(start, end, freq='D'): + """ + Series with period range index and random data for test purposes. + """ rng = period_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 45e6ef4a60bb7..44a3b7005477d 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -19,7 +19,8 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper from pandas.tests.resample.test_base import ( - Base, _simple_pts, _simple_ts, bday, downsample_methods) + Base, business_day_offset, downsample_methods, simple_date_range_series, + simple_period_range_series) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -431,8 +432,8 @@ def test_resample_loffset(self, loffset): # to weekly result = ser.resample('w-sun').last() - expected = ser.resample('w-sun', loffset=-bday).last() - assert result.index[0] - bday == expected.index[0] + expected = ser.resample('w-sun', loffset=-business_day_offset).last() + assert result.index[0] - business_day_offset == expected.index[0] def test_resample_loffset_upsample(self): # GH 20744 @@ -628,7 +629,7 @@ def test_resample_reresample(self): assert result.index.freq == offsets.Hour(8) def test_resample_timestamp_to_period(self): - ts = _simple_ts('1/1/1990', '1/1/2000') + ts = simple_date_range_series('1/1/1990', '1/1/2000') result = ts.resample('A-DEC', kind='period').mean() expected = ts.resample('A-DEC').mean() @@ -979,12 +980,13 @@ def test_resample_anchored_intraday(self): expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D') tm.assert_frame_equal(result, expected) - ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') + ts = simple_date_range_series('2012-04-29 23:00', '2012-04-30 5:00', + freq='h') resampled = ts.resample('M').mean() assert len(resampled) == 1 def test_resample_anchored_monthstart(self): - ts = _simple_ts('1/1/2000', '12/31/2002') + ts = simple_date_range_series('1/1/2000', '12/31/2002') freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN'] @@ -1023,13 +1025,15 @@ def test_corner_cases(self): ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t') tm.assert_index_equal(result.index, ex_index) - len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] + len0pts = simple_period_range_series( + '2007-01', '2010-05', freq='M')[:0] # it works result = len0pts.resample('A-DEC').mean() assert len(result) == 0 # resample to periods - ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') + ts = simple_date_range_series( + '2000-04-28', '2000-04-30 11:00', freq='h') result = ts.resample('M', kind='period').mean() assert len(result) == 1 assert result.index[0] == Period('2000-04', freq='M') @@ -1076,7 +1080,7 @@ def test_resample_median_bug_1688(self): def test_how_lambda_functions(self): - ts = _simple_ts('1/1/2000', '4/1/2000') + ts = simple_date_range_series('1/1/2000', '4/1/2000') result = ts.resample('M').apply(lambda x: x.mean()) exp = ts.resample('M').mean() diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index dffec6300d772..99b8edd5dbbea 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -16,7 +16,8 @@ from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range from pandas.core.resample import DatetimeIndex -from pandas.tests.resample.test_base import Base, _simple_pts, resample_methods +from pandas.tests.resample.test_base import ( + Base, resample_methods, simple_period_range_series) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -128,7 +129,8 @@ def test_annual_upsample_M_e_b(self): def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'): for month in MONTHS: - ts = _simple_pts('1/1/1990', end, freq='A-%s' % month) + ts = simple_period_range_series( + '1/1/1990', end, freq='A-%s' % month) result = getattr(ts.resample(targ, convention=conv), meth)() expected = result.to_timestamp(targ, how=conv) @@ -136,7 +138,7 @@ def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'): assert_series_equal(result, expected) def test_basic_downsample(self): - ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') + ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() expected = ts.groupby(ts.index.year).mean() @@ -149,7 +151,7 @@ def test_basic_downsample(self): def test_not_subperiod(self): # These are incompatible period rules for resampling - ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed') + ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed') pytest.raises(ValueError, lambda: ts.resample('a-dec').mean()) pytest.raises(ValueError, lambda: ts.resample('q-mar').mean()) pytest.raises(ValueError, lambda: ts.resample('M').mean()) @@ -157,7 +159,7 @@ def test_not_subperiod(self): @pytest.mark.parametrize('freq', ['D', '2D']) def test_basic_upsample(self, freq): - ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') + ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() resampled = result.resample(freq, convention='end').ffill() @@ -175,7 +177,7 @@ def test_upsample_with_limit(self): assert_series_equal(result, expected) def test_annual_upsample(self): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC') + ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC') df = DataFrame({'a': ts}) rdf = df.resample('D').ffill() exp = df['a'].resample('D').ffill() @@ -196,7 +198,7 @@ def test_annual_upsample(self): @pytest.mark.parametrize('convention', ['start', 'end']) def test_quarterly_upsample(self, month, target, convention): freq = 'Q-{month}'.format(month=month) - ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq) result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) expected = expected.asfreq(target, 'ffill').to_period() @@ -205,7 +207,7 @@ def test_quarterly_upsample(self, month, target, convention): @pytest.mark.parametrize('target', ['D', 'B']) @pytest.mark.parametrize('convention', ['start', 'end']) def test_monthly_upsample(self, target, convention): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') + ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M') result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) expected = expected.asfreq(target, 'ffill').to_period() @@ -351,14 +353,14 @@ def test_fill_method_and_how_upsample(self): @pytest.mark.parametrize('convention', ['start', 'end']) def test_weekly_upsample(self, day, target, convention): freq = 'W-{day}'.format(day=day) - ts = _simple_pts('1/1/1990', '12/31/1995', freq=freq) + ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq) result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) expected = expected.asfreq(target, 'ffill').to_period() assert_series_equal(result, expected) def test_resample_to_timestamps(self): - ts = _simple_pts('1/1/1990', '12/31/1995', freq='M') + ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M') result = ts.resample('A-DEC', kind='timestamp').mean() expected = ts.to_timestamp(how='start').resample('A-DEC').mean() @@ -366,7 +368,8 @@ def test_resample_to_timestamps(self): def test_resample_to_quarterly(self): for month in MONTHS: - ts = _simple_pts('1990', '1992', freq='A-%s' % month) + ts = simple_period_range_series( + '1990', '1992', freq='A-%s' % month) quar_ts = ts.resample('Q-%s' % month).ffill() stamps = ts.to_timestamp('D', how='start') @@ -381,7 +384,7 @@ def test_resample_to_quarterly(self): assert_series_equal(quar_ts, expected) # conforms, but different month - ts = _simple_pts('1990', '1992', freq='A-JUN') + ts = simple_period_range_series('1990', '1992', freq='A-JUN') for how in ['start', 'end']: result = ts.resample('Q-MAR', convention=how).ffill() @@ -420,13 +423,13 @@ def test_resample_5minute(self, freq, kind): assert_series_equal(result, expected) def test_upsample_daily_business_daily(self): - ts = _simple_pts('1/1/2000', '2/1/2000', freq='B') + ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B') result = ts.resample('D').asfreq() expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000')) assert_series_equal(result, expected) - ts = _simple_pts('1/1/2000', '2/1/2000') + ts = simple_period_range_series('1/1/2000', '2/1/2000') result = ts.resample('H', convention='s').asfreq() exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H') expected = ts.asfreq('H', how='s').reindex(exp_rng)