From 5b2a4007c649dd060ea58bdb1b54c4a360485dc1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Dec 2018 14:39:55 +0000 Subject: [PATCH 01/10] fixtures for simple date_range and period_range series --- pandas/tests/resample/conftest.py | 26 ++++++ pandas/tests/resample/test_base.py | 19 +---- pandas/tests/resample/test_datetime_index.py | 14 ++-- pandas/tests/resample/test_period_index.py | 84 +++++++------------- 4 files changed, 61 insertions(+), 82 deletions(-) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 2130bd635b180..50178d6f9faa1 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -1,5 +1,9 @@ +import numpy as np import pytest +from pandas import Series +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import period_range from pandas.tests.resample.test_base import ( downsample_methods, resample_methods, upsample_methods) @@ -20,3 +24,25 @@ def upsample_method(request): def resample_method(request): """Fixture for parametrization of Grouper resample methods.""" return request.param + + +@pytest.fixture() +def simple_date_range_series(): + """ + Series with date range index and random data for test purposes. + """ + def _simple_date_range_series(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + return _simple_date_range_series + + +@pytest.fixture() +def simple_period_range_series(): + """ + Series with period range index and random data for test purposes. + """ + def _simple_period_range_series(start, end, freq='D'): + rng = period_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + return _simple_period_range_series diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index db2162e9357e2..06660ca124691 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -11,8 +11,7 @@ import pandas as pd from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError -from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.resample import TimeGrouper import pandas.util.testing as tm @@ -32,22 +31,6 @@ resample_methods = downsample_methods + upsample_methods + series_methods -def simple_date_range_series(start, end, freq='D'): - """ - Series with date range index and random data for test purposes. - """ - rng = date_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) - - -def simple_period_range_series(start, end, freq='D'): - """ - Series with period range index and random data for test purposes. - """ - rng = period_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) - - class Base(object): """ base class for resampling testing, calling diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index b287eb468cd94..343a92d09dff9 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -19,8 +19,7 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper from pandas.tests.resample.test_base import ( - Base, business_day_offset, downsample_methods, simple_date_range_series, - simple_period_range_series) + Base, business_day_offset, downsample_methods) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -628,7 +627,7 @@ def test_resample_reresample(self): assert isinstance(result.index.freq, offsets.DateOffset) assert result.index.freq == offsets.Hour(8) - def test_resample_timestamp_to_period(self): + def test_resample_timestamp_to_period(self, simple_date_range_series): ts = simple_date_range_series('1/1/1990', '1/1/2000') result = ts.resample('A-DEC', kind='period').mean() @@ -945,7 +944,7 @@ def test_nanosecond_resample_error(self): assert_series_equal(result, exp) - def test_resample_anchored_intraday(self): + def test_resample_anchored_intraday(self, simple_date_range_series): # #1471, #1458 rng = date_range('1/1/2012', '4/1/2012', freq='100min') @@ -985,7 +984,7 @@ def test_resample_anchored_intraday(self): resampled = ts.resample('M').mean() assert len(resampled) == 1 - def test_resample_anchored_monthstart(self): + def test_resample_anchored_monthstart(self, simple_date_range_series): ts = simple_date_range_series('1/1/2000', '12/31/2002') freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN'] @@ -1015,7 +1014,8 @@ def test_resample_anchored_multiday(self): result = s.resample('2200L', label='right').mean() assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200') - def test_corner_cases(self): + def test_corner_cases(self, simple_period_range_series, + simple_date_range_series): # miscellaneous test coverage rng = date_range('1/1/2000', periods=12, freq='t') @@ -1078,7 +1078,7 @@ def test_resample_median_bug_1688(self): exp = df.asfreq('T') tm.assert_frame_equal(result, exp) - def test_how_lambda_functions(self): + def test_how_lambda_functions(self, simple_date_range_series): ts = simple_date_range_series('1/1/2000', '4/1/2000') diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 0b3e67ca0525a..900e2a0f96ac8 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -15,8 +15,7 @@ from pandas import DataFrame, Series, Timestamp from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range -from pandas.tests.resample.test_base import ( - Base, resample_methods, simple_period_range_series) +from pandas.tests.resample.test_base import Base, resample_methods import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -90,53 +89,21 @@ def test_selection(self, index, freq, kind): with pytest.raises(NotImplementedError): df.resample(freq, level='d', kind=kind) - def test_annual_upsample_D_s_f(self): - self._check_annual_upsample_cases('D', 'start', 'ffill') - - def test_annual_upsample_D_e_f(self): - self._check_annual_upsample_cases('D', 'end', 'ffill') - - def test_annual_upsample_D_s_b(self): - self._check_annual_upsample_cases('D', 'start', 'bfill') - - def test_annual_upsample_D_e_b(self): - self._check_annual_upsample_cases('D', 'end', 'bfill') - - def test_annual_upsample_B_s_f(self): - self._check_annual_upsample_cases('B', 'start', 'ffill') - - def test_annual_upsample_B_e_f(self): - self._check_annual_upsample_cases('B', 'end', 'ffill') - - def test_annual_upsample_B_s_b(self): - self._check_annual_upsample_cases('B', 'start', 'bfill') - - def test_annual_upsample_B_e_b(self): - self._check_annual_upsample_cases('B', 'end', 'bfill') - - def test_annual_upsample_M_s_f(self): - self._check_annual_upsample_cases('M', 'start', 'ffill') - - def test_annual_upsample_M_e_f(self): - self._check_annual_upsample_cases('M', 'end', 'ffill') - - def test_annual_upsample_M_s_b(self): - self._check_annual_upsample_cases('M', 'start', 'bfill') - - def test_annual_upsample_M_e_b(self): - self._check_annual_upsample_cases('M', 'end', 'bfill') - - def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'): - for month in MONTHS: - ts = simple_period_range_series( - '1/1/1990', end, freq='A-%s' % month) - - result = getattr(ts.resample(targ, convention=conv), meth)() - expected = result.to_timestamp(targ, how=conv) - expected = expected.asfreq(targ, meth).to_period() - assert_series_equal(result, expected) + @pytest.mark.parametrize('month', MONTHS) + @pytest.mark.parametrize('meth', ['ffill', 'bfill']) + @pytest.mark.parametrize('conv', ['start', 'end']) + @pytest.mark.parametrize('targ', ['D', 'B', 'M']) + def test_annual_upsample_cases(self, targ, conv, meth, month, + simple_period_range_series): + ts = simple_period_range_series( + '1/1/1990', '12/31/1991', freq='A-%s' % month) + + result = getattr(ts.resample(targ, convention=conv), meth)() + expected = result.to_timestamp(targ, how=conv) + expected = expected.asfreq(targ, meth).to_period() + assert_series_equal(result, expected) - def test_basic_downsample(self): + def test_basic_downsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() @@ -148,7 +115,7 @@ def test_basic_downsample(self): assert_series_equal(ts.resample('a-dec').mean(), result) assert_series_equal(ts.resample('a').mean(), result) - def test_not_subperiod(self): + def test_not_subperiod(self, simple_period_range_series): # These are incompatible period rules for resampling ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed') pytest.raises(ValueError, lambda: ts.resample('a-dec').mean()) @@ -157,7 +124,7 @@ def test_not_subperiod(self): pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) @pytest.mark.parametrize('freq', ['D', '2D']) - def test_basic_upsample(self, freq): + def test_basic_upsample(self, freq, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() @@ -175,7 +142,7 @@ def test_upsample_with_limit(self): limit=2) assert_series_equal(result, expected) - def test_annual_upsample(self): + def test_annual_upsample(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC') df = DataFrame({'a': ts}) rdf = df.resample('D').ffill() @@ -195,7 +162,8 @@ def test_annual_upsample(self): @pytest.mark.parametrize('month', MONTHS) @pytest.mark.parametrize('target', ['D', 'B', 'M']) @pytest.mark.parametrize('convention', ['start', 'end']) - def test_quarterly_upsample(self, month, target, convention): + def test_quarterly_upsample(self, month, target, convention, + simple_period_range_series): freq = 'Q-{month}'.format(month=month) ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq) result = ts.resample(target, convention=convention).ffill() @@ -205,7 +173,8 @@ def test_quarterly_upsample(self, month, target, convention): @pytest.mark.parametrize('target', ['D', 'B']) @pytest.mark.parametrize('convention', ['start', 'end']) - def test_monthly_upsample(self, target, convention): + def test_monthly_upsample(self, target, convention, + simple_period_range_series): ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M') result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) @@ -350,7 +319,8 @@ def test_fill_method_and_how_upsample(self): @pytest.mark.parametrize('day', DAYS) @pytest.mark.parametrize('target', ['D', 'B']) @pytest.mark.parametrize('convention', ['start', 'end']) - def test_weekly_upsample(self, day, target, convention): + def test_weekly_upsample(self, day, target, convention, + simple_period_range_series): freq = 'W-{day}'.format(day=day) ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq) result = ts.resample(target, convention=convention).ffill() @@ -358,14 +328,14 @@ def test_weekly_upsample(self, day, target, convention): expected = expected.asfreq(target, 'ffill').to_period() assert_series_equal(result, expected) - def test_resample_to_timestamps(self): + def test_resample_to_timestamps(self, simple_period_range_series): ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M') result = ts.resample('A-DEC', kind='timestamp').mean() expected = ts.to_timestamp(how='start').resample('A-DEC').mean() assert_series_equal(result, expected) - def test_resample_to_quarterly(self): + def test_resample_to_quarterly(self, simple_period_range_series): for month in MONTHS: ts = simple_period_range_series( '1990', '1992', freq='A-%s' % month) @@ -421,7 +391,7 @@ def test_resample_5minute(self, freq, kind): result = ts.resample(freq, kind=kind).mean() assert_series_equal(result, expected) - def test_upsample_daily_business_daily(self): + def test_upsample_daily_business_daily(self, simple_period_range_series): ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B') result = ts.resample('D').asfreq() From 872e72487fed5ca276771c3d7fb4dc837906ec57 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Dec 2018 15:11:10 +0000 Subject: [PATCH 02/10] remove business_day_offset import --- pandas/tests/resample/test_base.py | 4 ---- pandas/tests/resample/test_datetime_index.py | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 06660ca124691..432eedd763ea7 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -19,10 +19,6 @@ assert_almost_equal, assert_frame_equal, assert_index_equal, assert_series_equal) -from pandas.tseries.offsets import BDay - -business_day_offset = BDay() - # The various methods we support downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 343a92d09dff9..55175f92ddc29 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -18,14 +18,13 @@ from pandas.core.indexes.period import Period, period_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper -from pandas.tests.resample.test_base import ( - Base, business_day_offset, downsample_methods) +from pandas.tests.resample.test_base import Base, downsample_methods import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) import pandas.tseries.offsets as offsets -from pandas.tseries.offsets import Minute +from pandas.tseries.offsets import BDay, Minute class TestDatetimeIndex(Base): @@ -431,6 +430,7 @@ def test_resample_loffset(self, loffset): # to weekly result = ser.resample('w-sun').last() + business_day_offset = BDay() expected = ser.resample('w-sun', loffset=-business_day_offset).last() assert result.index[0] - business_day_offset == expected.index[0] From f578e275e7d659e4c1ec1cdbc0cbccf4055975ef Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Dec 2018 15:26:44 +0000 Subject: [PATCH 03/10] replace resample/downsample_methods import with fixture --- pandas/tests/resample/test_datetime_index.py | 47 ++++++++++---------- pandas/tests/resample/test_period_index.py | 9 ++-- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 55175f92ddc29..880c9165de220 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -18,7 +18,7 @@ from pandas.core.indexes.period import Period, period_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper -from pandas.tests.resample.test_base import Base, downsample_methods +from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -129,7 +129,7 @@ def test_resample_string_kwargs(self): with pytest.raises(ValueError): s.resample('5min', convention='starttt').mean() - def test_resample_how(self): + def test_resample_how(self, downsample_method): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') s = Series(np.random.randn(14), index=rng) @@ -138,7 +138,7 @@ def test_resample_how(self): grouplist[1:6] = 1 grouplist[6:11] = 2 grouplist[11:] = 3 - args = downsample_methods + arg = downsample_method def _ohlc(group): if isna(group).all(): @@ -147,29 +147,28 @@ def _ohlc(group): inds = date_range('1/1/2000', periods=4, freq='5min', name='index') - for arg in args: + if arg == 'ohlc': + func = _ohlc + else: + func = arg + try: + result = getattr(s.resample( + '5min', closed='right', label='right'), arg)() + + expected = s.groupby(grouplist).agg(func) + assert result.index.name == 'index' if arg == 'ohlc': - func = _ohlc + expected = DataFrame(expected.values.tolist()) + expected.columns = ['open', 'high', 'low', 'close'] + expected.index = Index(inds, name='index') + assert_frame_equal(result, expected) else: - func = arg - try: - result = getattr(s.resample( - '5min', closed='right', label='right'), arg)() - - expected = s.groupby(grouplist).agg(func) - assert result.index.name == 'index' - if arg == 'ohlc': - expected = DataFrame(expected.values.tolist()) - expected.columns = ['open', 'high', 'low', 'close'] - expected.index = Index(inds, name='index') - assert_frame_equal(result, expected) - else: - expected.index = inds - assert_series_equal(result, expected) - except BaseException as exc: - - exc.args += ('how=%s' % arg,) - raise + expected.index = inds + assert_series_equal(result, expected) + except BaseException as exc: + + exc.args += ('how=%s' % arg,) + raise def test_numpy_compat(self): # see gh-12811 diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 900e2a0f96ac8..51963a21fdcdb 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -15,7 +15,7 @@ from pandas import DataFrame, Series, Timestamp from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range -from pandas.tests.resample.test_base import Base, resample_methods +from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -206,16 +206,15 @@ def test_resample_count(self, freq, expected_vals): expected = Series(expected_vals, index=expected_index) assert_series_equal(result, expected) - def test_resample_same_freq(self): + def test_resample_same_freq(self, resample_method): # GH12770 series = Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')) expected = series - for method in resample_methods: - result = getattr(series.resample('M'), method)() - assert_series_equal(result, expected) + result = getattr(series.resample('M'), resample_method)() + assert_series_equal(result, expected) def test_resample_incompat_freq(self): From 148470870e0aaae8e558b90a49930bef167625e5 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Dec 2018 16:13:42 +0000 Subject: [PATCH 04/10] replace *_methods in test_base.py with fixture --- pandas/tests/resample/conftest.py | 9 +++- pandas/tests/resample/test_base.py | 84 +++++++++++++----------------- 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 50178d6f9faa1..90c0209c281b9 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -4,8 +4,13 @@ from pandas import Series from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import period_range -from pandas.tests.resample.test_base import ( - downsample_methods, resample_methods, upsample_methods) + +# The various methods we support +downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', + 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] +upsample_methods = ['count', 'size'] +series_methods = ['nunique'] +resample_methods = downsample_methods + upsample_methods + series_methods @pytest.fixture(params=downsample_methods) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 432eedd763ea7..963b903ab0948 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -19,13 +19,6 @@ assert_almost_equal, assert_frame_equal, assert_index_equal, assert_series_equal) -# The various methods we support -downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', - 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] -upsample_methods = ['count', 'size'] -series_methods = ['nunique'] -resample_methods = downsample_methods + upsample_methods + series_methods - class Base(object): """ @@ -116,64 +109,59 @@ def test_raises_on_non_datetimelike_index(self): xp = DataFrame() pytest.raises(TypeError, lambda: xp.resample('A').mean()) - def test_resample_empty_series(self): + @pytest.mark.parametrize('freq', ['M', 'D', 'H']) + def test_resample_empty_series(self, freq, resample_method): # GH12771 & GH12868 + if resample_method == 'ohlc': + pytest.skip('need to test for ohlc from GH13083') + s = self.create_series()[:0] + result = getattr(s.resample(freq), resample_method)() - for freq in ['M', 'D', 'H']: - # need to test for ohlc from GH13083 - methods = [method for method in resample_methods - if method != 'ohlc'] - for method in methods: - result = getattr(s.resample(freq), method)() - - expected = s.copy() - expected.index = s.index._shallow_copy(freq=freq) - assert_index_equal(result.index, expected.index) - assert result.index.freq == expected.index.freq - assert_series_equal(result, expected, check_dtype=False) - - def test_resample_empty_dataframe(self): + expected = s.copy() + expected.index = s.index._shallow_copy(freq=freq) + assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + assert_series_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize('freq', ['M', 'D', 'H']) + def test_resample_empty_dataframe(self, freq, resample_method): # GH13212 index = self.create_series().index[:0] f = DataFrame(index=index) - for freq in ['M', 'D', 'H']: - # count retains dimensions too - methods = downsample_methods + upsample_methods - for method in methods: - result = getattr(f.resample(freq), method)() - if method != 'size': - expected = f.copy() - else: - # GH14962 - expected = Series([]) - - expected.index = f.index._shallow_copy(freq=freq) - assert_index_equal(result.index, expected.index) - assert result.index.freq == expected.index.freq - assert_almost_equal(result, expected, check_dtype=False) - - # test size for GH13212 (currently stays as df) + # count retains dimensions too + result = getattr(f.resample(freq), resample_method)() + if resample_method != 'size': + expected = f.copy() + else: + # GH14962 + expected = Series([]) + + expected.index = f.index._shallow_copy(freq=freq) + assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + assert_almost_equal(result, expected, check_dtype=False) + + # test size for GH13212 (currently stays as df) @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) @pytest.mark.parametrize( "dtype", [np.float, np.int, np.object, 'datetime64[ns]']) - def test_resample_empty_dtypes(self, index, dtype): + def test_resample_empty_dtypes(self, index, dtype, resample_method): # Empty series were sometimes causing a segfault (for the functions # with Cython bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) - for how in downsample_methods + upsample_methods: - empty_series = Series([], index, dtype) - try: - getattr(empty_series.resample('d'), how)() - except DataError: - # Ignore these since some combinations are invalid - # (ex: doing mean with dtype of np.object) - pass + empty_series = Series([], index, dtype) + try: + getattr(empty_series.resample('d'), resample_method)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass def test_resample_loffset_arg_type(self): # GH 13218, 15002 From 096f4c0664208994a7b43bb6b54261a445021d13 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 10:01:10 +0000 Subject: [PATCH 05/10] remove import of Base class for test_timedelta.py --- pandas/tests/resample/test_base.py | 24 ++++++++++++++++++++- pandas/tests/resample/test_timedelta.py | 28 ++----------------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 963b903ab0948..9157c77336461 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -12,7 +12,7 @@ from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError from pandas.core.indexes.period import PeriodIndex -from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.core.resample import TimeGrouper import pandas.util.testing as tm from pandas.util.testing import ( @@ -229,3 +229,25 @@ def test_resample_quantile(self): result = s.resample(freq).quantile(q) expected = s.resample(freq).agg(lambda x: x.quantile(q)) tm.assert_series_equal(result, expected) + + +class TestTimedeltaIndex(Base): + _index_factory = lambda x: timedelta_range + + @pytest.fixture + def _index_start(self): + return '1 day' + + @pytest.fixture + def _index_end(self): + return '10 day' + + @pytest.fixture + def _series_name(self): + return 'tdi' + + def create_series(self): + i = timedelta_range('1 day', + '10 day', freq='D') + + return Series(np.arange(len(i)), index=i, name='tdi') diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 6cc920d66aeb3..5c81370d0d04b 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -1,37 +1,13 @@ -# pylint: disable=E1101 - import numpy as np -import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame from pandas.core.indexes.timedeltas import timedelta_range -from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal -class TestTimedeltaIndex(Base): - _index_factory = lambda x: timedelta_range - - @pytest.fixture - def _index_start(self): - return '1 day' - - @pytest.fixture - def _index_end(self): - return '10 day' - - @pytest.fixture - def _series_name(self): - return 'tdi' - - def create_series(self): - i = timedelta_range('1 day', - '10 day', freq='D') - - return Series(np.arange(len(i)), index=i, name='tdi') - +class TestTimedeltaIndex(object): def test_asfreq_bug(self): import datetime as dt df = DataFrame(data=[1, 3], From 06220b061f96101fca96091352af095ee84108bc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 10:25:19 +0000 Subject: [PATCH 06/10] remove import of Base class for test_datetime_index.py --- pandas/tests/resample/test_base.py | 15 +++++++++++++++ pandas/tests/resample/test_datetime_index.py | 17 +---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 9157c77336461..fde3e8727b036 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -11,6 +11,7 @@ import pandas as pd from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError +from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.core.resample import TimeGrouper @@ -231,6 +232,20 @@ def test_resample_quantile(self): tm.assert_series_equal(result, expected) +class TestDatetimeIndex(Base): + _index_factory = lambda x: date_range + + @pytest.fixture + def _series_name(self): + return 'dti' + + def create_series(self): + i = date_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + return Series(np.arange(len(i)), index=i, name='dti') + + class TestTimedeltaIndex(Base): _index_factory = lambda x: timedelta_range diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 880c9165de220..835baadaf46cf 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1,5 +1,3 @@ -# pylint: disable=E1101 - from datetime import datetime, timedelta from functools import partial from warnings import catch_warnings, simplefilter @@ -18,7 +16,6 @@ from pandas.core.indexes.period import Period, period_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import DatetimeIndex, TimeGrouper -from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -27,25 +24,13 @@ from pandas.tseries.offsets import BDay, Minute -class TestDatetimeIndex(Base): - _index_factory = lambda x: date_range - - @pytest.fixture - def _series_name(self): - return 'dti' - +class TestDatetimeIndex(object): def setup_method(self, method): dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') self.series = Series(np.random.rand(len(dti)), dti) - def create_series(self): - i = date_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - - return Series(np.arange(len(i)), index=i, name='dti') - def test_custom_grouper(self): dti = date_range(freq='Min', start=datetime(2005, 1, 1), From 5df05a24c651466c76c2b15608f3b82939321bac Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 15:37:40 +0000 Subject: [PATCH 07/10] remove import of Base class for test_period_index.py --- pandas/tests/resample/conftest.py | 59 +++++++++++- pandas/tests/resample/test_base.py | 101 +++++++++------------ pandas/tests/resample/test_period_index.py | 31 +++---- 3 files changed, 111 insertions(+), 80 deletions(-) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 90c0209c281b9..6711576a5840d 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -1,7 +1,9 @@ +from datetime import datetime + import numpy as np import pytest -from pandas import Series +from pandas import DataFrame, Series from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import period_range @@ -31,7 +33,7 @@ def resample_method(request): return request.param -@pytest.fixture() +@pytest.fixture def simple_date_range_series(): """ Series with date range index and random data for test purposes. @@ -42,7 +44,7 @@ def _simple_date_range_series(start, end, freq='D'): return _simple_date_range_series -@pytest.fixture() +@pytest.fixture def simple_period_range_series(): """ Series with period range index and random data for test purposes. @@ -51,3 +53,54 @@ def _simple_period_range_series(start, end, freq='D'): rng = period_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) return _simple_period_range_series + + +@pytest.fixture +def _index_start(): + return datetime(2005, 1, 1) + + +@pytest.fixture +def _index_end(): + return datetime(2005, 1, 10) + + +@pytest.fixture +def _index_freq(): + return 'D' + + +@pytest.fixture +def index(_index_factory, _index_start, _index_end, _index_freq): + return _index_factory(_index_start, _index_end, freq=_index_freq) + + +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """ return the _index_factory created using the args, kwargs """ + return _index_factory(*args, **kwargs) + return _create_index + + +@pytest.fixture +def _static_values(index): + return np.arange(len(index)) + + +@pytest.fixture +def series(index, _series_name, _static_values): + return Series(_static_values, index=index, name=_series_name) + + +@pytest.fixture +def frame(index, _static_values): + return DataFrame({'value': _static_values}, index=index) + + +@pytest.fixture(params=[Series, DataFrame]) +def series_and_frame(request, series, frame): + if request.param == Series: + return series + if request.param == DataFrame: + return frame diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index fde3e8727b036..3bc2663d907fa 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -6,13 +6,12 @@ import pytest from pandas.compat import range, zip -from pandas.errors import AbstractMethodError import pandas as pd from pandas import DataFrame, Series from pandas.core.groupby.groupby import DataError from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex, period_range from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.core.resample import TimeGrouper import pandas.util.testing as tm @@ -27,74 +26,30 @@ class Base(object): .create_series() generates a series of each index type """ - def create_index(self, *args, **kwargs): - """ return the _index_factory created using the args, kwargs """ - factory = self._index_factory() - return factory(*args, **kwargs) - - @pytest.fixture - def _index_start(self): - return datetime(2005, 1, 1) - - @pytest.fixture - def _index_end(self): - return datetime(2005, 1, 10) - - @pytest.fixture - def _index_freq(self): - return 'D' - - @pytest.fixture - def index(self, _index_start, _index_end, _index_freq): - return self.create_index(_index_start, _index_end, freq=_index_freq) - - @pytest.fixture - def _series_name(self): - raise AbstractMethodError(self) - - @pytest.fixture - def _static_values(self, index): - return np.arange(len(index)) - - @pytest.fixture - def series(self, index, _series_name, _static_values): - return Series(_static_values, index=index, name=_series_name) - - @pytest.fixture - def frame(self, index, _static_values): - return DataFrame({'value': _static_values}, index=index) - - @pytest.fixture(params=[Series, DataFrame]) - def series_and_frame(self, request, index, _series_name, _static_values): - if request.param == Series: - return Series(_static_values, index=index, name=_series_name) - if request.param == DataFrame: - return DataFrame({'value': _static_values}, index=index) - @pytest.mark.parametrize('freq', ['2D', '1H']) - def test_asfreq(self, series_and_frame, freq): + def test_asfreq(self, series_and_frame, freq, create_index): obj = series_and_frame result = obj.resample(freq).asfreq() - new_index = self.create_index(obj.index[0], obj.index[-1], freq=freq) + new_index = create_index(obj.index[0], obj.index[-1], freq=freq) expected = obj.reindex(new_index) assert_almost_equal(result, expected) - def test_asfreq_fill_value(self): + def test_asfreq_fill_value(self, create_index): # test for fill value during resampling, issue 3715 s = self.create_series() result = s.resample('1H').asfreq() - new_index = self.create_index(s.index[0], s.index[-1], freq='1H') + new_index = create_index(s.index[0], s.index[-1], freq='1H') expected = s.reindex(new_index) assert_series_equal(result, expected) frame = s.to_frame('value') frame.iloc[1] = None result = frame.resample('1H').asfreq(fill_value=4.0) - new_index = self.create_index(frame.index[0], - frame.index[-1], freq='1H') + new_index = create_index(frame.index[0], + frame.index[-1], freq='1H') expected = frame.reindex(new_index, fill_value=4.0) assert_frame_equal(result, expected) @@ -164,14 +119,14 @@ def test_resample_empty_dtypes(self, index, dtype, resample_method): # (ex: doing mean with dtype of np.object) pass - def test_resample_loffset_arg_type(self): + def test_resample_loffset_arg_type(self, create_index): # GH 13218, 15002 df = self.create_series().to_frame('value') expected_means = [df.values[i:i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = self.create_index(df.index[0], - periods=len(df.index) / 2, - freq='2D') + expected_index = create_index(df.index[0], + periods=len(df.index) / 2, + freq='2D') # loffset coerces PeriodIndex to DateTimeIndex if isinstance(expected_index, PeriodIndex): @@ -233,7 +188,9 @@ def test_resample_quantile(self): class TestDatetimeIndex(Base): - _index_factory = lambda x: date_range + @pytest.fixture + def _index_factory(self): + return date_range @pytest.fixture def _series_name(self): @@ -246,8 +203,36 @@ def create_series(self): return Series(np.arange(len(i)), index=i, name='dti') +class TestPeriodIndex(Base): + @pytest.fixture + def _index_factory(self): + return period_range + + @pytest.fixture + def _series_name(self): + return 'pi' + + def create_series(self): + # TODO: replace calls to .create_series() by injecting the series + # fixture + i = period_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') + + return Series(np.arange(len(i)), index=i, name='pi') + + @pytest.mark.skip() + def test_asfreq(self): + pass + + @pytest.mark.skip() + def test_asfreq_fill_value(self): + pass + + class TestTimedeltaIndex(Base): - _index_factory = lambda x: timedelta_range + @pytest.fixture + def _index_factory(self): + return timedelta_range @pytest.fixture def _index_start(self): diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 51963a21fdcdb..7cb3185ccbbaf 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -1,5 +1,3 @@ -# pylint: disable=E1101 - from datetime import datetime, timedelta import dateutil @@ -15,7 +13,6 @@ from pandas import DataFrame, Series, Timestamp from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import Period, PeriodIndex, period_range -from pandas.tests.resample.test_base import Base import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -23,20 +20,17 @@ import pandas.tseries.offsets as offsets -class TestPeriodIndex(Base): - _index_factory = lambda x: period_range +@pytest.fixture() +def _index_factory(): + return period_range + - @pytest.fixture - def _series_name(self): - return 'pi' +@pytest.fixture +def _series_name(): + return 'pi' - def create_series(self): - # TODO: replace calls to .create_series() by injecting the series - # fixture - i = period_range(datetime(2005, 1, 1), - datetime(2005, 1, 10), freq='D') - return Series(np.arange(len(i)), index=i, name='pi') +class TestPeriodIndex(object): @pytest.mark.parametrize('freq', ['2D', '1H', '2H']) @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) @@ -56,10 +50,10 @@ def test_asfreq(self, series_and_frame, freq, kind): result = obj.resample(freq, kind=kind).asfreq() assert_almost_equal(result, expected) - def test_asfreq_fill_value(self): + def test_asfreq_fill_value(self, series): # test for fill value during resampling, issue 3715 - s = self.create_series() + s = series new_index = date_range(s.index[0].to_timestamp(how='start'), (s.index[-1]).to_timestamp(how='start'), freq='1H') @@ -643,9 +637,8 @@ def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): df = frame expected_means = [df.values[i:i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = self.create_index(df.index[0], - periods=len(df.index) / 2, - freq='2D') + expected_index = period_range( + df.index[0], periods=len(df.index) / 2, freq='2D') # loffset coerces PeriodIndex to DateTimeIndex expected_index = expected_index.to_timestamp() From 5d42be26b8364b2fc30560610ceab9b2cfc33871 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 17:19:23 +0000 Subject: [PATCH 08/10] move create_index fixture from conftest.py to module scope --- pandas/tests/resample/conftest.py | 8 -------- pandas/tests/resample/test_base.py | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 6711576a5840d..d5b32891ea1df 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -75,14 +75,6 @@ def index(_index_factory, _index_start, _index_end, _index_freq): return _index_factory(_index_start, _index_end, freq=_index_freq) -@pytest.fixture -def create_index(_index_factory): - def _create_index(*args, **kwargs): - """ return the _index_factory created using the args, kwargs """ - return _index_factory(*args, **kwargs) - return _create_index - - @pytest.fixture def _static_values(index): return np.arange(len(index)) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 3bc2663d907fa..74003151abfb2 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -20,6 +20,14 @@ assert_series_equal) +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """ return the _index_factory created using the args, kwargs """ + return _index_factory(*args, **kwargs) + return _create_index + + class Base(object): """ base class for resampling testing, calling From 8ce8586f4478fccb57e466ca3ce7df21d574b622 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 17:27:15 +0000 Subject: [PATCH 09/10] remove catching of BaseException --- pandas/tests/resample/test_datetime_index.py | 32 +++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 835baadaf46cf..6c8b30bff1dac 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -136,24 +136,20 @@ def _ohlc(group): func = _ohlc else: func = arg - try: - result = getattr(s.resample( - '5min', closed='right', label='right'), arg)() - - expected = s.groupby(grouplist).agg(func) - assert result.index.name == 'index' - if arg == 'ohlc': - expected = DataFrame(expected.values.tolist()) - expected.columns = ['open', 'high', 'low', 'close'] - expected.index = Index(inds, name='index') - assert_frame_equal(result, expected) - else: - expected.index = inds - assert_series_equal(result, expected) - except BaseException as exc: - - exc.args += ('how=%s' % arg,) - raise + + result = getattr(s.resample( + '5min', closed='right', label='right'), arg)() + + expected = s.groupby(grouplist).agg(func) + assert result.index.name == 'index' + if arg == 'ohlc': + expected = DataFrame(expected.values.tolist()) + expected.columns = ['open', 'high', 'low', 'close'] + expected.index = Index(inds, name='index') + assert_frame_equal(result, expected) + else: + expected.index = inds + assert_series_equal(result, expected) def test_numpy_compat(self): # see gh-12811 From e29ffec1944ff9feb08f16089f4296dd5724159a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Dec 2018 19:32:37 +0000 Subject: [PATCH 10/10] move ohlc to a separate test --- pandas/tests/resample/test_datetime_index.py | 48 ++++++++++++-------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 6c8b30bff1dac..176cb7cb4b5db 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -115,6 +115,29 @@ def test_resample_string_kwargs(self): s.resample('5min', convention='starttt').mean() def test_resample_how(self, downsample_method): + if downsample_method == 'ohlc': + pytest.skip('covered by test_resample_how_ohlc') + + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + expected = s.groupby(grouplist).agg(downsample_method) + expected.index = date_range( + '1/1/2000', periods=4, freq='5min', name='index') + + result = getattr(s.resample( + '5min', closed='right', label='right'), downsample_method)() + + assert result.index.name == 'index' # redundant assert? + assert_series_equal(result, expected) + + def test_resample_how_ohlc(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') s = Series(np.random.randn(14), index=rng) @@ -123,7 +146,6 @@ def test_resample_how(self, downsample_method): grouplist[1:6] = 1 grouplist[6:11] = 2 grouplist[11:] = 3 - arg = downsample_method def _ohlc(group): if isna(group).all(): @@ -131,25 +153,15 @@ def _ohlc(group): return [group[0], group.max(), group.min(), group[-1]] inds = date_range('1/1/2000', periods=4, freq='5min', name='index') + expected = s.groupby(grouplist).agg(_ohlc) + expected = DataFrame(expected.values.tolist(), + index=Index(inds, name='index'), + columns=['open', 'high', 'low', 'close']) - if arg == 'ohlc': - func = _ohlc - else: - func = arg - - result = getattr(s.resample( - '5min', closed='right', label='right'), arg)() + result = s.resample('5min', closed='right', label='right').ohlc() - expected = s.groupby(grouplist).agg(func) - assert result.index.name == 'index' - if arg == 'ohlc': - expected = DataFrame(expected.values.tolist()) - expected.columns = ['open', 'high', 'low', 'close'] - expected.index = Index(inds, name='index') - assert_frame_equal(result, expected) - else: - expected.index = inds - assert_series_equal(result, expected) + assert result.index.name == 'index' # redundant assert? + assert_frame_equal(result, expected) def test_numpy_compat(self): # see gh-12811