From 05fa12bd145ff99ab7b4b97b4993851bf6ef006d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 28 Nov 2018 20:57:35 +0000 Subject: [PATCH 1/3] parametrize tests\resample\test_time_grouper.py --- pandas/tests/resample/test_time_grouper.py | 142 ++++++++------------- 1 file changed, 53 insertions(+), 89 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 927060609822e..192d879091285 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -1,13 +1,9 @@ -# pylint: disable=E1101 - from datetime import datetime from operator import methodcaller import numpy as np import pytest -from pandas.compat import zip - import pandas as pd from pandas import DataFrame, Panel, Series from pandas.core.indexes.datetimes import date_range @@ -104,20 +100,21 @@ def f(x): tm.assert_panel_equal(result, binagg) -def test_fails_on_no_datetime_index(): - index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') - index_funcs = (tm.makeIntIndex, - tm.makeUnicodeIndex, tm.makeFloatIndex, - lambda m: tm.makeCustomIndex(m, 2)) +@pytest.mark.parametrize('name, func', [ + ('Int64Index', tm.makeIntIndex), + ('Index', tm.makeUnicodeIndex), + ('Float64Index', tm.makeFloatIndex), + ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)) +]) +def test_fails_on_no_datetime_index(name, func): n = 2 - for name, func in zip(index_names, index_funcs): - index = func(n) - df = DataFrame({'a': np.random.randn(n)}, index=index) + index = func(n) + df = DataFrame({'a': np.random.randn(n)}, index=index) - msg = ("Only valid with DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an instance of %r" % name) - with pytest.raises(TypeError, match=msg): - df.groupby(TimeGrouper('D')) + msg = ("Only valid with DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an instance of %r" % name) + with pytest.raises(TypeError, match=msg): + df.groupby(TimeGrouper('D')) def test_aaa_group_order(): @@ -143,7 +140,20 @@ def test_aaa_group_order(): df[4::5]) -def test_aggregate_normal(): +@pytest.mark.parametrize('func, assert_func', [ + ('min', assert_frame_equal), + ('max', assert_frame_equal), + ('prod', assert_frame_equal), + ('var', assert_frame_equal), + ('std', assert_frame_equal), + ('mean', assert_frame_equal), + ('count', assert_frame_equal), + ('sum', assert_frame_equal), + ('size', assert_series_equal), # GH 7453 + ('first', assert_frame_equal), # GH 7453 + ('last', assert_frame_equal), # GH 7453 +]) +def test_aggregate_normal(func, assert_func): # check TimeGrouper's aggregation is identical as normal groupby n = 20 @@ -159,35 +169,11 @@ def test_aggregate_normal(): normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: - expected = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) - - for func in ['count', 'sum']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) - - # GH 7453 - for func in ['size']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_series_equal(expected, dt_result) - - # GH 7453 - for func in ['first', 'last']: - expected = getattr(normal_grouped, func)() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) + expected = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_func(expected, dt_result) # if TimeGrouper is used included, 'nth' doesn't work yet @@ -201,34 +187,23 @@ def test_aggregate_normal(): """ -@pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), +@pytest.mark.parametrize('method, method_args, unit', [ + ('sum', dict(), 0), + ('sum', dict(min_count=0), 0), + ('sum', dict(min_count=1), np.nan), + ('prod', dict(), 1), + ('prod', dict(min_count=0), 1), + ('prod', dict(min_count=1), np.nan) ]) -def test_resample_entirly_nat_window(method, unit): +def test_resample_entirly_nat_window(method, method_args, unit): s = pd.Series([0] * 2 + [np.nan] * 2, index=pd.date_range('2017', periods=4)) - # 0 / 1 by default - result = methodcaller(method)(s.resample("2d")) - expected = pd.Series([0.0, unit], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(s.resample("2d")) + result = methodcaller(method, **method_args)(s.resample("2d")) expected = pd.Series([0.0, unit], index=pd.to_datetime(['2017-01-01', '2017-01-03'])) tm.assert_series_equal(result, expected) - # min_count=1 - result = methodcaller(method, min_count=1)(s.resample("2d")) - expected = pd.Series([0.0, np.nan], - index=pd.to_datetime(['2017-01-01', - '2017-01-03'])) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize('func, fill_value', [ ('min', np.nan), @@ -302,33 +277,22 @@ def test_repr(): assert result == expected -@pytest.mark.parametrize('method, unit', [ - ('sum', 0), - ('prod', 1), +@pytest.mark.parametrize('method, method_args, expected_values', [ + ('sum', dict(), [1, 0, 1]), + ('sum', dict(min_count=0), [1, 0, 1]), + ('sum', dict(min_count=1), [1, np.nan, 1]), + ('sum', dict(min_count=2), [np.nan, np.nan, np.nan]), + ('prod', dict(), [1, 1, 1]), + ('prod', dict(min_count=0), [1, 1, 1]), + ('prod', dict(min_count=1), [1, np.nan, 1]), + ('prod', dict(min_count=2), [np.nan, np.nan, np.nan]), ]) -def test_upsample_sum(method, unit): +def test_upsample_sum(method, method_args, expected_values): s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) resampled = s.resample("30T") index = pd.to_datetime(['2017-01-01T00:00:00', '2017-01-01T00:30:00', '2017-01-01T01:00:00']) - - # 0 / 1 by default - result = methodcaller(method)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=0 - result = methodcaller(method, min_count=0)(resampled) - expected = pd.Series([1, unit, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count=1 - result = methodcaller(method, min_count=1)(resampled) - expected = pd.Series([1, np.nan, 1], index=index) - tm.assert_series_equal(result, expected) - - # min_count>1 - result = methodcaller(method, min_count=2)(resampled) - expected = pd.Series([np.nan, np.nan, np.nan], index=index) + result = methodcaller(method, **method_args)(resampled) + expected = pd.Series(expected_values, index=index) tm.assert_series_equal(result, expected) From e92cd2fbc74d110a692a01e897d6b7ae361a339e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 30 Nov 2018 23:08:13 +0000 Subject: [PATCH 2/3] resample method fixture --- pandas/tests/resample/conftest.py | 26 +++++++++++++++++++ pandas/tests/resample/test_base.py | 2 +- pandas/tests/resample/test_time_grouper.py | 29 +++++++--------------- 3 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 pandas/tests/resample/conftest.py diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py new file mode 100644 index 0000000000000..a8dc1e1f6c901 --- /dev/null +++ b/pandas/tests/resample/conftest.py @@ -0,0 +1,26 @@ +import pytest + +# The various methods we support +downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', + 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] +upsample_methods = ['count', 'size'] +series_methods = ['nunique'] +resample_methods = downsample_methods + upsample_methods + series_methods + + +@pytest.fixture(params=downsample_methods) +def downsample_method(request): + """Fixture for parametrization of Grouper downsample methods.""" + return request.param + + +@pytest.fixture(params=upsample_methods) +def upsample_method(request): + """Fixture for parametrization of Grouper upsample methods.""" + return request.param + + +@pytest.fixture(params=resample_methods) +def resample_method(request): + """Fixture for parametrization of Grouper resample methods.""" + return request.param diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 8d710289aecc1..db2162e9357e2 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -26,7 +26,7 @@ # The various methods we support downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', - 'median', 'prod', 'var', 'ohlc', 'quantile'] + 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] upsample_methods = ['count', 'size'] series_methods = ['nunique'] resample_methods = downsample_methods + upsample_methods + series_methods diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 192d879091285..ec29b55ac9d67 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -140,24 +140,13 @@ def test_aaa_group_order(): df[4::5]) -@pytest.mark.parametrize('func, assert_func', [ - ('min', assert_frame_equal), - ('max', assert_frame_equal), - ('prod', assert_frame_equal), - ('var', assert_frame_equal), - ('std', assert_frame_equal), - ('mean', assert_frame_equal), - ('count', assert_frame_equal), - ('sum', assert_frame_equal), - ('size', assert_series_equal), # GH 7453 - ('first', assert_frame_equal), # GH 7453 - ('last', assert_frame_equal), # GH 7453 -]) -def test_aggregate_normal(func, assert_func): - # check TimeGrouper's aggregation is identical as normal groupby +def test_aggregate_normal(resample_method): + """Check TimeGrouper's aggregation is identical as normal groupby.""" - n = 20 - data = np.random.randn(n, 4) + if resample_method == 'ohlc': + pytest.xfail(reason='DataError: No numeric types to aggregate') + + data = np.random.randn(20, 4) normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, 3, 4, 5] * 4 @@ -169,11 +158,11 @@ def test_aggregate_normal(func, assert_func): normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - expected = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() + expected = getattr(normal_grouped, resample_method)() + dt_result = getattr(dt_grouped, resample_method)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') - assert_func(expected, dt_result) + tm.assert_equal(expected, dt_result) # if TimeGrouper is used included, 'nth' doesn't work yet From 8e65852532b1d255a4892f5e6dca7f02de9b0cac Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 3 Dec 2018 00:59:21 +0000 Subject: [PATCH 3/3] use import in conftest.py to avoid duplication --- pandas/tests/resample/conftest.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index a8dc1e1f6c901..2130bd635b180 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -1,11 +1,7 @@ import pytest -# The various methods we support -downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem', - 'median', 'prod', 'var', 'std', 'ohlc', 'quantile'] -upsample_methods = ['count', 'size'] -series_methods = ['nunique'] -resample_methods = downsample_methods + upsample_methods + series_methods +from pandas.tests.resample.test_base import ( + downsample_methods, resample_methods, upsample_methods) @pytest.fixture(params=downsample_methods)