Skip to content

TST/CLN: parametrize tests\resample\test_time_grouper.py #24013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pandas/tests/resample/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest

# The various methods we support
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might be able to move these to pandas/conftest.py if they can also be used in the groupby tests (but that would be for a future PR).

# Method names that parametrize the ``downsample_method`` fixture.
downsample_methods = [
    'min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
    'median', 'prod', 'var', 'std', 'ohlc', 'quantile',
]

# Method names that parametrize the ``upsample_method`` fixture.
upsample_methods = ['count', 'size']

# Extra method name that is only added to the combined list below.
series_methods = ['nunique']

# Every method name above, in declaration order; parametrizes the
# ``resample_method`` fixture.
resample_methods = (
    downsample_methods
    + upsample_methods
    + series_methods
)


@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """
    Parametrized fixture over the names in ``downsample_methods``.

    Returns the method name (``request.param``) selected for the current
    test run.
    """
    method_name = request.param
    return method_name


@pytest.fixture(params=upsample_methods)
def upsample_method(request):
    """
    Parametrized fixture over the names in ``upsample_methods``.

    Returns the method name (``request.param``) selected for the current
    test run.
    """
    method_name = request.param
    return method_name


@pytest.fixture(params=resample_methods)
def resample_method(request):
    """
    Parametrized fixture over the names in ``resample_methods``.

    Returns the method name (``request.param``) selected for the current
    test run.
    """
    method_name = request.param
    return method_name
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

# The various methods we support
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
'median', 'prod', 'var', 'ohlc', 'quantile']
'median', 'prod', 'var', 'std', 'ohlc', 'quantile']
upsample_methods = ['count', 'size']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these still needed then (since you now have fixtures)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or are these still needed by the current methods until we migrate them all?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other tests that use this are not yet parametrized. What I probably should have done was import the declarations into conftest.py to avoid duplication.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, if you want to push that change, that would be good.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping on green.

series_methods = ['nunique']
resample_methods = downsample_methods + upsample_methods + series_methods
Expand Down
137 changes: 45 additions & 92 deletions pandas/tests/resample/test_time_grouper.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# pylint: disable=E1101

from datetime import datetime
from operator import methodcaller

import numpy as np
import pytest

from pandas.compat import zip

import pandas as pd
from pandas import DataFrame, Panel, Series
from pandas.core.indexes.datetimes import date_range
Expand Down Expand Up @@ -104,20 +100,21 @@ def f(x):
tm.assert_panel_equal(result, binagg)


def test_fails_on_no_datetime_index():
index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex')
index_funcs = (tm.makeIntIndex,
tm.makeUnicodeIndex, tm.makeFloatIndex,
lambda m: tm.makeCustomIndex(m, 2))
@pytest.mark.parametrize('name, func', [
('Int64Index', tm.makeIntIndex),
('Index', tm.makeUnicodeIndex),
('Float64Index', tm.makeFloatIndex),
('MultiIndex', lambda m: tm.makeCustomIndex(m, 2))
])
def test_fails_on_no_datetime_index(name, func):
n = 2
for name, func in zip(index_names, index_funcs):
index = func(n)
df = DataFrame({'a': np.random.randn(n)}, index=index)
index = func(n)
df = DataFrame({'a': np.random.randn(n)}, index=index)

msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
"or PeriodIndex, but got an instance of %r" % name)
with pytest.raises(TypeError, match=msg):
df.groupby(TimeGrouper('D'))
msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
"or PeriodIndex, but got an instance of %r" % name)
with pytest.raises(TypeError, match=msg):
df.groupby(TimeGrouper('D'))


def test_aaa_group_order():
Expand All @@ -143,11 +140,13 @@ def test_aaa_group_order():
df[4::5])


def test_aggregate_normal():
# check TimeGrouper's aggregation is identical as normal groupby
def test_aggregate_normal(resample_method):
"""Check TimeGrouper's aggregation is identical as normal groupby."""

n = 20
data = np.random.randn(n, 4)
if resample_method == 'ohlc':
pytest.xfail(reason='DataError: No numeric types to aggregate')

data = np.random.randn(20, 4)
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
normal_df['key'] = [1, 2, 3, 4, 5] * 4

Expand All @@ -159,35 +158,11 @@ def test_aggregate_normal():
normal_grouped = normal_df.groupby('key')
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

for func in ['min', 'max', 'prod', 'var', 'std', 'mean']:
expected = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
assert_frame_equal(expected, dt_result)

for func in ['count', 'sum']:
expected = getattr(normal_grouped, func)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
dt_result = getattr(dt_grouped, func)()
assert_frame_equal(expected, dt_result)

# GH 7453
for func in ['size']:
expected = getattr(normal_grouped, func)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
dt_result = getattr(dt_grouped, func)()
assert_series_equal(expected, dt_result)

# GH 7453
for func in ['first', 'last']:
expected = getattr(normal_grouped, func)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
dt_result = getattr(dt_grouped, func)()
assert_frame_equal(expected, dt_result)
expected = getattr(normal_grouped, resample_method)()
dt_result = getattr(dt_grouped, resample_method)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
tm.assert_equal(expected, dt_result)

# 'nth' doesn't work yet when TimeGrouper is used

Expand All @@ -201,34 +176,23 @@ def test_aggregate_normal():
"""


@pytest.mark.parametrize('method, unit', [
('sum', 0),
('prod', 1),
@pytest.mark.parametrize('method, method_args, unit', [
('sum', dict(), 0),
('sum', dict(min_count=0), 0),
('sum', dict(min_count=1), np.nan),
('prod', dict(), 1),
('prod', dict(min_count=0), 1),
('prod', dict(min_count=1), np.nan)
])
def test_resample_entirly_nat_window(method, unit):
def test_resample_entirly_nat_window(method, method_args, unit):
s = pd.Series([0] * 2 + [np.nan] * 2,
index=pd.date_range('2017', periods=4))
# 0 / 1 by default
result = methodcaller(method)(s.resample("2d"))
expected = pd.Series([0.0, unit],
index=pd.to_datetime(['2017-01-01',
'2017-01-03']))
tm.assert_series_equal(result, expected)

# min_count=0
result = methodcaller(method, min_count=0)(s.resample("2d"))
result = methodcaller(method, **method_args)(s.resample("2d"))
expected = pd.Series([0.0, unit],
index=pd.to_datetime(['2017-01-01',
'2017-01-03']))
tm.assert_series_equal(result, expected)

# min_count=1
result = methodcaller(method, min_count=1)(s.resample("2d"))
expected = pd.Series([0.0, np.nan],
index=pd.to_datetime(['2017-01-01',
'2017-01-03']))
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize('func, fill_value', [
('min', np.nan),
Expand Down Expand Up @@ -302,33 +266,22 @@ def test_repr():
assert result == expected


@pytest.mark.parametrize('method, unit', [
('sum', 0),
('prod', 1),
@pytest.mark.parametrize('method, method_args, expected_values', [
('sum', dict(), [1, 0, 1]),
('sum', dict(min_count=0), [1, 0, 1]),
('sum', dict(min_count=1), [1, np.nan, 1]),
('sum', dict(min_count=2), [np.nan, np.nan, np.nan]),
('prod', dict(), [1, 1, 1]),
('prod', dict(min_count=0), [1, 1, 1]),
('prod', dict(min_count=1), [1, np.nan, 1]),
('prod', dict(min_count=2), [np.nan, np.nan, np.nan]),
])
def test_upsample_sum(method, unit):
def test_upsample_sum(method, method_args, expected_values):
s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
resampled = s.resample("30T")
index = pd.to_datetime(['2017-01-01T00:00:00',
'2017-01-01T00:30:00',
'2017-01-01T01:00:00'])

# 0 / 1 by default
result = methodcaller(method)(resampled)
expected = pd.Series([1, unit, 1], index=index)
tm.assert_series_equal(result, expected)

# min_count=0
result = methodcaller(method, min_count=0)(resampled)
expected = pd.Series([1, unit, 1], index=index)
tm.assert_series_equal(result, expected)

# min_count=1
result = methodcaller(method, min_count=1)(resampled)
expected = pd.Series([1, np.nan, 1], index=index)
tm.assert_series_equal(result, expected)

# min_count>1
result = methodcaller(method, min_count=2)(resampled)
expected = pd.Series([np.nan, np.nan, np.nan], index=index)
result = methodcaller(method, **method_args)(resampled)
expected = pd.Series(expected_values, index=index)
tm.assert_series_equal(result, expected)