|
| 1 | +# pylint: disable=E1101 |
| 2 | + |
| 3 | +from datetime import datetime, timedelta |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +import pytest |
| 7 | + |
| 8 | +from pandas.compat import range, zip |
| 9 | +from pandas.errors import AbstractMethodError |
| 10 | + |
| 11 | +import pandas as pd |
| 12 | +from pandas import DataFrame, Series |
| 13 | +from pandas.core.groupby.groupby import DataError |
| 14 | +from pandas.core.indexes.datetimes import date_range |
| 15 | +from pandas.core.indexes.period import PeriodIndex, period_range |
| 16 | +from pandas.core.indexes.timedeltas import TimedeltaIndex |
| 17 | +from pandas.core.resample import TimeGrouper |
| 18 | +import pandas.util.testing as tm |
| 19 | +from pandas.util.testing import ( |
| 20 | + assert_almost_equal, assert_frame_equal, assert_index_equal, |
| 21 | + assert_series_equal) |
| 22 | + |
| 23 | +from pandas.tseries.offsets import BDay |
| 24 | + |
# Business-day offset instance shared at module level.
business_day_offset = BDay()

# Names of the resampler methods the tests iterate over, by category.
downsample_methods = [
    'min', 'max', 'first', 'last', 'sum', 'mean', 'sem', 'median', 'prod',
    'var', 'ohlc', 'quantile']
upsample_methods = ['count', 'size']
series_methods = ['nunique']
# Concatenation of the three lists above, in that order.
resample_methods = downsample_methods + upsample_methods + series_methods
| 33 | + |
| 34 | + |
def simple_date_range_series(start, end, freq='D'):
    """
    Build a test Series of random values over a date range.

    The index is ``date_range(start, end, freq=freq)`` and the values are
    one ``np.random.randn`` draw per index entry.
    """
    index = date_range(start, end, freq=freq)
    values = np.random.randn(len(index))
    return Series(values, index=index)
| 41 | + |
| 42 | + |
def simple_period_range_series(start, end, freq='D'):
    """
    Build a test Series of random values over a period range.

    The index is ``period_range(start, end, freq=freq)`` and the values are
    one ``np.random.randn`` draw per index entry.
    """
    index = period_range(start, end, freq=freq)
    values = np.random.randn(len(index))
    return Series(values, index=index)
| 49 | + |
| 50 | + |
class Base(object):
    """
    Base class for resampling tests.

    Supplies fixtures that build an index, a Series and a DataFrame, plus
    tests shared across index types.

    ``_index_factory`` and ``create_series`` are called by this class but
    are not defined in it; presumably concrete subclasses provide them
    (and override the ``_series_name`` fixture) -- verify against the
    subclasses.
    """

    def create_index(self, *args, **kwargs):
        """ return the _index_factory created using the args, kwargs """
        factory = self._index_factory()
        return factory(*args, **kwargs)

    @pytest.fixture
    def _index_start(self):
        """Start bound passed to ``create_index`` by the ``index`` fixture."""
        return datetime(2005, 1, 1)

    @pytest.fixture
    def _index_end(self):
        """End bound passed to ``create_index`` by the ``index`` fixture."""
        return datetime(2005, 1, 10)

    @pytest.fixture
    def _index_freq(self):
        """Frequency passed to ``create_index`` by the ``index`` fixture."""
        return 'D'

    @pytest.fixture
    def index(self, _index_start, _index_end, _index_freq):
        """Index built by ``create_index`` from the start/end/freq fixtures."""
        return self.create_index(_index_start, _index_end, freq=_index_freq)

    @pytest.fixture
    def _series_name(self):
        """Name for the ``series`` fixture; raises unless overridden."""
        raise AbstractMethodError(self)

    @pytest.fixture
    def _static_values(self, index):
        """Values ``0 .. len(index) - 1``, one per index entry."""
        return np.arange(len(index))

    @pytest.fixture
    def series(self, index, _series_name, _static_values):
        """Series of the static values over ``index``."""
        return Series(_static_values, index=index, name=_series_name)

    @pytest.fixture
    def frame(self, index, _static_values):
        """Single-column ('value') DataFrame of the static values."""
        return DataFrame({'value': _static_values}, index=index)

    @pytest.fixture(params=[Series, DataFrame])
    def series_and_frame(self, request, index, _series_name, _static_values):
        """Parametrized fixture yielding a Series, then a DataFrame."""
        # the params are classes, so compare by identity
        if request.param is Series:
            return Series(_static_values, index=index, name=_series_name)
        if request.param is DataFrame:
            return DataFrame({'value': _static_values}, index=index)

    @pytest.mark.parametrize('freq', ['2D', '1H'])
    def test_asfreq(self, series_and_frame, freq):
        """``resample(freq).asfreq()`` matches reindexing on the new index."""
        obj = series_and_frame

        result = obj.resample(freq).asfreq()
        new_index = self.create_index(obj.index[0], obj.index[-1], freq=freq)
        expected = obj.reindex(new_index)
        assert_almost_equal(result, expected)

    def test_asfreq_fill_value(self):
        """``asfreq(fill_value=...)`` matches ``reindex(fill_value=...)``."""
        # test for fill value during resampling, issue 3715

        s = self.create_series()

        result = s.resample('1H').asfreq()
        new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
        expected = s.reindex(new_index)
        assert_series_equal(result, expected)

        frame = s.to_frame('value')
        frame.iloc[1] = None
        result = frame.resample('1H').asfreq(fill_value=4.0)
        new_index = self.create_index(frame.index[0],
                                      frame.index[-1], freq='1H')
        expected = frame.reindex(new_index, fill_value=4.0)
        assert_frame_equal(result, expected)

    def test_resample_interpolate(self):
        """``resample().interpolate()`` equals ``asfreq().interpolate()``."""
        # # 12925
        df = self.create_series().to_frame('value')
        assert_frame_equal(
            df.resample('1T').asfreq().interpolate(),
            df.resample('1T').interpolate())

    def test_raises_on_non_datetimelike_index(self):
        """Resampling a frame without a datetimelike index raises."""
        # this is a non datetimelike index
        xp = DataFrame()
        with pytest.raises(TypeError):
            xp.resample('A').mean()

    def test_resample_empty_series(self):
        """Each method on an empty Series returns an empty result."""
        # GH12771 & GH12868

        s = self.create_series()[:0]

        # need to test for ohlc from GH13083
        # (loop-invariant, so built once outside the frequency loop)
        methods = [method for method in resample_methods
                   if method != 'ohlc']

        for freq in ['M', 'D', 'H']:
            for method in methods:
                result = getattr(s.resample(freq), method)()

                expected = s.copy()
                expected.index = s.index._shallow_copy(freq=freq)
                assert_index_equal(result.index, expected.index)
                assert result.index.freq == expected.index.freq
                assert_series_equal(result, expected, check_dtype=False)

    def test_resample_empty_dataframe(self):
        """Each method on an empty DataFrame returns an empty result."""
        # GH13212
        index = self.create_series().index[:0]
        f = DataFrame(index=index)

        # count retains dimensions too
        methods = downsample_methods + upsample_methods

        for freq in ['M', 'D', 'H']:
            for method in methods:
                result = getattr(f.resample(freq), method)()
                if method != 'size':
                    expected = f.copy()
                else:
                    # GH14962
                    expected = Series([])

                expected.index = f.index._shallow_copy(freq=freq)
                assert_index_equal(result.index, expected.index)
                assert result.index.freq == expected.index.freq
                assert_almost_equal(result, expected, check_dtype=False)

            # test size for GH13212 (currently stays as df)

    # ``np.float``, ``np.int`` and ``np.object`` were plain aliases of the
    # builtins and have been removed from recent NumPy releases; the
    # builtins are the same objects and work with every NumPy version.
    @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
    @pytest.mark.parametrize(
        "dtype",
        [float, int, object, 'datetime64[ns]'])
    def test_resample_empty_dtypes(self, index, dtype):
        """Smoke test: methods on empty Series of several dtypes run."""

        # Empty series were sometimes causing a segfault (for the functions
        # with Cython bounds-checking disabled) or an IndexError. We just run
        # them to ensure they no longer do. (GH #10228)
        for how in downsample_methods + upsample_methods:
            empty_series = Series([], index, dtype)
            try:
                getattr(empty_series.resample('d'), how)()
            except DataError:
                # Ignore these since some combinations are invalid
                # (ex: doing mean with dtype of np.object)
                pass

    def test_resample_loffset_arg_type(self):
        """``loffset`` shifts the result index for every ``agg`` arg type."""
        # GH 13218, 15002
        df = self.create_series().to_frame('value')
        expected_means = [df.values[i:i + 2].mean()
                          for i in range(0, len(df.values), 2)]
        # floor division keeps ``periods`` an integer under Python 3
        expected_index = self.create_index(df.index[0],
                                           periods=len(df.index) // 2,
                                           freq='2D')

        # loffset coerces PeriodIndex to DateTimeIndex
        if isinstance(expected_index, PeriodIndex):
            expected_index = expected_index.to_timestamp()

        expected_index += timedelta(hours=2)
        expected = DataFrame({'value': expected_means}, index=expected_index)

        for arg in ['mean', {'value': 'mean'}, ['mean']]:

            result_agg = df.resample('2D', loffset='2H').agg(arg)

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                result_how = df.resample('2D', how=arg, loffset='2H')

            # the list form is iterated last, so mutating ``expected``
            # here does not affect the earlier iterations
            if isinstance(arg, list):
                expected.columns = pd.MultiIndex.from_tuples([('value',
                                                               'mean')])

            # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
            if isinstance(expected.index, TimedeltaIndex):
                # One ``raises`` block per comparison: sharing a block
                # would leave the second comparison unreachable once the
                # first one raises.
                with pytest.raises(AssertionError):
                    assert_frame_equal(result_agg, expected)
                with pytest.raises(AssertionError):
                    assert_frame_equal(result_how, expected)
            else:
                assert_frame_equal(result_agg, expected)
                assert_frame_equal(result_how, expected)

    def test_apply_to_empty_series(self):
        """``apply`` on an empty Series gives the same result per func."""
        # GH 14313
        series = self.create_series()[:0]

        for freq in ['M', 'D', 'H']:
            result = series.resample(freq).apply(lambda x: 1)
            expected = series.resample(freq).apply(np.sum)

            assert_series_equal(result, expected, check_dtype=False)

    def test_resampler_is_iterable(self):
        """Iterating a resampler matches iterating the TimeGrouper groupby."""
        # GH 15314
        series = self.create_series()
        freq = 'H'
        tg = TimeGrouper(freq, convention='start')
        grouped = series.groupby(tg)
        resampled = series.resample(freq)
        for (rk, rv), (gk, gv) in zip(resampled, grouped):
            assert rk == gk
            assert_series_equal(rv, gv)

    def test_resample_quantile(self):
        """``quantile(q)`` matches aggregating with ``x.quantile(q)``."""
        # GH 15023
        s = self.create_series()
        q = 0.75
        freq = 'H'
        result = s.resample(freq).quantile(q)
        expected = s.resample(freq).agg(lambda x: x.quantile(q))
        assert_series_equal(result, expected)
0 commit comments