From 6bdf6fb300d7d95c699888c5f09a20f4a4cfe98d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 26 Mar 2024 16:23:34 -0700 Subject: [PATCH 1/2] DEPR: resample with PeriodIndex --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/api/typing/__init__.py | 2 - pandas/core/groupby/groupby.py | 1 - pandas/core/resample.py | 159 +-- pandas/tests/api/test_api.py | 1 - pandas/tests/resample/test_base.py | 177 +-- pandas/tests/resample/test_datetime_index.py | 16 - pandas/tests/resample/test_period_index.py | 1091 ------------------ pandas/tests/resample/test_resample_api.py | 2 +- pandas/tests/resample/test_time_grouper.py | 4 +- 10 files changed, 45 insertions(+), 1409 deletions(-) delete mode 100644 pandas/tests/resample/test_period_index.py diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4b7b075ceafaf..f1de2907df39f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -215,6 +215,7 @@ Removal of prior version deprecations/changes - Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) +- Enforced deprecation of :meth:`Series.resample` and :meth:`DataFrame.resample` with a :class:`PeriodIndex`, convert the index to :class:`DatetimeIndex` before resampling instead (:issue:`53481`) - Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) diff --git a/pandas/api/typing/__init__.py b/pandas/api/typing/__init__.py index 9b5d2cb06b523..f3a5bd2de319e 100644 --- a/pandas/api/typing/__init__.py +++ b/pandas/api/typing/__init__.py @@ -11,7 +11,6 @@ ) from pandas.core.resample import ( DatetimeIndexResamplerGroupby, - PeriodIndexResamplerGroupby, Resampler, TimedeltaIndexResamplerGroupby, TimeGrouper, @@ -41,7 +40,6 @@ "JsonReader", "NaTType", "NAType", - "PeriodIndexResamplerGroupby", "Resampler", "Rolling", "RollingGroupby", diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0b61938d474b9..c64502a2533c1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3512,7 +3512,6 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Returns ------- pandas.api.typing.DatetimeIndexResamplerGroupby, - pandas.api.typing.PeriodIndexResamplerGroupby, or pandas.api.typing.TimedeltaIndexResamplerGroupby Return a new groupby object, with type depending on the data being resampled. diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 43077e7aeecb4..b7f4035a1c8e6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -11,14 +11,12 @@ no_type_check, overload, ) -import warnings import numpy as np from pandas._libs import lib from pandas._libs.tslibs import ( BaseOffset, - IncompatibleFrequency, NaT, Period, Timedelta, @@ -32,10 +30,7 @@ Substitution, doc, ) -from pandas.util._exceptions import ( - find_stack_level, - rewrite_warning, -) +from pandas.util._exceptions import rewrite_warning from pandas.core.dtypes.dtypes import ( ArrowDtype, @@ -81,10 +76,6 @@ timedelta_range, ) -from pandas.tseries.frequencies import ( - is_subperiod, - is_superperiod, -) from pandas.tseries.offsets import ( Day, Tick, @@ -1704,127 +1695,6 @@ def _resampler_cls(self): return DatetimeIndexResampler -class PeriodIndexResampler(DatetimeIndexResampler): - # error: Incompatible types in assignment (expression has type "PeriodIndex", base - # class "DatetimeIndexResampler" defined the type as "DatetimeIndex") - ax: PeriodIndex # type: ignore[assignment] - - @property - def _resampler_for_grouping(self): - warnings.warn( - "Resampling a groupby with a PeriodIndex is deprecated. " - "Cast to DatetimeIndex before resampling instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return PeriodIndexResamplerGroupby - - def _get_binner_for_time(self): - if self.kind == "timestamp": - return super()._get_binner_for_time() - return self._timegrouper._get_period_bins(self.ax) - - def _convert_obj(self, obj: NDFrameT) -> NDFrameT: - obj = super()._convert_obj(obj) - - if self._from_selection: - # see GH 14008, GH 12871 - msg = ( - "Resampling from level= or on= selection " - "with a PeriodIndex is not currently supported, " - "use .set_index(...) to explicitly set index" - ) - raise NotImplementedError(msg) - - # convert to timestamp - if self.kind == "timestamp": - obj = obj.to_timestamp(how=self.convention) - - return obj - - def _downsample(self, how, **kwargs): - """ - Downsample the cython defined function. - - Parameters - ---------- - how : string / cython mapped function - **kwargs : kw args passed to how function - """ - # we may need to actually resample as if we are timestamps - if self.kind == "timestamp": - return super()._downsample(how, **kwargs) - - ax = self.ax - - if is_subperiod(ax.freq, self.freq): - # Downsampling - return self._groupby_and_aggregate(how, **kwargs) - elif is_superperiod(ax.freq, self.freq): - if how == "ohlc": - # GH #13083 - # upsampling to subperiods is handled as an asfreq, which works - # for pure aggregating/reducing methods - # OHLC reduces along the time dimension, but creates multiple - # values for each period -> handle by _groupby_and_aggregate() - return self._groupby_and_aggregate(how) - return self.asfreq() - elif ax.freq == self.freq: - return self.asfreq() - - raise IncompatibleFrequency( - f"Frequency {ax.freq} cannot be resampled to {self.freq}, " - "as they are not sub or super periods" - ) - - def _upsample(self, method, limit: int | None = None, fill_value=None): - """ - Parameters - ---------- - method : {'backfill', 'bfill', 'pad', 'ffill'} - Method for upsampling. - limit : int, default None - Maximum size gap to fill when reindexing. - fill_value : scalar, default None - Value to use for missing values. - """ - # we may need to actually resample as if we are timestamps - if self.kind == "timestamp": - return super()._upsample(method, limit=limit, fill_value=fill_value) - - ax = self.ax - obj = self.obj - new_index = self.binner - - # Start vs. end of period - memb = ax.asfreq(self.freq, how=self.convention) - - # Get the fill indexer - if method == "asfreq": - method = None - indexer = memb.get_indexer(new_index, method=method, limit=limit) - new_obj = _take_new_index( - obj, - indexer, - new_index, - ) - return self._wrap_result(new_obj) - - -# error: Definition of "ax" in base class "_GroupByMixin" is incompatible with -# definition in base class "PeriodIndexResampler" -class PeriodIndexResamplerGroupby( # type: ignore[misc] - _GroupByMixin, PeriodIndexResampler -): - """ - Provides a resample of a groupby implementation. - """ - - @property - def _resampler_cls(self): - return PeriodIndexResampler - - class TimedeltaIndexResampler(DatetimeIndexResampler): # error: Incompatible types in assignment (expression has type "TimedeltaIndex", # base class "DatetimeIndexResampler" defined the type as "DatetimeIndex") @@ -2054,27 +1924,11 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: gpr_index=ax, ) elif isinstance(ax, PeriodIndex) or kind == "period": - if isinstance(ax, PeriodIndex): + raise TypeError( # GH#53481 - warnings.warn( - "Resampling with a PeriodIndex is deprecated. " - "Cast index to DatetimeIndex before resampling instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - "Resampling with kind='period' is deprecated. " - "Use datetime paths instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return PeriodIndexResampler( - obj, - timegrouper=self, - kind=kind, - group_keys=self.group_keys, - gpr_index=ax, + "Resample is no longer supported with PeriodIndex. " + "Cast index to DatetimeIndex (with obj.index.to_timestamp()) " + "first instead." ) elif isinstance(ax, TimedeltaIndex): return TimedeltaIndexResampler( @@ -2085,8 +1939,7 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: ) raise TypeError( - "Only valid with DatetimeIndex, " - "TimedeltaIndex or PeriodIndex, " + "Only valid with DatetimeIndex or TimedeltaIndex, " f"but got an instance of '{type(ax).__name__}'" ) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 82c5c305b574c..80fbfebd695ce 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -259,7 +259,6 @@ class TestApi(Base): "JsonReader", "NaTType", "NAType", - "PeriodIndexResamplerGroupby", "Resampler", "Rolling", "RollingGroupby", diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 9cd51b95d6efd..c022feec84034 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -11,8 +11,6 @@ DatetimeIndex, Index, MultiIndex, - NaT, - PeriodIndex, Series, TimedeltaIndex, ) @@ -75,19 +73,13 @@ def test_asfreq_fill_value(index): [ timedelta_range("1 day", "10 day", freq="D"), date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), - period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), ], ) def test_resample_interpolate(index): # GH#12925 df = DataFrame(range(len(index)), index=index) - warn = None - if isinstance(df.index, PeriodIndex): - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(warn, match=msg): - result = df.resample("1min").asfreq().interpolate() - expected = df.resample("1min").interpolate() + result = df.resample("1min").asfreq().interpolate() + expected = df.resample("1min").interpolate() tm.assert_frame_equal(result, expected) @@ -95,7 +87,7 @@ def test_raises_on_non_datetimelike_index(): # this is a non datetimelike index xp = DataFrame() msg = ( - "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "Only valid with DatetimeIndex or TimedeltaIndex, " "but got an instance of 'RangeIndex'" ) with pytest.raises(TypeError, match=msg): @@ -105,7 +97,6 @@ def test_raises_on_non_datetimelike_index(): @pytest.mark.parametrize( "index", [ - PeriodIndex([], freq="D", name="a"), DatetimeIndex([], name="a"), TimedeltaIndex([], name="a"), ], @@ -123,16 +114,8 @@ def test_resample_empty_series(freq, index, resample_method): with pytest.raises(ValueError, match=msg): ser.resample(freq) return - elif freq == "ME" and isinstance(ser.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - - warn = None - if isinstance(ser.index, PeriodIndex): - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(warn, match=msg): - rs = ser.resample(freq) + + rs = ser.resample(freq) result = getattr(rs, resample_method)() if resample_method == "ohlc": @@ -150,40 +133,9 @@ def test_resample_empty_series(freq, index, resample_method): assert result.index.freq == expected.index.freq -@pytest.mark.parametrize( - "freq", - [ - pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")), - "D", - "h", - ], -) -def test_resample_nat_index_series(freq, resample_method): - # GH39227 - - ser = Series(range(5), index=PeriodIndex([NaT] * 5, freq=freq)) - - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = ser.resample(freq) - result = getattr(rs, resample_method)() - - if resample_method == "ohlc": - expected = DataFrame( - [], index=ser.index[:0], columns=["open", "high", "low", "close"] - ) - tm.assert_frame_equal(result, expected, check_dtype=False) - else: - expected = ser[:0].copy() - tm.assert_series_equal(result, expected, check_dtype=False) - tm.assert_index_equal(result.index, expected.index) - assert result.index.freq == expected.index.freq - - @pytest.mark.parametrize( "index", [ - PeriodIndex([], freq="D", name="a"), DatetimeIndex([], name="a"), TimedeltaIndex([], name="a"), ], @@ -201,16 +153,8 @@ def test_resample_count_empty_series(freq, index, resample_method): with pytest.raises(ValueError, match=msg): ser.resample(freq) return - elif freq == "ME" and isinstance(ser.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - warn = None - if isinstance(ser.index, PeriodIndex): - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(warn, match=msg): - rs = ser.resample(freq) + rs = ser.resample(freq) result = getattr(rs, resample_method)() @@ -221,9 +165,7 @@ def test_resample_count_empty_series(freq, index, resample_method): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")] -) +@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])]) @pytest.mark.parametrize("freq", ["ME", "D", "h"]) def test_resample_empty_dataframe(index, freq, resample_method): # GH13212 @@ -237,16 +179,8 @@ def test_resample_empty_dataframe(index, freq, resample_method): with pytest.raises(ValueError, match=msg): df.resample(freq, group_keys=False) return - elif freq == "ME" and isinstance(df.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - - warn = None - if isinstance(df.index, PeriodIndex): - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(warn, match=msg): - rs = df.resample(freq, group_keys=False) + + rs = df.resample(freq, group_keys=False) result = getattr(rs, resample_method)() if resample_method == "ohlc": # TODO: no tests with len(df.columns) > 0 @@ -269,9 +203,7 @@ def test_resample_empty_dataframe(index, freq, resample_method): # test size for GH13212 (currently stays as df) -@pytest.mark.parametrize( - "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")] -) +@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])]) @pytest.mark.parametrize("freq", ["ME", "D", "h"]) def test_resample_count_empty_dataframe(freq, index): # GH28427 @@ -285,16 +217,8 @@ def test_resample_count_empty_dataframe(freq, index): with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) return - elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - - warn = None - if isinstance(empty_frame_dti.index, PeriodIndex): - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(warn, match=msg): - rs = empty_frame_dti.resample(freq) + + rs = empty_frame_dti.resample(freq) result = rs.count() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -304,9 +228,7 @@ def test_resample_count_empty_dataframe(freq, index): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")] -) +@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])]) @pytest.mark.parametrize("freq", ["ME", "D", "h"]) def test_resample_size_empty_dataframe(freq, index): # GH28427 @@ -321,16 +243,8 @@ def test_resample_size_empty_dataframe(freq, index): with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) return - elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - - msg = "Resampling with a PeriodIndex" - warn = None - if isinstance(empty_frame_dti.index, PeriodIndex): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - rs = empty_frame_dti.resample(freq) + + rs = empty_frame_dti.resample(freq) result = rs.size() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -343,27 +257,18 @@ def test_resample_size_empty_dataframe(freq, index): @pytest.mark.parametrize( "index", [ - PeriodIndex([], freq="M", name="a"), DatetimeIndex([], name="a"), TimedeltaIndex([], name="a"), ], ) @pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"]) -@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_resample_empty_dtypes(index, dtype, resample_method): # Empty series were sometimes causing a segfault (for the functions # with Cython bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) - warn = None - if isinstance(index, PeriodIndex): - # GH#53511 - index = PeriodIndex([], freq="B", name=index.name) - warn = FutureWarning - msg = "Resampling with a PeriodIndex is deprecated" empty_series_dti = Series([], index, dtype) - with tm.assert_produces_warning(warn, match=msg): - rs = empty_series_dti.resample("d", group_keys=False) + rs = empty_series_dti.resample("d", group_keys=False) try: getattr(rs, resample_method)() except DataError: @@ -375,7 +280,6 @@ def test_resample_empty_dtypes(index, dtype, resample_method): @pytest.mark.parametrize( "index", [ - PeriodIndex([], freq="D", name="a"), DatetimeIndex([], name="a"), TimedeltaIndex([], name="a"), ], @@ -393,21 +297,11 @@ def test_apply_to_empty_series(index, freq): with pytest.raises(ValueError, match=msg): ser.resample(freq) return - elif freq == "ME" and isinstance(ser.index, PeriodIndex): - # index is PeriodIndex, so convert to corresponding Period freq - freq = "M" - - msg = "Resampling with a PeriodIndex" - warn = None - if isinstance(ser.index, PeriodIndex): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - rs = ser.resample(freq, group_keys=False) + rs = ser.resample(freq, group_keys=False) result = rs.apply(lambda x: 1) - with tm.assert_produces_warning(warn, match=msg): - expected = ser.resample(freq).apply("sum") + expected = ser.resample(freq).apply("sum") tm.assert_series_equal(result, expected, check_dtype=False) @@ -417,7 +311,6 @@ def test_apply_to_empty_series(index, freq): [ timedelta_range("1 day", "10 day", freq="D"), date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), - period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), ], ) def test_resampler_is_iterable(index): @@ -425,16 +318,9 @@ def test_resampler_is_iterable(index): series = Series(range(len(index)), index=index) freq = "h" tg = Grouper(freq=freq, convention="start") - msg = "Resampling with a PeriodIndex" - warn = None - if isinstance(series.index, PeriodIndex): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - grouped = series.groupby(tg) - - with tm.assert_produces_warning(warn, match=msg): - resampled = series.resample(freq) + grouped = series.groupby(tg) + resampled = series.resample(freq) for (rk, rv), (gk, gv) in zip(resampled, grouped): assert rk == gk tm.assert_series_equal(rv, gv) @@ -445,7 +331,6 @@ def test_resampler_is_iterable(index): [ timedelta_range("1 day", "10 day", freq="D"), date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), - period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), ], ) def test_resample_quantile(index): @@ -454,13 +339,8 @@ def test_resample_quantile(index): q = 0.75 freq = "h" - msg = "Resampling with a PeriodIndex" - warn = None - if isinstance(ser.index, PeriodIndex): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - result = ser.resample(freq).quantile(q) - expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) + result = ser.resample(freq).quantile(q) + expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) tm.assert_series_equal(result, expected) @@ -488,3 +368,16 @@ def test_first_last_skipna(any_real_nullable_dtype, skipna, how): expected = getattr(gb, how)(skipna=skipna) expected.index.freq = "ME" tm.assert_frame_equal(result, expected) + + +def test_resample_with_period_disallowed(): + # GH#53481 + ser = Series( + np.random.default_rng(2).integers(0, 5, 50), + index=period_range("2012-01-01", freq="h", periods=50), + dtype="float64", + ) + + msg = "Resample is no longer supported with PeriodIndex" + with pytest.raises(TypeError, match=msg): + ser.resample("D") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index fecd24c9a4b40..6ee98762a74f3 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -992,22 +992,6 @@ def test_resample_to_period_monthly_buglet(unit): tm.assert_index_equal(result.index, exp_index) -def test_period_with_agg(): - # aggregate a period resampler with a lambda - s2 = Series( - np.random.default_rng(2).integers(0, 5, 50), - index=period_range("2012-01-01", freq="h", periods=50), - dtype="float64", - ) - - expected = s2.to_timestamp().resample("D").mean().to_period() - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = s2.resample("D") - result = rs.agg(lambda x: x.mean()) - tm.assert_series_equal(result, expected) - - def test_resample_segfault(unit): # GH 8573 # segfaulting in older versions diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py deleted file mode 100644 index dd058ada60974..0000000000000 --- a/pandas/tests/resample/test_period_index.py +++ /dev/null @@ -1,1091 +0,0 @@ -from datetime import datetime -import re -import warnings - -import dateutil -import numpy as np -import pytest -import pytz - -from pandas._libs.tslibs.ccalendar import ( - DAYS, - MONTHS, -) -from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.errors import InvalidIndexError - -import pandas as pd -from pandas import ( - DataFrame, - Series, - Timestamp, -) -import pandas._testing as tm -from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import ( - Period, - PeriodIndex, - period_range, -) -from pandas.core.resample import _get_period_range_edges - -from pandas.tseries import offsets - -pytestmark = pytest.mark.filterwarnings( - "ignore:Resampling with a PeriodIndex is deprecated:FutureWarning" -) - - -@pytest.fixture -def simple_period_range_series(): - """ - Series with period range index and random data for test purposes. - """ - - def _simple_period_range_series(start, end, freq="D"): - with warnings.catch_warnings(): - # suppress Period[B] deprecation warning - msg = "|".join(["Period with BDay freq", r"PeriodDtype\[B\] is deprecated"]) - warnings.filterwarnings( - "ignore", - msg, - category=FutureWarning, - ) - rng = period_range(start, end, freq=freq) - return Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - - return _simple_period_range_series - - -class TestPeriodIndex: - @pytest.mark.parametrize("freq", ["2D", "1h", "2h"]) - @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) - def test_asfreq(self, frame_or_series, freq, kind): - # GH 12884, 15944 - # make sure .asfreq() returns PeriodIndex (except kind='timestamp') - - obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) - if kind == "timestamp": - expected = obj.to_timestamp().resample(freq).asfreq() - else: - start = obj.index[0].to_timestamp(how="start") - end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") - new_index = date_range(start=start, end=end, freq=freq, inclusive="left") - expected = obj.to_timestamp().reindex(new_index).to_period(freq) - msg = "The 'kind' keyword in (Series|DataFrame).resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = obj.resample(freq, kind=kind).asfreq() - tm.assert_almost_equal(result, expected) - - def test_asfreq_fill_value(self): - # test for fill value during resampling, issue 3715 - - index = period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - s = Series(range(len(index)), index=index) - new_index = date_range( - s.index[0].to_timestamp(how="start"), - (s.index[-1]).to_timestamp(how="start"), - freq="1h", - ) - expected = s.to_timestamp().reindex(new_index, fill_value=4.0) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample("1h", kind="timestamp").asfreq(fill_value=4.0) - tm.assert_series_equal(result, expected) - - frame = s.to_frame("value") - new_index = date_range( - frame.index[0].to_timestamp(how="start"), - (frame.index[-1]).to_timestamp(how="start"), - freq="1h", - ) - expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) - msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = frame.resample("1h", kind="timestamp").asfreq(fill_value=3.0) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("freq", ["h", "12h", "2D", "W"]) - @pytest.mark.parametrize("kind", [None, "period", "timestamp"]) - @pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}]) - def test_selection(self, freq, kind, kwargs): - # This is a bug, these should be implemented - # GH 14008 - index = period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - rng = np.arange(len(index), dtype=np.int64) - df = DataFrame( - {"date": index, "a": rng}, - index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]), - ) - msg = ( - "Resampling from level= or on= selection with a PeriodIndex is " - r"not currently supported, use \.set_index\(\.\.\.\) to " - "explicitly set index" - ) - depr_msg = "The 'kind' keyword in DataFrame.resample is deprecated" - with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.resample(freq, kind=kind, **kwargs) - - @pytest.mark.parametrize("month", MONTHS) - @pytest.mark.parametrize("meth", ["ffill", "bfill"]) - @pytest.mark.parametrize("conv", ["start", "end"]) - @pytest.mark.parametrize( - ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M"), ("QE", "Q")] - ) - def test_annual_upsample_cases( - self, offset, period, conv, meth, month, simple_period_range_series - ): - ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"Y-{month}") - warn = FutureWarning if period == "B" else None - msg = r"PeriodDtype\[B\] is deprecated" - if warn is None: - msg = "Resampling with a PeriodIndex is deprecated" - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - result = getattr(ts.resample(period, convention=conv), meth)() - expected = result.to_timestamp(period, how=conv) - expected = expected.asfreq(offset, meth).to_period() - tm.assert_series_equal(result, expected) - - def test_basic_downsample(self, simple_period_range_series): - ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") - result = ts.resample("Y-DEC").mean() - - expected = ts.groupby(ts.index.year).mean() - expected.index = period_range("1/1/1990", "6/30/1995", freq="Y-DEC") - tm.assert_series_equal(result, expected) - - # this is ok - tm.assert_series_equal(ts.resample("Y-DEC").mean(), result) - tm.assert_series_equal(ts.resample("Y").mean(), result) - - @pytest.mark.parametrize( - "rule,expected_error_msg", - [ - ("Y-DEC", ""), - ("Q-MAR", ""), - ("M", ""), - ("w-thu", ""), - ], - ) - def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg): - # These are incompatible period rules for resampling - ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed") - msg = ( - "Frequency cannot be resampled to " - f"{expected_error_msg}, as they are not sub or super periods" - ) - with pytest.raises(IncompatibleFrequency, match=msg): - ts.resample(rule).mean() - - @pytest.mark.parametrize("freq", ["D", "2D"]) - def test_basic_upsample(self, freq, simple_period_range_series): - ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") - result = ts.resample("Y-DEC").mean() - - msg = "The 'convention' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - resampled = result.resample(freq, convention="end").ffill() - expected = result.to_timestamp(freq, how="end") - expected = expected.asfreq(freq, "ffill").to_period(freq) - tm.assert_series_equal(resampled, expected) - - def test_upsample_with_limit(self): - rng = period_range("1/1/2000", periods=5, freq="Y") - ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) - - msg = "The 'convention' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("M", convention="end").ffill(limit=2) - expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) - tm.assert_series_equal(result, expected) - - def test_annual_upsample(self, simple_period_range_series): - ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="Y-DEC") - df = DataFrame({"a": ts}) - rdf = df.resample("D").ffill() - exp = df["a"].resample("D").ffill() - tm.assert_series_equal(rdf["a"], exp) - - def test_annual_upsample2(self): - rng = period_range("2000", "2003", freq="Y-DEC") - ts = Series([1, 2, 3, 4], index=rng) - - result = ts.resample("M").ffill() - ex_index = period_range("2000-01", "2003-12", freq="M") - - expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("month", MONTHS) - @pytest.mark.parametrize("convention", ["start", "end"]) - @pytest.mark.parametrize( - ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M")] - ) - def test_quarterly_upsample( - self, month, offset, period, convention, simple_period_range_series - ): - freq = f"Q-{month}" - ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) - warn = FutureWarning if period == "B" else None - msg = r"PeriodDtype\[B\] is deprecated" - if warn is None: - msg = "Resampling with a PeriodIndex is deprecated" - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - result = ts.resample(period, convention=convention).ffill() - expected = result.to_timestamp(period, how=convention) - expected = expected.asfreq(offset, "ffill").to_period() - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("target", ["D", "B"]) - @pytest.mark.parametrize("convention", ["start", "end"]) - def test_monthly_upsample(self, target, convention, simple_period_range_series): - ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") - - warn = None if target == "D" else FutureWarning - msg = r"PeriodDtype\[B\] is deprecated" - if warn is None: - msg = "Resampling with a PeriodIndex is deprecated" - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - result = ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, "ffill").to_period() - tm.assert_series_equal(result, expected) - - def test_resample_basic(self): - # GH3609 - s = Series( - range(100), - index=date_range("20130101", freq="s", periods=100, name="idx"), - dtype="float", - ) - s[10:30] = np.nan - index = PeriodIndex( - [Period("2013-01-01 00:00", "min"), Period("2013-01-01 00:01", "min")], - name="idx", - ) - expected = Series([34.5, 79.5], index=index) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.to_period().resample("min", kind="period").mean() - tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result2 = s.resample("min", kind="period").mean() - tm.assert_series_equal(result2, expected) - - @pytest.mark.parametrize( - "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])] - ) - def test_resample_count(self, freq, expected_vals): - # GH12774 - series = Series(1, index=period_range(start="2000", periods=100)) - result = series.resample(freq).count() - expected_index = period_range( - start="2000", freq=freq, periods=len(expected_vals) - ) - expected = Series(expected_vals, index=expected_index) - tm.assert_series_equal(result, expected) - - def test_resample_same_freq(self, resample_method): - # GH12770 - series = Series(range(3), index=period_range(start="2000", periods=3, freq="M")) - expected = series - - result = getattr(series.resample("M"), resample_method)() - tm.assert_series_equal(result, expected) - - def test_resample_incompat_freq(self): - msg = ( - "Frequency cannot be resampled to , " - "as they are not sub or super periods" - ) - pi = period_range(start="2000", periods=3, freq="M") - ser = Series(range(3), index=pi) - rs = ser.resample("W") - with pytest.raises(IncompatibleFrequency, match=msg): - # TODO: should this raise at the resample call instead of at the mean call? - rs.mean() - - @pytest.mark.parametrize( - "tz", - [ - pytz.timezone("America/Los_Angeles"), - dateutil.tz.gettz("America/Los_Angeles"), - ], - ) - def test_with_local_timezone(self, tz): - # see gh-5430 - local_timezone = tz - - start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc) - # 1 day later - end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc) - - index = date_range(start, end, freq="h", name="idx") - - series = Series(1, index=index) - series = series.tz_convert(local_timezone) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = series.resample("D", kind="period").mean() - - # Create the expected series - # Index is moved back a day with the timezone conversion from UTC to - # Pacific - expected_index = ( - period_range(start=start, end=end, freq="D", name="idx") - offsets.Day() - ) - expected = Series(1.0, index=expected_index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "tz", - [ - pytz.timezone("America/Los_Angeles"), - dateutil.tz.gettz("America/Los_Angeles"), - ], - ) - def test_resample_with_tz(self, tz, unit): - # GH 13238 - dti = date_range("2017-01-01", periods=48, freq="h", tz=tz, unit=unit) - ser = Series(2, index=dti) - result = ser.resample("D").mean() - exp_dti = pd.DatetimeIndex( - ["2017-01-01", "2017-01-02"], tz=tz, freq="D" - ).as_unit(unit) - expected = Series( - 2.0, - index=exp_dti, - ) - tm.assert_series_equal(result, expected) - # Especially assert that the timezone is LMT for pytz - assert result.index.tz == tz - - def test_resample_nonexistent_time_bin_edge(self): - # GH 19375 - index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15min") - s = Series(np.zeros(len(index)), index=index) - expected = s.tz_localize("US/Pacific") - expected.index = pd.DatetimeIndex(expected.index, freq="900s") - result = expected.resample("900s").mean() - tm.assert_series_equal(result, expected) - - def test_resample_nonexistent_time_bin_edge2(self): - # GH 23742 - index = date_range(start="2017-10-10", end="2017-10-20", freq="1h") - index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo") - df = DataFrame(data=list(range(len(index))), index=index) - result = df.groupby(pd.Grouper(freq="1D")).count() - expected = date_range( - start="2017-10-09", - end="2017-10-20", - freq="D", - tz="America/Sao_Paulo", - nonexistent="shift_forward", - inclusive="left", - ) - tm.assert_index_equal(result.index, expected) - - def test_resample_ambiguous_time_bin_edge(self): - # GH 10117 - idx = date_range( - "2014-10-25 22:00:00", - "2014-10-26 00:30:00", - freq="30min", - tz="Europe/London", - ) - expected = Series(np.zeros(len(idx)), index=idx) - result = expected.resample("30min").mean() - tm.assert_series_equal(result, expected) - - def test_fill_method_and_how_upsample(self): - # GH2073 - s = Series( - np.arange(9, dtype="int64"), - index=date_range("2010-01-01", periods=9, freq="QE"), - ) - last = s.resample("ME").ffill() - both = s.resample("ME").ffill().resample("ME").last().astype("int64") - tm.assert_series_equal(last, both) - - @pytest.mark.parametrize("day", DAYS) - @pytest.mark.parametrize("target", ["D", "B"]) - @pytest.mark.parametrize("convention", ["start", "end"]) - def test_weekly_upsample(self, day, target, convention, simple_period_range_series): - freq = f"W-{day}" - ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) - - warn = None if target == "D" else FutureWarning - msg = r"PeriodDtype\[B\] is deprecated" - if warn is None: - msg = "Resampling with a PeriodIndex is deprecated" - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): - result = ts.resample(target, convention=convention).ffill() - expected = result.to_timestamp(target, how=convention) - expected = expected.asfreq(target, "ffill").to_period() - tm.assert_series_equal(result, expected) - - def test_resample_to_timestamps(self, simple_period_range_series): - ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") - - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("Y-DEC", kind="timestamp").mean() - expected = ts.to_timestamp(how="start").resample("YE-DEC").mean() - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("month", MONTHS) - def test_resample_to_quarterly(self, simple_period_range_series, month): - ts = simple_period_range_series("1990", "1992", freq=f"Y-{month}") - quar_ts = ts.resample(f"Q-{month}").ffill() - - stamps = ts.to_timestamp("D", how="start") - qdates = period_range( - ts.index[0].asfreq("D", "start"), - ts.index[-1].asfreq("D", "end"), - freq=f"Q-{month}", - ) - - expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill") - expected.index = qdates - - tm.assert_series_equal(quar_ts, expected) - - @pytest.mark.parametrize("how", ["start", "end"]) - def test_resample_to_quarterly_start_end(self, simple_period_range_series, how): - # conforms, but different month - ts = simple_period_range_series("1990", "1992", freq="Y-JUN") - msg = "The 'convention' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("Q-MAR", convention=how).ffill() - expected = ts.asfreq("Q-MAR", how=how) - expected = expected.reindex(result.index, method="ffill") - - # FIXME: don't leave commented-out - # .to_timestamp('D') - # expected = expected.resample('Q-MAR').ffill() - - tm.assert_series_equal(result, expected) - - def test_resample_fill_missing(self): - rng = PeriodIndex([2000, 2005, 2007, 2009], freq="Y") - - s = Series(np.random.default_rng(2).standard_normal(4), index=rng) - - stamps = s.to_timestamp() - filled = s.resample("Y").ffill() - expected = stamps.resample("YE").ffill().to_period("Y") - tm.assert_series_equal(filled, expected) - - def test_cant_fill_missing_dups(self): - rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="Y") - s = Series(np.random.default_rng(2).standard_normal(5), index=rng) - msg = "Reindexing only valid with uniquely valued Index objects" - with pytest.raises(InvalidIndexError, match=msg): - s.resample("Y").ffill() - - @pytest.mark.parametrize("freq", ["5min"]) - @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) - def test_resample_5minute(self, freq, kind): - rng = period_range("1/1/2000", "1/5/2000", freq="min") - ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - expected = ts.to_timestamp().resample(freq).mean() - if kind != "timestamp": - expected = expected.to_period(freq) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample(freq, kind=kind).mean() - tm.assert_series_equal(result, expected) - - def test_upsample_daily_business_daily(self, simple_period_range_series): - ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B") - - result = ts.resample("D").asfreq() - expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000")) - tm.assert_series_equal(result, expected) - - ts = simple_period_range_series("1/1/2000", "2/1/2000") - msg = "The 'convention' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ts.resample("h", convention="s").asfreq() - exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="h") - expected = ts.asfreq("h", how="s").reindex(exp_rng) - tm.assert_series_equal(result, expected) - - def test_resample_irregular_sparse(self): - dr = date_range(start="1/1/2012", freq="5min", periods=1000) - s = Series(np.array(100), index=dr) - # subset the data. - subset = s[:"2012-01-04 06:55"] - - result = subset.resample("10min").apply(len) - expected = s.resample("10min").apply(len).loc[result.index] - tm.assert_series_equal(result, expected) - - def test_resample_weekly_all_na(self): - rng = date_range("1/1/2000", periods=10, freq="W-WED") - ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) - - result = ts.resample("W-THU").asfreq() - - assert result.isna().all() - - result = ts.resample("W-THU").asfreq().ffill()[:-1] - expected = ts.asfreq("W-THU").ffill() - tm.assert_series_equal(result, expected) - - def test_resample_tz_localized(self, unit): - dr = date_range(start="2012-4-13", end="2012-5-1", unit=unit) - ts = Series(range(len(dr)), index=dr) - - ts_utc = ts.tz_localize("UTC") - ts_local = ts_utc.tz_convert("America/Los_Angeles") - - result = ts_local.resample("W").mean() - - ts_local_naive = ts_local.copy() - ts_local_naive.index = ts_local_naive.index.tz_localize(None) - - exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") - exp.index = pd.DatetimeIndex(exp.index, freq="W") - - tm.assert_series_equal(result, exp) - - # it works - result = ts_local.resample("D").mean() - - def test_resample_tz_localized2(self): - # #2245 - idx = date_range( - "2001-09-20 15:59", "2001-09-20 16:00", freq="min", tz="Australia/Sydney" - ) - s = Series([1, 2], index=idx) - - result = s.resample("D", closed="right", label="right").mean() - ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney") - expected = Series([1.5], index=ex_index) - - tm.assert_series_equal(result, expected) - - # for good measure - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample("D", kind="period").mean() - ex_index = period_range("2001-09-20", periods=1, freq="D") - expected = Series([1.5], index=ex_index) - tm.assert_series_equal(result, expected) - - def test_resample_tz_localized3(self): - # GH 6397 - # comparing an offset that doesn't propagate tz's - rng = date_range("1/1/2011", periods=20000, freq="h") - rng = rng.tz_localize("EST") - ts = DataFrame(index=rng) - ts["first"] = np.random.default_rng(2).standard_normal(len(rng)) - ts["second"] = np.cumsum(np.random.default_rng(2).standard_normal(len(rng))) - expected = DataFrame( - { - "first": ts.resample("YE").sum()["first"], - "second": ts.resample("YE").mean()["second"], - }, - columns=["first", "second"], - ) - result = ( - ts.resample("YE") - .agg({"first": "sum", "second": "mean"}) - .reindex(columns=["first", "second"]) - ) - tm.assert_frame_equal(result, expected) - - def test_closed_left_corner(self): - # #1465 - s = Series( - np.random.default_rng(2).standard_normal(21), - index=date_range(start="1/1/2012 9:30", freq="1min", periods=21), - ) - s.iloc[0] = np.nan - - result = s.resample("10min", closed="left", label="right").mean() - exp = s[1:].resample("10min", closed="left", label="right").mean() - tm.assert_series_equal(result, exp) - - result = s.resample("10min", closed="left", label="left").mean() - exp = s[1:].resample("10min", closed="left", label="left").mean() - - ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3) - - tm.assert_index_equal(result.index, ex_index) - tm.assert_series_equal(result, exp) - - def test_quarterly_resampling(self): - rng = period_range("2000Q1", periods=10, freq="Q-DEC") - ts = Series(np.arange(10), index=rng) - - result = ts.resample("Y").mean() - exp = ts.to_timestamp().resample("YE").mean().to_period() - tm.assert_series_equal(result, exp) - - def test_resample_weekly_bug_1726(self): - # 8/6/12 is a Monday - ind = date_range(start="8/6/2012", end="8/26/2012", freq="D") - n = len(ind) - data = [[x] * 5 for x in range(n)] - df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind) - - # it works! - df.resample("W-MON", closed="left", label="left").first() - - def test_resample_with_dst_time_change(self): - # GH 15549 - index = ( - pd.DatetimeIndex([1457537600000000000, 1458059600000000000]) - .tz_localize("UTC") - .tz_convert("America/Chicago") - ) - df = DataFrame([1, 2], index=index) - result = df.resample("12h", closed="right", label="right").last().ffill() - - expected_index_values = [ - "2016-03-09 12:00:00-06:00", - "2016-03-10 00:00:00-06:00", - "2016-03-10 12:00:00-06:00", - "2016-03-11 00:00:00-06:00", - "2016-03-11 12:00:00-06:00", - "2016-03-12 00:00:00-06:00", - "2016-03-12 12:00:00-06:00", - "2016-03-13 00:00:00-06:00", - "2016-03-13 13:00:00-05:00", - "2016-03-14 01:00:00-05:00", - "2016-03-14 13:00:00-05:00", - "2016-03-15 01:00:00-05:00", - "2016-03-15 13:00:00-05:00", - ] - index = ( - pd.to_datetime(expected_index_values, utc=True) - .tz_convert("America/Chicago") - .as_unit(index.unit) - ) - index = pd.DatetimeIndex(index, freq="12h") - expected = DataFrame( - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], - index=index, - ) - tm.assert_frame_equal(result, expected) - - def test_resample_bms_2752(self): - # GH2753 - timeseries = Series( - index=pd.bdate_range("20000101", "20000201"), dtype=np.float64 - ) - res1 = timeseries.resample("BMS").mean() - res2 = timeseries.resample("BMS").mean().resample("B").mean() - assert res1.index[0] == Timestamp("20000103") - assert res1.index[0] == res2.index[0] - - @pytest.mark.xfail(reason="Commented out for more than 3 years. Should this work?") - def test_monthly_convention_span(self): - rng = period_range("2000-01", periods=3, freq="ME") - ts = Series(np.arange(3), index=rng) - - # hacky way to get same thing - exp_index = period_range("2000-01-01", "2000-03-31", freq="D") - expected = ts.asfreq("D", how="end").reindex(exp_index) - expected = expected.fillna(method="bfill") - - result = ts.resample("D").mean() - - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "from_freq, to_freq", [("D", "ME"), ("QE", "YE"), ("ME", "QE"), ("D", "W")] - ) - def test_default_right_closed_label(self, from_freq, to_freq): - idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx) - - resampled = df.resample(to_freq).mean() - tm.assert_frame_equal( - resampled, df.resample(to_freq, closed="right", label="right").mean() - ) - - @pytest.mark.parametrize( - "from_freq, to_freq", - [("D", "MS"), ("QE", "YS"), ("ME", "QS"), ("h", "D"), ("min", "h")], - ) - def test_default_left_closed_label(self, from_freq, to_freq): - idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx) - - resampled = df.resample(to_freq).mean() - tm.assert_frame_equal( - resampled, df.resample(to_freq, closed="left", label="left").mean() - ) - - def test_all_values_single_bin(self): - # GH#2070 - index = period_range(start="2012-01-01", end="2012-12-31", freq="M") - ser = Series(np.random.default_rng(2).standard_normal(len(index)), index=index) - - result = ser.resample("Y").mean() - tm.assert_almost_equal(result.iloc[0], ser.mean()) - - def test_evenly_divisible_with_no_extra_bins(self): - # GH#4076 - # when the frequency is evenly divisible, sometimes extra bins - - df = DataFrame( - np.random.default_rng(2).standard_normal((9, 3)), - index=date_range("2000-1-1", periods=9), - ) - result = df.resample("5D").mean() - expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T - expected.index = pd.DatetimeIndex( - [Timestamp("2000-1-1"), Timestamp("2000-1-6")], dtype="M8[ns]", freq="5D" - ) - tm.assert_frame_equal(result, expected) - - def test_evenly_divisible_with_no_extra_bins2(self): - index = date_range(start="2001-5-4", periods=28) - df = DataFrame( - [ - { - "REST_KEY": 1, - "DLY_TRN_QT": 80, - "DLY_SLS_AMT": 90, - "COOP_DLY_TRN_QT": 30, - "COOP_DLY_SLS_AMT": 20, - } - ] - * 28 - + [ - { - "REST_KEY": 2, - "DLY_TRN_QT": 70, - "DLY_SLS_AMT": 10, - "COOP_DLY_TRN_QT": 50, - "COOP_DLY_SLS_AMT": 20, - } - ] - * 28, - index=index.append(index), - ).sort_index() - - index = date_range("2001-5-4", periods=4, freq="7D") - expected = DataFrame( - [ - { - "REST_KEY": 14, - "DLY_TRN_QT": 14, - "DLY_SLS_AMT": 14, - "COOP_DLY_TRN_QT": 14, - "COOP_DLY_SLS_AMT": 14, - } - ] - * 4, - index=index, - ) - result = df.resample("7D").count() - tm.assert_frame_equal(result, expected) - - expected = DataFrame( - [ - { - "REST_KEY": 21, - "DLY_TRN_QT": 1050, - "DLY_SLS_AMT": 700, - "COOP_DLY_TRN_QT": 560, - "COOP_DLY_SLS_AMT": 280, - } - ] - * 4, - index=index, - ) - result = df.resample("7D").sum() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("freq, period_mult", [("h", 24), ("12h", 2)]) - @pytest.mark.parametrize("kind", [None, "period"]) - def test_upsampling_ohlc(self, freq, period_mult, kind): - # GH 13083 - pi = period_range(start="2000", freq="D", periods=10) - s = Series(range(len(pi)), index=pi) - expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) - - # timestamp-based resampling doesn't include all sub-periods - # of the last original period, so extend accordingly: - new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi)) - expected = expected.reindex(new_index) - msg = "The 'kind' keyword in Series.resample is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample(freq, kind=kind).ohlc() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "periods, values", - [ - ( - [ - pd.NaT, - "1970-01-01 00:00:00", - pd.NaT, - "1970-01-01 00:00:02", - "1970-01-01 00:00:03", - ], - [2, 3, 5, 7, 11], - ), - ( - [ - pd.NaT, - pd.NaT, - "1970-01-01 00:00:00", - pd.NaT, - pd.NaT, - pd.NaT, - "1970-01-01 00:00:02", - "1970-01-01 00:00:03", - pd.NaT, - pd.NaT, - ], - [1, 2, 3, 5, 6, 8, 7, 11, 12, 13], - ), - ], - ) - @pytest.mark.parametrize( - "freq, expected_values", - [ - ("1s", [3, np.nan, 7, 11]), - ("2s", [3, (7 + 11) / 2]), - ("3s", [(3 + 7) / 2, 11]), - ], - ) - def test_resample_with_nat(self, periods, values, freq, expected_values): - # GH 13224 - index = PeriodIndex(periods, freq="s") - frame = DataFrame(values, index=index) - - expected_index = period_range( - "1970-01-01 00:00:00", periods=len(expected_values), freq=freq - ) - expected = DataFrame(expected_values, index=expected_index) - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = frame.resample(freq) - result = rs.mean() - tm.assert_frame_equal(result, expected) - - def test_resample_with_only_nat(self): - # GH 13224 - pi = PeriodIndex([pd.NaT] * 3, freq="s") - frame = DataFrame([2, 3, 5], index=pi, columns=["a"]) - expected_index = PeriodIndex(data=[], freq=pi.freq) - expected = DataFrame(index=expected_index, columns=["a"], dtype="float64") - result = frame.resample("1s").mean() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "start,end,start_freq,end_freq,offset", - [ - ("19910905", "19910909 03:00", "h", "24h", "10h"), - ("19910905", "19910909 12:00", "h", "24h", "10h"), - ("19910905", "19910909 23:00", "h", "24h", "10h"), - ("19910905 10:00", "19910909", "h", "24h", "10h"), - ("19910905 10:00", "19910909 10:00", "h", "24h", "10h"), - ("19910905", "19910909 10:00", "h", "24h", "10h"), - ("19910905 12:00", "19910909", "h", "24h", "10h"), - ("19910905 12:00", "19910909 03:00", "h", "24h", "10h"), - ("19910905 12:00", "19910909 12:00", "h", "24h", "10h"), - ("19910905 12:00", "19910909 12:00", "h", "24h", "34h"), - ("19910905 12:00", "19910909 12:00", "h", "17h", "10h"), - ("19910905 12:00", "19910909 12:00", "h", "17h", "3h"), - ("19910905", "19910913 06:00", "2h", "24h", "10h"), - ("19910905", "19910905 01:39", "Min", "5Min", "3Min"), - ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"), - ], - ) - def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): - # GH 23882 & 31809 - pi = period_range(start, end, freq=start_freq) - ser = Series(np.arange(len(pi)), index=pi) - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = ser.resample(end_freq, offset=offset) - result = rs.mean() - result = result.to_timestamp(end_freq) - - expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() - tm.assert_series_equal(result, expected) - - def test_resample_with_offset_month(self): - # GH 23882 & 31809 - pi = period_range("19910905 12:00", "19910909 1:00", freq="h") - ser = Series(np.arange(len(pi)), index=pi) - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = ser.resample("M", offset="3h") - result = rs.mean() - result = result.to_timestamp("M") - expected = ser.to_timestamp().resample("ME", offset="3h").mean() - # TODO: is non-tick the relevant characteristic? (GH 33815) - expected.index = expected.index._with_freq(None) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "first,last,freq,freq_to_offset,exp_first,exp_last", - [ - ("19910905", "19920406", "D", "D", "19910905", "19920406"), - ("19910905 00:00", "19920406 06:00", "D", "D", "19910905", "19920406"), - ( - "19910905 06:00", - "19920406 06:00", - "h", - "h", - "19910905 06:00", - "19920406 06:00", - ), - ("19910906", "19920406", "M", "ME", "1991-09", "1992-04"), - ("19910831", "19920430", "M", "ME", "1991-08", "1992-04"), - ("1991-08", "1992-04", "M", "ME", "1991-08", "1992-04"), - ], - ) - def test_get_period_range_edges( - self, first, last, freq, freq_to_offset, exp_first, exp_last - ): - first = Period(first) - last = Period(last) - - exp_first = Period(exp_first, freq=freq) - exp_last = Period(exp_last, freq=freq) - - freq = pd.tseries.frequencies.to_offset(freq_to_offset) - result = _get_period_range_edges(first, last, freq) - expected = (exp_first, exp_last) - assert result == expected - - def test_sum_min_count(self): - # GH 19974 - index = date_range(start="2018", freq="ME", periods=6) - data = np.ones(6) - data[3:6] = np.nan - s = Series(data, index).to_period() - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = s.resample("Q") - result = rs.sum(min_count=1) - expected = Series( - [3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC") - ) - tm.assert_series_equal(result, expected) - - def test_resample_t_l_deprecated(self): - # GH#52536 - msg_t = "Invalid frequency: T" - msg_l = "Invalid frequency: L" - - with pytest.raises(ValueError, match=msg_l): - period_range( - "2020-01-01 00:00:00 00:00", "2020-01-01 00:00:00 00:01", freq="L" - ) - rng_l = period_range( - "2020-01-01 00:00:00 00:00", "2020-01-01 00:00:00 00:01", freq="ms" - ) - ser = Series(np.arange(len(rng_l)), index=rng_l) - - with pytest.raises(ValueError, match=msg_t): - ser.resample("T").mean() - - @pytest.mark.parametrize( - "freq, freq_depr, freq_res, freq_depr_res, data", - [ - ("2Q", "2q", "2Y", "2y", [0.5]), - ("2M", "2m", "2Q", "2q", [1.0, 3.0]), - ], - ) - def test_resample_lowercase_frequency_deprecated( - self, freq, freq_depr, freq_res, freq_depr_res, data - ): - depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a " - f"future version. Please use '{freq[1:]}' instead." - depr_msg_res = f"'{freq_depr_res[1:]}' is deprecated and will be removed in a " - f"future version. Please use '{freq_res[1:]}' instead." - - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - rng_l = period_range("2020-01-01", "2020-08-01", freq=freq_depr) - ser = Series(np.arange(len(rng_l)), index=rng_l) - - rng = period_range("2020-01-01", "2020-08-01", freq=freq_res) - expected = Series(data=data, index=rng) - - with tm.assert_produces_warning(FutureWarning, match=depr_msg_res): - result = ser.resample(freq_depr_res).mean() - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "offset", - [ - offsets.MonthBegin(), - offsets.BYearBegin(2), - offsets.BusinessHour(2), - ], - ) - def test_asfreq_invalid_period_offset(self, offset, frame_or_series): - # GH#55785 - msg = re.escape(f"{offset} is not supported as period frequency") - - obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) - with pytest.raises(ValueError, match=msg): - obj.asfreq(freq=offset) - - -@pytest.mark.parametrize( - "freq,freq_depr", - [ - ("2M", "2ME"), - ("2Q", "2QE"), - ("2Q-FEB", "2QE-FEB"), - ("2Y", "2YE"), - ("2Y-MAR", "2YE-MAR"), - ("2M", "2me"), - ("2Q", "2qe"), - ("2Y-MAR", "2ye-mar"), - ], -) -def test_resample_frequency_ME_QE_YE_error_message(frame_or_series, freq, freq_depr): - # GH#9586 - msg = f"for Period, please use '{freq[1:]}' instead of '{freq_depr[1:]}'" - - obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) - with pytest.raises(ValueError, match=msg): - obj.resample(freq_depr) - - -def test_corner_cases_period(simple_period_range_series): - # miscellaneous test coverage - len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0] - # it works - msg = "Resampling with a PeriodIndex is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = len0pts.resample("Y-DEC").mean() - assert len(result) == 0 - - -@pytest.mark.parametrize( - "freq_depr", - [ - "2BME", - "2CBME", - "2SME", - "2BQE-FEB", - "2BYE-MAR", - ], -) -def test_resample_frequency_invalid_freq(frame_or_series, freq_depr): - # GH#9586 - msg = f"Invalid frequency: {freq_depr[1:]}" - - obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) - with pytest.raises(ValueError, match=msg): - obj.resample(freq_depr) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index f3b9c909290a8..ac86a67afa4a4 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -674,7 +674,7 @@ def test_selection_api_validation(): # non DatetimeIndex msg = ( - "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "Only valid with DatetimeIndex or TimedeltaIndex, " "but got an instance of 'Index'" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 11ad9240527d5..fedbcab26c80c 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -96,8 +96,8 @@ def test_fails_on_no_datetime_index(index): df = DataFrame({"a": range(len(index))}, index=index) msg = ( - "Only valid with DatetimeIndex, TimedeltaIndex " - f"or PeriodIndex, but got an instance of '{name}'" + "Only valid with DatetimeIndex or TimedeltaIndex, " + f"but got an instance of '{name}'" ) with pytest.raises(TypeError, match=msg): df.groupby(Grouper(freq="D")) From 7842bc0045dcd8139edf65af2448cc2451f88613 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 26 Mar 2024 20:15:31 -0700 Subject: [PATCH 2/2] update asv --- asv_bench/benchmarks/timeseries.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 06f488f7baaaf..f94e51bdd28ba 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -158,12 +158,11 @@ def time_method(self, method): class ResampleSeries: - params = (["period", "datetime"], ["5min", "1D"], ["mean", "ohlc"]) + params = (["datetime"], ["5min", "1D"], ["mean", "ohlc"]) param_names = ["index", "freq", "method"] def setup(self, index, freq, method): indexes = { - "period": period_range(start="1/1/2000", end="1/1/2001", freq="min"), "datetime": date_range(start="1/1/2000", end="1/1/2001", freq="min"), } idx = indexes[index]