diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1f245b585df48..f86c5099a685f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -273,6 +273,7 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) +- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`) - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe6fda34a89ef..7c0405495754d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11336,8 +11336,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -11351,8 +11349,6 @@ def resample( label=label, convention=convention, kind=kind, - loffset=loffset, - base=base, on=on, level=level, origin=origin, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 05494e37256df..8bf3820d2ea3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8454,8 +8454,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -8493,20 +8491,6 @@ def resample( Pass 'timestamp' to convert the resulting index to a `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. By default the input representation is retained. - loffset : timedelta, default None - Adjust the resampled time labels. - - .. deprecated:: 1.1.0 - You should add the loffset to the `df.index` after the resample. - See below. - - base : int, default 0 - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. - - .. deprecated:: 1.1.0 - The new arguments that you should use are 'offset' or 'origin'. on : str, optional For a DataFrame, column to use instead of index for resampling. @@ -8842,31 +8826,6 @@ def resample( 2000-10-02 00:12:00 45 2000-10-02 00:29:00 45 Freq: 17T, dtype: int64 - - To replace the use of the deprecated `base` argument, you can now use `offset`, - in this example it is equivalent to have `base=2`: - - >>> ts.resample('17min', offset='2min').sum() - 2000-10-01 23:16:00 0 - 2000-10-01 23:33:00 9 - 2000-10-01 23:50:00 36 - 2000-10-02 00:07:00 39 - 2000-10-02 00:24:00 24 - Freq: 17T, dtype: int64 - - To replace the use of the deprecated `loffset` argument: - - >>> from pandas.tseries.frequencies import to_offset - >>> loffset = '19min' - >>> ts_out = ts.resample('17min').sum() - >>> ts_out.index = ts_out.index + to_offset(loffset) - >>> ts_out - 2000-10-01 23:33:00 0 - 2000-10-01 23:50:00 9 - 2000-10-02 00:07:00 21 - 2000-10-02 00:24:00 54 - 2000-10-02 00:41:00 24 - Freq: 17T, dtype: int64 """ from pandas.core.resample import get_resampler @@ -8878,9 +8837,7 @@ def resample( closed=closed, axis=axis, kind=kind, - loffset=loffset, convention=convention, - base=base, key=on, level=level, origin=origin, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 7110c74e34473..175af95867c8e 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -11,7 +11,6 @@ Iterator, final, ) -import warnings import numpy as np @@ -23,7 +22,6 @@ ) from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -86,23 +84,6 @@ class Grouper: Only when `freq` parameter is passed. convention : {'start', 'end', 'e', 's'} If grouper is PeriodIndex and `freq` parameter is passed. - base : int, default 0 - Only when `freq` parameter is passed. - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. - - .. deprecated:: 1.1.0 - The new arguments that you should use are 'offset' or 'origin'. - - loffset : str, DateOffset, timedelta object - Only when `freq` parameter is passed. - - .. deprecated:: 1.1.0 - loffset is only working for ``.resample(...)`` and not for - Grouper (:issue:`28302`). - However, loffset is also deprecated for ``.resample(...)`` - See: :class:`DataFrame.resample` origin : Timestamp or str, default 'start_day' The timestamp on which to adjust the grouping. The timezone of origin must @@ -266,7 +247,6 @@ def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: from pandas.core.resample import TimeGrouper - _check_deprecated_resample_kwargs(kwargs, origin=cls) cls = TimeGrouper return super().__new__(cls) @@ -954,51 +934,3 @@ def _convert_grouper(axis: Index, grouper): return grouper else: return grouper - - -def _check_deprecated_resample_kwargs(kwargs, origin) -> None: - """ - Check for use of deprecated parameters in ``resample`` and related functions. - - Raises the appropriate warnings if these parameters are detected. - Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629). - - Parameters - ---------- - kwargs : dict - Dictionary of keyword arguments to check for deprecated parameters. - origin : object - From where this function is being called; either Grouper or TimeGrouper. Used - to determine an approximate stacklevel. - """ - # Deprecation warning of `base` and `loffset` since v1.1.0: - # we are raising the warning here to be able to set the `stacklevel` - # properly since we need to raise the `base` and `loffset` deprecation - # warning from three different cases: - # core/generic.py::NDFrame.resample - # core/groupby/groupby.py::GroupBy.resample - # core/groupby/grouper.py::Grouper - # raising these warnings from TimeGrouper directly would fail the test: - # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base - - if kwargs.get("base", None) is not None: - warnings.warn( - "'base' in .resample() and in Grouper() is deprecated.\n" - "The new arguments that you should use are 'offset' or 'origin'.\n" - '\n>>> df.resample(freq="3s", base=2)\n' - "\nbecomes:\n" - '\n>>> df.resample(freq="3s", offset="2s")\n', - FutureWarning, - stacklevel=find_stack_level(), - ) - if kwargs.get("loffset", None) is not None: - warnings.warn( - "'loffset' in .resample() and in Grouper() is deprecated.\n" - '\n>>> df.resample(freq="3s", loffset="8H")\n' - "\nbecomes:\n" - "\n>>> from pandas.tseries.frequencies import to_offset" - '\n>>> df = df.resample(freq="3s").mean()' - '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n', - FutureWarning, - stacklevel=find_stack_level(), - ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ee738b43a481b..e32e89b705cc0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -89,7 +89,6 @@ is_superperiod, ) from pandas.tseries.offsets import ( - DateOffset, Day, Nano, Tick, @@ -139,7 +138,6 @@ class Resampler(BaseGroupBy, PandasObject): "closed", "label", "convention", - "loffset", "kind", "origin", "offset", @@ -358,7 +356,6 @@ def aggregate(self, func=None, *args, **kwargs): how = func result = self._groupby_and_aggregate(how, *args, **kwargs) - result = self._apply_loffset(result) return result agg = aggregate @@ -475,38 +472,8 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): # try to evaluate result = grouped.apply(how, *args, **kwargs) - result = self._apply_loffset(result) return self._wrap_result(result) - def _apply_loffset(self, result): - """ - If loffset is set, offset the result index. - - This is NOT an idempotent routine, it will be applied - exactly once to the result. - - Parameters - ---------- - result : Series or DataFrame - the result of resample - """ - # error: Cannot determine type of 'loffset' - needs_offset = ( - isinstance( - self.loffset, # type: ignore[has-type] - (DateOffset, timedelta, np.timedelta64), - ) - and isinstance(result.index, DatetimeIndex) - and len(result.index) > 0 - ) - - if needs_offset: - # error: Cannot determine type of 'loffset' - result.index = result.index + self.loffset # type: ignore[has-type] - - self.loffset = None - return result - def _get_resampler_for_grouping(self, groupby, key=None): """ Return the correct class for resampling with groupby. @@ -1295,7 +1262,6 @@ def _downsample(self, how, **kwargs): # we want to call the actual grouper method here result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs) - result = self._apply_loffset(result) return self._wrap_result(result) def _adjust_binner_for_upsample(self, binner): @@ -1353,7 +1319,6 @@ def _upsample(self, method, limit=None, fill_value=None): res_index, method=method, limit=limit, fill_value=fill_value ) - result = self._apply_loffset(result) return self._wrap_result(result) def _wrap_result(self, result): @@ -1398,11 +1363,6 @@ def _convert_obj(self, obj: NDFrameT) -> NDFrameT: ) raise NotImplementedError(msg) - if self.loffset is not None: - # Cannot apply loffset/timedelta to PeriodIndex -> convert to - # timestamps - self.kind = "timestamp" - # convert to timestamp if self.kind == "timestamp": obj = obj.to_timestamp(how=self.convention) @@ -1563,7 +1523,6 @@ class TimeGrouper(Grouper): "closed", "label", "how", - "loffset", "kind", "convention", "origin", @@ -1581,10 +1540,8 @@ def __init__( axis: Axis = 0, fill_method=None, limit=None, - loffset=None, kind: str | None = None, convention: Literal["start", "end", "e", "s"] | None = None, - base: int | None = None, origin: Literal["epoch", "start", "start_day", "end", "end_day"] | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, @@ -1664,22 +1621,6 @@ def __init__( # always sort time groupers kwargs["sort"] = True - # Handle deprecated arguments since v1.1.0 of `base` and `loffset` (GH #31809) - if base is not None and offset is not None: - raise ValueError("'offset' and 'base' cannot be present at the same time") - - if base and isinstance(freq, Tick): - # this conversion handle the default behavior of base and the - # special case of GH #10530. Indeed in case when dealing with - # a TimedeltaIndex base was treated as a 'pure' offset even though - # the default behavior of base was equivalent of a modulo on - # freq_nanos. - self.offset = Timedelta(base * freq.nanos // freq.n) - - if isinstance(loffset, str): - loffset = to_offset(loffset) - self.loffset = loffset - super().__init__(freq=freq, axis=axis, **kwargs) def _get_resampler(self, obj, kind=None): @@ -1840,9 +1781,6 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): if self.offset: # GH 10530 & 31809 labels += self.offset - if self.loffset: - # GH 33498 - labels += self.loffset return binner, bins, labels diff --git a/pandas/core/series.py b/pandas/core/series.py index 9bfb2a0561532..7854bf6180733 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5809,8 +5809,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -5824,8 +5822,6 @@ def resample( label=label, convention=convention, kind=kind, - loffset=loffset, - base=base, on=on, level=level, origin=origin, diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py deleted file mode 100644 index e1e042aae1447..0000000000000 --- a/pandas/tests/resample/test_deprecated.py +++ /dev/null @@ -1,280 +0,0 @@ -from datetime import ( - datetime, - timedelta, -) - -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Series, -) -import pandas._testing as tm -from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import ( - PeriodIndex, - period_range, -) -from pandas.core.indexes.timedeltas import timedelta_range - -from pandas.tseries.offsets import ( - BDay, - Minute, -) - -DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) -PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) -TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") - -all_ts = pytest.mark.parametrize( - "_index_factory,_series_name,_index_start,_index_end", - [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], -) - - -@pytest.fixture() -def _index_factory(): - return period_range - - -@pytest.fixture -def create_index(_index_factory): - def _create_index(*args, **kwargs): - """return the _index_factory created using the args, kwargs""" - return _index_factory(*args, **kwargs) - - return _create_index - - -# new test to check that all FutureWarning are triggered -def test_deprecating_on_loffset_and_base(): - # GH 31809 - - idx = date_range("2001-01-01", periods=4, freq="T") - df = DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"]) - - with tm.assert_produces_warning(FutureWarning): - pd.Grouper(freq="10s", base=0) - with tm.assert_produces_warning(FutureWarning): - pd.Grouper(freq="10s", loffset="0s") - - # not checking the stacklevel for .groupby().resample() because it's complicated to - # reconcile it with the stacklevel for Series.resample() and DataFrame.resample(); - # see GH #37603 - with tm.assert_produces_warning(FutureWarning): - df.groupby("a").resample("3T", base=0).sum() - with tm.assert_produces_warning(FutureWarning): - df.groupby("a").resample("3T", loffset="0s").sum() - msg = "'offset' and 'base' cannot be present at the same time" - with tm.assert_produces_warning(FutureWarning): - with pytest.raises(ValueError, match=msg): - df.groupby("a").resample("3T", base=0, offset=0).sum() - - with tm.assert_produces_warning(FutureWarning): - df.resample("3T", base=0).sum() - with tm.assert_produces_warning(FutureWarning): - df.resample("3T", loffset="0s").sum() - - -@all_ts -@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_resample_loffset_arg_type(frame, create_index, arg): - # GH 13218, 15002 - df = frame - expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D") - - # loffset coerces PeriodIndex to DateTimeIndex - if isinstance(expected_index, PeriodIndex): - expected_index = expected_index.to_timestamp() - - expected_index += timedelta(hours=2) - expected = DataFrame({"value": expected_means}, index=expected_index) - - with tm.assert_produces_warning(FutureWarning): - result_agg = df.resample("2D", loffset="2H").agg(arg) - - if isinstance(arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) - - tm.assert_frame_equal(result_agg, expected) - - -@pytest.mark.parametrize( - "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")] -) -def test_resample_loffset(loffset): - # GH 7687 - rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") - s = Series(np.random.randn(14), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = s.resample( - "5min", closed="right", label="right", loffset=loffset - ).mean() - idx = date_range("1/1/2000", periods=4, freq="5min") - expected = Series( - [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], - index=idx + timedelta(minutes=1), - ) - tm.assert_series_equal(result, expected) - assert result.index.freq == Minute(5) - - # from daily - dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") - ser = Series(np.random.rand(len(dti)), dti) - - # to weekly - result = ser.resample("w-sun").last() - business_day_offset = BDay() - with tm.assert_produces_warning(FutureWarning): - expected = ser.resample("w-sun", loffset=-business_day_offset).last() - assert result.index[0] - business_day_offset == expected.index[0] - - -def test_resample_loffset_upsample(): - # GH 20744 - rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") - s = Series(np.random.randn(14), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = s.resample( - "5min", closed="right", label="right", loffset=timedelta(minutes=1) - ).ffill() - idx = date_range("1/1/2000", periods=4, freq="5min") - expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1)) - - tm.assert_series_equal(result, expected) - - -def test_resample_loffset_count(): - # GH 12725 - start_time = "1/1/2000 00:00:00" - rng = date_range(start_time, periods=100, freq="S") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = ts.resample("10S", loffset="1s").count() - - expected_index = date_range(start_time, periods=10, freq="10S") + timedelta( - seconds=1 - ) - expected = Series(10, index=expected_index) - - tm.assert_series_equal(result, expected) - - # Same issue should apply to .size() since it goes through - # same code path - with tm.assert_produces_warning(FutureWarning): - result = ts.resample("10S", loffset="1s").size() - - tm.assert_series_equal(result, expected) - - -def test_resample_base(): - rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - resampled = ts.resample("5min", base=2).mean() - exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min") - tm.assert_index_equal(resampled.index, exp_rng) - - -def test_resample_float_base(): - # GH25161 - dt = pd.to_datetime( - ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"] - ) - s = Series(np.arange(3), index=dt) - - base = 17 + 43.51 / 60 - with tm.assert_produces_warning(FutureWarning): - result = s.resample("3min", base=base).size() - expected = Series( - 3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"], freq="3min") - ) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("kind", ["period", None, "timestamp"]) -@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_loffset_returns_datetimeindex(frame, kind, agg_arg): - # make sure passing loffset returns DatetimeIndex in all cases - # basic method taken from Base.test_resample_loffset_arg_type() - df = frame - expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D") - - # loffset coerces PeriodIndex to DateTimeIndex - expected_index = expected_index.to_timestamp() - expected_index += timedelta(hours=2) - expected = DataFrame({"value": expected_means}, index=expected_index) - - with tm.assert_produces_warning(FutureWarning): - result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) - if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) - tm.assert_frame_equal(result_agg, expected) - - -@pytest.mark.parametrize( - "start,end,start_freq,end_freq,base,offset", - [ - ("19910905", "19910909 03:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 12:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 23:00", "H", "24H", 10, "10H"), - ("19910905 10:00", "19910909", "H", "24H", 10, "10H"), - ("19910905 10:00", "19910909 10:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 10:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 03:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "24H", 34, "34H"), - ("19910905 12:00", "19910909 12:00", "H", "17H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "17H", 3, "3H"), - ("19910905 12:00", "19910909 1:00", "H", "M", 3, "3H"), - ("19910905", "19910913 06:00", "2H", "24H", 10, "10H"), - ("19910905", "19910905 01:39", "Min", "5Min", 3, "3Min"), - ("19910905", "19910905 03:18", "2Min", "5Min", 3, "3Min"), - ], -) -def test_resample_with_non_zero_base(start, end, start_freq, end_freq, base, offset): - # GH 23882 - s = Series(0, index=period_range(start, end, freq=start_freq)) - s = s + np.arange(len(s)) - with tm.assert_produces_warning(FutureWarning): - result = s.resample(end_freq, base=base).mean() - result = result.to_timestamp(end_freq) - - # test that the replacement argument 'offset' works - result_offset = s.resample(end_freq, offset=offset).mean() - result_offset = result_offset.to_timestamp(end_freq) - tm.assert_series_equal(result, result_offset) - - # to_timestamp casts 24H -> D - result = result.asfreq(end_freq) if end_freq == "24H" else result - with tm.assert_produces_warning(FutureWarning): - expected = s.to_timestamp().resample(end_freq, base=base).mean() - if end_freq == "M": - # TODO: is non-tick the relevant characteristic? (GH 33815) - expected.index = expected.index._with_freq(None) - tm.assert_series_equal(result, expected) - - -def test_resample_base_with_timedeltaindex(): - # GH 10530 - rng = timedelta_range(start="0s", periods=25, freq="s") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - with_base = ts.resample("2s", base=5).mean() - without_base = ts.resample("2s").mean() - - exp_without_base = timedelta_range(start="0s", end="25s", freq="2s") - exp_with_base = timedelta_range(start="5s", end="29s", freq="2s") - - tm.assert_index_equal(without_base.index, exp_without_base) - tm.assert_index_equal(with_base.index, exp_with_base) diff --git a/pyproject.toml b/pyproject.toml index f61aed03331a9..0ce8cf87ab17e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,6 @@ exclude = ''' max-line-length = 88 disable = [ "abstract-class-instantiated", - "access-member-before-definition", "import-error", "invalid-repr-returned", "invalid-unary-operand-type",