From d1941c9ad4fb2ebcc1405e0c77c4dba055304952 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 25 Mar 2024 13:29:34 -0700 Subject: [PATCH 1/7] DEPR: Enforce datetimelike deprecations --- doc/source/whatsnew/v3.0.0.rst | 2 + pandas/core/arrays/datetimelike.py | 37 ++++++------------- pandas/tests/groupby/test_raises.py | 8 ++-- .../indexes/datetimes/test_date_range.py | 12 +++--- .../indexes/interval/test_interval_range.py | 9 ++--- .../tests/indexes/period/test_constructors.py | 8 ++-- .../indexes/timedeltas/test_constructors.py | 10 ++--- 7 files changed, 32 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d2381ae1e5e4..453b2cf2ab36d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -198,6 +198,8 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) +- Enforced deprecation disallowing :meth:`GroupBy.all` and :meth:`GroupBy.any` with ``datetime64`` and :class:`PeriodDtype` dtypes (:issue:`34479`) +- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, func:`interval_range`, (:issue:`56036`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) - Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) diff --git a/pandas/core/arrays/datetimelike.py 
b/pandas/core/arrays/datetimelike.py index 745774b34a3ad..d4c945ad23fd3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1666,11 +1666,9 @@ def _groupby_op( raise TypeError(f"datetime64 type does not support {how} operations") if how in ["any", "all"]: # GH#34479 - warnings.warn( - f"'{how}' with datetime64 dtypes is deprecated and will raise in a " - f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + f"'{how}' with datetime64 dtypes is no longer supported. " + f"Use (obj != pd.Timestamp(0)).{how}() instead." ) elif isinstance(dtype, PeriodDtype): @@ -1679,11 +1677,9 @@ def _groupby_op( raise TypeError(f"Period type does not support {how} operations") if how in ["any", "all"]: # GH#34479 - warnings.warn( - f"'{how}' with PeriodDtype is deprecated and will raise in a " - f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + f"'{how}' with PeriodDtype is no longer supported. " + f"Use (obj != pd.Period(0, freq)).{how}() instead." ) else: # timedeltas we can add but not multiply @@ -2530,14 +2526,14 @@ def validate_periods(periods: None) -> None: ... def validate_periods(periods: int | float) -> int: ... -def validate_periods(periods: int | float | None) -> int | None: +def validate_periods(periods: int | None) -> int | None: """ If a `periods` argument is passed to the Datetime/Timedelta Array/Index constructor, cast it to an integer. 
Parameters ---------- - periods : None, float, int + periods : None, int Returns ------- @@ -2546,21 +2542,10 @@ def validate_periods(periods: int | float | None) -> int | None: Raises ------ TypeError - if periods is None, float, or int + if periods is neither None nor an integer """ - if periods is not None: - if lib.is_float(periods): - warnings.warn( - # GH#56036 - "Non-integer 'periods' in pd.date_range, pd.timedelta_range, " - "pd.period_range, and pd.interval_range are deprecated and " - "will raise in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - periods = int(periods) - elif not lib.is_integer(periods): - raise TypeError(f"periods must be a number, got {periods}") + if periods is not None and not lib.is_integer(periods): + raise TypeError(f"periods must be an integer, got {periods}") return periods diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index f9d5de72eda1d..9f7d64b63caf7 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -241,8 +241,8 @@ def test_groupby_raises_datetime( return klass, msg = { - "all": (None, ""), - "any": (None, ""), + "all": (TypeError, "'all' with datetime64 dtypes is no longer supported"), + "any": (TypeError, "'any' with datetime64 dtypes is no longer supported"), "bfill": (None, ""), "corrwith": (TypeError, "cannot perform __mul__ with this index type"), "count": (None, ""), @@ -285,9 +285,7 @@ def test_groupby_raises_datetime( "var": (TypeError, "datetime64 type does not support var operations"), }[groupby_func] - if groupby_func in ["any", "all"]: - warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated" - elif groupby_func == "fillna": + if groupby_func == "fillna": kind = "Series" if groupby_series else "DataFrame" warn_msg = f"{kind}GroupBy.fillna is deprecated" else: diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 
43fcfd1e59670..45c4f981d5fe0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -135,16 +135,14 @@ def test_date_range_name(self): assert idx.name == "TEST" def test_date_range_invalid_periods(self): - msg = "periods must be a number, got foo" + msg = "periods must be a integer, got foo" with pytest.raises(TypeError, match=msg): date_range(start="1/1/2000", periods="foo", freq="D") def test_date_range_fractional_period(self): - msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range" - with tm.assert_produces_warning(FutureWarning, match=msg): - rng = date_range("1/1/2000", periods=10.5) - exp = date_range("1/1/2000", periods=10) - tm.assert_index_equal(rng, exp) + msg = "periods must be an integer" + with pytest.raises(TypeError, match=msg): + date_range("1/1/2000", periods=10.5) @pytest.mark.parametrize( "freq,freq_depr", @@ -1042,7 +1040,7 @@ def test_constructor(self): bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - msg = "periods must be a number, got B" + msg = "periods must be an integer, got B" with pytest.raises(TypeError, match=msg): date_range("2011-1-1", "2012-1-1", "B") diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 7aea481b49221..db25617df4c7d 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -236,11 +236,10 @@ def test_interval_dtype(self, start, end, expected): def test_interval_range_fractional_period(self): # float value for periods - expected = interval_range(start=0, periods=10) - msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = interval_range(start=0, periods=10.5) - tm.assert_index_equal(result, expected) + msg = "periods must be an integer, got 10.5" + ts = 
Timestamp("2024-03-25") + with pytest.raises(TypeError, match=msg): + interval_range(ts, periods=10.5) def test_constructor_coverage(self): # equivalent timestamp-like start/end diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index ec2216c102c3f..6aba9f17326ba 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -196,11 +196,9 @@ def test_constructor_invalid_quarters(self): ) def test_period_range_fractional_period(self): - msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = period_range("2007-01", periods=10.5, freq="M") - exp = period_range("2007-01", periods=10, freq="M") - tm.assert_index_equal(result, exp) + msg = "periods must be an integer, got 10.5" + with pytest.raises(TypeError, match=msg): + period_range("2007-01", periods=10.5, freq="M") def test_constructor_with_without_freq(self): # GH53687 diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 0510700bb64d7..ec4a5e3a597ea 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -186,14 +186,12 @@ def test_constructor_iso(self): tm.assert_index_equal(result, expected) def test_timedelta_range_fractional_period(self): - msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range" - with tm.assert_produces_warning(FutureWarning, match=msg): - rng = timedelta_range("1 days", periods=10.5) - exp = timedelta_range("1 days", periods=10) - tm.assert_index_equal(rng, exp) + msg = "periods must be an integer" + with pytest.raises(TypeError, match=msg): + timedelta_range("1 days", periods=10.5) def test_constructor_coverage(self): - msg = "periods must be a number, got foo" + msg = "periods must be an integer, got foo" with pytest.raises(TypeError, 
match=msg): timedelta_range(start="1 days", periods="foo", freq="D") From a2b0712f4fc900def8046444c126a652a9672d4f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 25 Mar 2024 18:34:36 -0700 Subject: [PATCH 2/7] update message in tests --- pandas/tests/indexes/datetimes/test_date_range.py | 4 ++-- pandas/tests/indexes/interval/test_interval_range.py | 2 +- pandas/tests/indexes/period/test_period_range.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 45c4f981d5fe0..99d05dd0f26e4 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -135,7 +135,7 @@ def test_date_range_name(self): assert idx.name == "TEST" def test_date_range_invalid_periods(self): - msg = "periods must be a integer, got foo" + msg = "periods must be an integer, got foo" with pytest.raises(TypeError, match=msg): date_range(start="1/1/2000", periods="foo", freq="D") @@ -1118,7 +1118,7 @@ def test_constructor(self): bdate_range(START, periods=20, freq=CDay()) bdate_range(end=START, periods=20, freq=CDay()) - msg = "periods must be a number, got C" + msg = "periods must be an integer, got C" with pytest.raises(TypeError, match=msg): date_range("2011-1-1", "2012-1-1", "C") diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index db25617df4c7d..5252b85ad8d0e 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -339,7 +339,7 @@ def test_errors(self): interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) # invalid periods - msg = "periods must be a number, got foo" + msg = "periods must be an integer, got foo" with pytest.raises(TypeError, match=msg): interval_range(start=0, periods="foo") diff --git a/pandas/tests/indexes/period/test_period_range.py 
b/pandas/tests/indexes/period/test_period_range.py index fb200d071951e..67f4d7421df23 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -70,7 +70,7 @@ def test_start_end_non_nat(self): def test_periods_requires_integer(self): # invalid periods param - msg = "periods must be a number, got foo" + msg = "periods must be an integer, got foo" with pytest.raises(TypeError, match=msg): period_range(start="2017Q1", periods="foo") From 8e2aa6fab65aaaebf06e7cd93e9d17ec2a7c91c7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 26 Mar 2024 10:52:25 -0700 Subject: [PATCH 3/7] update overload --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d4c945ad23fd3..180677a0690eb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2523,7 +2523,7 @@ def validate_periods(periods: None) -> None: ... @overload -def validate_periods(periods: int | float) -> int: ... +def validate_periods(periods: int) -> int: ... 
def validate_periods(periods: int | None) -> int | None: From 36072697030b7d42998e8105919e52bd209f6078 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 27 Mar 2024 10:45:07 -0700 Subject: [PATCH 4/7] mypy fixup --- pandas/core/arrays/datetimelike.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 86e56f6e0965c..b35c648bacd3b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2451,7 +2451,9 @@ def validate_periods(periods: int | None) -> int | None: """ if periods is not None and not lib.is_integer(periods): raise TypeError(f"periods must be an integer, got {periods}") - return periods + # error: Incompatible return value type (got "int | integer[Any] | None", + # expected "int | None") + return periods # type: ignore[return-value] def _validate_inferred_freq( From 65eb1733f21817b1584d2138d82f38c426f2f507 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Mar 2024 14:54:13 -0700 Subject: [PATCH 5/7] post-merge fixup --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 077cfca51622f..234a8feff885a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1662,7 +1662,7 @@ def _groupby_op( if dtype.kind == "M": # Adding/multiplying datetimes is not valid if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]: - raise TypeError(f"datetime64 type does not support {how} operations") + raise TypeError(f"datetime64 type does not support operation: '{how}'") if how in ["any", "all"]: # GH#34479 raise TypeError( From c816c17e319c4eeca71404758d3c729c68152b1e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Apr 2024 13:44:05 -0700 Subject: [PATCH 6/7] update asvs --- asv_bench/benchmarks/categoricals.py | 2 +- asv_bench/benchmarks/timeseries.py | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 69697906e493e..7d5b250c7b157 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -24,7 +24,7 @@ def setup(self): self.codes = np.tile(range(len(self.categories)), N) self.datetimes = pd.Series( - pd.date_range("1995-01-01 00:00:00", periods=N / 10, freq="s") + pd.date_range("1995-01-01 00:00:00", periods=N // 10, freq="s") ) self.datetimes_with_nat = self.datetimes.copy() self.datetimes_with_nat.iloc[-1] = pd.NaT diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 06f488f7baaaf..8deec502898d9 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -29,7 +29,7 @@ def setup(self, index_type): "dst": date_range( start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s" ), - "repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10), + "repeated": date_range(start="2000", periods=N // 10, freq="s").repeat(10), "tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"), "tz_local": date_range( start="2000", periods=N, freq="s", tz=dateutil.tz.tzlocal() From 1c9757382209777a6f65eac5bfc6d069b38b2cdf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 5 Apr 2024 10:05:59 -0700 Subject: [PATCH 7/7] Update doc/source/whatsnew/v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bbec339153634..003f3ea513c8d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -222,7 +222,7 @@ Removal of prior version deprecations/changes - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforce deprecation in 
:func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) -- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, func:`interval_range`, (:issue:`56036`) +- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`)