From 368cd7bfa267220c1748bb0316b771d003a35f9d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Oct 2022 13:45:15 -0700 Subject: [PATCH] DEPR: DTA.astype unitless, DTA(float_data, dtype=dt64tz) --- doc/source/whatsnew/v2.0.0.rst | 3 ++ pandas/core/arrays/datetimes.py | 35 +++++----------------- pandas/tests/dtypes/test_missing.py | 10 +++---- pandas/tests/series/methods/test_astype.py | 32 ++++---------------- 4 files changed, 20 insertions(+), 60 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 252c444b2e60c..f70996a400410 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -176,6 +176,7 @@ Removal of prior version deprecations/changes - Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) - Enforced deprecation disallowing using ``.astype`` to convert a ``datetime64[ns]`` :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-aware dtype, use ``obj.tz_localize`` or ``ser.dt.tz_localize`` instead (:issue:`39258`) - Enforced deprecation disallowing using ``.astype`` to convert a timezone-aware :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-naive ``datetime64[ns]`` dtype, use ``obj.tz_localize(None)`` or ``obj.tz_convert("UTC").tz_localize(None)`` instead (:issue:`39258`) +- Enforced deprecation disallowing passing a unit-less datetime64 dtype (i.e. 
``"datetime64"``, ``np.datetime64``, or ``np.dtype("datetime64")``) to :meth:`Series.astype` and :meth:`DataFrame.astype` (:issue:`48555`) - Removed Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`) @@ -257,6 +258,8 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) +- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`; the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`) +- .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1768bb7507dd9..a301c2274bb0f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -685,15 +685,11 @@ def astype(self, dtype, copy: bool = True): and dtype != self.dtype and is_unitless(dtype) ): - # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype - warnings.warn( - "Passing unit-less datetime64 dtype to .astype is deprecated " - "and will raise in a future version. 
Pass 'datetime64[ns]' instead", - FutureWarning, - stacklevel=find_stack_level(), + # GH#48555 + raise ValueError( + "Passing unit-less datetime64 dtype to .astype is not supported. " + "Pass 'datetime64[ns]' instead." ) - # unit conversion e.g. datetime64[s] - return self._ndarray.astype(dtype) elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) @@ -2263,27 +2259,10 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): return data, copy if is_float_dtype(data.dtype): - # Note: we must cast to datetime64[ns] here in order to treat these - # as wall-times instead of UTC timestamps. - data = data.astype(DT64NS_DTYPE) + # pre-2.0 we treated these as wall-times, inconsistent with ints + # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes + data = data.astype(np.int64) copy = False - if ( - tz is not None - and len(data) > 0 - and not timezones.is_utc(timezones.maybe_get_tz(tz)) - ): - # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes - warnings.warn( - "The behavior of DatetimeArray._from_sequence with a timezone-aware " - "dtype and floating-dtype data is deprecated. In a future version, " - "this data will be interpreted as nanosecond UTC timestamps " - "instead of wall-times, matching the behavior with integer dtypes. " - "To retain the old behavior, explicitly cast to 'datetime64[ns]' " - "before passing the data to pandas. 
To get the future behavior, " - "first cast to 'int64'.", - FutureWarning, - stacklevel=find_stack_level(), - ) elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index e1dd182a5ae30..cc365bef2b183 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -426,11 +426,9 @@ def test_array_equivalent(dtype_equal): dtype_equal=dtype_equal, ) - msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times" - with tm.assert_produces_warning(FutureWarning, match=msg): - dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") - dti2 = DatetimeIndex([0, np.nan], tz="CET") - dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") + dti2 = DatetimeIndex([0, np.nan], tz="CET") + dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") assert array_equivalent( dti1, @@ -444,7 +442,7 @@ def test_array_equivalent(dtype_equal): ) # The rest are not dtype_equal assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1) - assert not array_equivalent( + assert array_equivalent( dti2, dti1, ) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9b57f0f634a6c..d322f7d103413 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -34,14 +34,12 @@ def test_astype_unitless_dt64_deprecated(self): # GH#47844 ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]") - msg = "Passing unit-less datetime64 dtype to .astype is deprecated and " - with tm.assert_produces_warning(FutureWarning, match=msg): - res = ser.astype(np.datetime64) - tm.assert_series_equal(ser, res) + msg = "Passing unit-less datetime64 dtype to .astype is not supported" + with pytest.raises(ValueError, match=msg): + ser.astype(np.datetime64) - with 
tm.assert_produces_warning(FutureWarning, match=msg): - res = ser.astype("datetime64") - tm.assert_series_equal(ser, res) + with pytest.raises(ValueError, match=msg): + ser.astype("datetime64") def test_arg_for_errors_in_astype(self): # see GH#14878 @@ -394,10 +392,7 @@ def test_astype_nan_to_bool(self): def test_astype_ea_to_datetimetzdtype(self, dtype): # GH37553 ser = Series([4, 0, 9], dtype=dtype) - warn = FutureWarning if ser.dtype.kind == "f" else None - msg = "with a timezone-aware dtype and floating-dtype data" - with tm.assert_produces_warning(warn, match=msg): - result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) + result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) expected = Series( { @@ -407,21 +402,6 @@ def test_astype_ea_to_datetimetzdtype(self, dtype): } ) - if dtype in tm.FLOAT_EA_DTYPES: - expected = Series( - { - 0: Timestamp( - "1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific" - ), - 1: Timestamp( - "1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific" - ), - 2: Timestamp( - "1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific" - ), - } - ) - tm.assert_series_equal(result, expected) def test_astype_retain_Attrs(self, any_numpy_dtype):