diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 71394a858aefe..d5b092e21a596 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -240,6 +240,7 @@ Other Deprecations - Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) - Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``. In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`) - Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) +- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`45573`) - Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`) - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f652e0b870ae7..e21f2e9d7b46e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2066,7 +2066,7 @@ def _sequence_to_dt64ns( inferred_freq = data.freq # By this point we are assured to have either a numpy array or Index - data, copy = maybe_convert_dtype(data, copy) + data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) if ( @@ -2246,7 +2246,7 @@ def objects_to_datetime64ns( raise TypeError(result) -def maybe_convert_dtype(data, copy: bool): +def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. @@ -2255,6 +2255,7 @@ def maybe_convert_dtype(data, copy: bool): ---------- data : np.ndarray or pd.Index copy : bool + tz : tzinfo or None, default None Returns ------- @@ -2274,8 +2275,23 @@ def maybe_convert_dtype(data, copy: bool): # as wall-times instead of UTC timestamps. data = data.astype(DT64NS_DTYPE) copy = False - # TODO: deprecate this behavior to instead treat symmetrically - # with integer dtypes. See discussion in GH#23675 + if ( + tz is not None + and len(data) > 0 + and not timezones.is_utc(timezones.maybe_get_tz(tz)) + ): + # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes + warnings.warn( + "The behavior of DatetimeArray._from_sequence with a timezone-aware " + "dtype and floating-dtype data is deprecated. In a future version, " + "this data will be interpreted as nanosecond UTC timestamps " + "instead of wall-times, matching the behavior with integer dtypes. " + "To retain the old behavior, explicitly cast to 'datetime64[ns]' " + "before passing the data to pandas. To get the future behavior, " + "first cast to 'int64'.", + FutureWarning, + stacklevel=find_stack_level(), + ) elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3f0a5deb97548..33ed64c7ae364 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -27,6 +27,7 @@ iNaT, nat_strings, parsing, + timezones, ) from pandas._libs.tslibs.parsing import ( # noqa:F401 DateParseError, @@ -364,7 +365,7 @@ def _convert_listlike_datetimes( # NB: this must come after unit transformation orig_arg = arg try: - arg, _ = maybe_convert_dtype(arg, copy=False) + arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz)) except TypeError: if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 1917fc615118a..a986e8d659202 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -415,23 +415,28 @@ def test_array_equivalent(dtype_equal): TimedeltaIndex([1, np.nan]), dtype_equal=dtype_equal, ) + + msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times" + with tm.assert_produces_warning(FutureWarning, match=msg): + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") + dti2 = DatetimeIndex([0, np.nan], tz="CET") + dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") + assert array_equivalent( - DatetimeIndex([0, np.nan], tz="US/Eastern"), - DatetimeIndex([0, np.nan], tz="US/Eastern"), + dti1, + dti1, dtype_equal=dtype_equal, ) assert not array_equivalent( - DatetimeIndex([0, np.nan], tz="US/Eastern"), - DatetimeIndex([1, np.nan], tz="US/Eastern"), + dti1, + dti3, dtype_equal=dtype_equal, ) # The rest are not dtype_equal + assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1) assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern") - ) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz="CET"), - DatetimeIndex([0, np.nan], tz="US/Eastern"), + dti2, + dti1, ) assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index f286dc4a09cb2..eda902d34bff5 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -140,7 +140,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_bl assert m is not None, f"incompatible typestr -> {typestr}" tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" - values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data + values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data values = ensure_block_shape(values, ndim=len(shape)) elif typestr in ("timedelta", "td", "m8[ns]"): values = (mat * 1).astype("m8[ns]") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index ff9bb3edeedb1..630f4cc3194f4 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -381,7 +381,12 @@ def test_astype_nan_to_bool(self): ) def test_astype_ea_to_datetimetzdtype(self, dtype): # GH37553 - result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific")) + ser = Series([4, 0, 9], dtype=dtype) + warn = FutureWarning if ser.dtype.kind == "f" else None + msg = "with a timezone-aware dtype and floating-dtype data" + with tm.assert_produces_warning(warn, match=msg): + result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) + expected = Series( { 0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),