Skip to content

Commit 2a4c9df

Browse files
authored
DEPR: treating float data as wall-times in DTA._from_sequence (pandas-dev#45573)
* DEPR: treating float data as wall-times in DTA._from_sequence
* GH refs
* mypy fixup
* lint fixup
1 parent 971752a commit 2a4c9df

File tree

6 files changed

+44
-16
lines changed

6 files changed

+44
-16
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ Other Deprecations
240240
- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`)
241241
- Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``. In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`)
242242
- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`)
243+
- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`45573`)
243244
- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`)
244245
- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
245246

pandas/core/arrays/datetimes.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -2066,7 +2066,7 @@ def _sequence_to_dt64ns(
20662066
inferred_freq = data.freq
20672067

20682068
# By this point we are assured to have either a numpy array or Index
2069-
data, copy = maybe_convert_dtype(data, copy)
2069+
data, copy = maybe_convert_dtype(data, copy, tz=tz)
20702070
data_dtype = getattr(data, "dtype", None)
20712071

20722072
if (
@@ -2246,7 +2246,7 @@ def objects_to_datetime64ns(
22462246
raise TypeError(result)
22472247

22482248

2249-
def maybe_convert_dtype(data, copy: bool):
2249+
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
22502250
"""
22512251
Convert data based on dtype conventions, issuing deprecation warnings
22522252
or errors where appropriate.
@@ -2255,6 +2255,7 @@ def maybe_convert_dtype(data, copy: bool):
22552255
----------
22562256
data : np.ndarray or pd.Index
22572257
copy : bool
2258+
tz : tzinfo or None, default None
22582259
22592260
Returns
22602261
-------
@@ -2274,8 +2275,23 @@ def maybe_convert_dtype(data, copy: bool):
22742275
# as wall-times instead of UTC timestamps.
22752276
data = data.astype(DT64NS_DTYPE)
22762277
copy = False
2277-
# TODO: deprecate this behavior to instead treat symmetrically
2278-
# with integer dtypes. See discussion in GH#23675
2278+
if (
2279+
tz is not None
2280+
and len(data) > 0
2281+
and not timezones.is_utc(timezones.maybe_get_tz(tz))
2282+
):
2283+
# GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
2284+
warnings.warn(
2285+
"The behavior of DatetimeArray._from_sequence with a timezone-aware "
2286+
"dtype and floating-dtype data is deprecated. In a future version, "
2287+
"this data will be interpreted as nanosecond UTC timestamps "
2288+
"instead of wall-times, matching the behavior with integer dtypes. "
2289+
"To retain the old behavior, explicitly cast to 'datetime64[ns]' "
2290+
"before passing the data to pandas. To get the future behavior, "
2291+
"first cast to 'int64'.",
2292+
FutureWarning,
2293+
stacklevel=find_stack_level(),
2294+
)
22792295

22802296
elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
22812297
# GH#29794 enforcing deprecation introduced in GH#23539

pandas/core/tools/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
iNaT,
2828
nat_strings,
2929
parsing,
30+
timezones,
3031
)
3132
from pandas._libs.tslibs.parsing import ( # noqa:F401
3233
DateParseError,
@@ -364,7 +365,7 @@ def _convert_listlike_datetimes(
364365
# NB: this must come after unit transformation
365366
orig_arg = arg
366367
try:
367-
arg, _ = maybe_convert_dtype(arg, copy=False)
368+
arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz))
368369
except TypeError:
369370
if errors == "coerce":
370371
npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))

pandas/tests/dtypes/test_missing.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -425,23 +425,28 @@ def test_array_equivalent(dtype_equal):
425425
TimedeltaIndex([1, np.nan]),
426426
dtype_equal=dtype_equal,
427427
)
428+
429+
msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times"
430+
with tm.assert_produces_warning(FutureWarning, match=msg):
431+
dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
432+
dti2 = DatetimeIndex([0, np.nan], tz="CET")
433+
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")
434+
428435
assert array_equivalent(
429-
DatetimeIndex([0, np.nan], tz="US/Eastern"),
430-
DatetimeIndex([0, np.nan], tz="US/Eastern"),
436+
dti1,
437+
dti1,
431438
dtype_equal=dtype_equal,
432439
)
433440
assert not array_equivalent(
434-
DatetimeIndex([0, np.nan], tz="US/Eastern"),
435-
DatetimeIndex([1, np.nan], tz="US/Eastern"),
441+
dti1,
442+
dti3,
436443
dtype_equal=dtype_equal,
437444
)
438445
# The rest are not dtype_equal
446+
assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1)
439447
assert not array_equivalent(
440-
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern")
441-
)
442-
assert not array_equivalent(
443-
DatetimeIndex([0, np.nan], tz="CET"),
444-
DatetimeIndex([0, np.nan], tz="US/Eastern"),
448+
dti2,
449+
dti1,
445450
)
446451

447452
assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan]))

pandas/tests/internals/test_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_bl
140140
assert m is not None, f"incompatible typestr -> {typestr}"
141141
tz = m.groups()[0]
142142
assert num_items == 1, "must have only 1 num items for a tz-aware"
143-
values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data
143+
values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data
144144
values = ensure_block_shape(values, ndim=len(shape))
145145
elif typestr in ("timedelta", "td", "m8[ns]"):
146146
values = (mat * 1).astype("m8[ns]")

pandas/tests/series/methods/test_astype.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,12 @@ def test_astype_nan_to_bool(self):
381381
)
382382
def test_astype_ea_to_datetimetzdtype(self, dtype):
383383
# GH37553
384-
result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific"))
384+
ser = Series([4, 0, 9], dtype=dtype)
385+
warn = FutureWarning if ser.dtype.kind == "f" else None
386+
msg = "with a timezone-aware dtype and floating-dtype data"
387+
with tm.assert_produces_warning(warn, match=msg):
388+
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))
389+
385390
expected = Series(
386391
{
387392
0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),

0 commit comments

Comments
 (0)